R⁷RS Scheme parser combinator library for decoding BARE messages
git clone https://git.8pit.net/kahl.git
1;; parse.scm -- Parser Combinators 2;; 3;; This file provides Scheme parser combinators for binary data. This 4;; implementation is based on (chibi parse) as provided by the 5;; chibi-scheme <https://github.com/ashinn/chibi-scheme> R7RS Scheme 6;; implementation. The original code was written by Alex Shinn 7;; (license below). 8 9;; Copyright (C) 2000-2015 Alex Shinn. All rights reserved. 10;; 11;; Redistribution and use in source and binary forms, with or without 12;; modification, are permitted provided that the following conditions 13;; are met: 14;; 15;; 1. Redistributions of source code must retain the above copyright 16;; notice, this list of conditions and the following disclaimer. 17;; 2. Redistributions in binary form must reproduce the above copyright 18;; notice, this list of conditions and the following disclaimer in the 19;; documentation and/or other materials provided with the distribution. 20;; 3. The name of the author may not be used to endorse or promote products 21;; derived from this software without specific prior written permission. 22;; 23;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 24;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 25;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 26;; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 27;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 28;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 32;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 33 34;; Copyright (C) 2021 Sören Tempel 35;; 36;; This program is free software: you can redistribute it and/or modify 37;; it under the terms of the GNU General Public License as published by 38;; the Free Software Foundation, either version 3 of the License, or 39;; (at your option) any later version. 40;; 41;; This program is distributed in the hope that it will be useful, 42;; but WITHOUT ANY WARRANTY; without even the implied warranty of 43;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 44;; GNU General Public License for more details. 45;; 46;; You should have received a copy of the GNU General Public License 47;; along with this program. If not, see <https://www.gnu.org/licenses/>. 48 49;;> \section{Parse Streams} 50 51;;> Parse streams are an abstraction to treat ports as proper streams 52;;> so that we can backtrack from previous states. A single 53;;> Parse-Stream record represents a single buffered chunk of text. 54 55(define-record-type Parse-Stream 56 (%make-parse-stream 57 filename port buffer offset prev-byte tail) 58 parse-stream? 59 ;; The file the data came from, for debugging and error reporting. 60 (filename parse-stream-filename) 61 ;; The underlying port. 62 (port parse-stream-port) 63 ;; A bytevector of characters read from the port. 64 (buffer parse-stream-buffer) 65 ;; The current offset of filled characters in the buffer. 66 ;; If offset is non-zero, (bytevector-u8-ref buffer (- offset 1)) is 67 ;; valid. 68 (offset parse-stream-offset parse-stream-offset-set!) 69 ;; The previous byte before the beginning of this Parse-Stream. 70 ;; Used for line/word-boundary checks. 71 (prev-byte parse-stream-prev-byte) 72 ;; The successor Parse-Stream chunk, created on demand and filled 73 ;; from the same port. 74 (tail %parse-stream-tail %parse-stream-tail-set!)) 75 76;; We want to balance avoiding reallocating buffers with avoiding 77;; holding many memoized values in memory. 78(define default-buffer-size 256) 79 80;;> Create a parse stream open on the given \var{filename}, with a 81;;> possibly already opened \var{port}. 82 83(define (make-parse-stream filename . o) 84 (let ((port (if (pair? o) (car o) (open-input-file filename))) 85 (len (if (and (pair? o) (pair? (cdr o))) 86 (cadr o) 87 default-buffer-size))) 88 (%make-parse-stream 89 filename port (make-bytevector len 0) 0 #f #f))) 90 91;;> Create a parse stream on a bytevector \var{bv}. 92 93(define (bytevector->parse-stream bv) 94 (make-parse-stream #f (open-input-bytevector bv))) 95 96;;> Access the next buffered chunk of a parse stream. 97 98(define (parse-stream-tail source) 99 (or (%parse-stream-tail source)100 (let* ((len (vector-length (parse-stream-buffer source)))101 (tail (%make-parse-stream (parse-stream-filename source)102 (parse-stream-port source)103 (make-bytevector len 0)104 0105 (parse-stream-last-byte source)106 #f)))107 (%parse-stream-tail-set! source tail)108 tail)))109110;; Fill the buffer of the given parse \var{source} with up to111;; \var{i} bytes. Returns \scheme{#t} on success and \scheme{#f}112;; on failure.113114(define (parse-stream-fill! source i)115 (let ((off (parse-stream-offset source))116 (buf (parse-stream-buffer source)))117 (if (<= off i)118 (call-with-current-continuation119 (lambda (k)120 (do ((off off (+ off 1)))121 ((> off i) (parse-stream-offset-set! source off))122 (let ((byte (read-u8 (parse-stream-port source))))123 (if (eof-object? byte)124 (k #f)125 (bytevector-u8-set! buf off byte))))))126 #t)))127128(define (parse-stream-ref source i)129 (if (parse-stream-fill! source i)130 (bytevector-u8-ref (parse-stream-buffer source) i)131 #f))132133(define (parse-stream-last-byte source)134 (let ((buf (parse-stream-buffer source)))135 (let lp ((i (min (- (bytevector-length buf) 1)136 (parse-stream-offset source))))137 (if (negative? i)138 (parse-stream-prev-byte source)139 (let ((ch (bytevector-u8-ref buf i)))140 (if (eof-object? ch)141 (lp (- i 1))142 ch))))))143144(define (parse-stream-next-source source i)145 (if (>= (+ i 1) (bytevector-length (parse-stream-buffer source)))146 (parse-stream-tail source)147 source))148149(define (parse-stream-next-index source i)150 (if (>= (+ i 1) (bytevector-length (parse-stream-buffer source)))151 0152 (+ i 1)))153154;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;155156;;> \section{Parser Interface}157158;;> Call the parser combinator \var{f} on the parse stream159;;> \var{source}, starting at index \var{index}, passing the result to160;;> the given success continuation \var{sk}, which should be a161;;> procedure of the form \scheme{(result source index fail)}. The162;;> optional failure continuation should be a procedure of the form163;;> \scheme{(source index reason)}, and defaults to just returning164;;> \scheme{#f}.165166(define (call-with-parse f source index sk . o)167 (let ((s (if (bytevector? source)168 (bytevector->parse-stream source)169 source))170 (fk (if (pair? o)171 (car o)172 (lambda (s i reason) #f))))173 (f s index sk fk)))174175;;> Call the parser combinator \var{f} on the parse stream176;;> \var{source}, at index \var{index}, and return the result, or177;;> \scheme{#f} if parsing fails.178179(define (parse f source . o)180 (let ((index (if (pair? o) (car o) 0)))181 (call-with-parse f source index (lambda (r s i fk) r))))182183;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;184185;; \section{Constant Parsers}186187;; Parse nothing successfully.188189(define parse-epsilon190 (lambda (source index sk fk)191 (sk #t source index fk)))192193;; Parse nothing successfully and return \var{ret}.194195(define (parse-result ret)196 (parse-bind parse-epsilon197 (lambda (x) ret)))198199;; Always fails to parse with \var{msg}.200201(define (parse-fail msg)202 (lambda (source index sk fk)203 (fk source index msg)))204205;; Parse byte if it statifies the predicate \var{pred}, fail otherwise.206207(define (parse-pred pred)208 (lambda (source index sk fk)209 (let ((byte (parse-stream-ref source index)))210 (if byte211 (if (pred byte)212 (sk byte213 (parse-stream-next-source source index)214 (parse-stream-next-index source index)215 fk)216 (fk source index "failed predicate"))217 (fk source index "unexpected eof")))))218219;; Parse next byte.220221(define parse-byte222 (parse-pred (lambda (x) #t)))223224;; Parse bytevector of the given \var{size} in bytes. Returns a225;; bytevector of the given size, not a list.226227(define (parse-bytevector size)228 (parse-bind229 (parse-seq-list (make-list size parse-byte))230 (lambda (lst)231 (if (zero? size)232 #u8()233 (apply bytevector lst)))))234235(define (parse-with-context ctx f)236 (define yield (lambda (r s i fk) r))237238 (lambda (source index sk fk)239 ;; call-with-parse modifies source and needs to be called first.240 (let* ((ctx (call-with-parse ctx source index yield fk))241 (field-start (parse-stream-offset source)))242 (if ctx243 ((f ctx) source field-start sk fk)244 (fk source index "context parser failed")))))245246(define (parse-seq-list o)247 (define ignored-value (list 'ignore))248249 (cond250 ((null? o)251 parse-epsilon)252 ((null? (cdr o))253 (let ((f (car o)))254 (lambda (s i sk fk)255 (f s i (lambda (r s i fk) (sk (list r) s i fk)) fk))))256 (else257 (let* ((f (car o))258 (o (cdr o))259 (g (car o))260 (o (cdr o))261 (g (if (pair? o)262 (apply parse-seq g o)263 (lambda (s i sk fk)264 (g s i (lambda (r s i fk) (sk (list r) s i fk)) fk)))))265 (lambda (source index sk fk)266 (f source267 index268 (lambda (r s i fk)269 (g s i (lambda (r2 s i fk)270 (let ((r2 (if (eq? r ignored-value) r2 (cons r r2))))271 (sk r2 s i fk)))272 fk))273 fk))))))274275;; The sequence combinator. Each combinator is applied in turn just276;; past the position of the previous. If all succeed, returns a list277;; of the results in order, skipping any ignored values.278279(define (parse-seq . o)280 (parse-seq-list o))281282;; The repetition combinator. Parse \var{f} repeatedly and return a283;; list of the results. \var{lo} is the minimum number of parses284;; (deafult 0) to be considered a successful parse, and \var{hi} is285;; the maximum number (default infinite) before stopping.286287(define (parse-repeat f . o)288 (let ((lo (if (pair? o) (car o) 0))289 (hi (and (pair? o) (pair? (cdr o)) (cadr o))))290 (parse-repeat-kons cons f lo hi)))291292(define (parse-repeat-kons kons f lo hi)293 (lambda (source0 index0 sk fk)294 (let repeat ((source source0) (index index0) (fk fk) (j 0) (res '()))295 (let ((fk (if (>= j lo)296 (lambda (s i r) (sk (reverse res) source index fk))297 fk)))298 (if (and hi (= j hi))299 (sk (reverse res) source index fk)300 (f source301 index302 (lambda (r s i fk)303 (let ((v (kons r res)))304 (if v305 (repeat s i fk (+ j 1) v)306 (fk source index "kons failed"))))307 fk))))))308309;; Parse \var{f} and apply the procedure \var{proc} to the result on success.310311(define (parse-bind f proc)312 (lambda (source index sk fk)313 (f source index (lambda (res s i fk)314 (sk (with-exception-handler315 (lambda (e) (fk source index (error-object-message e)))316 (lambda () (proc res)))317 s i fk)) fk)))