kahl

R⁷RS Scheme parser combinator library for decoding BARE messages

git clone https://git.8pit.net/kahl.git

  1;; parse.scm -- Parser Combinators
  2;;
  3;; This file provides Scheme parser combinators for binary data. This
  4;; implementation is based on (chibi parse) as provided by the
  5;; chibi-scheme <https://github.com/ashinn/chibi-scheme> R7RS Scheme
  6;; implementation. The original code was written by Alex Shinn
  7;; (license below).
  8
  9;; Copyright (C) 2000-2015 Alex Shinn. All rights reserved.
 10;;
 11;; Redistribution and use in source and binary forms, with or without
 12;; modification, are permitted provided that the following conditions
 13;; are met:
 14;;
 15;; 1. Redistributions of source code must retain the above copyright
 16;;    notice, this list of conditions and the following disclaimer.
 17;; 2. Redistributions in binary form must reproduce the above copyright
 18;;    notice, this list of conditions and the following disclaimer in the
 19;;    documentation and/or other materials provided with the distribution.
 20;; 3. The name of the author may not be used to endorse or promote products
 21;;    derived from this software without specific prior written permission.
 22;;
 23;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 24;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 25;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 26;; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 27;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 28;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 29;; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 30;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 31;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 32;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 33
 34;; Copyright (C) 2021 Sören Tempel
 35;;
 36;; This program is free software: you can redistribute it and/or modify
 37;; it under the terms of the GNU General Public License as published by
 38;; the Free Software Foundation, either version 3 of the License, or
 39;; (at your option) any later version.
 40;;
 41;; This program is distributed in the hope that it will be useful,
 42;; but WITHOUT ANY WARRANTY; without even the implied warranty of
 43;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 44;; GNU General Public License for more details.
 45;;
 46;; You should have received a copy of the GNU General Public License
 47;; along with this program. If not, see <https://www.gnu.org/licenses/>.
 48
 49;;> \section{Parse Streams}
 50
 51;;> Parse streams are an abstraction to treat ports as proper streams
 52;;> so that we can backtrack from previous states.  A single
 53;;> Parse-Stream record represents a single buffered chunk of text.
 54
 55(define-record-type Parse-Stream
 56  (%make-parse-stream
 57   filename port buffer offset prev-byte tail)
 58  parse-stream?
 59  ;; The file the data came from, for debugging and error reporting.
 60  (filename parse-stream-filename)
 61  ;; The underlying port.
 62  (port parse-stream-port)
 63  ;; A bytevector of characters read from the port.
 64  (buffer parse-stream-buffer)
 65  ;; The current offset of filled characters in the buffer.
 66  ;; If offset is non-zero, (bytevector-u8-ref buffer (- offset 1)) is
 67  ;; valid.
 68  (offset parse-stream-offset parse-stream-offset-set!)
 69  ;; The previous byte before the beginning of this Parse-Stream.
 70  ;; Used for line/word-boundary checks.
 71  (prev-byte parse-stream-prev-byte)
 72  ;; The successor Parse-Stream chunk, created on demand and filled
 73  ;; from the same port.
 74  (tail %parse-stream-tail %parse-stream-tail-set!))
 75
 76;; We want to balance avoiding reallocating buffers with avoiding
 77;; holding many memoized values in memory.
 78(define default-buffer-size 256)
 79
 80;;> Create a parse stream open on the given \var{filename}, with a
 81;;> possibly already opened \var{port}.
 82
 83(define (make-parse-stream filename . o)
 84  (let ((port (if (pair? o) (car o) (open-input-file filename)))
 85        (len (if (and (pair? o) (pair? (cdr o)))
 86               (cadr o)
 87               default-buffer-size)))
 88    (%make-parse-stream
 89     filename port (make-bytevector len 0) 0 #f #f)))
 90
 91;;> Create a parse stream on a bytevector \var{bv}.
 92
 93(define (bytevector->parse-stream bv)
 94  (make-parse-stream #f (open-input-bytevector bv)))
 95
 96;;> Access the next buffered chunk of a parse stream.
 97
 98(define (parse-stream-tail source)
 99  (or (%parse-stream-tail source)
100      (let* ((len (vector-length (parse-stream-buffer source)))
101             (tail (%make-parse-stream (parse-stream-filename source)
102                                       (parse-stream-port source)
103                                       (make-bytevector len 0)
104                                       0
105                                       (parse-stream-last-byte source)
106                                       #f)))
107        (%parse-stream-tail-set! source tail)
108        tail)))
109
110;; Fill the buffer of the given parse \var{source} with up to
111;; \var{i} bytes. Returns \scheme{#t} on success and \scheme{#f}
112;; on failure.
113
114(define (parse-stream-fill! source i)
115  (let ((off (parse-stream-offset source))
116        (buf (parse-stream-buffer source)))
117    (if (<= off i)
118      (call-with-current-continuation
119        (lambda (k)
120          (do ((off off (+ off 1)))
121              ((> off i) (parse-stream-offset-set! source off))
122            (let ((byte (read-u8 (parse-stream-port source))))
123              (if (eof-object? byte)
124                (k #f)
125                (bytevector-u8-set! buf off byte))))))
126        #t)))
127
128(define (parse-stream-ref source i)
129  (if (parse-stream-fill! source i)
130    (bytevector-u8-ref (parse-stream-buffer source) i)
131    #f))
132
133(define (parse-stream-last-byte source)
134  (let ((buf (parse-stream-buffer source)))
135    (let lp ((i (min (- (bytevector-length buf) 1)
136                     (parse-stream-offset source))))
137      (if (negative? i)
138          (parse-stream-prev-byte source)
139          (let ((ch (bytevector-u8-ref buf i)))
140            (if (eof-object? ch)
141                (lp (- i 1))
142                ch))))))
143
144(define (parse-stream-next-source source i)
145  (if (>= (+ i 1) (bytevector-length (parse-stream-buffer source)))
146      (parse-stream-tail source)
147      source))
148
149(define (parse-stream-next-index source i)
150  (if (>= (+ i 1) (bytevector-length (parse-stream-buffer source)))
151      0
152      (+ i 1)))
153
154;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
155
156;;> \section{Parser Interface}
157
158;;> Call the parser combinator \var{f} on the parse stream
159;;> \var{source}, starting at index \var{index}, passing the result to
160;;> the given success continuation \var{sk}, which should be a
161;;> procedure of the form \scheme{(result source index fail)}.  The
162;;> optional failure continuation should be a procedure of the form
163;;> \scheme{(source index reason)}, and defaults to just returning
164;;> \scheme{#f}.
165
166(define (call-with-parse f source index sk . o)
167  (let ((s (if (bytevector? source)
168             (bytevector->parse-stream source)
169             source))
170        (fk (if (pair? o)
171              (car o)
172              (lambda (s i reason) #f))))
173    (f s index sk fk)))
174
175;;> Call the parser combinator \var{f} on the parse stream
176;;> \var{source}, at index \var{index}, and return the result, or
177;;> \scheme{#f} if parsing fails.
178
179(define (parse f source . o)
180  (let ((index (if (pair? o) (car o) 0)))
181    (call-with-parse f source index (lambda (r s i fk) r))))
182
183;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
184
185;; \section{Constant Parsers}
186
187;; Parse nothing successfully.
188
189(define parse-epsilon
190  (lambda (source index sk fk)
191    (sk #t source index fk)))
192
193;; Parse nothing successfully and return \var{ret}.
194
195(define (parse-result ret)
196  (parse-bind parse-epsilon
197              (lambda (x) ret)))
198
199;; Always fails to parse with \var{msg}.
200
201(define (parse-fail msg)
202  (lambda (source index sk fk)
203    (fk source index msg)))
204
205;; Parse byte if it statifies the predicate \var{pred}, fail otherwise.
206
207(define (parse-pred pred)
208  (lambda (source index sk fk)
209    (let ((byte (parse-stream-ref source index)))
210      (if byte
211        (if (pred byte)
212            (sk byte
213                (parse-stream-next-source source index)
214                (parse-stream-next-index source index)
215                fk)
216            (fk source index "failed predicate"))
217        (fk source index "unexpected eof")))))
218
219;; Parse next byte.
220
221(define parse-byte
222  (parse-pred (lambda (x) #t)))
223
224;; Parse bytevector of the given \var{size} in bytes. Returns a
225;; bytevector of the given size, not a list.
226
227(define (parse-bytevector size)
228  (parse-bind
229    (parse-seq-list (make-list size parse-byte))
230    (lambda (lst)
231      (if (zero? size)
232        #u8()
233        (apply bytevector lst)))))
234
235(define (parse-with-context ctx f)
236  (define yield (lambda (r s i fk) r))
237
238  (lambda (source index sk fk)
239    ;; call-with-parse modifies source and needs to be called first.
240    (let* ((ctx (call-with-parse ctx source index yield fk))
241           (field-start (parse-stream-offset source)))
242      (if ctx
243        ((f ctx) source field-start sk fk)
244        (fk source index "context parser failed")))))
245
246(define (parse-seq-list o)
247  (define ignored-value (list 'ignore))
248
249  (cond
250   ((null? o)
251    parse-epsilon)
252   ((null? (cdr o))
253    (let ((f (car o)))
254      (lambda (s i sk fk)
255        (f s i (lambda (r s i fk) (sk (list r) s i fk)) fk))))
256   (else
257    (let* ((f (car o))
258           (o (cdr o))
259           (g (car o))
260           (o (cdr o))
261           (g (if (pair? o)
262                  (apply parse-seq g o)
263                  (lambda (s i sk fk)
264                    (g s i (lambda (r s i fk) (sk (list r) s i fk)) fk)))))
265      (lambda (source index sk fk)
266        (f source
267           index
268           (lambda (r s i fk)
269             (g s i (lambda (r2 s i fk)
270                      (let ((r2 (if (eq? r ignored-value) r2 (cons r r2))))
271                        (sk r2 s i fk)))
272                fk))
273           fk))))))
274
275;; The sequence combinator. Each combinator is applied in turn just
276;; past the position of the previous. If all succeed, returns a list
277;; of the results in order, skipping any ignored values.
278
279(define (parse-seq . o)
280  (parse-seq-list o))
281
282;; The repetition combinator.  Parse \var{f} repeatedly and return a
283;; list of the results.  \var{lo} is the minimum number of parses
284;; (deafult 0) to be considered a successful parse, and \var{hi} is
285;; the maximum number (default infinite) before stopping.
286
287(define (parse-repeat f . o)
288  (let ((lo (if (pair? o) (car o) 0))
289        (hi (and (pair? o) (pair? (cdr o)) (cadr o))))
290    (parse-repeat-kons cons f lo hi)))
291
292(define (parse-repeat-kons kons f lo hi)
293  (lambda (source0 index0 sk fk)
294    (let repeat ((source source0) (index index0) (fk fk) (j 0) (res '()))
295      (let ((fk (if (>= j lo)
296                  (lambda (s i r) (sk (reverse res) source index fk))
297                  fk)))
298        (if (and hi (= j hi))
299          (sk (reverse res) source index fk)
300          (f source
301             index
302             (lambda (r s i fk)
303               (let ((v (kons r res)))
304                 (if v
305                   (repeat s i fk (+ j 1) v)
306                   (fk source index "kons failed"))))
307             fk))))))
308
309;; Parse \var{f} and apply the procedure \var{proc} to the result on success.
310
311(define (parse-bind f proc)
312  (lambda (source index sk fk)
313    (f source index (lambda (res s i fk)
314                      (sk (with-exception-handler
315                            (lambda (e) (fk source index (error-object-message e)))
316                            (lambda ()  (proc res)))
317                          s i fk)) fk)))