edward

An extensible POSIX-compatible implementation of the ed(1) text editor

git clone https://git.8pit.net/edward.git

  1;; parse.scm -- Parser Combinators
  2;; Copyright (c) 2013 Alex Shinn.  All rights reserved.
  3;; BSD-style license: http://synthcode.com/license.txt
  4
  5;;>| Parse Streams
  6;;>
  7;;> Parse streams are an abstraction to treat ports as proper streams
  8;;> so that we can backtrack from previous states.  A single
  9;;> Parse-Stream record represents a single buffered chunk of text.
 10
 11(define-record-type Parse-Stream
 12  (%make-parse-stream
 13   filename port buffer cache offset prev-char line column tail fk)
 14  parse-stream?
 15  ;; The file the data came from, for debugging and error reporting.
 16  (filename parse-stream-filename)
 17  ;; The underlying port.
 18  (port parse-stream-port)
 19  ;; A vector of characters read from the port.  We use a vector
 20  ;; rather than a string for guaranteed O(1) access.
 21  (buffer parse-stream-buffer)
 22  ;; A vector of caches corresponding to parser successes or failures
 23  ;; starting from the corresponding char.  Currently each cache is
 24  ;; just an alist, optimized under the assumption that the number of
 25  ;; possible memoized parsers is relatively small.  Note that
 26  ;; memoization is only enabled explicitly.
 27  (cache parse-stream-cache)
 28  ;; The current offset of filled characters in the buffer.
 29  ;; If offset is non-zero, (vector-ref buffer (- offset 1)) is
 30  ;; valid.
 31  (offset parse-stream-offset parse-stream-offset-set!)
 32  ;; The previous char before the beginning of this Parse-Stream.
 33  ;; Used for line/word-boundary checks.
 34  (prev-char parse-stream-prev-char)
 35  ;; The debug info for the start line and column of this chunk.
 36  (line parse-stream-line)
 37  (column parse-stream-column)
 38  ;; The successor Parse-Stream chunk, created on demand and filled
 39  ;; from the same port.
 40  (tail %parse-stream-tail %parse-stream-tail-set!)
 41  ;; Initial fk as passed to call-with-parse. Retained as part of
 42  ;; the Parse-Stream for the parse-commit procedure.
 43  (fk parse-stream-fk parse-stream-fk-set!))
 44
 45;; We want to balance avoiding reallocating buffers with avoiding
 46;; holding many memoized values in memory.
 47(define default-buffer-size 256)
 48
 49;;> Create a parse stream open on the given `filename`, with a
 50;;> possibly already opened `port`.
 51
 52(define (make-parse-stream filename . o)
 53  (let ((port (if (pair? o) (car o) (open-input-file filename)))
 54        (len (if (and (pair? o) (pair? (cdr o))) (cadr o) default-buffer-size)))
 55    (%make-parse-stream
 56     filename port (make-vector len #f) (make-vector len '()) 0 #f 0 0 #f #f)))
 57
 58;;> Open `filename` and create a parse stream on it.
 59
 60(define (file->parse-stream filename)
 61  (make-parse-stream filename (open-input-file filename)))
 62
 63;;> Create a parse stream on a string `str`.
 64
 65(define (string->parse-stream str)
 66  (make-parse-stream #f (open-input-string str)))
 67
 68;;> Access the next buffered chunk of a parse stream.
 69
 70(define (parse-stream-tail source)
 71  (or (%parse-stream-tail source)
 72      (let* ((len (vector-length (parse-stream-buffer source)))
 73             (line-info (parse-stream-count-lines source))
 74             (line (+ (parse-stream-line source) (car line-info)))
 75             (col (if (zero? (car line-info))
 76                      (+ (parse-stream-column source) (cadr line-info))
 77                      (cadr line-info)))
 78             (tail (%make-parse-stream (parse-stream-filename source)
 79                                       (parse-stream-port source)
 80                                       (make-vector len #f)
 81                                       (make-vector len '())
 82                                       0
 83                                       (parse-stream-last-char source)
 84                                       line
 85                                       col
 86                                       #f
 87                                       (parse-stream-fk source))))
 88        (%parse-stream-tail-set! source tail)
 89        tail)))
 90
 91(define (parse-stream-fill! source i)
 92  (let ((off (parse-stream-offset source))
 93        (buf (parse-stream-buffer source))
 94        (src (parse-stream-port source)))
 95    (if (<= off i)
 96      ;; Optionally, the parse-stream-port can refer to a POSIX
 97      ;; file descriptor. In which case data will be accessed
 98      ;; using the (file-read) procedure. This is required in
 99      ;; order to read past EOF (required by ed(1)).
100      ;;
101      ;; TODO Currently a bit hacky, revisit after CHICKEN 6.
102      (if (port? src)
103        (do ((off off (+ off 1)))
104            ((> off i) (parse-stream-offset-set! source off))
105          (vector-set! buf off (read-char src)))
106        (let* ((siz (inc (- i off)))
107               (str (make-string siz))
108               (num (cadr (file-read src siz str))))
109          (if (zero? num)
110            ;; When EOF was encountered, add one eof-object to
111            ;; the buffer, then read past the EOF through recursion.
112            (begin
113              (vector-set! buf off (eof-object))
114              (parse-stream-offset-set! source (inc off))
115              (parse-stream-fill! source i))
116
117            ;; Copy data retrieved from file (via file-read) to buffer.
118            ;; XXX: Can't copy to vector buffer directly, unfortunately.
119            (do ((off off (+ off 1)))
120                ((> off i) (parse-stream-offset-set! source off))
121              (vector-set! buf off (string-ref str (- i off)))))))
122      #f)))
123
124;;> Returns true iff `i` is the first character position in the
125;;> parse stream `source`.
126
127(define (parse-stream-start? source i)
128  (and (zero? i) (not (parse-stream-prev-char source))))
129
130;;> Returns true iff `i` is the last character position in the
131;;> parse stream `source`.
132
133(define (parse-stream-end? source i)
134  (eof-object? (parse-stream-ref source i)))
135
136;;> Returns the character in parse stream `source` indexed by
137;;> `i`.
138
139(define (parse-stream-ref source i)
140  (parse-stream-fill! source i)
141  (vector-ref (parse-stream-buffer source) i))
142
143(define (parse-stream-last-char source)
144  (let ((buf (parse-stream-buffer source)))
145    (let lp ((i (min (- (vector-length buf) 1) (parse-stream-offset source))))
146      (if (negative? i)
147          (parse-stream-prev-char source)
148          (let ((ch (vector-ref buf i)))
149            (if (eof-object? ch)
150                (lp (- i 1))
151                ch))))))
152
153(define (parse-stream-char-before source i)
154  (if (> i (parse-stream-offset source))
155      (parse-stream-ref source (- i 1))
156      (parse-stream-prev-char source)))
157
158(define (parse-stream-max-char source)
159  (let ((buf (parse-stream-buffer source)))
160    (let lp ((i (min (- (vector-length buf) 1)
161                     (parse-stream-offset source))))
162      (if (or (negative? i)
163              (char? (vector-ref buf i)))
164          i
165          (lp (- i 1))))))
166
167(define (parse-stream-count-lines source . o)
168  (let* ((buf (parse-stream-buffer source))
169         (end (if (pair? o) (car o) (vector-length buf))))
170    (let lp ((i 0) (from 0) (lines 0))
171      (if (>= i end)
172          (list lines (- i from) from)
173          (let ((ch (vector-ref buf i)))
174            (cond
175             ((not (char? ch))
176              (list lines (- i from) from))
177             ((eqv? ch #\newline)
178              (lp (+ i 1) i (+ lines 1)))
179             (else
180              (lp (+ i 1) from lines))))))))
181
182(define (parse-stream-end-of-line source i)
183  (let* ((buf (parse-stream-buffer source))
184         (end (vector-length buf)))
185    (let lp ((i i))
186      (if (>= i end)
187          i
188          (let ((ch (vector-ref buf i)))
189            (if (or (not (char? ch)) (eqv? ch #\newline))
190                i
191                (lp (+ i 1))))))))
192
193(define (parse-stream-debug-info s i)
194  ;; i is the failed parse index, but we want the furthest reached
195  ;; location
196  (if (%parse-stream-tail s)
197      (parse-stream-debug-info (%parse-stream-tail s) i)
198      (let ((max-char (parse-stream-max-char s)))
199        (if (< max-char 0)
200            (list 0 0 "")
201            (let* ((line-info
202                    (parse-stream-count-lines s max-char))
203                   (line (+ (parse-stream-line s) (car line-info)))
204                   (col (if (zero? (car line-info))
205                            (+ (parse-stream-column s) (cadr line-info))
206                            (cadr line-info)))
207                   (from (car (cddr line-info)))
208                   (to (parse-stream-end-of-line s (+ from 1)))
209                   (str (parse-stream-substring s from s to)))
210              (list line col str))))))
211
212(define (parse-stream-next-source source i)
213  (if (>= (+ i 1) (vector-length (parse-stream-buffer source)))
214      (parse-stream-tail source)
215      source))
216
217(define (parse-stream-next-index source i)
218  (if (>= (+ i 1) (vector-length (parse-stream-buffer source)))
219      0
220      (+ i 1)))
221
222(define (parse-stream-close source)
223  (let ((src (parse-stream-port source)))
224    (if (port? src)
225      (close-input-port (parse-stream-port source))
226      (file-close src))))
227
228(define (vector-substring vec start . o)
229  (let* ((end (if (pair? o) (car o) (vector-length vec)))
230         (res (make-string (- end start))))
231    (do ((i start (+ i 1)))
232        ((= i end) res)
233      (string-set! res (- i start) (vector-ref vec i)))))
234
235(define (parse-stream-in-tail? s0 s1)
236  (let ((s0^ (%parse-stream-tail s0)))
237    (or (eq? s0^ s1)
238        (and s0^ (parse-stream-in-tail? s0^ s1)))))
239
240(define (parse-stream< s0 i0 s1 i1)
241  (if (eq? s0 s1)
242      (< i0 i1)
243      (parse-stream-in-tail? s0 s1)))
244
245;;> Returns a string composed of the characters starting at parse
246;;> stream `s0` index `i0` (inclusive), and ending at `s1`
247;;> index `i1` (exclusive).
248
249(define (parse-stream-substring s0 i0 s1 i1)
250  (cond
251   ((eq? s0 s1)
252    (parse-stream-fill! s0 i1)
253    (vector-substring (parse-stream-buffer s0) i0 i1))
254   (else
255    (let lp ((s (parse-stream-tail s0))
256             (res (list (vector-substring (parse-stream-buffer s0) i0))))
257      (let ((buf (parse-stream-buffer s)))
258        (cond
259         ((eq? s s1)
260          (apply string-append
261                 (reverse (cons (vector-substring buf 0 i1) res))))
262         (else
263          (lp (parse-stream-tail s)
264              (cons (vector-substring buf 0) res)))))))))
265
266(define (parse-stream-cache-cell s i f)
267  (assv f (vector-ref (parse-stream-cache s) i)))
268
269(define (parse-stream-cache-set! s i f x)
270  (let ((cache (vector-ref (parse-stream-cache s) i)))
271    (cond
272     ((assv f cache)
273      => (lambda (cell)
274           ;; prefer longer matches
275           (if (and (pair? (cdr cell))
276                    (parse-stream< (car (cddr cell)) (cadr (cddr cell)) s i))
277               (set-cdr! cell x))))
278     (else
279      (vector-set! (parse-stream-cache s) i (cons (cons f x) cache))))))
280
281;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
282
283;;>| Parser Interface
284;;>
285;;> Procedures for operating on a created parse stream.
286
287;;> Combinator to indicate failure.
288
289(define (parse-failure s i reason)
290  (let ((line+col (parse-stream-debug-info s i)))
291    (error "incomplete parse at" (append line+col (list reason)))))
292
293;;> Call the parser combinator `f` on the parse stream
294;;> `source`, starting at index `index`, passing the result to
295;;> the given success continuation `sk`, which should be a
296;;> procedure of the form `(result source index fail)`.  The
297;;> optional failure continuation should be a procedure of the form
298;;> `(source index reason)`, and defaults to just returning
299;;> `#f`.
300
301(define (call-with-parse f source index sk . o)
302  (let ((s (if (string? source) (string->parse-stream source) source))
303        (fk (if (pair? o) (car o) (lambda (s i reason) #f))))
304    (parse-stream-fk-set! s fk)
305    (f s index sk fk)))
306
307;;> Call the parser combinator `f` on the parse stream
308;;> `source`, at index `index`, and return the result, or
309;;> `#f` if parsing fails.
310
311(define (parse f source . o)
312  (let ((index (if (pair? o) (car o) 0)))
313    (call-with-parse f source index (lambda (r s i fk) r))))
314
315;;> Call the parser combinator `f` on the parse stream
316;;> `source`, at index `index`.  If the entire source is not
317;;> parsed, raises an error, otherwise returns the result.
318
319(define (parse-fully f source . o)
320  (let ((s (if (string? source) (string->parse-stream source) source))
321        (index (if (pair? o) (car o) 0)))
322    (call-with-parse
323     f s index
324     (lambda (r s i fk)
325       (if (parse-stream-end? s i) r (fk s i "incomplete parse")))
326     parse-failure)))
327
328;;> The fundamental parse iterator.  Repeatedly applies the parser
329;;> combinator `f` to `source`, starting at `index`, as
330;;> long as a valid parse is found.  On each successful parse applies
331;;> the procedure `kons` to the parse result and the previous
332;;> `kons` result, beginning with `knil`.  If no parses
333;;> succeed returns `knil`.
334
335(define (parse-fold f kons knil source . o)
336  (let lp ((p (if (string? source) (string->parse-stream source) source))
337           (index (if (pair? o) (car o) 0))
338           (acc knil))
339    (f p index (lambda (r s i fk) (lp s i (kons r acc))) (lambda (s i r) acc))))
340
341;;> Parse as many of the parser combinator `f` from the parse
342;;> stream `source`, starting at `index`, as possible, and
343;;> return the result as a list.
344
345(define (parse->list f source . o)
346  (let ((index (if (pair? o) (car o) 0)))
347    (reverse (parse-fold f cons '() source index))))
348
349;;> As `parse->list` but requires the entire source be parsed
350;;> with no left over characters, signalling an error otherwise.
351
352(define (parse-fully->list f source . o)
353  (let lp ((s (if (string? source) (string->parse-stream source) source))
354           (index (if (pair? o) (car o) 0))
355           (acc '()))
356    (f s index
357       (lambda (r s i fk)
358         (if (eof-object? r) (reverse acc) (lp s i (cons r acc))))
359       (lambda (s i reason) (error "incomplete parse")))))
360
361;;> Return a new parser combinator with the same behavior as `f`,
362;;> but on failure replaces the reason with `reason`.  This can be
363;;> useful to provide more descriptive parse failure reasons when
364;;> chaining combinators.  For example, `parse-string` just
365;;> expects to parse a single fixed string.  If it were defined in
366;;> terms of `parse-char`, failure would indicate some char
367;;> failed to match, but it's more useful to describe the whole string
368;;> we were expecting to see.
369
370(define (parse-with-failure-reason f reason)
371  (lambda (r s i fk)
372    (f r s i (lambda (s i r) (fk s i reason)))))
373
374;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
375
376;;>| Basic Parsing Combinators
377;;>
378;;> Combinators to construct new parsers.
379
380;;> Parse nothing successfully.
381
382(define parse-epsilon
383  (lambda (source index sk fk)
384    (sk #t source index fk)))
385
386;;> Parse any single character successfully.  Fails at end of input.
387
388(define parse-anything
389  (lambda (source index sk fk)
390    (if (parse-stream-end? source index)
391        (fk source index "end of input")
392        (sk (parse-stream-ref source index)
393            (parse-stream-next-source source index)
394            (parse-stream-next-index source index)
395            fk))))
396
397;;> Always fail to parse.
398
399(define parse-nothing
400  (lambda (source index sk fk)
401    (fk source index "nothing")))
402
403;;> The disjunction combinator.  Returns the first combinator that
404;;> succeeds parsing from the same source and index.
405
406(define (parse-or f . o)
407  (if (null? o)
408      f
409      (let ((g (apply parse-or o)))
410        (lambda (source index sk fk)
411          (let ((fk2 (lambda (s i r)
412                       (g source index sk fk
413                          ;; (lambda (s2 i2 r2)
414                          ;;   (fk s2 i2 `(or ,r ,r2)))
415                          ))))
416            (f source index sk fk2))))))
417
418;;> The conjunction combinator.  If both `f` and `g` parse
419;;> successfully starting at the same source and index, returns the
420;;> result of `g`.  Otherwise fails.
421
422(define (parse-and f g)
423  (lambda (source index sk fk)
424    (f source index (lambda (r s i fk) (g source index sk fk)) fk)))
425
426;;> The negation combinator.  If `f` succeeds, fails, otherwise
427;;> succeeds with `#t`.
428
429(define (parse-not f)
430  (lambda (source index sk fk)
431    (f source index (lambda (r s i fk) (fk s i "not"))
432       (lambda (s i r) (sk #t source index fk)))))
433
434(define (parse-seq-list o)
435  (cond
436   ((null? o)
437    parse-epsilon)
438   ((null? (cdr o))
439    (let ((f (car o)))
440      (lambda (s i sk fk)
441        (f s i (lambda (r s i fk)
442                 (sk (if (eq? r ignored-value) '() (list r)) s i fk))
443           fk))))
444   (else
445    (let* ((f (car o))
446           (o (cdr o))
447           (g (car o))
448           (o (cdr o))
449           (g (if (pair? o)
450                  (apply parse-seq g o)
451                  (lambda (s i sk fk)
452                    (g s i (lambda (r s i fk)
453                             (sk (if (eq? r ignored-value) '() (list r))
454                                 s i fk))
455                       fk)))))
456      (lambda (source index sk fk)
457        (f source
458           index
459           (lambda (r s i fk)
460             (g s i (lambda (r2 s i fk)
461                      (let ((r2 (if (eq? r ignored-value) r2 (cons r r2))))
462                        (sk r2 s i fk)))
463                fk))
464           fk))))))
465
466;;> The sequence combinator.  Each combinator is applied in turn just
467;;> past the position of the previous.  If all succeed, returns a list
468;;> of the results in order, skipping any ignored values.
469
470(define (parse-seq . o)
471  (parse-seq-list o))
472
473;;> Convert the list of parser combinators `ls` to a
474;;> `parse-seq` sequence.
475
476(define (list->parse-seq ls)
477  (if (null? (cdr ls)) (car ls) (parse-seq-list ls)))
478
479;;> The optional combinator.  Parse the combinator `f` (in
480;;> sequence with any additional combinator args `o`), and return
481;;> the result, or parse nothing successully on failure.
482
483(define (parse-optional f . o)
484  (if (pair? o)
485      (parse-optional (apply parse-seq f o))
486      (lambda (source index sk fk)
487        (f source index sk (lambda (s i r) (sk #f source index fk))))))
488
489(define ignored-value (list 'ignore))
490
491;;> The repetition combinator.  Parse `f` repeatedly and return a
492;;> list of the results.  `lo` is the minimum number of parses
493;;> (deafult 0) to be considered a successful parse, and `hi` is
494;;> the maximum number (default infinite) before stopping.
495
496(define (parse-repeat f . o)
497  (let ((lo (if (pair? o) (car o) 0))
498        (hi (and (pair? o) (pair? (cdr o)) (cadr o))))
499    (lambda (source0 index0 sk fk)
500      (let repeat ((source source0) (index index0) (fk fk) (j 0) (res '()))
501        (let ((fk (if (>= j lo)
502                      (lambda (s i r) (sk (reverse res) source index fk))
503                      fk)))
504          (if (and hi (= j hi))
505              (sk (reverse res) source index fk)
506              (f source
507                 index
508                 (lambda (r s i fk) (repeat s i fk (+ j 1) (cons r res)))
509                 fk)))))))
510
511;;> Parse `f` one or more times.
512
513(define (parse-repeat+ f)
514  (parse-repeat f 1))
515
516;;> Parse `f` and apply the procedure `proc` to the result on success.
517
518(define (parse-map f proc)
519  (lambda (source index sk fk)
520    (f source index (lambda (res s i fk) (sk (proc res) s i fk)) fk)))
521
522;;> Parse `f` and apply the procedure `proc` to the substring
523;;> of the parsed data.  `proc` defaults to the identity.
524
525(define (parse-map-substring f . o)
526  (let ((proc (if (pair? o) (car o) (lambda (res) res))))
527    (lambda (source index sk fk)
528      (f source
529         index
530         (lambda (res s i fk)
531           (sk (proc (parse-stream-substring source index s i)) s i fk))
532         fk))))
533
534;;> Parses the same streams as `f` but ignores the result on
535;;> success.  Inside a `parse-seq` the result will not be
536;;> included in the list of results.  Useful for discarding
537;;> boiler-plate without the need for post-processing results.
538
539(define (parse-ignore f)
540  (parse-map f (lambda (res) ignored-value)))
541
542;;> Parse with `f` and further require `check?` to return true
543;;> when applied to the result.
544
545(define (parse-assert f check?)
546  (lambda (source index sk fk)
547    (f source
548       index
549       (lambda (res s i fk)
550         (if (check? res) (sk res s i fk) (fk s i "assertion failed")))
551       fk)))
552
553;;> Parse with `f` once and keep the first result, not allowing
554;;> further backtracking within `f`.
555
556(define (parse-atomic f)
557  (lambda (source index sk fk)
558    (f source index (lambda (res s i fk2) (sk res s i fk)) fk)))
559
560;;> Parse with `f` once, keep the first result, and commit to the
561;;> current parse path, discarding any prior backtracking options.
562;;> Can optionally be passed a failure reason with which all resulting
563;;> failure messages will be prefixed.
564
565(define (parse-commit f . o)
566  (let ((prefix (if (pair? o) (string-append (car o) ": ") "")))
567    (lambda (source index sk fk)
568      (let ((commit-fk (parse-stream-fk source)))
569        (f
570          source
571          index
572          (lambda (res s i fk)
573            (sk res s i (lambda (s i r)
574                          (commit-fk s i (string-append prefix r)))))
575          fk)))))
576
577;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
578
579;;>| Boundary Checks
580;;>
581;;> Procedures for performing boundary checks within a parser combinator.
582
583;;> Returns true iff `index` is the first index of the first parse
584;;> stream `source`.
585
586(define parse-beginning
587  (lambda (source index sk fk)
588    (if (parse-stream-start? source index)
589        (sk #t source index fk)
590        (fk source index "expected beginning"))))
591
592;;> Returns true iff `index` is the last index of the last parse
593;;> stream `source`.
594
595(define parse-end
596  (lambda (source index sk fk)
597    (if (parse-stream-end? source index)
598        (begin
599          (sk #t
600              (parse-stream-next-source source index)
601              (parse-stream-next-index source index)
602              fk))
603      (fk source index "expected end"))))
604
605;;> Returns true iff `source`, `index` indicate the beginning
606;;> of a line (or the entire stream).
607
608(define parse-beginning-of-line
609  (lambda (source index sk fk)
610    (let ((before (parse-stream-char-before source index)))
611      (if (or (not before) (eqv? #\newline before))
612          (sk #t source index fk)
613          (fk source index "expected beginning of line")))))
614
615;;> Returns true iff `source`, `index` indicate the end of a
616;;> line (or the entire stream).
617
618(define parse-end-of-line
619  (lambda (source index sk fk)
620    (if (or (parse-stream-end? source index)
621            (eqv? #\newline (parse-stream-ref source index)))
622        (sk #t source index fk)
623        (fk source index "expected end of line"))))
624
625;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
626
627;;>| Constant Parsers
628;;>
629;;> Underlying combinators which parse a constant input and, contrary to
630;;> the parsers documented above, cannot be passed parser combinators as
631;;> procedure arguments.
632
633(define (parse-char-pred pred)
634  (lambda (source index sk fk)
635    (let ((ch (parse-stream-ref source index)))
636      (if (and (char? ch) (pred ch))
637          (sk ch
638              (parse-stream-next-source source index)
639              (parse-stream-next-index source index)
640              fk)
641          (fk source index "failed char pred")))))
642
643(define (x->char-predicate x)
644  (cond
645   ((char? x)
646    (lambda (ch) (eqv? ch x)))
647   ((char-set? x)
648    (lambda (ch) (and (char? ch) (char-set-contains? x ch))))
649   ((procedure? x)
650    (lambda (ch) (and (char? ch) (x ch))))
651   (else
652    (error "don't know how to handle char predicate" x))))
653
654;;> Parse a single char which matches `x`, which can be a
655;;> character, character set, or arbitrary procedure.
656
657(define (parse-char x)
658  (parse-char-pred (x->char-predicate x)))
659
660;;> Parse a single char which does not match `x`, which can be a
661;;> character, character set, or arbitrary procedure.
662
663(define (parse-not-char x)
664  (let ((pred (x->char-predicate x)))
665    (parse-char-pred (lambda (ch) (not (pred ch))))))
666
667;;> Parse the exact string `str`.
668
669(define (parse-string str)
670  (parse-map (parse-with-failure-reason
671              (parse-seq-list (map parse-char (string->list str)))
672              (string-append "expected '" str "'"))
673             list->string))
674
675;;> Parse a sequence of characters matching `x` as with
676;;> `parse-char`, and return the resulting substring.
677
678(define (parse-token x)
679  ;; (parse-map (parse-repeat+ (parse-char x)) list->string)
680  ;; Tokens are atomic - we don't want to split them at any point in
681  ;; the middle - so the implementation is slightly more complex than
682  ;; the above.  With a sane grammar the result would be the same
683  ;; either way, but this provides a useful optimization.
684  (let ((f (parse-char x)))
685    (lambda (source0 index0 sk fk)
686      (let lp ((source source0) (index index0))
687        (f source
688           index
689           (lambda (r s i fk) (lp s i))
690           (lambda (s i r)
691             (if (and (eq? source source0) (eqv? index index0))
692                 (fk s i r)
693                 (sk (parse-stream-substring source0 index0 source index)
694                     source index fk))))))))
695
696;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
697
698;;>| Laziness and Memoization
699;;>
700;;> [Lazy evaluation][wikipedia lazy] of parser combinators and [memoization][wikipedia memoization].
701;;>
702;;> [wikipedia lazy]: https://en.wikipedia.org/wiki/Lazy_evaluation
703;;> [wikipedia memoization]: https://en.wikipedia.org/wiki/Memoization
704
705;;> A delayed combinator.  This is equivalent to the parser combinator
706;;> `f`, but is delayed so it can be more efficient if never used
707;;> and `f` is expensive to compute.  Moreover, it can allow
708;;> self-referentiality as in:
709;;>
710;;>    (letrec* ((f (parse-lazy (parse-or (parse-seq g f) h))))
711;;>      ...)
712
713(define-syntax parse-lazy
714  (syntax-rules ()
715    ((parse-lazy f)
716     (let ((g (delay f)))
717       (lambda (source index sk fk)
718         ((force g) source index sk fk))))))
719
720;; Utility definitions for memoization.
721
722;; debugging
723(define *procedures* '())
724(define (procedure-name f)
725  (cond ((assq f *procedures*) => cdr) (else #f)))
726(define (procedure-name-set! f name)
727  (set! *procedures* (cons (cons f name) *procedures*)))
728
729(define memoized-failure (list 'failure))
730
731;;> Parse the same strings as `f`, but memoize the result at each
732;;> source and index to avoid exponential backtracking.  `name` is
733;;> provided for debugging only.
734
735(define (parse-memoize name f)
736  ;;(if (not (procedure-name f)) (procedure-name-set! f name))
737  (lambda (source index sk fk)
738    (cond
739     ((parse-stream-cache-cell source index f)
740      => (lambda (cell)
741           (if (and (pair? (cdr cell)) (eq? memoized-failure (cadr cell)))
742               (fk source index (cddr cell))
743               (apply sk (append (cdr cell) (list fk))))))
744     (else
745      (f source
746         index
747         (lambda (res s i fk)
748           (parse-stream-cache-set! source index f (list res s i))
749           (sk res s i fk))
750         (lambda (s i r)
751           (if (not (pair? (parse-stream-cache-cell source index f)))
752               (parse-stream-cache-set!
753                source index f (cons memoized-failure r)))
754           (fk s i r)))))))