kahl

R⁷RS Scheme parser combinator library for decoding BARE messages

git clone https://git.8pit.net/kahl.git

  1;;> \section{BARE Parsers}
  2
  3;;> \subsection{Primitive Types}
  4
  5;;> Parses an unsigned integer with a variable-length encoding. The
  6;;> maximum precision of such a number is 64-bits. The maximum length of
  7;;> an encoded uint is therefore 10 octets. If the message exceeds these
  8;;> limits a parsing error will be raised.
  9
 10(define parse-uint
 11  (parse-bind
 12    (parse-seq
 13      ;; Each octet has MSB set, except the last one.
 14      ;; A maximum of 10 octets can be passed, i.e. 9 with MSB set and one without.
 15      (parse-repeat (parse-pred msb-set?) 0 9)
 16      (parse-pred (lambda (x) (not (msb-set? x)))))
 17    (lambda (lst)
 18      (let* ((join (append (car lst) (cdr lst)))
 19             (bv   (apply bytevector join)))
 20        (if (and (eq? 10 (bytevector-length bv))
 21                 (> (bytevector-u8-ref bv 9) 1))
 22          (error "uint overflows a 64-bit integer")
 23          (bytevector->uint bv))))))
 24
 25;;> Parses a signed integer with a variable-length encoding. The
 26;;> maximum precision of such a number is 64-bits. The maximum length of
 27;;> an encoded uint is therefore 10 octets. While the length is checked,
 28;;> the precision is currently not.
 29
 30(define parse-int
 31  (parse-bind
 32    parse-uint
 33    uint->number))
 34
 35(define (parse-fixed-uint size)
 36  (parse-bind
 37    (parse-bytevector size)
 38    (lambda (bv)
 39      (bytevector->number size bv))))
 40
 41(define (parse-fixed-int size)
 42  (parse-bind
 43    (parse-fixed-uint size)
 44    (lambda (n)
 45      (from-twocomp (* size 8) n))))
 46
 47(define parse-u8  (parse-fixed-uint 1))
 48(define parse-u16 (parse-fixed-uint 2))
 49(define parse-u32 (parse-fixed-uint 4))
 50
 51;;> Parses unsigned integers of a fixed precision, respectively
 52;;> 8, 16, 32, 64 bits. The result is converted to a Scheme
 53;;> \scheme{number} in host byte order.
 54
 55(define parse-u64 (parse-fixed-uint 8))
 56
 57(define parse-i8  (parse-fixed-int 1))
 58(define parse-i16 (parse-fixed-int 2))
 59(define parse-i32 (parse-fixed-int 4))
 60
 61;;> Parses signed integers of a fixed precision, respectively
 62;;> 8, 16, 32, 64 bits. The result is converted to a Scheme
 63;;> \scheme{number} in host byte order.
 64
 65(define parse-i64 (parse-fixed-int 8))
 66
 67;;> Parses a boolean value, either \scheme{#t} or \scheme{#f}.
 68;;> If a value other than one or zero is found in the u8
 69;;> representation of the boolean value, a parsing error
 70;;> is raised.
 71
 72(define parse-bool
 73  (parse-with-context
 74    parse-byte
 75    (lambda (byte)
 76      (lambda (source index sk fk)
 77        (cond
 78          ((zero? byte)  (sk #f source index fk))
 79          ((eqv? byte 1) (sk #t source index fk))
 80          (else (fk source index "invalid boolean value")))))))
 81
 82;;> Parses an unsigned integer value from a set of possible values
 83;;> agreed upon in advance. The parsed value must be part of the
 84;;> \var{values} list, otherwise a parsing error is raised. The
 85;;> \var{values} list must be non-empty and each member of \var{values}
 86;;> must have a unique value otherwise a parser construction error
 87;;> is raised.
 88
 89(define (parse-enum values)
 90  (if (or (null? values) (not (lset-unique? values)))
 91    (error "enum must be a non-empty list of unique values")
 92    (parse-with-context
 93      parse-uint
 94      (lambda (v)
 95        (lambda (source index sk fk)
 96          ;; Comparing numbers → eqv? (memv) should suffice
 97          (if (memv v values)
 98            (sk v source index fk)
 99            (fk source index "enum value not part of given list")))))))
100
101;;> Parses a string of text. If the data is found to contain invalid
102;;> UTF-8 sequences, it should be considered invalid. However, this
103;;> implementation currently uses \scheme{utf8->string} internally,
104;;> which does not necessarily raise an error on invalid UTF-8 sequences.
105
106(define parse-string
107  ;; TODO: Explicitly check for invalid UTF-8 sequences.
108  (parse-with-context
109    parse-uint
110    (lambda (size)
111      (parse-bind
112        (parse-bytevector size)
113        utf8->string))))
114
115;;> Parses arbitrary data with a fixed \var{length} in octets.
116;;> If no \var{length} is given, arbitrary data of a variable
117;;> length in octets is parsed. The data must not be greater
118;;> than 18,446,744,073,709,551,615 octets in length (the
119;;> maximum value of a u64). A parsing error will be raised
120;;> if this limit is exceeded. The implementation will raise a
121;;> parser construction error if \var{length} is given as zero.
122
123(define (parse-data . length)
124  (if (null? length)
125    (parse-with-context
126      parse-uint
127      (lambda (size)
128        (parse-bytevector size)))
129    (let ((l (car length)))
130      (if (zero? l)
131        (error "length of fixed-length data must be at least 1")
132        (parse-bytevector l)))))
133
134;;> Parses a type with zero length. This always returns a \scheme{'void}
135;;> symbol on success.
136
137(define parse-void
138  (parse-result 'void))
139
140;;> \subsection{Aggregate Types}
141
142;;> Parses a value of \var{type} which may or may not be present. An
143;;> optional value whose initial value is set to a number other than
144;;> zero or one is considered invalid and results in a parsing error.
145;;> If no value is present \scheme{'nothing} is returned.
146
147(define (parse-optional type)
148  (parse-with-context
149    parse-u8
150    (lambda (opt)
151      (cond
152        ((zero? opt)  (parse-result 'nothing))
153        ((eqv? opt 1) type)
154        (else (parse-fail "invalid option value"))))))
155
156;;> Parses a list of \var{size} values of \var{type}. If no \var{size}
157;;> was specified, a variable-length list of values of \var{type} is
158;;> parsed. In the latter case, a parser construction error is raised
159;;> if the given length is zero.
160
161(define (parse-list type . length)
162  (if (null? length)
163    (parse-with-context
164      parse-uint
165      (lambda (size)
166        (parse-repeat
167          type
168          size size)))
169    (let ((l (car length)))
170      (if (zero? l)
171        (error "length of fixed-length arrays must be at least 1")
172        (parse-repeat type l l)))))
173
174;;> Parses a mapping of values of type \var{val-type} keyed by
175;;> values of type \var{val-type}. A message with repeated keys
176;;> is considered invalid. A parsing error is raised if such a
177;;> message is encountered.
178
179(define (parse-map key-type val-type)
180  (parse-with-context
181    parse-uint
182    (lambda (size)
183      (parse-repeat-kons
184        (lambda (x xs)
185          (and (not (member x xs))
186               (cons x xs)))
187        (parse-seq key-type val-type)
188        size size))))
189
190;;> Parses a tagged union whose value may be one of any type from a
191;;> \var{type-vector} of types. Each combinators index in \var{type-vector}
192;;> response to the numeric identifier for this type as encoded in the
193;;> message. A union with a tag value that does not have a corresponding
194;;> type assigned is considered invalid. A parsing error is raised when
195;;> encountering such a message. A parser construction error is raised
196;;> if \var{type-vector} is null.
197
198(define (parse-union type-vector)
199  (if (null? type-vector)
200    (error "unions must have at least one type")
201    (parse-with-context
202      parse-uint
203      (lambda (id)
204        (if (>= id (vector-length type-vector))
205          (parse-fail "unexpected tag in tagged union")
206          (vector-ref type-vector id))))))
207
208;;> Parses a set of values of arbitrary types, concatenated in the
209;;> order given by \var{types}. The result is concatenated to a
210;;> \scheme{vector}. A parser construction error is raised if
211;;> \var{types} is not given or null.
212
213(define (parse-struct . types)
214  (if (or (null? types)
215          (null? (car types)))
216    (error "structs must have at least one field")
217    (parse-bind
218      (parse-seq-list types)
219      list->vector)))