R⁷RS Scheme parser combinator library for decoding BARE messages
git clone https://git.8pit.net/kahl.git
1;;> \section{BARE Parsers} 2 3;;> \subsection{Primitive Types} 4 5;;> Parses an unsigned integer with a variable-length encoding. The 6;;> maximum precision of such a number is 64-bits. The maximum length of 7;;> an encoded uint is therefore 10 octets. If the message exceeds these 8;;> limits a parsing error will be raised. 9 10(define parse-uint 11 (parse-bind 12 (parse-seq 13 ;; Each octet has MSB set, except the last one. 14 ;; A maximum of 10 octets can be passed, i.e. 9 with MSB set and one without. 15 (parse-repeat (parse-pred msb-set?) 0 9) 16 (parse-pred (lambda (x) (not (msb-set? x))))) 17 (lambda (lst) 18 (let* ((join (append (car lst) (cdr lst))) 19 (bv (apply bytevector join))) 20 (if (and (eq? 10 (bytevector-length bv)) 21 (> (bytevector-u8-ref bv 9) 1)) 22 (error "uint overflows a 64-bit integer") 23 (bytevector->uint bv)))))) 24 25;;> Parses a signed integer with a variable-length encoding. The 26;;> maximum precision of such a number is 64-bits. The maximum length of 27;;> an encoded uint is therefore 10 octets. While the length is checked, 28;;> the precision is currently not. 29 30(define parse-int 31 (parse-bind 32 parse-uint 33 uint->number)) 34 35(define (parse-fixed-uint size) 36 (parse-bind 37 (parse-bytevector size) 38 (lambda (bv) 39 (bytevector->number size bv)))) 40 41(define (parse-fixed-int size) 42 (parse-bind 43 (parse-fixed-uint size) 44 (lambda (n) 45 (from-twocomp (* size 8) n)))) 46 47(define parse-u8 (parse-fixed-uint 1)) 48(define parse-u16 (parse-fixed-uint 2)) 49(define parse-u32 (parse-fixed-uint 4)) 50 51;;> Parses unsigned integers of a fixed precision, respectively 52;;> 8, 16, 32, 64 bits. The result is converted to a Scheme 53;;> \scheme{number} in host byte order. 54 55(define parse-u64 (parse-fixed-uint 8)) 56 57(define parse-i8 (parse-fixed-int 1)) 58(define parse-i16 (parse-fixed-int 2)) 59(define parse-i32 (parse-fixed-int 4)) 60 61;;> Parses signed integers of a fixed precision, respectively 62;;> 8, 16, 32, 64 bits. The result is converted to a Scheme 63;;> \scheme{number} in host byte order. 64 65(define parse-i64 (parse-fixed-int 8)) 66 67;;> Parses a boolean value, either \scheme{#t} or \scheme{#f}. 68;;> If a value other than one or zero is found in the u8 69;;> representation of the boolean value, a parsing error 70;;> is raised. 71 72(define parse-bool 73 (parse-with-context 74 parse-byte 75 (lambda (byte) 76 (lambda (source index sk fk) 77 (cond 78 ((zero? byte) (sk #f source index fk)) 79 ((eqv? byte 1) (sk #t source index fk)) 80 (else (fk source index "invalid boolean value"))))))) 81 82;;> Parses an unsigned integer value from a set of possible values 83;;> agreed upon in advance. The parsed value must be part of the 84;;> \var{values} list, otherwise a parsing error is raised. The 85;;> \var{values} list must be non-empty and each member of \var{values} 86;;> must have a unique value otherwise a parser construction error 87;;> is raised. 88 89(define (parse-enum values) 90 (if (or (null? values) (not (lset-unique? values))) 91 (error "enum must be a non-empty list of unique values") 92 (parse-with-context 93 parse-uint 94 (lambda (v) 95 (lambda (source index sk fk) 96 ;; Comparing numbers → eqv? (memv) should suffice 97 (if (memv v values) 98 (sk v source index fk) 99 (fk source index "enum value not part of given list")))))))100101;;> Parses a string of text. If the data is found to contain invalid102;;> UTF-8 sequences, it should be considered invalid. However, this103;;> implementation currently uses \scheme{utf8->string} internally,104;;> which does not necessarily raise an error on invalid UTF-8 sequences.105106(define parse-string107 ;; TODO: Explicitly check for invalid UTF-8 sequences.108 (parse-with-context109 parse-uint110 (lambda (size)111 (parse-bind112 (parse-bytevector size)113 utf8->string))))114115;;> Parses arbitrary data with a fixed \var{length} in octets.116;;> If no \var{length} is given, arbitrary data of a variable117;;> length in octets is parsed. The data must not be greater118;;> than 18,446,744,073,709,551,615 octets in length (the119;;> maximum value of a u64). A parsing error will be raised120;;> if this limit is exceeded. The implementation will raise a121;;> parser construction error if \var{length} is given as zero.122123(define (parse-data . length)124 (if (null? length)125 (parse-with-context126 parse-uint127 (lambda (size)128 (parse-bytevector size)))129 (let ((l (car length)))130 (if (zero? l)131 (error "length of fixed-length data must be at least 1")132 (parse-bytevector l)))))133134;;> Parses a type with zero length. This always returns a \scheme{'void}135;;> symbol on success.136137(define parse-void138 (parse-result 'void))139140;;> \subsection{Aggregate Types}141142;;> Parses a value of \var{type} which may or may not be present. An143;;> optional value whose initial value is set to a number other than144;;> zero or one is considered invalid and results in a parsing error.145;;> If no value is present \scheme{'nothing} is returned.146147(define (parse-optional type)148 (parse-with-context149 parse-u8150 (lambda (opt)151 (cond152 ((zero? opt) (parse-result 'nothing))153 ((eqv? opt 1) type)154 (else (parse-fail "invalid option value"))))))155156;;> Parses a list of \var{size} values of \var{type}. If no \var{size}157;;> was specified, a variable-length list of values of \var{type} is158;;> parsed. In the latter case, a parser construction error is raised159;;> if the given length is zero.160161(define (parse-list type . length)162 (if (null? length)163 (parse-with-context164 parse-uint165 (lambda (size)166 (parse-repeat167 type168 size size)))169 (let ((l (car length)))170 (if (zero? l)171 (error "length of fixed-length arrays must be at least 1")172 (parse-repeat type l l)))))173174;;> Parses a mapping of values of type \var{val-type} keyed by175;;> values of type \var{val-type}. A message with repeated keys176;;> is considered invalid. A parsing error is raised if such a177;;> message is encountered.178179(define (parse-map key-type val-type)180 (parse-with-context181 parse-uint182 (lambda (size)183 (parse-repeat-kons184 (lambda (x xs)185 (and (not (member x xs))186 (cons x xs)))187 (parse-seq key-type val-type)188 size size))))189190;;> Parses a tagged union whose value may be one of any type from a191;;> \var{type-vector} of types. Each combinators index in \var{type-vector}192;;> response to the numeric identifier for this type as encoded in the193;;> message. A union with a tag value that does not have a corresponding194;;> type assigned is considered invalid. A parsing error is raised when195;;> encountering such a message. A parser construction error is raised196;;> if \var{type-vector} is null.197198(define (parse-union type-vector)199 (if (null? type-vector)200 (error "unions must have at least one type")201 (parse-with-context202 parse-uint203 (lambda (id)204 (if (>= id (vector-length type-vector))205 (parse-fail "unexpected tag in tagged union")206 (vector-ref type-vector id))))))207208;;> Parses a set of values of arbitrary types, concatenated in the209;;> order given by \var{types}. The result is concatenated to a210;;> \scheme{vector}. A parser construction error is raised if211;;> \var{types} is not given or null.212213(define (parse-struct . types)214 (if (or (null? types)215 (null? (car types)))216 (error "structs must have at least one field")217 (parse-bind218 (parse-seq-list types)219 list->vector)))