;;>| Replacement Parser
;;>
;;> Edward [parser combinators][edward parse] for parsing replacement strings.
;;>
;;> [edward parse]: edward.parse.html

;; Parses a backslash followed by digits and yields the pair
;; `(backref . N)`, where N is the value produced by `parse-digits`.
(define parse-backref
  (parse-map
    (parse-seq
      (parse-char #\\)
      parse-digits)
    (lambda (lst)
      (cons 'backref (cadr lst)))))

;; Parses an unescaped `&` and yields a pair tagged with `matched`.
;; The cdr carries the value produced by `parse-char`; consumers in
;; this file only inspect the tag.
(define parse-matched
  (parse-map
    (parse-char #\&)
    (lambda (ch)
      (cons 'matched ch))))

;; Parses a non-empty run of literal replacement text and yields
;; `(restr . STRING)`. Backslash, `&`, newline and `delim` end the run
;; unless they are escaped.
(define (parse-restr delim)
  (define replace-ctrl
    (char-set-adjoin (char-set #\\ #\& #\newline) delim))

  (parse-map
    (parse-as-string
      (parse-repeat+
        (parse-or
          ;; special handling for '%' as it does not necessarily
          ;; need to be escaped unless it's the only character.
          (parse-esc
            (parse-char (char-set-adjoin replace-ctrl #\%)))
          (parse-not-char replace-ctrl))))
    (lambda (str)
      (cons 'restr str))))

;;> Parse a replacement string within text enclosed with the delimiter
;;> `delim`. While the combinator does not parse the enclosed character,
;;> it ensures that this `delim` character is escaped (using a `\`)
;;> within the replacement.
;;>
;;> Refer to the documentation of the [ed substitute][ed substitute]
;;> command for more information on special character support within
;;> the replacement. All of these special characters can also be
;;> escaped.
;;>
;;> [ed substitute]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/ed.html#tag_20_38_13_25

(define (parse-replace delim)
  (parse-map
    (parse-repeat
      (parse-atomic
        (parse-or
          parse-backref
          parse-matched
          (parse-restr delim))))
    (lambda (lst)
      ;; If the replacement is empty replace matched text with an empty string.
      (if (null? lst)
        (cons '(restr . "") lst)
        lst))))

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;>| Replacement Procedures
;;>
;;> Procedures for performing replacements using a parsed replacement string.

;; Returns the bytes of submatch `n` taken from `bv`. `subm` is a vector
;; whose entries are either #f or a `(start . end)` pair of byte offsets
;; into `bv`. A submatch recorded as #f yields an empty bytevector.
(define (submatch subm bv n)
  (if (>= n (vector-length subm))
    (string->utf8 (number->string n)) ;; XXX: Handling for invalid backreferences
    ;; Not named `match`, so the pattern matching macro of that name
    ;; used elsewhere in this file is not shadowed here.
    (let ((range (vector-ref subm n)))
      (if range
        (bytevector-copy bv (car range) (cdr range))
        #u8()))))

;; Replaces the `nth` match of `regex` in the bytevector `bv` (every
;; match if `nth` is zero) with the parsed replacement `subst`.
;;
;; Returns two values: the resulting bytevector and a boolean which is
;; #t only if at least one replacement was actually performed. If `bv`
;; has fewer than `nth` matches, its content is returned unchanged
;; together with #f.
(define (regex-replace* regex subst bv nth)
  (define len (bytevector-length bv))

  ;; Expands the parsed replacement into bytes. `subm` and `bv` are the
  ;; submatch vector and the bytevector that its offsets refer to.
  (define (apply-replacement subm bv replacement)
    (fold (lambda (x y)
            (bytevector-append y
              (match x
                (('restr . s)   (string->utf8 s))
                (('matched . _) (submatch subm bv 0))
                (('backref . n) (submatch subm bv n)))))
          #u8() replacement))

  ;; Index of the first byte after the UTF-8 encoded character starting
  ;; at `idx`: steps over one byte and then over any continuation bytes
  ;; (#x80 to #xBF), so text is never split in the middle of a character.
  (define (next-char-index idx)
    (let loop ((i (+ idx 1)))
      (if (and (< i len)
               (<= #x80 (bytevector-u8-ref bv i) #xBF))
        (loop (+ i 1))
        i)))

  ;; Searches `bv` from byte offset `start`. `n` is the one-based number
  ;; the next match will get. `adjacent?` is #t when `start` is exactly
  ;; the end of the previous match.
  ;;
  ;; NOTE(review): every search runs on a fresh copy of the remaining
  ;; input, so anchors such as `^` may match at every restart position.
  ;; Confirm whether `regex-exec` offers a "not beginning of line" option.
  (define (%regex-replace* start n adjacent?)
    (let* ((v (bytevector-copy bv start))
           (subm (regex-exec regex v)))
      (if (not subm)
        (values v #f)
        (let* ((m (vector-ref subm 0)) ;; submatch for entire regex
               (s (car m))             ;; start of submatch (relative to v)
               (e (cdr m)))            ;; end of submatch (relative to v)
          (cond
            ;; An empty match directly behind the previous match is the
            ;; match already handled (or overlaps its end). Restarting
            ;; at the same offset would never make progress, so do not
            ;; count it and move on by one character instead.
            ((and adjacent? (zero? e))
             (if (>= start len)
               (values v #f)
               (let ((next (next-char-index start)))
                 (let-values (((tail tail-modified?)
                               (%regex-replace* next n #f)))
                   (values (bytevector-append
                             (bytevector-copy bv start next)
                             tail)
                           tail-modified?)))))

            ;; The requested match: replace it and keep the rest as is.
            ((eqv? n nth)
             (values (bytevector-append
                       (bytevector-copy v 0 s)
                       (apply-replacement subm v subst)
                       (bytevector-copy v e))
                     #t))

            ;; Either every match is replaced (nth = 0) or this match
            ;; comes before the requested one and is copied verbatim.
            (else
              (let ((replace? (zero? nth)))
                ;; Both results of the recursive call are received
                ;; explicitly; R7RS does not permit dropping a value.
                (let-values (((tail tail-modified?)
                              (%regex-replace* (+ start e) (+ n 1) #t)))
                  (values (bytevector-append
                            (if replace?
                              (bytevector-append
                                (bytevector-copy v 0 s)
                                (apply-replacement subm v subst))
                              (bytevector-copy v 0 e))
                            tail)
                          (or replace? tail-modified?))))))))))

  (%regex-replace* 0 1 #f))

;;> Replace `nth` occurrence of `regex` in `str` with `subst`. If `nth`
;;> is zero, all occurrences are replaced. Returns two results: The string
;;> after performing all replacements and a boolean indicating if any
;;> replacements were successfully performed.
;;> The `regex` must be created using [make-regex][make-regex], while the
;;> replacement string `subst` must be parsed using
;;> [parse-replace][parse-replace].
;;>
;;> [make-regex]: https://wiki.call-cc.org/eggref/5/posix-regex#make-regex
;;> [parse-replace]: #parse-replace

(define (regex-replace regex subst str nth)
  ;; Offsets reported by regexec(3p) count bytes, not characters. The
  ;; replacement is therefore carried out on the UTF-8 encoding of `str`
  ;; and the resulting bytes are decoded back into a string afterwards.
  (call-with-values
    (lambda ()
      (regex-replace* regex subst (string->utf8 str) nth))
    (lambda (bytes changed?)
      (values (utf8->string bytes) changed?))))