An extensible POSIX-compatible implementation of the ed(1) text editor
git clone https://git.8pit.net/edward.git
1;;>| Replacement Parser 2;;> 3;;> Edward [parser combinators][edward parse] for parsing replacement strings. 4;;> 5;;> [edward parse]: edward.parse.html 6 7(define parse-backref 8 (parse-map 9 (parse-seq 10 (parse-char #\\) 11 parse-digits) 12 (lambda (lst) 13 (cons 'backref (cadr lst))))) 14 15(define parse-matched 16 (parse-map 17 (parse-char #\&) 18 (lambda (ch) 19 (cons 'matched ch)))) 20 21(define (parse-restr delim) 22 (define replace-ctrl 23 (char-set-adjoin (char-set #\\ #\& #\newline) delim)) 24 25 (parse-map 26 (parse-as-string 27 (parse-repeat+ 28 (parse-or 29 ;; special handling for '%' as it does not neccessarily 30 ;; need to be escaped unless it's the only character. 31 (parse-esc 32 (parse-char (char-set-adjoin replace-ctrl #\%))) 33 (parse-not-char replace-ctrl)))) 34 (lambda (str) 35 (cons 'restr str)))) 36 37;;> Parse a replacement string within text enclosed with the delimiter 38;;> `delim`. While the combinator does not parse the enclosed character, 39;;> it ensures that this `delim` character is escaped (using a `\`) 40;;> within the replacement. 41;;> 42;;> Refer to the documentation of the [ed substitute][ed substitute] 43;;> command for more information on special character support within 44;;> the replacement. All of these special characters can also be 45;;> escaped. 46;;> 47;;> [ed substitute]: https://pubs.opengroup.org/onlinepubs/9699919799/utilities/ed.html#tag_20_38_13_25 48 49(define (parse-replace delim) 50 (parse-map 51 (parse-repeat 52 (parse-atomic 53 (parse-or 54 parse-backref 55 parse-matched 56 (parse-restr delim)))) 57 (lambda (lst) 58 ;; If the replacement is empty replace matched text with an empty string. 59 (if (null? lst) 60 (cons '(restr . "") lst) 61 lst)))) 62 63;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 64 65;;>| Replacement Procedures 66;;> 67;;> Procedures for performing replacements using a parsed replacement string. 68 69(define (submatch subm bv n) 70 (if (>= n (vector-length subm)) 71 (string->utf8 (number->string n)) ;; XXX: Handling for invalid backreferences 72 (let ((match (vector-ref subm n))) 73 (if match 74 (bytevector-copy bv (car match) (cdr match)) 75 #u8())))) 76 77(define (regex-replace* regex subst bv nth) 78 (define (apply-replacement subm bv replacement) 79 (fold (lambda (x y) 80 (bytevector-append y 81 (match x 82 (('restr . s) (string->utf8 s)) 83 (('matched . _) (submatch subm bv 0)) 84 (('backref . n) (submatch subm bv n))))) 85 #u8() replacement)) 86 87 ;; TODO: Refactor this function and make it more readable. 88 ;; Also don't rely on (values …) truncation (not in R7RS). 89 (define (%regex-replace* re start n) 90 (let* ((v (bytevector-copy bv start)) 91 (subm (regex-exec regex v))) 92 (if subm 93 (let* ((m (vector-ref subm 0)) ;; submatch for entire regex 94 (s (car m)) ;; start of submatch 95 (e (cdr m)) ;; end of submatch 96 97 (i (+ start e)) ;; next index in bv 98 (r (delay (bytevector-append 99 (bytevector-copy v 0 s)100 (apply-replacement subm v re)))))101 (values102 (if (eqv? n nth)103 (bytevector-append (force r) (bytevector-copy bv i))104 (bytevector-append105 (if (zero? nth) (force r) (bytevector-copy v 0 e))106 (%regex-replace* re i (+ n 1))))107 #t))108 (values v #f))))109110 (%regex-replace* subst 0 1))111112;;> Replace `nth` occurrence of `regex` in `str` with `subst`. If `nth`113;;> is zero, all occurrences are replaced. Returns two results: The string114;;> after performing all replacement and a boolean indicating if any115;;> replacements were successfully performed. The `regex` must be116;;> created using [make-regex][make-regex], while the replacement string117;;> `subst` must be parsed using [parse-replace][parse-replace].118;;>119;;> [make-regex]: https://wiki.call-cc.org/eggref/5/posix-regex#make-regex120;;> [parse-replace]: #parse-replace121122(define (regex-replace regex subst str nth)123 ;; regexec(3p) offsets are byte, not character offsets.124 ;; Thus, the string needs to be converted to a bytevector.125 (let-values (((result modified) (regex-replace* regex subst (string->utf8 str) nth)))126 (values (utf8->string result) modified)))