📄 regex.scm
字号:
(foreign-lambda* int ((int i)) "return(C_regex_ovector[i * 2]);") )(define ovector-end-ref (foreign-lambda* int ((int i)) "return(C_regex_ovector[(i * 2) + 1]);") );;; Gather matched result strings or positions:(define (gather-result-positions result) (let ([mc (car result)] [cc (cadr result)]) (and (fx> mc 0) (let loop ([i 0]) (cond [(fx>= i cc) '()] [(fx>= i mc) (cons #f (loop (fx+ i 1)))] [else (let ([start (ovector-start-ref i)]) (cons (and (fx>= start 0) (list start (ovector-end-ref i))) (loop (fx+ i 1)) ) ) ] ) ) ) ) )(define gather-results (let ([substring substring]) (lambda (str result) (let ([ps (gather-result-positions result)]) (and ps (##sys#map (lambda (poss) (and poss (apply substring str poss))) ps) ) ) ) ) );;; Common match string with compile regular expression:(define re-match (foreign-lambda* int (((const nonnull-pcre) code) ((const pcre_extra) extra) (nonnull-scheme-pointer str) (int start) (int range) (unsigned-integer options)) "return(pcre_exec(code, extra, str, start + range, start, options, C_regex_ovector, STATIC_OVECTOR_LEN * OVECTOR_LENGTH_MULTIPLE));") )(define re-match-capture-count (foreign-lambda* int (((const nonnull-pcre) code) ((const pcre_extra) extra)) "int cc;" "pcre_fullinfo(code, extra, PCRE_INFO_CAPTURECOUNT, &cc);" "return(cc + 1);") )(define (perform-match rgxp str si ri loc) (let* ([extra #f] [options 0] [rx (cond [(string? rgxp) (re-checked-compile rgxp 0 #f loc)] [(%regexp? rgxp) (set! extra (%regexp-extra rgxp)) (set! options (%regexp-options rgxp)) (%regexp-code rgxp)] [else (##sys#signal-hook #:type-error loc "bad argument type - not a string or compiled regular expression" rgxp)] )] [cc (re-match-capture-count rx extra)] [mc (re-match rx extra str si ri options)]) (when (string? rgxp) (re-finalizer rx)) (list mc cc) ) );;; Match string with regular expression:;; Note that start is a BYTE offset(define string-match)(define string-match-positions)(let () (define (prepare-match rgxp str start loc) (##sys#check-string str loc) (let ([si (if (pair? start) (car start) 0)]) (##sys#check-exact si loc) (perform-match (if (string? rgxp) (make-anchored-pattern rgxp (fx< 0 si)) rgxp) str si (fx- (##sys#size str) si) loc) ) ) (set! string-match (lambda (rgxp str . start) (gather-results str (prepare-match rgxp str start 'string-match)) ) ) (set! string-match-positions (lambda (rgxp str . start) (gather-result-positions (prepare-match rgxp str start 'string-match-positions)) ) ) );;; Search string with regular expression:;; Note that start & range are BYTE offsets(define string-search)(define string-search-positions)(let () (define (prepare-search rgxp str start-and-range loc) (##sys#check-string str loc) (let* ([range (and (pair? start-and-range) (cdr start-and-range)) ] [si (if range (car start-and-range) 0)] [ri (if (pair? range) (car range) (fx- (##sys#size str) si))] ) (##sys#check-exact si loc) (##sys#check-exact ri loc) (perform-match rgxp str si ri loc) ) ) (set! string-search (lambda (rgxp str . start-and-range) (gather-results str (prepare-search rgxp str start-and-range 'string-search)) ) ) (set! string-search-positions (lambda (rgxp str . start-and-range) (gather-result-positions (prepare-search rgxp str start-and-range 'string-search-positions)) ) ) );;; Split string into fields:(define string-split-fields (let ([reverse reverse] [substring substring] [string-search-positions string-search-positions] ) (lambda (rgxp str . mode-and-start) (##sys#check-string str 'string-split-fields) (let* ([argc (length mode-and-start)] [len (##sys#size str)] [mode (if (fx> argc 0) (car mode-and-start) #t)] [start (if (fx> argc 1) (cadr mode-and-start) 0)] [fini (case mode [(#:suffix) (lambda (ms start) (if (fx< start len) (##sys#error 'string-split-fields "record does not end with suffix" str rgxp) (reverse ms) ) ) ] [(#:infix) (lambda (ms start) (if (fx>= start len) (reverse (cons "" ms)) (reverse (cons (substring str start len) ms)) ) ) ] [else (lambda (ms start) (reverse ms)) ] ) ] [fetch (case mode [(#:infix #:suffix) (lambda (start from to) (substring str start from))] [else (lambda (start from to) (substring str from to))] ) ] ) (let loop ([ms '()] [start start]) (let ([m (string-search-positions rgxp str start)]) (if m (let* ([mp (car m)] [from (car mp)] [to (cadr mp)] ) (if (fx= from to) (if (fx= to len) (fini ms start) (loop (cons (fetch start (fx+ from 1) (fx+ to 2)) ms) (fx+ to 1)) ) (loop (cons (fetch start from to) ms) to) ) ) (fini ms start) ) ) ) ) ) ) );;; Substitute matching strings:(define string-substitute (let ([substring substring] [reverse reverse] [make-string make-string] [string-search-positions string-search-positions] ) (lambda (regex subst string . flag) (##sys#check-string subst 'string-substitute) (let* ([which (if (pair? flag) (car flag) 1)] [substlen (##sys#size subst)] [substlen-1 (fx- substlen 1)] [result '()] [total 0] ) (define (push x) (set! result (cons x result)) (set! total (fx+ total (##sys#size x))) ) (define (substitute matches) (let loop ([start 0] [index 0]) (if (fx>= index substlen-1) (push (if (fx= start 0) subst (substring subst start substlen))) (let ([c (##core#inline "C_subchar" subst index)] [index+1 (fx+ index 1)] ) (if (char=? c #\\) (let ([c2 (##core#inline "C_subchar" subst index+1)]) (if (and (not (char=? #\\ c2)) (char-numeric? c2)) (let ([mi (list-ref matches (fx- (char->integer c2) 48))]) (push (substring subst start index)) (push (substring string (car mi) (cadr mi))) (loop (fx+ index 2) index+1) ) (loop start (fx+ index+1 1)) ) ) (loop start index+1) ) ) ) ) ) (let loop ([index 0] [count 1]) (let ([matches (string-search-positions regex string index)]) (cond [matches (let* ([range (car matches)] [upto (cadr range)] ) (cond ((fx= 0 (fx- (cadr range) (car range))) (##sys#error 'string-substitute "empty substitution match" regex) ) ((or (not (fixnum? which)) (fx= count which)) (push (substring string index (car range))) (substitute matches) (loop upto #f) ) (else (push (substring string index upto)) (loop upto (fx+ count 1)) ) ) ) ] [else (push (substring string index (##sys#size string))) (##sys#fragments->string total (reverse result)) ] ) ) ) ) ) ) )(define string-substitute* (let ([string-substitute string-substitute]) (lambda (str smap . mode) (##sys#check-string str 'string-substitute*) (##sys#check-list smap 'string-substitute*) (let ((mode (and (pair? mode) (car mode)))) (let loop ((str str) (smap smap)) (if (null? smap) str (let ((sm (car smap))) (loop (string-substitute (car sm) (cdr sm) str mode) (cdr smap) ) ) ) ) ) ) ) );;; Glob support:;FIXME is it worthwhile making this accurate?(define (glob? str) (##sys#check-string str 'glob?) (let loop ([idx (fx- (string-length str) 1)]) (and (fx<= 0 idx) (case (string-ref str idx) [(#\* #\] #\?) (or (fx= 0 idx) (not (char=? #\\ (string-ref str (fx- idx 1)))) (loop (fx- idx 2)))] [else (loop (fx- idx 1))]) ) ) )(define glob->regexp (let ([list->string list->string] [string->list string->list] ) (lambda (s) (##sys#check-string s 'glob->regexp) (list->string (let loop ((cs (string->list s))) (if (null? cs) '() (let ([c (car cs)] [rest (cdr cs)] ) (cond [(char=? c #\*) `(#\. #\* ,@(loop rest))] [(char=? c #\?) (cons '#\. (loop rest))] [(char=? c #\[) (cons #\[ (let loop2 ((rest rest)) (match rest [(#\] . more) (cons #\] (loop more))] [(#\- c . more) `(#\- ,c ,@(loop2 more))] [(c1 #\- c2 . more) `(,c1 #\- ,c2 ,@(loop2 more))] [(c . more) (cons c (loop2 more))] [() (error 'glob->regexp "unexpected end of character class" s)] ) ) ) ] [(or (char-alphabetic? c) (char-numeric? c)) (cons c (loop rest))] [else `(#\\ ,c ,@(loop rest))] ) ) ) ) ) ) ) );;; Grep-like function on list:(define grep (let ([string-search string-search]) (lambda (rgxp lst) (##sys#check-list lst 'grep) (let loop ([lst lst]) (if (null? lst) '() (let ([x (car lst)] [r (cdr lst)] ) (if (string-search rgxp x) (cons x (loop r)) (loop r) ) ) ) ) ) ) );;; Escape regular expression (suggested by Peter Bex):(define regexp-escape (let ([open-output-string open-output-string] [get-output-string get-output-string] ) (lambda (str) (##sys#check-string str 'regexp-escape) (let ([out (open-output-string)] [len (##sys#size str)] ) (let loop ([i 0]) (cond [(fx>= i len) (get-output-string out)] [(memq (##core#inline "C_subchar" str i) '(#\. #\\ #\? #\* #\+ #\^ #\$ #\( #\) #\[ #\] #\| #\{ #\})) (##sys#write-char-0 #\\ out) (##sys#write-char-0 (##core#inline "C_subchar" str i) out) (loop (fx+ i 1)) ] [else (##sys#write-char-0 (##core#inline "C_subchar" str i) out) (loop (fx+ i 1)) ] ) ) ) ) ) );;; Anchored pattern:(define make-anchored-pattern (let ([string-append string-append]) (lambda (rgxp . args) (let-optionals args ([nos #f] [noe #f]) (cond [(string? rgxp) (string-append (if nos "" "^") rgxp (if noe "" "$"))] [else (##sys#check-structure rgxp 'regexp 'make-anchored-pattern) (when (or nos noe) (warning 'make-anchored-pattern "cannot select partial anchor for compiled regular expression") ) (%regexp-options-set! rgxp (bitwise-ior (%regexp-options regexp) (pcre-option->number 'anchored))) rgxp] ) ) ) ) )
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -