📄 srfi-13.scm
字号:
;;; This algorithm is O(m + n) where m and n are the ;;; lengths of the pattern and string respectively;;; KMP search source[start,end) for PATTERN. Return starting index of;;; leftmost match or #f.(define (%kmp-search pattern text c= p-start p-end t-start t-end) (let ((plen (- p-end p-start)) (rv (make-kmp-restart-vector pattern c= p-start p-end))) ;; The search loop. TJ & PJ are redundant state. (let lp ((ti t-start) (pi 0) (tj (- t-end t-start)) ; (- tlen ti) -- how many chars left. (pj plen)) ; (- plen pi) -- how many chars left. (if (= pi plen) (- ti plen) ; Win. (and (<= pj tj) ; Lose. (if (c= (string-ref text ti) ; Search. (string-ref pattern (+ p-start pi))) (lp (+ 1 ti) (+ 1 pi) (- tj 1) (- pj 1)) ; Advance. (let ((pi (vector-ref rv pi))) ; Retreat. (if (= pi -1) (lp (+ ti 1) 0 (- tj 1) plen) ; Punt. (lp ti pi tj (- plen pi))))))))));;; (make-kmp-restart-vector pattern [c= start end]) -> integer-vector;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Compute the KMP restart vector RV for string PATTERN. If;;; we have matched chars 0..i-1 of PATTERN against a search string S, and;;; PATTERN[i] doesn't match S[k], then reset i := RV[i], and try again to;;; match S[k]. If RV[i] = -1, then punt S[k] completely, and move on to;;; S[k+1] and PATTERN[0] -- no possible match of PAT[0..i] contains S[k].;;;;;; In other words, if you have matched the first i chars of PATTERN, but;;; the i+1'th char doesn't match, RV[i] tells you what the next-longest;;; prefix of PATTERN is that you have matched.;;;;;; - C= (default CHAR=?) is used to compare characters for equality.;;; Pass in CHAR-CI=? for case-folded string search.;;;;;; - START & END restrict the pattern to the indicated substring; the;;; returned vector will be of length END - START. The numbers stored;;; in the vector will be values in the range [0,END-START) -- that is,;;; they are valid indices into the restart vector; you have to add START;;; to them to use them as indices into PATTERN.;;;;;; I've split this out as a separate function in case other constant-string;;; searchers might want to use it.;;;;;; E.g.:;;; a b d a b x;;; #(-1 0 0 -1 1 2)(define (make-kmp-restart-vector pattern . maybe-c=+start+end) (let-optionals* maybe-c=+start+end ((c= char=?) rest) ; (procedure? c=)) (receive (rest2 start end) (string-parse-start+end make-kmp-restart-vector pattern rest) (let* ((rvlen (- end start)) (rv (make-vector rvlen -1))) (if (> rvlen 0) (let ((rvlen-1 (- rvlen 1)) (c0 (string-ref pattern start))) ;; Here's the main loop. We have set rv[0] ... rv[i]. ;; K = I + START -- it is the corresponding index into PATTERN. (let lp1 ((i 0) (j -1) (k start)) (if (< i rvlen-1) (let ((ck (string-ref pattern k))) ;; lp2 invariant: ;; pat[(k-j) .. k-1] matches pat[start .. start+j-1] ;; or j = -1. (let lp2 ((j j)) (cond ((= j -1) (let ((i1 (+ i 1))) (vector-set! rv i1 (if (c= ck c0) -1 0)) (lp1 i1 0 (+ k 1)))) ;; pat[(k-j) .. k] matches pat[start..start+j]. ((c= ck (string-ref pattern (+ j start))) (let* ((i1 (+ 1 i)) (j1 (+ 1 j))) (vector-set! rv i1 j1) (lp1 i1 j1 (+ k 1)))) (else (lp2 (vector-ref rv j)))))))))) rv))));;; We've matched I chars from PAT. C is the next char from the search string.;;; Return the new I after handling C. ;;;;;; The pattern is (VECTOR-LENGTH RV) chars long, beginning at index PAT-START;;; in PAT (PAT-START is usually 0). The I chars of the pattern we've matched;;; are ;;; PAT[PAT-START .. PAT-START + I].;;;;;; It's *not* an oversight that there is no friendly error checking or;;; defaulting of arguments. This is a low-level, inner-loop procedure;;; that we want integrated/inlined into the point of call.(define (kmp-step pat rv c i c= p-start) (let lp ((i i)) (if (c= c (string-ref pat (+ i p-start))) ; Match => (+ i 1) ; Done. (let ((i (vector-ref rv i))) ; Back up in PAT. (if (= i -1) 0 ; Can't back up further. (lp i)))))) ; Keep trying for match.;;; Zip through S[start,end), looking for a match of PAT. Assume we've;;; already matched the first I chars of PAT when we commence at S[start].;;; - <0: If we find a match *ending* at index J, return -J.;;; - >=0: If we get to the end of the S[start,end) span without finding;;; a complete match, return the number of chars from PAT we'd matched;;; when we ran off the end.;;;;;; This is useful for searching *across* buffers -- that is, when your;;; input comes in chunks of text. We hand-integrate the KMP-STEP loop;;; for speed.(define (string-kmp-partial-search pat rv s i . c=+p-start+s-start+s-end); (check-arg vector? rv string-kmp-partial-search) (let-optionals* c=+p-start+s-start+s-end ((c= char=?) ; (procedure? c=)) (p-start 0) rest) ; (and (integer? p-start) (exact? p-start) (<= 0 p-start))) (receive (rest2 s-start s-end) (string-parse-start+end string-kmp-partial-search s rest) ;; Enough prelude. Here's the actual code. (let ((patlen (vector-length rv))) (let lp ((si s-start) ; An index into S. (vi i)) ; An index into RV. (cond ((= vi patlen) (- si)) ; Win. ((= si s-end) vi) ; Ran off the end. (else ; Match s[si] & loop. (let ((c (string-ref s si))) (lp (+ si 1) (let lp2 ((vi vi)) ; This is just KMP-STEP. (if (c= c (string-ref pat (+ vi p-start))) (+ vi 1) (let ((vi (vector-ref rv vi))) (if (= vi -1) 0 (lp2 vi))))))))))))) );;; Misc;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (string-null? s);;; (string-reverse s [start end]);;; (string-reverse! s [start end]);;; (reverse-list->string clist);;; (string->list s [start end])(define (string-null? s) (##core#inline "C_i_string_null_p" s))(define (string-reverse s . maybe-start+end) (let-string-start+end (start end) string-reverse s maybe-start+end (let* ((len (- end start)) (ans (make-string len))) (do ((i start (+ i 1)) (j (- len 1) (- j 1))) ((< j 0)) (string-set! ans j (string-ref s i))) ans)))(define (string-reverse! s . maybe-start+end) (let-string-start+end (start end) string-reverse! s maybe-start+end (do ((i (- end 1) (- i 1)) (j start (+ j 1))) ((<= i j)) (let ((ci (string-ref s i))) (string-set! s i (string-ref s j)) (string-set! s j ci)))))#| this is already available in library.scm:(define (reverse-list->string clist) (let* ((len (length clist)) (s (make-string len))) (do ((i (- len 1) (- i 1)) (clist clist (cdr clist))) ((not (pair? clist))) (string-set! s i (car clist))) s))|#;(define (string->list s . maybe-start+end); (apply string-fold-right cons '() s maybe-start+end))(define (string->list s . maybe-start+end) (let-string-start+end (start end) string->list s maybe-start+end (do ((i (- end 1) (- i 1)) (ans '() (cons (string-ref s i) ans))) ((< i start) ans))));;; Defined by R5RS, so commented out here.;(define (list->string lis) (string-unfold null? car cdr lis));;; string-concatenate string-list -> string;;; string-concatenate/shared string-list -> string;;; string-append/shared s ... -> string;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; STRING-APPEND/SHARED has license to return a string that shares storage;;; with any of its arguments. In particular, if there is only one non-empty;;; string amongst its parameters, it is permitted to return that string as;;; its result. STRING-APPEND, by contrast, always allocates new storage.;;;;;; STRING-CONCATENATE & STRING-CONCATENATE/SHARED are passed a list of;;; strings, which they concatenate into a result string. STRING-CONCATENATE;;; always allocates a fresh string; STRING-CONCATENATE/SHARED may (or may;;; not) return a result that shares storage with any of its arguments. In;;; particular, if it is applied to a singleton list, it is permitted to;;; return the car of that list as its value.(define (string-append/shared . strings) (string-concatenate/shared strings))(define (string-concatenate/shared strings) (let lp ((strings strings) (nchars 0) (first #f)) (cond ((pair? strings) ; Scan the args, add up total (let* ((string (car strings)) ; length, remember 1st (tail (cdr strings)) ; non-empty string. (slen (string-length string))) (if (zero? slen) (lp tail nchars first) (lp tail (+ nchars slen) (or first strings))))) ((zero? nchars) "") ;; Just one non-empty string! Return it. ((= nchars (string-length (car first))) (car first)) (else (let ((ans (make-string nchars))) (let lp ((strings first) (i 0)) (if (pair? strings) (let* ((s (car strings)) (slen (string-length s))) (%string-copy! ans i s 0 slen) (lp (cdr strings) (+ i slen))))) ans))))) ; Alas, Scheme 48's APPLY blows up if you have many, many arguments.;(define (string-concatenate strings) (apply string-append strings));;; Here it is written out. I avoid using REDUCE to add up string lengths;;; to avoid non-R5RS dependencies.(define (string-concatenate strings) (let* ((total (do ((strings strings (cdr strings)) (i 0 (+ i (string-length (car strings))))) ((not (pair? strings)) i))) (ans (make-string total))) (let lp ((i 0) (strings strings)) (if (pair? strings) (let* ((s (car strings)) (slen (string-length s))) (%string-copy! ans i s 0 slen) (lp (+ i slen) (cdr strings))))) ans)) ;;; Defined by R5RS, so commented out here.;(define (string-append . strings) (string-concatenate strings));;; string-concatenate-reverse string-list [final-string end] -> string;;; string-concatenate-reverse/shared string-list [final-string end] -> string;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Return;;; (string-concatenate ;;; (reverse;;; (cons (substring final-string 0 end) string-list)))(define (string-concatenate-reverse string-list . maybe-final+end) (let-optionals* maybe-final+end ((final ""); (string? final)) (end (string-length final)) ); (and (integer? end); (exact? end); (<= 0 end (string-length final))))) (##sys#check-exact end 'string-concatenate-reverse) (let ((len (let lp ((sum 0) (lis string-list)) (if (pair? lis) (lp (+ sum (string-length (car lis))) (cdr lis)) sum)))) (%finish-string-concatenate-reverse len string-list final end))))(define (string-concatenate-reverse/shared string-list . maybe-final+end) (let-optionals* maybe-final+end ((final ""); (string? final)) (end (string-length final))); (and (integer? end); (exact? end); (<= 0 end (string-length final))))) (##sys#check-exact end 'string-concatenate-reverse/shared) ;; Add up the lengths of all the strings in STRING-LIST; also get a ;; pointer NZLIST into STRING-LIST showing where the first non-zero-length ;; string starts. (let lp ((len 0) (nzlist #f) (lis string-list)) (if (pair? lis) (let ((slen (string-length (car lis)))) (lp (+ len slen) (if (or nzlist (zero? slen)) nzlist lis) (cdr lis))) (cond ((zero? len) (substring/shared final 0 end)) ;; LEN > 0, so NZLIST is non-empty. ((and (zero? end) (= len (string-length (car nzlist)))) (car nzlist)) (else (%finish-string-concatenate-reverse len nzlist final end)))))))(define (%finish-string-concatenate-reverse len string-list final end) (let ((ans (make-string (+ end len)))) (%string-copy! ans len final 0 end) (let lp ((i len) (lis string-list)) (if (pair? lis) (let* ((s (car lis)) (lis (cdr lis)) (slen (string-length s)) (i (- i slen))) (%string-copy! ans i s 0 slen) (lp i lis)))) ans));;; string-replace s1 s2 start1 end1 [start2 end2] -> string;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Replace S1[START1,END1) with S2[START2,END2).(define (string-replace s1 s2 start1 end1 . maybe-start+end) (check-substring-spec string-replace s1 start1 end1) (let-string-start+end (start2 end2) string-replace s2 maybe-start+end (let* ((slen1 (string-length s1)) (sublen2 (- end2 start2)) (alen (+ (- slen1 (- end1 start1)) sublen2)) (ans (make-string alen))) (%string-copy! ans 0 s1 0 start1) (%string-copy! ans start1 s2 start2 end2) (%string-copy! ans (+ start1 sublen2) s1 end1 slen1) ans)));;; string-tokenize s [token
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -