📄 regex.scm
字号:
;;;; Copyright (C) 1997, 1999, 2001 Free Software Foundation, Inc.;;;;;;;; This program is free software; you can redistribute it and/or modify;;;; it under the terms of the GNU General Public License as published by;;;; the Free Software Foundation; either version 2, or (at your option);;;; any later version.;;;;;;;; This program is distributed in the hope that it will be useful,;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the;;;; GNU General Public License for more details.;;;;;;;; You should have received a copy of the GNU General Public License;;;; along with this software; see the file COPYING. If not, write to;;;; the Free Software Foundation, Inc., 59 Temple Place, Suite 330,;;;; Boston, MA 02111-1307 USA;;;;;;;; As a special exception, the Free Software Foundation gives permission;;;; for additional uses of the text contained in its release of GUILE.;;;;;;;; The exception is that, if you link the GUILE library with other files;;;; to produce an executable, this does not by itself cause the;;;; resulting executable to be covered by the GNU General Public License.;;;; Your use of that executable is in no way restricted on account of;;;; linking the GUILE library code into it.;;;;;;;; This exception does not however invalidate any other reasons why;;;; the executable file might be covered by the GNU General Public License.;;;;;;;; This exception applies only to the code released by the;;;; Free Software Foundation under the name GUILE. If you copy;;;; code from other Free Software Foundation releases into a copy of;;;; GUILE, as the General Public License permits, the exception does;;;; not apply to the code that you add in this way. To avoid misleading;;;; anyone as to the status of such modified files, you must delete;;;; this exception notice from them.;;;;;;;; If you write modifications of your own for GUILE, it is your choice;;;; whether to permit this exception to apply to your modifications.;;;; If you do not wish that, delete this exception notice.;;;;;;; Commentary:;; These procedures are exported:;; (match:count match);; (match:string match);; (match:prefix match);; (match:suffix match);; (regexp-match? match);; (regexp-quote string);; (match:start match . submatch-num);; (match:end match . submatch-num);; (match:substring match . submatch-num);; (string-match pattern str . start);; (regexp-substitute port match . items);; (fold-matches regexp string init proc . flags);; (list-matches regexp string . flags);; (regexp-substitute/global port regexp string . items);;; Code:;;;; POSIX regex support functions.(define-module (ice-9 regex) :export (match:count match:string match:prefix match:suffix regexp-match? regexp-quote match:start match:end match:substring string-match regexp-substitute fold-matches list-matches regexp-substitute/global));;; FIXME:;;; It is not clear what should happen if a `match' function;;; is passed a `match number' which is out of bounds for the;;; regexp match: return #f, or throw an error? These routines;;; throw an out-of-range error.;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; These procedures are not defined in SCSH, but I found them useful.(define (match:count match) (- (vector-length match) 1))(define (match:string match) (vector-ref match 0))(define (match:prefix match) (substring (match:string match) 0 (match:start match 0)))(define (match:suffix match) (substring (match:string match) (match:end match 0)));;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; SCSH compatibility routines.(define (regexp-match? match) (and (vector? match) (string? (vector-ref match 0)) (let loop ((i 1)) (cond ((>= i (vector-length match)) #t) ((and (pair? (vector-ref match i)) (integer? (car (vector-ref match i))) (integer? (cdr (vector-ref match i)))) (loop (+ 1 i))) (else #f)))))(define (regexp-quote string) (call-with-output-string (lambda (p) (let loop ((i 0)) (and (< i (string-length string)) (begin (case (string-ref string i) ((#\* #\. #\( #\) #\+ #\? #\\ #\^ #\$ #\{ #\}) (write-char #\\ p))) (write-char (string-ref string i) p) (loop (1+ i))))))))(define (match:start match . args) (let* ((matchnum (if (pair? args) (+ 1 (car args)) 1)) (start (car (vector-ref match matchnum)))) (if (= start -1) #f start)))(define (match:end match . args) (let* ((matchnum (if (pair? args) (+ 1 (car args)) 1)) (end (cdr (vector-ref match matchnum)))) (if (= end -1) #f end)))(define (match:substring match . args) (let* ((matchnum (if (pair? args) (car args) 0)) (start (match:start match matchnum)) (end (match:end match matchnum))) (and start end (substring (match:string match) start end))))(define (string-match pattern str . args) (let ((rx (make-regexp pattern)) (start (if (pair? args) (car args) 0))) (regexp-exec rx str start)))(define (regexp-substitute port match . items) ;; If `port' is #f, send output to a string. (if (not port) (call-with-output-string (lambda (p) (apply regexp-substitute p match items))) ;; Otherwise, process each substitution argument in `items'. (for-each (lambda (obj) (cond ((string? obj) (display obj port)) ((integer? obj) (display (match:substring match obj) port)) ((eq? 'pre obj) (display (match:prefix match) port)) ((eq? 'post obj) (display (match:suffix match) port)) (else (error 'wrong-type-arg obj)))) items)));;; If we call fold-matches, below, with a regexp that can match the;;; empty string, it's not obvious what "all the matches" means. How;;; many empty strings are there in the string "a"? Our answer:;;;;;; This function applies PROC to every non-overlapping, maximal;;; match of REGEXP in STRING.;;;;;; "non-overlapping": There are two non-overlapping matches of "" in;;; "a" --- one before the `a', and one after. There are three;;; non-overlapping matches of "q|x*" in "aqb": the empty strings;;; before `a' and after `b', and `q'. The two empty strings before;;; and after `q' don't count, because they overlap with the match of;;; "q".;;;;;; "maximal": There are three distinct maximal matches of "x*" in;;; "axxxb": one before the `a', one covering `xxx', and one after the;;; `b'. Around or within `xxx', only the match covering all three;;; x's counts, because the rest are not maximal.(define (fold-matches regexp string init proc . flags) (let ((regexp (if (regexp? regexp) regexp (make-regexp regexp))) (flags (if (null? flags) 0 flags))) (let loop ((start 0) (value init) (abuts #f)) ; True if start abuts a previous match. (let ((m (if (> start (string-length string)) #f (regexp-exec regexp string start flags)))) (cond ((not m) value) ((and (= (match:start m) (match:end m)) abuts) ;; We matched an empty string, but that would overlap the ;; match immediately before. Try again at a position ;; further to the right. (loop (+ start 1) value #f)) (else (loop (match:end m) (proc m value) #t)))))))(define (list-matches regexp string . flags) (reverse! (apply fold-matches regexp string '() cons flags)))(define (regexp-substitute/global port regexp string . items) ;; If `port' is #f, send output to a string. (if (not port) (call-with-output-string (lambda (p) (apply regexp-substitute/global p regexp string items))) ;; Walk the set of non-overlapping, maximal matches. (let next-match ((matches (list-matches regexp string)) (start 0)) (if (null? matches) (display (substring string start) port) (let ((m (car matches))) ;; Process all of the items for this match. Don't use ;; for-each, because we need to make sure 'post at the ;; end of the item list is a tail call. (let next-item ((items items)) (define (do-item item) (cond ((string? item) (display item port)) ((integer? item) (display (match:substring m item) port)) ((procedure? item) (display (item m) port)) ((eq? item 'pre) (display (substring string start (match:start m)) port)) ((eq? item 'post) (next-match (cdr matches) (match:end m))) (else (error 'wrong-type-arg item)))) (if (pair? items) (if (null? (cdr items)) (do-item (car items)) ; This is a tail call. (begin (do-item (car items)) ; This is not. (next-item (cdr items)))))))))))
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -