⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 regex.scm

📁 Scheme跨平台编译器
💻 SCM
📖 第 1 页 / 共 2 页
字号:
;;;; regex.scm - Unit for using the PCRE regex package;; Copyright (c) 2000-2007, Felix L. Winkelmann; Copyright (c) 2008, The Chicken Team; All rights reserved.;; Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following; conditions are met:;;   Redistributions of source code must retain the above copyright notice, this list of conditions and the following;     disclaimer.;   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following;     disclaimer in the documentation and/or other materials provided with the distribution.;   Neither the name of the author nor the names of its contributors may be used to endorse or promote;     products derived from this software without specific prior written permission.;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY; AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR; OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE; POSSIBILITY OF SUCH DAMAGE.(cond-expand [chicken-compile-shared] [else (declare (unit regex))] )(declare  (usual-integrations)  (disable-interrupts)  (generic) ; PCRE options use lotsa bits  (disable-warning var)  (bound-to-procedure    ;; Forward reference    regex-chardef-table? make-anchored-pattern    ;; Imports    get-output-string open-output-string    string->list list->string string-length string-ref substring make-string string-append    reverse list-ref    char=? char-alphabetic? char-numeric? char->integer    set-finalizer!    ##sys#pointer?    ##sys#slot ##sys#setslot ##sys#size    ##sys#make-structure ##sys#structure?    ##sys#error ##sys#signal-hook    ##sys#substring ##sys#fragments->string ##sys#make-c-string ##sys#string-append    ##sys#write-char-0 )  (export    regex-chardef-table? regex-chardef-table    regexp? regexp regexp*    regexp-optimize    make-anchored-pattern    string-match string-match-positions string-search string-search-positions    string-split-fields string-substitute string-substitute*    glob? glob->regexp    grep    regexp-escape ) )(cond-expand [paranoia] [else  (declare    (no-bound-checks)    (no-procedure-checks-for-usual-bindings) ) ] )(cond-expand [unsafe  (eval-when (compile)    (define-macro (##sys#check-chardef-table . _) '(##core#undefined))    (define-macro (##sys#check-integer . _) '(##core#undefined))    (define-macro (##sys#check-blob . _) '(##core#undefined))    (define-macro (##sys#check-vector . _) '(##core#undefined))    (define-macro (##sys#check-structure . _) '(##core#undefined))    (define-macro (##sys#check-range . _) '(##core#undefined))    (define-macro (##sys#check-pair . _) '(##core#undefined))    (define-macro (##sys#check-list . _) '(##core#undefined))    (define-macro (##sys#check-symbol . _) '(##core#undefined))    (define-macro (##sys#check-string . _) '(##core#undefined))    (define-macro (##sys#check-char . _) '(##core#undefined))    (define-macro (##sys#check-exact . _) '(##core#undefined))    (define-macro (##sys#check-port . _) '(##core#undefined))    (define-macro (##sys#check-number . _) '(##core#undefined))    (define-macro (##sys#check-byte-vector . _) '(##core#undefined)) ) ] [else  (define (##sys#check-chardef-table x loc)    (unless (regex-chardef-table? x)      (##sys#error loc "invalid character definition tables structure" x) ) )  (declare    (bound-to-procedure      ;; Imports      ##sys#check-string ##sys#check-list ##sys#check-exact ##sys#check-vector      ##sys#check-structure ##sys#check-symbol ##sys#check-blob ##sys#check-integer )    (export      ##sys#check-chardef-table )    (emit-exports "regex.exports") ) ] );;;#>#include "pcre.h"<#;;;(register-feature! 'regex 'pcre);;; From unit lolevel:(define-inline (%tag-pointer ptr tag)  (let ([tp (##sys#make-tagged-pointer tag)])    (##core#inline "C_copy_pointer" ptr tp)    tp ) )(define-inline (%tagged-pointer? x tag)  (and (##core#inline "C_blockp" x)       (##core#inline "C_taggedpointerp" x)       (eq? tag (##sys#slot x 1)) ) );;; PCRE Types:(define-foreign-type pcre (c-pointer "pcre"))(define-foreign-type nonnull-pcre (nonnull-c-pointer "pcre"))(define-foreign-type pcre_extra (c-pointer "pcre_extra"))(define-foreign-type nonnull-pcre_extra (nonnull-c-pointer "pcre_extra"))(define-foreign-variable PCRE_CASELESS unsigned-integer)(define-foreign-variable PCRE_EXTENDED unsigned-integer)(define-foreign-variable PCRE_UTF8 unsigned-integer);FIXME the use of 'define-foreign-enum' causes unused global variable warning!(define-foreign-enum (pcre-option unsigned-integer)  (caseless             PCRE_CASELESS)  (multiline            PCRE_MULTILINE)  (dotall               PCRE_DOTALL)  (extended             PCRE_EXTENDED)  (anchored             PCRE_ANCHORED)  (dollar-endonly       PCRE_DOLLAR_ENDONLY)  (extra                PCRE_EXTRA)  (notbol               PCRE_NOTBOL)  (noteol               PCRE_NOTEOL)  (ungreedy             PCRE_UNGREEDY)  (notempty             PCRE_NOTEMPTY)  (utf8                 PCRE_UTF8)  (no-auto-capture      PCRE_NO_AUTO_CAPTURE)  (no-utf8-check        PCRE_NO_UTF8_CHECK)  (auto-callout         PCRE_AUTO_CALLOUT)  (partial              PCRE_PARTIAL)  (dfa-shortest         PCRE_DFA_SHORTEST)  (dfa-restart          PCRE_DFA_RESTART)  (firstline            PCRE_FIRSTLINE)  (dupnames             PCRE_DUPNAMES)  (newline-cr           PCRE_NEWLINE_CR)  (newline-lf           PCRE_NEWLINE_LF)  (newline-crlf         PCRE_NEWLINE_CRLF)  (newline-any          PCRE_NEWLINE_ANY)  (newline-anycrlf      PCRE_NEWLINE_ANYCRLF)  (bsr-anycrlf          PCRE_BSR_ANYCRLF)  (bsr-unicode          PCRE_BSR_UNICODE) );;; The regexp structure primitives:(define re-finalizer  (foreign-lambda void "pcre_free" c-pointer) )(define-inline (%make-regexp code)  (set-finalizer! code re-finalizer)  (##sys#make-structure 'regexp code #f 0) )(define-inline (%regexp? x)  (##sys#structure? x 'regexp) )(define-inline (%regexp-code rx)  (##sys#slot rx 1) )(define-inline (%regexp-extra rx)  (##sys#slot rx 2) )(define-inline (%regexp-options rx)  (##sys#slot rx 3) )(define-inline (%regexp-extra-set! rx extra)  (when extra (set-finalizer! extra re-finalizer))  (##sys#setslot rx 2 extra) )(define-inline (%regexp-options-set! rx options)  (##sys#setslot rx 3 options) );;; Character Definition Tables:;; The minimum necessary to handle chardef table parameters.;;(define (regex-chardef-table? x)  (%tagged-pointer? x 'chardef-table) );; Get a character definitions tables structure for the current locale.(define regex-chardef-table  (let ([re-maketables          (foreign-lambda* (c-pointer unsigned-char) ()            "return (pcre_maketables ());")]        [re-make-chardef-table-type          (lambda (tables)            (%tag-pointer tables 'chardef-table) ) ] )    (lambda (#!optional tables)      ; Using this to type tag a ref is a bit of a hack but beats      ; having another public variable.      (if tables          ; then existing reference so just tag it          (if (##sys#pointer? tables)              (re-make-chardef-table-type tables)              (##sys#signal-hook #:type-error 'regex-chardef-table               "bad argument type - not a pointer" tables) )          ; else make a new chardef tables          (let ([tables (re-maketables)])            (if tables                (let ([tables (re-make-chardef-table-type tables)])                  (set-finalizer! tables re-finalizer)                  tables )                (##sys#error-hook 6 'regex-chardef-table) ) ) ) ) ) );;; Regexp record:(define (regexp? x)  (%regexp? x) );;; PCRE errors:#>static const char *C_regex_error;static int C_regex_error_offset;<#(define-foreign-variable C_regex_error c-string)(define-foreign-variable C_regex_error_offset int)(define re-error  (let ([string-append string-append])    (lambda (loc msg . args)      (apply ##sys#error loc (string-append msg " - " C_regex_error) args) ) ) );;; Compile regular expression:;FIXME nonnull-unsigned-c-string causes problems - converted string is too long!(define re-compile  (foreign-lambda* pcre ((nonnull-c-string patt) (unsigned-integer options) ((const (c-pointer unsigned-char)) tables))    "return(pcre_compile(patt, options, &C_regex_error, &C_regex_error_offset, tables));") )(define (re-checked-compile pattern options tables loc)  (##sys#check-string pattern loc)  (or (re-compile pattern options #f)      (re-error loc "cannot compile regular expression" pattern C_regex_error_offset) ) );; Compile with subset of options and no tables(define (regexp pattern . options)  (let ([options->integer          (lambda ()            (if (null? options)                0                (+ (if (car options) PCRE_CASELESS 0)                   (let ((options (cdr options)))                     (if (null? options)                         0                         (+ (if (car options) PCRE_EXTENDED 0)                            (let ((options (cdr options)))                              (if (and (pair? options) (car options)) PCRE_UTF8 0 ) ) ) ) ) ) ) )])    (%make-regexp (re-checked-compile pattern (options->integer) #f 'regexp)) ) );; Compile with full options and tables available(define (regexp* pattern . args)  (let-optionals args ([options '()] [tables #f])    (##sys#check-string pattern 'regexp*)    (##sys#check-list options 'regexp*)    (when tables (##sys#check-chardef-table tables 'regexp*))    (%make-regexp (re-checked-compile pattern (pcre-option->number options) tables 'regexp*)) ) );;; Optimize compiled regular expression:;; Invoke optimizer(define re-study  (foreign-lambda* pcre_extra (((const nonnull-pcre) code))    "return(pcre_study(code, 0, &C_regex_error));"));; Optimize compiled regular expression;; Returns whether optimization performed(define (regexp-optimize rx)  (##sys#check-structure rx 'regexp 'regexp-optimize)  (let ([extra (re-study (%regexp-code rx))])    (cond [C_regex_error            (re-error 'regexp-optimize "cannot optimize regular expression" rx)]          [extra            (%regexp-extra-set! rx extra)            #t]          [else            #f] ) ) );;; Captured results vector:;; Match positions vector (PCRE ovector)#>#define OVECTOR_LENGTH_MULTIPLE 3#define STATIC_OVECTOR_LEN 256static int C_regex_ovector[OVECTOR_LENGTH_MULTIPLE * STATIC_OVECTOR_LEN];<#;;(define ovector-start-ref

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -