📄 srfi-14.scm
字号:
;;;; srfi-14.scm - Shivers' reference implementation of SRFI-14(declare (unit srfi-14) (fixnum) (disable-interrupts) (standard-bindings) (extended-bindings) (hide %char-set:s/check %string-iter %char-set-diff+intersection! %char->latin1 %latin1->char %ucs-range->char-set! %string->char-set! %list->char-set! %set-char-set! %char-set-unfold! %char-set-algebra %char-set-cursor-next %char-set-filter! %set-char-set c0 c1 %string-copy %default-base) )(cond-expand [paranoia] [else (declare (no-procedure-checks-for-usual-bindings) (bound-to-procedure char-set char-set-complement ucs-range->char-set! ucs-range->char-set char-set-union char-set-adjoin string->char-set list->char-set string-copy make-char-set char-set-copy char-set? char-set-size char-set:s) (no-bound-checks) ) ] )(cond-expand [unsafe (eval-when (compile) (define-macro (##sys#check-structure . _) '(##core#undefined)) (define-macro (##sys#check-range . _) '(##core#undefined)) (define-macro (##sys#check-pair . _) '(##core#undefined)) (define-macro (##sys#check-list . _) '(##core#undefined)) (define-macro (##sys#check-symbol . _) '(##core#undefined)) (define-macro (##sys#check-string . _) '(##core#undefined)) (define-macro (##sys#check-char . _) '(##core#undefined)) (define-macro (##sys#check-exact . _) '(##core#undefined)) (define-macro (##sys#check-port . _) '(##core#undefined)) (define-macro (##sys#check-number . _) '(##core#undefined)) (define-macro (##sys#check-byte-vector . _) '(##core#undefined)) ) ] [else (declare (emit-exports "srfi-14.exports"))] )(register-feature! 'srfi-14)(define (%latin1->char n) (integer->char n))(define (%char->latin1 c) (char->integer c));;; SRFI-14 character-sets library -*- Scheme -*-;;;;;; - Ported from MIT Scheme runtime by Brian D. Carlstrom.;;; - Massively rehacked & extended by Olin Shivers 6/98.;;; - Massively redesigned and rehacked 5/2000 during SRFI process.;;; At this point, the code bears the following relationship to the;;; MIT Scheme code: "This is my grandfather's axe. My father replaced;;; the head, and I have replaced the handle." Nonetheless, we preserve;;; the MIT Scheme copyright:;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology;;; The MIT Scheme license is a "free software" license. See the end of;;; this file for the tedious details. ;;; Exports:;;; char-set? char-set= char-set<=;;; char-set-hash ;;; char-set-cursor char-set-ref char-set-cursor-next end-of-char-set?;;; char-set-fold char-set-unfold char-set-unfold!;;; char-set-for-each char-set-map;;; char-set-copy;;;;;; char-set list->char-set string->char-set ;;; char-set! list->char-set! string->char-set! ;;;;;; filterchar-set ucs-range->char-set ->char-set;;; filterchar-set! ucs-range->char-set!;;;;;; char-set->list char-set->string;;;;;; char-set-size char-set-count char-set-contains?;;; char-set-every char-set-any;;;;;; char-set-adjoin char-set-delete ;;; char-set-adjoin! char-set-delete!;;; ;;; char-set-complement char-set-union char-set-intersection char-set-difference;;; char-set-complement! char-set-union! char-set-intersection! char-set-difference!;;;;;; char-set-difference char-set-xor char-set-diff+intersection;;; char-set-difference! char-set-xor! char-set-diff+intersection!;;;;;; char-set:lower-case char-set:upper-case char-set:title-case;;; char-set:letter char-set:digit char-set:letter+digit;;; char-set:graphic char-set:printing char-set:whitespace;;; char-set:iso-control char-set:punctuation char-set:symbol;;; char-set:hex-digit char-set:blank char-set:ascii;;; char-set:empty char-set:full;;; Imports;;; This code has the following non-R5RS dependencies:;;; - ERROR;;; - %LATIN1->CHAR %CHAR->LATIN1;;; - LET-OPTIONALS* and :OPTIONAL macros for parsing, checking & defaulting;;; optional arguments from rest lists.;;; - BITWISE-AND for CHAR-SET-HASH;;; - The SRFI-19 DEFINE-RECORD-TYPE record macro;;; - A simple CHECK-ARG procedure: ;;; (lambda (pred val caller) (if (not (pred val)) (error val caller)));;; This is simple code, not great code. Char sets are represented as 256-char;;; strings. If char I is ASCII/Latin-1 0, then it isn't in the set; if char I;;; is ASCII/Latin-1 1, then it is in the set.;;; - Should be rewritten to use bit strings or byte vecs.;;; - Is Latin-1 specific. Would certainly have to be rewritten for Unicode.;;; See the end of the file for porting and performance-tuning notes.;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;(define (make-char-set s) (##sys#make-structure 'char-set s))(define (char-set:s cs) (##sys#slot cs 1))(define (char-set? x) (##sys#structure? x 'char-set))#|(define-record-type :char-set (make-char-set s) char-set? (s char-set:s))|#(define (%string-copy s) (substring s 0 (string-length s)));;; Parse, type-check & default a final optional BASE-CS parameter from;;; a rest argument. Return a *fresh copy* of the underlying string.;;; The default is the empty set. The PROC argument is to help us;;; generate informative error exceptions.(define (%default-base maybe-base proc) (if (pair? maybe-base) (let ((bcs (car maybe-base)) (tail (cdr maybe-base))) (if (null? tail) (if (char-set? bcs) (%string-copy (char-set:s bcs)) (##sys#error "BASE-CS parameter not a char-set" proc bcs)) (##sys#error "Expected final base char set -- too many parameters" proc maybe-base))) (make-string 256 (%latin1->char 0))));;; If CS is really a char-set, do CHAR-SET:S, otw report an error msg on;;; behalf of our caller, PROC. This procedure exists basically to provide;;; explicit error-checking & reporting.(define (%char-set:s/check cs proc) (let lp ((cs cs)) (if (char-set? cs) (char-set:s cs) (lp (##sys#error proc "Not a char-set" cs)))));;; These internal functions hide a lot of the dependency on the;;; underlying string representation of char sets. They should be;;; inlined if possible.(define-inline (si=0? s i) (zero? (%char->latin1 (string-ref s i))))(define-inline (si=1? s i) (not (si=0? s i)))(define-inline (si s i) (%char->latin1 (string-ref s i)))(define-inline (%set0! s i) (string-set! s i c0))(define-inline (%set1! s i) (string-set! s i c1))(define c0 (%latin1->char 0))(define c1 (%latin1->char 1));;; These do various "s[i] := s[i] op val" operations -- see ;;; %CHAR-SET-ALGEBRA. They are used to implement the various;;; set-algebra procedures.(define-inline (setv! s i v) (string-set! s i (%latin1->char v))) ; SET to a Value.(define-inline (%not! s i v) (setv! s i (- 1 v)))(define-inline (%and! s i v) (if (zero? v) (%set0! s i)))(define-inline (%or! s i v) (if (not (zero? v)) (%set1! s i)))(define-inline (%minus! s i v) (if (not (zero? v)) (%set0! s i)))(define-inline (%xor! s i v) (if (not (zero? v)) (setv! s i (- 1 (si s i)))))(define (char-set-copy cs) (make-char-set (%string-copy (%char-set:s/check cs 'char-set-copy))))(define char-set= (lambda rest (or (null? rest) (let* ((cs1 (car rest)) (rest (cdr rest)) (s1 (%char-set:s/check cs1 'char-set=))) (let lp ((rest rest)) (or (not (pair? rest)) (and (string=? s1 (%char-set:s/check (car rest) 'char-set=)) (lp (cdr rest)))))))))(define char-set<= (lambda rest (or (null? rest) (let ((cs1 (car rest)) (rest (cdr rest))) (let lp ((s1 (%char-set:s/check cs1 'char-set<=)) (rest rest)) (or (not (pair? rest)) (let ((s2 (%char-set:s/check (car rest) 'char-set<=)) (rest (cdr rest))) (if (eq? s1 s2) (lp s2 rest) ; Fast path (let lp2 ((i 255)) ; Real test (if (< i 0) (lp s2 rest) (and (<= (si s1 i) (si s2 i)) (lp2 (- i 1))))))))))) ));;; Hash;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Compute (c + 37 c + 37^2 c + ...) modulo BOUND.;;; If you keep BOUND small enough, the intermediate calculations will ;;; always be fixnums. How small is dependent on the underlying Scheme system; ;;; we use a default BOUND of 2^22 = 4194304, which should hack it in;;; Schemes that give you at least 29 signed bits for fixnums. The core ;;; calculation that you don't want to overflow is, worst case,;;; (+ 65535 (* 37 (- bound 1)));;; where 65535 is the max character code. Choose the default BOUND to be the;;; biggest power of two that won't cause this expression to fixnum overflow, ;;; and everything will be copacetic.(define (char-set-hash cs . maybe-bound) (let ((bound (:optional maybe-bound 4194304))) (if (zero? bound) (set! bound 4194304)) (##sys#check-exact bound 'char-set-hash) (let* ((s (%char-set:s/check cs 'char-set-hash)) ;; Compute a 111...1 mask that will cover BOUND-1: (mask (let lp ((i #x10000)) ; Let's skip first 16 iterations, eh? (if (>= i bound) (- i 1) (lp (+ i i)))))) (let lp ((i 255) (ans 0)) (if (< i 0) (modulo ans bound) (lp (- i 1) (if (si=0? s i) ans (fxand mask (+ (* 37 ans) i)))))))) )(define (char-set-contains? cs char) (##sys#check-char char 'char-set-contains?) (si=1? (%char-set:s/check cs 'char-set-contains?); (%char->latin1 (check-arg char? char char-set-contains?)))) (%char->latin1 char) ) )(define (char-set-size cs) (let ((s (%char-set:s/check cs 'char-set-size))) (let lp ((i 255) (size 0)) (if (< i 0) size (lp (- i 1) (+ size (si s i)))))))(define (char-set-count pred cset); (check-arg procedure? pred char-set-count) (let ((s (%char-set:s/check cset 'char-set-count))) (let lp ((i 255) (count 0)) (if (< i 0) count (lp (- i 1) (if (and (si=1? s i) (pred (%latin1->char i))) (+ count 1) count))))));;; -- Adjoin & delete(define (%set-char-set set proc cs chars) (let ((s (%string-copy (%char-set:s/check cs proc)))) (for-each (lambda (c) (set s (%char->latin1 c))) chars) (make-char-set s)))(define (%set-char-set! set proc cs chars) (let ((s (%char-set:s/check cs proc))) (for-each (lambda (c) (set s (%char->latin1 c))) chars)) cs)(define (char-set-adjoin cs . chars) (%set-char-set %set1! 'char-set-adjoin cs chars))(define (char-set-adjoin! cs . chars) (%set-char-set! %set1! 'char-set-adjoin! cs chars))(define (char-set-delete cs . chars) (%set-char-set %set0! 'char-set-delete cs chars))(define (char-set-delete! cs . chars) (%set-char-set! %set0! 'char-set-delete! cs chars));;; Cursors;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -