📄 srfi-14.scm
字号:
;;; -- Intersection(define (char-set-intersection! cset1 . csets) (%char-set-algebra (%char-set:s/check cset1 'char-set-intersection!) csets %and! 'char-set-intersection!) cset1)(define (char-set-intersection . csets) (if (pair? csets) (let ((s (%string-copy (%char-set:s/check (car csets) 'char-set-intersection)))) (%char-set-algebra s (cdr csets) %and! 'char-set-intersection) (make-char-set s)) (char-set-copy char-set:full)));;; -- Difference(define (char-set-difference! cset1 . csets) (%char-set-algebra (%char-set:s/check cset1 'char-set-difference!) csets %minus! 'char-set-difference!) cset1)(define (char-set-difference cs1 . csets) (if (pair? csets) (let ((s (%string-copy (%char-set:s/check cs1 'char-set-difference)))) (%char-set-algebra s csets %minus! 'char-set-difference) (make-char-set s)) (char-set-copy cs1)));;; -- Xor(define (char-set-xor! cset1 . csets) (%char-set-algebra (%char-set:s/check cset1 'char-set-xor!) csets %xor! 'char-set-xor!) cset1)(define (char-set-xor . csets) (if (pair? csets) (let ((s (%string-copy (%char-set:s/check (car csets) 'char-set-xor)))) (%char-set-algebra s (cdr csets) %xor! 'char-set-xor) (make-char-set s)) (char-set-copy char-set:empty)));;; -- Difference & intersection(define (%char-set-diff+intersection! diff int csets proc) (for-each (lambda (cs) (%string-iter (lambda (i v) (if (not (zero? v)) (cond ((si=1? diff i) (%set0! diff i) (%set1! int i))))) (%char-set:s/check cs proc))) csets))(define (char-set-diff+intersection! cs1 cs2 . csets) (let ((s1 (%char-set:s/check cs1 'char-set-diff+intersection!)) (s2 (%char-set:s/check cs2 'char-set-diff+intersection!))) (%string-iter (lambda (i v) (if (zero? v) (%set0! s2 i) (if (si=1? s2 i) (%set0! s1 i)))) s1) (%char-set-diff+intersection! s1 s2 csets 'char-set-diff+intersection!)) (values cs1 cs2))(define (char-set-diff+intersection cs1 . csets) (let ((diff (string-copy (%char-set:s/check cs1 'char-set-diff+intersection))) (int (make-string 256 c0))) (%char-set-diff+intersection! diff int csets 'char-set-diff+intersection) (values (make-char-set diff) (make-char-set int))));;;; System character sets;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; These definitions are for Latin-1.;;;;;; If your Scheme implementation allows you to mark the underlying strings;;; as immutable, you should do so -- it would be very, very bad if a client's;;; buggy code corrupted these constants.(define char-set:empty (char-set))(define char-set:full (char-set-complement char-set:empty))(define char-set:lower-case (let* ((a-z (ucs-range->char-set #x61 #x7B)) (latin1 (ucs-range->char-set! #xdf #xf7 #t a-z)) (latin2 (ucs-range->char-set! #xf8 #x100 #t latin1))) (char-set-adjoin! latin2 (%latin1->char #xb5))))(define char-set:upper-case (let ((A-Z (ucs-range->char-set #x41 #x5B))) ;; Add in the Latin-1 upper-case chars. (ucs-range->char-set! #xd8 #xdf #t (ucs-range->char-set! #xc0 #xd7 #t A-Z))))(define char-set:title-case char-set:empty)(define char-set:letter (let ((u/l (char-set-union char-set:upper-case char-set:lower-case))) (char-set-adjoin! u/l (%latin1->char #xaa) ; FEMININE ORDINAL INDICATOR (%latin1->char #xba)))) ; MASCULINE ORDINAL INDICATOR(define char-set:digit (string->char-set "0123456789"))(define char-set:hex-digit (string->char-set "0123456789abcdefABCDEF"))(define char-set:letter+digit (char-set-union char-set:letter char-set:digit))(define char-set:punctuation (let ((ascii (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")) (latin-1-chars (map %latin1->char '(#xA1 ; INVERTED EXCLAMATION MARK #xAB ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK #xAD ; SOFT HYPHEN #xB7 ; MIDDLE DOT #xBB ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK #xBF)))) ; INVERTED QUESTION MARK (list->char-set! latin-1-chars ascii)))(define char-set:symbol (let ((ascii (string->char-set "$+<=>^`|~")) (latin-1-chars (map %latin1->char '(#x00A2 ; CENT SIGN #x00A3 ; POUND SIGN #x00A4 ; CURRENCY SIGN #x00A5 ; YEN SIGN #x00A6 ; BROKEN BAR #x00A7 ; SECTION SIGN #x00A8 ; DIAERESIS #x00A9 ; COPYRIGHT SIGN #x00AC ; NOT SIGN #x00AE ; REGISTERED SIGN #x00AF ; MACRON #x00B0 ; DEGREE SIGN #x00B1 ; PLUS-MINUS SIGN #x00B4 ; ACUTE ACCENT #x00B6 ; PILCROW SIGN #x00B8 ; CEDILLA #x00D7 ; MULTIPLICATION SIGN #x00F7)))) ; DIVISION SIGN (list->char-set! latin-1-chars ascii))) (define char-set:graphic (char-set-union char-set:letter+digit char-set:punctuation char-set:symbol))(define char-set:whitespace (list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION #x0A ; LINE FEED #x0B ; VERTICAL TABULATION #x0C ; FORM FEED #x0D ; CARRIAGE RETURN #x20 ; SPACE #xA0))))(define char-set:printing (char-set-union char-set:whitespace char-set:graphic)) ; NO-BREAK SPACE(define char-set:blank (list->char-set (map %latin1->char '(#x09 ; HORIZONTAL TABULATION #x20 ; SPACE #xA0)))) ; NO-BREAK SPACE(define char-set:iso-control (ucs-range->char-set! #x7F #xA0 #t (ucs-range->char-set 0 32)))(define char-set:ascii (ucs-range->char-set 0 128));;; Porting & performance-tuning notes;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; See the section at the beginning of this file on external dependencies.;;;;;; First and foremost, rewrite this code to use bit vectors of some sort.;;; This will give big speedup and memory savings.;;;;;; - LET-OPTIONALS* macro.;;; This is only used once. You can rewrite the use, port the hairy macro;;; definition (which is implemented using a Clinger-Rees low-level;;; explicit-renaming macro system), or port the simple, high-level;;; definition, which is less efficient.;;;;;; - :OPTIONAL macro;;; Very simply defined using an R5RS high-level macro.;;;;;; Implementations that can arrange for the base char sets to be immutable;;; should do so. (E.g., Scheme 48 allows one to mark a string as immutable,;;; which can be used to protect the underlying strings.) It would be very,;;; very bad if a client's buggy code corrupted these constants.;;;;;; There is a fair amount of argument checking. This is, strictly speaking,;;; unnecessary -- the actual body of the procedures will blow up if an;;; illegal value is passed in. However, the error message will not be as good;;; as if the error were caught at the "higher level." Also, a very, very;;; smart Scheme compiler may be able to exploit having the type checks done;;; early, so that the actual body of the procedures can assume proper values.;;; This isn't likely; this kind of compiler technology isn't common any;;; longer.;;; ;;; The overhead of optional-argument parsing is irritating. The optional;;; arguments must be consed into a rest list on entry, and then parsed out.;;; Function call should be a matter of a few register moves and a jump; it;;; should not involve heap allocation! Your Scheme system may have a superior;;; non-R5RS optional-argument system that can eliminate this overhead. If so,;;; then this is a prime candidate for optimising these procedures,;;; *especially* the many optional BASE-CS parameters.;;;;;; Note that optional arguments are also a barrier to procedure integration.;;; If your Scheme system permits you to specify alternate entry points;;; for a call when the number of optional arguments is known in a manner;;; that enables inlining/integration, this can provide performance ;;; improvements.;;;;;; There is enough *explicit* error checking that *all* internal operations;;; should *never* produce a type or index-range error. Period. Feel like;;; living dangerously? *Big* performance win to be had by replacing string;;; and record-field accessors and setters with unsafe equivalents in the;;; code. Similarly, fixnum-specific operators can speed up the arithmetic;;; done on the index values in the inner loops. The only arguments that are;;; not completely error checked are;;; - string lists (complete checking requires time proportional to the;;; length of the list);;; - procedure arguments, such as char->char maps & predicates.;;; There is no way to check the range & domain of procedures in Scheme.;;; Procedures that take these parameters cannot fully check their;;; arguments. But all other types to all other procedures are fully;;; checked.;;;;;; This does open up the alternate possibility of simply *removing* these ;;; checks, and letting the safe primitives raise the errors. On a dumb;;; Scheme system, this would provide speed (by eliminating the redundant;;; error checks) at the cost of error-message clarity.;;;;;; In an interpreted Scheme, some of these procedures, or the internal;;; routines with % prefixes, are excellent candidates for being rewritten;;; in C.;;;;;; It would also be nice to have the ability to mark some of these;;; routines as candidates for inlining/integration.;;; ;;; See the comments preceding the hash function code for notes on tuning;;; the default bound so that the code never overflows your implementation's;;; fixnum size into bignum calculation.;;;;;; All the %-prefixed routines in this source code are written;;; to be called internally to this library. They do *not* perform;;; friendly error checks on the inputs; they assume everything is;;; proper. They also do not take optional arguments. These two properties;;; save calling overhead and enable procedure integration -- but they;;; are not appropriate for exported routines.;;; Copyright notice;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Copyright (c) 1988-1995 Massachusetts Institute of Technology;;; ;;; This material was developed by the Scheme project at the Massachusetts;;; Institute of Technology, Department of Electrical Engineering and;;; Computer Science. Permission to copy and modify this software, to;;; redistribute either the original software or a modified version, and;;; to use this software for any purpose is granted, subject to the;;; following restrictions and understandings.;;; ;;; 1. Any copy made of this software must include this copyright notice;;; in full.;;; ;;; 2. Users of this software agree to make their best efforts (a) to;;; return to the MIT Scheme project any improvements or extensions that;;; they make, so that these may be included in future releases; and (b);;; to inform MIT of noteworthy uses of this software.;;; ;;; 3. All materials developed as a consequence of the use of this;;; software shall duly acknowledge such use, in accordance with the usual;;; standards of acknowledging credit in academic research.;;; ;;; 4. MIT has made no warrantee or representation that the operation of;;; this software will be error-free, and MIT is under no obligation to;;; provide any services, by way of maintenance, update, or otherwise.;;; ;;; 5. In conjunction with products arising from the use of this material,;;; there shall be no use of the name of the Massachusetts Institute of;;; Technology nor of any adaptation thereof in any advertising,;;; promotional, or sales literature without prior written consent from;;; MIT in each case.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -