⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mb-util.lisp

📁 开源跨平台Lisp编译器
💻 LISP
字号:
(in-package "SB!IMPL")(defun make-multibyte-mapper (list)  (let ((list (sort (copy-list list) #'< :key #'car))        (hi (loop for x in list maximize (max (car x) (cadr x)))))    (make-array (list (length list) 2)                :element-type (list 'integer 0 hi)                :initial-contents list)))(defmacro define-multibyte-mapper (name list)  `(defparameter ,name     (make-multibyte-mapper ,list)))(defun get-multibyte-mapper (table code)  (declare (optimize speed (safety 0))           (type (array * (* 2)) table)           (type fixnum code))  (labels ((recur (start end)             (declare (type fixnum start end))             (let* ((m (ash (+ start end) -1))                    (x (aref table m 0)))               (declare (type fixnum m x))               (cond ((= x code)                      (aref table m 1))                     ((and (< x code) (< m end))                      (recur (1+ m) end))                     ((and (> x code) (> m start))                      (recur start (1- m)))))))    (recur 0 (1- (array-dimension table 0)))))(eval-when (:compile-toplevel :load-toplevel :execute)  ;; FIXME: better to change make-od-name() to accept multiple  ;; arguments in octets.lisp?  (defun make-od-name-list (&rest syms)    (reduce #'make-od-name syms))  (defun define-bytes-per-mb-character-1 (accessor type format                                          mb-len mb-continuation-byte-p)    (let ((name (make-od-name-list 'bytes-per format 'character accessor))          (invalid-mb-starter-byte           (make-od-name-list 'invalid format 'starter-byte))          (invalid-mb-continuation-byte           (make-od-name-list 'invalid format 'continuation-byte)))      `(progn         ;;(declaim (inline ,name))         (defun ,name (array pos end)           (declare (optimize speed (safety 0))                    (type ,type array)                    (type array-range pos end))           ;; returns the number of bytes consumed and nil if it's a           ;; valid character or the number of bytes consumed and a           ;; replacement string if it's not.           (let ((initial-byte (,accessor array pos))                 (reject-reason nil)                 (reject-position pos)                 (remaining-bytes (- end pos)))             (declare (type array-range reject-position remaining-bytes))             (labels ((valid-starter-byte-p (b)                        (declare (type (unsigned-byte 8) b))                        (let ((ok (,mb-len b)))                          (unless ok                            (setf reject-reason ',invalid-mb-starter-byte))                          ok))                      (enough-bytes-left-p (x)                        (let ((ok (> end (+ pos (1- x)))))                          (unless ok                            (setf reject-reason 'end-of-input-in-character))                          ok))                      (valid-secondary-p (x)                        (let* ((idx (the array-range (+ pos x)))                               (b (,accessor array idx))                               (ok (,mb-continuation-byte-p b)))                          (unless ok                            (setf reject-reason ',invalid-mb-continuation-byte)                            (setf reject-position idx))                          ok))                      (preliminary-ok-for-length (maybe-len len)                        (and (eql maybe-len len)                             ;; Has to be done in this order so that                             ;; certain broken sequences (e.g., the                             ;; two-byte sequence `"initial (length 3)"                             ;; "non-continuation"' -- `#xef #x32')                             ;; signal only part of that sequence as                             ;; erroneous.                             (loop for i from 1 below (min len remaining-bytes)                                always (valid-secondary-p i))                             (enough-bytes-left-p len))))               (declare (inline valid-starter-byte-p                                enough-bytes-left-p                                valid-secondary-p                                preliminary-ok-for-length))               (let ((maybe-len (valid-starter-byte-p initial-byte)))                 (cond ((eql maybe-len 1)                        (values 1 nil))                       ((preliminary-ok-for-length maybe-len 2)                        (values 2 nil))                       ((preliminary-ok-for-length maybe-len 3)                        (values 3 nil))                       (t                        (let* ((bad-end (ecase reject-reason                                          (,invalid-mb-starter-byte                                           (1+ pos))                                          (end-of-input-in-character                                           end)                                          (,invalid-mb-continuation-byte                                           reject-position)))                               (bad-len (- bad-end pos)))                          (declare (type array-range bad-end bad-len))                          (let ((replacement (decoding-error array pos bad-end ,format reject-reason reject-position)))                            (values bad-len replacement))))))))))))  (defun define-simple-get-mb-char-1 (accessor type format mb-to-ucs)    (let ((name (make-od-name-list 'simple-get format 'char accessor))          (malformed (make-od-name 'malformed format)))      `(progn         (declaim (inline ,name))         (defun ,name (array pos bytes)           (declare (optimize speed (safety 0))                    (type ,type array)                    (type array-range pos)                    (type (integer 1 3) bytes))           (flet ((cref (x)                    (,accessor array (the array-range (+ pos x)))))             (declare (inline cref))             (let ((code (,mb-to-ucs (ecase bytes                                       (1 (cref 0))                                       (2 (logior (ash (cref 0) 8) (cref 1)))                                       (3 (logior (ash (cref 0) 16)                                                  (ash (cref 1) 8)                                                  (cref 2)))))))               (if code                   (code-char code)                   (decoding-error array pos (+ pos bytes) ,format                                   ',malformed pos))))))))  (defun define-mb->string-1 (accessor type format)    (let ((name           (make-od-name-list format '>string accessor))          (bytes-per-mb-character           (make-od-name-list 'bytes-per format 'character accessor))          (simple-get-mb-char           (make-od-name-list 'simple-get format 'char accessor)))      `(progn         (defun ,name (array astart aend)           (declare (optimize speed (safety 0))                    (type ,type array)                    (type array-range astart aend))           (let ((string (make-array 0 :adjustable t :fill-pointer 0 :element-type 'character)))             (loop with pos = astart                while (< pos aend)                do (multiple-value-bind (bytes invalid)                       (,bytes-per-mb-character array pos aend)                     (declare (type (or null string) invalid))                     (cond                       ((null invalid)                        (vector-push-extend (,simple-get-mb-char array pos bytes) string))                       (t                        (dotimes (i (length invalid))                          (vector-push-extend (char invalid i) string))))                     (incf pos bytes)))             (coerce string 'simple-string))))))  (declaim (inline mb-char-len))  (defun mb-char-len (code)    (declare (optimize speed (safety 0))             (type fixnum code))    (cond ((< code 0) (bug "can't happen"))          ((< code #x100) 1)          ((< code #x10000) 2)          ((< code #x1000000) 3)          (t (bug "can't happen"))))  )(defmacro define-multibyte-encoding (format aliases                                     ucs-to-mb mb-to-ucs                                     mb-len mb-continuation-byte-p)  (let ((char->mb (make-od-name 'char-> format))        (string->mb (make-od-name 'string-> format))        (define-bytes-per-mb-character         (make-od-name-list 'define-bytes-per format 'character))        (define-simple-get-mb-char         (make-od-name-list 'define-simple-get format 'char))        (define-mb->string         (make-od-name-list 'define format '>string)))    `(progn       ;; for fd-stream.lisp       (define-external-format/variable-width ,aliases t         (mb-char-len (or (,ucs-to-mb (char-code byte)) -1))         (let ((mb (,ucs-to-mb bits)))           (if (null mb)               (external-format-encoding-error stream byte)               (ecase size                 (1 (setf (sap-ref-8 sap tail) mb))                 (2 (setf (sap-ref-8 sap tail) (ldb (byte 8 8) mb)                          (sap-ref-8 sap (1+ tail)) (ldb (byte 8 0) mb)))                 (3 (setf (sap-ref-8 sap tail) (ldb (byte 8 16) mb)                          (sap-ref-8 sap (1+ tail)) (ldb (byte 8 8) mb)                          (sap-ref-8 sap (+ 2 tail)) (ldb (byte 8 0) mb))))))         (,mb-len byte)         (let* ((mb (ecase size                      (1 byte)                      (2 (let ((byte2 (sap-ref-8 sap (1+ head))))                           (unless (,mb-continuation-byte-p byte2)                             (return-from decode-break-reason 2))                           (dpb byte (byte 8 8) byte2)))                      (3 (let ((byte2 (sap-ref-8 sap (1+ head)))                               (byte3 (sap-ref-8 sap (+ 2 head))))                           (unless (,mb-continuation-byte-p byte2)                             (return-from decode-break-reason 2))                           (unless (,mb-continuation-byte-p byte3)                             (return-from decode-break-reason 3))                           (dpb byte (byte 8 16) (dpb byte2 (byte 8 8) byte3))))))                (ucs (,mb-to-ucs mb)))           (if (null ucs)               (return-from decode-break-reason 1)               (code-char ucs))))       ;; for octets.lisp       (define-condition ,(make-od-name 'malformed format)           (octet-decoding-error) ())       (define-condition ,(make-od-name-list 'invalid format 'starter-byte)           (octet-decoding-error) ())       (define-condition ,(make-od-name-list 'invalid format 'continuation-byte)           (octet-decoding-error) ())       (declaim (inline ,char->mb))       (defun ,char->mb (char dest string pos)         (declare (optimize speed (safety 0))                  (type (array (unsigned-byte 8) (*)) dest))         (let ((code (,ucs-to-mb (char-code char))))           (if code               (flet ((add-byte (b)                        (declare (type (unsigned-byte 8) b))                        (vector-push-extend b dest)))                 (declare (inline add-byte))                 (setf code (the fixnum code))                 (ecase (mb-char-len code)                   (1                    (add-byte code))                   (2                    (add-byte (ldb (byte 8 8) code))                    (add-byte (ldb (byte 8 0) code)))                   (3                    (add-byte (ldb (byte 8 16) code))                    (add-byte (ldb (byte 8 8) code))                    (add-byte (ldb (byte 8 0) code)))))               (encoding-error ,format string pos))))       (defun ,string->mb (string sstart send additional-space)         (declare (optimize speed (safety 0))                  (type simple-string string)                  (type array-range sstart send additional-space))         (let ((array (make-array (+ additional-space (- send sstart))                                  :element-type '(unsigned-byte 8)                                  :adjustable t                                  :fill-pointer 0)))           (loop for i from sstart below send              do (,char->mb (char string i) array string i))           (dotimes (i additional-space)             (vector-push-extend 0 array))           (coerce array '(simple-array (unsigned-byte 8) (*)))))       (defmacro ,define-bytes-per-mb-character (accessor type)         (define-bytes-per-mb-character-1 accessor type ',format                                          ',mb-len ',mb-continuation-byte-p))       (instantiate-octets-definition ,define-bytes-per-mb-character)       (defmacro ,define-simple-get-mb-char (accessor type)         (define-simple-get-mb-char-1 accessor type ',format ',mb-to-ucs))       (instantiate-octets-definition ,define-simple-get-mb-char)       (defmacro ,define-mb->string (accessor type)         (define-mb->string-1 accessor type ',format))       (instantiate-octets-definition ,define-mb->string)       (add-external-format-funs ',aliases                                 '(,(make-od-name format '>string-aref)                                   ,string->mb))       )))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -