tclutf.c
来自「tcl是工具命令语言」· C语言 代码 · 共 1,949 行 · 第 1/4 页
C
1,949 行
* * Compare at most n bytes of utf-8 strings cs and ct. Both cs * and ct are assumed to be at least n bytes long. * * Results: * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. * * Side effects: * None. * *---------------------------------------------------------------------- */intTclpUtfNcmp2(cs, ct, n) CONST char *cs; /* UTF string to compare to ct. */ CONST char *ct; /* UTF string cs is compared to. */ unsigned long n; /* Number of *bytes* to compare. */{ /* * We can't simply call 'memcmp(cs, ct, n);' because we need to check * for Tcl's \xC0\x80 non-utf-8 null encoding. * Otherwise utf-8 lexes fine in the strcmp manner. */ register int result = 0; for ( ; n != 0; n--, cs++, ct++) { if (*cs != *ct) { result = UCHAR(*cs) - UCHAR(*ct); break; } } if (n && ((UCHAR(*cs) == 0xC0) || (UCHAR(*ct) == 0xC0))) { unsigned char c1, c2; c1 = ((UCHAR(*cs) == 0xC0) && (UCHAR(cs[1]) == 0x80)) ? 0 : UCHAR(*cs); c2 = ((UCHAR(*ct) == 0xC0) && (UCHAR(ct[1]) == 0x80)) ? 0 : UCHAR(*ct); result = (c1 - c2); } return result;}/* *---------------------------------------------------------------------- * * Tcl_UtfNcmp -- * * Compare at most n UTF chars of string cs to string ct. Both cs * and ct are assumed to be at least n UTF chars long. * * Results: * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UtfNcmp(cs, ct, n) CONST char *cs; /* UTF string to compare to ct. */ CONST char *ct; /* UTF string cs is compared to. */ unsigned long n; /* Number of UTF chars to compare. */{ Tcl_UniChar ch1, ch2; /* * Cannot use 'memcmp(cs, ct, n);' as byte representation of * \u0000 (the pair of bytes 0xc0,0x80) is larger than byte * representation of \u0001 (the byte 0x01.) */ while (n-- > 0) { /* * n must be interpreted as chars, not bytes. * This should be called only when both strings are of * at least n chars long (no need for \0 check) */ cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { return (ch1 - ch2); } } return 0;}/* *---------------------------------------------------------------------- * * Tcl_UtfNcasecmp -- * * Compare at most n UTF chars of string cs to string ct case * insensitive. Both cs and ct are assumed to be at least n * UTF chars long. * * Results: * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UtfNcasecmp(cs, ct, n) CONST char *cs; /* UTF string to compare to ct. */ CONST char *ct; /* UTF string cs is compared to. */ unsigned long n; /* Number of UTF chars to compare. */{ Tcl_UniChar ch1, ch2; while (n-- > 0) { /* * n must be interpreted as chars, not bytes. * This should be called only when both strings are of * at least n chars long (no need for \0 check) */ cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); if (ch1 != ch2) { ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { return (ch1 - ch2); } } } return 0;}/* *---------------------------------------------------------------------- * * Tcl_UniCharToUpper -- * * Compute the uppercase equivalent of the given Unicode character. * * Results: * Returns the uppercase Unicode character. * * Side effects: * None. * *---------------------------------------------------------------------- */Tcl_UniCharTcl_UniCharToUpper(ch) int ch; /* Unicode character to convert. */{ int info = GetUniCharInfo(ch); if (GetCaseType(info) & 0x04) { return (Tcl_UniChar) (ch - GetDelta(info)); } else { return ch; }}/* *---------------------------------------------------------------------- * * Tcl_UniCharToLower -- * * Compute the lowercase equivalent of the given Unicode character. * * Results: * Returns the lowercase Unicode character. * * Side effects: * None. * *---------------------------------------------------------------------- */Tcl_UniCharTcl_UniCharToLower(ch) int ch; /* Unicode character to convert. */{ int info = GetUniCharInfo(ch); if (GetCaseType(info) & 0x02) { return (Tcl_UniChar) (ch + GetDelta(info)); } else { return ch; }}/* *---------------------------------------------------------------------- * * Tcl_UniCharToTitle -- * * Compute the titlecase equivalent of the given Unicode character. * * Results: * Returns the titlecase Unicode character. * * Side effects: * None. * *---------------------------------------------------------------------- */Tcl_UniCharTcl_UniCharToTitle(ch) int ch; /* Unicode character to convert. */{ int info = GetUniCharInfo(ch); int mode = GetCaseType(info); if (mode & 0x1) { /* * Subtract or add one depending on the original case. */ return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1)); } else if (mode == 0x4) { return (Tcl_UniChar) (ch - GetDelta(info)); } else { return ch; }}/* *---------------------------------------------------------------------- * * Tcl_UniCharLen -- * * Find the length of a UniChar string. The str input must be null * terminated. * * Results: * Returns the length of str in UniChars (not bytes). * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharLen(str) CONST Tcl_UniChar *str; /* Unicode string to find length of. */{ int len = 0; while (*str != '\0') { len++; str++; } return len;}/* *---------------------------------------------------------------------- * * Tcl_UniCharNcmp -- * * Compare at most n unichars of string cs to string ct. Both cs * and ct are assumed to be at least n unichars long. * * Results: * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharNcmp(cs, ct, n) CONST Tcl_UniChar *cs; /* Unicode string to compare to ct. */ CONST Tcl_UniChar *ct; /* Unicode string cs is compared to. */ unsigned long n; /* Number of unichars to compare. */{#ifdef WORDS_BIGENDIAN /* * We are definitely on a big-endian machine; memcmp() is safe */ return memcmp(cs, ct, n*sizeof(Tcl_UniChar));#else /* !WORDS_BIGENDIAN */ /* * We can't simply call memcmp() because that is not lexically correct. */ for ( ; n != 0; cs++, ct++, n--) { if (*cs != *ct) { return (*cs - *ct); } } return 0;#endif /* WORDS_BIGENDIAN */}/* *---------------------------------------------------------------------- * * Tcl_UniCharNcasecmp -- * * Compare at most n unichars of string cs to string ct case * insensitive. Both cs and ct are assumed to be at least n * unichars long. * * Results: * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharNcasecmp(cs, ct, n) CONST Tcl_UniChar *cs; /* Unicode string to compare to ct. */ CONST Tcl_UniChar *ct; /* Unicode string cs is compared to. */ unsigned long n; /* Number of unichars to compare. */{ for ( ; n != 0; n--, cs++, ct++) { if ((*cs != *ct) && (Tcl_UniCharToLower(*cs) != Tcl_UniCharToLower(*ct))) { return (*cs - *ct); } } return 0;}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsAlnum -- * * Test if a character is an alphanumeric Unicode character. * * Results: * Returns 1 if character is alphanumeric. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharIsAlnum(ch) int ch; /* Unicode character to test. */{ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); return (((ALPHA_BITS | DIGIT_BITS) >> category) & 1);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsAlpha -- * * Test if a character is an alphabetic Unicode character. * * Results: * Returns 1 if character is alphabetic. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharIsAlpha(ch) int ch; /* Unicode character to test. */{ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); return ((ALPHA_BITS >> category) & 1);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsControl -- * * Test if a character is a Unicode control character. * * Results: * Returns non-zero if character is a control. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharIsControl(ch) int ch; /* Unicode character to test. */{ return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsDigit -- * * Test if a character is a numeric Unicode character. * * Results: * Returns non-zero if character is a digit. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharIsDigit(ch) int ch; /* Unicode character to test. */{ return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == DECIMAL_DIGIT_NUMBER);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsGraph -- * * Test if a character is any Unicode print character except space. * * Results: * Returns non-zero if character is printable, but not space. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharIsGraph(ch) int ch; /* Unicode character to test. */{ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' '));}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsLower -- * * Test if a character is a lowercase Unicode character. * * Results: * Returns non-zero if character is lowercase. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharIsLower(ch) int ch; /* Unicode character to test. */{ return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == LOWERCASE_LETTER);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsPrint -- * * Test if a character is a Unicode print character. * * Results: * Returns non-zero if character is printable. * * Side effects: * None. * *---------------------------------------------------------------------- */int
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?