tclutf.c
来自「tcl是工具命令语言」· C语言 代码 · 共 1,949 行 · 第 1/4 页
C
1,949 行
Tcl_UniCharIsPrint(ch) int ch; /* Unicode character to test. */{ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); return ((PRINT_BITS >> category) & 1);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsPunct -- * * Test if a character is a Unicode punctuation character. * * Results: * Returns non-zero if character is punct. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharIsPunct(ch) int ch; /* Unicode character to test. */{ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); return ((PUNCT_BITS >> category) & 1);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsSpace -- * * Test if a character is a whitespace Unicode character. * * Results: * Returns non-zero if character is a space. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharIsSpace(ch) int ch; /* Unicode character to test. */{ register int category; /* * If the character is within the first 127 characters, just use the * standard C function, otherwise consult the Unicode table. */ if (ch < 0x80) { return isspace(UCHAR(ch)); /* INTL: ISO space */ } else { category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); return ((SPACE_BITS >> category) & 1); }}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsUpper -- * * Test if a character is a uppercase Unicode character. * * Results: * Returns non-zero if character is uppercase. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharIsUpper(ch) int ch; /* Unicode character to test. */{ return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == UPPERCASE_LETTER);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsWordChar -- * * Test if a character is alphanumeric or a connector punctuation * mark. * * Results: * Returns 1 if character is a word character. * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharIsWordChar(ch) int ch; /* Unicode character to test. */{ register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK); return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1);}/* *---------------------------------------------------------------------- * * Tcl_UniCharCaseMatch -- * * See if a particular Unicode string matches a particular pattern. * Allows case insensitivity. This is the Unicode equivalent of * the char* Tcl_StringCaseMatch. The UniChar strings must be * NULL-terminated. This has no provision for counted UniChar * strings, thus should not be used where NULLs are expected in the * UniChar string. Use TclUniCharMatch where possible. * * Results: * The return value is 1 if string matches pattern, and * 0 otherwise. The matching operation permits the following * special characters in the pattern: *?\[] (see the manual * entry for details on what these mean). * * Side effects: * None. * *---------------------------------------------------------------------- */intTcl_UniCharCaseMatch(string, pattern, nocase) CONST Tcl_UniChar *string; /* Unicode String. */ CONST Tcl_UniChar *pattern; /* Pattern, which may contain special * characters. */ int nocase; /* 0 for case sensitive, 1 for insensitive */{ Tcl_UniChar ch1, p; while (1) { p = *pattern; /* * See if we're at the end of both the pattern and the string. If * so, we succeeded. If we're at the end of the pattern but not at * the end of the string, we failed. */ if (p == 0) { return (*string == 0); } if ((*string == 0) && (p != '*')) { return 0; } /* * Check for a "*" as the next pattern character. It matches any * substring. We handle this by skipping all the characters up to the * next matching one in the pattern, and then calling ourselves * recursively for each postfix of string, until either we match or we * reach the end of the string. */ if (p == '*') { /* * Skip all successive *'s in the pattern */ while (*(++pattern) == '*') {} p = *pattern; if (p == 0) { return 1; } if (nocase) { p = Tcl_UniCharToLower(p); } while (1) { /* * Optimization for matching - cruise through the string * quickly if the next char in the pattern isn't a special * character */ if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while (*string && (p != *string) && (p != Tcl_UniCharToLower(*string))) { string++; } } else { while (*string && (p != *string)) { string++; } } } if (Tcl_UniCharCaseMatch(string, pattern, nocase)) { return 1; } if (*string == 0) { return 0; } string++; } } /* * Check for a "?" as the next pattern character. It matches * any single character. */ if (p == '?') { pattern++; string++; continue; } /* * Check for a "[" as the next pattern character. It is followed * by a list of characters that are acceptable, or by a range * (two characters separated by "-"). */ if (p == '[') { Tcl_UniChar startChar, endChar; pattern++; ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string); string++; while (1) { if ((*pattern == ']') || (*pattern == 0)) { return 0; } startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern); pattern++; if (*pattern == '-') { pattern++; if (*pattern == 0) { return 0; } endChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern); pattern++; if (((startChar <= ch1) && (ch1 <= endChar)) || ((endChar <= ch1) && (ch1 <= startChar))) { /* * Matches ranges of form [a-z] or [z-a]. */ break; } } else if (startChar == ch1) { break; } } while (*pattern != ']') { if (*pattern == 0) { pattern--; break; } pattern++; } pattern++; continue; } /* * If the next pattern character is '\', just strip off the '\' * so we do exact matching on the character that follows. */ if (p == '\\') { if (*(++pattern) == '\0') { return 0; } } /* * There's no special character. Just make sure that the next * bytes of each string match. */ if (nocase) { if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) { return 0; } } else if (*string != *pattern) { return 0; } string++; pattern++; }}/* *---------------------------------------------------------------------- * * TclUniCharMatch -- * * See if a particular Unicode string matches a particular pattern. * Allows case insensitivity. This is the Unicode equivalent of the * char* Tcl_StringCaseMatch. This variant of Tcl_UniCharCaseMatch * uses counted Strings, so embedded NULLs are allowed. * * Results: * The return value is 1 if string matches pattern, and * 0 otherwise. The matching operation permits the following * special characters in the pattern: *?\[] (see the manual * entry for details on what these mean). * * Side effects: * None. * *---------------------------------------------------------------------- */intTclUniCharMatch(string, strLen, pattern, ptnLen, nocase) CONST Tcl_UniChar *string; /* Unicode String. */ int strLen; /* length of String */ CONST Tcl_UniChar *pattern; /* Pattern, which may contain special * characters. */ int ptnLen; /* length of Pattern */ int nocase; /* 0 for case sensitive, 1 for insensitive */{ CONST Tcl_UniChar *stringEnd, *patternEnd; Tcl_UniChar p; stringEnd = string + strLen; patternEnd = pattern + ptnLen; while (1) { /* * See if we're at the end of both the pattern and the string. If * so, we succeeded. If we're at the end of the pattern but not at * the end of the string, we failed. */ if (pattern == patternEnd) { return (string == stringEnd); } p = *pattern; if ((string == stringEnd) && (p != '*')) { return 0; } /* * Check for a "*" as the next pattern character. It matches any * substring. We handle this by skipping all the characters up to the * next matching one in the pattern, and then calling ourselves * recursively for each postfix of string, until either we match or we * reach the end of the string. */ if (p == '*') { /* * Skip all successive *'s in the pattern */ while (*(++pattern) == '*') {} if (pattern == patternEnd) { return 1; } p = *pattern; if (nocase) { p = Tcl_UniCharToLower(p); } while (1) { /* * Optimization for matching - cruise through the string * quickly if the next char in the pattern isn't a special * character */ if ((p != '[') && (p != '?') && (p != '\\')) { if (nocase) { while ((string < stringEnd) && (p != *string) && (p != Tcl_UniCharToLower(*string))) { string++; } } else { while ((string < stringEnd) && (p != *string)) { string++; } } } if (TclUniCharMatch(string, stringEnd - string, pattern, patternEnd - pattern, nocase)) { return 1; } if (string == stringEnd) { return 0; } string++; } } /* * Check for a "?" as the next pattern character. It matches * any single character. */ if (p == '?') { pattern++; string++; continue; } /* * Check for a "[" as the next pattern character. It is followed * by a list of characters that are acceptable, or by a range * (two characters separated by "-"). */ if (p == '[') { Tcl_UniChar ch1, startChar, endChar; pattern++; ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string); string++; while (1) { if ((*pattern == ']') || (pattern == patternEnd)) { return 0; } startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern); pattern++; if (*pattern == '-') { pattern++; if (pattern == patternEnd) { return 0; } endChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern); pattern++; if (((startChar <= ch1) && (ch1 <= endChar)) || ((endChar <= ch1) && (ch1 <= startChar))) { /* * Matches ranges of form [a-z] or [z-a]. */ break; } } else if (startChar == ch1) { break; } } while (*pattern != ']') { if (pattern == patternEnd) { pattern--; break; } pattern++; } pattern++; continue; } /* * If the next pattern character is '\', just strip off the '\' * so we do exact matching on the character that follows. */ if (p == '\\') { if (++pattern == patternEnd) { return 0; } } /* * There's no special character. Just make sure that the next * bytes of each string match. */ if (nocase) { if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) { return 0; } } else if (*string != *pattern) { return 0; } string++; pattern++; }}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?