tclutf.c

来自「tcl是工具命令语言」· C语言 代码 · 共 1,949 行 · 第 1/4 页

C
1,949
字号
Tcl_UniCharIsPrint(ch)    int ch;			/* Unicode character to test. */{    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);    return ((PRINT_BITS >> category) & 1);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsPunct -- * *	Test if a character is a Unicode punctuation character. * * Results: *	Returns non-zero if character is punct. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharIsPunct(ch)    int ch;			/* Unicode character to test. */{    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);    return ((PUNCT_BITS >> category) & 1);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsSpace -- * *	Test if a character is a whitespace Unicode character. * * Results: *	Returns non-zero if character is a space. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharIsSpace(ch)    int ch;			/* Unicode character to test. */{    register int category;    /*     * If the character is within the first 127 characters, just use the     * standard C function, otherwise consult the Unicode table.     */    if (ch < 0x80) {	return isspace(UCHAR(ch)); /* INTL: ISO space */    } else {	category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);	return ((SPACE_BITS >> category) & 1);    }}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsUpper -- * *	Test if a character is a uppercase Unicode character. * * Results: *	Returns non-zero if character is uppercase. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharIsUpper(ch)    int ch;			/* Unicode character to test. */{    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == UPPERCASE_LETTER);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsWordChar -- * *	Test if a character is alphanumeric or a connector punctuation *	mark. * * Results: *	Returns 1 if character is a word character. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharIsWordChar(ch)    int ch;			/* Unicode character to test. */{    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);    return (((ALPHA_BITS | DIGIT_BITS | CONNECTOR_BITS) >> category) & 1);}/* *---------------------------------------------------------------------- * * Tcl_UniCharCaseMatch -- * *	See if a particular Unicode string matches a particular pattern. *	Allows case insensitivity.  This is the Unicode equivalent of *	the char* Tcl_StringCaseMatch.  The UniChar strings must be *	NULL-terminated.  This has no provision for counted UniChar *	strings, thus should not be used where NULLs are expected in the *	UniChar string.  Use TclUniCharMatch where possible. * * Results: *	The return value is 1 if string matches pattern, and *	0 otherwise.  The matching operation permits the following *	special characters in the pattern: *?\[] (see the manual *	entry for details on what these mean). * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharCaseMatch(string, pattern, nocase)    CONST Tcl_UniChar *string;	/* Unicode String. */    CONST Tcl_UniChar *pattern;	/* Pattern, which may contain special				 * characters. */    int nocase;			/* 0 for case sensitive, 1 for insensitive */{    Tcl_UniChar ch1, p;        while (1) {	p = *pattern;		/*	 * See if we're at the end of both the pattern and the string.  If	 * so, we succeeded.  If we're at the end of the pattern but not at	 * the end of the string, we failed.	 */		if (p == 0) {	    return (*string == 0);	}	if ((*string == 0) && (p != '*')) {	    return 0;	}	/*	 * Check for a "*" as the next pattern character.  It matches any	 * substring.  We handle this by skipping all the characters up to the	 * next matching one in the pattern, and then calling ourselves	 * recursively for each postfix of string, until either we match or we	 * reach the end of the string.	 */		if (p == '*') {	    /*	     * Skip all successive *'s in the pattern	     */	    while (*(++pattern) == '*') {}	    p = *pattern;	    if (p == 0) {		return 1;	    }	    if (nocase) {		p = Tcl_UniCharToLower(p);	    }	    while (1) {		/*		 * Optimization for matching - cruise through the string		 * quickly if the next char in the pattern isn't a special		 * character		 */		if ((p != '[') && (p != '?') && (p != '\\')) {		    if (nocase) {			while (*string && (p != *string)				&& (p != Tcl_UniCharToLower(*string))) {			    string++;			}		    } else {			while (*string && (p != *string)) { string++; }		    }		}		if (Tcl_UniCharCaseMatch(string, pattern, nocase)) {		    return 1;		}		if (*string == 0) {		    return 0;		}		string++;	    }	}	/*	 * Check for a "?" as the next pattern character.  It matches	 * any single character.	 */	if (p == '?') {	    pattern++;	    string++;	    continue;	}	/*	 * Check for a "[" as the next pattern character.  It is followed	 * by a list of characters that are acceptable, or by a range	 * (two characters separated by "-").	 */		if (p == '[') {	    Tcl_UniChar startChar, endChar;	    pattern++;	    ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);	    string++;	    while (1) {		if ((*pattern == ']') || (*pattern == 0)) {		    return 0;		}		startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);		pattern++;		if (*pattern == '-') {		    pattern++;		    if (*pattern == 0) {			return 0;		    }		    endChar = (nocase ? Tcl_UniCharToLower(*pattern)			    : *pattern);		    pattern++;		    if (((startChar <= ch1) && (ch1 <= endChar))			    || ((endChar <= ch1) && (ch1 <= startChar))) {			/*			 * Matches ranges of form [a-z] or [z-a].			 */			break;		    }		} else if (startChar == ch1) {		    break;		}	    }	    while (*pattern != ']') {		if (*pattern == 0) {		    pattern--;		    break;		}		pattern++;	    }	    pattern++;	    continue;	}	/*	 * If the next pattern character is '\', just strip off the '\'	 * so we do exact matching on the character that follows.	 */	if (p == '\\') {	    if (*(++pattern) == '\0') {		return 0;	    }	}	/*	 * There's no special character.  Just make sure that the next	 * bytes of each string match.	 */	if (nocase) {	    if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {		return 0;	    }	} else if (*string != *pattern) {	    return 0;	}	string++;	pattern++;    }}/* *---------------------------------------------------------------------- * * TclUniCharMatch -- * *	See if a particular Unicode string matches a particular pattern. *	Allows case insensitivity.  This is the Unicode equivalent of the *	char* Tcl_StringCaseMatch.  This variant of Tcl_UniCharCaseMatch *	uses counted Strings, so embedded NULLs are allowed. * * Results: *	The return value is 1 if string matches pattern, and *	0 otherwise.  The matching operation permits the following *	special characters in the pattern: *?\[] (see the manual *	entry for details on what these mean). * * Side effects: *	None. * *---------------------------------------------------------------------- */intTclUniCharMatch(string, strLen, pattern, ptnLen, nocase)    CONST Tcl_UniChar *string;	/* Unicode String. */    int strLen;			/* length of String */    CONST Tcl_UniChar *pattern;	/* Pattern, which may contain special				 * characters. */    int ptnLen;			/* length of Pattern */    int nocase;			/* 0 for case sensitive, 1 for insensitive */{    CONST Tcl_UniChar *stringEnd, *patternEnd;    Tcl_UniChar p;    stringEnd  = string + strLen;    patternEnd = pattern + ptnLen;    while (1) {	/*	 * See if we're at the end of both the pattern and the string.  If	 * so, we succeeded.  If we're at the end of the pattern but not at	 * the end of the string, we failed.	 */	if (pattern == patternEnd) {	    return (string == stringEnd);	}	p = *pattern;	if ((string == stringEnd) && (p != '*')) {	    return 0;	}	/*	 * Check for a "*" as the next pattern character.  It matches any	 * substring.  We handle this by skipping all the characters up to the	 * next matching one in the pattern, and then calling ourselves	 * recursively for each postfix of string, until either we match or we	 * reach the end of the string.	 */		if (p == '*') {	    /*	     * Skip all successive *'s in the pattern	     */	    while (*(++pattern) == '*') {}	    if (pattern == patternEnd) {		return 1;	    }	    p = *pattern;	    if (nocase) {		p = Tcl_UniCharToLower(p);	    }	    while (1) {		/*		 * Optimization for matching - cruise through the string		 * quickly if the next char in the pattern isn't a special		 * character		 */		if ((p != '[') && (p != '?') && (p != '\\')) {		    if (nocase) {			while ((string < stringEnd) && (p != *string)				&& (p != Tcl_UniCharToLower(*string))) {			    string++;			}		    } else {			while ((string < stringEnd) && (p != *string)) {			    string++;			}		    }		}		if (TclUniCharMatch(string, stringEnd - string,			pattern, patternEnd - pattern, nocase)) {		    return 1;		}		if (string == stringEnd) {		    return 0;		}		string++;	    }	}	/*	 * Check for a "?" as the next pattern character.  It matches	 * any single character.	 */	if (p == '?') {	    pattern++;	    string++;	    continue;	}	/*	 * Check for a "[" as the next pattern character.  It is followed	 * by a list of characters that are acceptable, or by a range	 * (two characters separated by "-").	 */		if (p == '[') {	    Tcl_UniChar ch1, startChar, endChar;	    pattern++;	    ch1 = (nocase ? Tcl_UniCharToLower(*string) : *string);	    string++;	    while (1) {		if ((*pattern == ']') || (pattern == patternEnd)) {		    return 0;		}		startChar = (nocase ? Tcl_UniCharToLower(*pattern) : *pattern);		pattern++;		if (*pattern == '-') {		    pattern++;		    if (pattern == patternEnd) {			return 0;		    }		    endChar = (nocase ? Tcl_UniCharToLower(*pattern)			    : *pattern);		    pattern++;		    if (((startChar <= ch1) && (ch1 <= endChar))			    || ((endChar <= ch1) && (ch1 <= startChar))) {			/*			 * Matches ranges of form [a-z] or [z-a].			 */			break;		    }		} else if (startChar == ch1) {		    break;		}	    }	    while (*pattern != ']') {		if (pattern == patternEnd) {		    pattern--;		    break;		}		pattern++;	    }	    pattern++;	    continue;	}	/*	 * If the next pattern character is '\', just strip off the '\'	 * so we do exact matching on the character that follows.	 */	if (p == '\\') {	    if (++pattern == patternEnd) {		return 0;	    }	}	/*	 * There's no special character.  Just make sure that the next	 * bytes of each string match.	 */	if (nocase) {	    if (Tcl_UniCharToLower(*string) != Tcl_UniCharToLower(*pattern)) {		return 0;	    }	} else if (*string != *pattern) {	    return 0;	}	string++;	pattern++;    }}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?