tclutf.c

来自「tcl是工具命令语言」· C语言 代码 · 共 1,949 行 · 第 1/4 页

C
1,949
字号
 * *	Compare at most n bytes of utf-8 strings cs and ct.  Both cs *	and ct are assumed to be at least n bytes long. * * Results: *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTclpUtfNcmp2(cs, ct, n)    CONST char *cs;		/* UTF string to compare to ct. */    CONST char *ct;		/* UTF string cs is compared to. */    unsigned long n;		/* Number of *bytes* to compare. */{    /*     * We can't simply call 'memcmp(cs, ct, n);' because we need to check     * for Tcl's \xC0\x80 non-utf-8 null encoding.     * Otherwise utf-8 lexes fine in the strcmp manner.     */    register int result = 0;    for ( ; n != 0; n--, cs++, ct++) {	if (*cs != *ct) {	    result = UCHAR(*cs) - UCHAR(*ct);	    break;	}    }    if (n && ((UCHAR(*cs) == 0xC0) || (UCHAR(*ct) == 0xC0))) {	unsigned char c1, c2;	c1 = ((UCHAR(*cs) == 0xC0) && (UCHAR(cs[1]) == 0x80)) ? 0 : UCHAR(*cs);	c2 = ((UCHAR(*ct) == 0xC0) && (UCHAR(ct[1]) == 0x80)) ? 0 : UCHAR(*ct);	result = (c1 - c2);    }    return result;}/* *---------------------------------------------------------------------- * * Tcl_UtfNcmp -- * *	Compare at most n UTF chars of string cs to string ct.  Both cs *	and ct are assumed to be at least n UTF chars long. * * Results: *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UtfNcmp(cs, ct, n)    CONST char *cs;		/* UTF string to compare to ct. */    CONST char *ct;		/* UTF string cs is compared to. */    unsigned long n;		/* Number of UTF chars to compare. */{    Tcl_UniChar ch1, ch2;    /*     * Cannot use 'memcmp(cs, ct, n);' as byte representation of     * \u0000 (the pair of bytes 0xc0,0x80) is larger than byte     * representation of \u0001 (the byte 0x01.)     */    while (n-- > 0) {	/*	 * n must be interpreted as chars, not bytes.	 * This should be called only when both strings are of	 * at least n chars long (no need for \0 check)	 */	cs += TclUtfToUniChar(cs, &ch1);	ct += TclUtfToUniChar(ct, &ch2);	if (ch1 != ch2) {	    return (ch1 - ch2);	}    }    return 0;}/* *---------------------------------------------------------------------- * * Tcl_UtfNcasecmp -- * *	Compare at most n UTF chars of string cs to string ct case *	insensitive.  Both cs and ct are assumed to be at least n *	UTF chars long. * * Results: *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UtfNcasecmp(cs, ct, n)    CONST char *cs;		/* UTF string to compare to ct. */    CONST char *ct;		/* UTF string cs is compared to. */    unsigned long n;			/* Number of UTF chars to compare. */{    Tcl_UniChar ch1, ch2;    while (n-- > 0) {	/*	 * n must be interpreted as chars, not bytes.	 * This should be called only when both strings are of	 * at least n chars long (no need for \0 check)	 */	cs += TclUtfToUniChar(cs, &ch1);	ct += TclUtfToUniChar(ct, &ch2);	if (ch1 != ch2) {	    ch1 = Tcl_UniCharToLower(ch1);	    ch2 = Tcl_UniCharToLower(ch2);	    if (ch1 != ch2) {		return (ch1 - ch2);	    }	}    }    return 0;}/* *---------------------------------------------------------------------- * * Tcl_UniCharToUpper -- * *	Compute the uppercase equivalent of the given Unicode character. * * Results: *	Returns the uppercase Unicode character. * * Side effects: *	None. * *---------------------------------------------------------------------- */Tcl_UniCharTcl_UniCharToUpper(ch)    int ch;			/* Unicode character to convert. */{    int info = GetUniCharInfo(ch);    if (GetCaseType(info) & 0x04) {	return (Tcl_UniChar) (ch - GetDelta(info));    } else {	return ch;    }}/* *---------------------------------------------------------------------- * * Tcl_UniCharToLower -- * *	Compute the lowercase equivalent of the given Unicode character. * * Results: *	Returns the lowercase Unicode character. * * Side effects: *	None. * *---------------------------------------------------------------------- */Tcl_UniCharTcl_UniCharToLower(ch)    int ch;			/* Unicode character to convert. */{    int info = GetUniCharInfo(ch);    if (GetCaseType(info) & 0x02) {	return (Tcl_UniChar) (ch + GetDelta(info));    } else {	return ch;    }}/* *---------------------------------------------------------------------- * * Tcl_UniCharToTitle -- * *	Compute the titlecase equivalent of the given Unicode character. * * Results: *	Returns the titlecase Unicode character. * * Side effects: *	None. * *---------------------------------------------------------------------- */Tcl_UniCharTcl_UniCharToTitle(ch)    int ch;			/* Unicode character to convert. */{    int info = GetUniCharInfo(ch);    int mode = GetCaseType(info);    if (mode & 0x1) {	/*	 * Subtract or add one depending on the original case.	 */	return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1));    } else if (mode == 0x4) {	return (Tcl_UniChar) (ch - GetDelta(info));    } else {	return ch;    }}/* *---------------------------------------------------------------------- * * Tcl_UniCharLen -- * *	Find the length of a UniChar string.  The str input must be null *	terminated. * * Results: *	Returns the length of str in UniChars (not bytes). * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharLen(str)    CONST Tcl_UniChar *str;	/* Unicode string to find length of. */{    int len = 0;        while (*str != '\0') {	len++;	str++;    }    return len;}/* *---------------------------------------------------------------------- * * Tcl_UniCharNcmp -- * *	Compare at most n unichars of string cs to string ct.  Both cs *	and ct are assumed to be at least n unichars long. * * Results: *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharNcmp(cs, ct, n)    CONST Tcl_UniChar *cs;		/* Unicode string to compare to ct. */    CONST Tcl_UniChar *ct;		/* Unicode string cs is compared to. */    unsigned long n;			/* Number of unichars to compare. */{#ifdef WORDS_BIGENDIAN    /*     * We are definitely on a big-endian machine; memcmp() is safe     */    return memcmp(cs, ct, n*sizeof(Tcl_UniChar));#else /* !WORDS_BIGENDIAN */    /*     * We can't simply call memcmp() because that is not lexically correct.     */    for ( ; n != 0; cs++, ct++, n--) {	if (*cs != *ct) {	    return (*cs - *ct);	}    }    return 0;#endif /* WORDS_BIGENDIAN */}/* *---------------------------------------------------------------------- * * Tcl_UniCharNcasecmp -- * *	Compare at most n unichars of string cs to string ct case *	insensitive.  Both cs and ct are assumed to be at least n *	unichars long. * * Results: *	Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharNcasecmp(cs, ct, n)    CONST Tcl_UniChar *cs;		/* Unicode string to compare to ct. */    CONST Tcl_UniChar *ct;		/* Unicode string cs is compared to. */    unsigned long n;			/* Number of unichars to compare. */{    for ( ; n != 0; n--, cs++, ct++) {	if ((*cs != *ct) &&		(Tcl_UniCharToLower(*cs) != Tcl_UniCharToLower(*ct))) {	    return (*cs - *ct);	}    }    return 0;}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsAlnum -- * *	Test if a character is an alphanumeric Unicode character. * * Results: *	Returns 1 if character is alphanumeric. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharIsAlnum(ch)    int ch;			/* Unicode character to test. */{    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);    return (((ALPHA_BITS | DIGIT_BITS) >> category) & 1);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsAlpha -- * *	Test if a character is an alphabetic Unicode character. * * Results: *	Returns 1 if character is alphabetic. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharIsAlpha(ch)    int ch;			/* Unicode character to test. */{    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);    return ((ALPHA_BITS >> category) & 1);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsControl -- * *	Test if a character is a Unicode control character. * * Results: *	Returns non-zero if character is a control. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharIsControl(ch)    int ch;			/* Unicode character to test. */{    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == CONTROL);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsDigit -- * *	Test if a character is a numeric Unicode character. * * Results: *	Returns non-zero if character is a digit. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharIsDigit(ch)    int ch;			/* Unicode character to test. */{    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK)	    == DECIMAL_DIGIT_NUMBER);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsGraph -- * *	Test if a character is any Unicode print character except space. * * Results: *	Returns non-zero if character is printable, but not space. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharIsGraph(ch)    int ch;			/* Unicode character to test. */{    register int category = (GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK);    return (((PRINT_BITS >> category) & 1) && ((unsigned char) ch != ' '));}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsLower -- * *	Test if a character is a lowercase Unicode character. * * Results: *	Returns non-zero if character is lowercase. * * Side effects: *	None. * *---------------------------------------------------------------------- */intTcl_UniCharIsLower(ch)    int ch;			/* Unicode character to test. */{    return ((GetUniCharInfo(ch) & UNICODE_CATEGORY_MASK) == LOWERCASE_LETTER);}/* *---------------------------------------------------------------------- * * Tcl_UniCharIsPrint -- * *	Test if a character is a Unicode print character. * * Results: *	Returns non-zero if character is printable. * * Side effects: *	None. * *---------------------------------------------------------------------- */int

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?