tclutf.c

来自「tcl是工具命令语言」· C语言 代码 · 共 1,949 行 · 第 1/4 页

C
1,949
字号
				 * for strlen(string). */{    Tcl_UniChar ch;    register Tcl_UniChar *chPtr = &ch;    register int i;    /*     * The separate implementations are faster.     *     * Since this is a time-sensitive function, we also do the check for     * the single-byte char case specially.     */    i = 0;    if (len < 0) {	while (1) {	    str += TclUtfToUniChar(str, chPtr);	    if (ch == '\0') {		break;	    }	    i++;	}    } else {	register int n;	while (len > 0) {	    if (UCHAR(*str) < 0xC0) {		len--;		str++;	    } else {		n = Tcl_UtfToUniChar(str, chPtr);		len -= n;		str += n;	    }	    i++;	}    }    return i;}/* *--------------------------------------------------------------------------- * * Tcl_UtfFindFirst -- * *	Returns a pointer to the first occurance of the given Tcl_UniChar *	in the NULL-terminated UTF-8 string.  The NULL terminator is *	considered part of the UTF-8 string.  Equivalent to Plan 9 *	utfrune(). * * Results: *	As above.  If the Tcl_UniChar does not exist in the given string, *	the return value is NULL. * * Side effects: *	None. * *--------------------------------------------------------------------------- */CONST char *Tcl_UtfFindFirst(string, ch)    CONST char *string;		/* The UTF-8 string to be searched. */    int ch;			/* The Tcl_UniChar to search for. */{    int len;    Tcl_UniChar find;        while (1) {	len = TclUtfToUniChar(string, &find);	if (find == ch) {	    return string;	}	if (*string == '\0') {	    return NULL;	}	string += len;    }}/* *--------------------------------------------------------------------------- * * Tcl_UtfFindLast -- * *	Returns a pointer to the last occurance of the given Tcl_UniChar *	in the NULL-terminated UTF-8 string.  The NULL terminator is *	considered part of the UTF-8 string.  Equivalent to Plan 9 *	utfrrune(). * * Results: *	As above.  If the Tcl_UniChar does not exist in the given string, *	the return value is NULL. * * Side effects: *	None. * *--------------------------------------------------------------------------- */CONST char *Tcl_UtfFindLast(string, ch)    CONST char *string;		/* The UTF-8 string to be searched. */    int ch;			/* The Tcl_UniChar to search for. */{    int len;    Tcl_UniChar find;    CONST char *last;	    last = NULL;    while (1) {	len = TclUtfToUniChar(string, &find);	if (find == ch) {	    last = string;	}	if (*string == '\0') {	    break;	}	string += len;    }    return last;}/* *--------------------------------------------------------------------------- * * Tcl_UtfNext -- * *	Given a pointer to some current location in a UTF-8 string, *	move forward one character.  The caller must ensure that they *	are not asking for the next character after the last character *	in the string. * * Results: *	The return value is the pointer to the next character in *	the UTF-8 string. * * Side effects: *	None. * *--------------------------------------------------------------------------- */ CONST char *Tcl_UtfNext(str)     CONST char *str;		    /* The current location in the string. */{    Tcl_UniChar ch;    return str + TclUtfToUniChar(str, &ch);}/* *--------------------------------------------------------------------------- * * Tcl_UtfPrev -- * *	Given a pointer to some current location in a UTF-8 string, *	move backwards one character.  This works correctly when the *	pointer is in the middle of a UTF-8 character. * * Results: *	The return value is a pointer to the previous character in the *	UTF-8 string.  If the current location was already at the *	beginning of the string, the return value will also be a *	pointer to the beginning of the string. * * Side effects: *	None. * *--------------------------------------------------------------------------- */CONST char *Tcl_UtfPrev(str, start)    CONST char *str;		    /* The current location in the string. */    CONST char *start;		    /* Pointer to the beginning of the				     * string, to avoid going backwards too				     * far. */{    CONST char *look;    int i, byte;        str--;    look = str;    for (i = 0; i < TCL_UTF_MAX; i++) {	if (look < start) {	    if (str < start) {		str = start;	    }	    break;	}	byte = *((unsigned char *) look);	if (byte < 0x80) {	    break;	}	if (byte >= 0xC0) {	    return look;	}	look--;    }    return str;}	/* *--------------------------------------------------------------------------- * * Tcl_UniCharAtIndex -- * *	Returns the Unicode character represented at the specified *	character (not byte) position in the UTF-8 string. * * Results: *	As above. * * Side effects: *	None. * *--------------------------------------------------------------------------- */ Tcl_UniCharTcl_UniCharAtIndex(src, index)    register CONST char *src;	/* The UTF-8 string to dereference. */    register int index;		/* The position of the desired character. */{    Tcl_UniChar ch;    while (index >= 0) {	index--;	src += TclUtfToUniChar(src, &ch);    }    return ch;}/* *--------------------------------------------------------------------------- * * Tcl_UtfAtIndex -- * *	Returns a pointer to the specified character (not byte) position *	in the UTF-8 string. * * Results: *	As above. * * Side effects: *	None. * *--------------------------------------------------------------------------- */CONST char *Tcl_UtfAtIndex(src, index)    register CONST char *src;	/* The UTF-8 string. */    register int index;		/* The position of the desired character. */{    Tcl_UniChar ch;        while (index > 0) {	index--;	src += TclUtfToUniChar(src, &ch);    }    return src;}/* *--------------------------------------------------------------------------- * * Tcl_UtfBackslash -- * *	Figure out how to handle a backslash sequence. * * Results: *	Stores the bytes represented by the backslash sequence in dst and *	returns the number of bytes written to dst.  At most TCL_UTF_MAX *	bytes are written to dst; dst must have been large enough to accept *	those bytes.  If readPtr isn't NULL then it is filled in with a *	count of the number of bytes in the backslash sequence.   * * Side effects: *	The maximum number of bytes it takes to represent a Unicode *	character in UTF-8 is guaranteed to be less than the number of *	bytes used to express the backslash sequence that represents *	that Unicode character.  If the target buffer into which the *	caller is going to store the bytes that represent the Unicode *	character is at least as large as the source buffer from which *	the backslashed sequence was extracted, no buffer overruns should *	occur. * *--------------------------------------------------------------------------- */intTcl_UtfBackslash(src, readPtr, dst)    CONST char *src;		/* Points to the backslash character of				 * a backslash sequence. */    int *readPtr;		/* Fill in with number of characters read				 * from src, unless NULL. */    char *dst;			/* Filled with the bytes represented by the				 * backslash sequence. */{#define LINE_LENGTH 128    int numRead;    int result;    result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst);    if (numRead == LINE_LENGTH) {	/* We ate a whole line.  Pay the price of a strlen() */	result = TclParseBackslash(src, (int)strlen(src), &numRead, dst);    }    if (readPtr != NULL) {	*readPtr = numRead;    }    return result;}/* *---------------------------------------------------------------------- * * Tcl_UtfToUpper -- * *	Convert lowercase characters to uppercase characters in a UTF *	string in place.  The conversion may shrink the UTF string. * * Results: *	Returns the number of bytes in the resulting string *	excluding the trailing null. * * Side effects: *	Writes a terminating null after the last converted character. * *---------------------------------------------------------------------- */intTcl_UtfToUpper(str)    char *str;			/* String to convert in place. */{    Tcl_UniChar ch, upChar;    char *src, *dst;    int bytes;    /*     * Iterate over the string until we hit the terminating null.     */    src = dst = str;    while (*src) {        bytes = TclUtfToUniChar(src, &ch);	upChar = Tcl_UniCharToUpper(ch);	/*	 * To keep badly formed Utf strings from getting inflated by	 * the conversion (thereby causing a segfault), only copy the	 * upper case char to dst if its size is <= the original char.	 */		if (bytes < UtfCount(upChar)) {	    memcpy(dst, src, (size_t) bytes);	    dst += bytes;	} else {	    dst += Tcl_UniCharToUtf(upChar, dst);	}	src += bytes;    }    *dst = '\0';    return (dst - str);}/* *---------------------------------------------------------------------- * * Tcl_UtfToLower -- * *	Convert uppercase characters to lowercase characters in a UTF *	string in place.  The conversion may shrink the UTF string. * * Results: *	Returns the number of bytes in the resulting string *	excluding the trailing null. * * Side effects: *	Writes a terminating null after the last converted character. * *---------------------------------------------------------------------- */intTcl_UtfToLower(str)    char *str;			/* String to convert in place. */{    Tcl_UniChar ch, lowChar;    char *src, *dst;    int bytes;        /*     * Iterate over the string until we hit the terminating null.     */    src = dst = str;    while (*src) {	bytes = TclUtfToUniChar(src, &ch);	lowChar = Tcl_UniCharToLower(ch);	/*	 * To keep badly formed Utf strings from getting inflated by	 * the conversion (thereby causing a segfault), only copy the	 * lower case char to dst if its size is <= the original char.	 */		if (bytes < UtfCount(lowChar)) {	    memcpy(dst, src, (size_t) bytes);	    dst += bytes;	} else {	    dst += Tcl_UniCharToUtf(lowChar, dst);	}	src += bytes;    }    *dst = '\0';    return (dst - str);}/* *---------------------------------------------------------------------- * * Tcl_UtfToTitle -- * *	Changes the first character of a UTF string to title case or *	uppercase and the rest of the string to lowercase.  The *	conversion happens in place and may shrink the UTF string. * * Results: *	Returns the number of bytes in the resulting string *	excluding the trailing null. * * Side effects: *	Writes a terminating null after the last converted character. * *---------------------------------------------------------------------- */intTcl_UtfToTitle(str)    char *str;			/* String to convert in place. */{    Tcl_UniChar ch, titleChar, lowChar;    char *src, *dst;    int bytes;        /*     * Capitalize the first character and then lowercase the rest of the     * characters until we get to a null.     */    src = dst = str;    if (*src) {	bytes = TclUtfToUniChar(src, &ch);	titleChar = Tcl_UniCharToTitle(ch);	if (bytes < UtfCount(titleChar)) {	    memcpy(dst, src, (size_t) bytes);	    dst += bytes;	} else {	    dst += Tcl_UniCharToUtf(titleChar, dst);	}	src += bytes;    }    while (*src) {	bytes = TclUtfToUniChar(src, &ch);	lowChar = Tcl_UniCharToLower(ch);	if (bytes < UtfCount(lowChar)) {	    memcpy(dst, src, (size_t) bytes);	    dst += bytes;	} else {	    dst += Tcl_UniCharToUtf(lowChar, dst);	}	src += bytes;    }    *dst = '\0';    return (dst - str);}/* *---------------------------------------------------------------------- * * TclpUtfNcmp2 --

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?