tclutf.c
来自「tcl是工具命令语言」· C语言 代码 · 共 1,949 行 · 第 1/4 页
C
1,949 行
* for strlen(string). */{ Tcl_UniChar ch; register Tcl_UniChar *chPtr = &ch; register int i; /* * The separate implementations are faster. * * Since this is a time-sensitive function, we also do the check for * the single-byte char case specially. */ i = 0; if (len < 0) { while (1) { str += TclUtfToUniChar(str, chPtr); if (ch == '\0') { break; } i++; } } else { register int n; while (len > 0) { if (UCHAR(*str) < 0xC0) { len--; str++; } else { n = Tcl_UtfToUniChar(str, chPtr); len -= n; str += n; } i++; } } return i;}/* *--------------------------------------------------------------------------- * * Tcl_UtfFindFirst -- * * Returns a pointer to the first occurance of the given Tcl_UniChar * in the NULL-terminated UTF-8 string. The NULL terminator is * considered part of the UTF-8 string. Equivalent to Plan 9 * utfrune(). * * Results: * As above. If the Tcl_UniChar does not exist in the given string, * the return value is NULL. * * Side effects: * None. * *--------------------------------------------------------------------------- */CONST char *Tcl_UtfFindFirst(string, ch) CONST char *string; /* The UTF-8 string to be searched. */ int ch; /* The Tcl_UniChar to search for. */{ int len; Tcl_UniChar find; while (1) { len = TclUtfToUniChar(string, &find); if (find == ch) { return string; } if (*string == '\0') { return NULL; } string += len; }}/* *--------------------------------------------------------------------------- * * Tcl_UtfFindLast -- * * Returns a pointer to the last occurance of the given Tcl_UniChar * in the NULL-terminated UTF-8 string. The NULL terminator is * considered part of the UTF-8 string. Equivalent to Plan 9 * utfrrune(). * * Results: * As above. If the Tcl_UniChar does not exist in the given string, * the return value is NULL. * * Side effects: * None. * *--------------------------------------------------------------------------- */CONST char *Tcl_UtfFindLast(string, ch) CONST char *string; /* The UTF-8 string to be searched. */ int ch; /* The Tcl_UniChar to search for. */{ int len; Tcl_UniChar find; CONST char *last; last = NULL; while (1) { len = TclUtfToUniChar(string, &find); if (find == ch) { last = string; } if (*string == '\0') { break; } string += len; } return last;}/* *--------------------------------------------------------------------------- * * Tcl_UtfNext -- * * Given a pointer to some current location in a UTF-8 string, * move forward one character. The caller must ensure that they * are not asking for the next character after the last character * in the string. * * Results: * The return value is the pointer to the next character in * the UTF-8 string. * * Side effects: * None. * *--------------------------------------------------------------------------- */ CONST char *Tcl_UtfNext(str) CONST char *str; /* The current location in the string. */{ Tcl_UniChar ch; return str + TclUtfToUniChar(str, &ch);}/* *--------------------------------------------------------------------------- * * Tcl_UtfPrev -- * * Given a pointer to some current location in a UTF-8 string, * move backwards one character. This works correctly when the * pointer is in the middle of a UTF-8 character. * * Results: * The return value is a pointer to the previous character in the * UTF-8 string. If the current location was already at the * beginning of the string, the return value will also be a * pointer to the beginning of the string. * * Side effects: * None. * *--------------------------------------------------------------------------- */CONST char *Tcl_UtfPrev(str, start) CONST char *str; /* The current location in the string. */ CONST char *start; /* Pointer to the beginning of the * string, to avoid going backwards too * far. */{ CONST char *look; int i, byte; str--; look = str; for (i = 0; i < TCL_UTF_MAX; i++) { if (look < start) { if (str < start) { str = start; } break; } byte = *((unsigned char *) look); if (byte < 0x80) { break; } if (byte >= 0xC0) { return look; } look--; } return str;} /* *--------------------------------------------------------------------------- * * Tcl_UniCharAtIndex -- * * Returns the Unicode character represented at the specified * character (not byte) position in the UTF-8 string. * * Results: * As above. * * Side effects: * None. * *--------------------------------------------------------------------------- */ Tcl_UniCharTcl_UniCharAtIndex(src, index) register CONST char *src; /* The UTF-8 string to dereference. */ register int index; /* The position of the desired character. */{ Tcl_UniChar ch; while (index >= 0) { index--; src += TclUtfToUniChar(src, &ch); } return ch;}/* *--------------------------------------------------------------------------- * * Tcl_UtfAtIndex -- * * Returns a pointer to the specified character (not byte) position * in the UTF-8 string. * * Results: * As above. * * Side effects: * None. * *--------------------------------------------------------------------------- */CONST char *Tcl_UtfAtIndex(src, index) register CONST char *src; /* The UTF-8 string. */ register int index; /* The position of the desired character. */{ Tcl_UniChar ch; while (index > 0) { index--; src += TclUtfToUniChar(src, &ch); } return src;}/* *--------------------------------------------------------------------------- * * Tcl_UtfBackslash -- * * Figure out how to handle a backslash sequence. * * Results: * Stores the bytes represented by the backslash sequence in dst and * returns the number of bytes written to dst. At most TCL_UTF_MAX * bytes are written to dst; dst must have been large enough to accept * those bytes. If readPtr isn't NULL then it is filled in with a * count of the number of bytes in the backslash sequence. * * Side effects: * The maximum number of bytes it takes to represent a Unicode * character in UTF-8 is guaranteed to be less than the number of * bytes used to express the backslash sequence that represents * that Unicode character. If the target buffer into which the * caller is going to store the bytes that represent the Unicode * character is at least as large as the source buffer from which * the backslashed sequence was extracted, no buffer overruns should * occur. * *--------------------------------------------------------------------------- */intTcl_UtfBackslash(src, readPtr, dst) CONST char *src; /* Points to the backslash character of * a backslash sequence. */ int *readPtr; /* Fill in with number of characters read * from src, unless NULL. */ char *dst; /* Filled with the bytes represented by the * backslash sequence. */{#define LINE_LENGTH 128 int numRead; int result; result = TclParseBackslash(src, LINE_LENGTH, &numRead, dst); if (numRead == LINE_LENGTH) { /* We ate a whole line. Pay the price of a strlen() */ result = TclParseBackslash(src, (int)strlen(src), &numRead, dst); } if (readPtr != NULL) { *readPtr = numRead; } return result;}/* *---------------------------------------------------------------------- * * Tcl_UtfToUpper -- * * Convert lowercase characters to uppercase characters in a UTF * string in place. The conversion may shrink the UTF string. * * Results: * Returns the number of bytes in the resulting string * excluding the trailing null. * * Side effects: * Writes a terminating null after the last converted character. * *---------------------------------------------------------------------- */intTcl_UtfToUpper(str) char *str; /* String to convert in place. */{ Tcl_UniChar ch, upChar; char *src, *dst; int bytes; /* * Iterate over the string until we hit the terminating null. */ src = dst = str; while (*src) { bytes = TclUtfToUniChar(src, &ch); upChar = Tcl_UniCharToUpper(ch); /* * To keep badly formed Utf strings from getting inflated by * the conversion (thereby causing a segfault), only copy the * upper case char to dst if its size is <= the original char. */ if (bytes < UtfCount(upChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { dst += Tcl_UniCharToUtf(upChar, dst); } src += bytes; } *dst = '\0'; return (dst - str);}/* *---------------------------------------------------------------------- * * Tcl_UtfToLower -- * * Convert uppercase characters to lowercase characters in a UTF * string in place. The conversion may shrink the UTF string. * * Results: * Returns the number of bytes in the resulting string * excluding the trailing null. * * Side effects: * Writes a terminating null after the last converted character. * *---------------------------------------------------------------------- */intTcl_UtfToLower(str) char *str; /* String to convert in place. */{ Tcl_UniChar ch, lowChar; char *src, *dst; int bytes; /* * Iterate over the string until we hit the terminating null. */ src = dst = str; while (*src) { bytes = TclUtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); /* * To keep badly formed Utf strings from getting inflated by * the conversion (thereby causing a segfault), only copy the * lower case char to dst if its size is <= the original char. */ if (bytes < UtfCount(lowChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } src += bytes; } *dst = '\0'; return (dst - str);}/* *---------------------------------------------------------------------- * * Tcl_UtfToTitle -- * * Changes the first character of a UTF string to title case or * uppercase and the rest of the string to lowercase. The * conversion happens in place and may shrink the UTF string. * * Results: * Returns the number of bytes in the resulting string * excluding the trailing null. * * Side effects: * Writes a terminating null after the last converted character. * *---------------------------------------------------------------------- */intTcl_UtfToTitle(str) char *str; /* String to convert in place. */{ Tcl_UniChar ch, titleChar, lowChar; char *src, *dst; int bytes; /* * Capitalize the first character and then lowercase the rest of the * characters until we get to a null. */ src = dst = str; if (*src) { bytes = TclUtfToUniChar(src, &ch); titleChar = Tcl_UniCharToTitle(ch); if (bytes < UtfCount(titleChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { dst += Tcl_UniCharToUtf(titleChar, dst); } src += bytes; } while (*src) { bytes = TclUtfToUniChar(src, &ch); lowChar = Tcl_UniCharToLower(ch); if (bytes < UtfCount(lowChar)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { dst += Tcl_UniCharToUtf(lowChar, dst); } src += bytes; } *dst = '\0'; return (dst - str);}/* *---------------------------------------------------------------------- * * TclpUtfNcmp2 --
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?