📄 tclencoding.c
字号:
* * LoadTableEncoding -- * * Helper function for LoadEncodingTable(). Loads a table to that * converts between Unicode and some other encoding and creates an * encoding (using a TableEncoding structure) from that information. * * File contains binary data, but begins with a marker to indicate * byte-ordering, so that same binary file can be read on either * endian platforms. * * Results: * The return value is the new encoding, or NULL if the encoding * could not be created (because the file contained invalid data). * * Side effects: * None. * *------------------------------------------------------------------------- */static Tcl_EncodingLoadTableEncoding(interp, name, type, chan) Tcl_Interp *interp; /* Interp for temporary obj while reading. */ CONST char *name; /* Name for new encoding. */ int type; /* Type of encoding (ENCODING_?????). */ Tcl_Channel chan; /* File containing new encoding. */{ Tcl_DString lineString; Tcl_Obj *objPtr; char *line; int i, hi, lo, numPages, symbol, fallback; unsigned char used[256]; unsigned int size; TableEncodingData *dataPtr; unsigned short *pageMemPtr; Tcl_EncodingType encType; /* * Speed over memory. Use a full 256 character table to decode hex * sequences in the encoding files. */ static char staticHex[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 ... 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 ... 31 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 32 ... 47 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 48 ... 63 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 64 ... 79 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 ... 95 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 96 ... 111 */ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 112 ... 127 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128 ... 143 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 144 ... 159 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 160 ... 175 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 176 ... 191 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 192 ... 207 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 208 ... 223 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 224 ... 239 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 240 ... 255 */ }; Tcl_DStringInit(&lineString); Tcl_Gets(chan, &lineString); line = Tcl_DStringValue(&lineString); fallback = (int) strtol(line, &line, 16); symbol = (int) strtol(line, &line, 10); numPages = (int) strtol(line, &line, 10); Tcl_DStringFree(&lineString); if (numPages < 0) { numPages = 0; } else if (numPages > 256) { numPages = 256; } memset(used, 0, sizeof(used));#undef PAGESIZE#define PAGESIZE (256 * sizeof(unsigned short)) dataPtr = (TableEncodingData *) ckalloc(sizeof(TableEncodingData)); memset(dataPtr, 0, sizeof(TableEncodingData)); dataPtr->fallback = fallback; /* * Read the table that maps characters to Unicode. Performs a single * malloc to get the memory for the array and all the pages needed by * the array. */ size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; dataPtr->toUnicode = (unsigned short **) ckalloc(size); memset(dataPtr->toUnicode, 0, size); pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256); if (interp == NULL) { objPtr = Tcl_NewObj(); } else { objPtr = Tcl_GetObjResult(interp); } for (i = 0; i < numPages; i++) { int ch; char *p; Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0); p = Tcl_GetString(objPtr); hi = (staticHex[(unsigned int)p[0]] << 4) + staticHex[(unsigned int)p[1]]; dataPtr->toUnicode[hi] = pageMemPtr; p += 2; for (lo = 0; lo < 256; lo++) { if ((lo & 0x0f) == 0) { p++; } ch = (staticHex[(unsigned int)p[0]] << 12) + (staticHex[(unsigned int)p[1]] << 8) + (staticHex[(unsigned int)p[2]] << 4) + staticHex[(unsigned int)p[3]]; if (ch != 0) { used[ch >> 8] = 1; } *pageMemPtr = (unsigned short) ch; pageMemPtr++; p += 4; } } if (interp == NULL) { Tcl_DecrRefCount(objPtr); } else { Tcl_ResetResult(interp); } if (type == ENCODING_DOUBLEBYTE) { memset(dataPtr->prefixBytes, 1, sizeof(dataPtr->prefixBytes)); } else { for (hi = 1; hi < 256; hi++) { if (dataPtr->toUnicode[hi] != NULL) { dataPtr->prefixBytes[hi] = 1; } } } /* * Invert toUnicode array to produce the fromUnicode array. Performs a * single malloc to get the memory for the array and all the pages * needed by the array. While reading in the toUnicode array, we * remembered what pages that would be needed for the fromUnicode array. */ if (symbol) { used[0] = 1; } numPages = 0; for (hi = 0; hi < 256; hi++) { if (used[hi]) { numPages++; } } size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; dataPtr->fromUnicode = (unsigned short **) ckalloc(size); memset(dataPtr->fromUnicode, 0, size); pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256); for (hi = 0; hi < 256; hi++) { if (dataPtr->toUnicode[hi] == NULL) { dataPtr->toUnicode[hi] = emptyPage; } else { for (lo = 0; lo < 256; lo++) { int ch; ch = dataPtr->toUnicode[hi][lo]; if (ch != 0) { unsigned short *page; page = dataPtr->fromUnicode[ch >> 8]; if (page == NULL) { page = pageMemPtr; pageMemPtr += 256; dataPtr->fromUnicode[ch >> 8] = page; } page[ch & 0xff] = (unsigned short) ((hi << 8) + lo); } } } } if (type == ENCODING_MULTIBYTE) { /* * If multibyte encodings don't have a backslash character, define * one. Otherwise, on Windows, native file names won't work because * the backslash in the file name will map to the unknown character * (question mark) when converting from UTF-8 to external encoding. */ if (dataPtr->fromUnicode[0] != NULL) { if (dataPtr->fromUnicode[0]['\\'] == '\0') { dataPtr->fromUnicode[0]['\\'] = '\\'; } } } if (symbol) { unsigned short *page; /* * Make a special symbol encoding that not only maps the symbol * characters from their Unicode code points down into page 0, but * also ensure that the characters on page 0 map to themselves. * This is so that a symbol font can be used to display a simple * string like "abcd" and have alpha, beta, chi, delta show up, * rather than have "unknown" chars show up because strictly * speaking the symbol font doesn't have glyphs for those low ascii * chars. */ page = dataPtr->fromUnicode[0]; if (page == NULL) { page = pageMemPtr; dataPtr->fromUnicode[0] = page; } for (lo = 0; lo < 256; lo++) { if (dataPtr->toUnicode[0][lo] != 0) { page[lo] = (unsigned short) lo; } } } for (hi = 0; hi < 256; hi++) { if (dataPtr->fromUnicode[hi] == NULL) { dataPtr->fromUnicode[hi] = emptyPage; } } /* * For trailing 'R'everse encoding, see [Patch #689341] */ Tcl_DStringInit(&lineString); do { int len; /* skip leading empty lines */ while ((len = Tcl_Gets(chan, &lineString)) == 0) ; if (len < 0) { break; } line = Tcl_DStringValue(&lineString); if (line[0] != 'R') { break; } for (Tcl_DStringSetLength(&lineString, 0); (len = Tcl_Gets(chan, &lineString)) >= 0; Tcl_DStringSetLength(&lineString, 0)) { unsigned char* p; int to, from; if (len < 5) { continue; } p = (unsigned char*) Tcl_DStringValue(&lineString); to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) + (staticHex[p[2]] << 4) + staticHex[p[3]]; if (to == 0) { continue; } for (p += 5, len -= 5; len >= 0 && *p; p += 5, len -= 5) { from = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) + (staticHex[p[2]] << 4) + staticHex[p[3]]; if (from == 0) { continue; } dataPtr->fromUnicode[from >> 8][from & 0xff] = to; } } } while (0); Tcl_DStringFree(&lineString); encType.encodingName = name; encType.toUtfProc = TableToUtfProc; encType.fromUtfProc = TableFromUtfProc; encType.freeProc = TableFreeProc; encType.nullSize = (type == ENCODING_DOUBLEBYTE) ? 2 : 1; encType.clientData = (ClientData) dataPtr; return Tcl_CreateEncoding(&encType);}/* *------------------------------------------------------------------------- * * LoadEscapeEncoding -- * * Helper function for LoadEncodingTable(). Loads a state machine * that converts between Unicode and some other encoding. * * File contains text data that describes the escape sequences that * are used to choose an encoding and the associated names for the * sub-encodings. * * Results: * The return value is the new encoding, or NULL if the encoding * could not be created (because the file contained invalid data). * * Side effects: * None. * *------------------------------------------------------------------------- */static Tcl_EncodingLoadEscapeEncoding(name, chan) CONST char *name; /* Name for new encoding. */ Tcl_Channel chan; /* File containing new encoding. */{ int i; unsigned int size; Tcl_DString escapeData; char init[16], final[16]; EscapeEncodingData *dataPtr; Tcl_EncodingType type; init[0] = '\0'; final[0] = '\0'; Tcl_DStringInit(&escapeData); while (1) { int argc; CONST char **argv; char *line; Tcl_DString lineString; Tcl_DStringInit(&lineString); if (Tcl_Gets(chan, &lineString) < 0) { break; } line = Tcl_DStringValue(&lineString); if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) { continue; } if (argc >= 2) { if (strcmp(argv[0], "name") == 0) { ; } else if (strcmp(argv[0], "init") == 0) { strncpy(init, argv[1], sizeof(init)); init[sizeof(init) - 1] = '\0'; } else if (strcmp(argv[0], "final") == 0) { strncpy(final, argv[1], sizeof(final)); final[sizeof(final) - 1] = '\0'; } else { EscapeSubTable est; strncpy(est.sequence, argv[1], sizeof(est.sequence)); est.sequence[sizeof(est.sequence) - 1] = '\0'; est.sequenceLen = strlen(est.sequence); strncpy(est.name, argv[0], sizeof(est.name)); est.name[sizeof(est.name) - 1] = '\0'; /* To avoid infinite recursion in [encoding system iso2022-*]*/ Tcl_GetEncoding(NULL, est.name); est.encodingPtr = NULL; Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est)); } } ckfree((char *) argv); Tcl_DStringFree(&lineString); } size = sizeof(EscapeEncodingData) - sizeof(EscapeSubTable) + Tcl_DStringLength(&escapeData); dataPtr = (EscapeEncodingData *) ckalloc(size); dataPtr->initLen = strlen(init); strcpy(dataPtr->init, init); dataPtr->finalLen = strlen(final); strcpy(dataPtr->final, final); dataPtr->numSubTables = Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable); memcpy((VOID *) dataPtr->subTables, (VOID *) Tcl_DStringValue(&escapeData), (size_t) Tcl_DStringLength(&escapeData)); Tcl_DStringFree(&escapeData); memset(dataPtr->prefixBytes, 0, sizeof(dataPtr->prefixBytes)); for (i = 0; i < dataPtr->numSubTables; i++) { dataPtr->prefixBytes[UCHAR(dataPtr->subTables[i].sequence[0])] = 1; } if (dataPtr->init[0] != '\0') { dataPtr->prefixBytes[UCHAR(dataPtr->init[0])] = 1; } if (dataPtr->final[0] != '\0') { dataPtr->prefixBytes[UCHAR(dataPtr->final[0])] = 1; } type.encodingName = name; type.toUtfProc = EscapeToUtfProc; type.fromUtfProc = EscapeFromUtfProc; type.freeProc = EscapeFreeProc; type.nullSize = 1; type.clientData = (ClientData) dataPtr; return Tcl_CreateEncoding(&type);}/* *------------------------------------------------------------------------- * * BinaryProc -- * * The default conversion when no other conversion is specified. * No translation is done; source bytes are copied directly to * destination bytes. * * Results: * Returns TCL_OK if conversion was successful. * * Side effects: * None. * *------------------------------------------------------------------------- */static intBinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr) ClientData clientData; /* Not used. */ CONST char *src; /* Source string (unknown encoding). */ int srcLen; /* Source string length in bytes. */ int flags; /* Conversion control flags. */ Tcl_EncodingState *statePtr;/* Place for conversion routine to store * state information used during a piecewise * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ char *dst; /* Output buffer in which converted string * is stored. */ int dstLen; /* The maximum length of output buffer in * bytes. */ int *srcReadPtr; /* Filled with the number of bytes from the * source string that were converted. */ int *dstWrotePtr; /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ int *dstCharsPtr; /* Filled with the number of characters that * correspond to the bytes stored in the
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -