📄 tclencoding.c
字号:
/*
 * tclEncoding.c --
 *
 *    Contains the implementation of the encoding conversion package.
 *
 * Copyright (c) 1996-1998 Sun Microsystems, Inc.
 *
 * See the file "license.terms" for information on usage and redistribution
 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 *
 * RCS: @(#) $Id: tclEncoding.c,v 1.16 2003/02/21 02:40:58 hobbs Exp $
 */

#include "tclInt.h"
#include "tclPort.h"

typedef size_t (LengthProc)_ANSI_ARGS_((CONST char *src));

/*
 * The following data structure represents an encoding, which describes how
 * to convert between various character sets and UTF-8.
 */

typedef struct Encoding {
    char *name;                 /* Name of encoding. Malloced because (1)
                                 * hash table entry that owns this encoding
                                 * may be freed prior to this encoding being
                                 * freed, (2) string passed in the
                                 * Tcl_EncodingType structure may not be
                                 * persistent. */
    Tcl_EncodingConvertProc *toUtfProc;
                                /* Procedure to convert from external
                                 * encoding into UTF-8. */
    Tcl_EncodingConvertProc *fromUtfProc;
                                /* Procedure to convert from UTF-8 into
                                 * external encoding. */
    Tcl_EncodingFreeProc *freeProc;
                                /* If non-NULL, procedure to call when this
                                 * encoding is deleted. */
    int nullSize;               /* Number of 0x00 bytes that signify
                                 * end-of-string in this encoding. This
                                 * number is used to determine the source
                                 * string length when the srcLen argument is
                                 * negative. This number can be 1 or 2. */
    ClientData clientData;      /* Arbitrary value associated with encoding
                                 * type. Passed to conversion procedures. */
    LengthProc *lengthProc;     /* Function to compute length of
                                 * null-terminated strings in this encoding.
                                 * If nullSize is 1, this is strlen; if
                                 * nullSize is 2, this is a function that
                                 * returns the number of bytes in a 0x0000
                                 * terminated string. */
    int refCount;               /* Number of uses of this structure. */
    Tcl_HashEntry *hPtr;        /* Hash table entry that owns this encoding. */
} Encoding;

/*
 * The following structure is the clientData for a dynamically-loaded,
 * table-driven encoding created by LoadTableEncoding().
It maps between * Unicode and a single-byte, double-byte, or multibyte (1 or 2 bytes only) * encoding. */typedef struct TableEncodingData { int fallback; /* Character (in this encoding) to * substitute when this encoding cannot * represent a UTF-8 character. */ char prefixBytes[256]; /* If a byte in the input stream is a lead * byte for a 2-byte sequence, the * corresponding entry in this array is 1, * otherwise it is 0. */ unsigned short **toUnicode; /* Two dimensional sparse matrix to map * characters from the encoding to Unicode. * Each element of the toUnicode array points * to an array of 256 shorts. If there is no * corresponding character in Unicode, the * value in the matrix is 0x0000. malloc'd. */ unsigned short **fromUnicode; /* Two dimensional sparse matrix to map * characters from Unicode to the encoding. * Each element of the fromUnicode array * points to an array of 256 shorts. If there * is no corresponding character the encoding, * the value in the matrix is 0x0000. * malloc'd. */} TableEncodingData;/* * The following structures is the clientData for a dynamically-loaded, * escape-driven encoding that is itself comprised of other simpler * encodings. An example is "iso-2022-jp", which uses escape sequences to * switch between ascii, jis0208, jis0212, gb2312, and ksc5601. Note that * "escape-driven" does not necessarily mean that the ESCAPE character is * the character used for switching character sets. */typedef struct EscapeSubTable { unsigned int sequenceLen; /* Length of following string. */ char sequence[16]; /* Escape code that marks this encoding. */ char name[32]; /* Name for encoding. */ Encoding *encodingPtr; /* Encoding loaded using above name, or NULL * if this sub-encoding has not been needed * yet. */} EscapeSubTable;typedef struct EscapeEncodingData { int fallback; /* Character (in this encoding) to * substitute when this encoding cannot * represent a UTF-8 character. */ unsigned int initLen; /* Length of following string. 
*/ char init[16]; /* String to emit or expect before first char * in conversion. */ unsigned int finalLen; /* Length of following string. */ char final[16]; /* String to emit or expect after last char * in conversion. */ char prefixBytes[256]; /* If a byte in the input stream is the * first character of one of the escape * sequences in the following array, the * corresponding entry in this array is 1, * otherwise it is 0. */ int numSubTables; /* Length of following array. */ EscapeSubTable subTables[1];/* Information about each EscapeSubTable * used by this encoding type. The actual * size will be as large as necessary to * hold all EscapeSubTables. */} EscapeEncodingData;/* * Constants used when loading an encoding file to identify the type of the * file. */#define ENCODING_SINGLEBYTE 0#define ENCODING_DOUBLEBYTE 1#define ENCODING_MULTIBYTE 2#define ENCODING_ESCAPE 3/* * Initialize the default encoding directory. If this variable contains * a non NULL value, it will be the first path used to locate the * system encoding files. */char *tclDefaultEncodingDir = NULL;static int encodingsInitialized = 0;/* * Hash table that keeps track of all loaded Encodings. Keys are * the string names that represent the encoding, values are (Encoding *). */ static Tcl_HashTable encodingTable;TCL_DECLARE_MUTEX(encodingMutex)/* * The following are used to hold the default and current system encodings. * If NULL is passed to one of the conversion routines, the current setting * of the system encoding will be used to perform the conversion. */static Tcl_Encoding defaultEncoding;static Tcl_Encoding systemEncoding;/* * The following variable is used in the sparse matrix code for a * TableEncoding to represent a page in the table that has no entries. */static unsigned short emptyPage[256];/* * Procedures used only in this module. 
*/static int BinaryProc _ANSI_ARGS_((ClientData clientData, CONST char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr));static void EscapeFreeProc _ANSI_ARGS_((ClientData clientData));static int EscapeFromUtfProc _ANSI_ARGS_((ClientData clientData, CONST char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr));static int EscapeToUtfProc _ANSI_ARGS_((ClientData clientData, CONST char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr));static void FreeEncoding _ANSI_ARGS_((Tcl_Encoding encoding));static Encoding * GetTableEncoding _ANSI_ARGS_(( EscapeEncodingData *dataPtr, int state));static Tcl_Encoding LoadEncodingFile _ANSI_ARGS_((Tcl_Interp *interp, CONST char *name));static Tcl_Encoding LoadTableEncoding _ANSI_ARGS_((Tcl_Interp *interp, CONST char *name, int type, Tcl_Channel chan));static Tcl_Encoding LoadEscapeEncoding _ANSI_ARGS_((CONST char *name, Tcl_Channel chan));static Tcl_Channel OpenEncodingFile _ANSI_ARGS_((CONST char *dir, CONST char *name));static void TableFreeProc _ANSI_ARGS_((ClientData clientData));static int TableFromUtfProc _ANSI_ARGS_((ClientData clientData, CONST char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr));static int TableToUtfProc _ANSI_ARGS_((ClientData clientData, CONST char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr));static size_t unilen _ANSI_ARGS_((CONST char *src));static int UnicodeToUtfProc _ANSI_ARGS_((ClientData clientData, CONST char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr));static int UtfToUnicodeProc 
_ANSI_ARGS_((ClientData clientData, CONST char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr));static int UtfToUtfProc _ANSI_ARGS_((ClientData clientData, CONST char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr));/* *--------------------------------------------------------------------------- * * TclInitEncodingSubsystem -- * * Initialize all resources used by this subsystem on a per-process * basis. * * Results: * None. * * Side effects: * Depends on the memory, object, and IO subsystems. * *--------------------------------------------------------------------------- */voidTclInitEncodingSubsystem(){ Tcl_EncodingType type; Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); /* * Create a few initial encodings. Note that the UTF-8 to UTF-8 * translation is not a no-op, because it will turn a stream of * improperly formed UTF-8 into a properly formed stream. */ type.encodingName = "identity"; type.toUtfProc = BinaryProc; type.fromUtfProc = BinaryProc; type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; defaultEncoding = Tcl_CreateEncoding(&type); systemEncoding = Tcl_GetEncoding(NULL, type.encodingName); type.encodingName = "utf-8"; type.toUtfProc = UtfToUtfProc; type.fromUtfProc = UtfToUtfProc; type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; Tcl_CreateEncoding(&type); type.encodingName = "unicode"; type.toUtfProc = UnicodeToUtfProc; type.fromUtfProc = UtfToUnicodeProc; type.freeProc = NULL; type.nullSize = 2; type.clientData = NULL; Tcl_CreateEncoding(&type);}/* *---------------------------------------------------------------------- * * TclFinalizeEncodingSubsystem -- * * Release the state associated with the encoding subsystem. * * Results: * None. * * Side effects: * Frees all of the encodings. 
* *---------------------------------------------------------------------- */voidTclFinalizeEncodingSubsystem(){ Tcl_HashSearch search; Tcl_HashEntry *hPtr; Tcl_MutexLock(&encodingMutex); encodingsInitialized = 0; hPtr = Tcl_FirstHashEntry(&encodingTable, &search); while (hPtr != NULL) { /* * Call FreeEncoding instead of doing it directly to handle refcounts * like escape encodings use. [Bug #524674] * Make sure to call Tcl_FirstHashEntry repeatedly so that all * encodings are eventually cleaned up. */ FreeEncoding((Tcl_Encoding) Tcl_GetHashValue(hPtr)); hPtr = Tcl_FirstHashEntry(&encodingTable, &search); } Tcl_DeleteHashTable(&encodingTable); Tcl_MutexUnlock(&encodingMutex);}/* *------------------------------------------------------------------------- * * Tcl_GetDefaultEncodingDir -- * * * Results: * * Side effects: * *------------------------------------------------------------------------- */CONST char *Tcl_GetDefaultEncodingDir(){ return tclDefaultEncodingDir;}/* *------------------------------------------------------------------------- * * Tcl_SetDefaultEncodingDir -- * * * Results: * * Side effects: * *------------------------------------------------------------------------- */voidTcl_SetDefaultEncodingDir(path) CONST char *path;{ tclDefaultEncodingDir = (char *)ckalloc((unsigned) strlen(path) + 1); strcpy(tclDefaultEncodingDir, path);}/* *------------------------------------------------------------------------- * * Tcl_GetEncoding -- * * Given the name of a encoding, find the corresponding Tcl_Encoding * token. If the encoding did not already exist, Tcl attempts to * dynamically load an encoding by that name. * * Results: * Returns a token that represents the encoding. If the name didn't * refer to any known or loadable encoding, NULL is returned. If * NULL was returned, an error message is left in interp's result * object, unless interp was NULL. 
* * Side effects: * The new encoding type is entered into a table visible to all * interpreters, keyed off the encoding's name. For each call to * this procedure, there should eventually be a call to * Tcl_FreeEncoding, so that the database can be cleaned up when * encodings aren't needed anymore. * *------------------------------------------------------------------------- */Tcl_EncodingTcl_GetEncoding(interp, name) Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */ CONST char *name; /* The name of the desired encoding. */{ Tcl_HashEntry *hPtr; Encoding *encodingPtr; Tcl_MutexLock(&encodingMutex); if (name == NULL) { encodingPtr = (Encoding *) systemEncoding; encodingPtr->refCount++; Tcl_MutexUnlock(&encodingMutex); return systemEncoding; } hPtr = Tcl_FindHashEntry(&encodingTable, name); if (hPtr != NULL) { encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); encodingPtr->refCount++; Tcl_MutexUnlock(&encodingMutex); return (Tcl_Encoding) encodingPtr; } Tcl_MutexUnlock(&encodingMutex); return LoadEncodingFile(interp, name);}/* *--------------------------------------------------------------------------- * * Tcl_FreeEncoding -- * * This procedure is called to release an encoding allocated by * Tcl_CreateEncoding() or Tcl_GetEncoding(). * * Results: * None. * * Side effects: * The reference count associated with the encoding is decremented * and the encoding may be deleted if nothing is using it anymore.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -