⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tclencoding.c

📁 tcl是工具命令语言
💻 C
📖 第 1 页 / 共 5 页
字号:
/*
 * tclEncoding.c --
 *
 *	Contains the implementation of the encoding conversion package.
 *
 * Copyright (c) 1996-1998 Sun Microsystems, Inc.
 *
 * See the file "license.terms" for information on usage and redistribution
 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 *
 * RCS: @(#) $Id: tclEncoding.c,v 1.16 2003/02/21 02:40:58 hobbs Exp $
 */

#include "tclInt.h"
#include "tclPort.h"

typedef size_t (LengthProc)_ANSI_ARGS_((CONST char *src));

/*
 * The following data structure represents an encoding, which describes how
 * to convert between various character sets and UTF-8.
 */

typedef struct Encoding {
    char *name;			/* Name of encoding.  Malloced because (1)
				 * hash table entry that owns this encoding
				 * may be freed prior to this encoding being
				 * freed, (2) string passed in the
				 * Tcl_EncodingType structure may not be
				 * persistent. */
    Tcl_EncodingConvertProc *toUtfProc;
				/* Procedure to convert from external
				 * encoding into UTF-8. */
    Tcl_EncodingConvertProc *fromUtfProc;
				/* Procedure to convert from UTF-8 into
				 * external encoding. */
    Tcl_EncodingFreeProc *freeProc;
				/* If non-NULL, procedure to call when this
				 * encoding is deleted. */
    int nullSize;		/* Number of 0x00 bytes that signify
				 * end-of-string in this encoding.  This
				 * number is used to determine the source
				 * string length when the srcLen argument is
				 * negative.  This number can be 1 or 2. */
    ClientData clientData;	/* Arbitrary value associated with encoding
				 * type.  Passed to conversion procedures. */
    LengthProc *lengthProc;	/* Function to compute length of
				 * null-terminated strings in this encoding.
				 * If nullSize is 1, this is strlen; if
				 * nullSize is 2, this is a function that
				 * returns the number of bytes in a 0x0000
				 * terminated string. */
    int refCount;		/* Number of uses of this structure. */
    Tcl_HashEntry *hPtr;	/* Hash table entry that owns this encoding. */
} Encoding;

/*
 * The following structure is the clientData for a dynamically-loaded,
 * table-driven encoding created by LoadTableEncoding().  It maps between
 * Unicode and a single-byte, double-byte, or multibyte (1 or 2 bytes only)
 * encoding.
 */

typedef struct TableEncodingData {
    int fallback;		/* Character (in this encoding) to
				 * substitute when this encoding cannot
				 * represent a UTF-8 character. */
    char prefixBytes[256];	/* If a byte in the input stream is a lead
				 * byte for a 2-byte sequence, the
				 * corresponding entry in this array is 1,
				 * otherwise it is 0. */
    unsigned short **toUnicode;	/* Two dimensional sparse matrix to map
				 * characters from the encoding to Unicode.
				 * Each element of the toUnicode array points
				 * to an array of 256 shorts.  If there is no
				 * corresponding character in Unicode, the
				 * value in the matrix is 0x0000.  malloc'd. */
    unsigned short **fromUnicode;
				/* Two dimensional sparse matrix to map
				 * characters from Unicode to the encoding.
				 * Each element of the fromUnicode array
				 * points to an array of 256 shorts.  If there
				 * is no corresponding character in the
				 * encoding, the value in the matrix is
				 * 0x0000.  malloc'd. */
} TableEncodingData;

/*
 * The following structures are the clientData for a dynamically-loaded,
 * escape-driven encoding that is itself comprised of other simpler
 * encodings.  An example is "iso-2022-jp", which uses escape sequences to
 * switch between ascii, jis0208, jis0212, gb2312, and ksc5601.  Note that
 * "escape-driven" does not necessarily mean that the ESCAPE character is
 * the character used for switching character sets.
 */

typedef struct EscapeSubTable {
    unsigned int sequenceLen;	/* Length of following string. */
    char sequence[16];		/* Escape code that marks this encoding. */
    char name[32];		/* Name for encoding. */
    Encoding *encodingPtr;	/* Encoding loaded using above name, or NULL
				 * if this sub-encoding has not been needed
				 * yet. */
} EscapeSubTable;

typedef struct EscapeEncodingData {
    int fallback;		/* Character (in this encoding) to
				 * substitute when this encoding cannot
				 * represent a UTF-8 character. */
    unsigned int initLen;	/* Length of following string. */
    char init[16];		/* String to emit or expect before first char
				 * in conversion. */
    unsigned int finalLen;	/* Length of following string. */
    char final[16];		/* String to emit or expect after last char
				 * in conversion. */
    char prefixBytes[256];	/* If a byte in the input stream is the
				 * first character of one of the escape
				 * sequences in the following array, the
				 * corresponding entry in this array is 1,
				 * otherwise it is 0. */
    int numSubTables;		/* Length of following array. */
    EscapeSubTable subTables[1];/* Information about each EscapeSubTable
				 * used by this encoding type.  The actual
				 * size will be as large as necessary to
				 * hold all EscapeSubTables. */
} EscapeEncodingData;

/*
 * Constants used when loading an encoding file to identify the type of the
 * file.
 */

#define ENCODING_SINGLEBYTE	0
#define ENCODING_DOUBLEBYTE	1
#define ENCODING_MULTIBYTE	2
#define ENCODING_ESCAPE		3

/*
 * Initialize the default encoding directory.  If this variable contains
 * a non NULL value, it will be the first path used to locate the
 * system encoding files.
 */

char *tclDefaultEncodingDir = NULL;

static int encodingsInitialized  = 0;

/*
 * Hash table that keeps track of all loaded Encodings.  Keys are
 * the string names that represent the encoding, values are (Encoding *).
 */

static Tcl_HashTable encodingTable;
TCL_DECLARE_MUTEX(encodingMutex)

/*
 * The following are used to hold the default and current system encodings.
 * If NULL is passed to one of the conversion routines, the current setting
 * of the system encoding will be used to perform the conversion.
 */

static Tcl_Encoding defaultEncoding;
static Tcl_Encoding systemEncoding;

/*
 * The following variable is used in the sparse matrix code for a
 * TableEncoding to represent a page in the table that has no entries.
 */

static unsigned short emptyPage[256];

/*
 * Procedures used only in this module.
 */

static int		BinaryProc _ANSI_ARGS_((ClientData clientData,
			    CONST char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr));
static void		EscapeFreeProc _ANSI_ARGS_((ClientData clientData));
static int		EscapeFromUtfProc _ANSI_ARGS_((ClientData clientData,
			    CONST char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr));
static int		EscapeToUtfProc _ANSI_ARGS_((ClientData clientData,
			    CONST char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr));
static void		FreeEncoding _ANSI_ARGS_((Tcl_Encoding encoding));
static Encoding *	GetTableEncoding _ANSI_ARGS_((
			    EscapeEncodingData *dataPtr, int state));
static Tcl_Encoding	LoadEncodingFile _ANSI_ARGS_((Tcl_Interp *interp,
			    CONST char *name));
static Tcl_Encoding	LoadTableEncoding _ANSI_ARGS_((Tcl_Interp *interp,
			    CONST char *name, int type, Tcl_Channel chan));
static Tcl_Encoding	LoadEscapeEncoding _ANSI_ARGS_((CONST char *name,
			    Tcl_Channel chan));
static Tcl_Channel	OpenEncodingFile _ANSI_ARGS_((CONST char *dir,
			    CONST char *name));
static void		TableFreeProc _ANSI_ARGS_((ClientData clientData));
static int		TableFromUtfProc _ANSI_ARGS_((ClientData clientData,
			    CONST char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr));
static int		TableToUtfProc _ANSI_ARGS_((ClientData clientData,
			    CONST char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr));
static size_t		unilen _ANSI_ARGS_((CONST char *src));
static int		UnicodeToUtfProc _ANSI_ARGS_((ClientData clientData,
			    CONST char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr));
static int		UtfToUnicodeProc _ANSI_ARGS_((ClientData clientData,
			    CONST char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr));
static int		UtfToUtfProc _ANSI_ARGS_((ClientData clientData,
			    CONST char *src, int srcLen, int flags,
			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
			    int *srcReadPtr, int *dstWrotePtr,
			    int *dstCharsPtr));

/*
 *---------------------------------------------------------------------------
 *
 * TclInitEncodingSubsystem --
 *
 *	Initialize all resources used by this subsystem on a per-process
 *	basis.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	Depends on the memory, object, and IO subsystems.
 *
 *---------------------------------------------------------------------------
 */

void
TclInitEncodingSubsystem()
{
    Tcl_EncodingType type;

    Tcl_MutexLock(&encodingMutex);
    Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
    Tcl_MutexUnlock(&encodingMutex);

    /*
     * Create a few initial encodings.  Note that the UTF-8 to UTF-8
     * translation is not a no-op, because it will turn a stream of
     * improperly formed UTF-8 into a properly formed stream.
     */

    type.encodingName	= "identity";
    type.toUtfProc	= BinaryProc;
    type.fromUtfProc	= BinaryProc;
    type.freeProc	= NULL;
    type.nullSize	= 1;
    type.clientData	= NULL;

    /*
     * "identity" is both the default encoding and the initial system
     * encoding.  The system encoding is obtained through a separate
     * Tcl_GetEncoding() lookup by name (presumably so that it holds its
     * own reference -- confirm against Tcl_CreateEncoding).
     */

    defaultEncoding	= Tcl_CreateEncoding(&type);
    systemEncoding	= Tcl_GetEncoding(NULL, type.encodingName);

    type.encodingName	= "utf-8";
    type.toUtfProc	= UtfToUtfProc;
    type.fromUtfProc    = UtfToUtfProc;
    type.freeProc	= NULL;
    type.nullSize	= 1;
    type.clientData	= NULL;
    Tcl_CreateEncoding(&type);

    type.encodingName   = "unicode";
    type.toUtfProc	= UnicodeToUtfProc;
    type.fromUtfProc    = UtfToUnicodeProc;
    type.freeProc	= NULL;
    type.nullSize	= 2;
    type.clientData	= NULL;
    Tcl_CreateEncoding(&type);
}

/*
 *----------------------------------------------------------------------
 *
 * TclFinalizeEncodingSubsystem --
 *
 *	Release the state associated with the encoding subsystem.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	Frees all of the encodings.
 *
 *----------------------------------------------------------------------
 */

void
TclFinalizeEncodingSubsystem()
{
    Tcl_HashSearch search;
    Tcl_HashEntry *hPtr;

    Tcl_MutexLock(&encodingMutex);
    encodingsInitialized  = 0;
    hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
    while (hPtr != NULL) {
	/*
	 * Call FreeEncoding instead of doing it directly to handle refcounts
	 * like escape encodings use.  [Bug #524674]
	 * Make sure to call Tcl_FirstHashEntry repeatedly so that all
	 * encodings are eventually cleaned up.
	 */
	FreeEncoding((Tcl_Encoding) Tcl_GetHashValue(hPtr));
	hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
    }
    Tcl_DeleteHashTable(&encodingTable);
    Tcl_MutexUnlock(&encodingMutex);
}

/*
 *-------------------------------------------------------------------------
 *
 * Tcl_GetDefaultEncodingDir --
 *
 *	Report the directory currently recorded as the default location
 *	of the system encoding files.
 *
 * Results:
 *	Returns the current value of tclDefaultEncodingDir.  This is
 *	NULL until a directory has been set.  The returned pointer is
 *	the stored string itself, not a copy.
 *
 * Side effects:
 *	None.
 *
 *-------------------------------------------------------------------------
 */

CONST char *
Tcl_GetDefaultEncodingDir()
{
    return tclDefaultEncodingDir;
}

/*
 *-------------------------------------------------------------------------
 *
 * Tcl_SetDefaultEncodingDir --
 *
 *	Record a private copy of path as the default encoding directory.
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	Allocates strlen(path) + 1 bytes with ckalloc, copies path into
 *	them, and stores the result in tclDefaultEncodingDir.  path must
 *	not be NULL.
 *
 *	NOTE(review): any previously stored string is overwritten without
 *	being freed.  It is not released here because pointers handed out
 *	by Tcl_GetDefaultEncodingDir() may still be in use and the global
 *	may have been assigned storage this module does not own.
 *
 *-------------------------------------------------------------------------
 */

void
Tcl_SetDefaultEncodingDir(path)
    CONST char *path;
{
    tclDefaultEncodingDir = (char *)ckalloc((unsigned) strlen(path) + 1);
    strcpy(tclDefaultEncodingDir, path);
}

/*
 *-------------------------------------------------------------------------
 *
 * Tcl_GetEncoding --
 *
 *	Given the name of an encoding, find the corresponding Tcl_Encoding
 *	token.  If the encoding did not already exist, Tcl attempts to
 *	dynamically load an encoding by that name.
 *
 * Results:
 *	Returns a token that represents the encoding.  If the name didn't
 *	refer to any known or loadable encoding, NULL is returned.  If
 *	NULL was returned, an error message is left in interp's result
 *	object, unless interp was NULL.
 *
 * Side effects:
 *	The new encoding type is entered into a table visible to all
 *	interpreters, keyed off the encoding's name.  For each call to
 *	this procedure, there should eventually be a call to
 *	Tcl_FreeEncoding, so that the database can be cleaned up when
 *	encodings aren't needed anymore.
 *
 *-------------------------------------------------------------------------
 */

Tcl_Encoding
Tcl_GetEncoding(interp, name)
    Tcl_Interp *interp;		/* Interp for error reporting, if not NULL. */
    CONST char *name;		/* The name of the desired encoding. */
{
    Tcl_HashEntry *hPtr;
    Encoding *encodingPtr;

    Tcl_MutexLock(&encodingMutex);

    /*
     * A NULL name requests the current system encoding.
     */

    if (name == NULL) {
	encodingPtr = (Encoding *) systemEncoding;
	encodingPtr->refCount++;
	Tcl_MutexUnlock(&encodingMutex);
	return systemEncoding;
    }

    /*
     * An encoding that is already in the table just gains a reference.
     */

    hPtr = Tcl_FindHashEntry(&encodingTable, name);
    if (hPtr != NULL) {
	encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
	encodingPtr->refCount++;
	Tcl_MutexUnlock(&encodingMutex);
	return (Tcl_Encoding) encodingPtr;
    }

    /*
     * Not in the table: release the mutex before attempting to load the
     * encoding from a file.
     */

    Tcl_MutexUnlock(&encodingMutex);
    return LoadEncodingFile(interp, name);
}

/*
 *---------------------------------------------------------------------------
 *
 * Tcl_FreeEncoding --
 *
 *	This procedure is called to release an encoding allocated by
 *	Tcl_CreateEncoding() or Tcl_GetEncoding().
 *
 * Results:
 *	None.
 *
 * Side effects:
 *	The reference count associated with the encoding is decremented
 *	and the encoding may be deleted if nothing is using it anymore.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -