tclstringobj.c

来自「tcl是工具命令语言」· C语言 代码 · 共 1,878 行 · 第 1/4 页

C
1,878
字号
/* 
 * tclStringObj.c --
 *
 *	This file contains procedures that implement string operations on Tcl
 *	objects.  Some string operations work with UTF strings and others
 *	require Unicode format.  Functions that require knowledge of the width
 *	of each character, such as indexing, operate on Unicode data.
 *
 *	A Unicode string is an internationalized string.  Conceptually, a
 *	Unicode string is an array of 16-bit quantities organized as a sequence
 *	of properly formed UTF-8 characters.  There is a one-to-one map between
 *	Unicode and UTF characters.  Because Unicode characters have a fixed
 *	width, operations such as indexing operate on Unicode data.  The String
 *	object is optimized for the case where each UTF char in a string is
 *	only one byte.  In this case, we store the value of numChars, but we
 *	don't store the Unicode data (unless Tcl_GetUnicode is explicitly
 *	called).
 *
 *	The String object type stores one or both formats.  The default
 *	behavior is to store UTF.  Once Unicode is calculated by a function, it
 *	is stored in the internal rep for future access (without an additional
 *	O(n) cost).
 *
 *	To allow many appends to be done to an object without constantly
 *	reallocating the space for the string or Unicode representation, we
 *	allocate double the space for the string or Unicode and use the
 *	internal representation to keep track of how much space is used
 *	vs. allocated.
 *
 * Copyright (c) 1995-1997 Sun Microsystems, Inc.
 * Copyright (c) 1999 by Scriptics Corporation.
 *
 * See the file "license.terms" for information on usage and redistribution
 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 *
 * RCS: @(#) $Id: tclStringObj.c,v 1.32 2003/02/19 16:43:28 das Exp $ */

#include "tclInt.h"

/*
 * Prototypes for procedures defined later in this file:
 */

static void		AppendUnicodeToUnicodeRep _ANSI_ARGS_((
			    Tcl_Obj *objPtr, CONST Tcl_UniChar *unicode,
			    int appendNumChars));
static void		AppendUnicodeToUtfRep _ANSI_ARGS_((
			    Tcl_Obj *objPtr, CONST Tcl_UniChar *unicode,
			    int numChars));
static void		AppendUtfToUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr,
			    CONST char *bytes, int numBytes));
static void		AppendUtfToUtfRep _ANSI_ARGS_((Tcl_Obj *objPtr,
			    CONST char *bytes, int numBytes));

static void		FillUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr));

static void		FreeStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
static void		DupStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr,
			    Tcl_Obj *copyPtr));
static int		SetStringFromAny _ANSI_ARGS_((Tcl_Interp *interp,
			    Tcl_Obj *objPtr));
static void		UpdateStringOfString _ANSI_ARGS_((Tcl_Obj *objPtr));

/*
 * The structure below defines the string Tcl object type by means of
 * procedures that can be invoked by generic object code.
 */

Tcl_ObjType tclStringType = {
    "string",				/* name */
    FreeStringInternalRep,		/* freeIntRepProc */
    DupStringInternalRep,		/* dupIntRepProc */
    UpdateStringOfString,		/* updateStringProc */
    SetStringFromAny			/* setFromAnyProc */
};

/*
 * The following structure is the internal rep for a String object.
 * It keeps track of how much memory has been used and how much has been
 * allocated for the Unicode and UTF string to enable growing and
 * shrinking of the UTF and Unicode reps of the String object with fewer
 * mallocs.  To optimize string length and indexing operations, this
 * structure also stores the number of characters (same for UTF and Unicode!)
 * once that value has been computed.
 */

typedef struct String {
    int numChars;		/* The number of chars in the string.
				 * -1 means this value has not been
				 * calculated. >= 0 means that there is a
				 * valid Unicode rep, or that the number
				 * of UTF bytes == the number of chars. */
    size_t allocated;		/* The amount of space actually allocated
				 * for the UTF string (minus 1 byte for
				 * the termination char). */
    size_t uallocated;		/* The amount of space actually allocated
				 * for the Unicode string (minus 2 bytes for
				 * the termination char). */
    int hasUnicode;		/* Boolean determining whether the string
				 * has a Unicode representation. */
    Tcl_UniChar unicode[2];	/* The array of Unicode chars.  The actual
				 * size of this field depends on the
				 * 'uallocated' field above. */
} String;

/*
 * STRING_UALLOC	Bytes needed to hold numChars Unicode characters
 *			(terminator not included).
 * STRING_SIZE		Bytes to allocate for a String whose Unicode array
 *			holds ualloc bytes of characters; one of the two
 *			declared unicode[] slots is kept for the terminator.
 * GET_STRING		Fetch the String internal rep of an object.
 * SET_STRING		Store a String pointer as an object's internal rep.
 *
 * Macro arguments are parenthesized so that expression arguments such as
 * "a + b" expand correctly.
 */

#define STRING_UALLOC(numChars)	\
		((numChars) * sizeof(Tcl_UniChar))
#define STRING_SIZE(ualloc)	\
		((unsigned) (sizeof(String) - sizeof(Tcl_UniChar) + (ualloc)))
#define GET_STRING(objPtr) \
		((String *) (objPtr)->internalRep.otherValuePtr)
#define SET_STRING(objPtr, stringPtr) \
		(objPtr)->internalRep.otherValuePtr = (VOID *) (stringPtr)

/*
 * TCL STRING GROWTH ALGORITHM
 *
 * When growing strings (during an append, for example), the following growth
 * algorithm is used:
 *
 *   Attempt to allocate 2 * (originalLength + appendLength)
 *   On failure:
 *	attempt to allocate originalLength + 2*appendLength +
 *			TCL_GROWTH_MIN_ALLOC 
 *
 * This algorithm allows very good performance, as it rapidly increases the
 * memory allocated for a given string, which minimizes the number of
 * reallocations that must be performed.  However, using only the doubling
 * algorithm can lead to a significant waste of memory.  In particular, it
 * may fail even when there is sufficient memory available to complete the
 * append request (but there is not 2 * totalLength memory available).  So when
 * the doubling fails (because there is not enough memory available), the
 * algorithm requests a smaller amount of memory, which is still enough to
 * cover the request, but which hopefully will be less than the total available
 * memory.
 * 
 * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling
 * of very small appends.  Without this extra slush factor, a sequence
 * of several small appends would cause several memory allocations.
 * As long as TCL_GROWTH_MIN_ALLOC is a reasonable size, we can
 * avoid that behavior.
 *
 * The growth algorithm can be tuned by adjusting the following parameters:
 *
 * TCL_GROWTH_MIN_ALLOC		Additional space, in bytes, to allocate when
 *				the double allocation has failed.
 *				Default is 1024 (1 kilobyte).
 */

#ifndef TCL_GROWTH_MIN_ALLOC
#define TCL_GROWTH_MIN_ALLOC	1024
#endif

/*
 *----------------------------------------------------------------------
 *
 * Tcl_NewStringObj --
 *
 *	This procedure is normally called when not debugging: i.e., when
 *	TCL_MEM_DEBUG is not defined. It creates a new string object and
 *	initializes it from the byte pointer and length arguments.
 *
 *	When TCL_MEM_DEBUG is defined, this procedure just returns the
 *	result of calling the debugging version Tcl_DbNewStringObj.
 *
 * Results:
 *	A newly created string object is returned that has ref count zero.
 *
 * Side effects:
 *	The new object's internal string representation will be set to a
 *	copy of the length bytes starting at "bytes". If "length" is
 *	negative, use bytes up to the first NULL byte; i.e., assume "bytes"
 *	points to a C-style NULL-terminated string. The object's type is set
 *	to NULL. An extra NULL is added to the end of the new object's byte
 *	array.
 *
 *----------------------------------------------------------------------
 */

#ifdef TCL_MEM_DEBUG
#undef Tcl_NewStringObj

Tcl_Obj *
Tcl_NewStringObj(bytes, length)
    CONST char *bytes;		/* Points to the first of the length bytes
				 * used to initialize the new object. */
    int length;			/* The number of bytes to copy from "bytes"
				 * when initializing the new object. If 
				 * negative, use bytes up to the first
				 * NULL byte. */
{
    return Tcl_DbNewStringObj(bytes, length, "unknown", 0);
}

#else /* if not TCL_MEM_DEBUG */

Tcl_Obj *
Tcl_NewStringObj(bytes, length)
    CONST char *bytes;		/* Points to the first of the length bytes
				 * used to initialize the new object. */
    int length;			/* The number of bytes to copy from "bytes"
				 * when initializing the new object. If 
				 * negative, use bytes up to the first
				 * NULL byte. */
{
    register Tcl_Obj *objPtr;

    /*
     * A negative length means "measure it"; a NULL pointer is then
     * treated as the empty string.
     */

    if (length < 0) {
	length = (bytes? strlen(bytes) : 0);
    }
    TclNewObj(objPtr);
    TclInitStringRep(objPtr, bytes, length);
    return objPtr;
}
#endif /* TCL_MEM_DEBUG */

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DbNewStringObj --
 *
 *	This procedure is normally called when debugging: i.e., when
 *	TCL_MEM_DEBUG is defined. It creates new string objects. It is the
 *	same as the Tcl_NewStringObj procedure above except that it calls
 *	Tcl_DbCkalloc directly with the file name and line number from its
 *	caller. This simplifies debugging since then the [memory active]
 *	command	will report the correct file name and line number when
 *	reporting objects that haven't been freed.
 *
 *	When TCL_MEM_DEBUG is not defined, this procedure just returns the
 *	result of calling Tcl_NewStringObj.
 *
 * Results:
 *	A newly created string object is returned that has ref count zero.
 *
 * Side effects:
 *	The new object's internal string representation will be set to a
 *	copy of the length bytes starting at "bytes". If "length" is
 *	negative, use bytes up to the first NULL byte; i.e., assume "bytes"
 *	points to a C-style NULL-terminated string. The object's type is set
 *	to NULL. An extra NULL is added to the end of the new object's byte
 *	array.
 *
 *----------------------------------------------------------------------
 */

#ifdef TCL_MEM_DEBUG

Tcl_Obj *
Tcl_DbNewStringObj(bytes, length, file, line)
    CONST char *bytes;		/* Points to the first of the length bytes
				 * used to initialize the new object. */
    int length;			/* The number of bytes to copy from "bytes"
				 * when initializing the new object. If 
				 * negative, use bytes up to the first
				 * NULL byte. */
    CONST char *file;		/* The name of the source file calling this
				 * procedure; used for debugging. */
    int line;			/* Line number in the source file; used
				 * for debugging. */
{
    register Tcl_Obj *objPtr;

    /*
     * A negative length means "measure it"; a NULL pointer is then
     * treated as the empty string.
     */

    if (length < 0) {
	length = (bytes? strlen(bytes) : 0);
    }
    TclDbNewObj(objPtr, file, line);
    TclInitStringRep(objPtr, bytes, length);
    return objPtr;
}

#else /* if not TCL_MEM_DEBUG */

Tcl_Obj *
Tcl_DbNewStringObj(bytes, length, file, line)
    CONST char *bytes;		/* Points to the first of the length bytes
				 * used to initialize the new object. */
    register int length;	/* The number of bytes to copy from "bytes"
				 * when initializing the new object. If 
				 * negative, use bytes up to the first
				 * NULL byte. */
    CONST char *file;		/* The name of the source file calling this
				 * procedure; used for debugging. */
    int line;			/* Line number in the source file; used
				 * for debugging. */
{
    return Tcl_NewStringObj(bytes, length);
}
#endif /* TCL_MEM_DEBUG */

/*
 *---------------------------------------------------------------------------
 *
 * Tcl_NewUnicodeObj --
 *
 *	This procedure creates a new String object and initializes it
 *	from the given Unicode string.  The numChars characters are copied
 *	into the object's internal rep and a terminating 0 is appended.
 *	If numChars is negative, the length is found by scanning "unicode"
 *	for the first 0 character; a NULL "unicode" is then treated as the
 *	empty string.
 *
 * Results:
 *	The newly created object is returned.  This object will have no
 *	initial string representation.  The returned object has a ref count
 *	of 0.
 *
 * Side effects:
 *	Memory allocated for new object and copy of Unicode argument.
 *
 *---------------------------------------------------------------------------
 */

Tcl_Obj *
Tcl_NewUnicodeObj(unicode, numChars)
    CONST Tcl_UniChar *unicode;	/* The unicode string used to initialize
				 * the new object. */
    int numChars;		/* Number of characters in the unicode
				 * string. */
{
    Tcl_Obj *objPtr;
    String *stringPtr;
    size_t uallocated;

    if (numChars < 0) {
	numChars = 0;
	if (unicode) {
	    while (unicode[numChars] != 0) { numChars++; }
	}
    }
    uallocated = STRING_UALLOC(numChars);

    /*
     * Create a new obj with an invalid string rep.
     */

    TclNewObj(objPtr);
    Tcl_InvalidateStringRep(objPtr);
    objPtr->typePtr = &tclStringType;

    stringPtr = (String *) ckalloc(STRING_SIZE(uallocated));
    stringPtr->numChars = numChars;
    stringPtr->uallocated = uallocated;
    stringPtr->hasUnicode = (numChars > 0);
    stringPtr->allocated = 0;

    /*
     * Skip the copy for an empty string: "unicode" may legitimately be
     * NULL in that case, and passing a NULL pointer to memcpy is
     * undefined behavior even when the byte count is zero.
     */

    if (uallocated > 0) {
	memcpy((VOID *) stringPtr->unicode, (VOID *) unicode, uallocated);
    }
    stringPtr->unicode[numChars] = 0;
    SET_STRING(objPtr, stringPtr);
    return objPtr;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetCharLength --
 *
 *	Get the length of the Unicode string from the Tcl object.
 *
 * Results:
 *	The number of characters in the object's string value.
 *
 * Side effects:
 *	Calls SetStringFromAny on the object (frees old internal rep,
 *	allocates memory for new "String" internal rep).  If the
 *	character count was not yet known it is computed and cached, and
 *	when the count differs from the byte length FillUnicodeRep is
 *	called to populate the Unicode string.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_GetCharLength(objPtr)
    Tcl_Obj *objPtr;	/* The String object to get the num chars of. */
{
    String *stringPtr;
    
    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);

    /*
     * If numChars is unknown, then calculate the number of characters
     * while populating the Unicode string.
     */
    
    if (stringPtr->numChars == -1) {
	register int i = objPtr->length;
	register unsigned char *str = (unsigned char *) objPtr->bytes;

	/*
	 * This is a speed sensitive function, so run specially over the
	 * string to count continuous ascii characters before resorting
	 * to the Tcl_NumUtfChars call.  This is a long form of:
	 stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
	*/

	while (i && (*str < 0xC0)) { i--; str++; }
	stringPtr->numChars = objPtr->length - i;
	if (i) {
	    stringPtr->numChars += Tcl_NumUtfChars(objPtr->bytes
		    + (objPtr->length - i), i);
	}

 	if (stringPtr->numChars == objPtr->length) {

	    /*
	     * Since we've just calculated the number of chars, and all
	     * UTF chars are 1-byte long, we don't need to store the
	     * unicode string.
	     */

	    stringPtr->hasUnicode = 0;

	} else {
    
	    /*
	     * Since we've just calculated the number of chars, and not
	     * all UTF chars are 1-byte long, go ahead and populate the
	     * unicode string.
	     */

	    FillUnicodeRep(objPtr);

	    /*
	     * We need to fetch the pointer again because we have just
	     * reallocated the structure to make room for the Unicode data.
	     */
	    
	    stringPtr = GET_STRING(objPtr);
	}
    }
    return stringPtr->numChars;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUniChar --
 *
 *	Get the index'th Unicode character from the String object.  The
 *	index is assumed to be in the appropriate range.
 *
 * Results:
 *	Returns the index'th Unicode character in the Object.
 *
 * Side effects:
 *	Fills unichar with the index'th Unicode character.
 *
 *----------------------------------------------------------------------
 */

Tcl_UniChar
Tcl_GetUniChar(objPtr, index)
    Tcl_Obj *objPtr;	/* The object to get the Unicode charater from. */
    int index;		/* Get the index'th Unicode character. */
{
    Tcl_UniChar unichar;
    String *stringPtr;
    
    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);

    if (stringPtr->numChars == -1) {

	/*
	 * We haven't yet calculated the length, so we don't have the
	 * Unicode str.  We need to know the number of chars before we
	 * can do indexing.
	 */

	Tcl_GetCharLength(objPtr);

	/*
	 * We need to fetch the pointer again because we may have just
	 * reallocated the structure.
	 */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?