tclstringobj.c
来自「tcl是工具命令语言」· C语言 代码 · 共 1,878 行 · 第 1/4 页
C
1,878 行
/*
 * tclStringObj.c --
 *
 *      This file contains procedures that implement string operations on Tcl
 *      objects.  Some string operations work with UTF strings and others
 *      require Unicode format.  Functions that require knowledge of the width
 *      of each character, such as indexing, operate on Unicode data.
 *
 *      A Unicode string is an internationalized string.  Conceptually, a
 *      Unicode string is an array of 16-bit quantities organized as a sequence
 *      of properly formed UTF-8 characters.  There is a one-to-one map between
 *      Unicode and UTF characters.  Because Unicode characters have a fixed
 *      width, operations such as indexing operate on Unicode data.  The String
 *      object is optimized for the case where each UTF char in a string is
 *      only one byte.  In this case, we store the value of numChars, but we
 *      don't store the Unicode data (unless Tcl_GetUnicode is explicitly
 *      called).
 *
 *      The String object type stores one or both formats.  The default
 *      behavior is to store UTF.  Once Unicode is calculated by a function, it
 *      is stored in the internal rep for future access (without an additional
 *      O(n) cost).
 *
 *      To allow many appends to be done to an object without constantly
 *      reallocating the space for the string or Unicode representation, we
 *      allocate double the space for the string or Unicode and use the
 *      internal representation to keep track of how much space is used
 *      vs. allocated.
 *
 * Copyright (c) 1995-1997 Sun Microsystems, Inc.
 * Copyright (c) 1999 by Scriptics Corporation.
 *
 * See the file "license.terms" for information on usage and redistribution
 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
 *
 * RCS: @(#) $Id: tclStringObj.c,v 1.32 2003/02/19 16:43:28 das Exp $ */

#include "tclInt.h"

/*
 * Prototypes for procedures defined later in this file:
 */

static void             AppendUnicodeToUnicodeRep _ANSI_ARGS_((
                            Tcl_Obj *objPtr, CONST Tcl_UniChar *unicode,
                            int appendNumChars));
static void             AppendUnicodeToUtfRep _ANSI_ARGS_((
                            Tcl_Obj *objPtr, CONST Tcl_UniChar *unicode,
                            int numChars));
static void             AppendUtfToUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr,
                            CONST char *bytes, int numBytes));
static void             AppendUtfToUtfRep _ANSI_ARGS_((Tcl_Obj *objPtr,
                            CONST char *bytes, int numBytes));

static void             FillUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr));

static void             FreeStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr));
static void             DupStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr,
                            Tcl_Obj *copyPtr));
static int              SetStringFromAny _ANSI_ARGS_((Tcl_Interp *interp,
                            Tcl_Obj *objPtr));
static void             UpdateStringOfString _ANSI_ARGS_((Tcl_Obj *objPtr));

/*
 * The structure below defines the string Tcl object type by means of
 * procedures that can be invoked by generic object code.
 */

Tcl_ObjType tclStringType = {
    "string",                           /* name */
    FreeStringInternalRep,              /* freeIntRepProc */
    DupStringInternalRep,               /* dupIntRepProc */
    UpdateStringOfString,               /* updateStringProc */
    SetStringFromAny                    /* setFromAnyProc */
};

/*
 * The following structure is the internal rep for a String object.
 * It keeps track of how much memory has been used and how much has been
 * allocated for the Unicode and UTF string to enable growing and
 * shrinking of the UTF and Unicode reps of the String object with fewer
 * mallocs.  To optimize string length and indexing operations, this
 * structure also stores the number of characters (same of UTF and Unicode!)
 * once that value has been computed.
 */

typedef struct String {
    int numChars;               /* The number of chars in the string.
                                 * -1 means this value has not been
                                 * calculated. >= 0 means that there is a
                                 * valid Unicode rep, or that the number
                                 * of UTF bytes == the number of chars. */
    size_t allocated;           /* The amount of space actually allocated
                                 * for the UTF string (minus 1 byte for
                                 * the termination char). */
    size_t uallocated;          /* The amount of space actually allocated
                                 * for the Unicode string (minus 2 bytes for
                                 * the termination char). */
    int hasUnicode;             /* Boolean determining whether the string
                                 * has a Unicode representation. */
    Tcl_UniChar unicode[2];     /* The array of Unicode chars.  The actual
                                 * size of this field depends on the
                                 * 'uallocated' field above. */
} String;

/*
 * Helper macros for the String internal rep.  Every macro argument is
 * parenthesized so that an expression argument (for example "a + b")
 * expands with the intended precedence.
 *
 * STRING_UALLOC  Number of bytes needed to hold numChars Unicode chars.
 * STRING_SIZE    Number of bytes to allocate for a String whose Unicode
 *                array holds ualloc bytes plus the terminating char.
 * GET_STRING     Fetch the String internal rep of an object.
 * SET_STRING     Store a String internal rep into an object.
 */

#define STRING_UALLOC(numChars) \
        ((numChars) * sizeof(Tcl_UniChar))
#define STRING_SIZE(ualloc) \
        ((unsigned) (sizeof(String) - sizeof(Tcl_UniChar) + (ualloc)))
#define GET_STRING(objPtr) \
        ((String *) (objPtr)->internalRep.otherValuePtr)
#define SET_STRING(objPtr, stringPtr) \
        ((objPtr)->internalRep.otherValuePtr = (VOID *) (stringPtr))

/*
 * TCL STRING GROWTH ALGORITHM
 *
 * When growing strings (during an append, for example), the following growth
 * algorithm is used:
 *
 *   Attempt to allocate 2 * (originalLength + appendLength)
 *   On failure:
 *      attempt to allocate originalLength + 2*appendLength +
 *                      TCL_GROWTH_MIN_ALLOC
 *
 * This algorithm allows very good performance, as it rapidly increases the
 * memory allocated for a given string, which minimizes the number of
 * reallocations that must be performed.  However, using only the doubling
 * algorithm can lead to a significant waste of memory.  In particular, it
 * may fail even when there is sufficient memory available to complete the
 * append request (but there is not 2 * totalLength memory available).  So when
 * the doubling fails (because there is not enough memory available), the
 * algorithm requests a smaller amount of memory, which is still enough to
 * cover the request, but which hopefully will be less than the total available
 * memory.
 *
 * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling
 * of very small appends.  Without this extra slush factor, a sequence
 * of several small appends would cause several memory allocations.
 * As long as TCL_GROWTH_MIN_ALLOC is a reasonable size, we can
 * avoid that behavior.
 *
 * The growth algorithm can be tuned by adjusting the following parameters:
 *
 * TCL_GROWTH_MIN_ALLOC         Additional space, in bytes, to allocate when
 *                              the double allocation has failed.
 *                              Default is 1024 (1 kilobyte).
 */

#ifndef TCL_GROWTH_MIN_ALLOC
#define TCL_GROWTH_MIN_ALLOC    1024
#endif

/*
 *----------------------------------------------------------------------
 *
 * Tcl_NewStringObj --
 *
 *      This procedure is normally called when not debugging: i.e., when
 *      TCL_MEM_DEBUG is not defined.  It creates a new string object and
 *      initializes it from the byte pointer and length arguments.
 *
 *      When TCL_MEM_DEBUG is defined, this procedure just returns the
 *      result of calling the debugging version Tcl_DbNewStringObj.
 *
 * Results:
 *      A newly created string object is returned that has ref count zero.
 *
 * Side effects:
 *      The new object's internal string representation will be set to a
 *      copy of the length bytes starting at "bytes".  If "length" is
 *      negative, use bytes up to the first NULL byte; i.e., assume "bytes"
 *      points to a C-style NULL-terminated string.  The object's type is set
 *      to NULL.  An extra NULL is added to the end of the new object's byte
 *      array.
 *
 *----------------------------------------------------------------------
 */

#ifdef TCL_MEM_DEBUG
#undef Tcl_NewStringObj

Tcl_Obj *
Tcl_NewStringObj(bytes, length)
    CONST char *bytes;          /* Points to the first of the length bytes
                                 * used to initialize the new object. */
    int length;                 /* The number of bytes to copy from "bytes"
                                 * when initializing the new object.  If
                                 * negative, use bytes up to the first
                                 * NULL byte. */
{
    return Tcl_DbNewStringObj(bytes, length, "unknown", 0);
}

#else /* if not TCL_MEM_DEBUG */

Tcl_Obj *
Tcl_NewStringObj(bytes, length)
    CONST char *bytes;          /* Points to the first of the length bytes
                                 * used to initialize the new object. */
    int length;                 /* The number of bytes to copy from "bytes"
                                 * when initializing the new object.  If
                                 * negative, use bytes up to the first
                                 * NULL byte. */
{
    register Tcl_Obj *objPtr;

    if (length < 0) {
        /* A NULL "bytes" with a negative length means the empty string. */
        length = (bytes? strlen(bytes) : 0);
    }
    TclNewObj(objPtr);
    TclInitStringRep(objPtr, bytes, length);
    return objPtr;
}
#endif /* TCL_MEM_DEBUG */

/*
 *----------------------------------------------------------------------
 *
 * Tcl_DbNewStringObj --
 *
 *      This procedure is normally called when debugging: i.e., when
 *      TCL_MEM_DEBUG is defined.  It creates new string objects.  It is the
 *      same as the Tcl_NewStringObj procedure above except that it calls
 *      Tcl_DbCkalloc directly with the file name and line number from its
 *      caller.  This simplifies debugging since then the [memory active]
 *      command will report the correct file name and line number when
 *      reporting objects that haven't been freed.
 *
 *      When TCL_MEM_DEBUG is not defined, this procedure just returns the
 *      result of calling Tcl_NewStringObj.
 *
 * Results:
 *      A newly created string object is returned that has ref count zero.
 *
 * Side effects:
 *      The new object's internal string representation will be set to a
 *      copy of the length bytes starting at "bytes".  If "length" is
 *      negative, use bytes up to the first NULL byte; i.e., assume "bytes"
 *      points to a C-style NULL-terminated string.  The object's type is set
 *      to NULL.  An extra NULL is added to the end of the new object's byte
 *      array.
 *
 *----------------------------------------------------------------------
 */

#ifdef TCL_MEM_DEBUG

Tcl_Obj *
Tcl_DbNewStringObj(bytes, length, file, line)
    CONST char *bytes;          /* Points to the first of the length bytes
                                 * used to initialize the new object. */
    int length;                 /* The number of bytes to copy from "bytes"
                                 * when initializing the new object.  If
                                 * negative, use bytes up to the first
                                 * NULL byte. */
    CONST char *file;           /* The name of the source file calling this
                                 * procedure; used for debugging. */
    int line;                   /* Line number in the source file; used
                                 * for debugging. */
{
    register Tcl_Obj *objPtr;

    if (length < 0) {
        /* A NULL "bytes" with a negative length means the empty string. */
        length = (bytes? strlen(bytes) : 0);
    }
    TclDbNewObj(objPtr, file, line);
    TclInitStringRep(objPtr, bytes, length);
    return objPtr;
}

#else /* if not TCL_MEM_DEBUG */

Tcl_Obj *
Tcl_DbNewStringObj(bytes, length, file, line)
    CONST char *bytes;          /* Points to the first of the length bytes
                                 * used to initialize the new object. */
    register int length;        /* The number of bytes to copy from "bytes"
                                 * when initializing the new object.  If
                                 * negative, use bytes up to the first
                                 * NULL byte. */
    CONST char *file;           /* The name of the source file calling this
                                 * procedure; used for debugging. */
    int line;                   /* Line number in the source file; used
                                 * for debugging. */
{
    return Tcl_NewStringObj(bytes, length);
}
#endif /* TCL_MEM_DEBUG */

/*
 *---------------------------------------------------------------------------
 *
 * Tcl_NewUnicodeObj --
 *
 *      This procedure creates a new String object and initializes
 *      it from the given Unicode String.  If the Utf String is the same size
 *      as the Unicode string, don't duplicate the data.
 *
 *      If "numChars" is negative, the length is found by scanning
 *      "unicode" for a terminating 0 character; a NULL "unicode" is then
 *      treated as the empty string.
 *
 * Results:
 *      The newly created object is returned.  This object will have no
 *      initial string representation.  The returned object has a ref count
 *      of 0.
 *
 * Side effects:
 *      Memory allocated for new object and copy of Unicode argument.
 *
 *---------------------------------------------------------------------------
 */

Tcl_Obj *
Tcl_NewUnicodeObj(unicode, numChars)
    CONST Tcl_UniChar *unicode; /* The unicode string used to initialize
                                 * the new object. */
    int numChars;               /* Number of characters in the unicode
                                 * string. */
{
    Tcl_Obj *objPtr;
    String *stringPtr;
    size_t uallocated;

    if (numChars < 0) {
        numChars = 0;
        if (unicode) {
            while (unicode[numChars] != 0) { numChars++; }
        }
    }
    uallocated = STRING_UALLOC(numChars);

    /*
     * Create a new obj with an invalid string rep.
     */

    TclNewObj(objPtr);
    Tcl_InvalidateStringRep(objPtr);
    objPtr->typePtr = &tclStringType;

    stringPtr = (String *) ckalloc(STRING_SIZE(uallocated));
    stringPtr->numChars = numChars;
    stringPtr->uallocated = uallocated;
    stringPtr->hasUnicode = (numChars > 0);
    stringPtr->allocated = 0;

    /*
     * Skip the copy for an empty string: "unicode" may be NULL in that
     * case, and passing a NULL source to memcpy is undefined behavior
     * even when the length is zero.
     */

    if (uallocated > 0) {
        memcpy((VOID *) stringPtr->unicode, (VOID *) unicode, uallocated);
    }
    stringPtr->unicode[numChars] = 0;
    SET_STRING(objPtr, stringPtr);
    return objPtr;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetCharLength --
 *
 *      Get the length of the Unicode string from the Tcl object.
 *
 * Results:
 *      The number of characters in the object's string value.
 *
 * Side effects:
 *      Frees old internal rep.  Allocates memory for new "String"
 *      internal rep.
 *
 *----------------------------------------------------------------------
 */

int
Tcl_GetCharLength(objPtr)
    Tcl_Obj *objPtr;    /* The String object to get the num chars of. */
{
    String *stringPtr;

    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);

    /*
     * If numChars is unknown, then calculate the number of characters
     * while populating the Unicode string.
     */

    if (stringPtr->numChars == -1) {
        register int i = objPtr->length;
        register unsigned char *str = (unsigned char *) objPtr->bytes;

        /*
         * This is a speed sensitive function, so run specially over the
         * string to count continuous ascii characters before resorting
         * to the Tcl_NumUtfChars call.  This is a long form of:
         stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length);
        */

        while (i && (*str < 0xC0)) { i--; str++; }
        stringPtr->numChars = objPtr->length - i;
        if (i) {
            /* Count the remainder, starting at the first byte >= 0xC0. */
            stringPtr->numChars += Tcl_NumUtfChars(objPtr->bytes
                    + (objPtr->length - i), i);
        }

        if (stringPtr->numChars == objPtr->length) {

            /*
             * Since we've just calculated the number of chars, and all
             * UTF chars are 1-byte long, we don't need to store the
             * unicode string.
             */

            stringPtr->hasUnicode = 0;

        } else {

            /*
             * Since we've just calculated the number of chars, and not
             * all UTF chars are 1-byte long, go ahead and populate the
             * unicode string.
             */

            FillUnicodeRep(objPtr);

            /*
             * We need to fetch the pointer again because we have just
             * reallocated the structure to make room for the Unicode data.
             */

            stringPtr = GET_STRING(objPtr);
        }
    }
    return stringPtr->numChars;
}

/*
 *----------------------------------------------------------------------
 *
 * Tcl_GetUniChar --
 *
 *      Get the index'th Unicode character from the String object.  The
 *      index is assumed to be in the appropriate range.
 *
 * Results:
 *      Returns the index'th Unicode character in the Object.
 *
 * Side effects:
 *      Fills unichar with the index'th Unicode character.
 *
 *----------------------------------------------------------------------
 */

Tcl_UniChar
Tcl_GetUniChar(objPtr, index)
    Tcl_Obj *objPtr;    /* The object to get the Unicode character from. */
    int index;          /* Get the index'th Unicode character. */
{
    Tcl_UniChar unichar;
    String *stringPtr;

    SetStringFromAny(NULL, objPtr);
    stringPtr = GET_STRING(objPtr);

    if (stringPtr->numChars == -1) {

        /*
         * We haven't yet calculated the length, so we don't have the
         * Unicode str.  We need to know the number of chars before we
         * can do indexing.
         */

        Tcl_GetCharLength(objPtr);

        /*
         * We need to fetch the pointer again because we may have just
         * reallocated the structure.
         */
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?