tclcmdmz.c

来自「tcl是工具命令语言」· C语言 代码 · 共 2,428 行 · 第 1/5 页

C
2,428
字号
		}		if ((index >= 0) && (index < length1)) {		    char buf[TCL_UTF_MAX];		    Tcl_UniChar ch;		    ch      = Tcl_GetUniChar(objv[2], index);		    length1 = Tcl_UniCharToUtf(ch, buf);		    Tcl_SetStringObj(resultPtr, buf, length1);		}	    }	    break;	}	case STR_IS: {	    char *end;	    Tcl_UniChar ch;            /*	     * The UniChar comparison function	     */	    int (*chcomp)_ANSI_ARGS_((int)) = NULL; 	    int i, failat = 0, result = 1, strict = 0;	    Tcl_Obj *objPtr, *failVarObj = NULL;	    static CONST char *isOptions[] = {		"alnum",	"alpha",	"ascii",	"control",		"boolean",	"digit",	"double",	"false",		"graph",	"integer",	"lower",	"print",		"punct",	"space",	"true",		"upper",		"wordchar",	"xdigit",	(char *) NULL	    };	    enum isOptions {		STR_IS_ALNUM,	STR_IS_ALPHA,	STR_IS_ASCII,	STR_IS_CONTROL,		STR_IS_BOOL,	STR_IS_DIGIT,	STR_IS_DOUBLE,	STR_IS_FALSE,		STR_IS_GRAPH,	STR_IS_INT,	STR_IS_LOWER,	STR_IS_PRINT,		STR_IS_PUNCT,	STR_IS_SPACE,	STR_IS_TRUE,	STR_IS_UPPER,		STR_IS_WORD,	STR_IS_XDIGIT	    };	    if (objc < 4 || objc > 7) {		Tcl_WrongNumArgs(interp, 2, objv,				 "class ?-strict? ?-failindex var? str");		return TCL_ERROR;	    }	    if (Tcl_GetIndexFromObj(interp, objv[2], isOptions, "class", 0,				    &index) != TCL_OK) {		return TCL_ERROR;	    }	    if (objc != 4) {		for (i = 3; i < objc-1; i++) {		    string2 = Tcl_GetStringFromObj(objv[i], &length2);		    if ((length2 > 1) &&			strncmp(string2, "-strict", (size_t) length2) == 0) {			strict = 1;		    } else if ((length2 > 1) &&			    strncmp(string2, "-failindex",				    (size_t) length2) == 0) {			if (i+1 >= objc-1) {			    Tcl_WrongNumArgs(interp, 3, objv,					     "?-strict? ?-failindex var? str");			    return TCL_ERROR;			}			failVarObj = objv[++i];		    } else {			Tcl_AppendStringsToObj(resultPtr, "bad option \"",				string2, "\": must be -strict or -failindex",				(char *) NULL);			return TCL_ERROR;		    }		}	    }	    /*	     * We get the objPtr so that we can short-cut for some classes	     * by checking the object type (int and double), but we need	     * the string otherwise, because we don't want any conversion	     * of type occuring (as, for example, Tcl_Get*FromObj would do	     */	    objPtr = objv[objc-1];	    string1 = Tcl_GetStringFromObj(objPtr, &length1);	    if (length1 == 0) {		if (strict) {		    result = 0;		}		goto str_is_done;	    }	    end = string1 + length1;	    /*	     * When entering here, result == 1 and failat == 0	     */	    switch ((enum isOptions) index) {		case STR_IS_ALNUM:		    chcomp = Tcl_UniCharIsAlnum;		    break;		case STR_IS_ALPHA:		    chcomp = Tcl_UniCharIsAlpha;		    break;		case STR_IS_ASCII:		    for (; string1 < end; string1++, failat++) {			/*			 * This is a valid check in unicode, because all			 * bytes < 0xC0 are single byte chars (but isascii			 * limits that def'n to 0x80).			 */			if (*((unsigned char *)string1) >= 0x80) {			    result = 0;			    break;			}		    }		    break;		case STR_IS_BOOL:		case STR_IS_TRUE:		case STR_IS_FALSE:		    if (objPtr->typePtr == &tclBooleanType) {			if ((((enum isOptions) index == STR_IS_TRUE) &&			     objPtr->internalRep.longValue == 0) ||			    (((enum isOptions) index == STR_IS_FALSE) &&			     objPtr->internalRep.longValue != 0)) {			    result = 0;			}		    } else if ((Tcl_GetBoolean(NULL, string1, &i)				== TCL_ERROR) ||			       (((enum isOptions) index == STR_IS_TRUE) &&				i == 0) ||			       (((enum isOptions) index == STR_IS_FALSE) &&				i != 0)) {			result = 0;		    }		    break;		case STR_IS_CONTROL:		    chcomp = Tcl_UniCharIsControl;		    break;		case STR_IS_DIGIT:		    chcomp = Tcl_UniCharIsDigit;		    break;		case STR_IS_DOUBLE: {		    char *stop;		    if ((objPtr->typePtr == &tclDoubleType) ||			(objPtr->typePtr == &tclIntType)) {			break;		    }		    /*		     * This is adapted from Tcl_GetDouble		     *		     * The danger in this function is that		     * "12345678901234567890" is an acceptable 'double',		     * but will later be interp'd as an int by something		     * like [expr].  Therefore, we check to see if it looks		     * like an int, and if so we do a range check on it.		     * If strtoul gets to the end, we know we either		     * received an acceptable int, or over/underflow		     */		    if (TclLooksLikeInt(string1, length1)) {			errno = 0;#ifdef TCL_WIDE_INT_IS_LONG			strtoul(string1, &stop, 0); /* INTL: Tcl source. */#else			strtoull(string1, &stop, 0); /* INTL: Tcl source. */#endif			if (stop == end) {			    if (errno == ERANGE) {				result = 0;				failat = -1;			    }			    break;			}		    }		    errno = 0;		    strtod(string1, &stop); /* INTL: Tcl source. */		    if (errno == ERANGE) {			/*			 * if (errno == ERANGE), then it was an over/underflow			 * problem, but in this method, we only want to know			 * yes or no, so bad flow returns 0 (false) and sets			 * the failVarObj to the string length.			 */			result = 0;			failat = -1;		    } else if (stop == string1) {			/*			 * In this case, nothing like a number was found			 */			result = 0;			failat = 0;		    } else {			/*			 * Assume we sucked up one char per byte			 * and then we go onto SPACE, since we are			 * allowed trailing whitespace			 */			failat = stop - string1;			string1 = stop;			chcomp = Tcl_UniCharIsSpace;		    }		    break;		}		case STR_IS_GRAPH:		    chcomp = Tcl_UniCharIsGraph;		    break;		case STR_IS_INT: {		    char *stop;		    if ((objPtr->typePtr == &tclIntType) ||			(Tcl_GetInt(NULL, string1, &i) == TCL_OK)) {			break;		    }		    /*		     * Like STR_IS_DOUBLE, but we use strtoul.		     * Since Tcl_GetInt already failed, we set result to 0.		     */		    result = 0;		    errno = 0;#ifdef TCL_WIDE_INT_IS_LONG		    strtoul(string1, &stop, 0); /* INTL: Tcl source. */#else		    strtoull(string1, &stop, 0); /* INTL: Tcl source. */#endif		    if (errno == ERANGE) {			/*			 * if (errno == ERANGE), then it was an over/underflow			 * problem, but in this method, we only want to know			 * yes or no, so bad flow returns 0 (false) and sets			 * the failVarObj to the string length.			 */			failat = -1;		    } else if (stop == string1) {			/*			 * In this case, nothing like a number was found			 */			failat = 0;		    } else {			/*			 * Assume we sucked up one char per byte			 * and then we go onto SPACE, since we are			 * allowed trailing whitespace			 */			failat = stop - string1;			string1 = stop;			chcomp = Tcl_UniCharIsSpace;		    }		    break;		}		case STR_IS_LOWER:		    chcomp = Tcl_UniCharIsLower;		    break;		case STR_IS_PRINT:		    chcomp = Tcl_UniCharIsPrint;		    break;		case STR_IS_PUNCT:		    chcomp = Tcl_UniCharIsPunct;		    break;		case STR_IS_SPACE:		    chcomp = Tcl_UniCharIsSpace;		    break;		case STR_IS_UPPER:		    chcomp = Tcl_UniCharIsUpper;		    break;		case STR_IS_WORD:		    chcomp = Tcl_UniCharIsWordChar;		    break;		case STR_IS_XDIGIT: {		    for (; string1 < end; string1++, failat++) {			/* INTL: We assume unicode is bad for this class */			if ((*((unsigned char *)string1) >= 0xC0) ||			    !isxdigit(*(unsigned char *)string1)) {			    result = 0;			    break;			}		    }		    break;		}	    }	    if (chcomp != NULL) {		for (; string1 < end; string1 += length2, failat++) {		    length2 = TclUtfToUniChar(string1, &ch);		    if (!chcomp(ch)) {			result = 0;			break;		    }		}	    }	str_is_done:	    /*	     * Only set the failVarObj when we will return 0	     * and we have indicated a valid fail index (>= 0)	     */	    if ((result == 0) && (failVarObj != NULL) &&		Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(failat),			       TCL_LEAVE_ERR_MSG) == NULL) {		return TCL_ERROR;	    }	    Tcl_SetBooleanObj(resultPtr, result);	    break;	}	case STR_LAST: {	    Tcl_UniChar *ustring1, *ustring2, *p;	    int match, start;	    if (objc < 4 || objc > 5) {	        Tcl_WrongNumArgs(interp, 2, objv,				 "subString string ?startIndex?");		return TCL_ERROR;	    }	    /*	     * We are searching string2 for the sequence string1.	     */	    match = -1;	    start = 0;	    length2 = -1;	    ustring1 = Tcl_GetUnicodeFromObj(objv[2], &length1);	    ustring2 = Tcl_GetUnicodeFromObj(objv[3], &length2);	    if (objc == 5) {		/*		 * If a startIndex is specified, we will need to restrict		 * the string range to that char index in the string		 */		if (TclGetIntForIndex(interp, objv[4], length2 - 1,			&start) != TCL_OK) {		    return TCL_ERROR;		}		if (start < 0) {		    goto str_last_done;		} else if (start < length2) {		    p = ustring2 + start + 1 - length1;		} else {		    p = ustring2 + length2 - length1;		}	    } else {		p = ustring2 + length2 - length1;	    }	    if (length1 > 0) {		for (; p >= ustring2;  p--) {		    /*		     * Scan backwards to find the first character.		     */		    if ((*p == *ustring1) &&			    (memcmp((char *) ustring1, (char *) p, (size_t)				    (length1 * sizeof(Tcl_UniChar))) == 0)) {			match = p - ustring2;			break;		    }		}	    }	    str_last_done:	    Tcl_SetIntObj(resultPtr, match);	    break;	}	case STR_BYTELENGTH:	case STR_LENGTH: {	    if (objc != 3) {	        Tcl_WrongNumArgs(interp, 2, objv, "string");		return TCL_ERROR;	    }	    if ((enum options) index == STR_BYTELENGTH) {		(void) Tcl_GetStringFromObj(objv[2], &length1);	    } else {		/*		 * If we have a ByteArray object, avoid recomputing the		 * string since the byte array contains one byte per		 * character.  Otherwise, use the Unicode string rep to		 * calculate the length.		 */		if (objv[2]->typePtr == &tclByteArrayType) {		    (void) Tcl_GetByteArrayFromObj(objv[2], &length1);		} else {		    length1 = Tcl_GetCharLength(objv[2]);		}	    }	    Tcl_SetIntObj(resultPtr, length1);	    break;	}	case STR_MAP: {	    int mapElemc, nocase = 0;	    Tcl_Obj **mapElemv;	    Tcl_UniChar *ustring1, *ustring2, *p, *end;	    int (*strCmpFn)_ANSI_ARGS_((CONST Tcl_UniChar*,					CONST Tcl_UniChar*, unsigned long));	    if (objc < 4 || objc > 5) {	        Tcl_WrongNumArgs(interp, 2, objv, "?-nocase? charMap string");		return TCL_ERROR;	    }	    if (objc == 5) {		string2 = Tcl_GetStringFromObj(objv[2], &length2);		if ((length2 > 1) &&		    strncmp(string2, "-nocase", (size_t) length2) == 0) {		    nocase = 1;		} else {		    Tcl_AppendStringsToObj(resultPtr, "bad option \"",					   string2, "\": must be -nocase",					   (char *) NULL);		    return TCL_ERROR;		}	    }	    if (Tcl_ListObjGetElements(interp, objv[objc-2], &mapElemc,				       &mapElemv) != TCL_OK) {		return TCL_ERROR;	    }	    if (mapElemc == 0) {		/*		 * empty charMap, just return whatever string was given		 */		Tcl_SetObjResult(interp, objv[objc-1]);		return TCL_OK;	    } else if (mapElemc & 1) {		/*		 * The charMap must be an even number of key/value items		 */		Tcl_SetStringObj(resultPtr, "char map list unbalanced", -1);		return TCL_ERROR;	    }	    objc--;	    ustring1 = Tcl_GetUnicodeFromObj(objv[objc], &length1);	    if (length1 == 0) {		/*		 * Empty input string, just stop now		 */		break;	    }	    end = ustring1 + length1;	    strCmpFn = nocase ? Tcl_UniCharNcasecmp : Tcl_UniCharNcmp;	    /*	     * Force result to be Unicode	     */	    Tcl_SetUnicodeObj(resultPtr, ustring1, 0);	    if (mapElemc == 2) {		/*		 * Special case for one map pair which avoids the extra		 * for loop and extra calls to get Unicode data.  The		 * algorithm is otherwise identical to the multi-pair case.		 * This will be >30% faster on larger strings.		 */		int mapLen;		Tcl_UniChar *mapString, u2lc;		ustring2 = Tcl_GetUnicodeFromObj(mapElemv[0], &length2);		p = ustring1;		if (length2 == 0) {		    ustring1 = end;		} else {		    mapString = Tcl_GetUnicodeFromObj(mapElemv[1], &mapLen);		    u2lc = (nocase ? Tcl_UniCharToLower(*ustring2) : 0);		    for (; ustring1 < end; ustring1++) {			if (((*ustring1 == *ustring2) ||				(nocase && (Tcl_UniCharToLower(*ustring1) ==					u2lc))) &&				((length2 == 1) || strCmpFn(ustring1, ustring2,					(unsigned long) length2) == 0)) {			    if (p != ustring1) {				Tcl_AppendUnicodeToObj(resultPtr, p,					ustring1 - p);				p = ustring1 + length2;			    } else {				p += length2;			    }			    ustring1 = p - 1;			    Tcl_AppendUnicodeToObj(resultPtr, mapString,				    mapLen);			}		    }		}	    } else {		Tcl_UniChar **mapStrings, *u2lc = NULL;		int *mapLens;		/*		 * Precompute pointers to the unicode string and length.		 * This saves us repeated function calls later,		 * significantly speeding up the algorithm.  We only need		 * the lowercase first char in the nocase case.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?