tclparse.c

来自「tcl是工具命令语言」· C语言 代码 · 共 1,785 行 · 第 1/4 页

C
1,785
字号
    register CONST char *p = src;    while (1) {	while (numBytes && ((type = CHAR_TYPE(*p)) & TYPE_SPACE)) {	    numBytes--; p++;	}	if (numBytes && (type & TYPE_SUBS)) {	    if (*p != '\\') {		break;	    }	    if (--numBytes == 0) {		break;	    }	    if (p[1] != '\n') {		break;	    }	    p+=2;	    if (--numBytes == 0) {		parsePtr->incomplete = 1;		break;	    }	    continue;	}	break;    }    *typePtr = type;    return (p - src);}/* *---------------------------------------------------------------------- * * TclParseHex -- * *	Scans a hexadecimal number as a Tcl_UniChar value. *	(e.g., for parsing \x and \u escape sequences). *	At most numBytes bytes are scanned. * * Results: *	The numeric value is stored in *resultPtr. *	Returns the number of bytes consumed. * * Notes: *	Relies on the following properties of the ASCII *	character set, with which UTF-8 is compatible: * *	The digits '0' .. '9' and the letters 'A' .. 'Z' and 'a' .. 'z'  *	occupy consecutive code points, and '0' < 'A' < 'a'. * *---------------------------------------------------------------------- */intTclParseHex(src, numBytes, resultPtr)    CONST char *src;		/* First character to parse. */    int numBytes;		/* Max number of byes to scan */    Tcl_UniChar *resultPtr;	/* Points to storage provided by				 * caller where the Tcl_UniChar				 * resulting from the conversion is				 * to be written. */{    Tcl_UniChar result = 0;    register CONST char *p = src;    while (numBytes--) {	unsigned char digit = UCHAR(*p);	if (!isxdigit(digit))	    break;	++p;	result <<= 4;	if (digit >= 'a') {	    result |= (10 + digit - 'a');	} else if (digit >= 'A') {	    result |= (10 + digit - 'A');	} else {	    result |= (digit - '0');	}    }    *resultPtr = result;    return (p - src);}/* *---------------------------------------------------------------------- * * TclParseBackslash -- * *	Scans up to numBytes bytes starting at src, consuming a *	backslash sequence as defined by Tcl's parsing rules.   * * Results: * 	Records at readPtr the number of bytes making up the backslash * 	sequence.  Records at dst the UTF-8 encoded equivalent of * 	that backslash sequence.  Returns the number of bytes written * 	to dst, at most TCL_UTF_MAX.  Either readPtr or dst may be * 	NULL, if the results are not needed, but the return value is * 	the same either way. * * Side effects: * 	None. * *---------------------------------------------------------------------- */intTclParseBackslash(src, numBytes, readPtr, dst)    CONST char * src;	/* Points to the backslash character of a			 * a backslash sequence */    int numBytes;	/* Max number of bytes to scan */    int *readPtr;	/* NULL, or points to storage where the			 * number of bytes scanned should be written. */    char *dst;		/* NULL, or points to buffer where the UTF-8			 * encoding of the backslash sequence is to be			 * written.  At most TCL_UTF_MAX bytes will be			 * written there. */{    register CONST char *p = src+1;    Tcl_UniChar result;    int count;    char buf[TCL_UTF_MAX];    if (numBytes == 0) {	if (readPtr != NULL) {	    *readPtr = 0;	}	return 0;    }    if (dst == NULL) {        dst = buf;    }    if (numBytes == 1) {	/* Can only scan the backslash.  Return it. */	result = '\\';	count = 1;	goto done;    }    count = 2;    switch (*p) {        /*         * Note: in the conversions below, use absolute values (e.g.,         * 0xa) rather than symbolic values (e.g. \n) that get converted         * by the compiler.  It's possible that compilers on some         * platforms will do the symbolic conversions differently, which         * could result in non-portable Tcl scripts.         */        case 'a':            result = 0x7;            break;        case 'b':            result = 0x8;            break;        case 'f':            result = 0xc;            break;        case 'n':            result = 0xa;            break;        case 'r':            result = 0xd;            break;        case 't':            result = 0x9;            break;        case 'v':            result = 0xb;            break;        case 'x':	    count += TclParseHex(p+1, numBytes-1, &result);	    if (count == 2) {		/* No hexadigits -> This is just "x". */		result = 'x';	    } else {		/* Keep only the last byte (2 hex digits) */		result = (unsigned char) result;	    }            break;        case 'u':	    count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-1, &result);	    if (count == 2) {		/* No hexadigits -> This is just "u". */		result = 'u';	    }            break;        case '\n':            count--;            do {                p++; count++;            } while ((count < numBytes) && ((*p == ' ') || (*p == '\t')));            result = ' ';            break;        case 0:            result = '\\';            count = 1;            break;        default:            /*             * Check for an octal number \oo?o?             */            if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */                result = (unsigned char)(*p - '0');                p++;                if ((numBytes == 2) || !isdigit(UCHAR(*p)) /* INTL: digit */			|| (UCHAR(*p) >= '8')) {                     break;                }                count = 3;                result = (unsigned char)((result << 3) + (*p - '0'));                p++;                if ((numBytes == 3) || !isdigit(UCHAR(*p)) /* INTL: digit */			|| (UCHAR(*p) >= '8')) {                    break;                }                count = 4;                result = (unsigned char)((result << 3) + (*p - '0'));                break;            }            /*             * We have to convert here in case the user has put a             * backslash in front of a multi-byte utf-8 character.             * While this means nothing special, we shouldn't break up             * a correct utf-8 character. [Bug #217987] test subst-3.2             */	    if (Tcl_UtfCharComplete(p, numBytes - 1)) {	        count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */	    } else {		char utfBytes[TCL_UTF_MAX];		memcpy(utfBytes, p, (size_t) (numBytes - 1));		utfBytes[numBytes - 1] = '\0';	        count = Tcl_UtfToUniChar(utfBytes, &result) + 1;	    }            break;    }    done:    if (readPtr != NULL) {        *readPtr = count;    }    return Tcl_UniCharToUtf((int) result, dst);}/* *---------------------------------------------------------------------- * * ParseComment -- * *	Scans up to numBytes bytes starting at src, consuming a *	Tcl comment as defined by Tcl's parsing rules.   * * Results: * 	Records in parsePtr information about the parse.  Returns the * 	number of bytes consumed. * * Side effects: * 	None. * *---------------------------------------------------------------------- */static intParseComment(src, numBytes, parsePtr)    CONST char *src;		/* First character to parse. */    register int numBytes;	/* Max number of bytes to scan. */    Tcl_Parse *parsePtr;	/* Information about parse in progress.				 * Updated if parsing indicates				 * an incomplete command. */{    register CONST char *p = src;    while (numBytes) {	char type;	int scanned;	do {	    scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type);	    p += scanned; numBytes -= scanned;	} while (numBytes && (*p == '\n') && (p++,numBytes--));	if ((numBytes == 0) || (*p != '#')) {	    break;	}	if (parsePtr->commentStart == NULL) {	    parsePtr->commentStart = p;	}	while (numBytes) {	    if (*p == '\\') {		scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type);		if (scanned) {		    p += scanned; numBytes -= scanned;		} else {		    /*		     * General backslash substitution in comments isn't		     * part of the formal spec, but test parse-15.47		     * and history indicate that it has been the de facto		     * rule.  Don't change it now.		     */		    TclParseBackslash(p, numBytes, &scanned, NULL);		    p += scanned; numBytes -= scanned;		}	    } else {		p++; numBytes--;		if (p[-1] == '\n') {		    break;		}	    }	}	parsePtr->commentSize = p - parsePtr->commentStart;    }    return (p - src);}/* *---------------------------------------------------------------------- * * ParseTokens -- * *	This procedure forms the heart of the Tcl parser.  It parses one *	or more tokens from a string, up to a termination point *	specified by the caller.  This procedure is used to parse *	unquoted command words (those not in quotes or braces), words in *	quotes, and array indices for variables.  No more than numBytes *	bytes will be scanned. * * Results: *	Tokens are added to parsePtr and parsePtr->term is filled in *	with the address of the character that terminated the parse (the *	first one whose CHAR_TYPE matched mask or the character at *	parsePtr->end).  The return value is TCL_OK if the parse *	completed successfully and TCL_ERROR otherwise.  If a parse *	error occurs and parsePtr->interp isn't NULL, then an error *	message is left in the interpreter's result. * * Side effects: *	None. * *---------------------------------------------------------------------- */static intParseTokens(src, numBytes, mask, parsePtr)    register CONST char *src;	/* First character to parse. */    register int numBytes;	/* Max number of bytes to scan. */    int mask;			/* Specifies when to stop parsing.  The				 * parse stops at the first unquoted				 * character whose CHAR_TYPE contains				 * any of the bits in mask. */    Tcl_Parse *parsePtr;	/* Information about parse in progress.				 * Updated with additional tokens and				 * termination information. */{    char type;     int originalTokens, varToken;    Tcl_Token *tokenPtr;    Tcl_Parse nested;    /*     * Each iteration through the following loop adds one token of     * type TCL_TOKEN_TEXT, TCL_TOKEN_BS, TCL_TOKEN_COMMAND, or     * TCL_TOKEN_VARIABLE to parsePtr.  For TCL_TOKEN_VARIABLE tokens,     * additional tokens are added for the parsed variable name.     */    originalTokens = parsePtr->numTokens;    while (numBytes && !((type = CHAR_TYPE(*src)) & mask)) {	if (parsePtr->numTokens == parsePtr->tokensAvailable) {	    TclExpandTokenArray(parsePtr);	}	tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens];	tokenPtr->start = src;	tokenPtr->numComponents = 0;	if ((type & TYPE_SUBS) == 0) {	    /*	     * This is a simple range of characters.  Scan to find the end	     * of the range.	     */	    while ((++src, --numBytes) 		    && !(CHAR_TYPE(*src) & (mask | TYPE_SUBS))) {		/* empty loop */	    }	    tokenPtr->type = TCL_TOKEN_TEXT;	    tokenPtr->size = src - tokenPtr->start;	    parsePtr->numTokens++;	} else if (*src == '$') {	    /*	     * This is a variable reference.  Call Tcl_ParseVarName to do	     * all the dirty work of parsing the name.	     */	    varToken = parsePtr->numTokens;	    if (Tcl_ParseVarName(parsePtr->interp, src, numBytes,		    parsePtr, 1) != TCL_OK) {		return TCL_ERROR;	    }	    src += parsePtr->tokenPtr[varToken].size;	    numBytes -= parsePtr->tokenPtr[varToken].size;	} else if (*src == '[') {	    /*	     * Command substitution.  Call Tcl_ParseCommand recursively	     * (and repeatedly) to parse the nested command(s), then	     * throw away the parse information.	     */	    src++; numBytes--;	    while (1) {		if (Tcl_ParseCommand(parsePtr->interp, src,			numBytes, 1, &nested) != TCL_OK) {		    parsePtr->errorType = nested.errorType;		    parsePtr->term = nested.term;		    parsePtr->incomplete = nested.incomplete;		    return TCL_ERROR;		}		src = nested.commandStart + nested.commandSize;		numBytes = parsePtr->end - src;		/*		 * This is equivalent to Tcl_FreeParse(&nested), but		 * presumably inlined here for sake of runtime optimization		 */		if (nested.tokenPtr != nested.staticTokens) {		    ckfree((char *) nested.tokenPtr);		}		/*		 * Check for the closing ']' that ends the command		 * substitution.  It must have been the last character of		 * the parsed command.		 */		if ((nested.term < parsePtr->end) && (*nested.term == ']')			&& !nested.incomplete) {		    break;		}		if (numBytes == 0) {		    if (parsePtr->interp != NULL) {			Tcl_SetResult(parsePtr->interp,			    "missing close-bracket", TCL_STATIC);		    }		    parsePtr->errorType = TCL_PARSE_MISSING_BRACKET;		    parsePtr->term = tokenPtr->start;		    parsePtr->incomplete = 1;		    return TCL_ERROR;		}	    }

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?