tclparse.c
来自「tcl是工具命令语言」· C语言 代码 · 共 1,785 行 · 第 1/4 页
C
1,785 行
register CONST char *p = src; while (1) { while (numBytes && ((type = CHAR_TYPE(*p)) & TYPE_SPACE)) { numBytes--; p++; } if (numBytes && (type & TYPE_SUBS)) { if (*p != '\\') { break; } if (--numBytes == 0) { break; } if (p[1] != '\n') { break; } p+=2; if (--numBytes == 0) { parsePtr->incomplete = 1; break; } continue; } break; } *typePtr = type; return (p - src);}/* *---------------------------------------------------------------------- * * TclParseHex -- * * Scans a hexadecimal number as a Tcl_UniChar value. * (e.g., for parsing \x and \u escape sequences). * At most numBytes bytes are scanned. * * Results: * The numeric value is stored in *resultPtr. * Returns the number of bytes consumed. * * Notes: * Relies on the following properties of the ASCII * character set, with which UTF-8 is compatible: * * The digits '0' .. '9' and the letters 'A' .. 'Z' and 'a' .. 'z' * occupy consecutive code points, and '0' < 'A' < 'a'. * *---------------------------------------------------------------------- */intTclParseHex(src, numBytes, resultPtr) CONST char *src; /* First character to parse. */ int numBytes; /* Max number of byes to scan */ Tcl_UniChar *resultPtr; /* Points to storage provided by * caller where the Tcl_UniChar * resulting from the conversion is * to be written. */{ Tcl_UniChar result = 0; register CONST char *p = src; while (numBytes--) { unsigned char digit = UCHAR(*p); if (!isxdigit(digit)) break; ++p; result <<= 4; if (digit >= 'a') { result |= (10 + digit - 'a'); } else if (digit >= 'A') { result |= (10 + digit - 'A'); } else { result |= (digit - '0'); } } *resultPtr = result; return (p - src);}/* *---------------------------------------------------------------------- * * TclParseBackslash -- * * Scans up to numBytes bytes starting at src, consuming a * backslash sequence as defined by Tcl's parsing rules. * * Results: * Records at readPtr the number of bytes making up the backslash * sequence. Records at dst the UTF-8 encoded equivalent of * that backslash sequence. Returns the number of bytes written * to dst, at most TCL_UTF_MAX. Either readPtr or dst may be * NULL, if the results are not needed, but the return value is * the same either way. * * Side effects: * None. * *---------------------------------------------------------------------- */intTclParseBackslash(src, numBytes, readPtr, dst) CONST char * src; /* Points to the backslash character of a * a backslash sequence */ int numBytes; /* Max number of bytes to scan */ int *readPtr; /* NULL, or points to storage where the * number of bytes scanned should be written. */ char *dst; /* NULL, or points to buffer where the UTF-8 * encoding of the backslash sequence is to be * written. At most TCL_UTF_MAX bytes will be * written there. */{ register CONST char *p = src+1; Tcl_UniChar result; int count; char buf[TCL_UTF_MAX]; if (numBytes == 0) { if (readPtr != NULL) { *readPtr = 0; } return 0; } if (dst == NULL) { dst = buf; } if (numBytes == 1) { /* Can only scan the backslash. Return it. */ result = '\\'; count = 1; goto done; } count = 2; switch (*p) { /* * Note: in the conversions below, use absolute values (e.g., * 0xa) rather than symbolic values (e.g. \n) that get converted * by the compiler. It's possible that compilers on some * platforms will do the symbolic conversions differently, which * could result in non-portable Tcl scripts. */ case 'a': result = 0x7; break; case 'b': result = 0x8; break; case 'f': result = 0xc; break; case 'n': result = 0xa; break; case 'r': result = 0xd; break; case 't': result = 0x9; break; case 'v': result = 0xb; break; case 'x': count += TclParseHex(p+1, numBytes-1, &result); if (count == 2) { /* No hexadigits -> This is just "x". */ result = 'x'; } else { /* Keep only the last byte (2 hex digits) */ result = (unsigned char) result; } break; case 'u': count += TclParseHex(p+1, (numBytes > 5) ? 4 : numBytes-1, &result); if (count == 2) { /* No hexadigits -> This is just "u". */ result = 'u'; } break; case '\n': count--; do { p++; count++; } while ((count < numBytes) && ((*p == ' ') || (*p == '\t'))); result = ' '; break; case 0: result = '\\'; count = 1; break; default: /* * Check for an octal number \oo?o? */ if (isdigit(UCHAR(*p)) && (UCHAR(*p) < '8')) { /* INTL: digit */ result = (unsigned char)(*p - '0'); p++; if ((numBytes == 2) || !isdigit(UCHAR(*p)) /* INTL: digit */ || (UCHAR(*p) >= '8')) { break; } count = 3; result = (unsigned char)((result << 3) + (*p - '0')); p++; if ((numBytes == 3) || !isdigit(UCHAR(*p)) /* INTL: digit */ || (UCHAR(*p) >= '8')) { break; } count = 4; result = (unsigned char)((result << 3) + (*p - '0')); break; } /* * We have to convert here in case the user has put a * backslash in front of a multi-byte utf-8 character. * While this means nothing special, we shouldn't break up * a correct utf-8 character. [Bug #217987] test subst-3.2 */ if (Tcl_UtfCharComplete(p, numBytes - 1)) { count = Tcl_UtfToUniChar(p, &result) + 1; /* +1 for '\' */ } else { char utfBytes[TCL_UTF_MAX]; memcpy(utfBytes, p, (size_t) (numBytes - 1)); utfBytes[numBytes - 1] = '\0'; count = Tcl_UtfToUniChar(utfBytes, &result) + 1; } break; } done: if (readPtr != NULL) { *readPtr = count; } return Tcl_UniCharToUtf((int) result, dst);}/* *---------------------------------------------------------------------- * * ParseComment -- * * Scans up to numBytes bytes starting at src, consuming a * Tcl comment as defined by Tcl's parsing rules. * * Results: * Records in parsePtr information about the parse. Returns the * number of bytes consumed. * * Side effects: * None. * *---------------------------------------------------------------------- */static intParseComment(src, numBytes, parsePtr) CONST char *src; /* First character to parse. */ register int numBytes; /* Max number of bytes to scan. */ Tcl_Parse *parsePtr; /* Information about parse in progress. * Updated if parsing indicates * an incomplete command. */{ register CONST char *p = src; while (numBytes) { char type; int scanned; do { scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type); p += scanned; numBytes -= scanned; } while (numBytes && (*p == '\n') && (p++,numBytes--)); if ((numBytes == 0) || (*p != '#')) { break; } if (parsePtr->commentStart == NULL) { parsePtr->commentStart = p; } while (numBytes) { if (*p == '\\') { scanned = TclParseWhiteSpace(p, numBytes, parsePtr, &type); if (scanned) { p += scanned; numBytes -= scanned; } else { /* * General backslash substitution in comments isn't * part of the formal spec, but test parse-15.47 * and history indicate that it has been the de facto * rule. Don't change it now. */ TclParseBackslash(p, numBytes, &scanned, NULL); p += scanned; numBytes -= scanned; } } else { p++; numBytes--; if (p[-1] == '\n') { break; } } } parsePtr->commentSize = p - parsePtr->commentStart; } return (p - src);}/* *---------------------------------------------------------------------- * * ParseTokens -- * * This procedure forms the heart of the Tcl parser. It parses one * or more tokens from a string, up to a termination point * specified by the caller. This procedure is used to parse * unquoted command words (those not in quotes or braces), words in * quotes, and array indices for variables. No more than numBytes * bytes will be scanned. * * Results: * Tokens are added to parsePtr and parsePtr->term is filled in * with the address of the character that terminated the parse (the * first one whose CHAR_TYPE matched mask or the character at * parsePtr->end). The return value is TCL_OK if the parse * completed successfully and TCL_ERROR otherwise. If a parse * error occurs and parsePtr->interp isn't NULL, then an error * message is left in the interpreter's result. * * Side effects: * None. * *---------------------------------------------------------------------- */static intParseTokens(src, numBytes, mask, parsePtr) register CONST char *src; /* First character to parse. */ register int numBytes; /* Max number of bytes to scan. */ int mask; /* Specifies when to stop parsing. The * parse stops at the first unquoted * character whose CHAR_TYPE contains * any of the bits in mask. */ Tcl_Parse *parsePtr; /* Information about parse in progress. * Updated with additional tokens and * termination information. */{ char type; int originalTokens, varToken; Tcl_Token *tokenPtr; Tcl_Parse nested; /* * Each iteration through the following loop adds one token of * type TCL_TOKEN_TEXT, TCL_TOKEN_BS, TCL_TOKEN_COMMAND, or * TCL_TOKEN_VARIABLE to parsePtr. For TCL_TOKEN_VARIABLE tokens, * additional tokens are added for the parsed variable name. */ originalTokens = parsePtr->numTokens; while (numBytes && !((type = CHAR_TYPE(*src)) & mask)) { if (parsePtr->numTokens == parsePtr->tokensAvailable) { TclExpandTokenArray(parsePtr); } tokenPtr = &parsePtr->tokenPtr[parsePtr->numTokens]; tokenPtr->start = src; tokenPtr->numComponents = 0; if ((type & TYPE_SUBS) == 0) { /* * This is a simple range of characters. Scan to find the end * of the range. */ while ((++src, --numBytes) && !(CHAR_TYPE(*src) & (mask | TYPE_SUBS))) { /* empty loop */ } tokenPtr->type = TCL_TOKEN_TEXT; tokenPtr->size = src - tokenPtr->start; parsePtr->numTokens++; } else if (*src == '$') { /* * This is a variable reference. Call Tcl_ParseVarName to do * all the dirty work of parsing the name. */ varToken = parsePtr->numTokens; if (Tcl_ParseVarName(parsePtr->interp, src, numBytes, parsePtr, 1) != TCL_OK) { return TCL_ERROR; } src += parsePtr->tokenPtr[varToken].size; numBytes -= parsePtr->tokenPtr[varToken].size; } else if (*src == '[') { /* * Command substitution. Call Tcl_ParseCommand recursively * (and repeatedly) to parse the nested command(s), then * throw away the parse information. */ src++; numBytes--; while (1) { if (Tcl_ParseCommand(parsePtr->interp, src, numBytes, 1, &nested) != TCL_OK) { parsePtr->errorType = nested.errorType; parsePtr->term = nested.term; parsePtr->incomplete = nested.incomplete; return TCL_ERROR; } src = nested.commandStart + nested.commandSize; numBytes = parsePtr->end - src; /* * This is equivalent to Tcl_FreeParse(&nested), but * presumably inlined here for sake of runtime optimization */ if (nested.tokenPtr != nested.staticTokens) { ckfree((char *) nested.tokenPtr); } /* * Check for the closing ']' that ends the command * substitution. It must have been the last character of * the parsed command. */ if ((nested.term < parsePtr->end) && (*nested.term == ']') && !nested.incomplete) { break; } if (numBytes == 0) { if (parsePtr->interp != NULL) { Tcl_SetResult(parsePtr->interp, "missing close-bracket", TCL_STATIC); } parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; parsePtr->term = tokenPtr->start; parsePtr->incomplete = 1; return TCL_ERROR; } }
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?