📄 tokenizer.c
字号:
case '*':
switch (c2) {
case '*':
switch (c3) {
case '=':
return DOUBLESTAREQUAL;
}
break;
}
break;
case '/':
switch (c2) {
case '/':
switch (c3) {
case '=':
return DOUBLESLASHEQUAL;
}
break;
}
break;
}
return OP;
}
/* Handle an indentation whose column differs between the two tab sizes.

   If tok->alterror is set, the condition is fatal: tok->done becomes
   E_TABSPACE, tok->cur is moved to tok->inp, and 1 is returned so the
   caller can return an error token.

   Otherwise 0 is returned.  If tok->altwarning is set, a message naming
   tok->filename is written through PySys_WriteStderr() and the flag is
   cleared, so the warning is printed only once per tokenizer state. */
static int
indenterror(struct tok_state *tok)
{
    if (!tok->alterror) {
        /* Non-fatal mode: warn at most once, then carry on. */
        if (tok->altwarning) {
            PySys_WriteStderr("%s: inconsistent use of tabs and spaces "
                              "in indentation\n", tok->filename);
            tok->altwarning = 0;
        }
        return 0;
    }

    /* Fatal mode: record the error and tell the caller to bail out. */
    tok->done = E_TABSPACE;
    tok->cur = tok->inp;
    return 1;
}
/* Get next token, after space stripping etc.

   Reads characters through tok_nextc() and returns the type code of the
   next token.

   Parameters:
     tok      tokenizer state; its fields (atbol, indent, pendin, level,
              start, cur, done, tabsize, ...) are read and updated here.
     p_start  set to the first character of the token text.
     p_end    set to one past the last character of the token text.

   *p_start and *p_end are both set to NULL on entry and are only filled
   in immediately before returning NAME, NUMBER, STRING, NEWLINE, DOT, or
   an operator/punctuation token.  For NEWLINE the '\n' itself is left out
   of the range.  INDENT, DEDENT, ENDMARKER and ERRORTOKEN are returned
   with both pointers still NULL.

   Errors: ERRORTOKEN is returned.  Except on the EOF path, tok->done is
   set to an E_* code (E_TABSPACE via indenterror(), E_TOODEEP, E_DEDENT,
   E_TOKEN) before returning.

   Labels used as internal re-entry points:
     nextline      restart at the beginning of a new physical line
     again         restart token scanning after a backslash continuation
     letter_quote  string scanning, entered from the r/u prefix handling
     fraction, exponent, imaginary
                   pieces of the number scanner that are jumped into from
                   the leading-'.' and leading-'0' cases */
int
PyTokenizer_Get(register struct tok_state *tok, char **p_start,
char **p_end)
{
register int c;
/* Set when the current line holds only whitespace and/or a comment, so
   that its NEWLINE is swallowed instead of being returned. */
int blankline;
*p_start = *p_end = NULL;
nextline:
tok->start = NULL;
blankline = 0;
/* Get indentation level */
if (tok->atbol) {
/* col is the column computed with tok->tabsize; altcol is the same
   column computed with tok->alttabsize.  The two are compared against
   the two indentation stacks below to detect tab/space mixes whose
   meaning depends on the tab size. */
register int col = 0;
register int altcol = 0;
tok->atbol = 0;
for (;;) {
c = tok_nextc(tok);
if (c == ' ')
col++, altcol++;
else if (c == '\t') {
/* Advance each column to its next tab stop */
col = (col/tok->tabsize + 1) * tok->tabsize;
altcol = (altcol/tok->alttabsize + 1)
* tok->alttabsize;
}
else if (c == '\014') /* Control-L (formfeed) */
col = altcol = 0; /* For Emacs users */
else
break;
}
/* c is the first character that is not indentation; hand it back so
   the token scanner below sees it. */
tok_backup(tok, c);
if (c == '#' || c == '\n') {
/* Lines with only whitespace and/or comments
shouldn't affect the indentation and are
not passed to the parser as NEWLINE tokens,
except *totally* empty lines in interactive
mode, which signal the end of a command group. */
if (col == 0 && c == '\n' && tok->prompt != NULL)
blankline = 0; /* Let it through */
else
blankline = 1; /* Ignore completely */
/* We can't jump back right here since we still
may need to skip to the end of a comment */
}
/* Indentation is only tracked for non-blank lines outside any open
   bracket (tok->level counts open '(', '[' and '{' below). */
if (!blankline && tok->level == 0) {
if (col == tok->indstack[tok->indent]) {
/* No change */
/* Same column with tok->tabsize but a different one with
   tok->alttabsize: ambiguous indentation. */
if (altcol != tok->altindstack[tok->indent]) {
if (indenterror(tok))
return ERRORTOKEN;
}
}
else if (col > tok->indstack[tok->indent]) {
/* Indent -- always one */
if (tok->indent+1 >= MAXINDENT) {
tok->done = E_TOODEEP;
tok->cur = tok->inp;
return ERRORTOKEN;
}
/* Deeper with one tab size but not with the other */
if (altcol <= tok->altindstack[tok->indent]) {
if (indenterror(tok))
return ERRORTOKEN;
}
/* Queue one INDENT and push the new level on both stacks */
tok->pendin++;
tok->indstack[++tok->indent] = col;
tok->altindstack[tok->indent] = altcol;
}
else /* col < tok->indstack[tok->indent] */ {
/* Dedent -- any number, must be consistent */
/* Pop levels while they are deeper than col, queueing one
   DEDENT (a negative pendin step) per level popped. */
while (tok->indent > 0 &&
col < tok->indstack[tok->indent]) {
tok->pendin--;
tok->indent--;
}
/* The dedent has to land exactly on an enclosing level */
if (col != tok->indstack[tok->indent]) {
tok->done = E_DEDENT;
tok->cur = tok->inp;
return ERRORTOKEN;
}
if (altcol != tok->altindstack[tok->indent]) {
if (indenterror(tok))
return ERRORTOKEN;
}
}
}
}
tok->start = tok->cur;
/* Return pending indents/dedents */
/* pendin > 0 means INDENT tokens are owed, pendin < 0 means DEDENT
   tokens are owed; one is paid off per call. */
if (tok->pendin != 0) {
if (tok->pendin < 0) {
tok->pendin++;
return DEDENT;
}
else {
tok->pendin--;
return INDENT;
}
}
/* Re-entry point after a backslash-newline continuation: scanning
   resumes on the next line without redoing indentation processing. */
again:
tok->start = NULL;
/* Skip spaces */
do {
c = tok_nextc(tok);
} while (c == ' ' || c == '\t' || c == '\014');
/* Set start of current token */
/* tok->cur is already past c, hence the -1 */
tok->start = tok->cur - 1;
/* Skip comment, while looking for tab-setting magic */
if (c == '#') {
static const char *const tabforms[] = {
"tab-width:", /* Emacs */
":tabstop=", /* vim, full form */
":ts=", /* vim, abbreviated form */
"set tabsize=", /* will vi never die? */
/* more templates can be added here to support other editors */
};
char cbuf[80];
char *tp;
const char **cp;
tp = cbuf;
/* Copy at most sizeof(cbuf)-1 characters following the '#' into cbuf
   (the terminating '\n' or the EOF value is stored too if it is
   reached), then NUL-terminate.  Longer comments are only searched
   in this leading part. */
do {
*tp++ = c = tok_nextc(tok);
} while (c != EOF && c != '\n' &&
tp - cbuf + 1 < sizeof(cbuf));
*tp = '\0';
// XXX:CW32
/* Try every template; if several match, the last accepted value
   wins because tok->tabsize is simply overwritten.
   NOTE(review): the cast below drops the inner const of tabforms'
   element type; declaring cp as `const char *const *` would make it
   unnecessary. */
for (cp = (const char **)tabforms;
cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);
cp++) {
if ((tp = strstr(cbuf, *cp))) {
/* NOTE(review): atoi() gives no error indication; out-of-range
   results are filtered by the 1..40 check below. */
int newsize = atoi(tp + strlen(*cp));
if (newsize >= 1 && newsize <= 40) {
tok->tabsize = newsize;
if (Py_VerboseFlag)
PySys_WriteStderr(
"Tab size set to %d\n",
newsize);
}
}
}
/* Consume the rest of the comment; c ends up as '\n' or EOF and is
   handled by the checks below like any other character. */
while (c != EOF && c != '\n')
c = tok_nextc(tok);
}
/* Check for EOF and errors now */
if (c == EOF) {
/* tok->done is not assigned on this path in this function;
   presumably tok_nextc() records why input ended -- verify there. */
return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;
}
/* Identifier (most frequent token!) */
if (isalpha(c) || c == '_') {
/* Process r"", u"" and ur"" */
/* If a quote follows the prefix letter(s), this is a string literal:
   jump to the string scanner.  tok->start still points at the prefix,
   so the prefix is part of the returned STRING text.  Otherwise fall
   through and keep scanning an identifier from the current c. */
switch (c) {
case 'r':
case 'R':
c = tok_nextc(tok);
if (c == '"' || c == '\'')
goto letter_quote;
break;
case 'u':
case 'U':
c = tok_nextc(tok);
if (c == 'r' || c == 'R')
c = tok_nextc(tok);
if (c == '"' || c == '\'')
goto letter_quote;
break;
}
while (isalnum(c) || c == '_') {
c = tok_nextc(tok);
}
/* c is the first character after the identifier; give it back */
tok_backup(tok, c);
*p_start = tok->start;
*p_end = tok->cur;
return NAME;
}
/* Newline */
if (c == '\n') {
tok->atbol = 1;
/* Newlines on blank/comment-only lines and inside open brackets are
   not tokens; start over on the next line. */
if (blankline || tok->level > 0)
goto nextline;
*p_start = tok->start;
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
return NEWLINE;
}
#ifdef macintosh
if (c == '\r') {
PySys_WriteStderr(
"File contains \\r characters (incorrect line endings?)\n");
tok->done = E_TOKEN;
tok->cur = tok->inp;
return ERRORTOKEN;
}
#endif
/* Period or number starting with period? */
if (c == '.') {
c = tok_nextc(tok);
if (isdigit(c)) {
/* ".5" style float: continue in the fraction part of the
   number scanner below. */
goto fraction;
}
else {
tok_backup(tok, c);
*p_start = tok->start;
*p_end = tok->cur;
return DOT;
}
}
/* Number */
if (isdigit(c)) {
if (c == '0') {
/* Hex or octal -- maybe. */
c = tok_nextc(tok);
if (c == '.')
goto fraction;
#ifndef WITHOUT_COMPLEX
if (c == 'j' || c == 'J')
goto imaginary;
#endif
if (c == 'x' || c == 'X') {
/* Hex */
do {
c = tok_nextc(tok);
} while (isxdigit(c));
}
else {
/* Set when a digit 8 or 9 shows up after the leading 0; that is
   only acceptable if the literal turns out to be a float or an
   imaginary number. */
int found_decimal = 0;
/* Octal; c is first char of it */
/* There's no 'isoctdigit' macro, sigh */
while ('0' <= c && c < '8') {
c = tok_nextc(tok);
}
if (isdigit(c)) {
found_decimal = 1;
do {
c = tok_nextc(tok);
} while (isdigit(c));
}
if (c == '.')
goto fraction;
else if (c == 'e' || c == 'E')
goto exponent;
#ifndef WITHOUT_COMPLEX
else if (c == 'j' || c == 'J')
goto imaginary;
#endif
else if (found_decimal) {
/* Leading 0 followed by 8/9 digits but not a float/imaginary */
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
}
/* Optional long-integer suffix on hex and octal literals */
if (c == 'l' || c == 'L')
c = tok_nextc(tok);
}
else {
/* Decimal */
do {
c = tok_nextc(tok);
} while (isdigit(c));
/* A long-integer suffix ends the literal; otherwise look for the
   optional fraction, exponent and imaginary parts in that order. */
if (c == 'l' || c == 'L')
c = tok_nextc(tok);
else {
/* Accept floating point numbers. */
if (c == '.') {
fraction:
/* Fraction */
do {
c = tok_nextc(tok);
} while (isdigit(c));
}
if (c == 'e' || c == 'E') {
exponent:
/* Exponent part */
c = tok_nextc(tok);
if (c == '+' || c == '-')
c = tok_nextc(tok);
/* At least one digit is required after 'e' and the optional sign */
if (!isdigit(c)) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
do {
c = tok_nextc(tok);
} while (isdigit(c));
}
#ifndef WITHOUT_COMPLEX
if (c == 'j' || c == 'J')
/* Imaginary part */
imaginary:
c = tok_nextc(tok);
#endif
}
}
/* c is the first character after the literal; give it back */
tok_backup(tok, c);
*p_start = tok->start;
*p_end = tok->cur;
return NUMBER;
}
letter_quote:
/* String */
if (c == '\'' || c == '"') {
/* quote2 is the value tok->cur - tok->start will have right after a
   second quote that immediately follows the opening one; it is used
   to recognise "" / '' and the start of a triple-quoted string. */
int quote2 = tok->cur - tok->start + 1;
int quote = c;
int triple = 0;
/* Number of consecutive quote characters just seen; any other
   character (or a backslash escape) resets it. */
int tripcount = 0;
for (;;) {
c = tok_nextc(tok);
if (c == '\n') {
/* A raw newline is only allowed inside a triple-quoted string */
if (!triple) {
tok->done = E_TOKEN;
tok_backup(tok, c);
return ERRORTOKEN;
}
tripcount = 0;
}
else if (c == EOF) {
/* Unterminated string */
tok->done = E_TOKEN;
tok->cur = tok->inp;
return ERRORTOKEN;
}
else if (c == quote) {
tripcount++;
if (tok->cur - tok->start == quote2) {
/* This quote directly follows the opening quote.  If a third
   quote follows, a triple-quoted string begins; otherwise the
   extra character is given back and the break below ends the
   empty string. */
c = tok_nextc(tok);
if (c == quote) {
triple = 1;
tripcount = 0;
continue;
}
tok_backup(tok, c);
}
/* A single quote closes a normal string; a triple-quoted string
   needs three quotes in a row. */
if (!triple || tripcount == 3)
break;
}
else if (c == '\\') {
/* Skip the character after a backslash unexamined (apart from
   EOF), so an escaped quote or newline does not end the string. */
tripcount = 0;
c = tok_nextc(tok);
if (c == EOF) {
tok->done = E_TOKEN;
tok->cur = tok->inp;
return ERRORTOKEN;
}
}
else
tripcount = 0;
}
*p_start = tok->start;
*p_end = tok->cur;
return STRING;
}
/* Line continuation */
if (c == '\\') {
/* A backslash outside a string must be immediately followed by '\n' */
c = tok_nextc(tok);
if (c != '\n') {
tok->done = E_TOKEN;
tok->cur = tok->inp;
return ERRORTOKEN;
}
goto again; /* Read next line */
}
/* Check for two-character token */
{
/* OP is the "no match" result of the two- and three-character
   lookups.  A three-character token is only tried when the first two
   characters already form a token; if the third does not extend it,
   it is given back and the two-character token is returned. */
int c2 = tok_nextc(tok);
int token = PyToken_TwoChars(c, c2);
if (token != OP) {
int c3 = tok_nextc(tok);
int token3 = PyToken_ThreeChars(c, c2, c3);
if (token3 != OP) {
token = token3;
} else {
tok_backup(tok, c3);
}
*p_start = tok->start;
*p_end = tok->cur;
return token;
}
tok_backup(tok, c2);
}
/* Keep track of parentheses nesting level */
/* NOTE(review): the decrement is unchecked, so an unmatched closing
   bracket drives tok->level negative, which also disables the
   `level == 0` indentation handling above -- confirm callers reject
   such input. */
switch (c) {
case '(':
case '[':
case '{':
tok->level++;
break;
case ')':
case ']':
case '}':
tok->level--;
break;
}
/* Punctuation character */
*p_start = tok->start;
*p_end = tok->cur;
return PyToken_OneChar(c);
}
#ifdef Py_DEBUG
/* Debug helper: print a token to stdout.

   Writes the token's name, looked up in _PyParser_TokenNames by type.
   For NAME, NUMBER, STRING and OP tokens the token text [start, end) is
   appended in parentheses; for every other type start and end are not
   used.  No newline is written. */
void
tok_dump(int type, char *start, char *end)
{
    const int has_text = (type == NAME || type == NUMBER ||
                          type == STRING || type == OP);
    int text_len;

    printf("%s", _PyParser_TokenNames[type]);
    if (!has_text)
        return;

    /* The text is not NUL-terminated at end, so bound it with a precision. */
    text_len = (int)(end - start);
    printf("(%.*s)", text_len, start);
}
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -