📄 lexer.c
字号:
/* skip until white space or '>' */ for (;;) { c = ReadChar(lexer->in); if (c == EndOfStream || c == '>') { UngetChar(c, lexer->in); break; } map = MAP(c); if (!(map & white)) continue; /* and skip to end of whitespace */ for (;;) { c = ReadChar(lexer->in); if (c == EndOfStream || c == '>') { UngetChar(c, lexer->in); break; } map = MAP(c); if (map & white) continue; UngetChar(c, lexer->in); break; } break; } /* if some text before < return it now */ if (lexer->txtend > lexer->txtstart) return lexer->token = TextToken(lexer); lexer->txtstart = lexer->lexsize; continue; } else if (c == '[') { /* Word 2000 embeds <![if ...]> ... <![endif]> sequences */ lexer->lexsize -= 2; lexer->state = LEX_SECTION; lexer->txtend = lexer->lexsize; /* if some text before < return it now */ if (lexer->txtend > lexer->txtstart) return lexer->token = TextToken(lexer); lexer->txtstart = lexer->lexsize; continue; } /* otherwise swallow chars up to and including next '>' */ while ((c = ReadChar(lexer->in)) != '>') { if (c == -1) { UngetChar(c, lexer->in); break; } } lexer->lexsize -= 2; lexer->lexbuf[lexer->lexsize] = '\0'; lexer->state = LEX_CONTENT; continue; } /* processing instructions */ if (c == '?') { lexer->lexsize -= 2; lexer->state = LEX_PROCINSTR; lexer->txtend = lexer->lexsize; /* if some text before < return it now */ if (lexer->txtend > lexer->txtstart) return lexer->token = TextToken(lexer); lexer->txtstart = lexer->lexsize; continue; } /* Microsoft ASP's e.g. <% ... server-code ... %> */ if (c == '%') { lexer->lexsize -= 2; lexer->state = LEX_ASP; lexer->txtend = lexer->lexsize; /* if some text before < return it now */ if (lexer->txtend > lexer->txtstart) return lexer->token = TextToken(lexer); lexer->txtstart = lexer->lexsize; continue; } /* Netscapes JSTE e.g. <# ... server-code ... #> */ if (c == '#') { lexer->lexsize -= 2; lexer->state = LEX_JSTE; lexer->txtend = lexer->lexsize; /* if some text before < return it now */ if (lexer->txtend > lexer->txtstart) return lexer->token = TextToken(lexer); lexer->txtstart = lexer->lexsize; continue; } map = MAP(c); /* check for start tag */ if ((map & letter) != 0) { UngetChar(c, lexer->in); /* push back letter */ lexer->lexsize -= 2; /* discard "<" + letter */ lexer->txtend = lexer->lexsize; lexer->state = LEX_STARTTAG; /* ready to read tag name */ /* if some text before < return it now */ if (lexer->txtend > lexer->txtstart) return lexer->token = TextToken(lexer); continue; /* no text so keep going */ } /* otherwise treat as CDATA */ lexer->state = LEX_CONTENT; lexer->waswhite = no; continue; case LEX_ENDTAG: /* </letter */ lexer->txtstart = lexer->lexsize - 1; lexer->in->curcol += 2; c = ParseTagName(lexer); lexer->token = TagToken(lexer, EndTag); /* create endtag token */ lexer->lexsize = lexer->txtend = lexer->txtstart; /* skip to '>' */ while (c != '>') { c = ReadChar(lexer->in); if (c == EndOfStream) break; } if (c == EndOfStream) { UngetChar(c, lexer->in); continue; } lexer->state = LEX_CONTENT; lexer->waswhite = no; return lexer->token; /* the endtag token */ case LEX_STARTTAG: /* first letter of tagname */ lexer->txtstart = lexer->lexsize - 1; /* set txtstart to first letter */ c = ParseTagName(lexer); isempty = no; attributes = null; lexer->token = TagToken(lexer, (isempty ? StartEndTag : StartTag)); /* parse attributes, consuming closing ">" */ if (c != '>') { if (c == '/') UngetChar(c, lexer->in); attributes = ParseAttrs(lexer, &isempty); } if (isempty) lexer->token->type = StartEndTag; lexer->token->attributes = attributes; lexer->lexsize = lexer->txtend = lexer->txtstart; /* swallow newline following start tag */ /* special check needed for CRLF sequence */ c = ReadChar(lexer->in); if (c == '\r') { c = ReadChar(lexer->in); if (c != '\n') UngetChar(c, lexer->in); } else if (c != '\n' && c != '\f') UngetChar(c, lexer->in); lexer->state = LEX_CONTENT; lexer->waswhite = yes; /* to swallow leading whitespace */ if (lexer->token->tag == null) ReportError(lexer, null, lexer->token, UNKNOWN_ELEMENT); else if (!XmlTags) { lexer->versions &= lexer->token->tag->versions; if (lexer->token->tag->versions & VERS_PROPRIETARY) { if (!MakeClean && (lexer->token->tag == tag_nobr || lexer->token->tag == tag_wbr)) ReportWarning(lexer, null, lexer->token, PROPRIETARY_ELEMENT); } if (lexer->token->tag->chkattrs) { CheckUniqueAttributes(lexer, lexer->token); lexer->token->tag->chkattrs(lexer, lexer->token); } else CheckAttributes(lexer, lexer->token); } return lexer->token; /* return start tag */ case LEX_COMMENT: /* seen <!-- so look for --> */ if (c != '-') continue; c = ReadChar(lexer->in); AddCharToLexer(lexer, c); if (c != '-') continue; end_comment: c = ReadChar(lexer->in); if (c == '>') { if (badcomment) ReportWarning(lexer, null, null, MALFORMED_COMMENT); lexer->txtend = lexer->lexsize; lexer->lexbuf[lexer->lexsize] = '\0'; lexer->state = LEX_CONTENT; lexer->waswhite = no; return lexer->token = CommentToken(lexer); } /* note position of first such error in the comment */ if (!badcomment) { lexer->lines = lexer->in->curline; lexer->columns = lexer->in->curcol - 3; } badcomment++; if (FixComments) lexer->lexbuf[lexer->lexsize - 2] = '='; AddCharToLexer(lexer, c); /* if '-' then look for '>' to end the comment */ if (c == '-') goto end_comment; /* otherwise continue to look for --> */ lexer->lexbuf[lexer->lexsize - 2] = '='; continue; case LEX_DOCTYPE: /* seen <!d so look for '>' munging whitespace */ map = MAP(c); if (map & white) { if (lexer->waswhite) lexer->lexsize -= 1; lexer->waswhite = yes; } else lexer->waswhite = no; if (c != '>') continue; lexer->lexsize -= 1; lexer->txtend = lexer->lexsize; lexer->lexbuf[lexer->lexsize] = '\0'; lexer->state = LEX_CONTENT; lexer->waswhite = no; lexer->token = DocTypeToken(lexer); /* make a note of the version named by the doctype */ lexer->doctype = FindGivenVersion(lexer, lexer->token); return lexer->token; case LEX_PROCINSTR: /* seen <? so look for '>' */ /* check for PHP preprocessor instructions <?php ... ?> */ if (lexer->lexsize - lexer->txtstart == 3) { if (wstrncmp(lexer->lexbuf + lexer->txtstart, "php", 3) == 0) { lexer->state = LEX_PHP; continue; } } if (XmlPIs) /* insist on ?> as terminator */ { if (c != '?') continue; /* now look for '>' */ c = ReadChar(lexer->in); if (c == EndOfStream) { ReportWarning(lexer, null, null, UNEXPECTED_END_OF_FILE); UngetChar(c, lexer->in); continue; } AddCharToLexer(lexer, c); } if (c != '>') continue; lexer->lexsize -= 1; lexer->txtend = lexer->lexsize; lexer->lexbuf[lexer->lexsize] = '\0'; lexer->state = LEX_CONTENT; lexer->waswhite = no; return lexer->token = PIToken(lexer); case LEX_ASP: /* seen <% so look for "%>" */ if (c != '%') continue; /* now look for '>' */ c = ReadChar(lexer->in); if (c != '>') { UngetChar(c, lexer->in); continue; } lexer->lexsize -= 1; lexer->txtend = lexer->lexsize; lexer->lexbuf[lexer->lexsize] = '\0'; lexer->state = LEX_CONTENT; lexer->waswhite = no; return lexer->token = AspToken(lexer); case LEX_JSTE: /* seen <# so look for "#>" */ if (c != '#') continue; /* now look for '>' */ c = ReadChar(lexer->in); if (c != '>') { UngetChar(c, lexer->in); continue; } lexer->lexsize -= 1; lexer->txtend = lexer->lexsize; lexer->lexbuf[lexer->lexsize] = '\0'; lexer->state = LEX_CONTENT; lexer->waswhite = no; return lexer->token = JsteToken(lexer); case LEX_PHP: /* seen "<?php" so look for "?>" */ if (c != '?') continue; /* now look for '>' */ c = ReadChar(lexer->in); if (c != '>') { UngetChar(c, lexer->in); continue; } lexer->lexsize -= 1; lexer->txtend = lexer->lexsize; lexer->lexbuf[lexer->lexsize] = '\0'; lexer->state = LEX_CONTENT; lexer->waswhite = no; return lexer->token = PhpToken(lexer); case LEX_SECTION: /* seen "<![" so look for "]>" */ if (c == '[') { if (lexer->lexsize == (lexer->txtstart + 6) && wstrncmp(lexer->lexbuf+lexer->txtstart, "CDATA[", 6) == 0) { lexer->state = LEX_CDATA; lexer->lexsize -= 6; continue; } } if (c != ']') continue; /* now look for '>' */ c = ReadChar(lexer->in); if (c != '>') { UngetChar(c, lexer->in); continue; } lexer->lexsize -= 1; lexer->txtend = lexer->lexsize; lexer->lexbuf[lexer->lexsize] = '\0'; lexer->state = LEX_CONTENT; lexer->waswhite = no; return lexer->token = SectionToken(lexer); case LEX_CDATA: /* seen "<![CDATA[" so look for "]]>" */ if (c != ']') continue; /* now look for ']' */ c = ReadChar(lexer->in); if (c != ']') { UngetChar(c, lexer->in); continue; } /* now look for '>' */ c = ReadChar(lexer->in); if (c != '>') { UngetChar(c, lexer->in); continue; } lexer->lexsize -= 1; lexer->txtend = lexer->lexsize; lexer->lexbuf[lexer->lexsize] = '\0'; lexer->state = LEX_CONTENT; lexer->waswhite = no; return lexer->token = CDATAToken(lexer); } } if (lexer->state == LEX_CONTENT) /* text string */ { lexer->txtend = lexer->lexsize; if (lexer->txtend > lexer->txtstart) { UngetChar(c, lexer->in); if (lexer->lexbuf[lexer->lexsize - 1] == ' ') { lexer->lexsize -= 1; lexer->txtend = lexer->lexsize; } return lexer->token = TextToken(lexer); } } else if (lexer->state == LEX_COMMENT) /* comment */ { if (c == EndOfStream) ReportWarning(lexer, null, null, MALFORMED_COMMENT); lexer->txtend = lexer->lexsize; lexer->lexbuf[lexer->lexsize] = '\0'; lexer->state = LEX_CONTENT; lexer->waswhite = no; return lexer->token = CommentToken(lexer); } return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -