📄 lexer.c
字号:
case VERS_HTML32: if (lexer->versions & VERS_HTML32) return VERS_HTML32; break; /* to replace old version by new */ case VERS_HTML40_STRICT: if (lexer->versions & VERS_HTML40_STRICT) return VERS_HTML40_STRICT; break; case VERS_HTML40_LOOSE: if (lexer->versions & VERS_HTML40_LOOSE) return VERS_HTML40_LOOSE; break; /* to replace old version by new */ case VERS_FRAMES: if (lexer->versions & VERS_FRAMES) return VERS_FRAMES; break; } ReportWarning(lexer, null, null, INCONSISTENT_VERSION); return HTMLVersion(lexer);}/* fixup doctype if missing */Bool FixDocType(Lexer *lexer, Node *root){ Node *doctype; int guessed = VERS_HTML40_STRICT, i; if (lexer->bad_doctype) ReportWarning(lexer, null, null, MALFORMED_DOCTYPE); if (XmlOut) return yes; doctype = FindDocType(root); if (doctype_mode == doctype_omit) { if (doctype) DiscardElement(doctype); return yes; } if (doctype_mode == doctype_strict) { DiscardElement(doctype); doctype = null; guessed = VERS_HTML40_STRICT; } else if (doctype_mode == doctype_loose) { DiscardElement(doctype); doctype = null; guessed = VERS_HTML40_LOOSE; } else if (doctype_mode == doctype_auto) { if (doctype) { if (lexer->doctype == VERS_UNKNOWN) return no; switch (lexer->doctype) { case VERS_UNKNOWN: return no; case VERS_HTML20: if (lexer->versions & VERS_HTML20) return yes; break; /* to replace old version by new */ case VERS_HTML32: if (lexer->versions & VERS_HTML32) return yes; break; /* to replace old version by new */ case VERS_HTML40_STRICT: if (lexer->versions & VERS_HTML40_STRICT) return yes; break; /* to replace old version by new */ case VERS_HTML40_LOOSE: if (lexer->versions & VERS_HTML40_LOOSE) return yes; break; /* to replace old version by new */ case VERS_FRAMES: if (lexer->versions & VERS_FRAMES) return yes; break; /* to replace old version by new */ } /* INCONSISTENT_VERSION warning is now issued by ApparentVersion() */ } /* choose new doctype */ guessed = HTMLVersion(lexer); } if (guessed == VERS_UNKNOWN) return no; /* for XML use the Voyager system identifier */ if (XmlOut || XmlTags || lexer->isvoyager) { if (doctype) DiscardElement(doctype); for (i = 0; i < W3C_VERSIONS; ++i) { if (guessed == W3C_Version[i].code) { FixHTMLNameSpace(lexer, root, W3C_Version[i].profile); break; } } return yes; } if (!doctype) { doctype = NewNode(); doctype->type = DocTypeTag; doctype->next = root->content; doctype->parent = root; doctype->prev = null; root->content = doctype; } lexer->txtstart = lexer->txtend = lexer->lexsize; /* use the appropriate public identifier */ AddStringLiteral(lexer, "html PUBLIC "); if (doctype_mode == doctype_user && doctype_str) AddStringLiteral(lexer, doctype_str); else if (guessed == VERS_HTML20) AddStringLiteral(lexer, "\"-//IETF//DTD HTML 2.0//EN\""); else { AddStringLiteral(lexer, "\"-//W3C//DTD "); for (i = 0; i < W3C_VERSIONS; ++i) { if (guessed == W3C_Version[i].code) { AddStringLiteral(lexer, W3C_Version[i].name); break; } } AddStringLiteral(lexer, "//EN\""); } lexer->txtend = lexer->lexsize; doctype->start = lexer->txtstart; doctype->end = lexer->txtend; return yes;}/* ensure XML document starts with <?XML version="1.0"?> *//* XXX For MZ: add <?xml-stylesheet..> for html stylesheets */Bool FixXMLStyleSheets(Lexer *lexer, Node *root){ return no;}Bool FixXMLPI(Lexer *lexer, Node *root){ Node *xml; char *s; if( root->content && root->content->type == ProcInsTag) { s = &lexer->lexbuf[root->content->start]; if (s[0] == 'x' && s[1] == 'm' && s[2] == 'l' && s[3] == ' ') return yes; } xml = NewNode(); xml->type = ProcInsTag; xml->next = root->content; if (root->content) { root->content->prev = xml; xml->next = root->content; } root->content = xml; lexer->txtstart = lexer->txtend = lexer->lexsize; AddStringLiteral(lexer, "xml version=\"1.0\""); lexer->txtend = lexer->lexsize; xml->start = lexer->txtstart; xml->end = lexer->txtend; return no;}Node *InferredTag(Lexer *lexer, char *name){ Node *node; node = NewNode(); node->type = StartTag; node->implicit = yes; node->element = wstrdup(name); node->start = lexer->txtstart; node->end = lexer->txtend; FindTag(node); return node;}/* create a text node for the contents of a CDATA element like style or script which ends with </foo> for some foo.*/Node *GetCDATA(Lexer *lexer, Node *container){ int c, lastc, start, i, len; lexer->lines = lexer->in->curline; lexer->columns = lexer->in->curcol; lexer->waswhite = no; lexer->txtstart = lexer->txtend = lexer->lexsize; lastc = '\0'; start = -1; while ((c = ReadChar(lexer->in)) != EndOfStream) { /* treat \r\n as \n and \r as \n */ if (c == '/' && lastc == '<') start = lexer->lexsize + 1; /* to first letter */ else if (c == '>' && start >= 0) { if (((len = lexer->lexsize - start) == wstrlen(container->element)) && wstrncasecmp(lexer->lexbuf+start, container->element, len) == 0) { lexer->txtend = start - 2; break; } lexer->lines = lexer->in->curline; lexer->columns = lexer->in->curcol - 3; ReportWarning(lexer, null, null, BAD_CDATA_CONTENT); /* if javascript insert backslash before / */ if (IsJavaScript(container)) { for (i = lexer->lexsize; i > start-1; --i) lexer->lexbuf[i] = lexer->lexbuf[i-1]; lexer->lexbuf[start-1] = '\\'; lexer->lexsize++; } start = -1; } else if (c == '\r') { c = ReadChar(lexer->in); if (c != '\n') UngetChar(c, lexer->in); c = '\n'; } AddCharToLexer(lexer, (uint)c); lexer->txtend = lexer->lexsize; lastc = c; } if (c == EndOfStream) ReportWarning(lexer, container, null, MISSING_ENDTAG_FOR); if (lexer->txtend > lexer->txtstart) return lexer->token = TextToken(lexer); return null;}void UngetToken(Lexer *lexer){ lexer->pushed = yes;}/* modes for GetToken() MixedContent -- for elements which don't accept PCDATA Preformatted -- white space preserved as is IgnoreMarkup -- for CDATA elements such as script, style*/Node *GetToken(Lexer *lexer, uint mode){ uint map; int c, lastc, badcomment = 0; Bool isempty; AttVal *attributes; if (lexer->pushed) { /* duplicate inlines in preference to pushed text nodes when appropriate */ if (lexer->token->type != TextNode || (!lexer->insert && !lexer->inode)) { lexer->pushed = no; return lexer->token; } } /* at start of block elements, unclosed inline elements are inserted into the token stream */ if (lexer->insert || lexer->inode) return InsertedToken(lexer); lexer->lines = lexer->in->curline; lexer->columns = lexer->in->curcol; lexer->waswhite = no; lexer->txtstart = lexer->txtend = lexer->lexsize; while ((c = ReadChar(lexer->in)) != EndOfStream) { if (lexer->insertspace && mode != IgnoreWhitespace) { AddCharToLexer(lexer, ' '); lexer->waswhite = yes; lexer->insertspace = no; } /* treat \r\n as \n and \r as \n */ if (c == '\r') { c = ReadChar(lexer->in); if (c != '\n') UngetChar(c, lexer->in); c = '\n'; } AddCharToLexer(lexer, (uint)c); switch (lexer->state) { case LEX_CONTENT: /* element content */ map = MAP(c); /* Discard white space if appropriate. Its cheaper to do this here rather than in parser methods for elements that don't have mixed content. */ if ((map & white) && (mode == IgnoreWhitespace) && lexer->lexsize == lexer->txtstart + 1) { --(lexer->lexsize); lexer->waswhite = no; lexer->lines = lexer->in->curline; lexer->columns = lexer->in->curcol; continue; } if (c == '<') { lexer->state = LEX_GT; continue; } if ((map & white) != 0) { /* was previous char white? */ if (lexer->waswhite) { if (mode != Preformatted && mode != IgnoreMarkup) { --(lexer->lexsize); lexer->lines = lexer->in->curline; lexer->columns = lexer->in->curcol; } } else /* prev char wasn't white */ { lexer->waswhite = yes; lastc = c; if (mode != Preformatted && mode != IgnoreMarkup && c != ' ') ChangeChar(lexer, ' '); } continue; } else if (c == '&' && mode != IgnoreMarkup) ParseEntity(lexer, mode); lexer->waswhite = no; continue; case LEX_GT: /* < */ /* check for endtag */ if (c == '/') { if ((c = ReadChar(lexer->in)) == EndOfStream) { UngetChar(c, lexer->in); continue; } AddCharToLexer(lexer, c); map = MAP(c); if ((map & letter) != 0) { lexer->lexsize -= 3; lexer->txtend = lexer->lexsize; UngetChar(c, lexer->in); lexer->state = LEX_ENDTAG; lexer->lexbuf[lexer->lexsize] = '\0'; /* debug */ lexer->in->curcol -= 2; /* if some text before the </ return it now */ if (lexer->txtend > lexer->txtstart) { /* trim space char before end tag */ if (mode == IgnoreWhitespace && lexer->lexbuf[lexer->lexsize - 1] == ' ') { lexer->lexsize -= 1; lexer->txtend = lexer->lexsize; } return lexer->token = TextToken(lexer); } continue; /* no text so keep going */ } /* otherwise treat as CDATA */ lexer->waswhite = no; lexer->state = LEX_CONTENT; continue; } if (mode == IgnoreMarkup) { /* otherwise treat as CDATA */ lexer->waswhite = no; lexer->state = LEX_CONTENT; continue; } /* look out for comments, doctype or marked sections this isn't quite right, but its getting there ... */ if (c == '!') { c = ReadChar(lexer->in); if (c == '-') { c = ReadChar(lexer->in); if (c == '-') { lexer->state = LEX_COMMENT; /* comment */ lexer->lexsize -= 2; lexer->txtend = lexer->lexsize; /* if some text before < return it now */ if (lexer->txtend > lexer->txtstart) return lexer->token = TextToken(lexer); lexer->txtstart = lexer->lexsize; continue; } ReportWarning(lexer, null, null, MALFORMED_COMMENT); } else if (c == 'd' || c == 'D') { lexer->state = LEX_DOCTYPE; /* doctype */ lexer->lexsize -= 2; lexer->txtend = lexer->lexsize; mode = IgnoreWhitespace;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -