📄 lexer.c
字号:
while (node) { while (node->attributes) { av = node->attributes; if (av->attribute) MemFree(av->attribute); if (av->value) MemFree(av->value); node->attributes = av->next; MemFree(av); } if (node->element) MemFree(node->element); if (node->content) FreeNode(node->content); if (node->next) { next = node->next; MemFree(node); node = next; continue; } node->element = null; node->tag = null;#if 0 if (_msize(node) != sizeof (Node)) /* debug */ fprintf(stderr, "Error in FreeNode() - trying to free corrupted node size %d vs %d\n", _msize(node), sizeof(Node));#endif MemFree(node); break; }}Node *TextToken(Lexer *lexer){ Node *node; node = NewNode(); node->start = lexer->txtstart; node->end = lexer->txtend; return node;}Node *TagToken(Lexer *lexer, uint type){ Node *node; node = NewNode(); node->type = type; node->element = wstrndup(lexer->lexbuf + lexer->txtstart, lexer->txtend - lexer->txtstart); node->start = lexer->txtstart; node->end = lexer->txtstart; if (type == StartTag || type == StartEndTag || type == EndTag) FindTag(node); return node;}Node *CommentToken(Lexer *lexer){ Node *node; node = NewNode(); node->type = CommentTag; node->start = lexer->txtstart; node->end = lexer->txtend; return node;}Node *DocTypeToken(Lexer *lexer){ Node *node; node = NewNode(); node->type = DocTypeTag; node->start = lexer->txtstart; node->end = lexer->txtend; return node;}Node *PIToken(Lexer *lexer){ Node *node; node = NewNode(); node->type = ProcInsTag; node->start = lexer->txtstart; node->end = lexer->txtend; return node;}Node *AspToken(Lexer *lexer){ Node *node; node = NewNode(); node->type = AspTag; node->start = lexer->txtstart; node->end = lexer->txtend; return node;}Node *JsteToken(Lexer *lexer){ Node *node; node = NewNode(); node->type = JsteTag; node->start = lexer->txtstart; node->end = lexer->txtend; return node;}/* Added by Baruch Even - handle PHP code too. */Node *PhpToken(Lexer *lexer){ Node *node; node = NewNode(); node->type = PhpTag; node->start = lexer->txtstart; node->end = lexer->txtend; return node;}/* Word2000 uses <![if ... ]> and <![endif]> */Node *SectionToken(Lexer *lexer){ Node *node; node = NewNode(); node->type = SectionTag; node->start = lexer->txtstart; node->end = lexer->txtend; return node;}/* CDATA uses <![CDATA[ ... ]]> */Node *CDATAToken(Lexer *lexer){ Node *node; node = NewNode(); node->type = CDATATag; node->start = lexer->txtstart; node->end = lexer->txtend; return node;}void AddStringLiteral(Lexer *lexer, char *str){ unsigned char c; while((c = *str++) != '\0') AddCharToLexer(lexer, c);}/* find doctype element */Node *FindDocType(Node *root){ Node *node; for (node = root->content; node && node->type != DocTypeTag; node = node->next); return node;}/* find html element */Node *FindHTML(Node *root){ Node *node; for (node = root->content; node && node->tag != tag_html; node = node->next); return node;}Node *FindHEAD(Node *root){ Node *node; node = FindHTML(root); if (node) { for (node = node->content; node && node->tag != tag_head; node = node->next); } return node;}/* add meta element for Tidy */Bool AddGenerator(Lexer *lexer, Node *root){ AttVal *attval; Node *node; Node *head = FindHEAD(root); if (head) { for (node = head->content; node; node = node->next) { if (node->tag == tag_meta) { attval = GetAttrByName(node, "name"); if (attval && attval->value && wstrcasecmp(attval->value, "generator") == 0) { attval = GetAttrByName(node, "content"); if (attval && attval->value && wstrncasecmp(attval->value, "HTML Tidy", 9) == 0) { return no; } } } } node = InferredTag(lexer, "meta"); AddAttribute(node, "content", "HTML Tidy, see www.w3.org"); AddAttribute(node, "name", "generator"); InsertNodeAtStart(head, node); return yes; } return no;}/* examine <!DOCTYPE> to identify version */int FindGivenVersion(Lexer *lexer, Node *doctype){ char *p, *s; uint i, j; int len; /* give up if all we are given is the system id for the doctype */ if (wstrncasecmp(lexer->lexbuf+doctype->start, "html SYSTEM ", 12) == 0) { /* but at least ensure the case is correct */ memcpy(lexer->lexbuf+doctype->start, "html SYSTEM", 11); return 0; /* unrecognized */ } if (wstrncasecmp(lexer->lexbuf+doctype->start, "html PUBLIC ", 12) == 0) memcpy(lexer->lexbuf+doctype->start, "html PUBLIC", 11); else lexer->bad_doctype = yes; for (i = doctype->start; i < doctype->end; ++i) { if (lexer->lexbuf[i] == '"') { if (wstrncmp(lexer->lexbuf+i+1, "-//W3C//DTD ", 12) == 0) { p = lexer->lexbuf + i + 13; /* compute length of identifier e.g. "HTML 4.0 Transitional" */ for (j = i + 13; j < doctype->end && lexer->lexbuf[j] != '/'; ++j); len = j - i - 13; for (j = 1; j < W3C_VERSIONS; ++j) { s = W3C_Version[j].name; if (len == wstrlen(s) && wstrncmp(p, s, len) == 0) return W3C_Version[j].code; } /* else unrecognized version */ } else if (wstrncmp(lexer->lexbuf+i+1, "-//IETF//DTD ", 13) == 0) { p = lexer->lexbuf + i + 14; /* compute length of identifier e.g. "HTML 2.0" */ for (j = i + 14; j < doctype->end && lexer->lexbuf[j] != '/'; ++j); len = j - i - 14; s = W3C_Version[0].name; if (len == wstrlen(s) && wstrncmp(p, s, len) == 0) return W3C_Version[0].code; /* else unrecognized version */ } break; } } return 0;}char *HTMLVersionName(Lexer *lexer){ int guessed, j; guessed = ApparentVersion(lexer); for (j = 0; j < W3C_VERSIONS; ++j) { if (guessed == W3C_Version[j].code) { if (lexer->isvoyager) return W3C_Version[j].voyager_name; return W3C_Version[j].name; } } return null;}void FixHTMLNameSpace(Lexer *lexer, Node *root, char *profile){ Node *node; AttVal *prev, *attr; for (node = root->content; node && node->tag != tag_html; node = node->next); if (node) { prev = null; for (attr = node->attributes; attr; attr = attr->next) { if (wstrcmp(attr->attribute, "xmlns") == 0) break; prev = attr; } if (attr) { if (wstrcmp(attr->value, profile)) { ReportWarning(lexer, node, null, INCONSISTENT_NAMESPACE); MemFree(attr->value); attr->value = wstrdup(profile); } } else { attr = NewAttribute(); attr->delim = '"'; attr->attribute = wstrdup("xmlns"); attr->value = wstrdup(profile); attr->dict = FindAttribute(attr); attr->next = node->attributes; node->attributes = attr; if (ForMZ == yes) { attr = NewAttribute(); attr->delim = '"'; attr->attribute = wstrdup("xmlns:math"); attr->value = wstrdup(MML_NAMESPACE); attr->dict = FindAttribute(attr); attr->next = node->attributes; node->attributes = attr; attr = NewAttribute(); attr->delim = '"'; attr->attribute = wstrdup("xmlns:xlink"); attr->value = wstrdup(XLINK_NAMESPACE); attr->dict = FindAttribute(attr); attr->next = node->attributes; node->attributes = attr; } } }}Bool SetXHTMLDocType(Lexer *lexer, Node *root){ char *fpi, *sysid, *name_space = XHTML_NAMESPACE; Node *doctype; doctype = FindDocType(root); if (doctype_mode == doctype_omit) { if (doctype) DiscardElement(doctype); return yes; } if (doctype_mode == doctype_auto) { /* see what flavor of XHTML this document matches */ if (lexer->versions & VERS_HTML40_STRICT) { /* use XHTML strict */ fpi = "-//W3C//DTD XHTML 1.0 Strict//EN"; sysid = voyager_strict; } else if (lexer->versions & VERS_LOOSE) { fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; sysid = voyager_loose; } else if (lexer->versions & VERS_FRAMES) { /* use XHTML frames */ fpi = "-//W3C//DTD XHTML 1.0 Frameset//EN"; sysid = voyager_frameset; } else /* lets assume XHTML transitional */ { fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; sysid = voyager_loose; } } else if (doctype_mode == doctype_strict) { fpi = "-//W3C//DTD XHTML 1.0 Strict//EN"; sysid = voyager_strict; } else if (doctype_mode == doctype_loose) { fpi = "-//W3C//DTD XHTML 1.0 Transitional//EN"; sysid = voyager_loose; } /* fix dtd for MZ */ if (ForMZ == yes) fpi = "mathml.dtd"; FixHTMLNameSpace(lexer, root, name_space); if (!doctype) { doctype = NewNode(); doctype->type = DocTypeTag; doctype->next = root->content; doctype->parent = root; doctype->prev = null; root->content = doctype; } if (doctype_mode == doctype_user && doctype_str) { fpi = doctype_str; sysid = ""; } lexer->txtstart = lexer->txtend = lexer->lexsize; /* add public or system (for MZ) identifier */ if (ForMZ == yes) AddStringLiteral(lexer, "html SYSTEM "); else AddStringLiteral(lexer, "html PUBLIC "); /* check if the fpi is quoted or not */ if (fpi[0] == '"') AddStringLiteral(lexer, fpi); else { AddStringLiteral(lexer, "\""); AddStringLiteral(lexer, fpi); AddStringLiteral(lexer, "\""); } /* if ((unsigned)(wstrlen(sysid) + 6) >= wraplen) AddStringLiteral(lexer, "\n\""); else AddStringLiteral(lexer, "\n \""); */ /* add system identifier */ /* AddStringLiteral(lexer, sysid); AddStringLiteral(lexer, "\""); */ lexer->txtend = lexer->lexsize; doctype->start = lexer->txtstart; doctype->end = lexer->txtend; return no;}int ApparentVersion(Lexer *lexer){ if (lexer->doctype == VERS_UNKNOWN) return no; switch (lexer->doctype) { case VERS_UNKNOWN: return VERS_UNKNOWN; case VERS_HTML20: if (lexer->versions & VERS_HTML20) return VERS_HTML20; break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -