📄 parser.c
字号:
/*
  parser.c - HTML Parser

  (c) 1998-2000 (W3C) MIT, INRIA, Keio University
  See tidy.c for the copyright notice.
*/

#include "platform.h"   /* platform independent stuff */
#include "html.h"       /* to pull in definition of nodes */

int SeenBodyEndTag;  /* could be moved into lexer structure */

/*
  Sanity check of the links around node and, recursively, of every
  node below it.

  Returns no as soon as one of these is violated:
    - prev/next of the neighbours do not point back at node
    - node has a parent but is not reachable through the parent's
      content list, or the parent's content/last pointers disagree
      with node being first/last
    - any child fails the same checks
  Returns yes otherwise. node itself must not be null.
*/
Bool CheckNodeIntegrity(Node *node)
{
    Node *child;
    Bool found = no;

    if (node->prev)
    {
        if (node->prev->next != node)
            return no;
    }

    if (node->next)
    {
        if (node->next->prev != node)
            return no;
    }

    if (node->parent)
    {
        /* a node without a previous sibling must be the first child */
        if (node->prev == null && node->parent->content != node)
            return no;

        /* a node without a next sibling must be the last child */
        if (node->next == null && node->parent->last != node)
            return no;

        for (child = node->parent->content; child; child = child->next)
            if (child == node)
            {
                found = yes;
                break;
            }

        if (!found)
            return no;
    }

    for (child = node->content; child; child = child->next)
        if (!CheckNodeIntegrity(child))
            return no;

    return yes;
}

/*
  used to determine how attributes without values should be printed
  this was introduced to deal with user defined tags e.g. Cold Fusion

  Returns yes when node is null, has no tag, or its tag has the CM_NEW
  model bit set; no otherwise. The result is always exactly yes or no
  (never the raw bit mask), so it can safely be compared against yes.
*/
Bool IsNewNode(Node *node)
{
    if (node && node->tag)
    {
        return (node->tag->model & CM_NEW) ? yes : no;
    }

    return yes;
}

/*
  Turn node into an implicit start tag for the dictionary entry tag,
  reporting OBSOLETE_ELEMENT. The previous tag is remembered in
  node->was and the element name is replaced by a copy of tag->name.
*/
void CoerceNode(Lexer *lexer, Node *node, Dict *tag)
{
    /* temporary node, used only to name the replacement in the warning */
    Node *tmp = InferredTag(lexer, tag->name);

    ReportWarning(lexer, node, tmp, OBSOLETE_ELEMENT);

    /*
      Release the temporary node together with its element name.
      NOTE(review): assumes InferredTag() gives the node a heap
      allocated element string (the same ownership this function
      uses for node->element below) - confirm against InferredTag.
    */
    if (tmp != null)
    {
        MemFree(tmp->element);
        MemFree(tmp);
    }

    MemFree(node->element);

    node->was = node->tag;
    node->tag = tag;
    node->type = StartTag;
    node->implicit = yes;
    node->element = wstrdup(tag->name);
}

/*
  extract a node and its children from a markup tree

  The siblings and the parent's content/last pointers are relinked
  around node; node's own parent/prev/next are cleared. The node's
  content is left attached to it.
*/
void RemoveNode(Node *node)
{
    if (node->prev)
        node->prev->next = node->next;

    if (node->next)
        node->next->prev = node->prev;

    if (node->parent)
    {
        if (node->parent->content == node)
            node->parent->content = node->next;

        if (node->parent->last == node)
            node->parent->last = node->prev;
    }

    node->parent = node->prev = node->next = null;
}

/*
  remove node from markup tree and discard it

  Returns the node that followed element before removal, or null
  when element is null or had no next sibling.
*/
Node *DiscardElement(Node *element)
{
    Node *next = null;

    if (element)
    {
        /* remember the successor before the links are cleared */
        next = element->next;
        RemoveNode(element);
        FreeNode(element);
    }

    return next;
}

/*
insert node into markup tree */void InsertNodeAtStart(Node *element, Node *node){ node->parent = element; if (element->content == null) element->last = node; node->next = element->content; node->prev = null; element->content = node;}/* insert node into markup tree */void InsertNodeAtEnd(Node *element, Node *node){ node->parent = element; node->prev = element->last; if (element->last != null) element->last->next = node; else element->content = node; element->last = node;}/* insert node into markup tree in pace of element which is moved to become the child of the node*/void InsertNodeAsParent(Node *element, Node *node){ node->content = element; node->last = element; node->parent = element->parent; element->parent = node; if (node->parent->content == element) node->parent->content = node; if (node->parent->last == element) node->parent->last = node; node->prev = element->prev; element->prev = null; if (node->prev) node->prev->next = node; node->next = element->next; element->next = null; if (node->next) node->next->prev = node;}/* insert node into markup tree before element */void InsertNodeBeforeElement(Node *element, Node *node){ Node *parent; parent = element->parent; node->parent = parent; node->next = element; node->prev = element->prev; element->prev = node; if (node->prev) node->prev->next = node; if (parent->content == element) parent->content = node;}/* insert node into markup tree after element */void InsertNodeAfterElement(Node *element, Node *node){ Node *parent; parent = element->parent; node->parent = parent; if (parent->last == element) parent->last = node; else { node->next = element->next; node->next->prev = node; } element->next = node; node->prev = element;}Bool CanPrune(Node *element){ if (element->type == TextNode) return yes; if (element->content) return no; if (element->tag == tag_a && element->attributes != null) return no; if (element->tag == tag_p && !DropEmptyParas) return no; if (element->tag == null) return no; if (element->tag->model & 
CM_ROW) return no; if (element->tag == tag_applet) return no; if (element->tag == tag_object) return no; return yes;}void TrimEmptyElement(Lexer *lexer, Node *element){ if (CanPrune(element)) { if (element->type != TextNode) ReportWarning(lexer, element, null, TRIM_EMPTY_ELEMENT); DiscardElement(element); } else if (element->tag == tag_p && element->content == null) { /* replace <p></p> by <br><br> to preserve formatting */ Node *node = InferredTag(lexer, "br"); CoerceNode(lexer, element, tag_br); InsertNodeAfterElement(element, node); }}/* If last child of element is a text node then trim trailing white space character.*/void TrimSpace(Lexer *lexer, Node *last){ unsigned char c; if (last != null && last->type == TextNode && last->end > last->start) { while (last->end > last->start) { c = (unsigned char)lexer->lexbuf[last->end - 1]; if (c == 160) /* non breaking space */ { if (last->parent->tag == tag_td || last->parent->tag == tag_th) { if (last->end > last->start + 1) last->end -= 1; else break; } else last->end -= 1; } else if (c == ' ') last->end -= 1; else break; } if (last->end < last->start) tidy_out(lexer->errout, "TrimSpace: screwed up text node\n"); /* if empty string then delete from parse tree */ if (last->start == last->end) TrimEmptyElement(lexer, last); }}/* This maps <em>hello </em><strong>world</strong> to <em>hello</em> <strong>world</strong> If last child of element is a text node then trim trailing white space character moving it to after element's end tag.*/void TrimTrailingSpace(Lexer *lexer, Node *last){ unsigned char c; if (last != null && last->type == TextNode && last->end > last->start) { c = (unsigned char)lexer->lexbuf[last->end - 1]; if (c == ' ' || c == 160) { last->end -= 1; if (last->parent->tag->model & CM_INLINE) lexer->insertspace = yes; } /* if empty string then delete from parse tree */ if (last->start == last->end) TrimEmptyElement(lexer, last); }}/* This maps <p>hello<em> world</em> to <p>hello <em>world</em> Trims initial 
space, by moving it before the start tag, or if this element is the first in parent's content, then by discarding the space*/void TrimInitialSpace(Lexer *lexer, Node *element, Node *text){ Node *prev, *node; if (text->type == TextNode && lexer->lexbuf[text->start] == ' ') { if (element->tag->model & CM_INLINE && element->parent->content != element) { prev = element->prev; if (prev && prev->type == TextNode) { if (lexer->lexbuf[prev->end - 1] != ' ') lexer->lexbuf[(prev->end)++] = ' '; ++(element->start); } else /* create new node */ { node = NewNode(); node->start = (element->start)++; node->end = element->start; lexer->lexbuf[node->start] = ' '; node->prev = prev; if (prev) prev->next = node; node->next = element; element->prev = node; node->parent = element->parent; } } /* discard the space in current node */ ++(text->start); }}Bool DescendantOf(Node *element, Dict *tag){ Node *parent; for (parent = element->parent; parent != null; parent = parent->parent) { if (parent->tag == tag) return yes; } return no;}Bool InsertMisc(Node *element, Node *node){ if (node->type == CommentTag || node->type == ProcInsTag || node->type == CDATATag || node->type == SectionTag || node->type == AspTag || node->type == JsteTag || node->type == PhpTag) { InsertNodeAtEnd(element, node); return yes; } return no;}void ParseTag(Lexer *lexer, Node *node, uint mode){ if (node->tag->model & CM_EMPTY) { lexer->waswhite = no; return; } else if (!(node->tag->model & CM_INLINE)) lexer->insertspace = no; if (node->tag->parser == null || node->type == StartEndTag) return; (*node->tag->parser)(lexer, node, mode);}/* the doctype has been found after other tags, and needs moving to before the html element*/void InsertDocType(Lexer *lexer, Node *element, Node *doctype){ ReportWarning(lexer, element, doctype, DOCTYPE_AFTER_TAGS); while (element->tag != tag_html) element = element->parent; InsertNodeBeforeElement(element, doctype);}/* duplicate name attribute as an id */void FixId(Lexer *lexer, Node 
*node){ AttVal *name = GetAttrByName(node, "name"); AttVal *id = GetAttrByName(node, "id"); if (name) { if (id) { if (wstrcmp(id->value, name->value) != 0) ReportAttrError(lexer, node, "name", ID_NAME_MISMATCH); } else if (XmlOut)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -