parser.c

来自「我搜集到的一个java常用类库的源代码」· C语言代码 · 共 2,301 行 · 第 1/5 页
2,301 行
/*
  parser.c - HTML Parser

  (c) 1998-2000 (W3C) MIT, INRIA, Keio University
  See tidy.c for the copyright notice.
*/

#include "platform.h"   /* platform independent stuff */
#include "html.h"       /* to pull in definition of nodes */

int SeenBodyEndTag;  /* could be moved into lexer structure */

/*
  Consistency check for the links around node and, recursively,
  for every node below it.

  Returns no as soon as one of these is violated:
    - a neighbour (prev/next) does not point back at node
    - node has no prev but is not its parent's first child (content)
    - node has no next but is not its parent's last child (last)
    - node cannot be reached by walking its parent's child list
    - any child of node fails the same check
  Returns yes otherwise.
*/
Bool CheckNodeIntegrity(Node *node)
{
    Node *sibling;
    Node *kid;
    Bool listed;

    /* both neighbours have to link back to this node */
    if (node->prev != null && node->prev->next != node)
        return no;

    if (node->next != null && node->next->prev != node)
        return no;

    if (node->parent != null)
    {
        /* a node at either end of the list must be known to the parent */
        if (node->prev == null && node->parent->content != node)
            return no;

        if (node->next == null && node->parent->last != node)
            return no;

        /* the node must appear somewhere in the parent's child list */
        listed = no;
        sibling = node->parent->content;

        while (sibling != null)
        {
            if (sibling == node)
            {
                listed = yes;
                break;
            }

            sibling = sibling->next;
        }

        if (!listed)
            return no;
    }

    /* descend into the children */
    kid = node->content;

    while (kid != null)
    {
        if (!CheckNodeIntegrity(kid))
            return no;

        kid = kid->next;
    }

    return yes;
}

/*
 used to determine how attributes without values should be printed
 this was introduced to deal with user defined tags e.g. 
Cold Fusion*/Bool IsNewNode(Node *node){    if (node && node->tag)    {        return (node->tag->model & CM_NEW);    }    return yes;}void CoerceNode(Lexer *lexer, Node *node, Dict *tag){    Node *tmp = InferredTag(lexer, tag->name);    ReportWarning(lexer, node, tmp, OBSOLETE_ELEMENT);    MemFree(tmp);    MemFree(node->element);    node->was = node->tag;    node->tag = tag;    node->type = StartTag;    node->implicit = yes;    node->element = wstrdup(tag->name);}/* extract a node and its children from a markup tree */void RemoveNode(Node *node){    if (node->prev)        node->prev->next = node->next;    if (node->next)        node->next->prev = node->prev;    if (node->parent)    {        if (node->parent->content == node)            node->parent->content = node->next;        if (node->parent->last == node)            node->parent->last = node->prev;    }    node->parent = node->prev = node->next = null;}/* remove node from markup tree and discard it */Node *DiscardElement(Node *element){    Node *next = null;    if (element)    {        next = element->next;        RemoveNode(element);        FreeNode(element);    }    return next;}/* insert node into markup tree */void InsertNodeAtStart(Node *element, Node *node){    node->parent = element;    if (element->content == null)        element->last = node;    node->next = element->content;    node->prev = null;    element->content = node;}/* insert node into markup tree */void InsertNodeAtEnd(Node *element, Node *node){    node->parent = element;    node->prev = element->last;    if (element->last != null)        element->last->next = node;    else        element->content = node;    element->last = node;}/* insert node into markup tree in pace of element which is moved to become the child of the node*/void InsertNodeAsParent(Node *element, Node *node){    node->content = element;    node->last = element;    node->parent = element->parent;    element->parent = node;        if (node->parent->content == element)       
 node->parent->content = node;    if (node->parent->last == element)        node->parent->last = node;    node->prev = element->prev;    element->prev = null;    if (node->prev)        node->prev->next = node;    node->next = element->next;    element->next = null;    if (node->next)        node->next->prev = node;}/* insert node into markup tree before element */void InsertNodeBeforeElement(Node *element, Node *node){    Node *parent;    parent = element->parent;    node->parent = parent;    node->next = element;    node->prev = element->prev;    element->prev = node;    if (node->prev)        node->prev->next = node;    if (parent->content == element)        parent->content = node;}/* insert node into markup tree after element */void InsertNodeAfterElement(Node *element, Node *node){    Node *parent;    parent = element->parent;    node->parent = parent;    if (parent->last == element)        parent->last = node;    else    {        node->next = element->next;        node->next->prev = node;    }    element->next = node;    node->prev = element;}Bool CanPrune(Node *element){    if (element->type == TextNode)        return yes;    if (element->content)        return no;    if (element->tag == tag_a && element->attributes != null)        return no;    if (element->tag == tag_p && !DropEmptyParas)        return no;    if (element->tag == null)        return no;    if (element->tag->model & CM_ROW)        return no;    if (element->tag == tag_applet)        return no;    if (element->tag == tag_object)        return no;    return yes;}void TrimEmptyElement(Lexer *lexer, Node *element){    if (CanPrune(element))    {       if (element->type != TextNode)            ReportWarning(lexer, element, null, TRIM_EMPTY_ELEMENT);        DiscardElement(element);    }    else if (element->tag == tag_p && element->content == null)    {        /* replace <p></p> by <br><br> to preserve formatting */        Node *node = InferredTag(lexer, "br");        CoerceNode(lexer, element, 
tag_br);        InsertNodeAfterElement(element, node);    }}/*  If last child of element is a text node  then trim trailing white space character.*/void TrimSpace(Lexer *lexer, Node *last){    unsigned char c;    if (last != null && last->type == TextNode && last->end > last->start)    {        while (last->end > last->start)        {            c = (unsigned char)lexer->lexbuf[last->end - 1];            if (c == 160)  /* non breaking space */            {                if (last->parent->tag == tag_td || last->parent->tag == tag_th)                {                    if (last->end > last->start + 1)                        last->end -= 1;                    else                        break;                }                else                    last->end -= 1;            }            else if (c == ' ')                last->end -= 1;            else                break;        }         if (last->end < last->start)            tidy_out(lexer->errout, "TrimSpace: screwed up text node\n");       /* if empty string then delete from parse tree */        if (last->start == last->end)            TrimEmptyElement(lexer, last);    }}/*  This maps        <em>hello </em><strong>world</strong>  to       <em>hello</em> <strong>world</strong>  If last child of element is a text node  then trim trailing white space character  moving it to after element's end tag.*/void TrimTrailingSpace(Lexer *lexer, Node *last){    unsigned char c;    if (last != null && last->type == TextNode && last->end > last->start)    {        c = (unsigned char)lexer->lexbuf[last->end - 1];        if (c == ' ' || c == 160)        {            last->end -= 1;            if (last->parent->tag->model & CM_INLINE)                lexer->insertspace = yes;        }        /* if empty string then delete from parse tree */        if (last->start == last->end)            TrimEmptyElement(lexer, last);    }}/*  This maps        <p>hello<em> world</em>  to       <p>hello <em>world</em>  Trims initial space, by 
moving it before the
  start tag, or if this element is the first in
  parent's content, then by discarding the space

  Nothing happens unless text is a text node whose first character in
  lexer->lexbuf is a space. The space is only moved in front of element
  when element's content model has CM_INLINE set and element is not
  its parent's first child; in every case text->start is advanced past
  the space.
*/
void TrimInitialSpace(Lexer *lexer, Node *element, Node *text)
{
    Node *prev, *node;

    if (text->type == TextNode && lexer->lexbuf[text->start] == ' ')
    {
        if (element->tag->model & CM_INLINE &&
            element->parent->content != element)
        {
            prev = element->prev;

            if (prev && prev->type == TextNode)
            {
                /* extend the preceding text node unless it already ends in a space */
                if (lexer->lexbuf[prev->end - 1] != ' ')
                    lexer->lexbuf[(prev->end)++] = ' ';

                ++(element->start);
            }
            else /* create new node */
            {
                /* the new node spans one character of lexbuf, taken from element->start */
                node = NewNode();
                node->start = (element->start)++;
                node->end = element->start;
                lexer->lexbuf[node->start] = ' ';
                node->prev = prev;

                if (prev)
                    prev->next = node;

                node->next = element;
                element->prev = node;
                node->parent = element->parent;
            }
        }

        /* discard the space in current node */
        ++(text->start);
    }
}

/*
  Returns yes when some ancestor of element (its parent, the parent's
  parent, and so on) has the given tag; element itself is not tested.
*/
Bool DescendantOf(Node *element, Dict *tag)
{
    Node *parent;

    for (parent = element->parent;
            parent != null; parent = parent->parent)
    {
        if (parent->tag == tag)
            return yes;
    }

    return no;
}

/*
  If node is a comment, processing instruction, CDATA, section, ASP,
  JSTE or PHP node, append it to element's children and return yes.
  Any other node type is left alone and no is returned.
*/
Bool InsertMisc(Node *element, Node *node)
{
    if (node->type == CommentTag ||
        node->type == ProcInsTag ||
        node->type == CDATATag ||
        node->type == SectionTag ||
        node->type == AspTag ||
        node->type == JsteTag ||
        node->type == PhpTag)
    {
        InsertNodeAtEnd(element, node);
        return yes;
    }

    return no;
}

/*
  Invoke the tag-specific parser registered for node's tag.

  For tags whose model has CM_EMPTY set, lexer->waswhite is cleared and
  no parser is run. For tags without CM_INLINE, lexer->insertspace is
  cleared first. Nothing is called when the tag has no parser or node
  is a StartEndTag.
*/
void ParseTag(Lexer *lexer, Node *node, uint mode)
{
    if (node->tag->model & CM_EMPTY)
    {
        lexer->waswhite = no;
        return;
    }
    else if (!(node->tag->model & CM_INLINE))
        lexer->insertspace = no;

    if (node->tag->parser == null || node->type == StartEndTag)
        return;

    (*node->tag->parser)(lexer, node, mode);
}

/*
 the doctype has been found after other tags,
 and needs moving to before the html element

 Reports DOCTYPE_AFTER_TAGS, then walks up from element to the node
 whose tag is tag_html and inserts doctype before it.
*/
void InsertDocType(Lexer *lexer, Node *element, Node *doctype)
{
    ReportWarning(lexer, element, doctype, DOCTYPE_AFTER_TAGS);

    /* NOTE(review): assumes an html ancestor exists; the loop has no null check on element -- confirm callers guarantee this */
    while (element->tag != tag_html)
        element = element->parent;

    InsertNodeBeforeElement(element, doctype);
}

/* duplicate name attribute as an id */
void FixId(Lexer *lexer, Node *node)
{
    AttVal *name = GetAttrByName(node, "name");
    AttVal *id = GetAttrByName(node, "id");

    if (name)
    {
        if (id)
        {
            if (wstrcmp(id->value, name->value) != 0)
                ReportAttrError(lexer, node, "name", ID_NAME_MISMATCH);
        }
        else if (XmlOut)
parser.c - 源码说明

本页面展示了「我搜集到的一个java常用类库的源代码」中的 parser.c 源码文件，采用 C语言编程语言编写，共 2,301 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。
虫虫下载站收录了大量与java相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。
⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?