⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 clean.c

📁 我搜集到的一个java常用类库的源代码
💻 C
📖 第 1 页 / 共 3 页
字号:
            {                MergeStyles(node, child);                AddStyleProperty(node, "font-weight: bold");                StripOnlyChild(node);                return yes;            }            if (child->tag == tag_i)            {                MergeStyles(node, child);                AddStyleProperty(node, "font-style: italic");                StripOnlyChild(node);                return yes;            }            if (child->tag == tag_font)            {                MergeStyles(node, child);                AddFontStyles(node, child->attributes);                StripOnlyChild(node);                return yes;            }        }    }    return no;}/* the only child of table cell or an inline element such as em */Bool InlineStyle(Lexer *lexer, Node *node, Node **pnode){    Node *child;    if (node->tag != tag_font && (node->tag->model & (CM_INLINE|CM_ROW)))    {        child = node->content;        if (child == null)            return no;        /* check child has no peers */        if (child->next)            return no;        if (child->tag == tag_b && LogicalEmphasis)        {            MergeStyles(node, child);            AddStyleProperty(node, "font-weight: bold");            StripOnlyChild(node);            return yes;        }        if (child->tag == tag_i && LogicalEmphasis)        {            MergeStyles(node, child);            AddStyleProperty(node, "font-style: italic");            StripOnlyChild(node);            return yes;        }        if (child->tag == tag_font)        {            MergeStyles(node, child);            AddFontStyles(node, child->attributes);            StripOnlyChild(node);            return yes;        }    }    return no;}/*  Replace font elements by span elements, deleting  the font element's attributes and replacing them  by a single style attribute.*/Bool Font2Span(Lexer *lexer, Node *node, Node **pnode){    AttVal *av, *style, *next;    if (node->tag == tag_font)    {        if (DropFontTags)        {            DiscardContainer(node, pnode);            return no;        }        /* if FONT is only child of parent element then leave alone */        if (node->parent->content == node            && node->next == null)            return no;        AddFontStyles(node, node->attributes);        /* extract style attribute and free the rest */        av = node->attributes;        style = null;        while (av)        {            next = av->next;            if (wstrcmp(av->attribute, "style") == 0)            {                av->next = null;                style = av;            }            else            {                if (av->attribute)                    MemFree(av->attribute);                if (av->value)                    MemFree(av->value);                MemFree(av);            }            av = next;        }        node->attributes = style;        node->tag = tag_span;        MemFree(node->element);        node->element = wstrdup("span");        return yes;    }    return no;}Bool IsElement(Node *node){    return (node->type == StartTag || node->type == StartEndTag ? yes : no);}/*  Applies all matching rules to a node.*/Node *CleanNode(Lexer *lexer, Node *node){    Node *next = null;    for (next = node; IsElement(node); node = next)    {        if (Dir2Div(lexer, node, &next))            continue;        if (NestedList(lexer, node, &next))            continue;        if (Center2Div(lexer, node, &next))            continue;        if (MergeDivs(lexer, node, &next))            continue;        if (BlockStyle(lexer, node, &next))            continue;        if (InlineStyle(lexer, node, &next))            continue;        if (Font2Span(lexer, node, &next))            continue;        break;    }    return next;}Node *CreateStyleProperties(Lexer *lexer, Node *node){    Node *child;    if (node->content)    {        for (child = node->content; child != null; child = child->next)        {            child = CreateStyleProperties(lexer, child);        }    }    return CleanNode(lexer, node);}void DefineStyleRules(Lexer *lexer, Node *node){    Node *child;    if (node->content)    {        for (child = node->content;                child != null; child = child->next)        {            DefineStyleRules(lexer, child);        }    }    Style2Rule(lexer, node);}void CleanTree(Lexer *lexer, Node *doc){    doc = CreateStyleProperties(lexer, doc);    if (MakeClean)    {        DefineStyleRules(lexer, doc);        CreateStyleElement(lexer, doc);    }}/* replace i by em and b by strong */void EmFromI(Node *node){    while (node)    {        if (node->tag == tag_i)        {            MemFree(node->element);            node->element = wstrdup(tag_em->name);            node->tag = tag_em;        }        else if (node->tag == tag_b)        {            MemFree(node->element);            node->element = wstrdup(tag_strong->name);            node->tag = tag_strong;        }        if (node->content)            EmFromI(node->content);        node = node->next;    }}Bool HasOneChild(Node *node){    return (node->content && node->content->next == null);}/* Some people use dir or ul without an li to indent the content. The pattern to look for is a list with a single implicit li. This is recursively replaced by an implicit blockquote.*/void List2BQ(Node *node){    while (node)    {        if (node->content)            List2BQ(node->content);        if (node->tag && node->tag->parser == ParseList &&            HasOneChild(node) && node->content->implicit)        {            StripOnlyChild(node);            MemFree(node->element);            node->element = wstrdup(tag_blockquote->name);            node->tag = tag_blockquote;            node->implicit = yes;        }        node = node->next;    }}static char indent_buf[32];/* Replace implicit blockquote by div with an indent taking care to reduce nested blockquotes to a single div with the indent set to match the nesting depth*/void BQ2Div(Node *node){    int indent;    while (node)    {        if (node->tag == tag_blockquote && node->implicit)        {            indent = 1;            while(HasOneChild(node) &&                  node->content->tag == tag_blockquote &&                  node->implicit)            {                ++indent;                StripOnlyChild(node);            }            if (node->content)                BQ2Div(node->content);            sprintf(indent_buf, "margin-left: %dem", 2*indent);            MemFree(node->element);            node->element = wstrdup(tag_div->name);            node->tag = tag_div;            AddAttribute(node, "style", indent_buf);        }        else if (node->content)            BQ2Div(node->content);        node = node->next;    }}/* node is <![if ...]> prune up to <![endif]> */Node *PruneSection(Lexer *lexer, Node *node){    for (;;)    {        /* discard node and returns next */        node = DiscardElement(node);        if (node == null)            return null;                if (node->type == SectionTag)        {            if (wstrncmp(lexer->lexbuf + node->start, "if", 2) == 0)            {                node = PruneSection(lexer, node);                continue;            }            if (wstrncmp(lexer->lexbuf + node->start, "endif", 5) == 0)            {                node = DiscardElement(node);                break;            }        }    }    return node;}void DropSections(Lexer *lexer, Node *node){    while (node)    {        if (node->type == SectionTag)        {            /* prune up to matching endif */            if (wstrncmp(lexer->lexbuf + node->start, "if", 2) == 0)            {                node = PruneSection(lexer, node);                continue;            }            /* discard others as well */            node = DiscardElement(node);            continue;        }        if (node->content)            DropSections(lexer, node->content);        node = node->next;    }}void PurgeAttributes(Node *node){    AttVal *attr = node->attributes, *next, *prev = null;    while (attr)    {        next = attr->next;        if (wstrcmp(attr->attribute, "class") == 0 ||            wstrcmp(attr->attribute, "style") == 0 ||            wstrcmp(attr->attribute, "lang") == 0 ||            (wstrcmp(attr->attribute, "width") == 0 &&                (node->tag == tag_td || node->tag == tag_th)))        {            if (prev)                prev->next = next;            else                node->attributes = next;            FreeAttribute(attr);        }        else            prev = attr;        attr = next;    }}/* Word2000 uses span excessively, so we strip span out */Node *StripSpan(Node *span){    Node *node, *prev, *content;    /*     deal with span elements that have content     by splicing the content in place of the span     after having processed it    */    CleanWord2000(span->content);    content = span->content;    if (span->prev)        prev = span->prev;    else if (content)    {        node = content;        content = content->next;        RemoveNode(node);        InsertNodeBeforeElement(span, node);        prev = node;    }    while (content)    {        node = content;        content = content->next;        RemoveNode(node);        InsertNodeAfterElement(prev, node);        prev = node;    }    if (span->next == null)        span->parent->last = prev;    node = span->next;    span->content = null;    DiscardElement(span);    return node;}/* This is a major clean up to strip out all the extra stuff you get when you save as web page from Word 2000. It doesn't yet know what to do with VML tags, but these will appear as errors unless you declare them as new tags, such as o:p which needs to be declared as inline.*/void CleanWord2000(Node *node){    while (node)    {        /* discard Word's style verbiage */        if (node->tag == tag_style || node->tag == tag_meta || node->type == CommentTag)        {            node = DiscardElement(node);            continue;        }        /* strip out all span tags Word scatters so liberally! */        if (node->tag == tag_span)        {            node = StripSpan(node);            continue;        }        /* get rid of Word's xmlns attributes */        if (node->tag == tag_html)        {            /* check that it's a Word 2000 document */            if (!GetAttrByName(node, "xmlns:o"))                return;            FreeAttrs(node);        }        if (node->tag == tag_link)        {            AttVal *attr = GetAttrByName(node, "rel");            if (attr && wstrcmp(attr->value, "File-List") == 0)            {                node = DiscardElement(node);                continue;            }        }        /* strip out style and class attributes */        if (node->type == StartTag || node->type == StartEndTag)            PurgeAttributes(node);        if (node->content)            CleanWord2000(node->content);        /* discard empty paragraphs */        if (node->content == null && node->tag == tag_p)        {            node = DiscardElement(node);            continue;        }        node = node->next;    }}Bool IsWord2000(Node *root){    Node *html = FindHTML(root);    return (html && GetAttrByName(html, "xmlns:o"));}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -