📄 html.c
字号:
parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) { /* skip until the closing instructions */ } else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) { /* all this overhead is to catch white-space entities */ Pword = Html_parse_entities(html, word, size); for (start = i = 0; Pword[i]; start = i) if (isspace(Pword[i])) { while (Pword[++i] && isspace(Pword[i])); Html_process_space(html, Pword + start, i - start); } else { while (Pword[++i] && !isspace(Pword[i])); a_Dw_page_add_text(DW_PAGE (html->dw), g_strndup(Pword + start, i - start), html->stack[html->stack_top].style); html->pre_column += i - start; html->PreFirstChar = FALSE; } g_free(Pword); } else { /* add pending space if present */ if (html->SPCPending && (!SGML_SPCDEL || !html->PrevWasOpenTag)) /* SGML_SPCDEL ignores space after an open tag */ a_Dw_page_add_space(DW_PAGE (html->dw), html->stack[html->stack_top].style); /* actually white-space entities inside the word could be * collapsed (except &nbsp;), but that's too much overhead * for a very rare case of ill-formed HTML --Jcid */ Pword = Html_parse_entities(html, word, size); g_strdelimit(Pword, "\t\f\n\r", ' '); a_Dw_page_add_text(DW_PAGE (html->dw), Pword, html->stack[html->stack_top].style); } html->PrevWasOpenTag = FALSE; html->SPCPending = FALSE;}

/*
 * Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize
 * structure, with the initial < skipped over (e.g. "P align=center>")
 *
 * The comparison is case-insensitive and covers the whole of tagstr.
 * A match additionally requires that the character following the name in
 * 'tag' lies inside the first 'tagsize' bytes and is white space, '>' or '/'.
 *
 * Return value: TRUE on a match, FALSE otherwise.
 */
static gboolean Html_match_tag(const char *tagstr, char *tag, gint tagsize)
{
   gint i;

   for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {
      /* <ctype.h> functions are only defined for values representable as
       * unsigned char (or EOF); a plain char may be negative. */
      if (tolower((unsigned char)tagstr[i]) != tolower((unsigned char)tag[i]))
         return FALSE;
   }
   /* The test for '/' is for xml compatibility: "empty/>" will be matched. */
   if (i < tagsize &&
       (isspace((unsigned char)tag[i]) || tag[i] == '>' || tag[i] == '/'))
      return TRUE;
   return FALSE;
}

/*
 * This function is called after popping the stack, to
 * handle nested DwPage widgets.
*/static void Html_eventually_pop_dw(DilloHtml *html){ /* This function is called after popping from the stack, so the * relevant hand_over_break is at html->stack_top + 1. */ if (html->dw != html->stack[html->stack_top].page) { if (html->stack[html->stack_top + 1].hand_over_break) a_Dw_page_hand_over_break(DW_PAGE(html->dw), html->stack[(html)->stack_top].style); a_Dw_page_flush(DW_PAGE(html->dw)); html->dw = html->stack[html->stack_top].page; }}/* * Push the tag (copying attributes from the top of the stack) */static void Html_push_tag(DilloHtml *html, gint tag_idx){ char *tagstr; gint n_items; /* Save the element's name (no parameters) into tagstr. */ tagstr = g_strdup(Tags[tag_idx].name); n_items = html->stack_top + 1; a_List_add(html->stack, n_items, html->stack_max); /* We'll copy the former stack item and just change the tag and its index * instead of copying all fields except for tag. --Jcid */ html->stack[n_items] = html->stack[n_items - 1]; html->stack[n_items].tag_name = tagstr; html->stack[n_items].tag_idx = tag_idx; html->stack_top = n_items; /* proper memory management, may be unref'd later */ a_Dw_style_ref (html->stack[html->stack_top].style); if (html->stack[html->stack_top].table_cell_style) a_Dw_style_ref (html->stack[html->stack_top].table_cell_style); html->dw = html->stack[html->stack_top].page;}/* * Push the tag (used to force en element with optional open into the stack) * Note: now it's the same as Html_push_tag(), but things may change... */static void Html_force_push_tag(DilloHtml *html, gint tag_idx){ Html_push_tag(html, tag_idx);}/* * Pop the top tag in the stack */static void Html_real_pop_tag(DilloHtml *html){ a_Dw_style_unref (html->stack[html->stack_top].style); if (html->stack[html->stack_top].table_cell_style) a_Dw_style_unref (html->stack[html->stack_top].table_cell_style); g_free(html->stack[html->stack_top--].tag_name); Html_eventually_pop_dw(html);}/* * Default close function for tags. 
* (conditional cleanup of the stack) * There're several ways of doing it. Considering the HTML 4.01 spec * which defines optional close tags, and the will to deliver useful diagnose * messages for bad-formed HTML, it'll go as follows: * 1.- Search the stack for the first tag that requires a close tag. * 2.- If it matches, clean all the optional-close tags in between. * 3.- Cleanup the matching tag. (on error, give a warning message) * * If 'w3c_mode' is NOT enabled: * 1.- Search the stack for a matching tag based on tag level. * 2.- If it exists, clean all the tags in between. * 3.- Cleanup the matching tag. (on error, give a warning message) */static void Html_tag_cleanup_at_close(DilloHtml *html, gint TagIdx){ gint w3c_mode = !prefs.w3c_plus_heuristics; gint stack_idx, cmp = 1; gint new_idx = TagIdx; if (html->CloseOneTag) { Html_real_pop_tag(html); html->CloseOneTag = FALSE; return; } /* Look for the candidate tag to close */ stack_idx = html->stack_top; while (stack_idx && (cmp = (new_idx != html->stack[stack_idx].tag_idx)) && ((w3c_mode && Tags[html->stack[stack_idx].tag_idx].EndTag == 'O') || (!w3c_mode && Tags[html->stack[stack_idx].tag_idx].TagLevel < Tags[new_idx].TagLevel))) { --stack_idx; } /* clean, up to the matching tag */ if (cmp == 0 && stack_idx > 0) { /* There's a valid matching tag in the stack */ while (html->stack_top >= stack_idx) { gint toptag_idx = html->stack[html->stack_top].tag_idx; /* Warn when we decide to close an open tag (for !w3c_mode) */ if (html->stack_top > stack_idx && Tags[toptag_idx].EndTag != 'O') MSG_HTML(" - forcing close of open tag: <%s>\n", Tags[toptag_idx].name); /* Close this and only this tag */ html->CloseOneTag = TRUE; Tags[toptag_idx].close (html, toptag_idx); } } else { MSG_HTML("unexpected closing tag: </%s>. 
-- expected </%s>\n", Tags[new_idx].name, html->stack[stack_idx].tag_name); }}/* * Cleanup (conditional), and Pop the tag (if it matches) */static void Html_pop_tag(DilloHtml *html, gint TagIdx){ Html_tag_cleanup_at_close(html, TagIdx);}/* * Some parsing routines. *//* * Used by Html_parse_length */static DwStyleLength Html_parse_length_or_multi_length (const gchar *attr, gchar **endptr){ DwStyleLength l; double v; gchar *end; v = strtod (attr, &end); switch (*end) { case '%': end++; l = DW_STYLE_CREATE_PER_LENGTH (v / 100); break; case '*': end++; l = DW_STYLE_CREATE_REL_LENGTH (v); break;/* The "px" suffix seems not allowed by HTML4.01 SPEC. case 'p': if (end[1] == 'x') end += 2;*/ default: l = DW_STYLE_CREATE_ABS_LENGTH ((gint)v); break; } if (endptr) *endptr = end; return l;}/* * Returns a length or a percentage, or DW_STYLE_UNDEF_LENGTH in case * of an error, or if attr is NULL. */static DwStyleLength Html_parse_length (DilloHtml *html, const gchar *attr){ DwStyleLength l; gchar *end; l = Html_parse_length_or_multi_length (attr, &end); if (DW_STYLE_IS_REL_LENGTH (l)) /* not allowed as &Length; */ return DW_STYLE_LENGTH_AUTO; else { /* allow only whitespaces */ if (*end && !isspace (*end)) { MSG_HTML("Garbage after length: %s\n", attr); return DW_STYLE_LENGTH_AUTO; } } return l;}/* * Parse a color attribute. * Return value: parsed color, or default_color (+ error msg) on error. */static gint32 Html_color_parse(DilloHtml *html, const char *subtag, gint32 default_color){ gint err = 1; gint32 color = a_Color_parse(subtag, default_color, &err); if (err) { MSG_HTML("color is not in \"#RRGGBB\" format\n"); } return color;}/* * Check that 'val' is composed of characters inside [A-Za-z0-9:_.-] * Note: ID can't have entities, but this check is enough (no '&'). * Return value: 1 if OK, 0 otherwise. 
*/static gint Html_check_name_val(DilloHtml *html, const char *val, const char *attrname){ gint i; for (i = 0; val[i]; ++i) if (!(isalnum(val[i]) || strchr(":_.-", val[i]))) break; if (val[i] || !isalpha(val[0])) MSG_HTML("'%s' value is not of the form " "[A-Za-z][A-Za-z0-9:_.-]*\n", attrname); return !(val[i]);}/* * Handle DOCTYPE declaration * * Follows the convention that HTML 4.01 * doctypes which include a full w3c DTD url are treated as * standards-compliant, but 4.01 without the url and HTML 4.0 and * earlier are not. XHTML doctypes are always standards-compliant * whether or not an url is present. * * Note: I'm not sure about this convention. The W3C validator * recognizes the "HTML Level" with or without the URL. The convention * comes from mozilla (see URLs below), but Dillo doesn't have the same * rendering modes, so it may be better to chose another behaviour. --Jcid * * http://www.mozilla.org/docs/web-developer/quirks/doctypes.html * http://lists.auriga.wearlab.de/pipermail/dillo-dev/2004-October/002300.html * * This is not a full DOCTYPE parser, just enough for what Dillo uses. */static void Html_parse_doctype(DilloHtml *html, char *tag, gint tagsize){ char *HTML_sig = "<!DOCTYPE HTML PUBLIC "; char *HTML20 = "-//IETF//DTD HTML//EN"; char *HTML32 = "-//W3C//DTD HTML 3.2"; char *HTML40 = "-//W3C//DTD HTML 4.0"; char *HTML401 = "-//W3C//DTD HTML 4.01"; char *HTML401_url = "http://www.w3.org/TR/html4/"; char *XHTML1 = "-//W3C//DTD XHTML 1.0"; char *XHTML1_url = "http://www.w3.org/TR/xhtml1/DTD/"; char *XHTML11 = "-//W3C//DTD XHTML 1.1"; char *XHTML11_url = "http://www.w3.org/TR/xhtml11/DTD/"; int i, quote; char *p, *ntag = g_strndup(tag, tagsize); /* Tag sanitization: Collapse whitespace between tokens * and replace '\n' and '\r' with ' ' inside quoted strings. 
*/ for (i = 0, p = ntag; *p; ++p) { if (isspace(*p)) { for (ntag[i++] = ' '; isspace(p[1]); ++p); } else if ((quote = *p) == '"' || *p == '\'') { for (ntag[i++] = *p++; (ntag[i++] = *p) && *p != quote; ++p) { if (*p == '\n' || *p == '\r') ntag[i - 1] = ' '; p += (p[0] == '\r' && p[1] == '\n') ? 1 : 0; } } else { ntag[i++] = *p; } if (!*p) break; } ntag[i] = 0; _MSG("New: {%s}\n", ntag); /* The default DT_NONE type is TagSoup */ if (!g_strncasecmp(ntag, HTML_sig, strlen(HTML_sig))) { p = ntag + strlen(HTML_sig) + 1; if (!strncmp(p, HTML401, strlen(HTML401)) && a_Misc_stristr(p + strlen(HTML401), HTML401_url)) { html->DocType = DT_HTML; html->DocTypeVersion = 4.01f; } else if (!strncmp(p, XHTML1, strlen(XHTML1)) && a_Misc_stristr(p + strlen(XHTML1), XHTML1_url)) { html->DocType = DT_XHTML; html->DocTypeVersion = 1.0f; } else if (!strncmp(p, XHTML11, strlen(XHTML11)) && a_Misc_stristr(p + strlen(XHTML11), XHTML11_url)) { html->DocType = DT_XHTML; html->DocTypeVersion = 1.1f; } else if (!strncmp(p, HTML40, strlen(HTML40))) { html->DocType = DT_HTML; html->DocTypeVersion = 4.0f; } else if (!strncmp(p, HTML32, strlen(HTML32))) { html->DocType = DT_HTML; html->DocTypeVersion = 3.2f; } else if (!strncmp(p, HTML20, strlen(HTML20))) { html->DocType = DT_HTML; html->DocTypeVersion = 2.0f; } } g_free(ntag);}/* * Handle open HTML element */static void Html_tag_open_html(DilloHtml *html, char *tag, gint tagsize){ if (!(html->InFlags & IN_HTML)) html->InFlags |= IN_HTML; ++html->Num_HTML; if (html->Num_HTML > 1) { MSG_HTML("HTML element was already open\n"); }}/* * Handle close HTML element */static void Html_tag_close_html(DilloHtml *html, gint TagIdx){ /* todo: may add some checks here */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -