📄 html.c
字号:
{"iuml",0357}, {"kappa",01672}, {"lArr",020720}, {"lambda",01673}, {"lang",021451},{"laquo",0253}, {"larr",020620}, {"lceil",021410}, {"ldquo",020034},{"le",021144}, {"lfloor",021412},{"lowast",021027}, {"loz",022712}, {"lrm",020016}, {"lsaquo",020071},{"lsquo",020030}, {"lt",60}, {"macr",0257}, {"mdash",020024},{"micro",0265}, {"middot",0267},{"minus",021022},{"mu",01674}, {"nabla",021007}, {"nbsp",32}, {"ndash",020023},{"ne",021140}, {"ni",021013}, {"not",0254}, {"notin",021011},{"nsub",021204}, {"ntilde",0361}, {"nu",01675}, {"oacute",0363}, {"ocirc",0364}, {"oelig",0523}, {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677}, {"oplus",021225},{"or",021050}, {"ordf",0252}, {"ordm",0272}, {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366}, {"para",0266}, {"part",021002}, {"permil",020060},{"perp",021245}, {"phi",01706}, {"pi",01700}, {"piv",01726}, {"plusmn",0261}, {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035}, {"psi",01710}, {"quot",34}, {"rArr",020722}, {"radic",021032}, {"rang",021452},{"raquo",0273}, {"rarr",020622}, {"rceil",021411}, {"rdquo",020035},{"real",020434},{"reg",0256}, {"rfloor",021413}, {"rho",01701}, {"rlm",020017}, {"rsaquo",020072},{"rsquo",020031}, {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247}, {"shy",0255}, {"sigma",01703}, {"sigmaf",01702},{"sim",021074}, {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021}, {"sup",021203}, {"sup1",0271}, {"sup2",0262}, {"sup3",0263}, {"supe",021207},{"szlig",0337}, {"tau",01704}, {"there4",021064}, {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376}, {"tilde",01334},{"times",0327}, {"trade",020442},{"uArr",020721}, {"uacute",0372},{"uarr",020621}, {"ucirc",0373}, {"ugrave",0371}, {"uml",0250}, {"upsih",01722}, {"upsilon",01705},{"uuml",0374}, {"weierp",020430},{"xi",01676}, {"yacute",0375}, {"yen",0245}, {"yuml",0377}, {"zeta",01666}, {"zwj",020015}, {"zwnj",020014}};/* * Comparison function for binary search 
*/static int Html_entity_comp(const void *a, const void *b){ return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity);}/* * Binary search of 'key' in entity list */static int Html_entity_search(char *key){ Ent_t *res, EntKey; EntKey.entity = key; res = bsearch(&EntKey, Entities, NumEnt, sizeof(Ent_t), Html_entity_comp); if ( res ) return (res - Entities); return -1;}/* * Switch a few UCS encodings to latin1. */static gint Html_try_ucs2latin1(gint isocode){ gint ret; switch (isocode) { case 0x2018: case 0x2019: ret = '\''; break; case 0x201c: case 0x201d: ret = '"'; break; case 0x2013: case 0x2014: ret = '-'; break; case 0x2039: ret = '<'; break; case 0x203a: ret = '>'; break; case 0x2022: ret = 176; break; default: ret = -1; break; } return ret;}/* * Given an entity, return the ISO-Latin1 character code. * (-1 if not a valid entity) */static gint Html_parse_entity(const gchar *token, gint toksize){ gint base, isocode, i, ret; gchar *eoe, *name; g_return_val_if_fail (token[0] == '&', -1); ret = -1; eoe = (toksize) ? memchr(token, ';', toksize) : strchr(token, ';'); if (eoe) { if (token[1] == '#') { /* Numeric token */ base = (token[2] == 'x' || token[2] == 'X') ? 16 : 10; isocode = strtol(token + 2 + (base==16), NULL, base); if (isocode > 0 && isocode <= 255) { ret = isocode; } else { /* Try a few UCS translations to Latin1 */ ret = Html_try_ucs2latin1(isocode); } } else { /* Search for named entity */ name = g_strndup(token + 1, eoe - token - 1); i = Html_entity_search(name); g_free(name); if (i != -1) { if (Entities[i].isocode > 0 && Entities[i].isocode <= 255) ret = Entities[i].isocode; else ret = Html_try_ucs2latin1(Entities[i].isocode); } } } return ret;}/* * Convert all the entities in a token to plain ISO character codes. Takes * a token and its length, and returns a newly allocated string. 
*/static char *Html_parse_entities(gchar *token, gint toksize){ gchar *esc_set = "&\xE2\xC2"; gchar *new_str; gint i, j, isocode; new_str = g_strndup(token, toksize); if (new_str[strcspn(new_str, esc_set)] == 0) return new_str; for (i = j = 0; i < toksize; i++) { if (token[i] == '&' && (isocode = Html_parse_entity(token + i, toksize - i)) != -1) { new_str[j++] = isocode; while (token[++i] != ';'); } else if (token[i] == '\xE2' && token[i+1] == '\x80' && i+2 < toksize){ /* Hack: for parsing some UTF-8 characters into latin1 */ switch (token[i+2]) { case '\x94': new_str[j++] = '-'; new_str[j++] = '-'; break; case '\x98': case '\x99': new_str[j++] = '\''; break; case '\x9C': case '\x9D': new_str[j++] = '"'; break; case '\xA2': new_str[j++] = '*'; new_str[j++] = ' '; break; default: /* unhandled */ new_str[j++] = '\xE2'; break; } i += 2; } else if (token[i] == '\xC2' && token[i+1] == '\xA0') { /* Hack: for parsing some UTF-8 characters into latin1 */ new_str[j++] = ' '; ++i; } else { new_str[j++] = token[i]; } } new_str[j] = '\0'; return new_str;}/* * Parse spaces * */static void Html_process_space(DilloHtml *html, char *space, gint spacesize){ gint i, offset; DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode; if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ) { html->StashSpace = (html->Stash->len > 0); html->SPCPending = FALSE; } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) { char *Pword = g_strndup(space, spacesize); g_string_append(html->Stash, Pword); g_free(Pword); html->SPCPending = FALSE; } else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) { /* re-scan the string for characters that cause line breaks */ for (i = 0; i < spacesize; i++) { /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */ if (!html->PreFirstChar && (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) { a_Dw_page_add_linebreak(DW_PAGE (html->dw), html->stack[(html)->stack_top].style); html->pre_column = 0; } html->PreFirstChar = FALSE; 
/* cr and lf should not be rendered -- they appear as a break */ switch (space[i]) { case '\r': case '\n': break; case '\t': if (prefs.show_extra_warnings) MSG_HTML("TAB character inside <PRE>\n"); offset = TAB_SIZE - html->pre_column % TAB_SIZE; a_Dw_page_add_text(DW_PAGE (html->dw), g_strnfill(offset, ' '), html->stack[html->stack_top].style); html->pre_column += offset; break; default: a_Dw_page_add_text(DW_PAGE (html->dw), g_strndup(space + i, 1), html->stack[html->stack_top].style); html->pre_column++; break; } html->PrevWasCR = (space[i] == '\r'); } html->SPCPending = FALSE; } else { if (html->PrevWasOpenTag) { /* ignore white space inmediately after an open tag */ html->SPCPending = FALSE; } else { g_free(html->SPCBuf); html->SPCBuf = g_strndup(space, spacesize); html->SPCPending = TRUE; } if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY ) html->StashSpace = (html->Stash->len > 0); }}/* * Handles putting the word into its proper place * > STASH and VERBATIM --> html->Stash * > otherwise it goes through a_Dw_page_add_text() * * Entities are parsed (or not) according to parse_mode. */static void Html_process_word(DilloHtml *html, char *word, gint size){ gint i, start; gchar *Pword; DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode; if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH || parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY ) { if ( html->StashSpace ) { g_string_append_c(html->Stash, ' '); html->StashSpace = FALSE; } Pword = Html_parse_entities(word, size); g_string_append(html->Stash, Pword); g_free(Pword); } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) { /* word goes in untouched, it is not processed here. 
*/ Pword = g_strndup(word, size); g_string_append(html->Stash, Pword); g_free(Pword); } if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH || parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) { /* skip until the closing instructions */ } else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) { /* all this overhead is to catch white-space entities */ Pword = Html_parse_entities(word, size); for (start = i = 0; Pword[i]; start = i) if (isspace(Pword[i])) { while (Pword[++i] && isspace(Pword[i])); Html_process_space(html, Pword + start, i - start); } else { while (Pword[++i] && !isspace(Pword[i])); a_Dw_page_add_text(DW_PAGE (html->dw), g_strndup(Pword + start, i - start), html->stack[html->stack_top].style); html->pre_column += i - start; html->PreFirstChar = FALSE; } g_free(Pword); } else { /* add pending space if present */ if (html->SPCPending && !html->PrevWasOpenTag) a_Dw_page_add_space(DW_PAGE (html->dw), html->stack[html->stack_top].style); /* actually white-space entities inside the word could be * collapsed (except ), but that's too much overhead * for a very rare case of ill-formed HTML --Jcid */ Pword = Html_parse_entities(word, size); g_strdelimit(Pword, "\t\f\n\r", ' '); a_Dw_page_add_text(DW_PAGE (html->dw), Pword, html->stack[html->stack_top].style); } html->PrevWasOpenTag = FALSE; html->SPCPending = FALSE;}/* * Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize * structure, with the initial < skipped over (e.g. "P align=center>") */static gboolean Html_match_tag(const char *tagstr, char *tag, gint tagsize){ gint i; for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) { if (tolower(tagstr[i]) != tolower(tag[i])) return FALSE; } /* The test for '/' is for xml compatibility: "empty/>" will be matched. */ if (i < tagsize && (isspace(tag[i]) || tag[i] == '>' || tag[i] == '/')) return TRUE; return FALSE;}/* * This function is called by Html_cleanup_tag and Html_pop_tag, to * handle nested DwPage widgets. 
*/static void Html_eventually_pop_dw(DilloHtml *html){ /* This function is called after popping from the stack, so the * relevant hand_over_break is at html->stack_top + 1. */ if (html->dw != html->stack[html->stack_top].page) { if (html->stack[html->stack_top + 1].hand_over_break) a_Dw_page_hand_over_break(DW_PAGE(html->dw), html->stack[(html)->stack_top].style); a_Dw_page_flush(DW_PAGE(html->dw)); html->dw = html->stack[html->stack_top].page; }}/* * Push the tag (copying attributes from the top of the stack) */static void Html_push_tag(DilloHtml *html, char *tag, gint tagsize){ char *tagstr; gint n_items; /* Save the element's name (no parameters) into tagstr. */ tagstr = g_strdup(Html_tags_get_name(html->CurrTagIdx)); n_items = html->stack_top + 1; a_List_add(html->stack, n_items, html->stack_max); /* We'll copy the former stack item and just change the tag and its index * instead of copying all fields except for tag. --Jcid */ html->stack[n_items] = html->stack[n_items - 1]; html->stack[n_items].tag = tagstr; html->stack[n_items].tag_idx = html->CurrTagIdx; html->stack_top = n_items; /* proper memory management, may be unref'd later */ a_Dw_style_ref (html->stack[html->stack_top].style); if (html->stack[html->stack_top].table_cell_style) a_Dw_style_ref (html->stack[html->stack_top].table_cell_style); html->dw = html->stack[html->stack_top].page;}/* * Remove the stack's topmost tag (only if it matches) * If it matches, TRUE is returned. */static gboolean Html_cleanup_tag(DilloHtml *html, char *tag){ if ( html->stack_top && Html_match_tag(html->stack[html->stack_top].tag, tag, strlen(tag)) ) { a_Dw_style_unref (html->stack[html->stack_top].style); if (html->stack[html->stack_top].table_cell_style) a_Dw_style_unref (html->stack[html->stack_top].table_cell_style); g_free(html->stack[html->stack_top--].tag); Html_eventually_pop_dw(html); return TRUE; } else return FALSE;}/* * Default close function for tags. 
 * (conditional cleanup of the stack)
 * There are several ways of doing it. Considering the HTML 4.01 spec,
 * which defines optional close tags, and the wish to deliver useful
 * diagnostic messages for malformed HTML, it goes as follows:
 *   1.- Search the stack for the first tag that requires a close tag.
 *   2.- If it matches, clean all the optional-close tags in between.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -