📄 html.c
字号:
html->CurrTagOfs = 0; html->CurrTagIdx = 0; html->OldTagOfs = 0; html->OldTagLine = 1; html->dw = NULL; html->bw = bw; html->linkblock = Html_lb_new(bw, url); html->stack_max = 16; html->stack_top = 0; html->stack = g_malloc0 (html->stack_max * sizeof (DilloHtmlState)); html->stack[0].tag = g_strdup("none"); html->stack[0].parse_mode = DILLO_HTML_PARSE_MODE_INIT; html->stack[0].table_mode = DILLO_HTML_TABLE_MODE_NONE; html->stack[0].cell_text_align_set = FALSE; html->stack[0].list_type = HTML_LIST_NONE; /* no <ul> or <ol> open */ html->stack[0].list_number = 0; html->stack[0].page = NULL; html->stack[0].table = NULL; html->stack[0].ref_list_item = NULL; html->stack[0].current_bg_color = prefs.bg_color; html->stack[0].hand_over_break = FALSE; html->Stash = g_string_new(""); html->StashSpace = FALSE; html->pre_column = 0; html->PreFirstChar = FALSE; html->PrevWasCR = FALSE; html->InVisitedLink = FALSE; html->InFlags = IN_HTML | IN_HEAD; html->attr_data = g_string_sized_new(1024); html->logfont = NULL; Html_find_charset (html, "charset=gb2312"); Html_set_dwpage(html); return html;}/* * Initialize the stash buffer */static void Html_stash_init(DilloHtml *html){ html->stack[html->stack_top].parse_mode = DILLO_HTML_PARSE_MODE_STASH; html->StashSpace = FALSE; g_string_truncate(html->Stash, 0);}/* Entities list from the HTML 4.01 DTD */typedef struct { char *entity; int isocode;} Ent_t;#define NumEnt 252static const Ent_t Entities[NumEnt] = { {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302}, {"Agrave",0300}, {"Alpha",01621},{"Aring",0305}, {"Atilde",0303}, {"Auml",0304}, {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647}, {"Dagger",020041}, {"Delta",01624},{"ETH",0320}, {"Eacute",0311}, {"Ecirc",0312}, {"Egrave",0310},{"Epsilon",01625},{"Eta",01627}, {"Euml",0313}, {"Gamma",01623},{"Iacute",0315}, {"Icirc",0316}, {"Igrave",0314}, {"Iota",01631}, {"Iuml",0317}, {"Kappa",01632}, {"Lambda",01633}, {"Mu",01634}, {"Ntilde",0321}, {"Nu",01635}, {"OElig",0522}, {"Oacute",0323},{"Ocirc",0324}, {"Ograve",0322}, {"Omega",01651}, {"Omicron",01637},{"Oslash",0330},{"Otilde",0325},{"Ouml",0326}, {"Phi",01646}, {"Pi",01640}, {"Prime",020063},{"Psi",01650}, {"Rho",01641}, {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336}, {"Tau",01644}, {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333}, {"Ugrave",0331},{"Upsilon",01645},{"Uuml",0334}, {"Xi",01636}, {"Yacute",0335},{"Yuml",0570}, {"Zeta",01626}, {"aacute",0341}, {"acirc",0342}, {"acute",0264}, {"aelig",0346}, {"agrave",0340}, {"alefsym",020465},{"alpha",01661},{"amp",38}, {"and",021047}, {"ang",021040}, {"aring",0345}, {"asymp",021110},{"atilde",0343}, {"auml",0344}, {"bdquo",020036},{"beta",01662}, {"brvbar",0246}, {"bull",020042},{"cap",021051}, {"ccedil",0347}, {"cedil",0270}, {"cent",0242}, {"chi",01707}, {"circ",01306}, {"clubs",023143}, {"cong",021105},{"copy",0251}, {"crarr",020665},{"cup",021052}, {"curren",0244},{"dArr",020723}, {"dagger",020040},{"darr",020623}, {"deg",0260}, {"delta",01664}, {"diams",023146},{"divide",0367}, {"eacute",0351},{"ecirc",0352}, {"egrave",0350}, {"empty",021005}, {"emsp",020003},{"ensp",020002}, {"epsilon",01665},{"equiv",021141}, {"eta",01667}, {"eth",0360}, {"euml",0353}, {"euro",020254}, {"exist",021003},{"fnof",0622}, {"forall",021000},{"frac12",0275}, {"frac14",0274},{"frac34",0276}, {"frasl",020104},{"gamma",01663}, {"ge",021145}, {"gt",62}, {"hArr",020724}, {"harr",020624}, {"hearts",023145},{"hellip",020046},{"iacute",0355},{"icirc",0356}, {"iexcl",0241}, {"igrave",0354}, {"image",020421},{"infin",021036}, {"int",021053}, {"iota",01671}, {"iquest",0277}, {"isin",021010}, {"iuml",0357}, {"kappa",01672}, {"lArr",020720}, {"lambda",01673}, {"lang",021451},{"laquo",0253}, {"larr",020620}, {"lceil",021410}, {"ldquo",020034},{"le",021144}, {"lfloor",021412},{"lowast",021027}, {"loz",022712}, {"lrm",020016}, {"lsaquo",020071},{"lsquo",020030}, {"lt",60}, {"macr",0257}, {"mdash",020024},{"micro",0265}, {"middot",0267},{"minus",021022},{"mu",01674}, {"nabla",021007}, {"nbsp",32}, {"ndash",020023},{"ne",021140}, {"ni",021013}, {"not",0254}, {"notin",021011},{"nsub",021204}, {"ntilde",0361}, {"nu",01675}, {"oacute",0363}, {"ocirc",0364}, {"oelig",0523}, {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677}, {"oplus",021225},{"or",021050}, {"ordf",0252}, {"ordm",0272}, {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366}, {"para",0266}, {"part",021002}, {"permil",020060},{"perp",021245}, {"phi",01706}, {"pi",01700}, {"piv",01726}, {"plusmn",0261}, {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035}, {"psi",01710}, {"quot",34}, {"rArr",020722}, {"radic",021032}, {"rang",021452},{"raquo",0273}, {"rarr",020622}, {"rceil",021411}, {"rdquo",020035},{"real",020434},{"reg",0256}, {"rfloor",021413}, {"rho",01701}, {"rlm",020017}, {"rsaquo",020072},{"rsquo",020031}, {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247}, {"shy",0255}, {"sigma",01703}, {"sigmaf",01702},{"sim",021074}, {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021}, {"sup",021203}, {"sup1",0271}, {"sup2",0262}, {"sup3",0263}, {"supe",021207},{"szlig",0337}, {"tau",01704}, {"there4",021064}, {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376}, {"tilde",01334},{"times",0327}, {"trade",020442},{"uArr",020721}, {"uacute",0372},{"uarr",020621}, {"ucirc",0373}, {"ugrave",0371}, {"uml",0250}, {"upsih",01722}, {"upsilon",01705},{"uuml",0374}, {"weierp",020430},{"xi",01676}, {"yacute",0375}, {"yen",0245}, {"yuml",0377}, {"zeta",01666}, {"zwj",020015}, {"zwnj",020014}};/* * Comparison function for binary search */static int Html_entity_comp (const void *a, const void *b){ return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity);}/* * Binary search of 'key' in entity list */static int Html_entity_search (char *key){ Ent_t *res, EntKey; EntKey.entity = key; res = bsearch(&EntKey, Entities, NumEnt, sizeof(Ent_t), Html_entity_comp); if ( res ) return (res - Entities); return -1;}/* * Switch a few UCS encodings to latin1. */static int Html_try_ucs2latin1(int isocode){ int ret; switch (isocode) { case 0x2018: case 0x2019: ret = '\''; break; case 0x201c: case 0x201d: ret = '"'; break; case 0x2013: case 0x2014: ret = '-'; break; case 0x2039: ret = '<'; break; case 0x203a: ret = '>'; break; case 0x2022: ret = 176; break; default: ret = -1; break; } return ret;}/* * Given an entity, return the ISO-Latin1 character code. * (-1 if not a valid entity) */static int Html_parse_entity(const char *token, int toksize){ int base, isocode, i, ret; char *eoe, *name; if (token[0] != '&') return -1; ret = -1; eoe = (toksize) ? memchr(token, ';', toksize) : strchr(token, ';'); if (eoe) { if (token[1] == '#') { /* Numeric token */ base = (token[2] == 'x' || token[2] == 'X') ? 16 : 10; isocode = strtol(token + 2 + (base==16), NULL, base); if (isocode > 0 && isocode <= 255) { ret = isocode; } else { /* Try a few UCS translations to Latin1 */ ret = Html_try_ucs2latin1(isocode); } } else { /* Search for named entity */ name = g_strndup(token + 1, eoe - token - 1); i = Html_entity_search(name); g_free(name); if (i != -1) { if (Entities[i].isocode > 0 && Entities[i].isocode <= 255) ret = Entities[i].isocode; else ret = Html_try_ucs2latin1(Entities[i].isocode); } } } return ret;}/* * Convert all the entities in a token to plain ISO character codes. Takes * a token and its length, and returns a newly allocated string. */static char *Html_parse_entities (char *token, int toksize){ char *new_str; int i, j, isocode; if ( memchr(token, '&', toksize) == NULL ) return g_strndup(token, toksize); new_str = g_malloc0 (toksize + 1 * sizeof (char)); for (i = j = 0; i < toksize; i++) { if (token[i] == '&' && (isocode = Html_parse_entity(token + i, toksize - i)) != -1) { new_str[j++] = isocode; while (token[++i] != ';'); } else { new_str[j++] = token[i]; } } new_str[j] = '\0'; return new_str;}/* * Parse spaces * */static void Html_process_space(DilloHtml *html, char *space, int spacesize){ int i, offset; DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode; if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ) { html->StashSpace = (html->Stash->len > 0); } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) { char *Pword = g_strndup (space, spacesize); g_string_append (html->Stash, Pword); g_free(Pword); } else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) { /* re-scan the string for characters that cause line breaks */ for (i = 0; i < spacesize; i++) { /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */ if (!html->PreFirstChar && (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) { a_Dw_page_add_linebreak(DW_PAGE (html->dw), html->stack[(html)->stack_top].style); html->pre_column = 0; } html->PreFirstChar = FALSE; /* cr and lf should not be rendered -- they appear as a break */ switch (space[i]) { case '\r': case '\n': break; case '\t': MSG_HTML("TAB character inside <PRE>\n"); offset = TAB_SIZE - html->pre_column % TAB_SIZE; a_Dw_page_add_text(DW_PAGE (html->dw), g_strnfill(offset, ' '), html->stack[html->stack_top].style); html->pre_column += offset; break; default: a_Dw_page_add_text(DW_PAGE (html->dw), g_strndup(space + i, 1), html->stack[html->stack_top].style); html->pre_column++; break; } html->PrevWasCR = (space[i] == '\r'); } } else { a_Dw_page_add_space(DW_PAGE (html->dw), html->stack[html->stack_top].style); if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY ) html->StashSpace = (html->Stash->len > 0); }}/* * Handles putting the word into its proper place * > STASH and VERBATIM --> html->Stash * > otherwise it goes through a_Dw_page_add_text() * * Entities are parsed (or not) according to parse_mode. */static void Html_process_word(DilloHtml *html, char *word, int size){ int i, start; char *Pword; DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode; if (parse_mode == DILLO_HTML_PARSE_MODE_STASH || parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY) { if ( html->StashSpace ) { g_string_append_c(html->Stash, ' '); html->StashSpace = FALSE; } Pword = Html_parse_entities(word, size); g_string_append(html->Stash, Pword); g_free(Pword); } else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) { /* word goes in untouched, it is not processed here. */ Pword = g_strndup(word, size); g_string_append(html->Stash, Pword); g_free(Pword); } if (parse_mode == DILLO_HTML_PARSE_MODE_STASH || parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) return; if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) { /* all this overhead is to catch white-space entities */ Pword = Html_parse_entities(word, size); for (start = i = 0; Pword[i]; start = i) { if (isspace(Pword[i])) { while (Pword[++i] && isspace(Pword[i])); Html_process_space(html, Pword + start, i - start); } else { while (Pword[++i] && !isspace(Pword[i])); a_Dw_page_add_text(DW_PAGE (html->dw), g_strndup(Pword + start, i - start), html->stack[html->stack_top].style); html->pre_column += i - start; html->PreFirstChar = FALSE; } } g_free(Pword); } else { if (memchr(word, '&', size) == NULL) { a_Dw_page_add_text(DW_PAGE (html->dw), g_strndup(word, size), html->stack[html->stack_top].style); } else { /* actually white-space entities inside the word should be * collapsed (except ), but that's too much overhead * for a very rare case of bad-formed HTML --Jcid */ Pword = Html_parse_entities(word, size); g_strdelimit (Pword, "\t\f\n\r", ' '); a_Dw_page_add_text(DW_PAGE (html->dw), Pword, html->stack[html->stack_top].style); } }}/* * Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize * structure, with the initial < skipped over (e.g. "P align=center>") */static gboolean Html_match_tag(const char *tagstr, char *tag, int tagsize){ int i; for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) { if (tolower(tagstr[i]) != tolower(tag[i])) return FALSE; } /* The test for '/' is for xml compatibility: "empty/>" will be matched. */ if (i < tagsize && (isspace(tag[i]) || tag[i] == '>' || tag[i] == '/')) return TRUE; return FALSE;}/* * This function is called by Html_cleanup_tag and Html_pop_tag, to * handle nested DwPage widgets. */static void Html_eventually_pop_dw(DilloHtml *html){ /* This function is called after popping from the stack, so the * relevant hand_over_break is at html->stack_top + 1. */ if (html->dw != html->stack[html->stack_top].page) { if (html->stack[html->stack_top + 1].hand_over_break)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -