📄 html.c
字号:
html->InFlags = 0; html->attr_data = g_string_sized_new(1024); Html_set_dwpage(html); return html;}/* * Initialize the stash buffer */static void Html_stash_init(DilloHtml *html){ html->stack[html->stack_top].parse_mode = DILLO_HTML_PARSE_MODE_STASH; html->StashSpace = FALSE; g_string_truncate(html->Stash, 0);}/* Entities list from the HTML 4.01 DTD */typedef struct { char *entity; int isocode;} Ent_t;#define NumEnt 252static const Ent_t Entities[NumEnt] = { {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302}, {"Agrave",0300}, {"Alpha",01621},{"Aring",0305}, {"Atilde",0303}, {"Auml",0304}, {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647}, {"Dagger",020041}, {"Delta",01624},{"ETH",0320}, {"Eacute",0311}, {"Ecirc",0312}, {"Egrave",0310},{"Epsilon",01625},{"Eta",01627}, {"Euml",0313}, {"Gamma",01623},{"Iacute",0315}, {"Icirc",0316}, {"Igrave",0314}, {"Iota",01631}, {"Iuml",0317}, {"Kappa",01632}, {"Lambda",01633}, {"Mu",01634}, {"Ntilde",0321}, {"Nu",01635}, {"OElig",0522}, {"Oacute",0323},{"Ocirc",0324}, {"Ograve",0322}, {"Omega",01651}, {"Omicron",01637},{"Oslash",0330},{"Otilde",0325},{"Ouml",0326}, {"Phi",01646}, {"Pi",01640}, {"Prime",020063},{"Psi",01650}, {"Rho",01641}, {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336}, {"Tau",01644}, {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333}, {"Ugrave",0331},{"Upsilon",01645},{"Uuml",0334}, {"Xi",01636}, {"Yacute",0335},{"Yuml",0570}, {"Zeta",01626}, {"aacute",0341}, {"acirc",0342}, {"acute",0264}, {"aelig",0346}, {"agrave",0340}, {"alefsym",020465},{"alpha",01661},{"amp",38}, {"and",021047}, {"ang",021040}, {"aring",0345}, {"asymp",021110},{"atilde",0343}, {"auml",0344}, {"bdquo",020036},{"beta",01662}, {"brvbar",0246}, {"bull",020042},{"cap",021051}, {"ccedil",0347}, {"cedil",0270}, {"cent",0242}, {"chi",01707}, {"circ",01306}, {"clubs",023143}, {"cong",021105},{"copy",0251}, {"crarr",020665},{"cup",021052}, {"curren",0244},{"dArr",020723}, {"dagger",020040},{"darr",020623}, {"deg",0260}, {"delta",01664}, 
{"diams",023146},{"divide",0367}, {"eacute",0351},{"ecirc",0352}, {"egrave",0350}, {"empty",021005}, {"emsp",020003},{"ensp",020002}, {"epsilon",01665},{"equiv",021141}, {"eta",01667}, {"eth",0360}, {"euml",0353}, {"euro",020254}, {"exist",021003},{"fnof",0622}, {"forall",021000},{"frac12",0275}, {"frac14",0274},{"frac34",0276}, {"frasl",020104},{"gamma",01663}, {"ge",021145}, {"gt",62}, {"hArr",020724}, {"harr",020624}, {"hearts",023145},{"hellip",020046},{"iacute",0355},{"icirc",0356}, {"iexcl",0241}, {"igrave",0354}, {"image",020421},{"infin",021036}, {"int",021053}, {"iota",01671}, {"iquest",0277}, {"isin",021010}, {"iuml",0357}, {"kappa",01672}, {"lArr",020720}, {"lambda",01673}, {"lang",021451},{"laquo",0253}, {"larr",020620}, {"lceil",021410}, {"ldquo",020034},{"le",021144}, {"lfloor",021412},{"lowast",021027}, {"loz",022712}, {"lrm",020016}, {"lsaquo",020071},{"lsquo",020030}, {"lt",60}, {"macr",0257}, {"mdash",020024},{"micro",0265}, {"middot",0267},{"minus",021022},{"mu",01674}, {"nabla",021007}, {"nbsp",32}, {"ndash",020023},{"ne",021140}, {"ni",021013}, {"not",0254}, {"notin",021011},{"nsub",021204}, {"ntilde",0361}, {"nu",01675}, {"oacute",0363}, {"ocirc",0364}, {"oelig",0523}, {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677}, {"oplus",021225},{"or",021050}, {"ordf",0252}, {"ordm",0272}, {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366}, {"para",0266}, {"part",021002}, {"permil",020060},{"perp",021245}, {"phi",01706}, {"pi",01700}, {"piv",01726}, {"plusmn",0261}, {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035}, {"psi",01710}, {"quot",34}, {"rArr",020722}, {"radic",021032}, {"rang",021452},{"raquo",0273}, {"rarr",020622}, {"rceil",021411}, {"rdquo",020035},{"real",020434},{"reg",0256}, {"rfloor",021413}, {"rho",01701}, {"rlm",020017}, {"rsaquo",020072},{"rsquo",020031}, {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247}, {"shy",0255}, {"sigma",01703}, {"sigmaf",01702},{"sim",021074}, 
{"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021}, {"sup",021203}, {"sup1",0271}, {"sup2",0262}, {"sup3",0263}, {"supe",021207},{"szlig",0337}, {"tau",01704}, {"there4",021064}, {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376}, {"tilde",01334},{"times",0327}, {"trade",020442},{"uArr",020721}, {"uacute",0372},{"uarr",020621}, {"ucirc",0373}, {"ugrave",0371}, {"uml",0250}, {"upsih",01722}, {"upsilon",01705},{"uuml",0374}, {"weierp",020430},{"xi",01676}, {"yacute",0375}, {"yen",0245}, {"yuml",0377}, {"zeta",01666}, {"zwj",020015}, {"zwnj",020014}};/* * Comparison function for binary search */static int Html_entity_comp(const void *a, const void *b){ return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity);}/* * Binary search of 'key' in entity list */static int Html_entity_search(char *key){ Ent_t *res, EntKey; EntKey.entity = key; res = bsearch(&EntKey, Entities, NumEnt, sizeof(Ent_t), Html_entity_comp); if ( res ) return (res - Entities); return -1;}/* * Switch a few UCS encodings to latin1. */static gint Html_try_ucs2latin1(gint isocode){ gint ret; switch (isocode) { case 0x2018: case 0x2019: ret = '\''; break; case 0x201c: case 0x201d: ret = '"'; break; case 0x2013: case 0x2014: ret = '-'; break; case 0x2039: ret = '<'; break; case 0x203a: ret = '>'; break; case 0x2022: ret = 176; break; default: ret = -1; break; } return ret;}/* * Switch a few 'undefined for HTML' ASCII encodings to latin1. */static gint Html_try_ascii2latin1(gint isocode){ gint ret; switch (isocode) { case 145: case 146: ret = '\''; break; case 147: case 148: ret = '"'; break; case 149: ret = 176; break; case 150: case 151: ret = '-'; break; default: ret = isocode; break; } return ret;}/* * Given an entity, return the ISO-Latin1 character code. * Returns a negative value (error code) if not a valid entity. * * The first character *token is assumed to be == '&' * * For valid entities, *entsize is set to the length of the parsed entity. 
*/
/*
 * Details:
 *  - 'toksize' counts the bytes of the token starting at the '&'; a value
 *    of 0 means 'token' is NUL-terminated.
 *  - Return value: the character code (after the Latin-1 substitutions
 *    below), or
 *       -1  no entity syntax was recognized, or the code has no Latin-1
 *           substitute,
 *       -2  numeric character reference out of range,
 *       -3  unknown character entity, or one without the trailing ';'.
 *  - *entsize is always written: the number of bytes consumed, counting
 *    the leading '&'. It never exceeds 'toksize' when 'toksize' is given.
 */
static gint Html_parse_entity(DilloHtml *html, const gchar *token,
                              gint toksize, gint *entsize)
{
   gint isocode = -1, i;
   gchar *tok, *s, c;

   /* Work on a private, NUL-terminated copy of what follows the '&'.
    * Only toksize-1 bytes of the token remain once the '&' is skipped. */
   token++;
   tok = s = (toksize > 0) ? g_strndup(token, (guint)(toksize - 1))
                           : g_strdup(token);

   if (*s == '#') {
      /* numeric character reference */
      long code = -1;   /* kept as long so the range check sees the full value */

      errno = 0;
      if (*++s == 'x' || *s == 'X') {
         if (isxdigit((unsigned char)*++s)) {
            /* strtol with base 16 accepts leading "0x"/"0X" - we don't */
            if (*s == '0' && (s[1] == 'x' || s[1] == 'X')) {
               s++;
               code = 0;
            } else {
               code = strtol(s, &s, 16);
            }
         }
      } else if (isdigit((unsigned char)*s)) {
         code = strtol(s, &s, 10);
      }

      if (!code || errno || code > 0x7fffffffL) {
         /* this catches null bytes, errors and codes >=2^31 */
         MSG_HTML("numeric character reference out of range\n");
         isocode = -2;
      } else {
         /* either a valid code, or still -1 when no digits were found */
         isocode = (gint)code;
      }

      if (isocode != -1) {
         if (*s == ';')
            s++;
         else if (prefs.show_extra_warnings)
            MSG_HTML("numeric character reference without trailing ';'\n");
      }

   } else if (isalpha((unsigned char)*s)) {
      /* character entity reference */

      /* Stop explicitly at the terminator: strchr() also matches '\0',
       * so without the first test the scan would run past the buffer. */
      while (*++s && (isalnum((unsigned char)*s) || strchr(":_.-", *s)))
         ;
      c = *s;
      *s = 0;   /* cut the name so it can be looked up and reported */

      if (c != ';' || (i = Html_entity_search(tok)) == -1) {
         if ((html->DocType == DT_HTML && html->DocTypeVersion == 4.01f) ||
             html->DocType == DT_XHTML)
            MSG_HTML("undefined character entity '%s'\n", tok);
         isocode = -3;
      } else {
         isocode = Entities[i].isocode;
      }

      if (c == ';')
         s++;
      else if (prefs.show_extra_warnings)
         MSG_HTML("character entity reference without trailing ';'\n");
   }

   /* bytes consumed, plus one for the '&' that was skipped above */
   *entsize = (gint)(s - tok) + 1;
   g_free(tok);

   if (isocode >= 128 && isocode <= 159) {
      MSG_HTML("code positions 128-159 are not defined for ISO Latin-1\n");
      isocode = Html_try_ascii2latin1(isocode);
   } else if (isocode > 255) {
      /* Try a few UCS translations to Latin1 */
      isocode = Html_try_ucs2latin1(isocode);
   } else if (isocode == -1 && prefs.show_extra_warnings) {
      MSG_HTML("literal '&'\n");
   }

   return isocode;
}

/*
 * Convert all the entities in a token to plain ISO character codes. Takes
 * a token and its length, and returns a newly allocated string.
*/
/*
 * Details:
 *  - The result is allocated with g_strndup(); it is never longer than
 *    'toksize' bytes plus the terminator, since every substitution below
 *    writes at most as many bytes as it consumes.
 *  - Tokens holding none of '&', 0xE2 or 0xC2 are returned as a plain copy.
 *  - An '&' that Html_parse_entity() rejects (negative return) is copied
 *    through literally.
 *  - Multi-byte lookahead is bounds-checked against 'toksize' before the
 *    following bytes are read.
 */
static char *
 Html_parse_entities(DilloHtml *html, gchar *token, gint toksize)
{
   const gchar *esc_set = "&\xE2\xC2";
   gchar *new_str;
   gint i, j, isocode, entsize;

   new_str = g_strndup(token, toksize);
   if (new_str[strcspn(new_str, esc_set)] == 0)
      return new_str;   /* nothing to translate */

   for (i = j = 0; i < toksize; i++) {
      if (token[i] == '&' &&
          (isocode = Html_parse_entity(html, token+i,
                                       toksize-i, &entsize)) >= 0) {
         new_str[j++] = (gchar) isocode;
         i += entsize-1;

      } else if (i+2 < toksize &&
                 token[i] == '\xE2' && token[i+1] == '\x80') {
         /* Hack: for parsing some UTF-8 characters into latin1 */
         switch (token[i+2]) {
         case '\x94':
            new_str[j++] = '-';
            new_str[j++] = '-';
            break;
         case '\x98':
         case '\x99':
            new_str[j++] = '\'';
            break;
         case '\x9C':
         case '\x9D':
            new_str[j++] = '"';
            break;
         case '\xA2':
            new_str[j++] = '*';
            new_str[j++] = ' ';
            break;
         default: /* unhandled */
            /* only the lead byte is emitted; the two bytes that follow
             * are skipped by the 'i += 2' below */
            new_str[j++] = '\xE2';
            break;
         }
         i += 2;

      } else if (i+1 < toksize &&
                 token[i] == '\xC2' && token[i+1] == '\xA0') {
         /* Hack: for parsing some UTF-8 characters into latin1 */
         new_str[j++] = ' ';
         ++i;

      } else {
         new_str[j++] = token[i];
      }
   }
   new_str[j] = '\0';
   return new_str;
}

/*
 * Parse spaces
 *
 * 'space' points to 'spacesize' bytes of white space. What happens to them
 * depends on the parse mode of the current stack element:
 *  > STASH     --> only remembered through html->StashSpace
 *  > VERBATIM  --> appended unchanged to html->Stash
 *  > PRE       --> rendered one character at a time (line breaks, tabs)
 *  > otherwise --> saved in html->SPCBuf and flagged with html->SPCPending
 */
static void Html_process_space(DilloHtml *html, char *space, gint spacesize)
{
   gint i, offset;
   DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode;

   if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ) {
      /* a separator is only due when the stash already holds text */
      html->StashSpace = (html->Stash->len > 0);
      html->SPCPending = FALSE;

   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) {
      char *Pword = g_strndup(space, spacesize);
      g_string_append(html->Stash, Pword);
      g_free(Pword);
      html->SPCPending = FALSE;

   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) {
      /* re-scan the string for characters that cause line breaks */
      for (i = 0; i < spacesize; i++) {
         /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */
         if (!html->PreFirstChar &&
             (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {
            a_Dw_page_add_linebreak(DW_PAGE (html->dw),
                                    html->stack[(html)->stack_top].style);
            html->pre_column = 0;
         }
         html->PreFirstChar = FALSE;

         /* cr and lf should not be rendered -- they appear as a break */
         switch (space[i]) {
         case '\r':
         case '\n':
            break;
         case '\t':
            if (prefs.show_extra_warnings)
               MSG_HTML("TAB character inside <PRE>\n");
            /* pad with spaces up to the next multiple of TAB_SIZE */
            offset = TAB_SIZE - html->pre_column % TAB_SIZE;
            /* NOTE(review): the allocated string is not freed here;
             * presumably a_Dw_page_add_text() takes ownership -- confirm */
            a_Dw_page_add_text(DW_PAGE (html->dw),
                               g_strnfill(offset, ' '),
                               html->stack[html->stack_top].style);
            html->pre_column += offset;
            break;
         default:
            a_Dw_page_add_text(DW_PAGE (html->dw),
                               g_strndup(space + i, 1),
                               html->stack[html->stack_top].style);
            html->pre_column++;
            break;
         }

         html->PrevWasCR = (space[i] == '\r');
      }
      html->SPCPending = FALSE;

   } else {
      if (SGML_SPCDEL && html->PrevWasOpenTag) {
         /* SGML_SPCDEL ignores white space immediately after an open tag */
         html->SPCPending = FALSE;
      } else {
         /* keep the latest run of spaces; any earlier one is released */
         g_free(html->SPCBuf);
         html->SPCBuf = g_strndup(space, spacesize);
         html->SPCPending = TRUE;
      }

      if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY )
         html->StashSpace = (html->Stash->len > 0);
   }
}

/*
 * Handles putting the word into its proper place
 *  > STASH and VERBATIM --> html->Stash
 *  > otherwise it goes through a_Dw_page_add_text()
 *
 * Entities are parsed (or not) according to parse_mode.
 */
static void Html_process_word(DilloHtml *html, char *word, gint size)
{
   gint i, start;
   gchar *Pword;
   DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode;

   if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
        parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY ) {
      if ( html->StashSpace ) {
         g_string_append_c(html->Stash, ' ');
         html->StashSpace = FALSE;
      }
      Pword = Html_parse_entities(html, word, size);
      g_string_append(html->Stash, Pword);
      g_free(Pword);

   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) {
      /* word goes in untouched, it is not processed here. */
      Pword = g_strndup(word, size);
      g_string_append(html->Stash, Pword);
      g_free(Pword);
   }

   if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ||
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -