📄 html.c

📁 基于minigui的浏览器. 这是最新版本.
💻 C
📖 第 1 页 / 共 5 页
字号:
   html->InFlags = 0;   html->attr_data = g_string_sized_new(1024);   Html_set_dwpage(html);   return html;}/* * Initialize the stash buffer */static void Html_stash_init(DilloHtml *html){   html->stack[html->stack_top].parse_mode = DILLO_HTML_PARSE_MODE_STASH;   html->StashSpace = FALSE;   g_string_truncate(html->Stash, 0);}/* Entities list from the HTML 4.01 DTD */typedef struct {   char *entity;   int isocode;} Ent_t;#define NumEnt 252static const Ent_t Entities[NumEnt] = {   {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302},  {"Agrave",0300},   {"Alpha",01621},{"Aring",0305},  {"Atilde",0303}, {"Auml",0304},   {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647},   {"Dagger",020041},   {"Delta",01624},{"ETH",0320},    {"Eacute",0311}, {"Ecirc",0312},   {"Egrave",0310},{"Epsilon",01625},{"Eta",01627},  {"Euml",0313},   {"Gamma",01623},{"Iacute",0315}, {"Icirc",0316},  {"Igrave",0314},   {"Iota",01631}, {"Iuml",0317},   {"Kappa",01632}, {"Lambda",01633},   {"Mu",01634},   {"Ntilde",0321}, {"Nu",01635},    {"OElig",0522},   {"Oacute",0323},{"Ocirc",0324},  {"Ograve",0322}, {"Omega",01651},   {"Omicron",01637},{"Oslash",0330},{"Otilde",0325},{"Ouml",0326},   {"Phi",01646},  {"Pi",01640},    {"Prime",020063},{"Psi",01650},   {"Rho",01641},  {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336},   {"Tau",01644},  {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333},   {"Ugrave",0331},{"Upsilon",01645},{"Uuml",0334},  {"Xi",01636},   {"Yacute",0335},{"Yuml",0570},   {"Zeta",01626},  {"aacute",0341},   {"acirc",0342}, {"acute",0264},  {"aelig",0346},  {"agrave",0340},   {"alefsym",020465},{"alpha",01661},{"amp",38},    {"and",021047},   {"ang",021040}, {"aring",0345},  {"asymp",021110},{"atilde",0343},   {"auml",0344},  {"bdquo",020036},{"beta",01662},  {"brvbar",0246},   {"bull",020042},{"cap",021051},  {"ccedil",0347}, {"cedil",0270},   {"cent",0242},  {"chi",01707},   {"circ",01306},  {"clubs",023143},   {"cong",021105},{"copy",0251},   {"crarr",020665},{"cup",021052},   {"curren",0244},{"dArr",020723}, {"dagger",020040},{"darr",020623},   {"deg",0260},   {"delta",01664}, {"diams",023146},{"divide",0367},   {"eacute",0351},{"ecirc",0352},  {"egrave",0350}, {"empty",021005},   {"emsp",020003},{"ensp",020002}, {"epsilon",01665},{"equiv",021141},   {"eta",01667},  {"eth",0360},    {"euml",0353},   {"euro",020254},   {"exist",021003},{"fnof",0622},  {"forall",021000},{"frac12",0275},   {"frac14",0274},{"frac34",0276}, {"frasl",020104},{"gamma",01663},   {"ge",021145},  {"gt",62},       {"hArr",020724}, {"harr",020624},   {"hearts",023145},{"hellip",020046},{"iacute",0355},{"icirc",0356},   {"iexcl",0241}, {"igrave",0354}, {"image",020421},{"infin",021036},   {"int",021053}, {"iota",01671},  {"iquest",0277}, {"isin",021010},   {"iuml",0357},  {"kappa",01672}, {"lArr",020720}, {"lambda",01673},   {"lang",021451},{"laquo",0253},  {"larr",020620}, {"lceil",021410},   {"ldquo",020034},{"le",021144},  {"lfloor",021412},{"lowast",021027},   {"loz",022712}, {"lrm",020016},  {"lsaquo",020071},{"lsquo",020030},   {"lt",60},      {"macr",0257},   {"mdash",020024},{"micro",0265},   {"middot",0267},{"minus",021022},{"mu",01674},    {"nabla",021007},   {"nbsp",32},    {"ndash",020023},{"ne",021140},   {"ni",021013},   {"not",0254},   {"notin",021011},{"nsub",021204}, {"ntilde",0361},   {"nu",01675},   {"oacute",0363}, {"ocirc",0364},  {"oelig",0523},   {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677},   {"oplus",021225},{"or",021050},  {"ordf",0252},   {"ordm",0272},   {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366},   {"para",0266},  {"part",021002}, {"permil",020060},{"perp",021245},   {"phi",01706},  {"pi",01700},    {"piv",01726},   {"plusmn",0261},   {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035},   {"psi",01710},  {"quot",34},     {"rArr",020722}, {"radic",021032},   {"rang",021452},{"raquo",0273},  {"rarr",020622}, {"rceil",021411},   {"rdquo",020035},{"real",020434},{"reg",0256},    {"rfloor",021413},   {"rho",01701},  {"rlm",020017},  {"rsaquo",020072},{"rsquo",020031},   {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247},   {"shy",0255},   {"sigma",01703}, {"sigmaf",01702},{"sim",021074},   {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021},   {"sup",021203}, {"sup1",0271},   {"sup2",0262},   {"sup3",0263},   {"supe",021207},{"szlig",0337},  {"tau",01704},   {"there4",021064},   {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376},   {"tilde",01334},{"times",0327},  {"trade",020442},{"uArr",020721},   {"uacute",0372},{"uarr",020621}, {"ucirc",0373},  {"ugrave",0371},   {"uml",0250},   {"upsih",01722}, {"upsilon",01705},{"uuml",0374},   {"weierp",020430},{"xi",01676},  {"yacute",0375}, {"yen",0245},   {"yuml",0377},  {"zeta",01666},  {"zwj",020015},  {"zwnj",020014}};/* * Comparison function for binary search */static int Html_entity_comp(const void *a, const void *b){   return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity);}/* * Binary search of 'key' in entity list */static int Html_entity_search(char *key){   Ent_t *res, EntKey;   EntKey.entity = key;   res = bsearch(&EntKey, Entities, NumEnt, sizeof(Ent_t), Html_entity_comp);   if ( res )     return (res - Entities);   return -1;}/* * Switch a few UCS encodings to latin1. */static gint Html_try_ucs2latin1(gint isocode){   gint ret;   switch (isocode) {      case 0x2018:      case 0x2019: ret = '\''; break;      case 0x201c:      case 0x201d: ret = '"'; break;      case 0x2013:      case 0x2014: ret = '-'; break;      case 0x2039: ret = '<'; break;      case 0x203a: ret = '>'; break;      case 0x2022: ret = 176; break;      default:     ret = -1;  break;   }   return ret;}/* * Switch a few 'undefined for HTML' ASCII encodings to latin1. */static gint Html_try_ascii2latin1(gint isocode){   gint ret;   switch (isocode) {      case 145:      case 146: ret = '\''; break;      case 147:      case 148: ret = '"'; break;      case 149: ret = 176; break;      case 150:      case 151: ret = '-'; break;      default:  ret = isocode; break;   }   return ret;}/* * Given an entity, return the ISO-Latin1 character code. * Returns a negative value (error code) if not a valid entity. * * The first character *token is assumed to be == '&' * * For valid entities, *entsize is set to the length of the parsed entity. */static gint Html_parse_entity(DilloHtml *html, const gchar *token,                              gint toksize, gint *entsize){   gint isocode, i;   gchar *tok, *s, c;   token++;   tok = s = toksize ? g_strndup(token, (guint)toksize) : g_strdup(token);   isocode = -1;   if (*s == '#') {      /* numeric character reference */      errno = 0;      if (*++s == 'x' || *s == 'X') {         if (isxdigit(*++s)) {            /* strtol with base 16 accepts leading "0x" - we don't */            if (*s == '0' && s[1] == 'x') {               s++;               isocode = 0;            } else {               isocode = strtol(s, &s, 16);            }         }      } else if (isdigit(*s)) {         isocode = strtol(s, &s, 10);      }      if (!isocode || errno || isocode > 0x7fffffffL) {         /* this catches null bytes, errors and codes >=2^31 */         MSG_HTML("numeric character reference out of range\n");         isocode = -2;      }      if (isocode != -1) {         if (*s == ';')            s++;         else if (prefs.show_extra_warnings)            MSG_HTML("numeric character reference without trailing ';'\n");      }   } else if (isalpha(*s)) {      /* character entity reference */      while (isalnum(*++s) || strchr(":_.-", *s));      c = *s;      *s = 0;      if (c != ';' || (i = Html_entity_search(tok)) == -1) {         if ((html->DocType == DT_HTML && html->DocTypeVersion == 4.01f) ||             html->DocType == DT_XHTML)            MSG_HTML("undefined character entity '%s'\n", tok);         isocode = -3;      } else         isocode = Entities[i].isocode;      if (c == ';')         s++;      else if (prefs.show_extra_warnings)         MSG_HTML("character entity reference without trailing ';'\n");   }   *entsize = s-tok+1;   g_free(tok);   if (isocode >= 128 && isocode <= 159) {      MSG_HTML("code positions 128-159 are not defined for ISO Latin-1\n");      isocode = Html_try_ascii2latin1(isocode);   } else if (isocode  > 255)      /* Try a few UCS translations to Latin1 */      isocode = Html_try_ucs2latin1(isocode);   else if (isocode == -1 && prefs.show_extra_warnings)      MSG_HTML("literal '&'\n");   return isocode;}/* * Convert all the entities in a token to plain ISO character codes. Takes * a token and its length, and returns a newly allocated string. */static char * Html_parse_entities(DilloHtml *html, gchar *token, gint toksize){   gchar *esc_set = "&\xE2\xC2";   gchar *new_str;   gint i, j, isocode, entsize;   new_str = g_strndup(token, toksize);   if (new_str[strcspn(new_str, esc_set)] == 0)      return new_str;   for (i = j = 0; i < toksize; i++) {      if (token[i] == '&' &&          (isocode = Html_parse_entity(html, token+i,                                       toksize-i, &entsize)) >= 0) {         new_str[j++] = (gchar) isocode;         i += entsize-1;      } else if (token[i] == '\xE2' && token[i+1] == '\x80' && i+2 < toksize){         /* Hack: for parsing some UTF-8 characters into latin1 */         switch (token[i+2]) {         case '\x94':            new_str[j++] = '-';            new_str[j++] = '-';            break;         case '\x98':         case '\x99':            new_str[j++] = '\'';            break;         case '\x9C':         case '\x9D':            new_str[j++] = '"';            break;         case '\xA2':            new_str[j++] = '*';            new_str[j++] = ' ';            break;         default: /* unhandled */            new_str[j++] = '\xE2';            break;         }         i += 2;      } else if (token[i] == '\xC2' && token[i+1] == '\xA0') {         /* Hack: for parsing some UTF-8 characters into latin1 */         new_str[j++] = ' ';         ++i;      } else {         new_str[j++] = token[i];      }   }   new_str[j] = '\0';   return new_str;}/* * Parse spaces * */static void Html_process_space(DilloHtml *html, char *space, gint spacesize){   gint i, offset;   DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode;   if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ) {      html->StashSpace = (html->Stash->len > 0);      html->SPCPending = FALSE;   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) {      char *Pword = g_strndup(space, spacesize);      g_string_append(html->Stash, Pword);      g_free(Pword);      html->SPCPending = FALSE;   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) {      /* re-scan the string for characters that cause line breaks */      for (i = 0; i < spacesize; i++) {         /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */         if (!html->PreFirstChar &&             (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {            a_Dw_page_add_linebreak(DW_PAGE (html->dw),                                    html->stack[(html)->stack_top].style);            html->pre_column = 0;         }         html->PreFirstChar = FALSE;         /* cr and lf should not be rendered -- they appear as a break */         switch (space[i]) {         case '\r':         case '\n':            break;         case '\t':            if (prefs.show_extra_warnings)               MSG_HTML("TAB character inside <PRE>\n");            offset = TAB_SIZE - html->pre_column % TAB_SIZE;            a_Dw_page_add_text(DW_PAGE (html->dw),                               g_strnfill(offset, ' '),                               html->stack[html->stack_top].style);            html->pre_column += offset;            break;         default:            a_Dw_page_add_text(DW_PAGE (html->dw),                               g_strndup(space + i, 1),                               html->stack[html->stack_top].style);            html->pre_column++;            break;         }         html->PrevWasCR = (space[i] == '\r');      }      html->SPCPending = FALSE;   } else {      if (SGML_SPCDEL && html->PrevWasOpenTag) {         /* SGML_SPCDEL ignores white space inmediately after an open tag */         html->SPCPending = FALSE;      } else {         g_free(html->SPCBuf);         html->SPCBuf = g_strndup(space, spacesize);         html->SPCPending = TRUE;      }      if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY )         html->StashSpace = (html->Stash->len > 0);   }}/* * Handles putting the word into its proper place *  > STASH and VERBATIM --> html->Stash *  > otherwise it goes through a_Dw_page_add_text() * * Entities are parsed (or not) according to parse_mode. */static void Html_process_word(DilloHtml *html, char *word, gint size){   gint i, start;   gchar *Pword;   DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode;   if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ||        parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY ) {      if ( html->StashSpace ) {         g_string_append_c(html->Stash, ' ');         html->StashSpace = FALSE;      }      Pword = Html_parse_entities(html, word, size);      g_string_append(html->Stash, Pword);      g_free(Pword);   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) {      /* word goes in untouched, it is not processed here. */      Pword = g_strndup(word, size);      g_string_append(html->Stash, Pword);      g_free(Pword);   }   if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH  ||
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -