📄 html.c

📁 浏览器的源代码,可移植到嵌入式设备.
💻 C
📖 第 1 页 / 共 5 页
字号:
   {"iuml",0357},  {"kappa",01672}, {"lArr",020720}, {"lambda",01673},   {"lang",021451},{"laquo",0253},  {"larr",020620}, {"lceil",021410},   {"ldquo",020034},{"le",021144},  {"lfloor",021412},{"lowast",021027},   {"loz",022712}, {"lrm",020016},  {"lsaquo",020071},{"lsquo",020030},   {"lt",60},      {"macr",0257},   {"mdash",020024},{"micro",0265},   {"middot",0267},{"minus",021022},{"mu",01674},    {"nabla",021007},   {"nbsp",32},    {"ndash",020023},{"ne",021140},   {"ni",021013},   {"not",0254},   {"notin",021011},{"nsub",021204}, {"ntilde",0361},   {"nu",01675},   {"oacute",0363}, {"ocirc",0364},  {"oelig",0523},   {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677},   {"oplus",021225},{"or",021050},  {"ordf",0252},   {"ordm",0272},   {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366},   {"para",0266},  {"part",021002}, {"permil",020060},{"perp",021245},   {"phi",01706},  {"pi",01700},    {"piv",01726},   {"plusmn",0261},   {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035},   {"psi",01710},  {"quot",34},     {"rArr",020722}, {"radic",021032},   {"rang",021452},{"raquo",0273},  {"rarr",020622}, {"rceil",021411},   {"rdquo",020035},{"real",020434},{"reg",0256},    {"rfloor",021413},   {"rho",01701},  {"rlm",020017},  {"rsaquo",020072},{"rsquo",020031},   {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247},   {"shy",0255},   {"sigma",01703}, {"sigmaf",01702},{"sim",021074},   {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021},   {"sup",021203}, {"sup1",0271},   {"sup2",0262},   {"sup3",0263},   {"supe",021207},{"szlig",0337},  {"tau",01704},   {"there4",021064},   {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376},   {"tilde",01334},{"times",0327},  {"trade",020442},{"uArr",020721},   {"uacute",0372},{"uarr",020621}, {"ucirc",0373},  {"ugrave",0371},   {"uml",0250},   {"upsih",01722}, {"upsilon",01705},{"uuml",0374},   {"weierp",020430},{"xi",01676},  {"yacute",0375}, {"yen",0245},   {"yuml",0377},  {"zeta",01666},  {"zwj",020015},  {"zwnj",020014}};/* * Comparison function for binary search */static int Html_entity_comp(const void *a, const void *b){   return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity);}/* * Binary search of 'key' in entity list */static int Html_entity_search(char *key){   Ent_t *res, EntKey;   EntKey.entity = key;   res = bsearch(&EntKey, Entities, NumEnt, sizeof(Ent_t), Html_entity_comp);   if ( res )     return (res - Entities);   return -1;}/* * Switch a few UCS encodings to latin1. */static gint Html_try_ucs2latin1(gint isocode){   gint ret;   switch (isocode) {      case 0x2018:      case 0x2019: ret = '\''; break;      case 0x201c:      case 0x201d: ret = '"'; break;      case 0x2013:      case 0x2014: ret = '-'; break;      case 0x2039: ret = '<'; break;      case 0x203a: ret = '>'; break;      case 0x2022: ret = 176; break;      default:     ret = -1;  break;   }   return ret;}/* * Given an entity, return the ISO-Latin1 character code. * (-1 if not a valid entity) */static gint Html_parse_entity(const gchar *token, gint toksize){   gint base, isocode, i, ret;   gchar *eoe, *name;   g_return_val_if_fail (token[0] == '&', -1);   ret = -1;   eoe = (toksize) ? memchr(token, ';', toksize) : strchr(token, ';');   if (eoe) {      if (token[1] == '#') {         /* Numeric token */         base = (token[2] == 'x' || token[2] == 'X') ? 16 : 10;         isocode = strtol(token + 2 + (base==16), NULL, base);         if (isocode > 0 && isocode <= 255) {            ret = isocode;         } else {            /* Try a few UCS translations to Latin1 */            ret = Html_try_ucs2latin1(isocode);         }      } else {         /* Search for named entity */         name = g_strndup(token + 1, eoe - token - 1);         i = Html_entity_search(name);         g_free(name);         if (i != -1) {            if (Entities[i].isocode > 0 && Entities[i].isocode <= 255)               ret = Entities[i].isocode;            else               ret = Html_try_ucs2latin1(Entities[i].isocode);         }      }   }   return ret;}/* * Convert all the entities in a token to plain ISO character codes. Takes * a token and its length, and returns a newly allocated string. */static char *Html_parse_entities(gchar *token, gint toksize){   gchar *esc_set = "&\xE2\xC2";   gchar *new_str;   gint i, j, isocode;   new_str = g_strndup(token, toksize);   if (new_str[strcspn(new_str, esc_set)] == 0)      return new_str;   for (i = j = 0; i < toksize; i++) {      if (token[i] == '&' &&          (isocode = Html_parse_entity(token + i, toksize - i)) != -1) {         new_str[j++] = isocode;         while (token[++i] != ';');      } else if (token[i] == '\xE2' && token[i+1] == '\x80' && i+2 < toksize){         /* Hack: for parsing some UTF-8 characters into latin1 */         switch (token[i+2]) {         case '\x94':            new_str[j++] = '-';            new_str[j++] = '-';            break;         case '\x98':         case '\x99':            new_str[j++] = '\'';            break;         case '\x9C':         case '\x9D':            new_str[j++] = '"';            break;         case '\xA2':            new_str[j++] = '*';            new_str[j++] = ' ';            break;         default: /* unhandled */            new_str[j++] = '\xE2';            break;         }         i += 2;      } else if (token[i] == '\xC2' && token[i+1] == '\xA0') {         /* Hack: for parsing some UTF-8 characters into latin1 */         new_str[j++] = ' ';         ++i;      } else {         new_str[j++] = token[i];      }   }   new_str[j] = '\0';   return new_str;}/* * Parse spaces * */static void Html_process_space(DilloHtml *html, char *space, gint spacesize){   gint i, offset;   DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode;   if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ) {      html->StashSpace = (html->Stash->len > 0);      html->SPCPending = FALSE;   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) {      char *Pword = g_strndup(space, spacesize);      g_string_append(html->Stash, Pword);      g_free(Pword);      html->SPCPending = FALSE;   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) {      /* re-scan the string for characters that cause line breaks */      for (i = 0; i < spacesize; i++) {         /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */         if (!html->PreFirstChar &&             (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {            a_Dw_page_add_linebreak(DW_PAGE (html->dw),                                    html->stack[(html)->stack_top].style);            html->pre_column = 0;         }         html->PreFirstChar = FALSE;         /* cr and lf should not be rendered -- they appear as a break */         switch (space[i]) {         case '\r':         case '\n':            break;         case '\t':            if (prefs.show_extra_warnings)               MSG_HTML("TAB character inside <PRE>\n");            offset = TAB_SIZE - html->pre_column % TAB_SIZE;            a_Dw_page_add_text(DW_PAGE (html->dw),                               g_strnfill(offset, ' '),                               html->stack[html->stack_top].style);            html->pre_column += offset;            break;         default:            a_Dw_page_add_text(DW_PAGE (html->dw),                               g_strndup(space + i, 1),                               html->stack[html->stack_top].style);            html->pre_column++;            break;         }         html->PrevWasCR = (space[i] == '\r');      }      html->SPCPending = FALSE;   } else {      if (html->PrevWasOpenTag) {         /* ignore white space inmediately after an open tag */         html->SPCPending = FALSE;      } else {         g_free(html->SPCBuf);         html->SPCBuf = g_strndup(space, spacesize);         html->SPCPending = TRUE;      }      if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY )         html->StashSpace = (html->Stash->len > 0);   }}/* * Handles putting the word into its proper place *  > STASH and VERBATIM --> html->Stash *  > otherwise it goes through a_Dw_page_add_text() * * Entities are parsed (or not) according to parse_mode. */static void Html_process_word(DilloHtml *html, char *word, gint size){   gint i, start;   gchar *Pword;   DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode;   if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ||        parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY ) {      if ( html->StashSpace ) {         g_string_append_c(html->Stash, ' ');         html->StashSpace = FALSE;      }      Pword = Html_parse_entities(word, size);      g_string_append(html->Stash, Pword);      g_free(Pword);   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) {      /* word goes in untouched, it is not processed here. */      Pword = g_strndup(word, size);      g_string_append(html->Stash, Pword);      g_free(Pword);   }   if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH  ||        parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) {      /* skip until the closing instructions */   } else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) {      /* all this overhead is to catch white-space entities */      Pword = Html_parse_entities(word, size);      for (start = i = 0; Pword[i]; start = i)         if (isspace(Pword[i])) {            while (Pword[++i] && isspace(Pword[i]));            Html_process_space(html, Pword + start, i - start);         } else {            while (Pword[++i] && !isspace(Pword[i]));            a_Dw_page_add_text(DW_PAGE (html->dw),                               g_strndup(Pword + start, i - start),                               html->stack[html->stack_top].style);            html->pre_column += i - start;            html->PreFirstChar = FALSE;         }      g_free(Pword);   } else {      /* add pending space if present */      if (html->SPCPending && !html->PrevWasOpenTag)         a_Dw_page_add_space(DW_PAGE (html->dw),                             html->stack[html->stack_top].style);      /* actually white-space entities inside the word could be       * collapsed (except &nbsp;), but that's too much overhead       * for a very rare case of ill-formed HTML  --Jcid */      Pword = Html_parse_entities(word, size);      g_strdelimit(Pword, "\t\f\n\r", ' ');      a_Dw_page_add_text(DW_PAGE (html->dw),                         Pword,                         html->stack[html->stack_top].style);   }   html->PrevWasOpenTag = FALSE;   html->SPCPending = FALSE;}/* * Does the tag in tagstr (e.g. "p") match the tag in the tag, tagsize * structure, with the initial < skipped over (e.g. "P align=center>") */static gboolean Html_match_tag(const char *tagstr, char *tag, gint tagsize){   gint i;   for (i = 0; i < tagsize && tagstr[i] != '\0'; i++) {      if (tolower(tagstr[i]) != tolower(tag[i]))         return FALSE;   }   /* The test for '/' is for xml compatibility: "empty/>" will be matched. */   if (i < tagsize && (isspace(tag[i]) || tag[i] == '>' || tag[i] == '/'))      return TRUE;   return FALSE;}/* * This function is called by Html_cleanup_tag and Html_pop_tag, to * handle nested DwPage widgets. */static void Html_eventually_pop_dw(DilloHtml *html){   /* This function is called after popping from the stack, so the    * relevant hand_over_break is at html->stack_top + 1. */   if (html->dw != html->stack[html->stack_top].page) {      if (html->stack[html->stack_top + 1].hand_over_break)         a_Dw_page_hand_over_break(DW_PAGE(html->dw),                                   html->stack[(html)->stack_top].style);      a_Dw_page_flush(DW_PAGE(html->dw));      html->dw = html->stack[html->stack_top].page;   }}/* * Push the tag (copying attributes from the top of the stack) */static void Html_push_tag(DilloHtml *html, char *tag, gint tagsize){   char *tagstr;   gint n_items;   /* Save the element's name (no parameters) into tagstr. */   tagstr = g_strdup(Html_tags_get_name(html->CurrTagIdx));   n_items = html->stack_top + 1;   a_List_add(html->stack, n_items, html->stack_max);   /* We'll copy the former stack item and just change the tag and its index    * instead of copying all fields except for tag.  --Jcid */   html->stack[n_items] = html->stack[n_items - 1];   html->stack[n_items].tag = tagstr;   html->stack[n_items].tag_idx = html->CurrTagIdx;   html->stack_top = n_items;   /* proper memory management, may be unref'd later */   a_Dw_style_ref (html->stack[html->stack_top].style);   if (html->stack[html->stack_top].table_cell_style)      a_Dw_style_ref (html->stack[html->stack_top].table_cell_style);   html->dw = html->stack[html->stack_top].page;}/* * Remove the stack's topmost tag (only if it matches) * If it matches, TRUE is returned. */static gboolean Html_cleanup_tag(DilloHtml *html, char *tag){   if ( html->stack_top &&        Html_match_tag(html->stack[html->stack_top].tag, tag, strlen(tag)) ) {      a_Dw_style_unref (html->stack[html->stack_top].style);      if (html->stack[html->stack_top].table_cell_style)         a_Dw_style_unref (html->stack[html->stack_top].table_cell_style);      g_free(html->stack[html->stack_top--].tag);      Html_eventually_pop_dw(html);      return TRUE;   } else      return FALSE;}/* * Default close function for tags. * (conditional cleanup of the stack) * There're several ways of doing it. Considering the HTML 4.01 spec * which defines optional close tags, and the will to deliver useful diagnose * messages for bad-formed HTML, it'll go as follows: *   1.- Search the stack for the first tag that requires a close tag. *   2.- If it matches, clean all the optional-close tags in between.
💿 文件大小 589 K
👤 上传用户 lichao0516
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#浏览器 #嵌入式设备 #源代码 #移植
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -