⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 html.c

📁 嵌入式下基于MiniGUI的Web Browser
💻 C
📖 第 1 页 / 共 5 页
字号:
    Html_set_dwpage(html);    return html;}/* * Initialize the stash buffer */static void Html_stash_init(DilloHtml *html){    html->stack[html->stack_top].parse_mode = DILLO_HTML_PARSE_MODE_STASH;    html->StashSpace = FALSE;    g_string_truncate(html->Stash, 0);}/* Entities list from the HTML 4.01 DTD */typedef struct {   char *entity;   int isocode;} Ent_t;#define NumEnt 252static const Ent_t Entities[NumEnt] = {   {"AElig",0306}, {"Aacute",0301}, {"Acirc",0302},  {"Agrave",0300},   {"Alpha",01621},{"Aring",0305},  {"Atilde",0303}, {"Auml",0304},   {"Beta",01622}, {"Ccedil",0307}, {"Chi",01647},   {"Dagger",020041},   {"Delta",01624},{"ETH",0320},    {"Eacute",0311}, {"Ecirc",0312},   {"Egrave",0310},{"Epsilon",01625},{"Eta",01627},  {"Euml",0313},   {"Gamma",01623},{"Iacute",0315}, {"Icirc",0316},  {"Igrave",0314},   {"Iota",01631}, {"Iuml",0317},   {"Kappa",01632}, {"Lambda",01633},   {"Mu",01634},   {"Ntilde",0321}, {"Nu",01635},    {"OElig",0522},   {"Oacute",0323},{"Ocirc",0324},  {"Ograve",0322}, {"Omega",01651},   {"Omicron",01637},{"Oslash",0330},{"Otilde",0325},{"Ouml",0326},   {"Phi",01646},  {"Pi",01640},    {"Prime",020063},{"Psi",01650},   {"Rho",01641},  {"Scaron",0540}, {"Sigma",01643}, {"THORN",0336},   {"Tau",01644},  {"Theta",01630}, {"Uacute",0332}, {"Ucirc",0333},   {"Ugrave",0331},{"Upsilon",01645},{"Uuml",0334},  {"Xi",01636},   {"Yacute",0335},{"Yuml",0570},   {"Zeta",01626},  {"aacute",0341},   {"acirc",0342}, {"acute",0264},  {"aelig",0346},  {"agrave",0340},   {"alefsym",020465},{"alpha",01661},{"amp",38},    {"and",021047},   {"ang",021040}, {"aring",0345},  {"asymp",021110},{"atilde",0343},   {"auml",0344},  {"bdquo",020036},{"beta",01662},  {"brvbar",0246},   {"bull",020042},{"cap",021051},  {"ccedil",0347}, {"cedil",0270},   {"cent",0242},  {"chi",01707},   {"circ",01306},  {"clubs",023143},   {"cong",021105},{"copy",0251},   {"crarr",020665},{"cup",021052},   {"curren",0244},{"dArr",020723}, {"dagger",020040},{"darr",020623},   {"deg",0260},   {"delta",01664}, {"diams",023146},{"divide",0367},   {"eacute",0351},{"ecirc",0352},  {"egrave",0350}, {"empty",021005},   {"emsp",020003},{"ensp",020002}, {"epsilon",01665},{"equiv",021141},   {"eta",01667},  {"eth",0360},    {"euml",0353},   {"euro",020254},   {"exist",021003},{"fnof",0622},  {"forall",021000},{"frac12",0275},   {"frac14",0274},{"frac34",0276}, {"frasl",020104},{"gamma",01663},   {"ge",021145},  {"gt",62},       {"hArr",020724}, {"harr",020624},   {"hearts",023145},{"hellip",020046},{"iacute",0355},{"icirc",0356},   {"iexcl",0241}, {"igrave",0354}, {"image",020421},{"infin",021036},   {"int",021053}, {"iota",01671},  {"iquest",0277}, {"isin",021010},   {"iuml",0357},  {"kappa",01672}, {"lArr",020720}, {"lambda",01673},   {"lang",021451},{"laquo",0253},  {"larr",020620}, {"lceil",021410},   {"ldquo",020034},{"le",021144},  {"lfloor",021412},{"lowast",021027},   {"loz",022712}, {"lrm",020016},  {"lsaquo",020071},{"lsquo",020030},   {"lt",60},      {"macr",0257},   {"mdash",020024},{"micro",0265},   {"middot",0267},{"minus",021022},{"mu",01674},    {"nabla",021007},   {"nbsp",32},    {"ndash",020023},{"ne",021140},   {"ni",021013},   {"not",0254},   {"notin",021011},{"nsub",021204}, {"ntilde",0361},   {"nu",01675},   {"oacute",0363}, {"ocirc",0364},  {"oelig",0523},   {"ograve",0362},{"oline",020076},{"omega",01711}, {"omicron",01677},   {"oplus",021225},{"or",021050},  {"ordf",0252},   {"ordm",0272},   {"oslash",0370},{"otilde",0365}, {"otimes",021227},{"ouml",0366},   {"para",0266},  {"part",021002}, {"permil",020060},{"perp",021245},   {"phi",01706},  {"pi",01700},    {"piv",01726},   {"plusmn",0261},   {"pound",0243}, {"prime",020062},{"prod",021017}, {"prop",021035},   {"psi",01710},  {"quot",34},     {"rArr",020722}, {"radic",021032},   {"rang",021452},{"raquo",0273},  {"rarr",020622}, {"rceil",021411},   {"rdquo",020035},{"real",020434},{"reg",0256},    {"rfloor",021413},   {"rho",01701},  {"rlm",020017},  {"rsaquo",020072},{"rsquo",020031},   {"sbquo",020032},{"scaron",0541},{"sdot",021305}, {"sect",0247},   {"shy",0255},   {"sigma",01703}, {"sigmaf",01702},{"sim",021074},   {"spades",023140},{"sub",021202},{"sube",021206}, {"sum",021021},   {"sup",021203}, {"sup1",0271},   {"sup2",0262},   {"sup3",0263},   {"supe",021207},{"szlig",0337},  {"tau",01704},   {"there4",021064},   {"theta",01670},{"thetasym",01721},{"thinsp",020011},{"thorn",0376},   {"tilde",01334},{"times",0327},  {"trade",020442},{"uArr",020721},   {"uacute",0372},{"uarr",020621}, {"ucirc",0373},  {"ugrave",0371},   {"uml",0250},   {"upsih",01722}, {"upsilon",01705},{"uuml",0374},   {"weierp",020430},{"xi",01676},  {"yacute",0375}, {"yen",0245},   {"yuml",0377},  {"zeta",01666},  {"zwj",020015},  {"zwnj",020014}};/* * Comparison function for binary search */static int Html_entity_comp (const void *a, const void *b){    return strcmp(((Ent_t *)a)->entity, ((Ent_t *)b)->entity);}/* * Binary search of 'key' in entity list */static int Html_entity_search (char *key){    Ent_t *res, EntKey;    EntKey.entity = key;    res = bsearch(&EntKey, Entities, NumEnt, sizeof(Ent_t), Html_entity_comp);    if ( res )        return (res - Entities);    return -1;}/* * Switch a few UCS encodings to latin1. */static int Html_try_ucs2latin1(int isocode){    int ret;    switch (isocode) {        case 0x2018:        case 0x2019: ret = '\''; break;        case 0x201c:        case 0x201d: ret = '"'; break;        case 0x2013:        case 0x2014: ret = '-'; break;        case 0x2039: ret = '<'; break;        case 0x203a: ret = '>'; break;        case 0x2022: ret = 176; break;        default:     ret = -1;  break;    }    return ret;}/* * Switch a few 'undefined for HTML' ASCII encodings to latin1. */static gint Html_try_ascii2latin1(gint isocode){   gint ret;   switch (isocode) {      case 145:      case 146: ret = '\''; break;      case 147:      case 148: ret = '"'; break;      case 149: ret = 176; break;      case 150:      case 151: ret = '-'; break;      default:  ret = isocode; break;   }   return ret;}/* * Given an entity, return the ISO-Latin1 character code. * (-1 if not a valid entity) */static int Html_parse_entity(DilloHtml *html, const char *token,	   						int toksize, int *entsize){    int isocode, i;	char *tok, *s, c;	token ++;	tok = s = toksize ? g_strndup(token, (guint)toksize) : g_strdup(token);	isocode = -1;   if (*s == '#') {      /* numeric character reference */      errno = 0;      if (*++s == 'x' || *s == 'X') {         if (isxdigit(*++s)) {            /* strtol with base 16 accepts leading "0x" - we don't */            if (*s == '0' && s[1] == 'x') {               s++;               isocode = 0;             } else {               isocode = strtol(s, &s, 16);            }         }      } else if (isdigit(*s)) {         isocode = strtol(s, &s, 10);      }      if (!isocode || errno || isocode > 0x7fffffffL) {         /* this catches null bytes, errors and codes >=2^31 */         MSG_HTML("numeric character reference out of range\n");         isocode = -2;      }      if (isocode != -1) {         if (*s == ';')            s++;         else if (prefs.show_extra_warnings)            MSG_HTML("numeric character reference without trailing ';'\n");      }   } else if (isalpha(*s)) {      /* character entity reference */      while (isalnum(*++s) || strchr(":_.-", *s));      c = *s;      *s = 0;      if ((i = Html_entity_search(tok)) == -1) {         MSG_HTML("undefined character entity '%s'\n", tok);         isocode = -3;      } else         isocode = Entities[i].isocode;      if (c == ';')         s++;      else if (prefs.show_extra_warnings)         MSG_HTML("character entity reference without trailing ';'\n");   }   *entsize = s-tok+1;   g_free(tok);   if (isocode >= 128 && isocode <= 159) {      MSG_HTML("code positions 128-159 are not defined for ISO Latin-1\n");      isocode = Html_try_ascii2latin1(isocode);   } else if (isocode  > 255)      /* Try a few UCS translations to Latin1 */      isocode = Html_try_ucs2latin1(isocode);   else if (isocode == -1 && prefs.show_extra_warnings)      MSG_HTML("literal '&'\n");   return isocode;}/* * Convert all the entities in a token to plain ISO character codes. Takes * a token and its length, and returns a newly allocated string. */static char *Html_parse_entities (DilloHtml *html, char *token, int toksize){   char *esc_set = "&\xE2\xC2";   char *new_str;   int i, j, isocode, entsize;   new_str = g_strndup(token, toksize);   if (new_str[strcspn(new_str, esc_set)] == 0)      return new_str;   for (i = j = 0; i < toksize; i++) {      if (token[i] == '&' &&          (isocode = Html_parse_entity(html, token+i,                                       toksize-i, &entsize)) >= 0) {         new_str[j++] = (gchar) isocode;         i += entsize-1;      } else if (token[i] == '\xE2' && token[i+1] == '\x80' && i+2 < toksize){         /* Hack: for parsing some UTF-8 characters into latin1 */         switch (token[i+2]) {         case '\x94':            new_str[j++] = '-';            new_str[j++] = '-';            break;         case '\x98':         case '\x99':            new_str[j++] = '\'';            break;         case '\x9C':         case '\x9D':            new_str[j++] = '"';            break;         case '\xA2':            new_str[j++] = '*';            new_str[j++] = ' ';            break;         default: /* unhandled */            new_str[j++] = '\xE2';            break;         }         i += 2;      } else if (token[i] == '\xC2' && token[i+1] == '\xA0') {         /* Hack: for parsing some UTF-8 characters into latin1 */         new_str[j++] = ' ';         ++i;      } else {         new_str[j++] = token[i];      }   }   new_str[j] = '\0';   return new_str;}/* * Parse spaces * */static void Html_process_space(DilloHtml *html, char *space, int spacesize){    int i, offset;    DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode;    if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH ) {        html->StashSpace = (html->Stash->len > 0);		html->SPCPending = FALSE;    }    else if ( parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM ) {        char *Pword = g_strndup (space, spacesize);        g_string_append (html->Stash, Pword);        g_free(Pword);		html->SPCPending = FALSE;    }    else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) {        /* re-scan the string for characters that cause line breaks */        for (i = 0; i < spacesize; i++) {            /* Support for "\r", "\n" and "\r\n" line breaks (skips the first) */            if (!html->PreFirstChar &&                    (space[i] == '\r' || (space[i] == '\n' && !html->PrevWasCR))) {                a_Dw_page_add_linebreak(DW_PAGE (html->dw),                                        html->stack[(html)->stack_top].style);                html->pre_column = 0;            }            html->PreFirstChar = FALSE;            /* cr and lf should not be rendered -- they appear as a break */            switch (space[i]) {            case '\r':            case '\n':                break;            case '\t':				if (prefs.show_extra_warnings)					MSG_HTML("TAB character inside <PRE>\n");				offset = TAB_SIZE - html->pre_column % TAB_SIZE;				a_Dw_page_add_text(DW_PAGE (html->dw),								   g_strnfill(offset, ' '),								   html->stack[html->stack_top].style);				html->pre_column += offset;				break;            default:                a_Dw_page_add_text(DW_PAGE (html->dw),                                g_strndup(space + i, 1),                               html->stack[html->stack_top].style);                html->pre_column++;                break;            }            html->PrevWasCR = (space[i] == '\r');        }		html->SPCPending = FALSE;    }    else {      if (SGML_SPCDEL && html->PrevWasOpenTag) {         /* SGML_SPCDEL ignores white space inmediately after an open tag */         html->SPCPending = FALSE;      } else {         g_free(html->SPCBuf);         html->SPCBuf = g_strndup(space, spacesize);         html->SPCPending = TRUE;      }        if ( parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY )            html->StashSpace = (html->Stash->len > 0);    }}/* * Handles putting the word into its proper place *  > STASH and VERBATIM --> html->Stash *  > otherwise it goes through a_Dw_page_add_text() * * Entities are parsed (or not) according to parse_mode. */static void Html_process_word(DilloHtml *html, char *word, int size){    int i, start;    char *Pword;    DilloHtmlParseMode parse_mode = html->stack[html->stack_top].parse_mode;    if (parse_mode == DILLO_HTML_PARSE_MODE_STASH ||            parse_mode == DILLO_HTML_PARSE_MODE_STASH_AND_BODY) {        if ( html->StashSpace ) {            g_string_append_c(html->Stash, ' ');            html->StashSpace = FALSE;        }        Pword = Html_parse_entities(html, word, size);        g_string_append(html->Stash, Pword);        g_free(Pword);    }    else if (parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {        /* word goes in untouched, it is not processed here. */        Pword = g_strndup(word, size);        g_string_append(html->Stash, Pword);        g_free(Pword);    }    if (parse_mode == DILLO_HTML_PARSE_MODE_STASH  ||            parse_mode == DILLO_HTML_PARSE_MODE_VERBATIM) {	}	else if ( parse_mode == DILLO_HTML_PARSE_MODE_PRE ) {        /* all this overhead is to catch white-space entities */        Pword = Html_parse_entities(html, word, size);        for (start = i = 0; Pword[i]; start = i) {            if (isspace(Pword[i])) {                while (Pword[++i] && isspace(Pword[i]));                Html_process_space(html, Pword + start, i - start);            }            else {                while (Pword[++i] && !isspace(Pword[i]));                a_Dw_page_add_text(DW_PAGE (html->dw),                                g_strndup(Pword + start, i - start),                                html->stack[html->stack_top].style);                html->pre_column += i - start;                html->PreFirstChar = FALSE;            }        }        g_free(Pword);    }    else {      if (html->SPCPending && (!SGML_SPCDEL || !html->PrevWasOpenTag))         /* SGML_SPCDEL ignores space after an open tag */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -