📄 parse.c
字号:
unsigned char *skip_comment(unsigned char *html, unsigned char *eof){ if (html + 4 <= eof && html[2] == '-' && html[3] == '-') { html += 4; while (html < eof) { if (html + 2 <= eof && html[0] == '-' && html[1] == '-') { html += 2; while (html < eof && *html == '-') html++; while (html < eof && isspace(*html)) html++; if (html >= eof) return eof; if (*html == '>') return html + 1; continue; } html++; } } else { html += 2; while (html < eof) { if (html[0] == '>') return html + 1; html++; } } return eof;}enum element_type { ELEMENT_TYPE_NESTABLE, ELEMENT_TYPE_NON_NESTABLE, ELEMENT_TYPE_NON_PAIRABLE, ELEMENT_TYPE_LI,};struct element_info { /* Element name, uppercase. */ unsigned char *name; /* Element handler. This does the relevant arguments processing and * formatting (by calling renderer hooks). Note that in a few cases, * this is just a placeholder and the element is given special care * in start_element() (which is also where we call these handlers). */ element_handler_T *func; /* How many line-breaks to ensure we have before and after an element. * Value of 1 means the element will be on a line on its own, value * of 2 means that it will also have empty lines before and after. * Note that this does not add up - it just ensures that there is * at least so many linebreaks, but does not add more if that is the * case. Therefore, something like e.g. </pre></p> will add only two * linebreaks, not four. */ /* In some stack killing logic, we use some weird heuristic based on * whether an element is block or inline. That is determined from * whether this attribute is zero on non-zero. */ int linebreak; enum element_type type;};static struct element_info elements[] = { {"A", html_a, 0, ELEMENT_TYPE_NON_NESTABLE}, {"ABBR", html_italic, 0, ELEMENT_TYPE_NESTABLE }, {"ADDRESS", html_address, 2, ELEMENT_TYPE_NESTABLE }, {"APPLET", html_applet, 1, ELEMENT_TYPE_NON_PAIRABLE}, {"B", html_bold, 0, ELEMENT_TYPE_NESTABLE }, {"BASE", html_base, 0, ELEMENT_TYPE_NON_PAIRABLE}, {"BASEFONT", html_font, 0, ELEMENT_TYPE_NON_PAIRABLE}, {"BLOCKQUOTE", html_blockquote, 2, ELEMENT_TYPE_NESTABLE }, {"BODY", html_body, 0, ELEMENT_TYPE_NESTABLE }, {"BR", html_br, 1, ELEMENT_TYPE_NON_PAIRABLE}, {"BUTTON", html_button, 0, ELEMENT_TYPE_NESTABLE }, {"CAPTION", html_center, 1, ELEMENT_TYPE_NESTABLE }, {"CENTER", html_center, 1, ELEMENT_TYPE_NESTABLE }, {"CODE", html_fixed, 0, ELEMENT_TYPE_NESTABLE }, {"DD", html_dd, 1, ELEMENT_TYPE_NON_PAIRABLE}, {"DFN", html_bold, 0, ELEMENT_TYPE_NESTABLE }, {"DIR", html_ul, 2, ELEMENT_TYPE_NESTABLE }, {"DIV", html_linebrk, 1, ELEMENT_TYPE_NESTABLE }, {"DL", html_dl, 2, ELEMENT_TYPE_NESTABLE }, {"DT", html_dt, 1, ELEMENT_TYPE_NON_PAIRABLE}, {"EM", html_italic, 0, ELEMENT_TYPE_NESTABLE }, {"EMBED", html_embed, 0, ELEMENT_TYPE_NON_PAIRABLE}, {"FIXED", html_fixed, 0, ELEMENT_TYPE_NESTABLE }, {"FONT", html_font, 0, ELEMENT_TYPE_NESTABLE }, {"FORM", html_form, 1, ELEMENT_TYPE_NESTABLE }, {"FRAME", html_frame, 1, ELEMENT_TYPE_NON_PAIRABLE}, {"FRAMESET", html_frameset, 1, ELEMENT_TYPE_NESTABLE }, {"H1", html_h1, 2, ELEMENT_TYPE_NON_NESTABLE}, {"H2", html_h2, 2, ELEMENT_TYPE_NON_NESTABLE}, {"H3", html_h3, 2, ELEMENT_TYPE_NON_NESTABLE}, {"H4", html_h4, 2, ELEMENT_TYPE_NON_NESTABLE}, {"H5", html_h5, 2, ELEMENT_TYPE_NON_NESTABLE}, {"H6", html_h6, 2, ELEMENT_TYPE_NON_NESTABLE}, {"HEAD", html_head, 0, ELEMENT_TYPE_NESTABLE }, {"HR", html_hr, 2, ELEMENT_TYPE_NON_PAIRABLE}, {"HTML", html_html, 0, ELEMENT_TYPE_NESTABLE }, {"I", html_italic, 0, ELEMENT_TYPE_NESTABLE }, {"IFRAME", html_iframe, 1, ELEMENT_TYPE_NON_PAIRABLE}, {"IMG", html_img, 0, ELEMENT_TYPE_NON_PAIRABLE}, {"INPUT", html_input, 0, ELEMENT_TYPE_NON_PAIRABLE}, {"LI", html_li, 1, ELEMENT_TYPE_LI }, {"LINK", html_link, 1, ELEMENT_TYPE_NON_PAIRABLE}, {"LISTING", html_pre, 2, ELEMENT_TYPE_NESTABLE }, {"MENU", html_ul, 2, ELEMENT_TYPE_NESTABLE }, {"META", html_meta, 0, ELEMENT_TYPE_NON_PAIRABLE}, {"NOFRAMES", html_noframes, 0, ELEMENT_TYPE_NESTABLE }, {"NOSCRIPT", html_noscript, 0, ELEMENT_TYPE_NESTABLE }, {"OBJECT", html_object, 1, ELEMENT_TYPE_NON_PAIRABLE}, {"OL", html_ol, 2, ELEMENT_TYPE_NESTABLE }, {"OPTION", html_option, 1, ELEMENT_TYPE_NON_PAIRABLE}, {"P", html_p, 2, ELEMENT_TYPE_NON_NESTABLE}, {"PRE", html_pre, 2, ELEMENT_TYPE_NESTABLE }, {"Q", html_italic, 0, ELEMENT_TYPE_NESTABLE }, {"S", html_underline, 0, ELEMENT_TYPE_NESTABLE }, {"SCRIPT", html_script, 0, ELEMENT_TYPE_NESTABLE }, {"SELECT", html_select, 0, ELEMENT_TYPE_NESTABLE }, {"SPAN", html_span, 0, ELEMENT_TYPE_NESTABLE }, {"STRIKE", html_underline, 0, ELEMENT_TYPE_NESTABLE }, {"STRONG", html_bold, 0, ELEMENT_TYPE_NESTABLE }, {"STYLE", html_style, 0, ELEMENT_TYPE_NESTABLE }, {"SUB", html_subscript, 0, ELEMENT_TYPE_NESTABLE }, {"SUP", html_superscript, 0, ELEMENT_TYPE_NESTABLE }, {"TABLE", html_table, 2, ELEMENT_TYPE_NESTABLE }, {"TD", html_td, 0, ELEMENT_TYPE_NESTABLE }, {"TEXTAREA", html_textarea, 0, ELEMENT_TYPE_NON_PAIRABLE}, {"TH", html_th, 0, ELEMENT_TYPE_NESTABLE }, {"TITLE", html_title, 0, ELEMENT_TYPE_NESTABLE }, {"TR", html_tr, 1, ELEMENT_TYPE_NESTABLE }, {"TT", html_tt, 0, ELEMENT_TYPE_NON_NESTABLE}, {"U", html_underline, 0, ELEMENT_TYPE_NESTABLE }, {"UL", html_ul, 2, ELEMENT_TYPE_NESTABLE }, {"XMP", html_xmp, 2, ELEMENT_TYPE_NESTABLE }, {NULL, NULL, 0, ELEMENT_TYPE_NESTABLE },};#define NUMBER_OF_TAGS (sizeof_array(elements) - 1)#ifndef USE_FASTFINDstatic intcompar(const void *a, const void *b){ return strcasecmp(((struct element_info *) a)->name, ((struct element_info *) b)->name);}#elsestatic struct element_info *internal_pointer;/* Reset internal list pointer */static voidtags_list_reset(void){ internal_pointer = elements;}/* Returns a pointer to a struct that contains * current key and data pointers and increment * internal pointer. * It returns NULL when key is NULL. */static struct fastfind_key_value *tags_list_next(void){ static struct fastfind_key_value kv; if (!internal_pointer->name) return NULL; kv.key = internal_pointer->name; kv.data = internal_pointer; internal_pointer++; return &kv;}static struct fastfind_index ff_tags_index = INIT_FASTFIND_INDEX("tags_lookup", tags_list_reset, tags_list_next);#endif /* USE_FASTFIND */voidinit_tags_lookup(void){#ifdef USE_FASTFIND fastfind_index(&ff_tags_index, FF_COMPRESS);#endif}voidfree_tags_lookup(void){#ifdef USE_FASTFIND fastfind_done(&ff_tags_index);#endif}static unsigned char *process_element(unsigned char *name, int namelen, int endingtag, unsigned char *html, unsigned char *prev_html, unsigned char *eof, unsigned char *attr, struct html_context *html_context);voidparse_html(unsigned char *html, unsigned char *eof, struct part *part, unsigned char *head, struct html_context *html_context){ unsigned char *base_pos = html; int noupdate = 0; html_context->putsp = HTML_SPACE_SUPPRESS; html_context->line_breax = html_context->table_level ? 2 : 1; html_context->position = 0; html_context->was_br = 0; html_context->was_li = 0; html_context->was_body = 0;/* html_context->was_body_background = 0; */ html_context->part = part; html_context->eoff = eof; if (head) process_head(html_context, head);main_loop: while (html < eof) { unsigned char *name, *attr, *end; int namelen, endingtag; int dotcounter = 0; if (!noupdate) { html_context->part = part; html_context->eoff = eof; base_pos = html; } else { noupdate = 0; } if (isspace(*html) && !html_is_preformatted()) { unsigned char *h = html; while (h < eof && isspace(*h)) h++; if (h + 1 < eof && h[0] == '<' && h[1] == '/') { if (!parse_element(h, eof, &name, &namelen, &attr, &end)) { put_chrs(html_context, base_pos, html - base_pos); base_pos = html = h; html_context->putsp = HTML_SPACE_ADD; goto element; } } html++; if (!(html_context->position + (html - base_pos - 1))) goto skip_w; /* ??? */ if (*(html - 1) == ' ') { /* Do not replace with isspace() ! --Zas */ /* BIG performance win; not sure if it doesn't cause any bug */ if (html < eof && !isspace(*html)) { noupdate = 1; continue; } put_chrs(html_context, base_pos, html - base_pos); } else { put_chrs(html_context, base_pos, html - base_pos - 1); put_chrs(html_context, " ", 1); }skip_w: while (html < eof && isspace(*html)) html++; continue; } if (html_is_preformatted()) { html_context->putsp = HTML_SPACE_NORMAL; if (*html == ASCII_TAB) { put_chrs(html_context, base_pos, html - base_pos); put_chrs(html_context, " ", 8 - (html_context->position % 8)); html++; continue; } else if (*html == ASCII_CR || *html == ASCII_LF) { put_chrs(html_context, base_pos, html - base_pos); if (html - base_pos == 0 && html_context->line_breax > 0) html_context->line_breax--;next_break: if (*html == ASCII_CR && html < eof - 1 && html[1] == ASCII_LF) html++; ln_break(html_context, 1); html++; if (*html == ASCII_CR || *html == ASCII_LF) { html_context->line_breax = 0; goto next_break; } continue; } else if (html + 5 < eof && *html == '&') { /* Really nasty hack to make handling in * <pre>-tags lynx-compatible. It works around * the entity handling done in the renderer, * since checking #13 value there would require * something along the lines of NBSP_CHAR or * checking for '\n's in AT_PREFORMATTED text. */ /* See bug 52 and 387 for more info. */ int length = html - base_pos; int newlines = 0; while ((html + 5 < eof && html[0] == '&' && html[1] == '#') && (!memcmp(html + 2, "13;", 3) || (html + 6 < eof && !strncasecmp(html + 2, "x0a;", 4)))) { newlines++; html += 5 + (html[4] != ';'); } if (newlines) { put_chrs(html_context, base_pos, length); ln_break(html_context, newlines); continue; } } } while (*html < ' ') { if (html - base_pos) put_chrs(html_context, base_pos, html - base_pos); dotcounter++; base_pos = ++html; if (*html >= ' ' || isspace(*html) || html >= eof) { unsigned char *dots = fmem_alloc(dotcounter); if (dots) { memset(dots, '.', dotcounter); put_chrs(html_context, dots, dotcounter); fmem_free(dots); } goto main_loop; } } if (html + 2 <= eof && html[0] == '<' && (html[1] == '!' || html[1] == '?') && !html_context->was_xmp) { put_chrs(html_context, base_pos, html - base_pos); html = skip_comment(html, eof); continue; } if (*html != '<' || parse_element(html, eof, &name, &namelen, &attr, &end)) { html++; noupdate = 1; continue;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -