📄 parse.c
字号:
* current key and data pointers and increment * internal pointer. * It returns NULL when key is NULL. */static struct fastfind_key_value *tags_list_next(void){ static struct fastfind_key_value kv; if (!internal_pointer->name) return NULL; kv.key = internal_pointer->name; kv.data = internal_pointer; internal_pointer++; return &kv;}static struct fastfind_index ff_tags_index = INIT_FASTFIND_INDEX("tags_lookup", tags_list_reset, tags_list_next);#endif /* USE_FASTFIND */voidinit_tags_lookup(void){#ifdef USE_FASTFIND fastfind_index(&ff_tags_index, FF_COMPRESS);#endif}voidfree_tags_lookup(void){#ifdef USE_FASTFIND fastfind_done(&ff_tags_index);#endif}static unsigned char *process_element(unsigned char *name, int namelen, int endingtag, unsigned char *html, unsigned char *prev_html, unsigned char *eof, unsigned char *attr, struct part *part);voidparse_html(unsigned char *html, unsigned char *eof, struct part *part, unsigned char *head){ unsigned char *base_pos = html; int noupdate = 0; html_context.putsp = -1; html_context.line_breax = html_context.table_level ? 2 : 1; html_context.position = 0; html_context.was_br = 0; html_context.was_li = 0; html_context.part = part; html_context.eoff = eof; if (head) process_head(head);main_loop: while (html < eof) { unsigned char *name, *attr, *end, *prev_html; int namelen, endingtag; int dotcounter = 0; if (!noupdate) { html_context.part = part; html_context.eoff = eof; base_pos = html; } else { noupdate = 0; } if (isspace(*html) && !html_is_preformatted()) { unsigned char *h = html; while (h < eof && isspace(*h)) h++; if (h + 1 < eof && h[0] == '<' && h[1] == '/') { if (!parse_element(h, eof, &name, &namelen, &attr, &end)) { put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part); base_pos = html = h; html_context.putsp = 1; goto element; } } html++; if (!(html_context.position + (html - base_pos - 1))) goto skip_w; /* ??? */ if (*(html - 1) == ' ') { /* Do not replace with isspace() ! --Zas */ /* BIG performance win; not sure if it doesn't cause any bug */ if (html < eof && !isspace(*html)) { noupdate = 1; continue; } put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part); } else { put_chrs(base_pos, html - base_pos - 1, html_context.put_chars_f, part); put_chrs(" ", 1, html_context.put_chars_f, part); }skip_w: while (html < eof && isspace(*html)) html++; continue; } if (html_is_preformatted()) { html_context.putsp = 0; if (*html == ASCII_TAB) { put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part); put_chrs(" ", 8 - (html_context.position % 8), html_context.put_chars_f, part); html++; continue; } else if (*html == ASCII_CR || *html == ASCII_LF) { put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part); if (html - base_pos == 0 && html_context.line_breax > 0) html_context.line_breax--;next_break: if (*html == ASCII_CR && html < eof - 1 && html[1] == ASCII_LF) html++; ln_break(1, html_context.line_break_f, part); html++; if (*html == ASCII_CR || *html == ASCII_LF) { html_context.line_breax = 0; goto next_break; } continue; } else if (html + 5 < eof && *html == '&') { /* Really nasty hack to make handling in * <pre>-tags lynx-compatible. It works around * the entity handling done in the renderer, * since checking #13 value there would require * something along the lines of NBSP_CHAR or * checking for '\n's in AT_PREFORMATTED text. */ /* See bug 52 and 387 for more info. */ int length = html - base_pos; int newlines = 0; while (html + 5 < eof && !memcmp(html, " ", 5)) { newlines++; html += 5; } if (newlines) { put_chrs(base_pos, length, html_context.put_chars_f, part); ln_break(newlines, html_context.line_break_f, part); continue; } } } while (*html < ' ') { if (html - base_pos) put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part); dotcounter++; base_pos = ++html; if (*html >= ' ' || isspace(*html) || html >= eof) { unsigned char *dots = fmem_alloc(dotcounter); if (dots) { memset(dots, '.', dotcounter); put_chrs(dots, dotcounter, html_context.put_chars_f, part); fmem_free(dots); } goto main_loop; } } if (html + 2 <= eof && html[0] == '<' && (html[1] == '!' || html[1] == '?') && !html_context.was_xmp) { put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part); html = skip_comment(html, eof); continue; } if (*html != '<' || parse_element(html, eof, &name, &namelen, &attr, &end)) { html++; noupdate = 1; continue; }element: endingtag = *name == '/'; name += endingtag; namelen -= endingtag; if (!endingtag && html_context.putsp == 1 && !html_top.invisible) put_chrs(" ", 1, html_context.put_chars_f, part); put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part); if (!html_is_preformatted() && !endingtag && !html_context.putsp) { unsigned char *ee = end; unsigned char *nm; while (!parse_element(ee, eof, &nm, NULL, NULL, &ee)) if (*nm == '/') goto ng; if (ee < eof && isspace(*ee)) { put_chrs(" ", 1, html_context.put_chars_f, part); }ng:; } prev_html = html; html = process_element(name, namelen, endingtag, end, prev_html, eof, attr, part); } if (noupdate) put_chrs(base_pos, html - base_pos, html_context.put_chars_f, part); ln_break(1, html_context.line_break_f, part); /* Restore the part in case the html_context was trashed in the last * iteration so that when destroying the stack in the caller we still * get the right part pointer. */ html_context.part = part; html_context.putsp = -1; html_context.position = 0; html_context.was_br = 0;}static unsigned char *start_element(struct element_info *ei, unsigned char *name, int namelen, int endingtag, unsigned char *html, unsigned char *prev_html, unsigned char *eof, unsigned char *attr, struct part *part){ unsigned char *a; struct par_attrib old_format; int restore_format;#ifdef CONFIG_CSS struct css_selector *selector = NULL;#endif if (html_context.was_xmp) { put_chrs("<", 1, html_context.put_chars_f, part); html = prev_html + 1; return html; } ln_break(ei->linebreak, html_context.line_break_f, part); a = get_attr_val(attr, "id"); if (a) { html_context.special_f(part, SP_TAG, a); mem_free(a); } if (html_top.type == ELEMENT_WEAK) { kill_html_stack_item(&html_top); } /* We try to process nested <script> if we didn't process the parent * one. */ if (html_top.invisible && (ei->func != html_script || html_top.invisible < 2)) { return html; } restore_format = html_is_preformatted(); old_format = par_format; if (ei->func == html_table && global_doc_opts->tables && html_context.table_level < HTML_MAX_TABLE_LEVEL) { format_table(attr, html, eof, &html, part); ln_break(2, html_context.line_break_f, part); return html; } if (ei->func == html_select) { if (!do_html_select(attr, html, eof, &html, part)) return html; } if (ei->func == html_textarea) { do_html_textarea(attr, html, eof, &html, part); return html; }#ifdef CONFIG_CSS if (ei->func == html_style && global_doc_opts->css_enable) { css_parse_stylesheet(&html_context.css_styles, html_context.base_href, html, eof); }#endif if (ei->nopair == 2 || ei->nopair == 3) { struct html_element *e; if (ei->nopair == 2) { foreach (e, html_context.stack) { if (e->type < ELEMENT_KILLABLE) break; if (e->linebreak || !ei->linebreak) break; } } else foreach (e, html_context.stack) { if (e->linebreak && !ei->linebreak) break; if (e->type < ELEMENT_KILLABLE) break; if (!strlcasecmp(e->name, e->namelen, name, namelen)) break; } if (!strlcasecmp(e->name, e->namelen, name, namelen)) { while (e->prev != (void *) &html_context.stack) kill_html_stack_item(e->prev); if (e->type > ELEMENT_IMMORTAL) kill_html_stack_item(e); } } if (ei->nopair != 1) { html_stack_dup(ELEMENT_KILLABLE); html_top.name = name; html_top.namelen = namelen; html_top.options = attr; html_top.linebreak = ei->linebreak;#ifdef CONFIG_ECMASCRIPT if (has_attr(attr, "onClick")) { /* XXX: Put something better to format.link. --pasky */ mem_free_set(&format.link, stracpy("javascript:void(0);")); mem_free_set(&format.target, stracpy(html_context.base_target)); format.style.fg = format.clink; html_top.pseudo_class = ELEMENT_LINK; mem_free_set(&format.title, stracpy("onClick placeholder")); /* Er. I know. Well, double html_focusable()s shouldn't * really hurt. */ html_focusable(attr); }#endif }#ifdef CONFIG_ECMASCRIPT /* We need to have own element in the stack, that's why we waited for * so long. */ if (ei->func == html_script) { if (!do_html_script(attr, html, eof, &html, part)) return html; }#endif#ifdef CONFIG_CSS if (html_top.options && global_doc_opts->css_enable) { /* XXX: We should apply CSS otherwise as well, but that'll need * some deeper changes in order to have options filled etc. * Probably just applying CSS from more places, since we * usually have nopair set when we either (1) rescan on your * own from somewhere else (2) html_stack_dup() in our own way. * --pasky */ /* Call it now to gain some of the stuff which might affect * formatting of some elements. */ /* FIXME: The caching of the CSS selector is broken, since t can * lead to wrong styles being applied to following elements, so * disabled for now. */ selector = get_css_selector_for_element(&html_top, &html_context.css_styles, &html_context.stack); if (selector) { apply_css_selector_style(&html_top, selector); done_css_selector(selector); } }#endif if (ei->func) ei->func(attr);#ifdef CONFIG_CSS if (selector && html_top.options) { /* Call it now to override default colors of the elements. */ selector = get_css_selector_for_element(&html_top, &html_context.css_styles, &html_context.stack); if (selector) { apply_css_selector_style(&html_top, selector); done_css_selector(selector); } }#endif if (ei->func != html_br) html_context.was_br = 0; if (restore_format) par_format = old_format; return html;}static unsigned char *end_element(struct element_info *ei, unsigned char *name, int namelen, int endingtag, unsigned char *html, unsigned char *prev_html, unsigned char *eof, unsigned char *attr, struct part *part){ struct html_element *e, *elt; int lnb = 0; int kill = 0; if (html_context.was_xmp) { if (ei->func != html_xmp) return html; html_context.was_xmp = 0; } html_context.was_br = 0; if (ei->nopair == 1 || ei->nopair == 3) return html; /* dump_html_stack(); */ foreach (e, html_context.stack) { if (e->linebreak && !ei->linebreak) kill = 1; if (strlcasecmp(e->name, e->namelen, name, namelen)) { if (e->type < ELEMENT_KILLABLE) break; else continue; } if (kill) { kill_html_stack_item(e); break; } for (elt = e; elt != (void *) &html_context.stack; elt = elt->prev) if (elt->linebreak > lnb) lnb = elt->linebreak; /* This hack forces a line break after a list end. It is needed * when ending a list with the last <li> having no text the * line_breax is 2 so the ending list's linebreak will be * ignored when calling ln_break(). */ if (html_context.was_li) html_context.line_breax = 0; ln_break(lnb, html_context.line_break_f, part); while (e->prev != (void *) &html_context.stack) kill_html_stack_item(e->prev); kill_html_stack_item(e); break; } /* dump_html_stack(); */ return html;}static unsigned char *process_element(unsigned char *name, int namelen, int endingtag, unsigned char *html, unsigned char *prev_html, unsigned char *eof, unsigned char *attr, struct part *part){ struct element_info *ei;#ifndef USE_FASTFIND { struct element_info elem; unsigned char tmp; tmp = name[namelen]; name[namelen] = '\0'; elem.name = name; ei = bsearch(&elem, elements, NUMBER_OF_TAGS, sizeof(elem), compar); name[namelen] = tmp; }#else ei = (struct element_info *) fastfind_search(&ff_tags_index, name, namelen);#endif if (!ei) return html; if (!endingtag) { return start_element(ei, name, namelen, endingtag, html, prev_html, eof, attr, part); } else { return end_element(ei, name, namelen, endingtag, html, prev_html, eof, attr, part); }}voidscan_http_equiv(unsigned char *s, unsigned char *eof, struct string *head, struct string *title){ unsigned char *name, *attr, *he, *c; int namelen; if (title && !init_string(title)) return; add_char_to_string(head, '\n');se: while (s < eof && *s != '<') {sp: s++; } if (s >= eof) return; if (s + 2 <= eof && (s[1] == '!' || s[1] == '?')) { s = skip_comment(s, eof); goto se; } if (parse_element(s, eof, &name, &namelen, &attr, &s)) goto sp;ps: if (!strlcasecmp(name, namelen, "HEAD", 4)) goto se; if (!strlcasecmp(name, namelen, "/HEAD", 5)) return; if (!strlcasecmp(name, namelen, "BODY", 4)) return; if (title && !title->length && !strlcasecmp(name, namelen, "TITLE", 5)) { unsigned char *s1;xse: s1 = s; while (s < eof && *s != '<') {xsp: s++; } if (s - s1) add_bytes_to_string(title, s1, s - s1); if (s >= eof) goto se; if (s + 2 <= eof && (s[1] == '!' || s[1] == '?')) { s = skip_comment(s, eof); goto xse; } if (parse_element(s, eof, &name, &namelen, &attr, &s)) { s1 = s; goto xsp; } clr_spaces(title->source); goto ps; } if (strlcasecmp(name, namelen, "META", 4)) goto se; he = get_attr_val(attr, "charset"); if (he) { add_to_string(head, "Charset: "); add_to_string(head, he); mem_free(he); } he = get_attr_val(attr, "http-equiv"); if (!he) goto se; add_to_string(head, he); mem_free(he); c = get_attr_val(attr, "content"); if (c) { add_to_string(head, ": "); add_to_string(head, c); mem_free(c); } add_crlf_to_string(head); goto se;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -