📄 htmlparser.y
字号:
/* ------------------------------------------------------------------------- *//* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. 
*/
/* ------------------------------------------------------------------------- */

/* bison++ directives: name of the generated parser class and its options. */
%name HTMLParser
%define PURE
%define DEBUG 1

%{

/* ------------------------------------------------------------------------- */

#ident "$Id: HTMLParser.y,v 1.14 1999/10/26 10:56:55 arno Exp $"

#include "html.h"
#include "HTMLParser.h"

// MIPS machines don't have "alloca()", so disable stack realloc'ing.
#ifdef mips
#define yyoverflow yyerror("parser stack overflow"), (void)
#endif

/* ------------------------------------------------------------------------- */

%}

/* ------------------------------------------------------------------------- */

/* The lexer and the error hook both get the body "= 0". */
%define LEX_BODY = 0
%define ERROR_BODY = 0

/*
 * Additional members of the parser class: two more "= 0" hooks that the
 * grammar actions call (process(), read_cdata()) and the "list_nesting"
 * counter, which the constructor initializer below starts at zero.
 */
%define MEMBERS\
  virtual ~HTMLParser(); \
  virtual void process(const Document &) = 0;\
  virtual bool read_cdata(const char *terminal, string *) = 0;\
  int list_nesting;
%define CONSTRUCTOR_INIT : list_nesting(0)

/* Semantic value: one raw pointer (or int) per kind of grammar symbol. */
%union {
  Document                                *document;
  Element                                 *element;
  list<auto_ptr<Element> >                *element_list;
  PCData                                  *pcdata;
  string                                  *strinG;
  list<TagAttribute>                      *tag_attributes;
  int                                     inT;
  list<auto_ptr<TableRow> >               *table_rows;
  list<auto_ptr<TableCell> >              *table_cells;
  ListItem                                *list_item;
  list<auto_ptr<ListItem> >               *list_items;
  Caption                                 *caption;
  Heading                                 *heading;
  list<auto_ptr<Option> >                 *option_list;
  Option                                  *option;
  DefinitionList                          *definition_list;
  list<auto_ptr<DefinitionListItem> >     *definition_list_item_list;
  TermName                                *term_name;
  TermDefinition                          *term_definition;
  Preformatted                            *preformatted;
  Address                                 *address;
  list<auto_ptr<list<TagAttribute> > >    *tag_attributes_list;
}

/* Value types of the nonterminals, grouped by union member. */
%type <document>                  document_
%type <pcdata>                    pcdata opt_pcdata
%type <element_list>              body_content texts opt_texts applet_content
%type <element_list>              opt_flow flow paragraph_content
%type <heading>                   heading HX
%type <inT>                       END_HX
%type <element>                   block block_except_p text font phrase
%type <element>                   special form list flow_
%type <table_rows>                table_rows
%type <table_cells>               table_cells
%type <caption>                   caption opt_caption
%type <definition_list>           definition_list
%type <definition_list_item_list> definition_list_content
%type <term_name>                 term_name
%type <term_definition>           term_definition
%type <option_list>               select_content
%type <option>                    option
%type <list_items>                list_content
%type <list_item>                 list_item
%type <preformatted>              preformatted
%type <address>                   address
%type <tag_attributes_list>       map_content
%type <tag_attributes>            opt_LI opt_P

/* Tokens without a tag, resp. carrying character data. */
%token DOCTYPE
%token <strinG> PCDATA
%token SCAN_ERROR

/*
 * Opening tags; each one carries its attribute list. The symbols are kept in
 * their original declaration order so that token numbering does not change.
 */
%token <tag_attributes> A ADDRESS APPLET AREA B BASE BASEFONT BIG BLOCKQUOTE
%token <tag_attributes> BODY BR CAPTION CENTER CITE CODE DD DFN DIR DIV DL DT
%token <tag_attributes> EM FONT FORM H1 H2 H3 H4 H5 H6 HEAD HR HTML I IMG
%token <tag_attributes> INPUT ISINDEX KBD LI LINK MAP MENU META NOBR OL OPTION
%token <tag_attributes> P PARAM PRE SAMP SCRIPT SELECT SMALL STRIKE STRONG
%token <tag_attributes> STYLE SUB SUP TABLE TD TEXTAREA TH TITLE TR TT U UL VAR

/* Closing tags; they carry no semantic value. */
%token END_A END_ADDRESS END_APPLET END_B END_BIG END_BLOCKQUOTE END_BODY
%token END_CAPTION END_CENTER END_CITE END_CODE END_DD END_DFN END_DIR END_DIV
%token END_DL END_DT END_EM END_FONT END_FORM END_H1 END_H2 END_H3 END_H4
%token END_H5 END_H6 END_HEAD END_HTML END_I END_KBD END_LI END_MAP END_MENU
%token END_NOBR END_OL END_OPTION END_P END_PRE END_SAMP END_SCRIPT END_SELECT
%token END_SMALL END_STRIKE END_STRONG END_STYLE END_SUB END_SUP END_TABLE
%token END_TD END_TEXTAREA END_TH END_TITLE END_TR END_TT END_U END_UL END_VAR

/* ------------------------------------------------------------------------- */

%start document

%%

/* { */

/* Start symbol: hand the finished Document to process(), then destroy it. */
document:
  document_ {
    Document *parsed = $1;
    process(*parsed);
    delete parsed;
  }
  ;

/*
 * Well... actually, an HTML document should look like
 *
 *   <!DOCTYPE ...>
 *   <HTML>
 *     <HEAD>
 *       ...
 *     </HEAD>
 *     <BODY>
 *       ...
 *     </BODY>
 *   </HTML>
 *
 * but...
 *
 * (A) All seven tags are optional
 * (B) The contents of the HEAD and the BODY section can be distinguished
 * (C) Most people out there do not know which element to put before, into,
 *     or after which section...
 *
 * so...
let's just forget about the structure of an HTML document, discard
 * the seven tags, and process the remainder as a series of sections.
 */

/*
 * document_ builds one Document, left-recursively, from whatever comes in:
 * the structural tags are dropped (only the HTML attributes are kept), head
 * elements are stored in "head", and everything else is appended to
 * "body.content". Every alternative passes the same Document pointer on as
 * its result.
 */
document_:
  /* empty */ {
    $$ = new Document;
    $$->body.content.reset(new list<auto_ptr<Element> >);
  }
  | document_ error {
    $$ = $1;
  }
  | document_ DOCTYPE {
    $$ = $1;
  }
  | document_ HTML {
    /*
     * Set the result first and only then dereference it; the original read
     * the result before assigning it and thus relied on the generator
     * pre-loading it.
     */
    ($$ = $1)->attributes.reset($2);
  }
  | document_ END_HTML {
    $$ = $1;
  }
  | document_ HEAD {
    delete $2;
    $$ = $1;
  }
  | document_ END_HEAD {
    $$ = $1;
  }
  | document_ TITLE opt_pcdata opt_END_TITLE {
    delete $2; // Ignore <TITLE> attributes
    ($$ = $1)->head.title.reset($3);
  }
  /*
   * ISINDEX, BASE, META and LINK: the attribute list is stored with reset(),
   * so a later occurrence replaces what an earlier one stored.
   */
  | document_ ISINDEX {
    ($$ = $1)->head.isindex_attributes.reset($2);
  }
  | document_ BASE {
    ($$ = $1)->head.base_attributes.reset($2);
  }
  | document_ META {
    ($$ = $1)->head.meta_attributes.reset($2);
  }
  | document_ LINK {
    ($$ = $1)->head.link_attributes.reset($2);
  }
  /*
   * SCRIPT and STYLE: read_cdata() is handed the closing tag as terminal and
   * fills the text; a false return is reported through yyerror(), and the
   * element is appended to the head either way.
   */
  | document_ SCRIPT {
    auto_ptr<Script> s(new Script);
    s->attributes.reset($2);
    if (!read_cdata("</SCRIPT>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    ($$ = $1)->head.scripts.push_back(s);
  }
  | document_ STYLE {
    auto_ptr<Style> s(new Style);
    s->attributes.reset($2);
    if (!read_cdata("</STYLE>", &s->text)) {
      yyerror("CDATA terminal not found");
    }
    ($$ = $1)->head.styles.push_back(s);
  }
  | document_ BODY {
    delete $2;
    $$ = $1;
  }
  | document_ END_BODY {
    $$ = $1;
  }
  /* Loose texts are wrapped into a Paragraph without attributes. */
  | document_ texts {
    Paragraph *p = new Paragraph;
    p->texts.reset($2);
    ($$ = $1)->body.content->push_back(auto_ptr<Element>(p));
  }
  | document_ heading {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  | document_ block {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  | document_ address {
    ($$ = $1)->body.content->push_back(auto_ptr<Element>($2));
  }
  ;

/* Copy the scanned string into a new PCData node and free the string. */
pcdata:
  PCDATA {
    $$ = new PCData;
    $$->text = *$1;
    delete $1;
  }
  ;

/*
 * A list of body elements, built the same way as the body part of
 * document_; used below as the content of DIV, CENTER and BLOCKQUOTE.
 */
body_content:
  /* empty */ {
    $$ = new list<auto_ptr<Element> >;
  }
  | body_content error {
    $$ = $1;
  }
  | body_content texts {
    Paragraph *p = new Paragraph;
    p->texts.reset($2);
    ($$ = $1)->push_back(auto_ptr<Element>(p));
  }
  | body_content heading {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  | body_content block {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  | body_content address {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  ;

/*
 * A heading takes over the Heading object of its opening tag. A closing tag
 * of a different level is reported through yyerror(), but the heading is
 * built nevertheless.
 */
heading:
  HX paragraph_content END_HX {
    /* EXTENSION: Allow paragraph content in heading, not only texts */
    if ($1->level != $3) {
      yyerror ("Levels of opening and closing headings don't match");
    }
    $$ = $1;
    $$->content.reset($2);
  }
  ;

/* A block is either an explicit paragraph or any other block element. */
block:
  block_except_p {
    $$ = $1;
  }
  | P paragraph_content opt_END_P {
    Paragraph *p = new Paragraph;
    p->attributes.reset($1);
    p->texts.reset($2);
    $$ = p;
  }
  ;

/*
 * Paragraph content: the elements of each "texts" list are spliced into the
 * result list (the emptied list is then freed); blocks are appended as
 * single elements.
 */
paragraph_content:
  /* EXTENSION: Allow blocks (except "<P>") in paragraphs. */
  /* empty */ {
    $$ = new list<auto_ptr<Element> >;
  }
  | paragraph_content error {
    $$ = $1;
  }
  | paragraph_content texts {
    $$ = $1;
    $$->splice($$->end(), *$2);
    delete $2;
  }
  | paragraph_content block_except_p {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  ;

/* Every block element other than an explicit paragraph. */
block_except_p:
  list {
    $$ = $1;
  }
  | preformatted {
    $$ = $1;
  }
  | definition_list {
    $$ = $1;
  }
  | DIV body_content END_DIV {
    Division *p = new Division;
    p->attributes.reset($1);
    p->body_content.reset($2);
    $$ = p;
  }
  | CENTER body_content opt_END_CENTER {
    Center *p = new Center;
    delete $1; // CENTER has no attributes.
    p->body_content.reset($2);
    $$ = p;
  }
  | BLOCKQUOTE body_content END_BLOCKQUOTE {
    delete $1; // BLOCKQUOTE has no attributes!
    BlockQuote *bq = new BlockQuote;
    bq->content.reset($2);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -