⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.y

📁 将HTML转换为TXT文件的程序
💻 Y
📖 第 1 页 / 共 2 页
字号:
    $$ = bq;
  }
  | FORM body_content END_FORM {
    Form *f = new Form;
    f->attributes.reset($1);
    f->content.reset($2);
    $$ = f;
  }
  | HR {
    HorizontalRule *h = new HorizontalRule;
    h->attributes.reset($1);
    $$ = h;
  }
  | TABLE opt_caption table_rows END_TABLE {
    Table *t = new Table;
    t->attributes.reset($1);
    t->caption.reset($2);
    t->rows.reset($3);
    $$ = t;
  }
  ;

/*
 * The four list containers (OL, UL, DIR, MENU). Each alternative bumps
 * "list_nesting" in a mid-rule action before its content is parsed and
 * decrements it again in the final action, recording the decremented value
 * in the node's "nesting" member. The mid-rule action occupies position 2,
 * which is why the list content is referenced as $3.
 */
list:
  OL { ++list_nesting; } list_content END_OL {
    OrderedList *ol = new OrderedList;
    ol->attributes.reset($1);
    ol->items.reset($3);
    ol->nesting = --list_nesting;
    $$ = ol;
  }
  | UL { ++list_nesting; } list_content END_UL {
    UnorderedList *ul = new UnorderedList;
    ul->attributes.reset($1);
    ul->items.reset($3);
    ul->nesting = --list_nesting;
    $$ = ul;
  }
  | DIR { ++list_nesting; } list_content END_DIR {
    Dir *d = new Dir;
    d->attributes.reset($1);
    d->items.reset($3);
    d->nesting = --list_nesting;
    $$ = d;
  }
  | MENU { ++list_nesting; } list_content END_MENU {
    Menu *m = new Menu;
    m->attributes.reset($1);
    m->items.reset($3);
    m->nesting = --list_nesting;
    $$ = m;
  }
  ;

/*
 * Zero or more list items. The value is a null pointer until the first item
 * arrives; the list object is allocated lazily at that point. An "error"
 * token is accepted and skipped without touching the accumulated list.
 */
list_content:
  /* empty */ {
    $$ = 0;
  }
  | list_content error {
    $$ = $1;
  }
  | list_content list_item {
    $$ = $1 ? $1 : new list<auto_ptr<ListItem> >;
    $$->push_back(auto_ptr<ListItem>($2));
  }
  ;

/*
 * One entry of a list: a regular <LI> (end tag optional), a bare "block",
 * or bare "texts". The latter two produce items without attributes.
 */
list_item:
  LI opt_flow opt_END_LI {
    ListNormalItem *lni = new ListNormalItem;
    lni->attributes.reset($1);
    lni->flow.reset($2);
    $$ = lni;
  }
  | block {   /* EXTENSION: Handle a "block" in a list as an indented block. */
    ListBlockItem *lbi = new ListBlockItem;
    lbi->block.reset($1);
    $$ = lbi;
  }
  | texts {              /* EXTENSION: Treat "texts" in a list as an "<LI>". */
    ListNormalItem *lni = new ListNormalItem;
    lni->flow.reset($1);
    $$ = lni;
  }
  ;

/*
 * A <DL> element. In the first alternative a <DL> directly wraps another
 * definition list: the outer tag's attributes ($1) and the preceding flow
 * ($2) are deleted and the inner list becomes the result. In the second
 * alternative the flow that precedes the items is kept as the "preamble".
 */
definition_list:
                                           /* EXTENSION: Allow nested <DL>s. */
                                             /* EXTENSION: "</DL>" optional. */
  DL opt_flow opt_error definition_list opt_END_DL {
    delete $1;
    delete $2; /* Kludge */
    $$ = $4;
  }
                                 /* EXTENSION: Accept a "preamble" in the DL */
  | DL opt_flow opt_error definition_list_content END_DL {
    DefinitionList *dl = new DefinitionList;
    dl->attributes.reset($1);
    dl->preamble.reset($2);
    dl->items.reset($4);
    $$ = dl;
  }
  ;

/*
 * Zero or more <DT>/<DD> entries, accumulated into a lazily allocated list
 * (null until the first entry is seen).
 *
 * NOTE(review): the second alternative derives "definition_list_content"
 * from itself without consuming any input. The other list-like rules in
 * this file have "<list> error" in this position, so an "error" token may
 * have been dropped here, possibly on purpose because term_name and
 * term_definition already end in opt_error. Confirm against the parser
 * generator's conflict report before changing it.
 */
definition_list_content:
  /* empty */ {
    $$ = 0;
  }
  | definition_list_content {
    $$ = $1;
  }
  | definition_list_content term_name {
    $$ = $1 ? $1 : new list<auto_ptr<DefinitionListItem> >;
    $$->push_back(auto_ptr<DefinitionListItem>($2));
  }
  | definition_list_content term_definition {
    $$ = $1 ? $1 : new list<auto_ptr<DefinitionListItem> >;
    $$->push_back(auto_ptr<DefinitionListItem>($2));
  }
  ;

/*
 * A <DT> entry. The tag's attributes are discarded; only the flow is kept.
 * The second alternative additionally accepts "</DT>" followed by an
 * optional <P>, whose value is discarded as well.
 */
term_name:
  DT opt_flow opt_error {      /* EXTENSION: Allow "flow" instead of "texts" */
    delete $1;
    $$ = new TermName;
    $$->flow.reset($2);
  }
  | DT opt_flow END_DT opt_P opt_error {/* EXTENSION: Ignore <P> after </DT> */
    delete $1;
    delete $4;
    $$ = new TermName;
    $$->flow.reset($2);
  }
  ;

/*
 * A <DD> entry; mirrors term_name (attributes discarded, flow kept, an
 * optional <P> after "</DD>" is discarded).
 */
term_definition:
  DD opt_flow opt_error {
    delete $1;
    $$ = new TermDefinition;
    $$->flow.reset($2);
  }
  | DD opt_flow END_DD opt_P opt_error {/* EXTENSION: Ignore <P> after </DD> */
    delete $1;
    delete $4;
    $$ = new TermDefinition;
    $$->flow.reset($2);
  }
  ;

/*
 * One or more flow elements collected into a list. Unlike list_content the
 * list is never null, because the first alternative requires an element.
 * "error" tokens between elements are skipped.
 */
flow:
  flow_ {
    $$ = new list<auto_ptr<Element> >;
    $$->push_back(auto_ptr<Element>($1));
  }
  | flow error {
    $$ = $1;
  }
  | flow flow_ {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  ;

/*
 * A single flow element. A "texts" list holding exactly one element is
 * unwrapped: the element is released from the list and used directly, and
 * the now-unneeded list object is deleted. Any other "texts" list is wrapped
 * in a Paragraph. Headings and blocks are passed through unchanged.
 */
flow_:
  texts {
    if ($1->size() == 1) {
      $$ = $1->front().release();
      delete $1;
    } else {
      Paragraph *p = new Paragraph;
      p->texts.reset($1);
      $$ = p;
    }
  }
  | heading {          /* EXTENSION: Allow headings in "flow", i.e. in lists */
    $$ = $1;
  }
  | block {
    $$ = $1;
  }
  ;

/* <PRE> ... </PRE>: keeps the tag attributes and the (optional) texts. */
preformatted:
  PRE opt_texts END_PRE {
    $$ = new Preformatted;
    $$->attributes.reset($1);
    $$->texts.reset($2);
  }
  ;

/* <CAPTION> ... </CAPTION>: keeps the tag attributes and the texts. */
caption:
  CAPTION opt_texts END_CAPTION {
    $$ = new Caption;
    $$->attributes.reset($1);
    $$->texts.reset($2);
  }
  ;

/*
 * Zero or more table rows. In contrast to list_content, the empty
 * alternative already allocates an (empty) list, so the value is never
 * null. "</TR>" is optional; "error" tokens between rows are skipped.
 */
table_rows:
  /* empty */ {
    $$ = new list<auto_ptr<TableRow> >;
  }
  | table_rows error {
    $$ = $1;
  }
  | table_rows TR table_cells opt_END_TR {
    TableRow *tr = new TableRow;
    tr->attributes.reset($2);
    tr->cells.reset($3);
    ($$ = $1)->push_back(auto_ptr<TableRow>(tr));
  }
  ;

/*
 * Zero or more cells of one row; like table_rows the list is allocated up
 * front. <TD> yields a TableCell and <TH> a TableHeadingCell, both stored
 * as auto_ptr<TableCell>. A stray <INPUT> between cells is deleted and
 * otherwise ignored.
 */
table_cells:
  /* empty */ {
    $$ = new list<auto_ptr<TableCell> >;
  }
  | table_cells error {
    $$ = $1;
  }
  | table_cells TD body_content opt_END_TD {
    TableCell *tc = new TableCell;
    tc->attributes.reset($2);
    tc->content.reset($3);
    ($$ = $1)->push_back(auto_ptr<TableCell>(tc));
  }
  | table_cells TH body_content opt_END_TH opt_END_TD {
                            /* EXTENSION: Allow "</TD>" in place of "</TH>". */
    TableHeadingCell *thc = new TableHeadingCell;
    thc->attributes.reset($2);
    thc->content.reset($3);
    ($$ = $1)->push_back(auto_ptr<TableCell>(thc));
  }
  | table_cells INPUT {    /* EXTENSION: Ignore <INPUT> between table cells. */
    delete $2;
    $$ = $1;
  }
  ;

/* <ADDRESS> ... </ADDRESS>: the attributes are discarded, the texts kept. */
address:
  ADDRESS opt_texts END_ADDRESS { /* Should be "address_content"... */
    delete $1;
    $$ = new Address;
    $$->content.reset($2);
  }
  ;

/* ------------------------------------------------------------------------- */

/* One or more "text" elements collected into a (never null) list. */
texts:
  text {
    $$ = new list<auto_ptr<Element> >;
    $$->push_back(auto_ptr<Element>($1));
  }
  | texts text {
    ($$ = $1)->push_back(auto_ptr<Element>($2));
  }
  ;

/*
 * A single inline element. Every alternative is followed by opt_error, so
 * an "error" token directly after an inline element is accepted here. For
 * <NOBR> the tag attributes are discarded and the texts become the content.
 */
text:
  pcdata                    opt_error { $$ = $1; }
  | font                    opt_error { $$ = $1; }
  | phrase                  opt_error { $$ = $1; }
  | special                 opt_error { $$ = $1; }
  | form                    opt_error { $$ = $1; }
  | NOBR opt_texts END_NOBR opt_error { /* EXTENSION: NS 1.1 / IE 2.0 */
    NoBreak *nb = new NoBreak;
    delete $1;
    nb->content.reset($2);
    $$ = nb;
  }
  ;

/*
 * Font-style elements. The end tag is optional for all of them. The start
 * tag's value is deleted, and a Font node is built from the token code and
 * the enclosed texts.
 */
font:
  TT       opt_texts opt_END_TT     { delete $1; $$ = new Font(TT,     $2); }
  | I      opt_texts opt_END_I      { delete $1; $$ = new Font(I,      $2); }
  | B      opt_texts opt_END_B      { delete $1; $$ = new Font(B,      $2); }
  | U      opt_texts opt_END_U      { delete $1; $$ = new Font(U,      $2); }
  | STRIKE opt_texts opt_END_STRIKE { delete $1; $$ = new Font(STRIKE, $2); }
  | BIG    opt_texts opt_END_BIG    { delete $1; $$ = new Font(BIG,    $2); }
  | SMALL  opt_texts opt_END_SMALL  { delete $1; $$ = new Font(SMALL,  $2); }
  | SUB    opt_texts opt_END_SUB    { delete $1; $$ = new Font(SUB,    $2); }
  | SUP    opt_texts opt_END_SUP    { delete $1; $$ = new Font(SUP,    $2); }
  ;

/*
 * Phrase elements; same pattern as "font", but producing Phrase nodes.
 */
phrase:
  EM       opt_texts opt_END_EM     { delete $1; $$ = new Phrase(EM,     $2); }
  | STRONG opt_texts opt_END_STRONG { delete $1; $$ = new Phrase(STRONG, $2); }
  | DFN    opt_texts opt_END_DFN    { delete $1; $$ = new Phrase(DFN,    $2); }
  | CODE   opt_texts opt_END_CODE   { delete $1; $$ = new Phrase(CODE,   $2); }
  | SAMP   opt_texts opt_END_SAMP   { delete $1; $$ = new Phrase(SAMP,   $2); }
  | KBD    opt_texts opt_END_KBD    { delete $1; $$ = new Phrase(KBD,    $2); }
  | VAR    opt_texts opt_END_VAR    { delete $1; $$ = new Phrase(VAR,    $2); }
  | CITE   opt_texts opt_END_CITE   { delete $1; $$ = new Phrase(CITE,   $2); }
  ;

/*
 * Special inline elements: anchors, images, applets, <FONT>, <BASEFONT>,
 * <BR> and image maps. In the anchor alternative the value of the optional
 * <LI> ($2) is deleted and the flow ($3) is stored in the anchor's "texts"
 * member.
 */
special:
                        /* EXTENSION: Allow "flow" in <A>, not only "texts". */
                                 /* EXTENSION: Allow useless <LI> in anchor. */
                                               /* EXTENSION: "</A>" optional.*/
  A opt_LI opt_flow opt_END_A {
    delete $2;
    Anchor *a = new Anchor;
    a->attributes.reset($1);
    a->texts.reset($3);
    $$ = a;
  }
  | IMG {
    Image *i = new Image;
    i->attributes.reset($1);
    $$ = i;
  }
  | APPLET applet_content END_APPLET {
    Applet *a = new Applet;
    a->attributes.reset($1);
    a->content.reset($2);
    $$ = a;
  }
                   /* EXTENSION: "flow" in <FONT> allowed, not only "texts". */
                                           /* EXTENSION: "</FONT>" optional. */
  | FONT opt_flow opt_END_FONT {
    Font2 *f2 = new Font2;
    f2->attributes.reset($1);
    f2->elements.reset($2);
    $$ = f2;
  }
  | BASEFONT {
    BaseFont *bf = new BaseFont;
    bf->attributes.reset($1);
    $$ = bf;
  }
  | BR {
    LineBreak *lb = new LineBreak;
    lb->attributes.reset($1);
    $$ = lb;
  }
  | MAP map_content END_MAP {
    Map *m = new Map;
    m->attributes.reset($1);
    m->areas.reset($2);
    $$ = m;
  }
  ;

/*
 * Content of an <APPLET>: any mix of "text" elements and <PARAM> tags, in
 * document order. The list is allocated lazily (null while empty). Each
 * <PARAM> is wrapped in a Param node carrying the tag's attributes.
 */
applet_content:
  /* empty */ {
    $$ = 0;
  }
  | applet_content text {
    $$ = $1 ? $1 : new list<auto_ptr<Element> >;
    $$->push_back(auto_ptr<Element>($2));
  }
  | applet_content PARAM {
    $$ = $1 ? $1 : new list<auto_ptr<Element> >;
    Param *p = new Param;
    p->attributes.reset($2);
    $$->push_back(auto_ptr<Element>(p));
  }
  ;

/*
 * Content of a <MAP>: the attribute lists of the <AREA> tags, collected
 * into a lazily allocated list. "error" tokens are skipped.
 */
map_content:
  /* empty */ {
    $$ = 0;
  }
  | map_content error {
    $$ = $1;
  }
  | map_content AREA {
    $$ = $1 ? $1 : new list<auto_ptr<list<TagAttribute> > >;
    $$->push_back(auto_ptr<list<TagAttribute> >($2));
  }
  ;

/*
 * Form controls that may appear inline: <INPUT>, <SELECT> with its options,
 * and <TEXTAREA> with its character data.
 */
form:
  INPUT {
    Input *i = new Input;
    i->attributes.reset($1);
    $$ = i;
  }
  | SELECT select_content END_SELECT {
    Select *s = new Select;
    s->attributes.reset($1);
    s->content.reset($2);
    $$ = s;
  }
  | TEXTAREA pcdata END_TEXTAREA {
    TextArea *ta = new TextArea;
    ta->attributes.reset($1);
    ta->pcdata.reset($2);
    $$ = ta;
  }
  ;

/* One or more <OPTION> entries; at least one is required by this rule. */
select_content:
  option {
    $$ = new list<auto_ptr<Option> >;
    $$->push_back(auto_ptr<Option>($1));
  }
  | select_content option {
    ($$ = $1)->push_back(auto_ptr<Option>($2));
  }
  ;

/* A single <OPTION> with its character data; "</OPTION>" is optional. */
option:
  OPTION pcdata opt_END_OPTION {
    $$ = new Option;
    $$->attributes.reset($1);
    $$->pcdata.reset($2);
  }
  ;

/* ------------------------------------------------------------------------- */

/*
 * Heading start tags <H1>..<H6>: each creates a Heading node with the
 * matching level and the tag's attributes.
 */
HX:
  H1   { $$ = new Heading; $$->level = 1; $$->attributes.reset($1); }
  | H2 { $$ = new Heading; $$->level = 2; $$->attributes.reset($1); }
  | H3 { $$ = new Heading; $$->level = 3; $$->attributes.reset($1); }
  | H4 { $$ = new Heading; $$->level = 4; $$->attributes.reset($1); }
  | H5 { $$ = new Heading; $$->level = 5; $$->attributes.reset($1); }
  | H6 { $$ = new Heading; $$->level = 6; $$->attributes.reset($1); }
  ;

/* Heading end tags </H1>..</H6>: the value is the numeric level. */
END_HX:
  END_H1   { $$ = 1; }
  | END_H2 { $$ = 2; }
  | END_H3 { $$ = 3; }
  | END_H4 { $$ = 4; }
  | END_H5 { $$ = 5; }
  | END_H6 { $$ = 6; }
  ;

/* ------------------------------------------------------------------------- */

/*
 * Optional variants of value-carrying symbols: the value is 0 when the
 * symbol is absent and the symbol's own value otherwise.
 */
opt_pcdata:     /* empty */ { $$ = 0; } | pcdata  { $$ = $1; };
opt_caption:    /* empty */ { $$ = 0; } | caption { $$ = $1; };
opt_texts:      /* empty */ { $$ = 0; } | texts   { $$ = $1; };
opt_flow:       /* empty */ { $$ = 0; } | flow    { $$ = $1; };
opt_LI:         /* empty */ { $$ = 0; } | LI      { $$ = $1; };
opt_P:          /* empty */ { $$ = 0; } | P       { $$ = $1; };

/*
 * Optional end tags and the optional "error" token. These rules carry no
 * explicit action; they only make the corresponding token optional.
 */
opt_END_A:      /* empty */ | END_A;
opt_END_B:      /* empty */ | END_B;
opt_END_BIG:    /* empty */ | END_BIG;
opt_END_CENTER: /* empty */ | END_CENTER;
opt_END_CITE:   /* empty */ | END_CITE;
opt_END_CODE:   /* empty */ | END_CODE;
opt_END_DFN:    /* empty */ | END_DFN;
opt_END_DL:     /* empty */ | END_DL;
opt_END_EM:     /* empty */ | END_EM;
opt_END_FONT:   /* empty */ | END_FONT;
opt_END_I:      /* empty */ | END_I;
opt_END_KBD:    /* empty */ | END_KBD;
opt_END_LI:     /* empty */ | END_LI;
opt_END_OPTION: /* empty */ | END_OPTION;
opt_END_P:      /* empty */ | END_P;
opt_END_SAMP:   /* empty */ | END_SAMP;
opt_END_SMALL:  /* empty */ | END_SMALL;
opt_END_STRIKE: /* empty */ | END_STRIKE;
opt_END_STRONG: /* empty */ | END_STRONG;
opt_END_SUB:    /* empty */ | END_SUB;
opt_END_SUP:    /* empty */ | END_SUP;
opt_END_TD:     /* empty */ | END_TD;
opt_END_TH:     /* empty */ | END_TH;
opt_END_TITLE:  /* empty */ | END_TITLE;
opt_END_TR:     /* empty */ | END_TR;
opt_END_TT:     /* empty */ | END_TT;
opt_END_U:      /* empty */ | END_U;
opt_END_VAR:    /* empty */ | END_VAR;
opt_error:      /* empty */ | error;

%% /* } */

/*
 * Some C++ compilers (e.g. EGCS 2.91.66) have problems if all virtual
 * methods of a class are inline or pure virtual, so we define the destructor,
 * which is the only virtual method, non-inline, although it is empty.
 */

HTMLParser::~HTMLParser()
{
}

/* ------------------------------------------------------------------------- */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -