📄 htmlcontrol.c
字号:
if (c == '<') { /* * Examine the first character of the tag. */ c = get_char(); if (c == '!') { c = get_char(); if (c == '-') { c = get_char(); if (c != '-') return SCAN_ERROR; /* * This is a comment... skip it! * * <!-- Single-line comment --> * * <!-- Multi- * line * comment //--> * * EXTENSION: Allow "-->" as the terminator of a multi-line comment. */ int state = 0; do { c = get_char(); if (c == EOF) return SCAN_ERROR; switch (state) { case 0: if (c == '-') state = 1; break; case 1: state = c == '-' ? 2 : 0; break; case 2: state = c == '>' ? 3 : c == '-' ? 2 : 0; break; } } while (state != 3); continue; // Start over } /* * Scan "<!DOCTYPE ...>" tag. */ if (!isalpha(c)) return SCAN_ERROR; string tag_name(1, '!'); tag_name += c; for (;;) { c = get_char(); if (!isalnum(c) && c != '-') break; tag_name += c; } if (cmp_nocase(tag_name, "!DOCTYPE") != 0) return SCAN_ERROR; while (c != '>') { c = get_char(); if (c == EOF || c == '\n') return SCAN_ERROR; } return DOCTYPE; } if (c == '/' || isalpha(c) || c == '_') { string tag_name; bool is_end_tag = false; if (c == '/') { is_end_tag = true; c = get_char(); } if (!isalpha(c) && c != '_') return SCAN_ERROR; tag_name += c; for (;;) { c = get_char(); if (!isalnum(c) && c != '-' && c != '_') break; tag_name += c; } while (isspace(c)) c = get_char(); /* * Scan tag attributes (only for opening tags). Create the * "tag_attributes" only on demand; this saves a lot of overhead. */ auto_ptr<list<TagAttribute> > tag_attributes; if (!is_end_tag) { while (isalpha(c) || c == '_') { TagAttribute attribute; /* * Scan attribute name. */ attribute.first = c; for (;;) { c = get_char(); if (!isalpha(c) && c != '-' && c != '_') break; attribute.first += c; } while (isspace(c)) c = get_char(); // Skip WS after attribute name /* * Scan (optional) attribute value. */ if (c == '=') { c = get_char(); while (isspace(c)) c = get_char(); if (c == '"' || c == '\'') { int closing_quote = c; // Same as opening quote! for (;;) { c = get_char(); if (c == EOF || c == '\n') return SCAN_ERROR; if (c == closing_quote) break; /* * Do *not* interpret "ä" and consorts here! This * would ruin tag attributes like "HREF=hhh?a=1&b=2". */ attribute.second += c; } c = get_char(); // Get next char after closing quote. } else while (c != '>' && c > ' ') { if (c == EOF || c == '\n') return SCAN_ERROR; attribute.second += c; c = get_char(); } while (isspace(c)) c = get_char(); // Skip WS after attr value } /* * Store the attribute. */ if (!tag_attributes.get()) { tag_attributes.reset(new list<TagAttribute>); } tag_attributes->push_back(attribute); } } if (c != '>') return SCAN_ERROR; if (debug_scanner) { cerr << "Scanned tag \"<" << (is_end_tag ? "/" : "") << tag_name; if (!is_end_tag && tag_attributes.get()) { const list<TagAttribute> &ta(*tag_attributes); list<TagAttribute>::const_iterator j; for (j = ta.begin(); j != ta.end(); ++j) { cerr << " " << (*j).first << "=\"" << (*j).second << "\""; } } cerr << ">\"" << endl; } /* * Look up the tag in the table of recognized tags. */ static int (*const f)(const char *, const char *) = cmp_nocase; const TextToIntP *tag = (const TextToIntP *) bsearch( tag_name.c_str(), tag_names, nelems(tag_names), sizeof(TextToIntP), (int (*)(const void *, const void *)) f ); if (tag == NULL) { /* EXTENSION: Swallow unknown tags. */ if (debug_scanner) { cerr << "Tag unknown -- swallowed." << endl; } continue; } /* * Return the BISON token code for the tag. */ if (is_end_tag) { if (!tag->end_tag_code) { if (debug_scanner) { cerr << "Non-container end tag scanned." << endl; } continue; } *tag_type_return = tag->block_tag ? BLOCK_END_TAG : END_TAG; return *tag->end_tag_code; } else { *tag_type_return = ( !tag->end_tag_code ? NON_CONTAINER_TAG : tag->block_tag ? BLOCK_START_TAG : START_TAG ); value_return->tag_attributes = tag_attributes.release(); return *tag->start_tag_code; } } /* * EXTENSION: This tag did not match "<!", and not "</", and not * "<[A-Za-z-]", so take it as literal text. */ unget_char(c); c = '<'; } if (c == '\n' || c >= ' ') { string *s = value_return->strinG = new string; while (c != EOF) { /* * Accept literal '<' in some cases. */ if (c == '<') { int c2; unget_char(c2 = get_char()); if (c2 == '!' || c2 == '/' || isalpha(c2)) { unget_char(c); break; } } *s += c; c = get_char(); } replace_sgml_entities(s); // Replace "ä" and consorts. /* * Swallow empty PCDATAs. */ if (s->empty()) { delete s; continue; } if (debug_scanner) cerr << "Scanned PCDATA \"" << *s << "\"" << endl; return PCDATA; } return SCAN_ERROR; }}/* ------------------------------------------------------------------------- */boolHTMLControl::read_cdata(const char *terminal, string *value_return){ string &s(*value_return); int c; int state = 0; for (;;) { c = get_char(); if (c == EOF) return false; if (toupper(c) == terminal[state]) { state++; if (terminal[state] == '\0') { s.erase(s.length() - state); return true; } } else { state = 0; } s += c; }}/* ------------------------------------------------------------------------- */intHTMLControl::get_char(){ if (number_of_ungotten_chars > 0) { return ungotten_chars[--number_of_ungotten_chars]; } int c = is.get(); while (c == '\r') c = is.get(); if (c == EOF) { ; } else if (c == '\n') { current_line++; current_column = 0; } else { current_column++; } return c;}/* ------------------------------------------------------------------------- */voidHTMLControl::unget_char(int c){ if (number_of_ungotten_chars == nelems(ungotten_chars)) { yyerror("Too many chars ungotten"); return; } ungotten_chars[number_of_ungotten_chars++] = c;}/* ------------------------------------------------------------------------- */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -