📄 htmlcontrol.c
字号:
/* ------------------------------------------------------------------------- *//* * Copyright (c) 1999 * GMRS Software GmbH, Innsbrucker Ring 159, 81669 Munich, Germany. * http://www.gmrs.de * All rights reserved. * Author: Arno Unkrig (arno.unkrig@gmrs.de) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by GMRS Software GmbH. * 4. The name of GMRS Software GmbH may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY GMRS SOFTWARE GMBH ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GMRS SOFTWARE GMBH BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. *//* ------------------------------------------------------------------------- */#ident "$Id: HTMLControl.C,v 1.15 1999/12/08 20:36:12 arno Exp $"#include <iostream.h>#include <stdlib.h>#include <string.h>#include <ctype.h>#include "html.h"#include "HTMLControl.h"#include "sgml.h"#include "cmp_nocase.h"#ifndef nelems#define nelems(array) (sizeof(array) / sizeof((array)[0]))#endifenum { NOT_A_TAG, START_TAG, BLOCK_START_TAG, END_TAG, BLOCK_END_TAG, NON_CONTAINER_TAG};/* ------------------------------------------------------------------------- *//* * Effectively, this method simply invokes "yylex2()", but it does some * postprocessing on PCDATA tokens that would be difficult to do in "yylex2()". */intHTMLControl::yylex(yy_HTMLParser_stype *value_return){ for (;;) { // Notice the "return" at the end of the body! int token, tag_type; if (next_token == EOF) { token = yylex2(value_return, &tag_type); } else { token = next_token; *value_return = next_token_value; tag_type = next_token_tag_type; next_token = EOF; } /* * Switch on/off "literal mode" on "<PRE>" and "</PRE>". */ if (token == PRE) { literal_mode = true; /* * Swallow '\n' immediately following "<PRE>". */ if (next_token == EOF) { next_token = yylex2(&next_token_value, &next_token_tag_type); } if (next_token == PCDATA) { string &s(*next_token_value.strinG); if (!s.empty() && s[0] == '\n') s.erase(0, 1); } } if (token == END_PRE) literal_mode = false; if (token == PCDATA) { /* * In order to post-process the PCDATA token, we need to look ahead one * token... */ if (next_token == EOF) { next_token = yylex2(&next_token_value, &next_token_tag_type); } /* * Erase " '\n' { ' ' } " immediately before "</PRE>". */ if (next_token == END_PRE) { string &s(*value_return->strinG); string::size_type x = s.length(); while (x > 0 && s[x - 1] == ' ') --x; if (x > 0 && s[x - 1] == '\n') s.erase(x - 1, string::npos); } else /* * Erase whitespace before end tag or block start tag. */ if (!literal_mode && ( next_token_tag_type == END_TAG || next_token_tag_type == BLOCK_END_TAG || next_token_tag_type == BLOCK_START_TAG )) { string &s(*value_return->strinG); string::size_type x = s.length(); while (x > 0 && isspace(s[x - 1])) --x; s.erase(x, string::npos); } /* * Collate sequences of whitespace, if not in "literal mode". */ if (!literal_mode) { string &s(*value_return->strinG);// bool whitespace_only = true; for (string::size_type x = 0; x < s.length(); ++x) { if (isspace(s[x])) { string::size_type y; for (y = x + 1; y < s.length() && isspace(s[y]); ++y); s.replace(x, y - x, " "); } else {// whitespace_only = false; } } if (s.empty()) { delete value_return->strinG; continue; } } } /* * Erase whitespace after start tag or block end tag, if not in "literal * mode". */ if (!literal_mode && ( ( tag_type == START_TAG || tag_type == BLOCK_START_TAG || tag_type == BLOCK_END_TAG || token == BR || token == HR ) && token != SCRIPT && token != STYLE )) { if (next_token == EOF) { next_token = yylex2(&next_token_value, &next_token_tag_type); } if (next_token == PCDATA) { string &s(*next_token_value.strinG); string::size_type x; for (x = 0; x < s.length() && isspace(s[x]); ++x); if (x > 0) s.erase(0, x); if (s.empty()) { delete next_token_value.strinG; next_token = EOF; } } } return token; }}/* ------------------------------------------------------------------------- *//* * Keep this array sorted alphabetically! */static const struct TextToIntP { char name[11]; char block_tag; const int *start_tag_code; const int *end_tag_code;} tag_names[] = {#define pack1(tag) { #tag, 0, &HTMLParser::tag, 0 }#define pack2(tag) { #tag, 0, &HTMLParser::tag, &HTMLParser::END_##tag }#define pack3(tag) { #tag, 1, &HTMLParser::tag, &HTMLParser::END_##tag } pack2(A), pack3(ADDRESS), pack2(APPLET), pack1(AREA), pack2(B), pack1(BASE), pack1(BASEFONT), pack2(BIG), pack3(BLOCKQUOTE), pack3(BODY), pack1(BR), pack3(CAPTION), pack3(CENTER), pack3(CITE), pack2(CODE), pack3(DD), pack2(DFN), pack3(DIR), pack3(DIV), pack3(DL), pack3(DT), pack2(EM), pack2(FONT), pack3(FORM), pack3(H1), pack3(H2), pack3(H3), pack3(H4), pack3(H5), pack3(H6), pack3(HEAD), pack1(HR), pack3(HTML), pack2(I), pack1(IMG), pack1(INPUT), pack1(ISINDEX), pack2(KBD), pack3(LI), pack1(LINK), pack2(MAP), pack3(MENU), pack1(META), pack2(NOBR), pack3(OL), pack3(OPTION), pack3(P), pack1(PARAM), pack3(PRE), pack2(SAMP), pack3(SCRIPT), pack2(SELECT), pack2(SMALL), pack2(STRIKE), pack2(STRONG), pack3(STYLE), pack2(SUB), pack2(SUP), pack3(TABLE), pack3(TD), pack2(TEXTAREA), pack3(TH), pack3(TITLE), pack3(TR), pack2(TT), pack2(U), pack3(UL), pack2(VAR),#undef pack};/* ------------------------------------------------------------------------- */intHTMLControl::yylex2(yy_HTMLParser_stype *value_return, int *tag_type_return){ int c; *tag_type_return = NOT_A_TAG; for (;;) { // Notice the "return" at the end of this loop. /* * Get the first character of the token. */ c = get_char(); if (c == EOF) return EOF;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -