📄 htmlparser.cpp
字号:
//####COPYRIGHTBEGIN####//// ----------------------------------------------------------------------------// Copyright (C) 1998, 1999, 2000 Red Hat, Inc.//// This program is part of the eCos host tools.//// This program is free software; you can redistribute it and/or modify it// under the terms of the GNU General Public License as published by the Free// Software Foundation; either version 2 of the License, or (at your option)// any later version.//// This program is distributed in the hope that it will be useful, but WITHOUT// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for// more details.//// You should have received a copy of the GNU General Public License along with// this program; if not, write to the Free Software Foundation, Inc.,// 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.//// ----------------------------------------------------------------------------////####COPYRIGHTEND####// htmlparser.cpp :////===========================================================================//#####DESCRIPTIONBEGIN####//// Author(s): julians// Contact(s): julians// Date: 2001/04/04// Version: $Id: htmlparser.cpp,v 1.6 2001/04/12 10:02:22 julians Exp $// Purpose:// Description: HTML parser/HTML Help file generator// Requires:// Provides:// See also:// Known bugs:// Usage:////####DESCRIPTIONEND####////===========================================================================// ============================================================================// declarations// ============================================================================// ----------------------------------------------------------------------------// headers// ----------------------------------------------------------------------------#ifdef __GNUG__#pragma implementation "htmlparser.h"#endif// Includes other headers for precompiled compilation#include "ecpch.h"#ifdef __BORLANDC__#pragma hdrstop#endif#include "wx/textfile.h"#include "wx/wfstream.h"#include "ecutils.h"#include "htmlparser.h"/* * wxSimpleHtmlAttribute * Representation of an attribute */wxSimpleHtmlParser::wxSimpleHtmlParser(){ m_topLevel = NULL; m_pos = 0;}wxSimpleHtmlParser::~wxSimpleHtmlParser(){ Clear();}bool wxSimpleHtmlParser::ParseFile(const wxString& filename){ wxTextFile textFile; if (textFile.Open(filename)) { wxString text; wxString line; int i; int count = textFile.GetLineCount(); for (i = 0; i < count; i++) { if (i == 0) line = textFile.GetFirstLine(); else line = textFile.GetNextLine(); text += line; if (i != (count - 1)) text += wxT("\n"); }#if 0 for ( line = textFile.GetFirstLine(); !textFile.Eof(); line = textFile.GetNextLine() ) { text += line; if (!textFile.Eof()) text += wxT("\n"); }#endif return ParseString(text); } else return FALSE;}bool wxSimpleHtmlParser::ParseString(const wxString& str){ Clear(); m_pos = 0; m_text = str; m_length = str.Length(); m_topLevel = new wxSimpleHtmlTag(wxT("TOPLEVEL"), wxSimpleHtmlTag_TopLevel); return ParseHtml(m_topLevel);}// Main recursive parsing functionbool wxSimpleHtmlParser::ParseHtml(wxSimpleHtmlTag* parent){ while (!Eof()) { EatWhitespace(); if (IsComment()) { ParseComment(); } else if (IsDirective()) { wxSimpleHtmlTag* tag = ParseDirective(); if (tag) parent->AppendTag(tag); } else if (IsTagClose()) { wxSimpleHtmlTag* tag = ParseTagClose(); if (tag) parent->AppendTag(tag); } else if (IsTagStartBracket(GetChar(m_pos))) { wxSimpleHtmlTag* tag = ParseTagHeader(); if (tag) parent->AppendTag(tag); } else { // Just a text string wxString text; ParseText(text); wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(wxT("TEXT"), wxSimpleHtmlTag_Text); tag->SetText(text); parent->AppendTag(tag); } } return TRUE;}// Plain text, up until an angled bracketbool wxSimpleHtmlParser::ParseText(wxString& text){ while (!Eof() && GetChar(m_pos) != wxT('<')) { text += GetChar(m_pos); m_pos ++; } return TRUE;}wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagHeader(){ if (IsTagStartBracket(GetChar(m_pos))) { m_pos ++; EatWhitespace(); wxString word; ReadWord(word, TRUE); EatWhitespace(); wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Open); ParseAttributes(tag); EatWhitespace(); if (IsTagEndBracket(GetChar(m_pos))) m_pos ++; return tag; } else return NULL;}wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagClose(){ Matches(wxT("</"), TRUE); EatWhitespace(); wxString word; ReadWord(word, TRUE); EatWhitespace(); m_pos ++; wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Close); return tag;}bool wxSimpleHtmlParser::ParseAttributes(wxSimpleHtmlTag* tag){ // Parse attributes of a tag header until we reach > while (!IsTagEndBracket(GetChar(m_pos)) && !Eof()) { EatWhitespace(); wxString attrName, attrValue; if (IsString()) { ReadString(attrName, TRUE); tag->AppendAttribute(attrName, wxEmptyString); } else if (IsNumeric(GetChar(m_pos))) { ReadNumber(attrName, TRUE); tag->AppendAttribute(attrName, wxEmptyString); } else { // Try to read an attribute name/value pair, or at least a name // without the value ReadLiteral(attrName, TRUE); EatWhitespace(); if (GetChar(m_pos) == wxT('=')) { m_pos ++; EatWhitespace(); if (IsString()) ReadString(attrValue, TRUE); else if (!Eof() && !IsTagEndBracket(GetChar(m_pos))) ReadLiteral(attrValue, TRUE); } if (!attrName.IsEmpty()) tag->AppendAttribute(attrName, attrValue); } } return TRUE;}// e.g. <!DOCTYPE ....>wxSimpleHtmlTag* wxSimpleHtmlParser::ParseDirective(){ Matches(wxT("<!"), TRUE); EatWhitespace(); wxString word; ReadWord(word, TRUE); EatWhitespace(); wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Directive); ParseAttributes(tag); EatWhitespace(); if (IsTagEndBracket(GetChar(m_pos))) m_pos ++; return tag;}bool wxSimpleHtmlParser::ParseComment(){ // Eat the comment tag start Matches(wxT("<!--"), TRUE); while (!Eof() && !Matches(wxT("-->"), TRUE)) { m_pos ++; } return TRUE;}bool wxSimpleHtmlParser::EatWhitespace(){ while (!Eof() && IsWhitespace(GetChar(m_pos))) m_pos ++; return TRUE;}bool wxSimpleHtmlParser::EatWhitespace(int& pos){ while (!Eof(pos) && IsWhitespace(GetChar(pos))) pos ++; return TRUE;}bool wxSimpleHtmlParser::ReadString(wxString& str, bool eatIt){ int pos = m_pos; if (GetChar(pos) == (int) '"') { pos ++; while (!Eof(pos) && GetChar(pos) != (int) '"') { // TODO: how are quotes escaped in HTML? str += (wxChar) GetChar(pos); pos ++; } if (GetChar(pos) == (int) '"') pos ++; if (eatIt) m_pos = pos; return TRUE; } else return FALSE;}bool wxSimpleHtmlParser::ReadWord(wxString& str, bool eatIt){ int pos = m_pos; if (!IsAlpha(GetChar(pos))) return FALSE; str += (wxChar) GetChar(pos) ; pos ++; while (!Eof(pos) && IsWordChar(GetChar(pos))) { str += (wxChar) GetChar(pos); pos ++; } if (eatIt) m_pos = pos; return TRUE;}bool wxSimpleHtmlParser::ReadNumber(wxString& str, bool eatIt){ int pos = m_pos; if (!IsNumeric(GetChar(pos))) return FALSE; str += (wxChar) GetChar(pos) ; pos ++; while (!Eof(pos) && IsNumeric(GetChar(pos))) { str += (wxChar) GetChar(pos); pos ++; } if (eatIt) m_pos = pos; return TRUE;}// Could be number, string, whatever, but read up until whitespace or end of tag (but not a quoted string)bool wxSimpleHtmlParser::ReadLiteral(wxString& str, bool eatIt){ int pos = m_pos; while (!Eof(pos) && !IsWhitespace(GetChar(pos)) && !IsTagEndBracket(GetChar(pos)) && GetChar(pos) != wxT('=')) { str += GetChar(pos); pos ++; } if (eatIt) m_pos = pos; return TRUE;}bool wxSimpleHtmlParser::IsTagClose()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -