⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.cpp

📁 ecos实时嵌入式操作系统
💻 CPP
📖 第 1 页 / 共 2 页
字号:
//####COPYRIGHTBEGIN####//// ----------------------------------------------------------------------------// Copyright (C) 1998, 1999, 2000 Red Hat, Inc.//// This program is part of the eCos host tools.//// This program is free software; you can redistribute it and/or modify it// under the terms of the GNU General Public License as published by the Free// Software Foundation; either version 2 of the License, or (at your option)// any later version.//// This program is distributed in the hope that it will be useful, but WITHOUT// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for// more details.//// You should have received a copy of the GNU General Public License along with// this program; if not, write to the Free Software Foundation, Inc.,// 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.//// ----------------------------------------------------------------------------////####COPYRIGHTEND####// htmlparser.cpp :////===========================================================================//#####DESCRIPTIONBEGIN####//// Author(s):   julians// Contact(s):  julians// Date:        2001/04/04// Version:     $Id: htmlparser.cpp,v 1.6 2001/04/12 10:02:22 julians Exp $// Purpose:// Description: HTML parser/HTML Help file generator// Requires:// Provides:// See also:// Known bugs:// Usage:////####DESCRIPTIONEND####////===========================================================================// ============================================================================// declarations// ============================================================================// ----------------------------------------------------------------------------// headers// ----------------------------------------------------------------------------#ifdef __GNUG__#pragma implementation "htmlparser.h"#endif// Includes other headers for precompiled compilation#include "ecpch.h"#ifdef __BORLANDC__#pragma hdrstop#endif#include "wx/textfile.h"#include "wx/wfstream.h"#include "ecutils.h"#include "htmlparser.h"/* * wxSimpleHtmlAttribute * Representation of an attribute */wxSimpleHtmlParser::wxSimpleHtmlParser(){    m_topLevel = NULL;    m_pos = 0;}wxSimpleHtmlParser::~wxSimpleHtmlParser(){    Clear();}bool wxSimpleHtmlParser::ParseFile(const wxString& filename){    wxTextFile textFile;    if (textFile.Open(filename))    {        wxString text;        wxString line;        int i;        int count = textFile.GetLineCount();        for (i = 0; i < count; i++)        {            if (i == 0)                line = textFile.GetFirstLine();            else                line = textFile.GetNextLine();            text += line;            if (i != (count - 1))                text += wxT("\n");        }#if 0        for ( line = textFile.GetFirstLine(); !textFile.Eof(); line = textFile.GetNextLine() )        {            text += line;            if (!textFile.Eof())                text += wxT("\n");        }#endif        return ParseString(text);    }    else        return FALSE;}bool wxSimpleHtmlParser::ParseString(const wxString& str){    Clear();    m_pos = 0;    m_text = str;    m_length = str.Length();    m_topLevel = new wxSimpleHtmlTag(wxT("TOPLEVEL"), wxSimpleHtmlTag_TopLevel);    return ParseHtml(m_topLevel);}// Main recursive parsing functionbool wxSimpleHtmlParser::ParseHtml(wxSimpleHtmlTag* parent){    while (!Eof())    {        EatWhitespace();        if (IsComment())        {            ParseComment();        }        else if (IsDirective())        {            wxSimpleHtmlTag* tag = ParseDirective();            if (tag)                parent->AppendTag(tag);        }        else if (IsTagClose())        {            wxSimpleHtmlTag* tag = ParseTagClose();            if (tag)                parent->AppendTag(tag);        }        else if (IsTagStartBracket(GetChar(m_pos)))        {            wxSimpleHtmlTag* tag = ParseTagHeader();            if (tag)                parent->AppendTag(tag);        }        else        {            // Just a text string            wxString text;            ParseText(text);            wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(wxT("TEXT"), wxSimpleHtmlTag_Text);            tag->SetText(text);            parent->AppendTag(tag);        }    }    return TRUE;}// Plain text, up until an angled bracketbool wxSimpleHtmlParser::ParseText(wxString& text){    while (!Eof() && GetChar(m_pos) != wxT('<'))    {        text += GetChar(m_pos);        m_pos ++;    }    return TRUE;}wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagHeader(){    if (IsTagStartBracket(GetChar(m_pos)))    {        m_pos ++;        EatWhitespace();        wxString word;        ReadWord(word, TRUE);        EatWhitespace();        wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Open);        ParseAttributes(tag);        EatWhitespace();        if (IsTagEndBracket(GetChar(m_pos)))            m_pos ++;        return tag;    }    else        return NULL;}wxSimpleHtmlTag* wxSimpleHtmlParser::ParseTagClose(){    Matches(wxT("</"), TRUE);    EatWhitespace();    wxString word;    ReadWord(word, TRUE);    EatWhitespace();    m_pos ++;    wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Close);    return tag;}bool wxSimpleHtmlParser::ParseAttributes(wxSimpleHtmlTag* tag){    // Parse attributes of a tag header until we reach >    while (!IsTagEndBracket(GetChar(m_pos)) && !Eof())    {        EatWhitespace();        wxString attrName, attrValue;        if (IsString())        {            ReadString(attrName, TRUE);            tag->AppendAttribute(attrName, wxEmptyString);        }        else if (IsNumeric(GetChar(m_pos)))        {            ReadNumber(attrName, TRUE);            tag->AppendAttribute(attrName, wxEmptyString);        }        else        {            // Try to read an attribute name/value pair, or at least a name            // without the value            ReadLiteral(attrName, TRUE);            EatWhitespace();            if (GetChar(m_pos) == wxT('='))            {                m_pos ++;                EatWhitespace();                if (IsString())                    ReadString(attrValue, TRUE);                else if (!Eof() && !IsTagEndBracket(GetChar(m_pos)))                    ReadLiteral(attrValue, TRUE);            }            if (!attrName.IsEmpty())                tag->AppendAttribute(attrName, attrValue);        }    }    return TRUE;}// e.g. <!DOCTYPE ....>wxSimpleHtmlTag* wxSimpleHtmlParser::ParseDirective(){    Matches(wxT("<!"), TRUE);    EatWhitespace();    wxString word;    ReadWord(word, TRUE);    EatWhitespace();    wxSimpleHtmlTag* tag = new wxSimpleHtmlTag(word, wxSimpleHtmlTag_Directive);    ParseAttributes(tag);    EatWhitespace();    if (IsTagEndBracket(GetChar(m_pos)))        m_pos ++;    return tag;}bool wxSimpleHtmlParser::ParseComment(){    // Eat the comment tag start    Matches(wxT("<!--"), TRUE);    while (!Eof() && !Matches(wxT("-->"), TRUE))    {        m_pos ++;    }    return TRUE;}bool wxSimpleHtmlParser::EatWhitespace(){    while (!Eof() && IsWhitespace(GetChar(m_pos)))        m_pos ++;    return TRUE;}bool wxSimpleHtmlParser::EatWhitespace(int& pos){    while (!Eof(pos) && IsWhitespace(GetChar(pos)))        pos ++;    return TRUE;}bool wxSimpleHtmlParser::ReadString(wxString& str, bool eatIt){    int pos = m_pos;    if (GetChar(pos) == (int) '"')    {        pos ++;        while (!Eof(pos) && GetChar(pos) != (int) '"')        {            // TODO: how are quotes escaped in HTML?            str += (wxChar) GetChar(pos);            pos ++;        }        if (GetChar(pos) == (int) '"')            pos ++;        if (eatIt)            m_pos = pos;        return TRUE;    }    else        return FALSE;}bool wxSimpleHtmlParser::ReadWord(wxString& str, bool eatIt){    int pos = m_pos;    if (!IsAlpha(GetChar(pos)))        return FALSE;    str += (wxChar) GetChar(pos) ;    pos ++;    while (!Eof(pos) && IsWordChar(GetChar(pos)))    {        str += (wxChar) GetChar(pos);        pos ++;    }    if (eatIt)        m_pos = pos;    return TRUE;}bool wxSimpleHtmlParser::ReadNumber(wxString& str, bool eatIt){    int pos = m_pos;    if (!IsNumeric(GetChar(pos)))        return FALSE;    str += (wxChar) GetChar(pos) ;    pos ++;    while (!Eof(pos) && IsNumeric(GetChar(pos)))    {        str += (wxChar) GetChar(pos);        pos ++;    }    if (eatIt)        m_pos = pos;    return TRUE;}// Could be number, string, whatever, but read up until whitespace or end of tag (but not a quoted string)bool wxSimpleHtmlParser::ReadLiteral(wxString& str, bool eatIt){    int pos = m_pos;    while (!Eof(pos) && !IsWhitespace(GetChar(pos)) && !IsTagEndBracket(GetChar(pos)) && GetChar(pos) != wxT('='))    {        str += GetChar(pos);        pos ++;    }    if (eatIt)        m_pos = pos;    return TRUE;}bool wxSimpleHtmlParser::IsTagClose()

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -