📄 xml_tokenizer.cpp
字号:
/* $Id: xml_tokenizer.cpp,v 1.18 2003/12/22 17:03:41 mbn Exp $
**
** ClanLib Game SDK
** Copyright (C) 2003 The ClanLib Team
** For a total list of contributers see the file CREDITS.
**
** This library is free software; you can redistribute it and/or
** modify it under the terms of the GNU Lesser General Public
** License as published by the Free Software Foundation; either
** version 2.1 of the License, or (at your option) any later version.
**
** This library is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
** Lesser General Public License for more details.
**
** You should have received a copy of the GNU Lesser General Public
** License along with this library; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include "Core/precomp.h"
#include "API/Core/XML/xml_tokenizer.h"
#include "API/Core/XML/xml_token_load.h"
#include "API/Core/System/clanstring.h"
#include "API/Core/System/error.h"
#include "xml_tokenizer_generic.h"
#include "API/Core/XML/xml_token_string.h"
#include <algorithm>
#include <utility>
//#include <iterator>
//std::string replace_escapes(std::string str);
//template <typename Iter>
//std::string replace_escapes_fast(Iter begin, Iter end);
/////////////////////////////////////////////////////////////////////////////
// CL_XMLTokenizer construction:
// Constructs a null tokenizer: no implementation object is attached (impl is 0).
// Calling next() on such a tokenizer returns a null token.
CL_XMLTokenizer::CL_XMLTokenizer() : impl(0)
{
}
// Copy constructor: the new handle points at the same implementation object
// as 'copy' and takes one additional reference on it (when there is one).
CL_XMLTokenizer::CL_XMLTokenizer(const CL_XMLTokenizer &copy) : impl(copy.impl)
{
	if (impl)
		impl->add_ref();
}
// Constructs a tokenizer over the complete contents of 'input'.
//
// input        - source to read from. input->size() bytes are read into the
//                internal buffer immediately; tokenizing then works on that
//                buffer, starting at position 0.
// delete_input - stored on the implementation object together with the input
//                pointer (presumably it makes the implementation delete
//                'input' on destruction - confirm in xml_tokenizer_generic.h).
CL_XMLTokenizer::CL_XMLTokenizer(CL_InputSource *input, bool delete_input) : impl(new CL_XMLTokenizer_Generic)
{
	impl->input = input;
	impl->delete_input = delete_input;
	impl->size = input->size();
	impl->data.resize(impl->size);

	// Nothing to read for an empty source; this also avoids taking the
	// address of element 0 of an empty buffer.
	if (!impl->data.empty())
		input->read(&impl->data[0], impl->size);

	impl->pos = 0;
	impl->add_ref();
}
// Destructor: gives up this handle's reference on the implementation object.
// A null tokenizer has nothing to release.
CL_XMLTokenizer::~CL_XMLTokenizer()
{
	if (impl == 0)
		return;

	impl->release_ref();
}
/////////////////////////////////////////////////////////////////////////////
// CL_XMLTokenizer attributes:
bool CL_XMLTokenizer::get_eat_whitespace() const
{
return impl->eat_whitespace;
}
void CL_XMLTokenizer::set_eat_whitespace(bool enable)
{
impl->eat_whitespace = enable;
}
/////////////////////////////////////////////////////////////////////////////
// CL_XMLTokenizer operations:
// Assignment: makes this handle share the implementation object of 'copy'.
//
// The reference on the new implementation is taken BEFORE the old one is
// released. With the opposite order, self-assignment (or assigning a handle
// that shares our implementation) could drop the last reference first; if
// release_ref() destroys the object when the count reaches zero (its
// definition is not in this file), the following add_ref() would then touch
// a dead object.
//
// Returns a reference to this tokenizer.
CL_XMLTokenizer &CL_XMLTokenizer::operator =(const CL_XMLTokenizer &copy)
{
	if (copy.impl)
		copy.impl->add_ref();

	if (impl)
		impl->release_ref();

	impl = copy.impl;
	return *this;
}
// Reads the next token from the buffered XML data and moves the read position
// past it.
//
// Returns:
//   - a null token (default-constructed CL_XMLTokenLoad) when this tokenizer
//     is null or the end of the data has been reached;
//   - a TEXT_TOKEN for character data up to the next '<' (or end of data).
//     With eat_whitespace enabled the text is trimmed and, if nothing is
//     left, skipped in favour of the following token;
//   - a COMMENT_TOKEN for "<!-- ... -->" and a CDATA_SECTION_TOKEN for
//     "<![CDATA[ ... ]]>", both with variant SINGLE;
//   - a PROCESSING_INSTRUCTION_TOKEN for tags starting with "<?", otherwise
//     an ELEMENT_TOKEN, carrying the tag name and its attributes. The variant
//     is END for "</name", BEGIN otherwise, and SINGLE when the tag closes
//     with "/>" or "?>".
//
// Throws CL_Error when the data ends in the middle of a construct, when "<!"
// is followed by anything other than "--" or "[CDATA[", when an attribute
// name is not followed by '=', or when a tag is not terminated by '>'.
CL_XMLTokenLoad CL_XMLTokenizer::next()
{
	if (impl == 0)
		return CL_XMLTokenLoad();

	if (impl->pos == impl->size)
		return CL_XMLTokenLoad(); // EOF, return null token.

	// Handed unchanged to every CL_XMLTokenString created below; its exact
	// meaning is defined by that class.
	// NOTE(review): the same value is used for comments and CDATA sections.
	// If the flag requests entity replacement, that is probably not wanted
	// for CDATA - confirm against CL_XMLTokenString.
	bool is_need_escape = true;

	if (impl->data[impl->pos] != '<') // Text node
	{
		// The text runs up to the next '<', or to the end of the data.
		std::string::size_type start_pos = impl->pos;
		std::string::size_type end_pos = impl->data.find('<', start_pos);
		if (end_pos == impl->data.npos) end_pos = impl->size;
		impl->pos = end_pos;

		CL_XMLTokenString text(&impl->data[start_pos], end_pos-start_pos, is_need_escape);
		if (impl->eat_whitespace)
		{
			text = trim_whitespace(text);
			// Whitespace-only text is dropped; deliver whatever comes next.
			if (text.empty())
				return next();
		}

		CL_XMLTokenLoad token;
		token.set_type(CL_XMLToken::TEXT_TOKEN);
		token.set_value(text);
		return token;
	}
	else // Tag node
	{
		// Step over the '<'.
		impl->pos++;
		if (impl->pos == impl->size)
			throw CL_Error("Premature end of XML data!");

		// Try to early predict what sort of node it might be:
		bool closing = false;
		bool questionMark = false;
		bool exclamationMark = false;

		if (impl->data[impl->pos] == '/')
			closing = true;
		else
		if (impl->data[impl->pos] == '?')
			questionMark = true;
		else
		if (impl->data[impl->pos] == '!')
			exclamationMark = true;

		// Step over the marker character, if there was one.
		if (closing || questionMark || exclamationMark)
		{
			impl->pos++;
			if (impl->pos == impl->size) throw CL_Error("Premature end of XML data!");
		}

		if (exclamationMark) // check for cdata section or comments
		{
			if (impl->data.compare(impl->pos, 2, "--") == 0) // comment block
			{
				// The value is everything between "<!--" and "-->".
				std::string::size_type start_pos = impl->pos+2;
				std::string::size_type end_pos = impl->data.find("-->", start_pos);
				if (end_pos == impl->data.npos)
					throw CL_Error("Premature end of XML data!");
				impl->pos = end_pos+3;

				CL_XMLTokenLoad token;
				token.set_type(CL_XMLToken::COMMENT_TOKEN);
				token.set_variant(CL_XMLToken::SINGLE);
				token.set_value(CL_XMLTokenString(&impl->data[start_pos], end_pos-start_pos, is_need_escape));
				return token;
			}

			// The only other "<!" construct accepted here is a CDATA section.
			if (impl->data.compare(impl->pos, 7, "[CDATA[") != 0)
				throw CL_Error(CL_String::format("Error in XML stream at position %1", static_cast<int>(impl->pos)));

			// The value is everything between "<![CDATA[" and "]]>".
			std::string::size_type start_pos = impl->pos+7;
			std::string::size_type end_pos = impl->data.find("]]>", start_pos);
			if (end_pos == impl->data.npos)
				throw CL_Error("Premature end of XML data!");
			impl->pos = end_pos+3;

			CL_XMLTokenLoad token;
			token.set_type(CL_XMLToken::CDATA_SECTION_TOKEN);
			token.set_variant(CL_XMLToken::SINGLE);
			token.set_value(CL_XMLTokenString(&impl->data[start_pos], end_pos-start_pos, is_need_escape));
			return token;
		}

		// Extract the tag name (it ends at whitespace, '?', '/' or '>'):
		std::string::size_type start_pos = impl->pos;
		std::string::size_type end_pos = impl->data.find_first_of(" \r\n\t?/>", start_pos);
		if (end_pos == impl->data.npos)
			throw CL_Error("Premature end of XML data!");
		impl->pos = end_pos;

		CL_XMLTokenLoad token;
		token.set_type(questionMark ? CL_XMLToken::PROCESSING_INSTRUCTION_TOKEN : CL_XMLToken::ELEMENT_TOKEN);
		token.set_variant(closing ? CL_XMLToken::END : CL_XMLToken::BEGIN);
		token.set_name(CL_XMLTokenString(&impl->data[start_pos], end_pos-start_pos, is_need_escape));

		// Check for possible attributes:
		while (true)
		{
			// Strip whitespace. The scan result goes into a local first so
			// that npos is never stored in impl->pos: a caller that catches
			// the error and calls next() again must not index out of range.
			std::string::size_type scan_pos = impl->data.find_first_not_of(" \r\n\t", impl->pos);
			if (scan_pos == impl->data.npos)
				throw CL_Error("Premature end of XML data!");
			impl->pos = scan_pos;

			// End of tag, stop searching for more attributes:
			if (impl->data[impl->pos] == '/' || impl->data[impl->pos] == '?' || impl->data[impl->pos] == '>')
				break;

			// Extract attribute name (it ends at whitespace or '='):
			std::string::size_type start_pos = impl->pos;
			std::string::size_type end_pos = impl->data.find_first_of(" \r\n\t=", start_pos);
			if (end_pos == impl->data.npos)
				throw CL_Error("Premature end of XML data!");
			impl->pos = end_pos;

			CL_XMLTokenString attributeName(&impl->data[start_pos], end_pos-start_pos, is_need_escape);

			// Find separator:
			scan_pos = impl->data.find_first_not_of(" \r\n\t", impl->pos);
			if (scan_pos == impl->data.npos)
				throw CL_Error("Premature end of XML data!");
			impl->pos = scan_pos;
			// A separator in the very last byte leaves no room for a value.
			if (impl->pos == impl->size-1)
				throw CL_Error("Premature end of XML data!");
			if (impl->data[impl->pos++] != '=')
				throw CL_Error(CL_String::format("XML error(s), parser confused at line %1 (tag=%2, attributeName=%3)", impl->get_line_number(), token.get_name(), attributeName.to_string()));

			// Strip whitespace:
			scan_pos = impl->data.find_first_not_of(" \r\n\t", impl->pos);
			if (scan_pos == impl->data.npos)
				throw CL_Error("Premature end of XML data!");
			impl->pos = scan_pos;

			// Extract attribute value. A value opened with " or ' ends at the
			// matching quote character; an unquoted value ends at the next
			// whitespace character.
			char const * first_of = " \r\n\t";
			if (impl->data[impl->pos] == '"')
			{
				first_of = "\"";
				impl->pos++;
				if (impl->pos == impl->size)
					throw CL_Error("Premature end of XML data!");
			}
			else
			if (impl->data[impl->pos] == '\'')
			{
				first_of = "'";
				impl->pos++;
				if (impl->pos == impl->size)
					throw CL_Error("Premature end of XML data!");
			}

			start_pos = impl->pos;
			end_pos = impl->data.find_first_of(first_of, start_pos);
			if (end_pos == impl->data.npos)
				throw CL_Error("Premature end of XML data!");

			CL_XMLTokenString attributeValue(CL_XMLTokenString(&impl->data[start_pos], end_pos-start_pos, is_need_escape));

			// Continue behind the terminating quote / whitespace character.
			impl->pos = end_pos + 1;
			if (impl->pos == impl->size)
				throw CL_Error("Premature end of XML data!");

			// Finally apply attribute to token:
			token.set_attribute(attributeName, attributeValue);
		}

		// Check if its singular ("/>" or "?>"):
		if (impl->data[impl->pos] == '/' || impl->data[impl->pos] == '?')
		{
			token.set_variant(CL_XMLToken::SINGLE);
			impl->pos++;
			if (impl->pos == impl->size)
				throw CL_Error("Premature end of XML data!");
		}

		// Data stream should be ending now.
		if (impl->data[impl->pos] != '>')
			throw CL_Error(CL_String::format("Error in XML stream, line %1 (expected end of tag)", impl->get_line_number()));
		impl->pos++;

		return token;
	}
}
/////////////////////////////////////////////////////////////////////////////
// CL_XMLTokenizer implementation:
/*
inline bool try_replace(std::string & str, std::string::size_type pos, std::string const & escape, char const * escape_char)
{
if (pos + escape.size() <= str.size())
if (std::equal(escape.begin(), escape.end(), str.begin() + pos))
{
str.replace(pos, escape.size(), escape_char);
return true;
}
return false;
}
std::string replace_escapes(std::string str)
{
std::string::size_type pos;
static std::string const amp("&amp;");
static std::string const quot("&quot;");
static std::string const apos("&apos;");
static std::string const lt("&lt;");
static std::string const gt("&gt;");
pos = 0;
while (pos != std::string::npos)
{
pos = str.find('&', pos);
if (pos == std::string::npos)
break;
if ( try_replace(str, pos, amp, "&")
|| try_replace(str, pos, quot, "\"")
|| try_replace(str, pos, apos, "\'")
|| try_replace(str, pos, gt, ">")
|| try_replace(str, pos, lt, "<") )
{
}
pos++;
}
return str;
}
*/
//std::string & replace_escapes(std::string & str)
//{
// std::string::size_type pos;
//
// static std::string const amp("&amp;");
// static std::string const quot("&quot;");
// static std::string const apos("&apos;");
// static std::string const lt("&lt;");
// static std::string const gt("&gt;");
//
// pos = 0;
// while (pos != std::string::npos)
// {
// pos = str.find('&', pos);
// if (pos == std::string::npos)
// break;
//
// if ( try_replace(str, pos, amp, "&")
// || try_replace(str, pos, quot, "\"")
// || try_replace(str, pos, apos, "\'")
// || try_replace(str, pos, gt, ">")
// || try_replace(str, pos, lt, "<") )
// {
// }
// pos++;
// }
//
// return str;
//}
/*
template <typename Container, typename Iter>
inline bool append_escape(Container & buff, Iter & begin, Iter end, std::string const & escape, char escape_char)
{
if (static_cast<ptrdiff_t>(escape.size()) <= std::distance(begin, end))
if (std::equal(escape.begin(), escape.end(), begin))
{
buff.insert(buff.end(), 1, escape_char);
std::advance(begin, escape.size());
return true;
}
return false;
}
template <typename Iter>
std::string replace_escapes_fast(Iter begin, Iter end)
{
static std::string const amp("&amp;");
static std::string const quot("&quot;");
static std::string const apos("&apos;");
static std::string const lt("&lt;");
static std::string const gt("&gt;");
std::size_t size = std::distance(begin, end);
// static std::string str;
// str.reserve(size);
// str.resize(0);
static std::vector<char> buff;
// buff.reserve(size);
buff.resize(0);
while(begin != end)
{
Iter pos = std::find(begin, end, '&');
buff.insert(buff.end(), begin, pos);
if (pos == end)
break;
begin = pos;
if ( append_escape(buff, begin, end, amp, '&')
|| append_escape(buff, begin, end, quot, '\"')
|| append_escape(buff, begin, end, apos, '\'')
|| append_escape(buff, begin, end, gt, '>')
|| append_escape(buff, begin, end, lt, '<'))
{
}
else
++begin;
}
return std::string(&buff[0], buff.size());
}
*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -