// tokenizer.cpp
// (code-viewer residue, not part of the original source: "Font size:" label)
//**************************************************************************************************************************
//* Blue Xml Extension
//* Copyright (c) 2002-2004 Josh Harler
//*
//* Blue - General Purpose C++ Library
//* Copyright (c) 2002-2004 Josh Harler
//*
//* This software is provided 'as-is', without any express or implied warranty. In no event
//* will the authors be held liable for any damages arising from the use of this software.
//*
//* Permission is granted to anyone to use this software for any purpose, including commercial
//* applications, and to alter it and redistribute it freely, subject to the following restrictions:
//*
//* 1. The origin of this software must not be misrepresented; you must not claim that you
//* wrote the original software. If you use this software in a product, an acknowledgment in the
//* product documentation would be appreciated but is not required.
//*
//* 2. Altered source versions must be plainly marked as such, and must not be misrepresented as
//* being the original software.
//*
//* 3. This notice may not be removed or altered from any source distribution.
//*
//*
//* file Blue/Extension/Xml/internal/Tokenizer.cpp
//**
// Private Headers =========================================================================================================
// matching header
#include "Tokenizer.h"
// Private Defines/Enums/Typedefs/Etc ======================================================================================
// Private Classes/Structs =================================================================================================
// Private Global Variables ================================================================================================
// External Global Variables ===============================================================================================
// Private Functions =======================================================================================================
// Functions ===============================================================================================================
namespace blue {
namespace ext {
namespace xml {
// ---------------------------------------------------------------------------------------------------------------------
/**
 * Creates a tokenizer with no input stream attached.
 *
 * getNextToken() throws XmlTokenizeException while m_input is null, so a
 * stream has to be supplied through setInputStream() before tokenizing.
 * The scratch buffer is constructed with 128 (presumably its initial size in
 * bytes); writeToBuffer() enlarges it when it fills up.
 */
Tokenizer::Tokenizer() :m_input(0), m_lastChar(0), m_bufferIdx(0), m_buffer(128)
{
    // getNextToken() can arm a read-until scan on its own when it meets a
    // quote character, i.e. without readUntil() ever having been called, and
    // it then consults both flags.  Give them defined values so that path
    // never reads indeterminate memory: quote tracking off, terminator kept
    // as part of the returned token.
    m_readIgnoreQuotes = false;
    m_readInclude      = true;
}
// ---------------------------------------------------------------------------------------------------------------------
/**
 * Creates a tokenizer that reads from the given stream.
 *
 * @param input  Stream to pull bytes from.  Only the pointer is stored; the
 *               destructor of this class does not delete it.
 *
 * The scratch buffer is constructed with 128 (presumably its initial size in
 * bytes); writeToBuffer() enlarges it when it fills up.
 */
Tokenizer::Tokenizer( data::InputStream* input ) :m_input(input), m_lastChar(0), m_bufferIdx(0), m_buffer(128)
{
    // getNextToken() can arm a read-until scan on its own when it meets a
    // quote character, i.e. without readUntil() ever having been called, and
    // it then consults both flags.  Give them defined values so that path
    // never reads indeterminate memory: quote tracking off, terminator kept
    // as part of the returned token.
    m_readIgnoreQuotes = false;
    m_readInclude      = true;
}
// ---------------------------------------------------------------------------------------------------------------------
/**
 * Destroys the tokenizer.
 *
 * The body is intentionally empty: nothing is released explicitly here, and
 * in particular the input stream handed to the constructor or to
 * setInputStream() is not deleted by this destructor.
 */
Tokenizer::~Tokenizer()
{
}
// ---------------------------------------------------------------------------------------------------------------------
/**
 * Returns the stream this tokenizer currently reads from.
 *
 * @return The stored stream pointer; null (0) when no stream has been set.
 */
data::InputStream* Tokenizer::getInputStream() const
{
    return m_input;
}
// ---------------------------------------------------------------------------------------------------------------------
/**
 * Classifies a token purely by its text.
 *
 * The checks run in this order, and the first match wins:
 *   1. exact single-character punctuation: < > / = ? !
 *   2. text that both begins and ends with a double quote, or both begins
 *      and ends with a single quote  -> TOKEN_QUOTED
 *   3. text whose trimmed length is zero -> TOKEN_WHITESPACE
 *   4. everything else                   -> TOKEN_ALPHANUMERIC
 *
 * @param token  The token text to classify (taken by value).
 * @return The token category.
 */
Tokenizer::token_type_e Tokenizer::getTokenType( String token )
{
    // Start from the fallback category and refine it below.
    token_type_e kind = TOKEN_ALPHANUMERIC;

    if( token == "<" ) {
        kind = TOKEN_BRACKET_L;
    }
    else if( token == ">" ) {
        kind = TOKEN_BRACKET_R;
    }
    else if( token == "/" ) {
        kind = TOKEN_SLASH;
    }
    else if( token == "=" ) {
        kind = TOKEN_EQUALS;
    }
    else if( token == "?" ) {
        kind = TOKEN_QUESTION;
    }
    else if( token == "!" ) {
        kind = TOKEN_EXCLAMATION;
    }
    else if( (token.beginsWith("\"") && token.endsWith("\"")) ||
             (token.beginsWith("\'") && token.endsWith("\'")) ) {
        // Opening and closing delimiter must be the same kind of quote.
        kind = TOKEN_QUOTED;
    }
    else if( token.trim().getLength() == 0 ) {
        kind = TOKEN_WHITESPACE;
    }

    return kind;
}
// ---------------------------------------------------------------------------------------------------------------------
/**
 * Replaces the stream this tokenizer reads from.
 *
 * Only the stream pointer is swapped.  Any delimiter held in m_lastChar,
 * any bytes already collected in the scratch buffer and any pending
 * read-until request are left as they are.
 *
 * @param input  The new stream; passing null (0) makes getNextToken() throw.
 */
void Tokenizer::setInputStream( data::InputStream* input )
{
    this->m_input = input;
}
// ---------------------------------------------------------------------------------------------------------------------
/**
 * Extracts the next token from the input stream.
 *
 * Bytes are collected in the scratch buffer until a delimiter is met.
 * Delimiters are whitespace (space, tab, LF, CR), the punctuation characters
 * < > ? = / ! and the two quote characters.  When a delimiter ends a
 * non-empty run, the run is returned and the delimiter is kept in m_lastChar
 * so the next call starts with it.  When the buffer is empty, a whitespace
 * or punctuation delimiter is returned as a one-character token of its own.
 *
 * A quote character met with an empty buffer starts a quoted token: the
 * opening quote is stored and everything up to and including the next
 * occurrence of the same quote character is returned as one token, which
 * getTokenType() then classifies as TOKEN_QUOTED.
 *
 * If readUntil() has armed a terminator, the normal delimiter rules are
 * bypassed and bytes are accumulated until the buffer ends with that
 * terminator.  With m_readIgnoreQuotes set, the terminator test is skipped
 * for bytes that lie between quote characters.  With m_readInclude cleared,
 * the terminator is pushed back onto the stream and stripped from the
 * returned text; otherwise it stays part of the token.
 *
 * @param str   Receives the token text; reset to String::null on entry.
 * @param type  Receives the token category; reset to TOKEN_UNKNOWN on entry.
 * @return true when a token was produced, false when the stream stopped
 *         reading before a token was complete.
 * @throws XmlTokenizeException if no input stream has been set.
 *
 * NOTE(review): when the stream ends in the middle of a token, false is
 * returned and the partially collected bytes (and any armed terminator) are
 * left in place rather than flushed -- unchanged from the previous behavior.
 */
bool Tokenizer::getNextToken( String& str, token_type_e& type )
{
    if( m_input == 0 ) {
        throw XmlTokenizeException($("Tokenizer input stream has not been set"));
    }

    str  = String::null;
    type = TOKEN_UNKNOWN;

    // Only meaningful during a read-until scan with m_readIgnoreQuotes set:
    // true while the scan is between an opening and a closing quote.
    bool quotes = false;

    // A delimiter stashed by the previous call still has to be delivered even
    // if the stream has stopped reading in the meantime, so it is checked
    // before the stream state.
    while( m_lastChar != 0 || m_input->isReading() ) {
        uint8_t ch;
        if( m_lastChar != 0 ) {
            // Replay the delimiter that terminated the previous token.
            ch = m_lastChar;
            m_lastChar = 0;
        }
        else {
            // Retry short reads until one byte arrives or the stream stops.
            while( m_input->read(&ch, sizeof(ch)) != sizeof(ch) ) {
                if( m_input->isReading() == false ) {
                    return (false);
                }
            }
        }

        // --- read-until mode -------------------------------------------------
        if( m_readUntil != String::null ) {
            writeToBuffer(ch);

            if( m_readIgnoreQuotes ) {
                if( ch == '\"' || ch == '\'' ) {
                    quotes = !quotes;
                }
                else if( quotes ) {
                    // Inside quotes: do not look for the terminator.
                    continue;
                }
            }

            if( getBufferString().endsWith(m_readUntil) ) {
                if( !m_readInclude ) {
                    // Hand the terminator back to the stream and drop it from
                    // the returned text.
                    m_input->pushBack( Buffer(m_readUntil) );
                    str = getBufferString(false).stripFromRight(m_readUntil.getLength());
                }
                else {
                    str = getBufferString(false);
                }
                m_readUntil = String::null;
                type = getTokenType(str);
                return (true);
            }
            else continue;
        }

        // --- classify the byte -----------------------------------------------
        const bool isWhitespace = ( ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' );
        const bool isQuote      = ( ch == '\'' || ch == '\"' );

        // Explicit mapping instead of assigning inside the condition, so the
        // test no longer depends on every enum value being non-zero.
        bool         isPunctuation   = true;
        token_type_e punctuationType = TOKEN_UNKNOWN;
        switch( ch ) {
            case '<': punctuationType = TOKEN_BRACKET_L;   break;
            case '>': punctuationType = TOKEN_BRACKET_R;   break;
            case '?': punctuationType = TOKEN_QUESTION;    break;
            case '=': punctuationType = TOKEN_EQUALS;      break;
            case '/': punctuationType = TOKEN_SLASH;       break;
            case '!': punctuationType = TOKEN_EXCLAMATION; break;
            default:  isPunctuation = false;               break;
        }

        if( isWhitespace || isPunctuation || isQuote ) {
            if( getBufferString().getLength() > 0 ) {
                // The delimiter ends a pending run: return the run now and
                // replay the delimiter on the next call.
                str = getBufferString(false);
                m_lastChar = ch;
                type = getTokenType(str);
                return (true);
            }

            if( isQuote ) {
                // Start of a quoted token.  Keep the opening quote so the
                // finished token carries both delimiters, and set every
                // read-until flag explicitly instead of inheriting whatever a
                // previous readUntil() call (or nothing at all) left behind.
                writeToBuffer(ch);
                m_readUntil        = ch;
                m_readInclude      = true;
                m_readIgnoreQuotes = false;
                continue;
            }

            // Lone whitespace or punctuation character is its own token.
            str  = ch;
            type = isWhitespace ? TOKEN_WHITESPACE : punctuationType;
            return (true);
        }

        // Ordinary byte: extend the current run.
        writeToBuffer(ch);
    }
    return (false);
}
// ---------------------------------------------------------------------------------------------------------------------
/**
 * Arms a read-until request that getNextToken() consults on its next call.
 *
 * While a terminator is armed, getNextToken() accumulates bytes until the
 * collected text ends with it, instead of applying its delimiter rules.
 *
 * @param text          Terminator text to look for.
 * @param ignoreQuotes  When true, the terminator test is skipped for bytes
 *                      between quote characters.
 * @param include       When true the terminator stays in the returned token;
 *                      when false it is pushed back onto the stream and
 *                      removed from the token.
 */
void Tokenizer::readUntil( String text, bool ignoreQuotes, bool include )
{
    m_readIgnoreQuotes = ignoreQuotes;
    m_readInclude      = include;
    m_readUntil        = text;
}
// ---------------------------------------------------------------------------------------------------------------------
/**
 * Appends one byte to the scratch buffer at the current write index.
 *
 * If the write index has reached the buffer size, the buffer is first
 * resized to twice the current index.
 *
 * @param byte  The byte to store.
 */
void Tokenizer::writeToBuffer( uint8_t byte )
{
    const bool bufferFull = ( m_buffer.getSize() == m_bufferIdx );
    if( bufferFull ) {
        // Double the size so repeated appends do not resize on every byte.
        m_buffer.resize(m_bufferIdx * 2);
    }

    m_buffer.writeData(&byte, sizeof(byte), m_bufferIdx);
    m_bufferIdx += sizeof(byte);
}
// ---------------------------------------------------------------------------------------------------------------------
/**
 * Returns the bytes collected so far as a String.
 *
 * @param asConst  When true, the string is built with String::STATIC over the
 *                 buffer's data and the write index is left untouched, so the
 *                 collected bytes stay in place (presumably a non-owning view
 *                 of the buffer -- confirm against String).  When false, a
 *                 String is built from the data without that flag and the
 *                 write index is reset to zero, which empties the buffer for
 *                 the next token.
 * @return The first m_bufferIdx bytes of the buffer as a String.
 */
String Tokenizer::getBufferString( bool asConst )
{
    if( asConst ) {
        // Peek only: leave the buffer contents and index alone.
        return String((char*)m_buffer.getData(), m_bufferIdx, String::STATIC);
    }

    // Consume: take the text, then rewind the write index.
    String collected = String((char*)m_buffer.getData(), m_bufferIdx);
    m_bufferIdx = 0;
    return collected;
}
}}} // namespaces
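// (code-viewer residue, not part of the original source)
// Keyboard shortcuts:
//   Copy code             Ctrl + C
//   Search code           Ctrl + F
//   Full-screen mode      F11
//   Toggle theme          Ctrl + Shift + D
//   Show shortcuts        ?
//   Increase font size    Ctrl + =
//   Decrease font size    Ctrl + -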