⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tokenizer.cpp

📁 简单的xml解析类
💻 CPP
字号:
//************************************************************************************************************************** 
//* Blue Xml Extension
//* Copyright (c) 2002-2004 Josh Harler
//* 
//* Blue - General Purpose C++ Library
//* Copyright (c) 2002-2004 Josh Harler
//* 
//* This software is provided 'as-is', without any express or implied warranty. In no event
//* will the authors be held liable for any damages arising from the use of this software.
//* 
//* Permission is granted to anyone to use this software for any purpose, including commercial
//* applications, and to alter it and redistribute it freely, subject to the following restrictions:
//* 
//* 	1. The origin of this software must not be misrepresented; you must not claim that you
//* 	wrote the original software. If you use this software in a product, an acknowledgment in the
//* 	product documentation would be appreciated but is not required.
//* 
//* 	2. Altered source versions must be plainly marked as such, and must not be misrepresented as
//* 	being the original software.
//* 
//* 	3. This notice may not be removed or altered from any source distribution.
//*
//*
//* file   Blue/Extension/Xml/internal/Tokenizer.cpp
//**

// Private Headers =========================================================================================================

// matching header
#include "Tokenizer.h"


// Private Defines/Enums/Typedefs/Etc ======================================================================================

// Private Classes/Structs =================================================================================================

// Private Global Variables ================================================================================================

// External Global Variables ===============================================================================================

// Private Functions =======================================================================================================

// Functions ===============================================================================================================

namespace blue {
namespace ext {
namespace xml {

	// ---------------------------------------------------------------------------------------------------------------------

	/**
	 * Creates a tokenizer with no input stream attached.
	 *
	 * getNextToken() throws until a stream is supplied through setInputStream().
	 * No character is held over (m_lastChar is 0), the write position in the token
	 * buffer is 0, and m_buffer is constructed with 128 (presumably its initial
	 * size in bytes -- confirm against Buffer).
	 */
	Tokenizer::Tokenizer() :m_input(0), m_lastChar(0), m_bufferIdx(0), m_buffer(128)
	{
		// getNextToken() reads both flags as soon as a quote character switches it
		// into read-until mode, even when readUntil() has never been called, so they
		// must not be left indeterminate. They are assigned here rather than in the
		// initializer list because the member declaration order is not visible from
		// this file.
		m_readInclude      = false;
		m_readIgnoreQuotes = false;
	}

	// ---------------------------------------------------------------------------------------------------------------------

	/**
	 * Creates a tokenizer that reads from the given stream.
	 *
	 * Only the pointer is stored. No character is held over (m_lastChar is 0), the
	 * write position in the token buffer is 0, and m_buffer is constructed with 128
	 * (presumably its initial size in bytes -- confirm against Buffer).
	 *
	 * @param input  stream to tokenize; may be 0, in which case getNextToken()
	 *               throws until setInputStream() provides a stream.
	 */
	Tokenizer::Tokenizer( data::InputStream* input ) :m_input(input), m_lastChar(0), m_bufferIdx(0), m_buffer(128)
	{
		// getNextToken() reads both flags as soon as a quote character switches it
		// into read-until mode, even when readUntil() has never been called, so they
		// must not be left indeterminate. They are assigned here rather than in the
		// initializer list because the member declaration order is not visible from
		// this file.
		m_readInclude      = false;
		m_readIgnoreQuotes = false;
	}

	// ---------------------------------------------------------------------------------------------------------------------

	/**
	 * Destroys the tokenizer.
	 *
	 * The body performs no explicit cleanup; in particular the input stream
	 * pointer is not deleted here.
	 */
	Tokenizer::~Tokenizer()
	{
		// intentionally empty
	}

	// ---------------------------------------------------------------------------------------------------------------------

	/**
	 * Returns the stream currently attached to this tokenizer.
	 *
	 * @return the stored stream pointer, or 0 when none has been set.
	 */
	data::InputStream* Tokenizer::getInputStream() const
	{
		return m_input;
	}

	// ---------------------------------------------------------------------------------------------------------------------

	/**
	 * Classifies a token by its text.
	 *
	 * The checks run in this order:
	 *   1. exact match against one of the punctuation tokens  < > / = ? !
	 *   2. text that both begins and ends with the same quote character
	 *      (double or single) is TOKEN_QUOTED
	 *   3. text whose trimmed length is 0 is TOKEN_WHITESPACE
	 *   4. anything else is TOKEN_ALPHANUMERIC
	 *
	 * @param token  token text to classify (taken by value).
	 * @return the token type chosen by the first rule that matches.
	 */
	Tokenizer::token_type_e Tokenizer::getTokenType( String token )
	{
		// Punctuation tokens map one-to-one onto their own type.
		static const struct {
			const char*  text;
			token_type_e type;
		} punctuation[] = {
			{ "<", TOKEN_BRACKET_L   },
			{ ">", TOKEN_BRACKET_R   },
			{ "/", TOKEN_SLASH       },
			{ "=", TOKEN_EQUALS      },
			{ "?", TOKEN_QUESTION    },
			{ "!", TOKEN_EXCLAMATION }
		};
		const unsigned int punctuationCount = sizeof(punctuation) / sizeof(punctuation[0]);

		for( unsigned int i = 0; i < punctuationCount; ++i ) {
			if( token == punctuation[i].text ) {
				return punctuation[i].type;
			}
		}

		// The single-quote test is only evaluated when the double-quote test fails.
		const bool doubleQuoted = token.beginsWith("\"") && token.endsWith("\"");
		const bool singleQuoted = !doubleQuoted && token.beginsWith("\'") && token.endsWith("\'");
		if( doubleQuoted || singleQuoted ) {
			return TOKEN_QUOTED;
		}

		// Nothing left after trimming means the token was whitespace only.
		return (token.trim().getLength() == 0) ? TOKEN_WHITESPACE : TOKEN_ALPHANUMERIC;
	}

	// ---------------------------------------------------------------------------------------------------------------------

	/**
	 * Attaches a different input stream.
	 *
	 * Only the stream pointer is replaced; the held-over character, the token
	 * buffer and any pending read-until state are left as they are.
	 *
	 * @param input  stream that subsequent getNextToken() calls read from.
	 */
	void Tokenizer::setInputStream( data::InputStream* input )
	{
		this->m_input = input;
	}

	// ---------------------------------------------------------------------------------------------------------------------

	/**
	 * Reads the next token from the input stream.
	 *
	 * Bytes are consumed one at a time. Ordinary bytes accumulate in the internal
	 * buffer; whitespace, the punctuation characters < > ? = / ! and quote
	 * characters act as delimiters. When a delimiter ends a buffered token, the
	 * delimiter is kept in m_lastChar and processed first on the next call.
	 *
	 * While m_readUntil is set (by readUntil(), or by an opening quote seen
	 * here), delimiter handling is bypassed and bytes are buffered until the
	 * buffer ends with the m_readUntil text.
	 *
	 * @param str   receives the token text; reset to String::null on entry.
	 * @param type  receives the token type; reset to TOKEN_UNKNOWN on entry.
	 * @return true when a token was produced, false when the stream stopped
	 *         reading before a token was completed.
	 * @throws XmlTokenizeException when no input stream has been set.
	 */
	bool Tokenizer::getNextToken( String& str, token_type_e& type )
	{
		if( m_input == 0 ) {
			throw XmlTokenizeException($("Tokenizer input stream has not been set"));
		}

		str = String::null;
		type = TOKEN_UNKNOWN;
		// Tracks whether we are between quote characters in read-until mode.
		// It is a local, so it starts out false on every call.
		bool quotes = false;

		while( m_input->isReading() ) {
			uint8_t ch;
			if( m_lastChar != 0 ) {
				// A delimiter held over from the previous call is handled before
				// anything new is read from the stream.
				ch = m_lastChar;
				m_lastChar = 0;
			}
			else {
				// Keep retrying until one byte is delivered; give up as soon as the
				// stream reports that it is no longer reading.
				// NOTE(review): bytes already buffered at this point are not returned
				// and m_bufferIdx is not reset, so they stay in the buffer.
				while( m_input->read(&ch, sizeof(ch)) != sizeof(ch) ) {
					if( m_input->isReading() == false ) {
						return (false);
					}
				}
			}

			// --- read-until mode: buffer everything up to the terminator text ---
			if( m_readUntil != String::null ) {
				writeToBuffer(ch);
				if( m_readIgnoreQuotes ) {
					// One flag is toggled by both quote kinds. A quote character itself
					// still falls through to the terminator check below; any other byte
					// seen while the flag is set skips that check.
					if( ch == '\"' || ch == '\'' ) {
						quotes = !quotes;
					}
					else if( quotes ) {
						continue;
					}
				}
				if( getBufferString().endsWith(m_readUntil) ) {
					if( !m_readInclude ) {
						// Terminator is not part of the token: hand it back to the stream
						// and strip it from the returned text.
						m_input->pushBack( Buffer(m_readUntil) );
						str = getBufferString(false).stripFromRight(m_readUntil.getLength());
					}
					else {
						str = getBufferString(false);
					}
					// Leave read-until mode before returning.
					m_readUntil = String::null;
					type = getTokenType(str);
					return (true);
				}
				else continue;
			}

			// --- whitespace ---
			if( ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r' ) {
				if( getBufferString().getLength() > 0 ) {
					// Whitespace ends the buffered token; the whitespace character is
					// held over and returned by the next call.
					str = getBufferString(false);
					m_lastChar = ch;
					type = getTokenType(str);
					return (true);
				}
				else {
					// Each whitespace character is returned as a token of its own.
					str = ch;
					type = TOKEN_WHITESPACE;
					return (true);
				}
			}


			// --- single-character punctuation ---
			// The assignment inside each && sets 'type' as a side effect of the test.
			// NOTE(review): this only works if each of these enum values is non-zero
			// -- confirm against the token_type_e declaration.
			if( (ch == '<' && (type = TOKEN_BRACKET_L   )) ||
				(ch == '>' && (type = TOKEN_BRACKET_R   )) ||
				(ch == '?' && (type = TOKEN_QUESTION    )) ||
				(ch == '=' && (type = TOKEN_EQUALS      )) ||
				(ch == '/' && (type = TOKEN_SLASH       )) ||
				(ch == '!' && (type = TOKEN_EXCLAMATION )) ) {

				if( getBufferString().getLength() > 0 ) {
					// Punctuation ends the buffered token; 'type' is recomputed for that
					// token and the punctuation character is held over.
					str = getBufferString(false);
					m_lastChar = ch;
					type = getTokenType(str);
					return (true);
				}
				else {
					// Return the punctuation character itself; 'type' was set above.
					str = ch;
					return (true);
				}
			}


			// --- quote characters ---
			if( ch == '\'' || ch == '\"' ) {
				if( getBufferString().getLength() > 0 ) {
					// A quote ends the buffered token; the quote is held over.
					str = getBufferString(false);
					m_lastChar = ch;
					type = getTokenType(str);
					return (true);
				}
				else {
					// Enter read-until mode with the same quote character as terminator.
					// NOTE(review): the opening quote is not written to the buffer, and
					// m_readInclude / m_readIgnoreQuotes keep whatever values they last
					// had because they are not set here -- confirm this is intended.
					// 'str' is overwritten once the terminator is found.
					str = ch;
					m_readUntil = ch;
					continue;
				}
			}

			// Ordinary byte: append it to the token being built.
			writeToBuffer(ch);
		}

		// The stream stopped reading before a token was completed.
		return (false);
	}

	// ---------------------------------------------------------------------------------------------------------------------

	/**
	 * Switches getNextToken() into read-until mode.
	 *
	 * While a non-null terminator text is stored, getNextToken() buffers bytes
	 * until the buffer ends with that text, then returns the buffered bytes as
	 * one token and clears the terminator.
	 *
	 * @param text          terminator text to read up to.
	 * @param ignoreQuotes  stored in m_readIgnoreQuotes; when true, getNextToken()
	 *                      skips the terminator check for bytes between quote
	 *                      characters.
	 * @param include       stored in m_readInclude; when true the terminator is
	 *                      kept in the returned token, otherwise it is stripped
	 *                      and pushed back onto the stream.
	 */
	void Tokenizer::readUntil( String text, bool ignoreQuotes, bool include )
	{
		this->m_readIgnoreQuotes = ignoreQuotes;
		this->m_readInclude      = include;
		this->m_readUntil        = text;
	}

	// ---------------------------------------------------------------------------------------------------------------------
	
	/**
	 * Appends one byte to the token buffer at the current write position.
	 *
	 * When the write position has reached the buffer size, the buffer is first
	 * resized to twice the current write position.
	 *
	 * @param byte  value to append.
	 */
	void Tokenizer::writeToBuffer( uint8_t byte )
	{
		const bool bufferIsFull = (m_bufferIdx == m_buffer.getSize());
		if( bufferIsFull ) {
			m_buffer.resize(m_bufferIdx * 2);
		}

		// Store the byte, then advance the write position past it.
		m_buffer.writeData(&byte, sizeof(byte), m_bufferIdx);
		m_bufferIdx += sizeof(byte);
	}

	// ---------------------------------------------------------------------------------------------------------------------

	/**
	 * Returns the bytes buffered so far as a String.
	 *
	 * @param asConst  when true, the String is built with String::STATIC
	 *                 (presumably a non-owning view of the buffer -- confirm
	 *                 against String) and the buffer is left untouched; when
	 *                 false, the String is built without that flag and the
	 *                 write position is reset to 0, so the buffer is consumed.
	 * @return the first m_bufferIdx bytes of the buffer as a String.
	 */
	String Tokenizer::getBufferString( bool asConst )
	{
		if( !asConst ) {
			// Consuming read: build the String first, then rewind the write position.
			String consumed((char*)m_buffer.getData(), m_bufferIdx);
			m_bufferIdx = 0;
			return consumed;
		}

		// Peek only: the write position is not changed.
		return String((char*)m_buffer.getData(), m_bufferIdx, String::STATIC);
	}


}}}	// namespaces

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -