// tokenizer.hpp
// released into the public domain
// by Christopher Diggins 2004
// http://www.cdiggins.com
// revision 2005/02/05 (chinahardbone@yahoo.com): added a double check on AtEnd()
// revision: reworked the tokenizer inheritance structure; added the RangeTokenizer class
#ifndef TOKENIZER_HPP_INCLUDED
#define TOKENIZER_HPP_INCLUDED
#include <string>
#include <list>
#include <utility>
#include <iostream>
#include "parser_input_stream.hpp"
namespace yard {
typedef std::pair < char const *, char const * > Token;
typedef std::list < Token > TokenList;
typedef TokenList::const_iterator TokenIter;
template < typename Rules_T >
struct OneTokenizer : public TokenList {
  void Tokenize(char const * pBegin, char const * pEnd) {
    ParserInputStream < char > input(pBegin, pEnd);
    while (!input.AtEnd()) {
      char const * pos = input.GetPos();
      // was: if (Rules_T::Accept(input)) { // error: Accept was never called; use the match<> functor instead
      if (match < Rules_T > () (input)) {
        push_back(Token(pos, input.GetPos()));
        return;
      }
      else // no match here: advance and retry (chinahardbone@yahoo.com 20050225)
      {
        // double check AtEnd() before advancing
        if (!input.AtEnd()) input.GotoNext();
      }
    }
  }
};
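/*
 * Usage sketch (hedged): FirstNumberRule is a hypothetical placeholder for any
 * YARD grammar rule that the match<> functor can drive; it is not defined in
 * this header.
 *
 *   yard::OneTokenizer<FirstNumberRule> one;
 *   one.Tokenize(pBegin, pEnd);
 *   // at most one Token (pointer pair) is recorded, because Tokenize
 *   // returns as soon as the first match succeeds
 */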
template < typename Rules_T >
struct Tokenizer : public TokenList {
  void Tokenize(char const * pBegin, char const * pEnd) {
    ParserInputStream < char > input(pBegin, pEnd);
    while (!input.AtEnd()) {
      char const * pos = input.GetPos();
      // was: if (Rules_T::Accept(input)) { // error: Accept was never called; use the match<> functor instead
      if (match < Rules_T > () (input)) {
        // note: a rule that can match without consuming any input would loop here forever
        push_back(Token(pos, input.GetPos()));
      }
      else // no match here: advance and retry (chinahardbone@yahoo.com 20050225)
      {
        // double check AtEnd() before advancing
        if (!input.AtEnd()) input.GotoNext();
      }
    }
  }
};
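/*
 * Usage sketch (hedged): WordRule is a hypothetical placeholder for a YARD
 * grammar rule; it is not defined in this header.
 *
 *   const char * text = "one two three";
 *   yard::Tokenizer<WordRule> tok;
 *   tok.Tokenize(text, text + std::strlen(text));
 *   // tok is itself a TokenList: one Token per non-overlapping match of
 *   // WordRule, each a (begin, end) pointer pair into the original buffer
 */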
/** recursive tokenizer: first tokenizes the input with TTokenizer, then applies Rules_T within each resulting token */
template < typename Rules_T, typename TTokenizer >
struct TokenizerRecursive : public Tokenizer < Rules_T > {
  void Tokenize(char const * pBegin, char const * pEnd) {
    TTokenizer token;
    token.Tokenize(pBegin, pEnd);
    for (TokenIter it = token.begin(); it != token.end(); it++) {
      Tokenizer < Rules_T >::Tokenize(it->first, it->second);
    }
  }
};
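/*
 * Usage sketch (hedged): a two-stage pass. LineRule and WordRule are
 * hypothetical placeholders. The outer tokenizer (Tokenizer<LineRule>) first
 * splits the input into line ranges; TokenizerRecursive then re-tokenizes
 * each of those ranges with WordRule and collects the results in itself.
 *
 *   yard::TokenizerRecursive<WordRule, yard::Tokenizer<LineRule> > words;
 *   words.Tokenize(pBegin, pEnd);
 */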
// write the raw text of one token to a stream
inline void OutputTokensToStream(TokenIter iter, std::ostream & os) {
  int n = static_cast < int > (iter->second - iter->first);
  os.write(iter->first, n);
}
// outputs every token as a numbered string
inline void OutputTokens(TokenIter iter, TokenIter end) {
  for (int i = 0; iter != end; i++, iter++) {
    // build the string directly from the token's character range;
    // the old form std::string(iter->first, 0, n) relied on null termination
    std::string s(iter->first, iter->second);
    std::cout << "ID " << i << " " << s << std::endl;
  }
}
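/*
 * Example (hedged): printing the tokens gathered by any tokenizer above,
 * reusing the hypothetical "tok" object from the earlier sketch.
 *
 *   yard::OutputTokens(tok.begin(), tok.end());          // "ID 0 one", "ID 1 two", ...
 *   yard::OutputTokensToStream(tok.begin(), std::cout);  // raw text of the first token only
 */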
/** range tokenizer: applies Rules_Second to the data inside each range found by Rules_T */
/** obsolete */
template < typename Rules_T, typename Rules_Second >
struct RangeTokenizer : public Tokenizer < Rules_Second > {
  void Tokenize(char const * pBegin, char const * pEnd) {
    Tokenizer < Rules_T > rangeToken;
    rangeToken.Tokenize(pBegin, pEnd);
    for (TokenIter it = rangeToken.begin(); it != rangeToken.end(); it++) {
      Tokenizer < Rules_Second >::Tokenize(it->first, it->second);
    }
  }
};
/** OBSOLETE */
template < typename Rules_T, typename TTokenizer >
struct RangeTokenizerRecursive : public Tokenizer < Rules_T > {
  void Tokenize(char const * pBegin, char const * pEnd) {
    TTokenizer rangeToken;
    rangeToken.Tokenize(pBegin, pEnd);
    for (TokenIter it = rangeToken.begin(); it != rangeToken.end(); it++) {
      Tokenizer < Rules_T >::Tokenize(it->first, it->second);
    }
  }
};
} // namespace yard
#endif // #ifndef TOKENIZER_HPP_INCLUDED