⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tokenizer.hpp

📁 yard lib, template for regular rule
💻 HPP
字号:

// released into the public domain
// by Christopher Diggins 2004
// http://www.cdiggins.com
//revision: add double check of AtEnd() -- chinahardbone@yahoo.com 2005/02/05
//revision: tokenizer inheritance structure; add RangeTokenizer class

#ifndef TOKENIZER_HPP_INCLUDED
#define TOKENIZER_HPP_INCLUDED

#include <string>
#include <list>
#include <utility>
#include <iostream>

#include "parser_input_stream.hpp"

namespace yard {
    // A Token is a half-open character range [first, second) pointing into
    // the caller's buffer; tokens never copy or own the underlying text, so
    // the buffer must outlive every Token taken from it.
    typedef std::pair < char const *, char const * > Token;
    // Tokenizers below derive from TokenList and accumulate Tokens in
    // document order as they scan.
    typedef std::list < Token > TokenList;
    // Read-only iterator used by the output helpers and recursive tokenizers.
    typedef TokenList::const_iterator TokenIter;

    template < typename Rules_T >
    struct OneTokenizer : public TokenList {
        void Tokenize(char const * pBegin, char const * pEnd) {
            ParserInputStream < char > input(pBegin, pEnd);
            while (!input.AtEnd()) {
                char const * pos = input.GetPos();
                //if (Rules_T::Accept(input)) {//error: never call accept.
                if (match < Rules_T > () (input)) {
                    push_back(Token(pos, input.GetPos()));
                    return;
                }
                else //chinahardbone@yahoo.com 20050225
                {
                    //double check AtEnd()
                    if ((!input.AtEnd())) input.GotoNext();
                }
            }
        }
    };

    template < typename Rules_T >
    struct Tokenizer : public TokenList {
        void Tokenize(char const * pBegin, char const * pEnd) {
            ParserInputStream < char > input(pBegin, pEnd);
            while (!input.AtEnd()) {
                char const * pos = input.GetPos();
                //if (Rules_T::Accept(input)) {//error: never call accept.
                if (match < Rules_T > () (input)) {
                    push_back(Token(pos, input.GetPos()));
                }
                else //chinahardbone@yahoo.com 20050225
                {
                    //double check AtEnd()
                    if ((!input.AtEnd())) input.GotoNext();
                }
            }
        }
    };

    /** recursive token */

    // Two-pass tokenizer: TTokenizer produces an outer token list, then each
    // outer token's range is re-tokenized with Rules_T.  The inner tokens
    // accumulate in this list (inherited from Tokenizer<Rules_T>).
    template < typename Rules_T, typename TTokenizer >
    struct TokenizerRecursive : public Tokenizer < Rules_T > {
        void Tokenize(char const * pBegin, char const * pEnd) {
            TTokenizer outerPass;
            outerPass.Tokenize(pBegin, pEnd);
            TokenIter cur = outerPass.begin();
            TokenIter last = outerPass.end();
            for (; cur != last; ++cur)
                Tokenizer < Rules_T >::Tokenize(cur->first, cur->second);
        }
    };

    /// Writes the raw characters of the token referenced by 'iter' to 'os'.
    /// Marked 'inline': this is a non-template function defined in a header,
    /// so without 'inline' every translation unit including tokenizer.hpp
    /// would emit its own definition and violate the one-definition rule.
    inline void OutputTokensToStream(TokenIter iter, std::ostream & os) {
        // streamsize is the natural type for ostream::write's count.
        std::streamsize n = static_cast < std::streamsize > (iter->second - iter->first);
        os.write(iter->first, n);
    }

    /// Prints every token in [iter, end) to stdout, one per line, prefixed
    /// with its zero-based index.  (The old comment claimed it printed only
    /// the "first 10 tokens"; the loop has no such limit.)
    /// 'inline' added: non-template function defined in a header would
    /// otherwise break the one-definition rule across translation units.
    inline void OutputTokens(TokenIter iter, TokenIter end) {
        for (int i = 0; iter != end; ++i, ++iter) {
            // Bug fix: the old code built the string with
            //   std::string s(iter->first, 0, n);
            // which first converts iter->first to a temporary std::string by
            // scanning for a NUL terminator -- reading past the token (and
            // past the buffer if the input is not NUL-terminated) -- before
            // truncating.  Construct directly from the [first, second) range.
            std::string s(iter->first, iter->second);
            std::cout << "ID " << i << " " << s << std::endl;
        }
    }

    /** range tokenize. apply the second rules to the data which in the range of the first rule find out. */

    /** obsolete */

    // First pass: Rules_T selects ranges of the input; second pass:
    // Rules_Second is applied inside each selected range.  The second-pass
    // tokens accumulate in this list (inherited from Tokenizer<Rules_Second>).
    template < typename Rules_T, typename Rules_Second >
    struct RangeTokenizer : public Tokenizer < Rules_Second > {
        void Tokenize(char const * pBegin, char const * pEnd) {
            Tokenizer < Rules_T > outerPass;
            outerPass.Tokenize(pBegin, pEnd);
            TokenIter cur = outerPass.begin();
            TokenIter last = outerPass.end();
            while (cur != last) {
                Tokenizer < Rules_Second >::Tokenize(cur->first, cur->second);
                ++cur;
            }
        }
    };

    /** OBSOLETE */

    // Like TokenizerRecursive: delegates the outer pass to an arbitrary
    // tokenizer type, then re-tokenizes each outer token with Rules_T.
    template < typename Rules_T, typename TTokenizer >
    struct RangeTokenizerRecursive : public Tokenizer < Rules_T > {
        void Tokenize(char const * pBegin, char const * pEnd) {
            TTokenizer outerPass;
            outerPass.Tokenize(pBegin, pEnd);
            for (TokenIter cur = outerPass.begin(), last = outerPass.end();
                 cur != last; ++cur) {
                Tokenizer < Rules_T >::Tokenize(cur->first, cur->second);
            }
        }
    };

}

#endif // #ifndef TOKENIZER_HPP_INCLUDED

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -