⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 generator.hpp

📁 Boost provides free peer-reviewed portable C++ source libraries. We emphasize libraries that work
💻 HPP
📖 第 1 页 / 共 2 页
字号:
// generator.hpp// Copyright (c) 2007 Ben Hanson (http://www.benhanson.net/)//// Distributed under the Boost Software License, Version 1.0. (See accompanying// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)#ifndef BOOST_LEXER_GENERATOR_HPP#define BOOST_LEXER_GENERATOR_HPP#include "char_traits.hpp"// memcmp()#include <cstring>#include "partition/charset.hpp"#include "partition/equivset.hpp"#include <memory>#include "parser/tree/node.hpp"#include "parser/parser.hpp"#include "containers/ptr_list.hpp"#include "rules.hpp"#include "state_machine.hpp"namespace boost{namespace lexer{template<typename CharT, typename Traits = char_traits<CharT> >class basic_generator{public:    typedef typename basic_state_machine<CharT>::size_t_vector size_t_vector;    typedef basic_rules<CharT> rules;    static void build (const rules &rules_, basic_state_machine<CharT> &state_machine_)    {        std::size_t index_ = 0;        std::size_t size_ = rules_.statemap ().size ();        node_ptr_vector node_ptr_vector_;        state_machine_.clear ();        for (; index_ < size_; ++index_)        {            state_machine_._lookup->push_back (0);            state_machine_._lookup->back () = new size_t_vector;            state_machine_._dfa_alphabet.push_back (0);            state_machine_._dfa->push_back (0);            state_machine_._dfa->back () = new size_t_vector;        }        for (index_ = 0, size_ = state_machine_._lookup->size ();            index_ < size_; ++index_)        {            state_machine_._lookup[index_]->resize (sizeof (CharT) == 1 ?                num_chars : num_wchar_ts, dead_state_index);            if (!rules_.regexes ()[index_].empty ())            {                // vector mapping token indexes to partitioned token index sets                index_set_vector set_mapping_;                // syntax tree                detail::node *root_ = build_tree (rules_, index_,                    node_ptr_vector_, state_machine_._lookup[index_],                    set_mapping_, state_machine_._dfa_alphabet[index_],                    state_machine_._seen_BOL_assertion,                    state_machine_._seen_EOL_assertion);                build_dfa (root_, set_mapping_,                    state_machine_._dfa_alphabet[index_],                    *state_machine_._dfa[index_]);            }        }    }    static void minimise (basic_state_machine<CharT> &state_machine_)    {        const std::size_t machines_ = state_machine_._dfa->size ();        for (std::size_t i_ = 0; i_ < machines_; ++i_)        {            const std::size_t dfa_alphabet_ = state_machine_._dfa_alphabet[i_];            size_t_vector *dfa_ = state_machine_._dfa[i_];            if (dfa_alphabet_ != 0)            {                std::size_t size_ = 0;                do                {                    size_ = dfa_->size ();                    minimise_dfa (dfa_alphabet_, *dfa_, size_);                } while (dfa_->size () != size_);            }        }    }protected:    typedef detail::basic_charset<CharT> charset;    typedef detail::ptr_list<charset> charset_list;    typedef std::auto_ptr<charset> charset_ptr;    typedef detail::equivset equivset;    typedef detail::ptr_list<equivset> equivset_list;    typedef std::auto_ptr<equivset> equivset_ptr;    typedef typename charset::index_set index_set;    typedef std::vector<index_set> index_set_vector;    typedef detail::basic_parser<CharT> parser;    typedef typename parser::node_ptr_vector node_ptr_vector;    typedef std::set<const detail::node *> node_set;    typedef detail::ptr_vector<node_set> node_set_vector;    typedef std::vector<const detail::node *> node_vector;    typedef detail::ptr_vector<node_vector> node_vector_vector;    typedef typename parser::string string;    typedef std::pair<string, string> string_pair;    typedef typename parser::tokeniser::string_token string_token;    typedef std::deque<string_pair> macro_deque;    typedef std::pair<string, const detail::node *> macro_pair;    typedef typename parser::macro_map::iterator macro_iter;    typedef std::pair<macro_iter, bool> macro_iter_pair;    typedef typename parser::tokeniser::token_map token_map;    static detail::node *build_tree (const rules &rules_,        const std::size_t state_, node_ptr_vector &node_ptr_vector_,        size_t_vector *lookup_, index_set_vector &set_mapping_,        std::size_t &dfa_alphabet_, bool &seen_BOL_assertion_,        bool &seen_EOL_assertion_)    {        const typename rules::string_deque_deque &regexes_ =            rules_.regexes ();        const typename rules::id_vector_deque &ids_ = rules_.ids ();        const typename rules::id_vector_deque &states_ = rules_.states ();        typename rules::string_deque::const_iterator regex_iter_ =            regexes_[state_].begin ();        typename rules::string_deque::const_iterator regex_iter_end_ =            regexes_[state_].end ();        typename rules::id_vector::const_iterator ids_iter_ =            ids_[state_].begin ();        typename rules::id_vector::const_iterator states_iter_ =            states_[state_].begin ();        const typename rules::string &regex_ = *regex_iter_;        // map of regex charset tokens (strings) to index        token_map token_map_;        const typename rules::string_pair_deque &macrodeque_ =            rules_.macrodeque ();        typename parser::macro_map macromap_;        typename detail::node::node_vector tree_vector_;        build_macros (token_map_, macrodeque_, macromap_,            rules_.case_sensitive (), rules_.locale (), node_ptr_vector_,            rules_.dot_not_newline (), seen_BOL_assertion_,            seen_EOL_assertion_);        detail::node *root_ = parser::parse (regex_.c_str (),            regex_.c_str () + regex_.size (), *ids_iter_, *states_iter_,            rules_.case_sensitive (), rules_.dot_not_newline (),            rules_.locale (), node_ptr_vector_, macromap_, token_map_,            seen_BOL_assertion_, seen_EOL_assertion_);        ++regex_iter_;        ++ids_iter_;        ++states_iter_;        tree_vector_.push_back (root_);        // build syntax trees        while (regex_iter_ != regex_iter_end_)        {            // re-declare var, otherwise we perform an assignment..!            const typename rules::string &regex_ = *regex_iter_;            root_ = parser::parse (regex_.c_str (),                regex_.c_str () + regex_.size (), *ids_iter_,                *states_iter_, rules_.case_sensitive (),                rules_.dot_not_newline (), rules_.locale (), node_ptr_vector_,                macromap_, token_map_, seen_BOL_assertion_,                seen_EOL_assertion_);            tree_vector_.push_back (root_);            ++regex_iter_;            ++ids_iter_;            ++states_iter_;        }        if (seen_BOL_assertion_)        {            // Fixup BOLs            typename detail::node::node_vector::iterator iter_ =                tree_vector_.begin ();            typename detail::node::node_vector::iterator end_ =                tree_vector_.end ();            for (; iter_ != end_; ++iter_)            {                fixup_bol (*iter_, node_ptr_vector_);            }        }        // join trees        {            typename detail::node::node_vector::iterator iter_ =                tree_vector_.begin ();            typename detail::node::node_vector::iterator end_ =                tree_vector_.end ();            if (iter_ != end_)            {                root_ = *iter_;                ++iter_;            }            for (; iter_ != end_; ++iter_)            {                node_ptr_vector_->push_back (0);                node_ptr_vector_->back () = new detail::selection_node                    (root_, *iter_);                root_ = node_ptr_vector_->back ();            }        }        // partitioned token list        charset_list token_list_;        set_mapping_.resize (token_map_.size ());        partition_tokens (token_map_, token_list_);        typename charset_list::list::const_iterator iter_ =            token_list_->begin ();        typename charset_list::list::const_iterator end_ =            token_list_->end ();        std::size_t index_ = 0;        for (; iter_ != end_; ++iter_, ++index_)        {            const charset *cs_ = *iter_;            typename charset::index_set::const_iterator set_iter_ =                cs_->_index_set.begin ();            typename charset::index_set::const_iterator set_end_ =                cs_->_index_set.end ();            fill_lookup (cs_->_token, lookup_, index_);            for (; set_iter_ != set_end_; ++set_iter_)            {                set_mapping_[*set_iter_].insert (index_);            }        }        dfa_alphabet_ = token_list_->size () + dfa_offset;        return root_;    }    static void build_macros (token_map &token_map_,        const macro_deque &macrodeque_,        typename parser::macro_map &macromap_, const bool case_sensitive_,        const std::locale &locale_, node_ptr_vector &node_ptr_vector_,        const bool not_dot_newline_, bool &seen_BOL_assertion_,        bool &seen_EOL_assertion_)    {        for (typename macro_deque::const_iterator iter_ =            macrodeque_.begin (), end_ = macrodeque_.end ();            iter_ != end_; ++iter_)        {            const typename rules::string &name_ = iter_->first;            const typename rules::string &regex_ = iter_->second;            detail::node *node_ = parser::parse (regex_.c_str (),                regex_.c_str () + regex_.size (), 0, 0, case_sensitive_,                not_dot_newline_, locale_, node_ptr_vector_, macromap_,                token_map_, seen_BOL_assertion_, seen_EOL_assertion_);            macro_iter_pair map_iter_ = macromap_.                insert (macro_pair (name_, 0));            map_iter_.first->second = node_;        }    }    static void build_dfa (detail::node *root_,        const index_set_vector &set_mapping_, const std::size_t dfa_alphabet_,        size_t_vector &dfa_)    {        typename detail::node::node_vector *followpos_ =            &root_->firstpos ();        node_set_vector seen_sets_;        node_vector_vector seen_vectors_;        size_t_vector hash_vector_;        // 'jam' state        dfa_.resize (dfa_alphabet_, 0);        closure (followpos_, seen_sets_, seen_vectors_,            hash_vector_, dfa_alphabet_, dfa_);        std::size_t *ptr_ = 0;        for (std::size_t index_ = 0; index_ < seen_vectors_->size (); ++index_)        {            equivset_list equiv_list_;            build_equiv_list (seen_vectors_[index_], set_mapping_, equiv_list_);            for (typename equivset_list::list::const_iterator iter_ =                equiv_list_->begin (), end_ = equiv_list_->end ();                iter_ != end_; ++iter_)            {                equivset *equivset_ = *iter_;                const std::size_t transition_ = closure (&equivset_->_followpos,                    seen_sets_, seen_vectors_, hash_vector_, dfa_alphabet_, dfa_);                if (transition_ != npos)                {                    ptr_ = &dfa_.front () + ((index_ + 1) * dfa_alphabet_);                    // Prune abstemious transitions from end states.                    if (*ptr_ && !equivset_->_greedy) continue;                    for (typename detail::equivset::index_vector::const_iterator                        equiv_iter_ = equivset_->_index_vector.begin (),                        equiv_end_ = equivset_->_index_vector.end ();                        equiv_iter_ != equiv_end_; ++equiv_iter_)                    {                        const std::size_t index_ = *equiv_iter_;                        if (index_ == bol_token)                        {                            if (ptr_[eol_index] == 0)                            {                                ptr_[bol_index] = transition_;                            }                        }                        else if (index_ == eol_token)                        {                            if (ptr_[bol_index] == 0)                            {                                ptr_[eol_index] = transition_;                            }                        }                        else                        {                            ptr_[index_ + dfa_offset] = transition_;                        }                    }                }            }        }    }    static std::size_t closure (typename detail::node::node_vector *followpos_,        node_set_vector &seen_sets_, node_vector_vector &seen_vectors_,        size_t_vector &hash_vector_, const std::size_t size_, size_t_vector &dfa_)    {        bool end_state_ = false;        std::size_t id_ = 0;        std::size_t state_ = 0;        std::size_t hash_ = 0;        if (followpos_->empty ()) return npos;        std::size_t index_ = 0;        std::auto_ptr<node_set> set_ptr_ (new node_set);        std::auto_ptr<node_vector> vector_ptr_ (new node_vector);        for (typename detail::node::node_vector::const_iterator iter_ =            followpos_->begin (), end_ = followpos_->end ();            iter_ != end_; ++iter_)        {            closure_ex (*iter_, end_state_, id_, state_, set_ptr_.get (),                vector_ptr_.get (), hash_);        }        bool found_ = false;        // Stop VC++ 2005 crashing...        if (!hash_vector_.empty ())        {            const std::size_t *hash_iter_ = &hash_vector_.front ();            const std::size_t *hash_end_ = hash_iter_ + hash_vector_.size ();            node_set **set_iter_ = &seen_sets_->front ();            for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_)            {                found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_;                ++index_;                if (found_) break;            }        }        if (!found_)        {            seen_sets_->push_back (0);            seen_sets_->back () = set_ptr_.release ();            seen_vectors_->push_back (0);            seen_vectors_->back () = vector_ptr_.release ();            hash_vector_.push_back (hash_);            // State 0 is the jam state...            index_ = seen_sets_->size ();            const std::size_t old_size_ = dfa_.size ();            dfa_.resize (old_size_ + size_, 0);            if (end_state_)            {                dfa_[old_size_] |= end_state;                dfa_[old_size_ + id_index] = id_;                dfa_[old_size_ + state_index] = state_;            }        }        return index_;    }    static void closure_ex (detail::node *node_, bool &end_state_,        std::size_t &id_, std::size_t &state_, node_set *set_ptr_,        node_vector *vector_ptr_, std::size_t &hash_)    {        const bool temp_end_state_ = node_->end_state ();        if (temp_end_state_)        {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -