📄 dfa_min.h
字号:
/* * ASTL - the Automaton Standard Template Library. * C++ generic components for Finite State Automata handling. * Copyright (C) 2000-2003 Vincent Le Maout (vincent.lemaout@chello.fr). * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */#ifndef ASTL_DFA_MIN_H#define ASTL_DFA_MIN_H// Descrition: // ASTL 2.0 Determinisic Finite Automaton Class Template DFA_min// Dynamic minimal acyclic DFA// This class has a limited interface: words can be added with the// insert method but no direct state/transition creations are// supported. Moreover, this container does not allow tags and the// alphabet type is char.// Sorry, words removal is not implemented yet!#include <iostream>#include <set>#include <vector>#include <functional>#include <string>#include <cstring> // memcpy#include <bitset>#include <iterator>#include <utility> // pair<>#include <memory> // allocator<>#include <astl.h> #include <cursor.h> // stack_cursor<>#include <concept.h>#include <language.h> // is_inASTL_BEGIN_NAMESPACEstruct lower_than_transition;// 32 bits for a transition: 8 bits for the letter, 24 bits for the aim state// Hence the following limitations: 256 characters max, about 16 million states maxclass transition{protected: typedef bitset<32> bitset_type; static const bitset_type letter_mask; // le masque d'extraction de la lettre static const bitset_type aim_mask; // le masque d'extraction du but static const unsigned long LETTER_SHIFT; // le d閏alage pour ins閞er la lettre bitset_type data; friend struct lower_than_transition;public: transition() : data(0UL) { } transition(char a, unsigned long aim = 0) : data((bitset_type(a) << LETTER_SHIFT) | bitset_type(aim)) { } char letter() const { return (char) (((data & letter_mask) >> LETTER_SHIFT).to_ulong()); } unsigned long aim() const { return (data & aim_mask).to_ulong(); } void aim(unsigned long new_aim) { data &= letter_mask; data |= bitset_type(new_aim); } bool operator<(const transition &x) const { // to lexicographicaly compare states return data.to_ulong() < x.data.to_ulong(); }};const transition::bitset_type transition::letter_mask(0xFF000000);const transition::bitset_type transition::aim_mask(0x00FFFFFF);const unsigned long transition::LETTER_SHIFT = 24; // le d閏alage pour ins閞er la lettre// This order relation is used to access transitions by binary search, it does not // take in account the aim state:struct lower_than_transition : binary_function<transition, transition, bool>{ bool operator()(const transition &x, const transition &y) const { return (x.data & transition::letter_mask).to_ulong() < (y.data & transition::letter_mask).to_ulong(); }};// Implementation notes// A state has the following attributes:// - A in-degree (incoming transitions count)// - A boolean (final or not)// - A height (length of the longest word recognized by the state)// - A out-degree (outgoing transitions count)// // The in-degree and the final flag are stored in a 32 bits array: the first 31 bits // are reserved for the in-degree. The height and out-degree are stored in a 32 bits// array: the first 24 bits are reserved for the height and the last 8 bits for the// out-degree.// Hence the following limitations: word length is about 16 million char max and // in-degree is about 2 billion max.// Optimization: when out-degree is 1, the outgoing transition is stored in the// array pointer.class state{public:
class bool_reference;
friend class bool_reference;
protected: typedef bitset<32> bitset_type; static const bitset_type final_mask; static const bitset_type degree_in_mask; static const bitset_type card_mask; static const bitset_type height_mask; static const unsigned long DEGREE_IN_SHIFT; static const unsigned long HEIGHT_SHIFT; bitset_type degree_in_final; bitset_type height_card; union dummy_name { // VC++ does not support methods in unnamed union private: transition *_t; // the sorted transitions array (order relation is given by lower_than_transition) char _tr[sizeof(transition)]; // when there is only one transition public: // Accesseurs: transition*& t() { return _t; } transition& tr() { return *((transition*) _tr); } transition* const & t() const { return _t; } const transition& tr() const { return *((transition*) _tr); } } tunion;public: typedef transition* iterator; typedef const transition* const_iterator; state() : degree_in_final(0UL), height_card(0UL) { tunion.t() = NULL; } state(const state &x) : degree_in_final(x.degree_in_final), height_card(x.height_card) { degree_in_final &= final_mask; // in-degree is 0 unsigned long n = x.card(); switch (n) { case 0 : tunion.t() = NULL; break; case 1 : tunion.tr() = x.tunion.tr(); break; default : tunion.t() = new transition[n]; memcpy(tunion.t(), x.tunion.t(), sizeof(transition) * n); // Warning: DFA_min is responsible for // in-degree updating } } ~state() { if (card() > 1) delete [] tunion.t(); } unsigned long delta1(char a) const { const_iterator where = lower_bound(begin(), end(), transition(a), lower_than_transition()); return (where == end() || (*where).letter() != a) ? 0 : (*where).aim(); } const_iterator begin() const { return card() == 1 ? &(tunion.tr()) : tunion.t(); } const_iterator end() const { return card() == 1 ? &(tunion.tr()) + 1 : (tunion.t() + card()); } iterator begin() { return card() == 1 ? &(tunion.tr()) : tunion.t(); } iterator end() { return card() == 1 ? &(tunion.tr()) + 1 : (tunion.t() + card()); } unsigned long size() const { return card(); } const_iterator find(char a) const { const_iterator where = lower_bound(begin(), end(), transition(a), lower_than_transition()); return (where == end() || (*where).letter() != a) ? end() : where; } iterator find(char a) { iterator where = lower_bound(begin(), end(), transition(a), lower_than_transition()); return (where == end() || (*where).letter() != a) ? end() : where; } void insert(const transition &x) { // Precondition: transition does not exist unsigned long n = card(); if (n == 0) tunion.tr() = x; else { iterator where = lower_bound(begin(), end(), x, lower_than_transition()); unsigned long i = where - begin(); transition *tmp = new transition[n + 1]; memcpy(tmp, begin(), i * sizeof(transition)); tmp[i] = x; memcpy(tmp + i + 1, where, (end() - where) * sizeof(transition)); if (n > 1) delete [] tunion.t(); tunion.t() = tmp; } inc_card(); } void change_trans(const transition &x) { // Pr閏ondition: the transition is defined (*(lower_bound(begin(), end(), x, lower_than_transition()))) = x; } void erase(const transition &x) { // Precondition: the transition is defined unsigned long n = card(); switch (n) { case 1 : tunion.t() = NULL; break; case 2 : { transition *tmp = tunion.t(); tunion.tr() = x.letter() < tmp[1].letter() ? tmp[1] : tmp[0]; delete [] tmp; break; } default : { iterator where = lower_bound(begin(), end(), x, lower_than_transition()); transition *tmp = new transition[n - 1]; memcpy(tmp, begin(), (where - begin()) * sizeof(transition)); memcpy(tmp + (where - begin()), where + 1, (end() - (where + 1)) * sizeof(transition)); delete [] tunion.t(); tunion.t() = tmp; } } dec_card(); } unsigned long degree_in() const { return ((degree_in_final & degree_in_mask) >> DEGREE_IN_SHIFT).to_ulong(); } void inc_degree_in() { unsigned long d = degree_in() + 1; degree_in_final &= final_mask; degree_in_final |= bitset_type(d) << DEGREE_IN_SHIFT; } void dec_degree_in() { unsigned long d = degree_in() - 1; degree_in_final &= final_mask; degree_in_final |= bitset_type(d) << DEGREE_IN_SHIFT; } unsigned long card() const { return (height_card & card_mask).to_ulong(); } void inc_card() { unsigned long c = card() + 1; height_card &= height_mask; height_card |= bitset_type(c); } void dec_card() { unsigned long c = card() - 1; height_card &= height_mask; height_card |= bitset_type(c); } unsigned long height() const { return ((height_card & height_mask) >> HEIGHT_SHIFT).to_ulong(); } void height(unsigned long h) { height_card &= card_mask; height_card |= bitset_type(h) << HEIGHT_SHIFT; } bool final() const { return (degree_in_final & final_mask).to_ulong() == 1; } class bool_reference { bitset_type &r; public: bool_reference(bitset_type &ref) : r(ref) { }
operator bool() const { return (r & final_mask).to_ulong() == 1; }
bool_reference& operator=(const bool_reference &x) { if (x == true) r |= final_mask; else r &= degree_in_mask; return *this; }
bool_reference& operator=(bool b) { if (b == true) r |= final_mask; else r &= degree_in_mask; return *this; }
bool operator==(const bool_reference &x) const { return (r & final_mask) == (x.r & final_mask); }
bool operator==(bool b) const { return ((r & final_mask).to_ulong() != 0) == b; } }; bool_reference final() { return bool_reference(degree_in_final); } // This strict weak order relation defines equivalent states: // Let q, p be two states. We have (q == p) <=> !(q < p) && !(p < q) // It compares two states lexicographicaly bool operator< (const state &x) const { if (final() < x.final()) return true; if (final() == x.final()) if (size() < x.size()) return true; else if (size() == x.size()) { const_iterator i(begin()), j(end()), k(x.begin()); while (i != j) if (*i < *k) return true; else if (*k < *i) return false; else { ++i; ++k; } return false; } return false; }};// Compare two states given their ID (map ID -> state structure)// Needed to use sets of state IDstruct compare_state : public binary_function<vector<state*>::size_type, vector<state*>::size_type, bool> { const vector<state*> &Q; compare_state(const vector<state*> &r) : Q(r) { } result_type operator() (first_argument_type x, second_argument_type y) const { return *Q[x] < *Q[y]; }};const state::bitset_type state::final_mask(1);const state::bitset_type state::degree_in_mask(0xFFFFFFFF << 1);const state::bitset_type state::card_mask(0x000000FF);const state::bitset_type state::height_mask(0xFFFFFF00);const unsigned long state::DEGREE_IN_SHIFT = 1;const unsigned long state::HEIGHT_SHIFT = 8;template <class TransitionAllocator = allocator<transition>,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -