⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 nfa.h

📁 彩信浏览器
💻 H
字号:
/* * This file is part of Ambulant Player, www.ambulantplayer.org. * * Copyright (C) 2003-2007 Stichting CWI,  * Kruislaan 413, 1098 SJ Amsterdam, The Netherlands. * * Ambulant Player is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * (at your option) any later version. * * Ambulant Player is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with Ambulant Player; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA *//*  * @$Id: nfa.h,v 1.10 2007/02/12 14:14:38 jackjansen Exp $  */#ifndef AMBULANT_LIB_NFA_H#define AMBULANT_LIB_NFA_H#include "ambulant/config/config.h"#include <string>#include <ctype.h>#include <set>#include <map>#include <stack>#ifndef AMBULANT_PLATFORM_WIN32_WCE_3#include <cassert>#endifnamespace ambulant {namespace lib {////////////////////////////// A simple not optimized NFA based regular expression matcher.//// May be used to match short strings created by // the scanner against hand made (coded) regex. //// The intented usage is:// a) shring the string to be matched into tokens using the scanner// b) use the matcher against the short string of the tokens.// This way for example a decimal becomes the literals: d | d.d// instead of [0-9]+ | [0-9]+.[0-9]+ resulting to an improvement // nore than an order of magnitude (~20 times).const int EPSILON = -1;const int ACCEPT  = -9;const int REPEAT_LIMIT  = 1024;// An nfa node represents a node in a// nonderministic finite automaton (NFA)// and is associated with 2 transitions // that occur on symbol 'edge'.//// NFA nodes could be linearized in a// continous memory buffer for efficiency.//// nfa node flavors:// edge: {int, nfa_node*, 0, 0}// accept: {-9, 0, 0, 0}// epsilon: {-1, nfa_node*, nfa_node*, 0}struct nfa_node {	typedef unsigned char uchar_t;	typedef std::string::size_type size_type;		nfa_node(int e, nfa_node *n1 = 0, nfa_node *n2 = 0)	:	edge(e), next1(n1), next2(n2), anchor(0) {		++nfa_nodes_counter; 	}	~nfa_node() { --nfa_nodes_counter;}	void set_transition(int e, nfa_node *n1 = 0, nfa_node *n2 = 0)		{ edge = e; next1 = n1; next2 = n2;}	bool is_epsilon_trans() const { return edge == EPSILON;}	bool is_important_trans() const { return edge != EPSILON;}	bool is_accept_node() { return edge == ACCEPT;}	int get_anchor() const { return anchor;}	char get_edge_repr() const { return (edge == EPSILON)?'!':((edge == ACCEPT)?'$':char(edge));}	// Used for expr construction	nfa_node *clone(std::map<nfa_node*, nfa_node*>& clones);		int edge;	nfa_node *next1;	nfa_node *next2;	int anchor;		// verifier	static int nfa_nodes_counter;};// forward declaration;class nfa_matcher;// An nfa_expr is a directed graph of nfa nodes that represent a NFA.// An nfa_expr object keeps a reference to the start and the accept NFA nodes// and is the owner of the NFA nodes.class nfa_expr {  public:  	typedef unsigned char uchar_t;	typedef std::string::size_type size_type;	nfa_expr() : accept(0), start(0) {}		explicit nfa_expr(int edge) : accept(0), start(0) {		accept = new nfa_node(ACCEPT);		start = new nfa_node(edge, accept);	}		explicit nfa_expr(char edge, bool opt = false) : accept(0), start(0) {		accept = new nfa_node(ACCEPT);		start = new nfa_node(uchar_t(edge), accept);		if(opt) optional();	}		nfa_expr(const char *s)	:	accept(0), start(0) {		cat_expr(s);	}		// Assignment constructor.	nfa_expr(const nfa_expr& other) 	:	accept(0), start(0) {		std::map<nfa_node*, nfa_node*> clones;		accept = other.accept->clone(clones);		start = other.start->clone(clones);	}		// Copy constructor	const nfa_expr& operator=(const nfa_expr& expr) {		if(this == &expr) return *this;		free();		nfa_expr *eptr = expr.clone(); 		start = eptr->start; 		accept = eptr->accept;		eptr->clear();		delete eptr; 		return *this;	}		////////////////	// Constructors that simplify some common cases		nfa_expr(const char *s1, const char *s2) : accept(0), start(0) {		cat_expr(s1);		or_expr(s2);	}		nfa_expr(const char *s1, const char *s2, const char *s3) : accept(0), start(0) {		cat_expr(s1);		or_expr(s2);		or_expr(s3);	}		nfa_expr(const char *s1, const char *s2, const char *s3, const char *s4) : accept(0), start(0) {		cat_expr(s1);		or_expr(s2);		or_expr(s3);		or_expr(s4);	}		nfa_expr(char c1, char c2) : accept(0), start(0) {		cat_expr(c1);		or_expr(c2);	}		nfa_expr(char c1, char c2, char c3) : accept(0), start(0) {		cat_expr(c1);		or_expr(c2);		or_expr(c3);	}		nfa_expr(char c1, char c2, char c3, char c4) : accept(0), start(0) {		cat_expr(c1);		or_expr(c2);		or_expr(c3);		or_expr(c4);	}		// The destructor deletes all the nodes of the expr	~nfa_expr() { free(); }		// Free the NFA nodes of this expr	void free();		// nullify this without deleting nodes	void clear() { start = accept = 0;}	// deep copy of this	nfa_expr *clone() const;	// Returns true when the urderlying NFA is empty	bool empty() const {return start == 0;}		// Returns the number of nodes of this expr	int size() const { 		std::set<nfa_node*> nodes; 		return int(get_expr_nodes(nodes).size());	}		// Returns the memory consumed by this expr in bytes	int memsize() const { 		return int(size()*sizeof(nfa_node)) + int(sizeof(nfa_expr));	}	void mark_expr(int index);		//////////////////////////	// Match		// Matches as much as possible from the input.	// Positions the iterator at the end of the parsed input and returns the length parsed.	// Returns -1 on failure to parse anything	std::ptrdiff_t	parse(std::string::const_iterator& it, const std::string::const_iterator& end) const;					// Matches the argument string against the regex 	// that this nfa_expr represents. 	bool matches(const std::string& str) const {		std::string::const_iterator b = str.begin(), e = str.end();		return (size_t)parse(b, e) == str.length();	}		// Creates a matcher for the argument string	nfa_matcher* create_matcher(const std::string& str) const;		//////////////////////////	// Regex operations			// expr expr	const nfa_expr& cat_expr(const nfa_expr& expr);		// expr | expr	const nfa_expr& or_expr(const nfa_expr& expr);		// expr?	const nfa_expr& optional() { 		nfa_expr e(EPSILON);		or_expr_abs(&e);		assert(e.empty());		return *this;	}			// expr*	const nfa_expr& star();	// expr+	const nfa_expr& plus() {		nfa_expr *eptr = this->clone();		eptr->star();		cat_expr_abs(eptr);		assert(eptr->empty());		delete eptr;		return *this;	}	// expr{n}	const nfa_expr& power(int n);	// expr{n, m}	const nfa_expr& repeat(int n, int m);	// sets this expression to the char class 	// consisting of the literals in the string	const lib::nfa_expr& 	set_to_char_class(const std::string& s);		//////////////////	// Regex operations shortcuts 			const nfa_expr& cat_expr(char ch)	{ 		nfa_expr e(ch);		cat_expr_abs(&e);		assert(e.empty());		return *this;	}	const nfa_expr& cat_expr(const char *psz);	const nfa_expr& or_expr(char ch) {		nfa_expr e(ch);		or_expr_abs(&e);		assert(e.empty());		return *this;	}		const nfa_expr& or_expr(const char *psz) {		nfa_expr e(psz);		or_expr_abs(&e);		assert(e.empty());		return *this;	}		// Alt cat_expr	const nfa_expr& operator+=(char ch) { return cat_expr(ch);}	const nfa_expr& operator+=(const char *psz) { return cat_expr(psz);}	const nfa_expr& operator+=(const nfa_expr& e) { return cat_expr(e);}		// Alt or_expr	const nfa_expr& operator*=(char ch) { return or_expr(ch);}	const nfa_expr& operator*=(const char *psz) { return or_expr(psz);}	const nfa_expr& operator*=(const nfa_expr& e) { return or_expr(e);}	// Returns the nodes of this expression	std::set<nfa_node*>& get_expr_nodes(std::set<nfa_node*>& nodes) const;		// expr expr	// This consumes expr which becomes null	const nfa_expr& cat_expr_abs(nfa_expr *eptr);		// expr | expr	// This consumes expr which becomes null	const nfa_expr& or_expr_abs(nfa_expr *eptr);		friend class nfa_matcher;	  private:		// invariants checks	void verify1() const;		// match algorithm execution   	static void closure(std::set<nfa_node*>& nodes, std::stack<nfa_node*>& stack);	static void  move(std::set<nfa_node*>& nodes, std::stack<nfa_node*>& stack, int edge);	bool accepts_state(const std::set<nfa_node*>& nodes) const {		return !nodes.empty() && nodes.find(accept) != nodes.end();	}	nfa_node *accept;	nfa_node *start;};class nfa_matcher {  public:  	typedef unsigned char uchar_t;	typedef std::string::size_type size_type;  	enum {max_group = 32};		nfa_matcher(const std::string& str, const nfa_expr* expr) 	:	m_str(str), m_expr(expr) {		memset(m_group_begin, 0, max_group*sizeof(int)); 		memset(m_group_end, 0, max_group*sizeof(int));		m_matches = match(m_str);	}		bool seen_group(int i) const {		return i<max_group && m_group_end[i] > 0;	}		std::string get_group(int i) const {		if(i>=max_group || m_group_end[i] == 0) return "";		int n = m_group_end[i] - m_group_begin[i];		return m_str.substr(m_group_begin[i], n);	}		int length() const { 		return m_group_end[0] - m_group_begin[0];	}		void set_groups_begin(const std::set<int>& groups, int pos) {		std::set<int>::const_iterator it;		for(it=groups.begin();it!=groups.end();it++) 			m_group_begin[*it] = pos;	}		void set_groups_end(const std::set<int>& groups, int pos) {		std::set<int>::const_iterator it;		for(it=groups.begin();it!=groups.end();it++) 			m_group_end[*it] = pos;	}		void set_match_end(int pos) {		m_group_end[0] = pos;	}		bool matches() const { return m_matches; }	#ifndef AMBULANT_NO_IOSTREAMS	void dump_groups(std::ostream& os);#endif  private:	// NFA search algorithm helpers	bool match(const std::string& str);	void  move(std::set<nfa_node*>& nodes, std::stack<nfa_node*>& stack, 		int edge, std::set<int>& groups);    static void get_groups(int anchor, std::set<int>& groups);	std::string m_str;	const nfa_expr* m_expr;	bool m_matches;	int m_group_begin[max_group];	int m_group_end[max_group];};} // namespace lib } // namespace ambulant#ifndef AMBULANT_NO_IOSTREAMS#ifndef AMBULANT_NO_OSTREAM#include <ostream>#else /*AMBULANT_NO_OSTREAM*/#include <ostream.h>#endif/*AMBULANT_NO_OSTREAM*/inline std::ostream& operator<<(std::ostream& os, const ambulant::lib::nfa_node& n) {	return os << n.get_edge_repr();}std::ostream& operator<<(std::ostream& os, const std::set<ambulant::lib::nfa_node*>& nodes);#endif#endif // AMBULANT_LIB_NFA_H

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -