📄 amistreeformat.h
字号:
//////////////////////////////////////////////////////////////////////////// Copyright (c) 2000, Yusuke Miyao/// You may distribute under the terms of the Artistic License.////// <id>$Id: AmisTreeFormat.h,v 1.8 2003/05/12 05:28:12 yusuke Exp $</id>/// <collection>Maximum Entropy Estimator</collection>/// <name>AmisTreeFormat.h</name>/// <overview>Parser for AmisTree-style data files</overview>/////////////////////////////////////////////////////////////////////////#ifndef Amis_AmisTreeFormat_h_#define Amis_AmisTreeFormat_h_#include <amis/configure.h>#include <amis/AmisEventFormat.h>#include <amis/Tokenizer.h>#include <amis/EventTree.h>#include <amis/EventTreeSpaceBase.h>#include <amis/StringHash.h>// sorry! (this is only to enable compiling with gcc3)//#include <amis/Name.h>AMIS_NAMESPACE_BEGIN///////////////////////////////////////////////////////////////////////// <classdef>/// <name>AmisTreeFormat</name>/// <overview>Parser for AmisTree-style event files</overview>/// <desc>/// The class imports event data from AmisTree-style data files./// The Amis format enables non-binary features and packed events./// For details, see README./// </desc>/// <see>Model, Event, EventFormat, AmisEventFormat</see>/// <body>template < class Feature, class Name = std::string >class AmisTreeFormat : public AmisEventFormat< Feature, Name > {private: StringHash< EventTreeNodeID > node_hash; std::string current_event;public: typedef typename Feature::FeatureFreq FeatureFreq; AmisTreeFormat() : AmisEventFormat< Feature >() {} virtual ~AmisTreeFormat() {}protected: EventTreeNodeID inputEventDisjNode( const Model< Name >& model, Tokenizer& t, EventTree< Feature >& et ) { std::string token; std::vector< EventTreeNodeID > disj_list; if ( ! t.nextToken( token ) ) { // name of the node throw IllegalEventFormatError( "Cannot find the name of a disjunctive node in Event " + current_event, t.lineNumber() ); } std::string node_name( '$' + token ); if ( node_hash.find( node_name ) != node_hash.end() ) { // the same name node found throw IllegalEventFormatError( "You cannot use the same name of a node in Event " + current_event + " : " + token, t.lineNumber() ); } while ( true ) { if ( ! t.nextToken( token ) ) { // end of line throw IllegalEventFormatError( "Truncated line found in a disjunctive node in Event " + current_event, t.lineNumber() ); } else if ( token.length() == 1 ) { switch ( token[ 0 ] ) { case '{': // disjunction starts in a disjunction throw IllegalEventFormatError( "Nested disjunction found in Event " + current_event, t.lineNumber() ); case ')': // end of EventTreeNode throw IllegalEventFormatError( "Too many ')'s in Event " + current_event, t.lineNumber() ); case '(': { // start of EventTreeNode EventTreeNodeID id = inputEventConjNode( model, t, et ); disj_list.push_back( id ); continue; } case '}': // end of disjunction if ( node_hash.find( node_name ) != node_hash.end() ) { // cycle found throw IllegalEventFormatError( "Cycle found in Event " + current_event + " at the disjunctive node: " + node_name, t.lineNumber() ); } EventTreeNodeID id = et.newDisjunctiveNode( disj_list ); if ( node_name.compare( "$_" ) != 0 ) { // '_' is anonymous node_hash[ node_name ] = id; } return id; } } else if ( token[ 0 ] == '$' ) { // reference to a conjunctive node StringHash< EventTreeNodeID >::const_iterator it = node_hash.find( token ); if ( it == node_hash.end() ) { throw IllegalEventFormatError( "Unknown conjunctive node in Event " + current_event + " : " + token, t.lineNumber() ); } if ( et[ it->second ].isDisjunctiveNode() ) { throw IllegalEventFormatError( "Daughter of a disjunctive node must be conjunctive in Event " + current_event + " : " + token, t.lineNumber() ); } disj_list.push_back( it->second ); } else { throw IllegalEventFormatError( "Feature cannot be specified in a disjunctive node in Event " + current_event + " : " + token, t.lineNumber() ); } } } /// Input an event tree EventTreeNodeID inputEventConjNode( const Model< Name >& model, Tokenizer& t, EventTree< Feature >& et ) { std::string token; std::vector< Feature > feature_list; std::vector< EventTreeNodeID > daughter_list; if ( ! t.nextToken( token ) ) { // name of the node throw IllegalEventFormatError( "Cannot find the name of an event tree node in Event " + current_event, t.lineNumber() ); } std::string node_name( '$' + token ); if ( node_hash.find( node_name ) != node_hash.end() ) { // the same name node found throw IllegalEventFormatError( "You cannot use the same name in an event tree in Event " + current_event + " : " + token, t.lineNumber() ); } while ( true ) { if ( ! t.nextToken( token ) ) { // end of line throw IllegalEventFormatError( "Truncated line found in an event tree node in Event " + current_event, t.lineNumber() ); } else if ( token.length() == 1 ) { switch ( token[ 0 ] ) { case '(': // event tree node starts in an event tree node throw IllegalEventFormatError( "Nested event tree node found in Event " + current_event, t.lineNumber() ); case '}': // end of disjunction throw IllegalEventFormatError( "Too many '}'s in Event " + current_event, t.lineNumber() ); case ')': { // end of an event tree node if ( node_hash.find( node_name ) != node_hash.end() ) { // cycle found throw IllegalEventFormatError( "Cycle found in Event " + current_event + " at the conjunctive node: " + node_name, t.lineNumber() ); } EventTreeNodeID id = et.newConjunctiveNode( feature_list, daughter_list ); if ( node_name.compare( "$_" ) != 0 ) { // '_' is anonymous node_hash[ node_name ] = id; } return id; } case '{': // a new disjunction found daughter_list.push_back( inputEventDisjNode( model, t, et ) ); continue; } } else if ( token[ 0 ] == '$' ) { // reference to a disjunctive node StringHash< EventTreeNodeID >::const_iterator it = node_hash.find( token ); if ( it == node_hash.end() ) { throw IllegalEventFormatError( "Unknown disjunctive node in Event " + current_event + " : " + token, t.lineNumber() ); } if ( ! et[ it->second ].isDisjunctiveNode() ) { throw IllegalEventFormatError( "Daughter of a conjunctive node must be disjunctive in Event " + current_event + " : " + token, t.lineNumber() ); } daughter_list.push_back( it->second ); } else { // a feature found FeatureFreq freq = extractFreq( token ); if ( freq <= static_cast< FeatureFreq >( 0 ) ) { throw IllegalEventFormatError( "Feature frequency must be positive in Event " + current_event, t.lineNumber() ); } IStringStream feature_is( token ); Name feature; feature_is >> feature; feature_list.push_back( Feature( model.featureID( feature ), freq ) ); } } } /// Input an event tree nodepublic: void inputEventSpace( std::istream& s, const ModelBase& model_base, EventSpaceBase& event_space_base ) { const Model< Name >* model = dynamic_cast< const Model< Name >* >( &model_base ); if ( model == NULL ) { throw IllegalEventFormatError( "AmisTreeFormat can be used only for Model< ... > class", 0 ); } EventTreeSpaceBase< Feature >* event_space = dynamic_cast< EventTreeSpaceBase< Feature >* >( &event_space_base ); if ( event_space == NULL ) { throw IllegalEventFormatError( "AmisTreeFormat can be used only for EventTreeSpace class", 0 ); } inputEventSpace( s, *model, *event_space ); } virtual void inputEventSpace( std::istream& s, const Model< Name >& model, EventTreeSpaceBase< Feature >& event_tree_space ) { Tokenizer t( s ); EventTree< Feature > et; AMIS_DEBUG_MESSAGE( 3, "\nInput events...\n" ); AMIS_DEBUG_MESSAGE( 5, "\t----------------------------------------\n" ); AMIS_DEBUG_MESSAGE( 5, "\tEvent\tFreq.\n" ); while ( ! t.endOfStream() ) { std::string dummy; if ( ! t.nextToken( current_event ) ) continue; // empty line Real prob = 0.0; if ( t.nextToken( dummy ) ) { prob = t.str2Real( dummy ); if ( t.nextToken( dummy ) ) { throw IllegalEventFormatError( "Too many tokens found in Event " + current_event, t.lineNumber() ); } } else { prob = 1.0; }#ifdef AMIS_JOINT_PROB et.setEventProbability( prob );#endif // AMIS_JOINT_PROB EventFreq freq; if ( ! t.nextToken( freq ) ) { throw IllegalEventFormatError( "Event freqeuency not found in Event " + current_event, t.lineNumber() ); } if ( freq <= 0 ) { throw IllegalEventFormatError( "Event frequency must be positive in Event " + current_event, t.lineNumber() ); } std::vector< Feature > fl; inputFeatureList( model, t, fl ); if ( ! t.nextToken( dummy ) || dummy.compare( "{" ) != 0 ) { throw IllegalEventFormatError( "Event tree not found in Event " + current_event, t.lineNumber() ); } node_hash.clear(); et.clear(); et.addObservedEvent( freq, fl ); (void)inputEventDisjNode( model, t, et ); AMIS_DEBUG_MESSAGE( 5, '\t' << current_event << '\t' << freq << '\n' ); event_tree_space.addEvent( et ); if ( t.nextToken( dummy ) ) { throw IllegalEventFormatError( "A token found after the event tree in Event " + current_event, t.lineNumber() ); } } //cerr << "Number of events = " << event_space->numEvents() << std::endl; //cerr << "Count of events = " << event_space->sumEventCount() << std::endl; AMIS_DEBUG_MESSAGE( 5, "\t----------------------------------------\n" ); node_hash.clear(); } /// Input event data from an input stream};AMIS_NAMESPACE_END/// </body>/// </classdef>#endif // AmisTreeFormat_h_// end of AmisTreeFormat.h
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -