⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 amistreeformat.h

📁 Amis - A maximum entropy estimator 一个最大熵模型统计工具
💻 H
字号:
////////////////////////////////////////////////////////////////////////////  Copyright (c) 2000, Yusuke Miyao///  You may distribute under the terms of the Artistic License.//////  <id>$Id: AmisTreeFormat.h,v 1.8 2003/05/12 05:28:12 yusuke Exp $</id>///  <collection>Maximum Entropy Estimator</collection>///  <name>AmisTreeFormat.h</name>///  <overview>Parser for AmisTree-style data files</overview>/////////////////////////////////////////////////////////////////////////#ifndef Amis_AmisTreeFormat_h_#define Amis_AmisTreeFormat_h_#include <amis/configure.h>#include <amis/AmisEventFormat.h>#include <amis/Tokenizer.h>#include <amis/EventTree.h>#include <amis/EventTreeSpaceBase.h>#include <amis/StringHash.h>// sorry! (this is only to enable compiling with gcc3)//#include <amis/Name.h>AMIS_NAMESPACE_BEGIN///////////////////////////////////////////////////////////////////////// <classdef>/// <name>AmisTreeFormat</name>/// <overview>Parser for AmisTree-style event files</overview>/// <desc>/// The class imports event data from AmisTree-style data files./// The Amis format enables non-binary features and packed events./// For details, see README./// </desc>/// <see>Model, Event, EventFormat, AmisEventFormat</see>/// <body>template < class Feature, class Name = std::string >class AmisTreeFormat : public AmisEventFormat< Feature, Name > {private:  StringHash< EventTreeNodeID > node_hash;  std::string current_event;public:  typedef typename Feature::FeatureFreq FeatureFreq;  AmisTreeFormat() : AmisEventFormat< Feature >() {}  virtual ~AmisTreeFormat() {}protected:  EventTreeNodeID inputEventDisjNode( const Model< Name >& model, Tokenizer& t, EventTree< Feature >& et ) {    std::string token;    std::vector< EventTreeNodeID > disj_list;    if ( ! t.nextToken( token ) ) {      // name of the node      throw IllegalEventFormatError( "Cannot find the name of a disjunctive node in Event " + current_event, t.lineNumber() );    }    std::string node_name( '$' + token );    if ( node_hash.find( node_name ) != node_hash.end() ) {      // the same name node found      throw IllegalEventFormatError( "You cannot use the same name of a node in Event " + current_event + " : " + token, t.lineNumber() );    }    while ( true ) {      if ( ! t.nextToken( token ) ) {        // end of line        throw IllegalEventFormatError( "Truncated line found in a disjunctive node in Event " + current_event, t.lineNumber() );      } else if ( token.length() == 1 ) {        switch ( token[ 0 ] ) {        case '{':          // disjunction starts in a disjunction          throw IllegalEventFormatError( "Nested disjunction found in Event " + current_event, t.lineNumber() );        case ')':          // end of EventTreeNode          throw IllegalEventFormatError( "Too many ')'s in Event " + current_event, t.lineNumber() );        case '(':          {            // start of EventTreeNode            EventTreeNodeID id = inputEventConjNode( model, t, et );            disj_list.push_back( id );            continue;          }        case '}':          // end of disjunction	  if ( node_hash.find( node_name ) != node_hash.end() ) {	    // cycle found	    throw IllegalEventFormatError( "Cycle found in Event " + current_event + " at the disjunctive node: " + node_name, t.lineNumber() );	  }          EventTreeNodeID id = et.newDisjunctiveNode( disj_list );	  if ( node_name.compare( "$_" ) != 0 ) {  // '_' is anonymous	    node_hash[ node_name ] = id;	  }	  return id;        }      } else if ( token[ 0 ] == '$' ) {	// reference to a conjunctive node	StringHash< EventTreeNodeID >::const_iterator it = node_hash.find( token );	if ( it == node_hash.end() ) {	  throw IllegalEventFormatError( "Unknown conjunctive node in Event " + current_event + " : " + token, t.lineNumber() );	}	if ( et[ it->second ].isDisjunctiveNode() ) {	  throw IllegalEventFormatError( "Daughter of a disjunctive node must be conjunctive in Event " + current_event + " : " + token, t.lineNumber() );	}	disj_list.push_back( it->second );      } else {	throw IllegalEventFormatError( "Feature cannot be specified in a disjunctive node in Event " + current_event + " : " + token, t.lineNumber() );      }    }  }  /// Input an event tree  EventTreeNodeID inputEventConjNode( const Model< Name >& model, Tokenizer& t, EventTree< Feature >& et ) {    std::string token;    std::vector< Feature > feature_list;    std::vector< EventTreeNodeID > daughter_list;    if ( ! t.nextToken( token ) ) {      // name of the node      throw IllegalEventFormatError( "Cannot find the name of an event tree node in Event " + current_event, t.lineNumber() );    }    std::string node_name( '$' + token );    if ( node_hash.find( node_name ) != node_hash.end() ) {      // the same name node found      throw IllegalEventFormatError( "You cannot use the same name in an event tree in Event " + current_event + " : " + token, t.lineNumber() );    }    while ( true ) {      if ( ! t.nextToken( token ) ) {        // end of line        throw IllegalEventFormatError( "Truncated line found in an event tree node in Event " + current_event, t.lineNumber() );      } else if ( token.length() == 1 ) {        switch ( token[ 0 ] ) {        case '(':          // event tree node starts in an event tree node          throw IllegalEventFormatError( "Nested event tree node found in Event " + current_event, t.lineNumber() );        case '}':          // end of disjunction          throw IllegalEventFormatError( "Too many '}'s in Event " + current_event, t.lineNumber() );        case ')':          {            // end of an event tree node	    if ( node_hash.find( node_name ) != node_hash.end() ) {	      // cycle found	      throw IllegalEventFormatError( "Cycle found in Event " + current_event + " at the conjunctive node: " + node_name, t.lineNumber() );	    }            EventTreeNodeID id = et.newConjunctiveNode( feature_list, daughter_list );            if ( node_name.compare( "$_" ) != 0 ) {  // '_' is anonymous              node_hash[ node_name ] = id;            }            return id;          }        case '{':          // a new disjunction found          daughter_list.push_back( inputEventDisjNode( model, t, et ) );          continue;        }      } else if ( token[ 0 ] == '$' ) {	// reference to a disjunctive node	StringHash< EventTreeNodeID >::const_iterator it = node_hash.find( token );	if ( it == node_hash.end() ) {	  throw IllegalEventFormatError( "Unknown disjunctive node in Event " + current_event + " : " + token, t.lineNumber() );	}	if ( ! et[ it->second ].isDisjunctiveNode() ) {	  throw IllegalEventFormatError( "Daughter of a conjunctive node must be disjunctive in Event " + current_event + " : " + token, t.lineNumber() );	}	daughter_list.push_back( it->second );      } else {	// a feature found	FeatureFreq freq = extractFreq( token );	if ( freq <= static_cast< FeatureFreq >( 0 ) ) {	  throw IllegalEventFormatError( "Feature frequency must be positive in Event " + current_event, t.lineNumber() );	}	IStringStream feature_is( token );	Name feature;	feature_is >> feature;	feature_list.push_back( Feature( model.featureID( feature ), freq ) );      }    }  }  /// Input an event tree nodepublic:  void inputEventSpace( std::istream& s, const ModelBase& model_base, EventSpaceBase& event_space_base ) {    const Model< Name >* model = dynamic_cast< const Model< Name >* >( &model_base );    if ( model == NULL ) {      throw IllegalEventFormatError( "AmisTreeFormat can be used only for Model< ... > class", 0 );    }    EventTreeSpaceBase< Feature >* event_space =      dynamic_cast< EventTreeSpaceBase< Feature >* >( &event_space_base );    if ( event_space == NULL ) {      throw IllegalEventFormatError( "AmisTreeFormat can be used only for EventTreeSpace class", 0 );    }    inputEventSpace( s, *model, *event_space );  }  virtual void inputEventSpace( std::istream& s, const Model< Name >& model, EventTreeSpaceBase< Feature >& event_tree_space ) {    Tokenizer t( s );    EventTree< Feature > et;    AMIS_DEBUG_MESSAGE( 3, "\nInput events...\n" );    AMIS_DEBUG_MESSAGE( 5, "\t----------------------------------------\n" );    AMIS_DEBUG_MESSAGE( 5, "\tEvent\tFreq.\n" );    while ( ! t.endOfStream() ) {      std::string dummy;      if ( ! t.nextToken( current_event ) ) continue; // empty line      Real prob = 0.0;      if ( t.nextToken( dummy ) ) {        prob = t.str2Real( dummy );        if ( t.nextToken( dummy ) ) {          throw IllegalEventFormatError( "Too many tokens found in Event " + current_event, t.lineNumber() );        }      } else {        prob = 1.0;      }#ifdef AMIS_JOINT_PROB      et.setEventProbability( prob );#endif // AMIS_JOINT_PROB      EventFreq freq;      if ( ! t.nextToken( freq ) ) {        throw IllegalEventFormatError( "Event freqeuency not found in Event " + current_event,                                      t.lineNumber() );      }      if ( freq <= 0 ) {        throw IllegalEventFormatError( "Event frequency must be positive in Event " + current_event,                                      t.lineNumber() );      }      std::vector< Feature > fl;      inputFeatureList( model, t, fl );      if ( ! t.nextToken( dummy ) || dummy.compare( "{" ) != 0 ) {        throw IllegalEventFormatError( "Event tree not found in Event " + current_event,                                      t.lineNumber() );      }      node_hash.clear();      et.clear();      et.addObservedEvent( freq, fl );      (void)inputEventDisjNode( model, t, et );      AMIS_DEBUG_MESSAGE( 5, '\t' << current_event << '\t' << freq << '\n' );      event_tree_space.addEvent( et );      if ( t.nextToken( dummy ) ) {        throw IllegalEventFormatError( "A token found after the event tree in Event " + current_event,                                      t.lineNumber() );      }    }    //cerr << "Number of events = " << event_space->numEvents() << std::endl;    //cerr << "Count  of events = " << event_space->sumEventCount() << std::endl;    AMIS_DEBUG_MESSAGE( 5, "\t----------------------------------------\n" );    node_hash.clear();  }  /// Input event data from an input stream};AMIS_NAMESPACE_END/// </body>/// </classdef>#endif // AmisTreeFormat_h_// end of AmisTreeFormat.h

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -