📄 tokenizer.h

📁 Amis - A maximum entropy estimator 一个最大熵模型统计工具
💻 H
字号:
////////////////////////////////////////////////////////////////////////////  Copyright (c) 2000, Yusuke Miyao///  You may distribute under the terms of the Artistic License.//////  <id>$Id: Tokenizer.h,v 1.4 2003/04/24 06:41:06 yusuke Exp $</id>///  <collection>Maximum Entropy Estimator</collection>///  <name>Tokenizer.h</name>///  <overview>String tokenizer</overview>/////////////////////////////////////////////////////////////////////////#ifndef Amis_Tokenizer_h_#define Amis_Tokenizer_h_#include <amis/configure.h>#include <amis/Real.h>#include <amis/ErrorBase.h>AMIS_NAMESPACE_BEGIN///////////////////////////////////////////////////////////////////////// <classdef>/// <name>TokenError</name>/// <overview>Exception for invalid tokens</overview>/// <desc>/// This class signals invalid tokens in the Tokenizer class./// </desc>/// <body>class TokenError : public ErrorBase { public:  TokenError( const std::string& m ) : ErrorBase( m ) {}  /// Initialize with an error message  TokenError( const char* m ) : ErrorBase( m ) {}  /// Initialize with an error message};/// </body>/// </classdef>///////////////////////////////////////////////////////////////////////// <classdef>/// <name>Tokenizer</name>/// <overview>String tokenizer</overview>/// <desc>/// This class splits an input string into tokens/// </desc>/// <body>class Tokenizer { private:  std::istream& s;  mutable bool end_of_stream;  int line_number;  const char* word_delimiters;  const char* line_delimiters;  const char* escape;  const char* symbols;  const char* comment_start;  const char* comment_end; protected:  static bool findChar( const char* str, char c ) {    while ( *str != '\0' ) {      if ( *str == c ) return true;      ++str;    }    return false;  } public:  Tokenizer( std::istream& s_,             const char* word_delim = " \t",             const char* line_delim = "\n",             const char* esc = "\\",             const char* sym = "",             const char* com_start = "#",             const char* com_end = "\n" )    : s( s_ ),      word_delimiters( word_delim ), line_delimiters( line_delim ),      escape( esc ), symbols( sym ),      comment_start( com_start ), comment_end( com_end ) {    end_of_stream = false;    line_number = 0;  }  /// Initialize with an input stream  virtual ~Tokenizer() {}  bool endOfStream( void ) const {    if( s ) {      end_of_stream = false;    }    else{      end_of_stream = true;    }    return end_of_stream;  }  /// Whether we've reached to the end of the stream  int lineNumber( void ) const {    return line_number;  }  /// Get the current line number  static int str2int( const std::string& str ) {    char* end_ptr;    int num = static_cast< int >( strtol( str.c_str(), &end_ptr, 0 ) );    if ( *end_ptr != '\0' ) {      throw TokenError( "Illegal format for integer: " + str );    }    return num;  }  /// Translate a string into an integer  static Real str2Real( const std::string& str ) {    char* end_ptr;    Real num = static_cast< Real >( strtod( str.c_str(), &end_ptr ) );    if ( *end_ptr != '\0' ) {      throw TokenError( "Illegal format for real: " + str );    }    return num;  }  /// Translate a string into a real number  bool gotoNewLine() {    if ( end_of_stream ) return false;    int c = s.get();    while ( true ) {      if ( findChar( line_delimiters, c ) ) {        ++line_number;        return true;      } else if ( c == EOF ) {        end_of_stream = true;        return false;      } else {        c = s.get();      }    }  }  bool nextToken( std::string& token ) {    if ( end_of_stream ) return false;    int c = s.get();    while ( true ) {      if ( findChar( line_delimiters, c ) ) {        ++line_number;        return false;      } else if ( c == EOF ) {        end_of_stream = true;        return false;      } else if ( findChar( symbols, c ) ) {        token = c;        return true;      } else if ( findChar( comment_start, c ) ) {        while ( c != EOF && ! findChar( comment_end, c ) ) c = s.get();        if ( c == EOF ) end_of_stream = true; else ++line_number;        return false;      } else if ( findChar( word_delimiters, c ) ) {        c = s.get();        continue;      } else {        break;      }    }    token.resize( 0 );    while ( true ) {      if ( findChar( escape, c ) ) {        c = s.get();        if ( c == EOF )          throw TokenError( "Escape character found at the end of the stream" );      }      token.push_back( c );      c = s.get();      if ( findChar( word_delimiters, c ) ) {        break;      } else if ( findChar( line_delimiters, c ) ||                  findChar( comment_start, c ) ||                  findChar( symbols, c ) ) {        s.putback( c );        break;      } else if ( c == EOF ) {        end_of_stream = true;        break;      }    }    return ! token.empty();  }  /// Get a next token from an input line  bool nextToken( int& i ) {    std::string t;    if ( nextToken( t ) ) {      i = str2int( t );      return true;    } else {      return false;    }  }  /// Get a next integer token from an input line  bool Tokenizer::nextToken( Real& r ) {    std::string t;    if ( nextToken( t ) ) {      r = str2Real( t );      return true;    } else {      return false;    }  }  /// Get a next real number token from an input line};/// </body>/// </classdef>AMIS_NAMESPACE_END#endif // Tokenizer_h_// end of Tokenizer.h
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -