⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chunker.cpp

📁 用于汉字识别和分类的支持向量机SVMTEST测试算法,很好用啊
💻 CPP
📖 第 1 页 / 共 2 页
字号:
     			  const std::vector <std::string>& rest = param.getRestArg ();     			  if (rest.size())			  {				  for (unsigned int i = 0; i < rest.size(); i++)				  {					  std::ifstream ifs (rest[i].c_str ());					  if (!ifs)					  {						  throw std::runtime_error (rest[i] + ", no such file or directory");					  }					  					  while (parse (ifs, *ofs))					  {					  };				  }			  }			  else			  {				  while (parse (std::cin, *ofs)) {};			  }  			  if (ofs != &std::cout) delete ofs;			  return EXIT_SUCCESS;		}		catch (std::exception &e)		{			std::cerr << "FATAL: " << e.what () << std::endl;			return EXIT_FAILURE;		}	}		Chunker::~Chunker()	{		close ();	}	bool Chunker::close () 	{		if (features)		{			for (unsigned int i = 0; i < MAX_FEATURE_LEN; i++) delete [] features[i];			delete [] features;		}		features = 0;		features_size = 0;		if (svm!=0)		{		    delete svm;		    svm = 0;		}		delete feature_index;		feature_index = 0;		is_reverse      = false;		is_write_header = false;		is_partial      = false;		is_verbose      = false;		mode            = 0;		selector_func   = 0;		class_size      = 0;		clear ();		return true;	}	bool Chunker::clear ()	{		tag.clear();		context.clear();		dist.clear ();		features_size = 0;		return true;	}	std::string& Chunker::getFeature(int i, int j)	{		if (i < 0) 		{			for (int k = - static_cast<int>(bos.size())-1; k >= i; k--)			{				char buf [32];				std::ostrstream os (buf, 32);				os << k << "__BOS__" << std::ends;				bos.push_back(std::string(buf));			}            //printf("%s\n",bos[-i-1].c_str());//CCR			return bos[-i-1];		}		else if (i >= static_cast<int>(context.size()))		{			for (int k = 1 + eos.size(); k <= (i - static_cast<int>(context.size()) + 1); k++) 			{				char buf [32];				std::ostrstream os (buf, 32);				os << '+' << k << "__EOS__" << std::ends;				eos.push_back (std::string(buf));			}             			//printf("%s\n", eos[i-context.size()].c_str());//CCR			return eos[i-context.size()];		}		else		{            //printf("%s\n",context[i][j].c_str());//CCR			return context[i][j];		}	}	  unsigned int Chunker::select (int i)	  {		  features_size = 0;		  if (selector_func)		  {			  (*selector_func) (this, i);		  }		  unsigned int l = features_size;		  for (unsigned int j = 0; j < feature_index->features.size(); j++) 		  {			  std::ostrstream os (features[l], MAX_STR_LEN);			  os << "F:";              int iTmpTest = feature_index->features[j].first;			  			  if (feature_index->features[j].first >= 0) 			  {				  os << '+';			  }							  			  os << feature_index->features[j].first			  << ':'  << feature_index->features[j].second 			  << ':'  <<  getFeature (i + feature_index->features[j].first, 						              feature_index->features[j].second) << std::ends;			  l++;		  }		  for (unsigned int j = 0; j < feature_index->tags.size(); j++) 		  {			  int k = i + feature_index->tags[j];			  if (k >= 0) 			  {				  std::ostrstream os (features[l], MAX_STR_LEN);				  os << "T:" << feature_index->tags[j] << ':' << tag[k] << std::ends;			      l++;			  }			  //printf("%s\n",features[l]);		  }		  return l;	  }	  void Chunker::reverse()	  {		  if (! is_reverse) return;		  std::reverse (context.begin(), context.end());		  std::reverse (tag.begin(),tag.end());		  std::reverse (dist.begin(),dist.end());	  }	  bool Chunker::setSelector (int (*func)(Chunker *, int))	  {		  selector_func  = func;		  return true;	  }	  unsigned int Chunker::addFeature (char *s)	  {		  strncpy (features[features_size], s, MAX_STR_LEN);		  features_size++;		  return features_size;	  }	  unsigned int Chunker::add (std::vector <std::string> &s)	  {		  context.push_back (s);		  return context.size ();	  }	  unsigned int Chunker::add (std::string &line) 	  {		  std::vector <std::string> column;		  unsigned int s = split_string (line, "\t ", column);		  if (column_size == 0) 		  {			  column_size = s;		  }		  for (; s < column_size; s++)		  {			  column.push_back ("");		  }		  		  return add (column);	  }	  std::istream& Chunker::read (std::istream &is)	  {		  try		  {			  clear();			  std::string line;			  for (;;)			  {				  if (! std::getline (is, line)) 				  {					  is.clear (std::ios::eofbit|std::ios::badbit);			          return is;				  }				  if (line == "\t" || line == "" || line == "EOS")				  {					  break;				  }				  add (line);  // CCR REMARK 这个函数返回的是context.size();其功能是把line里面的内容加入context里面。			  }			  return is;		  }		  catch (std::exception &e) 		  {			  _what = std::string ("Chunker::read(): ") + e.what ();			  is.clear (std::ios::eofbit|std::ios::badbit);		      return is;		  }	  }	  std::ostream& Chunker::write (std::ostream &os)	  {		  try		  {			  switch (mode)			  {			  case 0:					  return is_verbose ? writeDetail (os) : writeNormal (os);			  case 1:					  return writeSelect (os);			  }			  			  return os;		  }		  catch (std::exception &e)		  {			  _what = std::string ("Chunker::write(): ") + e.what ();			  os.clear (std::ios::eofbit|std::ios::badbit);			  return os;		  }	  }	  bool Chunker::parse (std::istream &is, std::ostream &os)	  {		  if (! read (is))		  {			  return false;		  }		  if (! parse())		  {			  return false;		  }		  write (os);		  return true;	  }	  bool Chunker::parse ()	  {		  try		  {			  switch (mode)			  {			  case 0:				  return  is_verbose ? parseDetail () : parseNormal ();			  case 1:				  return parseSelect ();			  }			  return true;		  }		  catch (std::exception &e)		  {			  _what = std::string ("Chunker::parse(): ") + e.what ();			  throw std::runtime_error (_what);			  return false;		  }	  }	  bool Chunker::parseSelect ()	  {		  if (column_size <= 1) 		  {			  throw std::runtime_error ("answer tags are not defined");		  }		  if (! feature_index)		  {			  feature_index = new FeatureIndex;			  feature_index->setFeature (feature, column_size-1);		  }		  for (unsigned int i = 0; i < size(); i++) 		  {			  tag.push_back (context[i][column_size-1]); // push last column		  }		  		  reverse ();		  return true;	  }	  std::ostream& Chunker::writeSelect (std::ostream &os) 	  {		  if (! is_write_header)		  {			  if (column_size <= 1) 				  throw std::runtime_error ("answer tags are not defined");			  			  if (! feature_index) 			  {				  feature_index = new FeatureIndex;				  feature_index->setFeature (feature, column_size-1);			  }			  os << "Version: "           << VERSION << std::endl;			  os << "Package: "           << PACKAGE << std::endl;			  os << "Parsing_Direction: " << (is_reverse ? "backward" : "forward") << std::endl;			  os << "Feature_Parameter: " << feature << std::endl;			  os << "Column_Size: "       << column_size-1 << std::endl; // NOTE: must -1; last colum is ANSWER			  os << "Tag_Features:";			  for (unsigned int i = 0; i < feature_index->tags.size(); i++) 			  os << ' ' << feature_index->tags[i];			  os << std::endl;			  os << "Features:";			  for (unsigned int i = 0; i < feature_index->features.size(); i++) 			  os << ' ' << feature_index->features[i].first << ":" << feature_index->features[i].second;			  os << std::endl << std::endl;			  is_write_header = true;		  }	      for (unsigned int i = 0; i < size(); i++) 		  {			  os << tag[i];		      unsigned int size = select (i);		      for (unsigned int j = 0; j < size; j++)			  {				  os << ' ' << features[j];			  }		      os << std::endl;		  }		  		  os << std::endl;		  		  return os;	  }}#define _YAMCHA_PARSE_DETAIL#include "chunkersub.h"#undef _YAMCHA_PARSE_DETAIL#include "chunkersub.h"

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -