⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chunker.cpp

📁 用于汉字识别和分类的支持向量机SVMTEST测试算法,很好用啊
💻 CPP
📖 第 1 页 / 共 2 页
字号:
/* YamCha -- Yet Another Multipurpose CHunk Annotator $Id: chunker.cpp,v 1.16 2003/07/04 04:55:17 taku-ku Exp $; Copyright (C) 2001  Taku Kudoh <taku-ku@aist-nara.ac.jp> All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later verjsion. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/#pragma warning(disable: 4786)//CCR 2004.04.05#include <vector>#include <stdexcept>#include <string>#include <strstream>#include <map>#include <algorithm>#include <functional>#include <fstream>#include "feature_index.h"#include "param.h"#include "yamcha.h"#include "common.h"namespace YamCha {#define _INIT_CHUNKER feature_index(0), svm(0), \                      is_reverse (0), is_write_header (0), \                      is_partial (0), is_verbose(0), mode(0), \                      column_size(0), class_size(0), features (0), \                      features_size(0), locCount(0), SpCountFlag(0),selector_func (0)#define CHUNKER_ERROR  std::ostrstream os; \                      os << "Tagger::open(): " << param.what () << "\n\n" \                         <<  COPYRIGHT << "\ntry '--help' for more information.\n" << std::ends; \                      _what = os.str(); os.freeze (false);		static const Option long_options[] = 	{		{"model",          'm', 0, "FILE", "use FILE as model file" },		{"feature",        'F', 0, "PAT",  "use PAT as the feature template pattern"},		{"eos-string" ,    'e', 0, "STR",  "use STR as sentence-boundary marker" },		{"verbose",        'V', 0, 0,      "verbose mode" },     		{"candidate",      'C', 0, 0,      "partial chunking model"} ,		{"backward",       'B', 0, 0,      "select features from the end of sentence" },		{"output",         'o', 0, "FILE", "use FILE as output file" },     		{"version",        'v', 0, 0,      "show the version and exit" },		{"help",           'h', 0, 0,      "show this help and exit" },		{0,0,0,0,0}	};  	Chunker::Chunker(): _INIT_CHUNKER 	{		if (!initMap()) throw std::runtime_error (_what);//MTT ADD	}	Chunker::Chunker (Param &p): _INIT_CHUNKER 	{ 		if (! open (p)) throw std::runtime_error (_what);	}	Chunker::Chunker (int argc, char** argv): _INIT_CHUNKER 	{ 		if (! open (argc, argv)) throw std::runtime_error (_what);	}	Chunker::Chunker (const char* arg): _INIT_CHUNKER 	{ 		if (! open (arg)) throw std::runtime_error (_what);	}	bool Chunker::initMap()	{		ifstream	InFile;		InFile.open("place.txt");		string		Line;		string		Item;		while (getline(InFile, Line))		{			if (Line.size()%2!=0)			{				std::cout<<"File is bad!"<<std::endl;				return	false;			}			for (int i=0; i<Line.size(); i+=2)			{				Item=Line.substr(i, 2);				if (i==0)				{					++BeginWord[Item];					MiddleWord[Item];					EndWord[Item];					++TotalWord[Item];				}				else if(i==Line.size()-2)				{					BeginWord[Item];					MiddleWord[Item];					++EndWord[Item];					++TotalWord[Item];				}				else				{					BeginWord[Item];					++MiddleWord[Item];					EndWord[Item];					++TotalWord[Item];				}			}		}		InFile.clear();		InFile.close();		return true;	}	int Chunker::ComputeSpFrq (std::string  line, std::ofstream& os)	{		int		bTime = 0;		int		mTime = 0;		int		eTime = 0;		int		total = 0;		double	SpFrq = 0;		double  SpFrq1 = 0;		double  SpFrq2 = 1;		double  SpFrq3 = 0;		string	item;		map<string, int>::const_iterator iter;		for (int i=0; i < line.size(); i+=2)		{			item=line.substr(i,2);			iter=TotalWord.find(item);			if(iter!=TotalWord.end())				total=iter->second;			else				total=0;			if (i==0)			{				iter=BeginWord.find(item);				if (iter!=BeginWord.end())					bTime=iter->second;				else					bTime=0;				if ((total!=0)&&(bTime!=0))				{					SpFrq1 = (double)bTime/total;				}				else				{					SpFrq1=0;				}			}			else if (i==(line.size()-2))			{				iter=EndWord.find(item);				if (iter!=EndWord.end())					eTime=iter->second;				else					eTime=0;				if ((total!=0)&&(eTime!=0))				{					SpFrq3 = (double)eTime/total;				}				else				{					SpFrq3=0;				}			}			else			{				iter=MiddleWord.find(item);				if (iter!=MiddleWord.end())					mTime=iter->second;				else					mTime=0;				if ((total!=0)&&(mTime!=0))				{					SpFrq2 *= (double)mTime/total;				}				else				{					SpFrq2=0;				}			}		}		SpFrq=SpFrq1*SpFrq2*SpFrq3;		if ((SpFrq<0.09)||((SpFrq>0.09)&&(SpFrq1<0.45)))			return  1;		else			return  0;		//os<<SpFrq<<'\t'<<SpFrq1<<std::endl;		//return SpFrq;			}		bool Chunker::open (int argc, char **argv)	{		Param param;		if (! param.open (argc, argv, long_options))		{			CHUNKER_ERROR;			return false;		}		return open (param);	}	bool Chunker::open (const char *arg)	{		Param param;		if (! param.open (arg, long_options))		{			 CHUNKER_ERROR;			 return false;		}		return open (param);	}    bool Chunker::open (Param &param)	{		try		{			if (param.getProfileInt ("help"))			{				std::ostrstream ostrs;				param.help (ostrs, long_options);				ostrs << std::ends; 				std::runtime_error e (ostrs.str());				ostrs.freeze (false);				throw e; 			 }			 if (param.getProfileInt ("version"))			 {				 std::ostrstream ostrs;				 param.version (ostrs, long_options);				 ostrs << std::ends; 	 				 std::runtime_error e (ostrs.str());				 ostrs.freeze (false);				 throw e; 			 }			 close ();			 feature  = param.getProfileString ("feature");			 is_partial = param.getProfileInt("candidate");			 is_verbose = param.getProfileInt("verbose");			 eos_string = param.getProfileString("eos-string");			 std::string model = param.getProfileString ("model");//返回模型名			 printf("%s\n", model.c_str());//			 if (model != "")			 {				 mode = 0;								 svm = new SVM;				 if (! svm->open (model.c_str())) 				 {					 throw std::runtime_error (svm->what());				 }								 feature_index = new FeatureIndex;				 feature_index->setFeature (svm->getProfileString ("features"), 											svm->getProfileString ("tag_features"));// "-2 -1"				 column_size = svm->getProfileInt ("column_size");				 				 if (column_size == 0)				 {					 column_size = feature_index->getColumnSize ();				 }				 if (column_size == 0)				 {					throw std::runtime_error (std::string ("column size is 0 or unknown: ") + model);				 }				 if (svm->getProfileString("parsing_direction") == "backward") // direction = "forward"				 {					 is_reverse = true;				 }				 class_size = svm->getClassSize ();			 }			 else if (feature != "")			 {				 mode       = 1;				 is_reverse = param.getProfileInt ("backward");						 }			 else 			 {				  throw std::runtime_error ("unknown action mode");			 }			 features = new char * [MAX_FEATURE_LEN];			 for (unsigned int i = 0; i < MAX_FEATURE_LEN; i++) 			 {				 features[i] = new char [MAX_STR_LEN];			 }					 return true;		}		catch (std::exception &e)		{			  _what = std::string ("Chunker::open(): ") + e.what ();			  throw std::runtime_error (_what);			  return false;		}	}		int Chunker::parse (int argc, char **argv)	{		try		{			Param param;    			if (! param.open (argc, argv, long_options)) //找到模型和输出到的文件的地址			{				CHUNKER_ERROR;				throw std::runtime_error (_what);			}			 if (param.getProfileInt ("help")) 			 {				 param.help (std::cout, long_options);				 return EXIT_SUCCESS;			 }			 if (param.getProfileInt ("version"))			 {				 param.version (std::cout, long_options);				 return EXIT_SUCCESS;			  }			  if (! open (param))			  {				  throw std::runtime_error (_what);			  }			  std::ostream *ofs = &std::cout;			  std::string outputFileName = param.getProfileString ("output");			  printf("%s\n", outputFileName.c_str());//CCR			  if (! outputFileName.empty())			  {				  ofs = new std::ofstream (outputFileName.c_str());				  if (! *ofs)				  {					  throw std::runtime_error (outputFileName + ", no such file or directory");				  }			  }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -