📄 chunker.cpp
字号:
/* YamCha -- Yet Another Multipurpose CHunk Annotator $Id: chunker.cpp,v 1.16 2003/07/04 04:55:17 taku-ku Exp $; Copyright (C) 2001 Taku Kudoh <taku-ku@aist-nara.ac.jp> All rights reserved. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later verjsion. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/#pragma warning(disable: 4786)//CCR 2004.04.05#include <vector>#include <stdexcept>#include <string>#include <strstream>#include <map>#include <algorithm>#include <functional>#include <fstream>#include "feature_index.h"#include "param.h"#include "yamcha.h"#include "common.h"namespace YamCha {#define _INIT_CHUNKER feature_index(0), svm(0), \ is_reverse (0), is_write_header (0), \ is_partial (0), is_verbose(0), mode(0), \ column_size(0), class_size(0), features (0), \ features_size(0), locCount(0), SpCountFlag(0),selector_func (0)#define CHUNKER_ERROR std::ostrstream os; \ os << "Tagger::open(): " << param.what () << "\n\n" \ << COPYRIGHT << "\ntry '--help' for more information.\n" << std::ends; \ _what = os.str(); os.freeze (false); static const Option long_options[] = { {"model", 'm', 0, "FILE", "use FILE as model file" }, {"feature", 'F', 0, "PAT", "use PAT as the feature template pattern"}, {"eos-string" , 'e', 0, "STR", "use STR as sentence-boundary marker" }, {"verbose", 'V', 0, 0, "verbose mode" }, {"candidate", 'C', 0, 0, "partial chunking model"} , {"backward", 'B', 0, 0, "select features from the end of sentence" }, {"output", 'o', 0, "FILE", "use FILE as output file" }, {"version", 'v', 0, 0, "show the version and exit" }, {"help", 'h', 0, 0, "show this help and exit" }, {0,0,0,0,0} }; Chunker::Chunker(): _INIT_CHUNKER { if (!initMap()) throw std::runtime_error (_what);//MTT ADD } Chunker::Chunker (Param &p): _INIT_CHUNKER { if (! open (p)) throw std::runtime_error (_what); } Chunker::Chunker (int argc, char** argv): _INIT_CHUNKER { if (! open (argc, argv)) throw std::runtime_error (_what); } Chunker::Chunker (const char* arg): _INIT_CHUNKER { if (! open (arg)) throw std::runtime_error (_what); } bool Chunker::initMap() { ifstream InFile; InFile.open("place.txt"); string Line; string Item; while (getline(InFile, Line)) { if (Line.size()%2!=0) { std::cout<<"File is bad!"<<std::endl; return false; } for (int i=0; i<Line.size(); i+=2) { Item=Line.substr(i, 2); if (i==0) { ++BeginWord[Item]; MiddleWord[Item]; EndWord[Item]; ++TotalWord[Item]; } else if(i==Line.size()-2) { BeginWord[Item]; MiddleWord[Item]; ++EndWord[Item]; ++TotalWord[Item]; } else { BeginWord[Item]; ++MiddleWord[Item]; EndWord[Item]; ++TotalWord[Item]; } } } InFile.clear(); InFile.close(); return true; } int Chunker::ComputeSpFrq (std::string line, std::ofstream& os) { int bTime = 0; int mTime = 0; int eTime = 0; int total = 0; double SpFrq = 0; double SpFrq1 = 0; double SpFrq2 = 1; double SpFrq3 = 0; string item; map<string, int>::const_iterator iter; for (int i=0; i < line.size(); i+=2) { item=line.substr(i,2); iter=TotalWord.find(item); if(iter!=TotalWord.end()) total=iter->second; else total=0; if (i==0) { iter=BeginWord.find(item); if (iter!=BeginWord.end()) bTime=iter->second; else bTime=0; if ((total!=0)&&(bTime!=0)) { SpFrq1 = (double)bTime/total; } else { SpFrq1=0; } } else if (i==(line.size()-2)) { iter=EndWord.find(item); if (iter!=EndWord.end()) eTime=iter->second; else eTime=0; if ((total!=0)&&(eTime!=0)) { SpFrq3 = (double)eTime/total; } else { SpFrq3=0; } } else { iter=MiddleWord.find(item); if (iter!=MiddleWord.end()) mTime=iter->second; else mTime=0; if ((total!=0)&&(mTime!=0)) { SpFrq2 *= (double)mTime/total; } else { SpFrq2=0; } } } SpFrq=SpFrq1*SpFrq2*SpFrq3; if ((SpFrq<0.09)||((SpFrq>0.09)&&(SpFrq1<0.45))) return 1; else return 0; //os<<SpFrq<<'\t'<<SpFrq1<<std::endl; //return SpFrq; } bool Chunker::open (int argc, char **argv) { Param param; if (! param.open (argc, argv, long_options)) { CHUNKER_ERROR; return false; } return open (param); } bool Chunker::open (const char *arg) { Param param; if (! param.open (arg, long_options)) { CHUNKER_ERROR; return false; } return open (param); } bool Chunker::open (Param ¶m) { try { if (param.getProfileInt ("help")) { std::ostrstream ostrs; param.help (ostrs, long_options); ostrs << std::ends; std::runtime_error e (ostrs.str()); ostrs.freeze (false); throw e; } if (param.getProfileInt ("version")) { std::ostrstream ostrs; param.version (ostrs, long_options); ostrs << std::ends; std::runtime_error e (ostrs.str()); ostrs.freeze (false); throw e; } close (); feature = param.getProfileString ("feature"); is_partial = param.getProfileInt("candidate"); is_verbose = param.getProfileInt("verbose"); eos_string = param.getProfileString("eos-string"); std::string model = param.getProfileString ("model");//返回模型名 printf("%s\n", model.c_str());// if (model != "") { mode = 0; svm = new SVM; if (! svm->open (model.c_str())) { throw std::runtime_error (svm->what()); } feature_index = new FeatureIndex; feature_index->setFeature (svm->getProfileString ("features"), svm->getProfileString ("tag_features"));// "-2 -1" column_size = svm->getProfileInt ("column_size"); if (column_size == 0) { column_size = feature_index->getColumnSize (); } if (column_size == 0) { throw std::runtime_error (std::string ("column size is 0 or unknown: ") + model); } if (svm->getProfileString("parsing_direction") == "backward") // direction = "forward" { is_reverse = true; } class_size = svm->getClassSize (); } else if (feature != "") { mode = 1; is_reverse = param.getProfileInt ("backward"); } else { throw std::runtime_error ("unknown action mode"); } features = new char * [MAX_FEATURE_LEN]; for (unsigned int i = 0; i < MAX_FEATURE_LEN; i++) { features[i] = new char [MAX_STR_LEN]; } return true; } catch (std::exception &e) { _what = std::string ("Chunker::open(): ") + e.what (); throw std::runtime_error (_what); return false; } } int Chunker::parse (int argc, char **argv) { try { Param param; if (! param.open (argc, argv, long_options)) //找到模型和输出到的文件的地址 { CHUNKER_ERROR; throw std::runtime_error (_what); } if (param.getProfileInt ("help")) { param.help (std::cout, long_options); return EXIT_SUCCESS; } if (param.getProfileInt ("version")) { param.version (std::cout, long_options); return EXIT_SUCCESS; } if (! open (param)) { throw std::runtime_error (_what); } std::ostream *ofs = &std::cout; std::string outputFileName = param.getProfileString ("output"); printf("%s\n", outputFileName.c_str());//CCR if (! outputFileName.empty()) { ofs = new std::ofstream (outputFileName.c_str()); if (! *ofs) { throw std::runtime_error (outputFileName + ", no such file or directory"); } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -