📄 chunker.cpp
字号:
const std::vector <std::string>& rest = param.getRestArg (); if (rest.size()) { for (unsigned int i = 0; i < rest.size(); i++) { std::ifstream ifs (rest[i].c_str ()); if (!ifs) { throw std::runtime_error (rest[i] + ", no such file or directory"); } while (parse (ifs, *ofs)) { }; } } else { while (parse (std::cin, *ofs)) {}; } if (ofs != &std::cout) delete ofs; return EXIT_SUCCESS; } catch (std::exception &e) { std::cerr << "FATAL: " << e.what () << std::endl; return EXIT_FAILURE; } } Chunker::~Chunker() { close (); } bool Chunker::close () { if (features) { for (unsigned int i = 0; i < MAX_FEATURE_LEN; i++) delete [] features[i]; delete [] features; } features = 0; features_size = 0; if (svm!=0) { delete svm; svm = 0; } delete feature_index; feature_index = 0; is_reverse = false; is_write_header = false; is_partial = false; is_verbose = false; mode = 0; selector_func = 0; class_size = 0; clear (); return true; } bool Chunker::clear () { tag.clear(); context.clear(); dist.clear (); features_size = 0; return true; } std::string& Chunker::getFeature(int i, int j) { if (i < 0) { for (int k = - static_cast<int>(bos.size())-1; k >= i; k--) { char buf [32]; std::ostrstream os (buf, 32); os << k << "__BOS__" << std::ends; bos.push_back(std::string(buf)); } //printf("%s\n",bos[-i-1].c_str());//CCR return bos[-i-1]; } else if (i >= static_cast<int>(context.size())) { for (int k = 1 + eos.size(); k <= (i - static_cast<int>(context.size()) + 1); k++) { char buf [32]; std::ostrstream os (buf, 32); os << '+' << k << "__EOS__" << std::ends; eos.push_back (std::string(buf)); } //printf("%s\n", eos[i-context.size()].c_str());//CCR return eos[i-context.size()]; } else { //printf("%s\n",context[i][j].c_str());//CCR return context[i][j]; } } unsigned int Chunker::select (int i) { features_size = 0; if (selector_func) { (*selector_func) (this, i); } unsigned int l = features_size; for (unsigned int j = 0; j < feature_index->features.size(); j++) { std::ostrstream os (features[l], MAX_STR_LEN); os << "F:"; int iTmpTest = feature_index->features[j].first; if (feature_index->features[j].first >= 0) { os << '+'; } os << feature_index->features[j].first << ':' << feature_index->features[j].second << ':' << getFeature (i + feature_index->features[j].first, feature_index->features[j].second) << std::ends; l++; } for (unsigned int j = 0; j < feature_index->tags.size(); j++) { int k = i + feature_index->tags[j]; if (k >= 0) { std::ostrstream os (features[l], MAX_STR_LEN); os << "T:" << feature_index->tags[j] << ':' << tag[k] << std::ends; l++; } //printf("%s\n",features[l]); } return l; } void Chunker::reverse() { if (! is_reverse) return; std::reverse (context.begin(), context.end()); std::reverse (tag.begin(),tag.end()); std::reverse (dist.begin(),dist.end()); } bool Chunker::setSelector (int (*func)(Chunker *, int)) { selector_func = func; return true; } unsigned int Chunker::addFeature (char *s) { strncpy (features[features_size], s, MAX_STR_LEN); features_size++; return features_size; } unsigned int Chunker::add (std::vector <std::string> &s) { context.push_back (s); return context.size (); } unsigned int Chunker::add (std::string &line) { std::vector <std::string> column; unsigned int s = split_string (line, "\t ", column); if (column_size == 0) { column_size = s; } for (; s < column_size; s++) { column.push_back (""); } return add (column); } std::istream& Chunker::read (std::istream &is) { try { clear(); std::string line; for (;;) { if (! std::getline (is, line)) { is.clear (std::ios::eofbit|std::ios::badbit); return is; } if (line == "\t" || line == "" || line == "EOS") { break; } add (line); // CCR REMARK 这个函数返回的是context.size();其功能是把line里面的内容加入context里面。 } return is; } catch (std::exception &e) { _what = std::string ("Chunker::read(): ") + e.what (); is.clear (std::ios::eofbit|std::ios::badbit); return is; } } std::ostream& Chunker::write (std::ostream &os) { try { switch (mode) { case 0: return is_verbose ? writeDetail (os) : writeNormal (os); case 1: return writeSelect (os); } return os; } catch (std::exception &e) { _what = std::string ("Chunker::write(): ") + e.what (); os.clear (std::ios::eofbit|std::ios::badbit); return os; } } bool Chunker::parse (std::istream &is, std::ostream &os) { if (! read (is)) { return false; } if (! parse()) { return false; } write (os); return true; } bool Chunker::parse () { try { switch (mode) { case 0: return is_verbose ? parseDetail () : parseNormal (); case 1: return parseSelect (); } return true; } catch (std::exception &e) { _what = std::string ("Chunker::parse(): ") + e.what (); throw std::runtime_error (_what); return false; } } bool Chunker::parseSelect () { if (column_size <= 1) { throw std::runtime_error ("answer tags are not defined"); } if (! feature_index) { feature_index = new FeatureIndex; feature_index->setFeature (feature, column_size-1); } for (unsigned int i = 0; i < size(); i++) { tag.push_back (context[i][column_size-1]); // push last column } reverse (); return true; } std::ostream& Chunker::writeSelect (std::ostream &os) { if (! is_write_header) { if (column_size <= 1) throw std::runtime_error ("answer tags are not defined"); if (! feature_index) { feature_index = new FeatureIndex; feature_index->setFeature (feature, column_size-1); } os << "Version: " << VERSION << std::endl; os << "Package: " << PACKAGE << std::endl; os << "Parsing_Direction: " << (is_reverse ? "backward" : "forward") << std::endl; os << "Feature_Parameter: " << feature << std::endl; os << "Column_Size: " << column_size-1 << std::endl; // NOTE: must -1; last colum is ANSWER os << "Tag_Features:"; for (unsigned int i = 0; i < feature_index->tags.size(); i++) os << ' ' << feature_index->tags[i]; os << std::endl; os << "Features:"; for (unsigned int i = 0; i < feature_index->features.size(); i++) os << ' ' << feature_index->features[i].first << ":" << feature_index->features[i].second; os << std::endl << std::endl; is_write_header = true; } for (unsigned int i = 0; i < size(); i++) { os << tag[i]; unsigned int size = select (i); for (unsigned int j = 0; j < size; j++) { os << ' ' << features[j]; } os << std::endl; } os << std::endl; return os; }}#define _YAMCHA_PARSE_DETAIL#include "chunkersub.h"#undef _YAMCHA_PARSE_DETAIL#include "chunkersub.h"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -