📄 feature.cpp
字号:
/*
 CRF++ -- Yet Another CRF toolkit

 $Id: feature.cpp 1558 2006-11-25 04:59:20Z taku $;

 Copyright(C) 2005 Taku Kudo <taku@chasen.org>

 This is free software with ABSOLUTELY NO WARRANTY.

 This library is free software; you can redistribute it and/or
 modify it under the terms of the GNU Lesser General Public
 License as published by the Free Software Foundation; either
 version 2.1 of the License, or(at your option) any later version.

 This library is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public
 License along with this library; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include "feature_index.h"
#include "common.h"
#include "node.h"
#include "path.h"
#include "tagger.h"

namespace CRFPP {

// Placeholder tokens returned by get_index() when the requested row lies
// 1..4 positions before the first row (BOS) or after the last row (EOS).
static const char *BOS[4] = { "_B-1", "_B-2", "_B-3", "_B-4" };
static const char *EOS[4] = { "_B+1", "_B+2", "_B+3", "_B+4" };

// Parses a "[row,col]" reference starting at p and resolves it relative to
// position pos of tagger.
//
// On success p is left pointing at the closing ']' (the caller advances past
// it) and the result is:
//   - a BOS marker when pos + row is negative,
//   - an EOS marker when pos + row is >= tagger.size(),
//   - tagger.x(pos + row, col) otherwise.
// max_xsize_ is raised to col + 1 if that is larger than its current value.
//
// Returns 0 when the reference is malformed, when row is outside [-4, 4], or
// when col is not below tagger.xsize().
const char *FeatureIndex::get_index(char *&p, size_t pos, TaggerImpl &tagger) {
  if (*p++ != '[') return 0;

  int neg = 1;
  if (*p == '-') {
    neg = -1;
    ++p;
  }

  // Row offset: digits up to ','. Any other character -- including the NUL
  // terminator -- makes the reference malformed. Rejecting NUL here matters:
  // returning success with p on the terminator would let the caller's loop
  // increment step past the end of the template string.
  int row = 0;
  for (; *p != ','; ++p) {
    if (*p < '0' || *p > '9') return 0;
    row = 10 * row + (*p - '0');
    // |row| > 4 is rejected below anyway; bailing out now keeps an arbitrarily
    // long digit run from overflowing the int.
    if (row > 4) return 0;
  }
  ++p;  // skip ','

  // Column: digits up to ']'. Same termination rule as above.
  const int xsize = static_cast<int>(tagger.xsize());
  int col = 0;
  for (; *p != ']'; ++p) {
    if (*p < '0' || *p > '9') return 0;
    col = 10 * col + (*p - '0');
    // col only grows as digits are appended, so once it is out of range it
    // stays out of range; rejecting here keeps col below xsize before each
    // multiply instead of letting it grow without bound.
    if (col >= xsize) return 0;
  }

  row *= neg;

  // Final range check (also covers an empty digit run, which leaves the
  // corresponding value at 0).
  if (row < -4 || row > 4 || col < 0 || col >= xsize) return 0;

  max_xsize_ = _max(max_xsize_, static_cast<unsigned int>(col + 1));

  const int idx = static_cast<int>(pos) + row;
  if (idx < 0) return BOS[-idx - 1];

  const int size = static_cast<int>(tagger.size());
  if (idx >= size) return EOS[idx - size];

  return tagger.x(idx, col);
}

// Expands the template string p for position pos into os.
//
// Ordinary characters are copied verbatim. The only macro handled is
// "%x[row,col]", which is replaced by the string get_index() returns for it.
// A '\0' is appended to os after the expansion.
//
// Returns false if '%' is followed by anything other than 'x', or if
// get_index() rejects the reference; true otherwise.
bool FeatureIndex::apply_rule(string_buffer &os,
                              char *p,
                              size_t pos,
                              TaggerImpl &tagger) {
  os.assign("");  // clear

  for (; *p; ++p) {
    if (*p != '%') {
      os << *p;
      continue;
    }

    // '%' must introduce an 'x' macro (a trailing '%' hits the NUL here and
    // is rejected as well).
    if (*++p != 'x') return false;
    ++p;  // move to the '[' that get_index() expects

    const char *r = get_index(p, pos, tagger);
    if (!r) return false;
    os << r;
    // get_index() left p on ']'; the loop increment moves past it.
  }

  os << '\0';
  return true;
}

// Re-creates the Node and Path objects of tagger from the feature vectors
// stored in feature_cache_, starting at tagger.feature_id().
//
// The freelists belonging to tagger.thread_id() are freed first. Cache
// entries are consumed in order: one per position for the nodes, followed by
// one per position >= 1 for the paths between neighbouring positions.
void FeatureIndex::rebuildFeatures(TaggerImpl &tagger) {
  size_t fid = tagger.feature_id();
  unsigned short thread_id = tagger.thread_id();

  path_freelist_[thread_id].free();
  node_freelist_[thread_id].free();

  // One node per (position, index into y_); all nodes at a position share
  // the same cached feature vector.
  for (size_t cur = 0; cur < tagger.size(); ++cur) {
    int *f = feature_cache_[fid++];
    for (size_t i = 0; i < y_.size(); ++i) {
      Node *n = node_freelist_[thread_id].alloc();
      n->clear();
      n->x = cur;
      n->y = i;
      n->fvector = f;
      tagger.set_node(n, cur, i);
    }
  }

  // One path per pair (node j at cur - 1, node i at cur); all paths into a
  // position share the same cached feature vector.
  for (size_t cur = 1; cur < tagger.size(); ++cur) {
    int *f = feature_cache_[fid++];
    for (size_t j = 0; j < y_.size(); ++j) {
      for (size_t i = 0; i < y_.size(); ++i) {
        Path *p = path_freelist_[thread_id].alloc();
        p->clear();
        p->add(tagger.node(cur - 1, j), tagger.node(cur, i));
        p->fvector = f;
      }
    }
  }
}

// Looks up the feature string currently held in `os` and, unless getID()
// reports -1, appends the id to `feature`. Wrapped in do { } while (0) so
// that `ADD;` is exactly one statement wherever it is used.
#define ADD do { \
    int id = this->getID(os.c_str()); \
    if (id != -1) feature.push_back(id); \
  } while (0)

// Expands every template for every position of tagger and stores the
// resulting feature-id lists in feature_cache_.
//
// tagger's feature id is set to the size of feature_cache_ before anything is
// added. One list per position is added for unigram_templs_, then one list
// per position >= 1 for bigram_templs_ -- the same order in which
// rebuildFeatures() consumes them.
//
// Returns true when all templates expand. A template that apply_rule()
// rejects is handled by CHECK_FALSE (defined outside this file; presumably it
// records the streamed message and returns false -- confirm in common.h).
bool FeatureIndex::buildFeatures(TaggerImpl &tagger) {
  string_buffer os;
  std::vector<int> feature;

  tagger.set_feature_id(feature_cache_.size());

  for (size_t cur = 0; cur < tagger.size(); ++cur) {
    for (std::vector<char *>::iterator it = unigram_templs_.begin();
         it != unigram_templs_.end(); ++it) {
      CHECK_FALSE(apply_rule(os, *it, cur, tagger))
          << " format error: " << *it;
      ADD;
    }
    feature_cache_.add(feature);
    feature.clear();
  }

  for (size_t cur = 1; cur < tagger.size(); ++cur) {
    for (std::vector<char *>::iterator it = bigram_templs_.begin();
         it != bigram_templs_.end(); ++it) {
      CHECK_FALSE(apply_rule(os, *it, cur, tagger))
          << "format error: " << *it;
      ADD;
    }
    feature_cache_.add(feature);
    feature.clear();
  }

  return true;
}
#undef ADD
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -