⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 feature.cpp

📁 Conditional Random Fields的训练识别工具
💻 CPP
字号:
/*
  CRF++ -- Yet Another CRF toolkit

  $Id: feature.cpp 1558 2006-11-25 04:59:20Z taku $;

  Copyright(C) 2005 Taku Kudo <taku@chasen.org>

  This is free software with ABSOLUTELY NO WARRANTY.

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or(at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include "feature_index.h"
#include "common.h"
#include "node.h"
#include "path.h"
#include "tagger.h"

namespace CRFPP
{
  // Largest absolute row offset a template may use; it equals the number of
  // entries in BOS/EOS, so every out-of-sequence offset has a marker string.
  static const int kMaxOffset = 4;

  // Marker strings returned for positions before the first token (BOS) and
  // after the last token (EOS); entry k stands for k+1 positions outside.
  static const char *BOS[4] = { "_B-1", "_B-2", "_B-3", "_B-4"};
  static const char *EOS[4] = { "_B+1", "_B+2", "_B+3", "_B+4"};

  // Parses an index expression of the form "[row,col]" starting at p and
  // returns the string it designates for the token at position pos.
  //
  //   p      - in/out cursor; must point at '['.  On success it is left on
  //            the closing ']' so the caller's own increment steps past it.
  //            Its value is unspecified when 0 is returned.
  //   pos    - position of the current token in tagger.
  //   tagger - supplies xsize(), size() and x(row, col).
  //
  // row is an optionally negated decimal in [-kMaxOffset, kMaxOffset] that is
  // added to pos; col is a decimal in [0, tagger.xsize()).  An empty digit
  // run counts as 0 (so "[,]" means row 0, column 0).  If pos + row falls
  // outside the sequence the matching BOS/EOS marker is returned, otherwise
  // the result of tagger.x(pos + row, col).
  //
  // Returns 0 on any syntax or range error, including an expression that is
  // cut off before its ',' or ']'.  On success max_xsize_ is raised to at
  // least col + 1.
  const char *FeatureIndex::get_index(char *&p,
                                      size_t pos,
                                      TaggerImpl &tagger)
  {
    if (*p++ != '[') return 0;

    int sign = 1;
    if (*p == '-') {
      sign = -1;
      ++p;
    }

    // Row offset.  Anything above kMaxOffset is rejected anyway, so bail out
    // as soon as it is exceeded; this also keeps the accumulation from ever
    // overflowing on an absurdly long digit run.
    int row = 0;
    for (; *p >= '0' && *p <= '9'; ++p) {
      row = 10 * row + (*p - '0');
      if (row > kMaxOffset) return 0;
    }
    // The row must be closed by ','.  Hitting the end of the string here used
    // to be accepted silently, leaving p on the terminator for the caller to
    // step over.
    if (*p != ',') return 0;
    ++p;

    // Column.  The value only grows while digits are appended, so it can be
    // rejected the moment it would reach xsize; the test is done before the
    // multiplication so that the arithmetic cannot overflow.
    const int xsize = static_cast<int>(tagger.xsize());
    int col = 0;
    for (; *p >= '0' && *p <= '9'; ++p) {
      const int digit = *p - '0';
      if (digit >= xsize || col > (xsize - 1 - digit) / 10) return 0;
      col = 10 * col + digit;
    }
    // Likewise the column must be closed by ']'.
    if (*p != ']') return 0;

    // Only reachable with col == 0 from an empty digit run: still invalid
    // when there are no columns at all.
    if (col >= xsize) return 0;

    row *= sign;

    max_xsize_ = _max(max_xsize_, static_cast<unsigned int>(col + 1));

    const int idx = static_cast<int>(pos) + row;
    if (idx < 0) return BOS[-idx - 1];

    const int len = static_cast<int>(tagger.size());
    if (idx >= len) return EOS[idx - len];

    return tagger.x(idx, col);
  }

  // Expands the feature template p for the token at position pos and stores
  // the result in os.
  //
  // Ordinary characters are copied verbatim.  A '%' must be followed by 'x'
  // and an index expression understood by get_index(); the string that
  // get_index() returns is appended in its place.  A terminating '\0'
  // character is appended to os at the end.
  //
  // Returns false if '%' is followed by anything other than 'x', or if
  // get_index() rejects the expression; os then holds a partial expansion.
  bool FeatureIndex::apply_rule(string_buffer &os,
                                char *p,
                                size_t pos,
                                TaggerImpl &tagger)
  {
    os.assign("");  // clear

    for (; *p; ++p) {
      if (*p != '%') {
        os << *p;
        continue;
      }

      // '%' introduces a macro; "%x[row,col]" is the only form recognised.
      ++p;
      if (*p != 'x') return false;
      ++p;

      const char *r = get_index(p, pos, tagger);
      if (!r) return false;
      os << r;
      // p now rests on the closing ']'; the loop increment moves past it.
    }

    os << '\0';
    return true;
  }

  // Recreates the Node and Path objects of tagger from the feature vectors
  // held in feature_cache_.
  //
  // The per-thread node and path free lists are released first.  Starting at
  // tagger.feature_id(), one cache entry is consumed per position for the
  // nodes, followed by one entry per adjacent pair of positions for the
  // paths - the same order in which buildFeatures() adds them.
  void FeatureIndex::rebuildFeatures(TaggerImpl &tagger)
  {
    size_t fid = tagger.feature_id();
    const unsigned short thread_id = tagger.thread_id();

    path_freelist_[thread_id].free();
    node_freelist_[thread_id].free();

    // One node for every (position, label index) pair; all nodes of a
    // position share that position's feature vector.
    for (size_t cur = 0; cur < tagger.size(); ++cur) {
      int *f = feature_cache_[fid++];
      for (size_t i = 0; i < y_.size(); ++i) {
        Node *n = node_freelist_[thread_id].alloc();
        n->clear();
        n->x = cur;
        n->y = i;
        n->fvector = f;
        tagger.set_node(n, cur, i);
      }
    }

    // One path for every pair of nodes at adjacent positions (cur-1, cur).
    for (size_t cur = 1; cur < tagger.size(); ++cur) {
      int *f = feature_cache_[fid++];
      for (size_t j = 0; j < y_.size(); ++j) {
        for (size_t i = 0; i < y_.size(); ++i) {
          Path *p = path_freelist_[thread_id].alloc();
          p->clear();
          p->add(tagger.node(cur - 1, j), tagger.node(cur, i));
          p->fvector = f;
        }
      }
    }
  }

// Looks up the feature string currently held in os and, unless getID()
// reports -1, records its id in the local vector `feature`.  Wrapped in
// do { } while (0) so that "ADD;" is exactly one statement.
#define ADD do { int id = this->getID(os.c_str()); \
                 if (id != -1) feature.push_back(id); } while (0)

  // Expands every unigram and bigram template for every position of tagger
  // and appends the resulting feature-id vectors to feature_cache_.
  //
  // The index of the first vector added is stored via
  // tagger.set_feature_id().  tagger.size() unigram vectors are added first
  // (positions 0..size-1), then one bigram vector for each position from 1
  // onwards.
  //
  // A template that apply_rule() rejects is reported through CHECK_FALSE
  // together with the offending template text; otherwise returns true.
  bool FeatureIndex::buildFeatures(TaggerImpl &tagger)
  {
    string_buffer os;
    std::vector<int> feature;

    tagger.set_feature_id(feature_cache_.size());

    for (size_t cur = 0; cur < tagger.size(); ++cur) {
      for (std::vector<char *>::iterator it = unigram_templs_.begin();
           it != unigram_templs_.end(); ++it) {
        CHECK_FALSE(apply_rule(os, *it, cur, tagger)) << " format error: " << *it;
        ADD;
      }
      feature_cache_.add(feature);
      feature.clear();
    }

    for (size_t cur = 1; cur < tagger.size(); ++cur) {
      for (std::vector<char *>::iterator it = bigram_templs_.begin();
           it != bigram_templs_.end(); ++it) {
        CHECK_FALSE(apply_rule(os, *it, cur, tagger)) << "format error: " << *it;
        ADD;
      }
      feature_cache_.add(feature);
      feature.clear();
    }

    return true;
  }
#undef ADD
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -