⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 darts.h

📁 Conditional Random Fields的训练识别工具
💻 H
字号:
/*  Darts -- Double-ARray Trie System  $Id: darts.h 1528 2006-08-07 02:39:50Z taku $;  Copyright(C) 2001-2006 Taku Kudo <taku@chasen.org>  All rights reserved.  This library is free software; you can redistribute it and/or  modify it under the terms of the GNU Library General Public  License as published by the Free Software Foundation; either  version 2 of the License, or(at your option) any later verjsion.  This library is distributed in the hope that it will be useful,  but WITHOUT ANY WARRANTY; without even the implied warranty of  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU  Library General Public License for more details.  You should have received a copy of the GNU Library General Public  License along with this library; if not, write to the  Free Software Foundation, Inc., 59 Temple Place - Suite 330,  Boston, MA 02111-1307, USA.*/#ifndef _DARTS_H_#define _DARTS_H_#define DARTS_VERSION "0.3"#include <vector>#include <cstring>#include <cstdio>#ifdef HAVE_ZLIB_Hnamespace zlib {#include <zlib.h>}#define SH(p)((unsigned short)(unsigned char)((p)[0]) |((unsigned short)(unsigned char)((p)[1]) << 8))#define LG(p)((unsigned long)(SH(p)) |((unsigned long)(SH((p)+2)) << 16))#endifnamespace Darts {  template <class T> inline T _max(T x, T y) { return(x > y) ? x : y; }  template <class T> inline T* _resize(T* ptr, size_t n, size_t l, T v)    {      T *tmp = new T [l];      for (size_t i = 0; i < n; ++i) tmp[i] = ptr[i];      for (size_t i = n; i < l; ++i) tmp[i] = v;      delete [] ptr;      return tmp;    }  template <class T>    class Length {    public: size_t operator()(const T *key) const      { size_t i; for (i = 0; key[i] != (T)0; ++i) {}; return i; }  };  template <> class Length<char> {  public: size_t operator()(const char *key) const    { return std::strlen(key); };  };  template  <class node_type_,  class node_u_type_,    class array_type_, class array_u_type_, class length_func_ = Length<node_type_> >    class DoubleArrayImpl    {      private:      struct node_t      {        array_u_type_ code;        size_t     depth;        size_t     left;        size_t     right;      };      struct unit_t      {        array_type_   base;        array_u_type_ check;      };      unit_t        *array_;      unsigned char *used_;      size_t        size_;      size_t        alloc_size_;      node_type_    **key_;      size_t        key_size_;      size_t        *length_;      array_type_   *value_;      size_t        progress_;      size_t        next_check_pos_;      bool          no_delete_;      int           error_;      int(*progress_func_)(size_t, size_t);      size_t resize(const size_t new_size)      {        unit_t tmp;        tmp.base = 0;        tmp.check = 0;        array_ = _resize(array_, alloc_size_, new_size, tmp);        used_  = _resize(used_,  alloc_size_, new_size,(unsigned char)0);        alloc_size_ = new_size;        return new_size;      }      size_t fetch(const node_t &parent, std::vector <node_t> &siblings)      {        if (error_ < 0) return 0;        array_u_type_ prev = 0;        for (size_t i = parent.left; i < parent.right; ++i) {          if ((length_ ? length_[i] : length_func_()(key_[i])) < parent.depth) continue;          const node_u_type_ *tmp =(node_u_type_ *)(key_[i]);          array_u_type_ cur = 0;          if ((length_ ? length_[i] : length_func_()(key_[i])) != parent.depth)            cur =(array_u_type_)tmp[parent.depth] + 1;          if (prev > cur) {            error_ = -3;            return 0;          }          if (cur != prev || siblings.empty()) {            node_t tmp_node;            tmp_node.depth = parent.depth + 1;            tmp_node.code  = cur;            tmp_node.left  = i;            if (! siblings.empty()) siblings[siblings.size()-1].right = i;            siblings.push_back(tmp_node);          }          prev = cur;        }        if (! siblings.empty())        siblings[siblings.size()-1].right = parent.right;        return siblings.size();      }      size_t insert(const std::vector <node_t> &siblings)      {        if (error_ < 0) return 0;        size_t begin = 0;        size_t pos   = _max((size_t)siblings[0].code + 1, next_check_pos_) - 1;        size_t nonzero_num = 0;        int    first = 0;        if (alloc_size_ <= pos) resize (pos + 1);        while (true) {        next:          ++pos;          if (alloc_size_ <= pos) resize (pos + 1);          if (array_[pos].check) {            ++nonzero_num;            continue;          } else if (! first) {            next_check_pos_ = pos;            first = 1;          }          begin = pos - siblings[0].code;          if (alloc_size_ <= (begin + siblings[siblings.size()-1].code))            resize((size_t)(alloc_size_ * _max(1.05, 1.0 * key_size_ / progress_)));          if (used_[begin]) continue;          for (size_t i = 1; i < siblings.size(); ++i)            if (array_[begin + siblings[i].code].check != 0) goto next;          break;        }        // -- Simple heuristics --        // if the percentage of non-empty contents in check between the index        // 'next_check_pos' and 'check' is greater than some constant value(e.g. 0.9),        // new 'next_check_pos' index is written by 'check'.        if (1.0 * nonzero_num/(pos - next_check_pos_ + 1) >= 0.95)        next_check_pos_ = pos;        used_[begin] = 1;        size_ = _max(size_, begin + (size_t)siblings[siblings.size()-1].code + 1);        for (size_t i = 0; i < siblings.size(); ++i)        array_[begin + siblings[i].code].check = begin;        for (size_t i = 0; i < siblings.size(); ++i) {          std::vector <node_t> new_siblings;          if (! fetch(siblings[i], new_siblings)) {            array_[begin + siblings[i].code].base =              value_ ?(array_type_)(-value_[siblings[i].left]-1) :(array_type_)(-siblings[i].left-1);            if (value_ && (array_type_)(-value_[siblings[i].left]-1) >= 0) {              error_ = -2;              return 0;            }            ++progress_;            if (progress_func_) (*progress_func_) (progress_, key_size_);          } else {            size_t h = insert(new_siblings);            array_[begin + siblings[i].code].base = h;          }        }        return begin;      }      public:      typedef array_type_  value_type;      typedef node_type_   key_type;      typedef array_type_  result_type;  // for compatibility      struct result_pair_type      {        value_type value;        size_t     length;      };      explicit DoubleArrayImpl(): array_(0), used_(0), size_(0), alloc_size_(0), no_delete_(0), error_(0) {}      ~DoubleArrayImpl() { clear(); }      void set_result(value_type&       x, value_type r, size_t) { x = r; }      void set_result(result_pair_type& x, value_type r, size_t l) { x.value = r; x.length = l; }      void set_array(void *ptr, size_t size = 0)      {        clear();        array_ =(unit_t *)ptr;        no_delete_ = true;        size_ = size;      }      void *array()      {        return(void *)array_;      }      void clear()      {        if (! no_delete_) delete [] array_;        delete [] used_;        array_ = 0;        used_ = 0;        alloc_size_ = 0;        size_ = 0;        no_delete_ = false;      }      size_t unit_size()  const { return sizeof(unit_t); };      size_t size()       const { return size_; };      size_t total_size() const { return size_ * sizeof(unit_t); };      size_t nonzero_size() const      {        size_t result = 0;        for (size_t i = 0; i < size_; ++i)          if (array_[i].check) ++result;        return result;      }      int build(size_t     key_size,                key_type   **key,                size_t     *length = 0,                value_type *value = 0,                int(*progress_func)(size_t, size_t) = 0)      {        if (! key_size || ! key) return 0;        progress_func_ = progress_func;        key_           = key;        length_        = length;        key_size_      = key_size;        value_         = value;        progress_      = 0;        resize(8192);        array_[0].base = 1;        next_check_pos_ = 0;        node_t root_node;        root_node.left  = 0;        root_node.right = key_size;        root_node.depth = 0;        std::vector <node_t> siblings;        fetch(root_node, siblings);        insert(siblings);        size_ +=(1 << 8 * sizeof(key_type)) + 1;        if (size_ >= alloc_size_) resize (size_);        delete [] used_;        used_ = 0;        return error_;      }      int open(const char *file,               const char *mode = "rb",               size_t offset = 0,               size_t size = 0)      {        std::FILE *fp = std::fopen(file, mode);        if (! fp) return -1;        if (std::fseek (fp, offset, SEEK_SET) != 0) return -1;        if (! size) {          if (std::fseek (fp, 0L,     SEEK_END) != 0) return -1;          size = std::ftell(fp);          if (std::fseek (fp, offset, SEEK_SET) != 0) return -1;        }        clear();        size_ = size;        size_ /= sizeof(unit_t);        array_  = new unit_t [size_];        if (size_ != std::fread ((unit_t *)array_,  sizeof(unit_t), size_, fp)) return -1;        std::fclose(fp);        return 0;      }      int save(const char *file,               const char *mode = "wb",               size_t offset = 0)      {        if (! size_) return -1;        std::FILE *fp = std::fopen(file, mode);        if (! fp) return -1;        if (size_ != std::fwrite((unit_t *)array_,  sizeof(unit_t), size_, fp))        return -1;        std::fclose(fp);        return 0;      }#ifdef HAVE_ZLIB_H      int gzopen(const char *file,                 const char *mode = "rb",                 size_t offset = 0,                 size_t size = 0)      {        std::FILE *fp  = std::fopen(file, mode);        if (! fp) return -1;        clear();        size_ = size;        if (! size_) {          if (-1L != (long)std::fseek (fp, (-8), SEEK_END)) {            char buf [8];            if (std::fread ((char*)buf, 1, 8, fp) != sizeof(buf)) {              std::fclose(fp);              return -1;            }            size_ = LG(buf+4);            size_ /= sizeof(unit_t);          }        }        std::fclose(fp);        if (! size_) return -1;        zlib::gzFile gzfp = zlib::gzopen(file, mode);        if (! gzfp) return -1;        array_ = new unit_t [size_];        if (zlib::gzseek (gzfp, offset, SEEK_SET) != 0) return -1;        zlib::gzread(gzfp,(unit_t *)array_,  sizeof(unit_t) * size_);        zlib::gzclose(gzfp);        return 0;      }      int gzsave(const char *file, const char *mode = "wb", size_t offset = 0)      {        zlib::gzFile gzfp = zlib::gzopen(file, mode);        if (!gzfp) return -1;        zlib::gzwrite(gzfp,(unit_t *)array_,  sizeof(unit_t) * size_);        zlib::gzclose(gzfp);        return 0;      }#endif      template <class T>      inline void exactMatchSearch(const key_type *key,                                   T & result,                                   size_t len = 0,                                   size_t node_pos = 0)      {        result = exactMatchSearch<T>(key, len, node_pos);        return;      }      template <class T>      inline T exactMatchSearch(const key_type *key,                                size_t len = 0,                                size_t node_pos = 0)      {        if (! len) len = length_func_() (key);        T result;        set_result(result, -1, 0);        register array_type_  b = array_[node_pos].base;        register array_u_type_ p;        for (register size_t i = 0; i < len; ++i) {          p = b + (node_u_type_)(key[i]) + 1;          if ((array_u_type_)b == array_[p].check) b = array_[p].base;          else return result;        }        p = b;        array_type_ n = array_[p].base;        if ((array_u_type_)b == array_[p].check && n < 0) {          set_result(result, -n-1, len);        }        return result;      }      template <class T>      size_t commonPrefixSearch(const key_type *key,                                T* result,                                size_t result_len,                                size_t len = 0,                                size_t node_pos = 0)      {        if (! len) len = length_func_() (key);        register array_type_  b   = array_[node_pos].base;        register size_t     num = 0;        register array_type_  n;        register array_u_type_ p;        for (register size_t i = 0; i < len; ++i) {          p = b; // + 0;          n = array_[p].base;          if ((array_u_type_) b == array_[p].check && n < 0) {            if (num < result_len) set_result (result[num], -n-1, i); // result[num] = -n-1;            ++num;          }          p = b + (node_u_type_)(key[i]) + 1;          if ((array_u_type_) b == array_[p].check) b = array_[p].base;          else return num;        }        p = b;        n = array_[p].base;        if ((array_u_type_)b == array_[p].check && n < 0) {          if (num < result_len) set_result(result[num], -n-1, len);          ++num;        }        return num;      }      value_type traverse(const key_type *key,                          size_t &node_pos,                          size_t &key_pos,                          size_t len = 0)      {        if (! len) len = length_func_() (key);        register array_type_  b = array_[node_pos].base;        register array_u_type_ p;        for (; key_pos < len; ++key_pos) {          p = b + (node_u_type_)(key[key_pos]) + 1;          if ((array_u_type_)b == array_[p].check) { node_pos = p; b = array_[p].base; }          else return -2; // no node        }        p = b;        array_type_ n = array_[p].base;        if ((array_u_type_)b == array_[p].check && n < 0) return -n-1;        return -1; // found, but no value      }    };#if 4 == 2  typedef Darts::DoubleArrayImpl<char, unsigned char, short, unsigned short> DoubleArray;#define DARTS_ARRAY_SIZE_IS_DEFINED 1#endif#if 4 == 4 && ! defined(DARTS_ARRAY_SIZE_IS_DEFINED)  typedef Darts::DoubleArrayImpl<char, unsigned char, int, unsigned int> DoubleArray;#define DARTS_ARRAY_SIZE_IS_DEFINED 1#endif#if 4 == 4 && ! defined(DARTS_ARRAY_SIZE_IS_DEFINED)  typedef Darts::DoubleArrayImpl<char, unsigned char, long, unsigned long> DoubleArray;#define DARTS_ARRAY_SIZE_IS_DEFINED 1#endif#if 4 == 8 && ! defined(DARTS_ARRAY_SIZE_IS_DEFINED)  typedef Darts::DoubleArrayImpl<char, unsigned char, long long, unsigned long long> DoubleArray;#endif}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -