⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 darts.h

📁 双数组建树代码
💻 H
字号:
/*  Darts -- Double-ARray Trie System  $Id: darts.h.in,v 1.40 2005/12/24 11:01:33 taku Exp $;  Copyright (C) 2001-2006 Taku Kudo <taku@chasen.org>  All rights reserved.  This library is free software; you can redistribute it and/or  modify it under the terms of the GNU Library General Public  License as published by the Free Software Foundation; either  version 2 of the License, or (at your option) any later verjsion.  This library is distributed in the hope that it will be useful,  but WITHOUT ANY WARRANTY; without even the implied warranty of  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU  Library General Public License for more details.  You should have received a copy of the GNU Library General Public  License along with this library; if not, write to the  Free Software Foundation, Inc., 59 Temple Place - Suite 330,  Boston, MA 02111-1307, USA.*/#ifndef _DARTS_H_#define _DARTS_H_#define DARTS_VERSION "0.3"#include <vector>#include <cstring>#include <cstdio>#ifdef HAVE_ZLIB_Hnamespace zlib {#include <zlib.h>}#define SH(p) ((unsigned short)(unsigned char)((p)[0]) | ((unsigned short)(unsigned char)((p)[1]) << 8))#define LG(p) ((unsigned long)(SH(p)) | ((unsigned long)(SH((p)+2)) << 16))#endifnamespace Darts {  template <class T> inline T _max (T x, T y) { return (x > y) ? x : y; }  template <class T> inline T* _resize (T* ptr, size_t n, size_t l, T v)  {    T *tmp = new T [l];    for (size_t i = 0; i < n; ++i) tmp[i] = ptr[i];    for (i = n; i < l; ++i) tmp[i] = v;//size_t     delete [] ptr;    return tmp;  }  template <class T>  class Length {  public: size_t operator() (const T *key) const    { size_t i; for (i = 0; key[i] != (T)0; ++i) {}; return i; }  };  template <> class Length<char> {  public: size_t operator() (const char *key) const    { return strlen (key); };//std::  };  template  <class node_type_,  class node_u_type_,	     class array_type_, class array_u_type_, class length_func_ = Length<node_type_> >  class DoubleArrayImpl  {  private:    struct node_t    {      array_u_type_ code;      size_t     depth;      size_t     left;      size_t     right;    };    struct unit_t    {      array_type_   base;      array_u_type_ check;    };    unit_t        *array_;    unsigned char *used_;    size_t        size_;    size_t        alloc_size_;    node_type_    **key_;    size_t        key_size_;    size_t        *length_;    array_type_   *value_;    size_t        progress_;    size_t        next_check_pos_;    bool          no_delete_;    int           error_;    int           (*progress_func_) (size_t, size_t);    size_t resize (const size_t new_size)    {      unit_t tmp;      tmp.base = 0;      tmp.check = 0;      array_ = _resize (array_, alloc_size_, new_size, tmp);      used_  = _resize (used_,  alloc_size_, new_size, (unsigned char)0);      alloc_size_ = new_size;      return new_size;    }    size_t fetch (const node_t &parent, std::vector <node_t> &siblings)    {      if (error_ < 0) return 0;      array_u_type_ prev = 0;      for (size_t i = parent.left; i < parent.right; ++i) {	if ((length_ ? length_[i] : length_func_()(key_[i])) < parent.depth) continue;	const node_u_type_ *tmp = (node_u_type_ *)(key_[i]);	array_u_type_ cur = 0;	if ((length_ ? length_[i] : length_func_()(key_[i])) != parent.depth)	  cur = (array_u_type_)tmp[parent.depth] + 1;	if (prev > cur) {	  error_ = -3;	  return 0;	}		if (cur != prev || siblings.empty()) {	  node_t tmp_node;	  tmp_node.depth = parent.depth + 1;	  tmp_node.code  = cur;	  tmp_node.left  = i;	  if (! siblings.empty()) 
		  siblings[siblings.size()-1].right = i;	  siblings.push_back(tmp_node);	}	prev = cur;      }      if (! siblings.empty())	siblings[siblings.size()-1].right = parent.right;      return siblings.size();    }    size_t insert (const std::vector <node_t> &siblings)    {      if (error_ < 0) return 0;      size_t begin = 0;      size_t pos   = _max ((size_t)siblings[0].code + 1, next_check_pos_) - 1;      size_t nonzero_num = 0;      int    first = 0;      if (alloc_size_ <= pos) resize (pos + 1);      while (true) {      next:	++pos;	if (alloc_size_ <= pos) resize (pos + 1);	if (array_[pos].check) {	  ++nonzero_num;	  continue;	} else if (! first) {	  next_check_pos_ = pos;	  first = 1;	}	begin = pos - siblings[0].code;	if (alloc_size_ <= (begin + siblings[siblings.size()-1].code))	  resize((size_t)(alloc_size_ * _max(1.05, 1.0 * key_size_ / progress_)));	if (used_[begin]) continue;	for (size_t i = 1; i < siblings.size(); ++i)	  if (array_[begin + siblings[i].code].check != 0) goto next;	break;      }      // -- Simple heuristics --      // if the percentage of non-empty contents in check between the index      // 'next_check_pos' and 'check' is greater than some constant value (e.g. 0.9),      // new 'next_check_pos' index is written by 'check'.      if (1.0 * nonzero_num/(pos - next_check_pos_ + 1) >= 0.95) 	next_check_pos_ = pos;      used_[begin] = 1;      size_ = _max (size_, begin + (size_t)siblings[siblings.size()-1].code + 1);      for (size_t i = 0; i < siblings.size(); ++i)	array_[begin + siblings[i].code].check = begin;      for (i = 0; i < siblings.size(); ++i) {//size_t 	std::vector <node_t> new_siblings;	if (! fetch(siblings[i], new_siblings)) {	  array_[begin + siblings[i].code].base =	    value_ ? (array_type_)(-value_[siblings[i].left]-1) : (array_type_)(-siblings[i].left-1);	  if (value_ && (array_type_)(-value_[siblings[i].left]-1) >= 0) {	    error_ = -2;	    return 0;	  }	  ++progress_;	  if (progress_func_) (*progress_func_) (progress_, key_size_);	} else {	  size_t h = insert(new_siblings);	  array_[begin + siblings[i].code].base = h;	}      }      return begin;    }  public:    typedef array_type_  value_type;    typedef node_type_   key_type;    typedef array_type_  result_type;  // for compatibility        struct result_pair_type    {      value_type value;      size_t     length;    };    explicit DoubleArrayImpl (): array_(0), used_(0), size_(0), alloc_size_(0), no_delete_(0), error_(0) {}    ~DoubleArrayImpl () { clear(); }    void set_result (value_type&       x, value_type r, size_t) { x = r; }    void set_result (result_pair_type& x, value_type r, size_t l) { x.value = r; x.length = l; }    void set_array (void *ptr, size_t size = 0)    {      clear();      array_ = (unit_t *)ptr;      no_delete_ = true;      size_ = size;    }    void *array ()    {      return (void *)array_;    }    void clear ()     {      if (! no_delete_) delete [] array_;      delete [] used_;      array_ = 0;      used_ = 0;      alloc_size_ = 0;      size_ = 0;      no_delete_ = false;    }    size_t unit_size()  const { return sizeof(unit_t); };    size_t size()       const { return size_; };    size_t total_size() const { return size_ * sizeof(unit_t); };    size_t nonzero_size () const    {      size_t result = 0;      for (size_t i = 0; i < size_; ++i)	if (array_[i].check) ++result;      return result;    }    int build (size_t     key_size,	       key_type   **key,	       size_t     *length = 0,	       value_type *value = 0,	       int (*progress_func)(size_t, size_t) = 0)    {      if (! key_size || ! key) return 0;      progress_func_ = progress_func;      key_           = key;      length_        = length;      key_size_      = key_size;      value_         = value;      progress_      = 0;      resize (8192);      array_[0].base = 1;      next_check_pos_ = 0;      node_t root_node;      root_node.left  = 0;      root_node.right = key_size;      root_node.depth = 0;      std::vector <node_t> siblings;      fetch (root_node, siblings);      insert (siblings);       size_ += (1 << 8 * sizeof (key_type)) + 1;      if (size_ >= alloc_size_) resize (size_);      delete [] used_;      used_ = 0;      return error_;    }    int open (const char *file,	      const char *mode = "rb",	      size_t offset = 0,	      size_t size = 0)    {      FILE *fp = fopen(file, mode);//std::std::      if (! fp) return -1;      if (fseek (fp, offset, SEEK_SET) != 0) return -1;//std::      if (! size) {	if (fseek (fp, 0L,     SEEK_END) != 0) return -1;//std::	size = ftell (fp);//std::	if (fseek (fp, offset, SEEK_SET) != 0) return -1;//std::      }      clear();      size_ = size;      size_ /= sizeof(unit_t);      array_  = new unit_t [size_];      if (size_ != fread ((unit_t *)array_,  sizeof(unit_t), size_, fp)) return -1;      fclose (fp);//std::std::      return 0;    }    int save (const char *file,	      const char *mode = "wb",	      size_t offset = 0)    {      if (! size_) return -1;      FILE *fp = fopen(file, mode);//std::std::std::std::      if (! fp) return -1;      if (size_ != fwrite((unit_t *)array_,  sizeof(unit_t), size_, fp)) 	return -1;      fclose (fp);      return 0;    }#ifdef HAVE_ZLIB_H    int gzopen (const char *file,		const char *mode = "rb",		size_t offset = 0,		size_t size = 0)    {      std::FILE *fp  = std::fopen (file, mode);      if (! fp) return -1;      clear();      size_ = size;      if (! size_) {	if (-1L != (long)std::fseek (fp, (-8), SEEK_END)) {	  char buf [8];	  if (std::fread ((char*)buf, 1, 8, fp) != sizeof(buf)) {	    std::fclose(fp);	    return -1;	  }	  size_ = LG (buf+4);	  size_ /= sizeof (unit_t);	}      }      std::fclose(fp);      if (! size_) return -1;      zlib::gzFile gzfp = zlib::gzopen(file, mode);      if (! gzfp) return -1;      array_ = new unit_t [size_];      if (zlib::gzseek (gzfp, offset, SEEK_SET) != 0) return -1;      zlib::gzread (gzfp, (unit_t *)array_,  sizeof(unit_t) * size_);      zlib::gzclose (gzfp);      return 0;    }    int gzsave (const char *file, const char *mode = "wb", size_t offset = 0)    {      zlib::gzFile gzfp = zlib::gzopen (file, mode);      if (!gzfp) return -1;      zlib::gzwrite (gzfp, (unit_t *)array_,  sizeof(unit_t) * size_);      zlib::gzclose (gzfp);      return 0;    }#endif    template <class T>    inline void exactMatchSearch (const key_type *key,				  T & result,				  size_t len = 0,				  size_t node_pos = 0)    {      result = exactMatchSearch<T>(key, len, node_pos);      return;    }    template <class T>    inline T exactMatchSearch (const key_type *key,			       size_t len = 0,			       size_t node_pos = 0)    {      if (! len) len = length_func_() (key);      T result;      set_result (result, -1, 0);      register array_type_  b = array_[node_pos].base;      register array_u_type_ p;      for (register size_t i = 0; i < len; ++i) {	p = b + (node_u_type_)(key[i]) + 1;	if ((array_u_type_)b == array_[p].check) b = array_[p].base;	else return result;      }      p = b;      array_type_ n = array_[p].base;      if ((array_u_type_)b == array_[p].check && n < 0) {	set_result (result, -n-1, len);      }      return result;    }    template <class T>    size_t commonPrefixSearch (const key_type *key,			       T* result,			       size_t result_len,			       size_t len = 0,			       size_t node_pos = 0)    {      if (! len) len = length_func_() (key);      register array_type_  b   = array_[node_pos].base;      register size_t     num = 0;      register array_type_  n;      register array_u_type_ p;      for (register size_t i = 0; i < len; ++i) {	p = b; // + 0;	n = array_[p].base;	if ((array_u_type_) b == array_[p].check && n < 0) {	  if (num < result_len) set_result (result[num], -n-1, i); // result[num] = -n-1;	  ++num;	}	p = b + (node_u_type_)(key[i]) + 1;	if ((array_u_type_) b == array_[p].check) b = array_[p].base;	else return num;      }      p = b;      n = array_[p].base;      if ((array_u_type_)b == array_[p].check && n < 0) {	if (num < result_len) set_result(result[num], -n-1, len);	++num;      }      return num;    }    value_type traverse (const key_type *key, 			 size_t &node_pos, 			 size_t &key_pos,			 size_t len = 0)    {      if (! len) len = length_func_() (key);      register array_type_  b = array_[node_pos].base;      register array_u_type_ p;      for (; key_pos < len; ++key_pos) {	p = b + (node_u_type_)(key[key_pos]) + 1;	if ((array_u_type_)b == array_[p].check) { node_pos = p; b = array_[p].base; }	else return -2; // no node      }      p = b;      array_type_ n = array_[p].base;      if ((array_u_type_)b == array_[p].check && n < 0) return -n-1;      return -1; // found, but no value    }  };#if 4 == 2  typedef Darts::DoubleArrayImpl<char, unsigned char, short, unsigned short> DoubleArray;#define DARTS_ARRAY_SIZE_IS_DEFINED 1#endif#if 4 == 4 && ! defined (DARTS_ARRAY_SIZE_IS_DEFINED)  typedef Darts::DoubleArrayImpl<char, unsigned char, int, unsigned int> DoubleArray;#define DARTS_ARRAY_SIZE_IS_DEFINED 1#endif#if 4 == 4 && ! defined (DARTS_ARRAY_SIZE_IS_DEFINED)  typedef Darts::DoubleArrayImpl<char, unsigned char, long, unsigned long> DoubleArray;#define DARTS_ARRAY_SIZE_IS_DEFINED 1#endif#if 4 == 8 && ! defined (DARTS_ARRAY_SIZE_IS_DEFINED)  typedef Darts::DoubleArrayImpl<char, unsigned char, long long, unsigned long long> DoubleArray;#endif}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -