⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 darts.h

📁 用于汉字识别和分类的支持向量机SVMTEST测试算法,很好用啊
💻 H
字号:
/*  Darts -- Double-ARray Trie System  $Id: darts.h,v 1.7 2003/08/15 02:06:10 taku-ku Exp $;  Copyright (C) 2001-2003  Taku Kudo <taku-ku@is.aist-nara.ac.jp>  All rights reserved.  This library is free software; you can redistribute it and/or  modify it under the terms of the GNU Library General Public  License as published by the Free Software Foundation; either  version 2 of the License, or (at your option) any later verjsion.  This library is distributed in the hope that it will be useful,  but WITHOUT ANY WARRANTY; without even the implied warranty of  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU  Library General Public License for more details.  You should have received a copy of the GNU Library General Public  License along with this library; if not, write to the  Free Software Foundation, Inc., 59 Temple Place - Suite 330,  Boston, MA 02111-1307, USA.*/#ifndef _DARTS_H#define _DARTS_H#define DARTS_VERSION "0.2"#pragma warning(disable: 4786)//CCR 2004.04.05#include <vector>#include <cstring>#include <cstdio>#ifdef HAVE_ZLIB_Hnamespace zlib {	#include <zlib.h>}#define SH(p) ((unsigned short)(unsigned char)((p)[0]) | ((unsigned short)(unsigned char)((p)[1]) << 8))#define LG(p) ((unsigned long)(SH(p)) | ((unsigned long)(SH((p)+2)) << 16))#endifnamespace Darts {  template <class T> inline T _max (T x, T y)  {	  return (x > y) ? x : y;  }    template <class T> inline T* _resize (T* ptr, size_t n, size_t l, T v)  {	  T *tmp = new T [l]; // realloc	  for (size_t i = 0; i < n; ++i)	  {		  tmp[i] = ptr[i]; // copy	  }      for (size_t i = n; i < l; ++i)	  {		  tmp[i] = v;	  }      	  delete [] ptr;      return tmp;  }  template <class T>class Length  {   public:	  size_t operator() (const T *str) const 	  {		  size_t i;		  		  for (i = 0; str[i] != (T)0; ++i) 		  {		  }; 		  		  return i; 	  }   };  template <> class Length<char>  {   public:	  size_t operator() (const char *str) const  	  { 		 // CCR DELETE return std::strlen(str);		return strlen(str); //CCR ADD	  };   };  template  <class NodeType,  class NodeUType,	         class ArrayType, class ArrayUType, class LengthFunc = Length<NodeType> >  class DoubleArrayImpl  {  private:	  struct Node	  {		  ArrayUType code;          size_t     depth;		  size_t     left;		  size_t     right;	  };	  struct Unit 	  {		  ArrayType    base;		  ArrayUType   check;	  };	  	  Unit           *array;	  size_t         *used;	  size_t         size;	  size_t         alloc_size;	  NodeType       **str;	  size_t         str_size;	  size_t         *len;	  ArrayType      *val;	  	  unsigned int   progress;	  size_t         next_check_pos;	  int            no_delete;      int            (*progress_func) (size_t, size_t);      size_t resize (const size_t new_size)	  {		  Unit tmp;		  tmp.base = 0;		  tmp.check = 0;      		  array = _resize (array, alloc_size, new_size, tmp);		  used  = _resize (used,  alloc_size, new_size, (size_t)0);		  alloc_size = new_size;		  return new_size;	  }	  	  size_t fetch (const Node &parent, std::vector <Node> &siblings)	  {		  ArrayUType prev = 0;		  		  for (size_t i = parent.left; i < parent.right; ++i)		  {			  if ((len ? len[i] : LengthFunc()(str[i])) < parent.depth)			  {				  continue;			  }			  			  const NodeUType *tmp = (NodeUType *)(str[i]);			  ArrayUType cur = 0;			  if ((len ? len[i] : LengthFunc()(str[i])) != parent.depth) 			  {				  cur = (ArrayUType)tmp[parent.depth] + 1;			  }			  			  if (prev > cur)			  {				  throw -3;			  }			  			  if (cur != prev || siblings.empty())			  {				  Node tmp_node;				  tmp_node.depth = parent.depth + 1;				  tmp_node.code  = cur;				  tmp_node.left  = i;				  				  if (! siblings.empty())				  {					  siblings[siblings.size()-1].right = i;				  }				  siblings.push_back(tmp_node);			  }			  			  prev = cur;		  }		  		  if (! siblings.empty())		  {			  siblings[siblings.size()-1].right = parent.right;		  }		  		  return siblings.size();    }         size_t insert (const std::vector <Node> &siblings)    {		size_t begin = 0;        size_t pos = _max ((size_t)siblings[0].code + 1, next_check_pos) - 1;        size_t nonzero_num = 0;        int first = 0;				if (alloc_size <= pos)		{			resize (pos + 1);		}				while (1)		{		next:			++pos;			if (array[pos].check) 			{				++nonzero_num;				continue;			}			else if (! first)			{				next_check_pos = pos;				first = 1;			}						begin = pos - siblings[0].code;						if (alloc_size < (begin + siblings[siblings.size()-1].code))			{				resize((size_t)(alloc_size * _max(1.05, 1.0 * str_size / progress)));			}						if (used[begin])			{				continue;			}						for (size_t i = 1; i < siblings.size(); ++i)			{				if (array[begin + siblings[i].code].check != 0)				{					goto next;				}			}							break;		}      // -- Simple heuristics --      // if the percentage of non-empty contents in check between the index      // 'next_check_pos' and 'check' is greater than some constant value (e.g. 0.9),      // new 'next_check_pos' index is written by 'check'.		if (1.0 * nonzero_num/(pos - next_check_pos + 1) >= 0.95)		{			next_check_pos = pos;		}				used[begin] = 1;		size = _max (size, begin + (size_t)siblings[siblings.size()-1].code + 1);				for (size_t i = 0; i < siblings.size(); ++i)		{			array[begin + siblings[i].code].check = begin;		}				for (size_t i = 0; i < siblings.size(); ++i)		{			std::vector <Node> new_siblings;						if (! fetch(siblings[i], new_siblings))			{				array[begin + siblings[i].code].base = 					val ? (ArrayType)(-val[siblings[i].left]-1) : (ArrayType)(-siblings[i].left-1);								if (val && (ArrayType)(-val[siblings[i].left]-1) >= 0)				{					throw -2;				}								++progress;								if (progress_func)				{					(*progress_func) (progress, str_size);				}			}			else			{				size_t h = insert(new_siblings);				array[begin + siblings[i].code].base = h;			}		}				return begin;	}  public:	  typedef ArrayType   result_type;	  typedef NodeType    key_type;      DoubleArrayImpl (): array(0), used(0), size(0), alloc_size(0), no_delete(0) {}      ~DoubleArrayImpl () { clear(); }      int setArray (void *ptr, size_t array_size = 0)	  {		  clear();		  array = (Unit *)ptr;		  no_delete = 1;		  size = array_size;		  return 1;	  }	  	  void *getArray ()	  {		  return (void *)array;	  }	  	  void clear ()	  {		  if (! no_delete)		  {			  delete [] array;		  }				  delete [] used;		  array = 0;		  used = 0;		  alloc_size = 0;		  size = 0;          no_delete  = 0;	  }	  	  size_t getUnitSize()	  {		  return sizeof(Unit);	  }        size_t getSize()	{		return size;	}    size_t getNonzeroSize ()    {		size_t result = 0;		for (size_t i = 0; i < size; ++i)		{			if (array[i].check)			{				++result;			}		}				return result;	}    int build (size_t _str_size,			  key_type **_str,			  size_t        *_len = 0,	          result_type   *_val = 0,	          int (*_progress_func)(size_t, size_t) = 0)    {		try 		{			if (!_str_size || ! _str) 			{				return 0;			}			progress_func  = _progress_func;			str            = _str;			len            = _len;			str_size       = _str_size;			val            = _val;			progress       = 0;			resize (1024 * 10);			array[0].base = 1;			next_check_pos = 0;			Node root_node;			root_node.left  = 0;			root_node.right = _str_size;			root_node.depth = 0;			std::vector <Node> siblings;			fetch (root_node, siblings);			insert (siblings);			size += sizeof (ArrayType);			if (size > alloc_size)			{				resize (size);			}			delete [] used;			used  = 0;			return 0;		}		catch (int &e)		{			delete [] used;			used  = 0;			clear ();			return e;		}		catch (...)		{			delete [] used;			used  = 0;			clear ();			return -1;		}	}    int open (const char *file,	      const char *mode = "rb",	      size_t offset = 0,	      size_t _size = 0)    {		std::FILE *fp = std::fopen(file, mode);		if (! fp)		{			return -1;		}				if (std::fseek (fp, offset, SEEK_SET) != 0)		{			return -1;		}				if (! _size)		{ 			if (std::fseek (fp, 0L, SEEK_END) != 0) 			{				return -1;			}						_size = std::ftell (fp);						if (std::fseek (fp, offset, SEEK_SET) != 0)			{				return -1;			}		}				clear();		size = _size;		size /= sizeof(Unit);		array  = new Unit [size];		if (size != std::fread ((Unit *)array,  sizeof(Unit), size, fp))		{			return -1;		}				std::fclose (fp);				return 0;	}    int save (const char *file,const char *mode = "wb",size_t offset = 0)    {		if (! size)		{			return -1;		}				std::FILE *fp = std::fopen(file, mode);				if (! fp)		{			return -1;		}				if (size != std::fwrite((Unit *)array,  sizeof(Unit), size, fp))		{			return -1;		}				std::fclose (fp);				return 0;    }#ifdef HAVE_ZLIB_H    int gzopen (const char *file, const char *mode = "rb",size_t offset = 0,size_t _size = 0)    {		std::FILE *fp  = std::fopen (file, mode);		if (! fp) return -1;		clear();      		size = _size;		if (! size) 		{			if (-1L != (long)std::fseek (fp, (-8), SEEK_END))			{				char buf [8];				if (std::fread ((char*)buf, 1, 8, fp) != sizeof(buf)) 				{					std::fclose(fp);					return -1;				}								size = LG (buf+4);				size /= sizeof (Unit);			}		}				std::fclose(fp);		if (! size)		{			return -1;		}				zlib::gzFile gzfp = zlib::gzopen(file, mode);				if (! gzfp) 		{			return -1;		}				array = new Unit [size];				if (zlib::gzseek (gzfp, offset, SEEK_SET) != 0) 		{			return -1;		}				zlib::gzread (gzfp, (Unit *)array,  sizeof(Unit) * size);			zlib::gzclose (gzfp);				return 0;    }    int gzsave (const char *file, const char *mode = "wb", size_t offset = 0)    {		zlib::gzFile gzfp = zlib::gzopen (file, mode);		if (!gzfp) 		{			return -1;		}        		zlib::gzwrite (gzfp, (Unit *)array,  sizeof(Unit) * size);        zlib::gzclose (gzfp);        		return 0;    }#endif            result_type exactMatchSearch (const key_type *key,size_t len = 0,size_t node_pos = 0)    {		if (! len) 		{			len = LengthFunc() (key);		}				register ArrayType  b = array[node_pos].base;		register ArrayUType p;		for (register size_t i = 0; i < len; ++i)		{			p = b + (NodeUType)(key[i]) + 1;			if ((ArrayUType)b == array[p].check) 			{				b = array[p].base;			}			else			{				return -1;			}		}		p = b;		ArrayType n = array[p].base;				if ((ArrayUType)b == array[p].check && n < 0) 		{			return -n-1;		}				return -1;	}		size_t commonPrefixSearch (const key_type *key, result_type *result,							   size_t result_len, size_t len = 0, size_t node_pos = 0)    {		if (! len) 		{			len = LengthFunc() (key);		}				register ArrayType  b = array[node_pos].base;		register size_t num = 0;        register ArrayType  n;        register ArrayUType p;				for (register size_t i = 0; i < len; ++i) 		{			p = b; // + 0;			n = array[p].base;			if ((ArrayUType) b == array[p].check && n < 0) 			{				if (num < result_len)				{					result[num] = -n-1;				}								++num;			}						p = b + (NodeUType)(key[i]) + 1;						if ((ArrayUType) b == array[p].check)			{				b = array[p].base;			}			else			{				return num;			}		}				p = b;		n = array[p].base;				if ((ArrayUType)b == array[p].check && n < 0) 		{			if (num < result_len) 			{				result[num] = -n-1;			}						++num;		}				return num;	}    result_type traverse (const key_type *key, size_t &node_pos, size_t &key_pos, size_t len = 0)    {		if (! len)		{			len = LengthFunc() (key);		}				register ArrayType  b = array[node_pos].base;		register ArrayUType p;       		for (; key_pos < len; ++key_pos) 		{			p = b + (NodeUType)(key[key_pos]) + 1;						if ((ArrayUType)b == array[p].check)			{				node_pos = p;				b = array[p].base; 			}			else			{				return -2; // no node			}		}				p = b;		ArrayType n = array[p].base;				if ((ArrayUType)b == array[p].check && n < 0)		{			return -n-1;		}				return -1; // found, but no value	} };#if 4 == 2  typedef Darts::DoubleArrayImpl<char, unsigned char, short, unsigned short> DoubleArray;#define DARTS_ARRAY_SIZE_IS_DEFINED 1  #endif#if 4 == 4 && ! defined (DARTS_ARRAY_SIZE_IS_DEFINED)  typedef Darts::DoubleArrayImpl<char, unsigned char, int, unsigned int> DoubleArray;#define DARTS_ARRAY_SIZE_IS_DEFINED 1#endif#if 4 == 4 && ! defined (DARTS_ARRAY_SIZE_IS_DEFINED)  typedef Darts::DoubleArrayImpl<char, unsigned char, long, unsigned long> DoubleArray;#define DARTS_ARRAY_SIZE_IS_DEFINED 1#endif#if 4 == 8 && ! defined (DARTS_ARRAY_SIZE_IS_DEFINED)  typedef Darts::DoubleArrayImpl<char, unsigned char, long long, unsigned long long> DoubleArray;#endif}#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -