⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ternarytree.cs

📁 itextsharp for pdf document
💻 CS
📖 第 1 页 / 共 2 页
字号:
using System;
using System.Collections;
using System.Text;

/*
 * $Id: TernaryTree.cs,v 1.1.1.1 2003/02/04 02:58:44 geraldhenson Exp $
 * Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
 * For details on use and redistribution please refer to the
 * LICENSE file included with these sources.
 */

namespace iTextSharp.text.pdf.hyphenation {
	/**
	 * <h2>Ternary Search Tree</h2>
	 *
	 * <p>A ternary search tree is a hibrid between a binary tree and
	 * a digital search tree (trie). Keys are limited to strings.
	 * A data value of type char is stored in each leaf node.
	 * It can be used as an index (or pointer) to the data.
	 * Branches that only contain one key are compressed to one node
	 * by storing a pointer to the trailer substring of the key.
	 * This class is intended to serve as base class or helper class
	 * to implement Dictionary collections or the like. Ternary trees
	 * have some nice properties as the following: the tree can be
	 * traversed in sorted order, partial matches (wildcard) can be
	 * implemented, retrieval of all keys within a given distance
	 * from the target, etc. The storage requirements are higher than
	 * a binary tree but a lot less than a trie. Performance is
	 * comparable with a hash table, sometimes it outperforms a hash
	 * function (most of the time can determine a miss faster than a hash).</p>
	 *
	 * <p>The main purpose of this java port is to serve as a base for
	 * implementing TeX's hyphenation algorithm (see The TeXBook,
	 * appendix H). Each language requires from 5000 to 15000 hyphenation
	 * patterns which will be keys in this tree. The strings patterns
	 * are usually small (from 2 to 5 characters), but each char in the
	 * tree is stored in a node. Thus memory usage is the main concern.
	 * We will sacrify 'elegance' to keep memory requirenments to the
	 * minimum. Using java's char type as pointer (yes, I know pointer
	 * it is a forbidden word in java) we can keep the size of the node
	 * to be just 8 bytes (3 pointers and the data char). This gives
	 * room for about 65000 nodes. In my tests the english patterns
	 * took 7694 nodes and the german patterns 10055 nodes,
	 * so I think we are safe.</p>
	 *
	 * <p>All said, this is a map with strings as keys and char as value.
	 * Pretty limited!. It can be extended to a general map by
	 * using the string representation of an object and using the
	 * char value as an index to an array that contains the object
	 * values.</p>
	 *
	 * @author cav@uniscope.co.jp
	 */

	public class TernaryTree : ICloneable {

		/**
		 * We use 4 arrays to represent a node. I guess I should have created
		 * a proper node class, but somehow Knuth's pascal code made me forget
		 * we now have a portable language with memory management and
		 * automatic garbage collection! And now is kind of late, furthermore,
		 * if it ain't broken, don't fix it.
		 */

		/**
		 * Pointer to low branch and to rest of the key when it is
		 * stored directly in this node, we don't have unions in java!
		 */
		protected char[] lo;

		/**
		 * Pointer to high branch.
		 */
		protected char[] hi;

		/**
		 * Pointer to equal branch and to data when this node is a string terminator.
		 */
		protected char[] eq;

		/**
		 * <P>The character stored in this node: splitchar
		 * Two special values are reserved:</P>
		 * <ul><li>0x0000 as string terminator</li>
		 * <li>0xFFFF to indicate that the branch starting at
		 * this node is compressed</li></ul>
		 * <p>This shouldn't be a problem if we give the usual semantics to
		 * strings since 0xFFFF is garanteed not to be an Unicode character.</p>
		 */
		protected char[] sc;

		/**
		 * This vector holds the trailing of the keys when the branch is compressed.
		 */
		protected CharVector kv;

		protected char root;
		protected char freenode;
		protected int length;    // number of items in tree

		protected static int BLOCK_SIZE = 2048;    // allocation size for arrays

		internal TernaryTree() {
			init();
		}

		protected void init() {
			root = (char)0;
			freenode = (char)1;
			length = 0;
			lo = new char[BLOCK_SIZE];
			hi = new char[BLOCK_SIZE];
			eq = new char[BLOCK_SIZE];
			sc = new char[BLOCK_SIZE];
			kv = new CharVector();
		}

		/**
		 * Branches are initially compressed, needing
		 * one node per key plus the size of the string
		 * key. They are decompressed as needed when
		 * another key with same prefix
		 * is inserted. This saves a lot of space,
		 * specially for long keys.
		 */
		public void insert(string key, char val) {
			// make sure we have enough room in the arrays
			int len = key.Length
				+ 1;    // maximum number of nodes that may be generated
			if (freenode + len > eq.Length)
				redimNodeArrays(eq.Length + BLOCK_SIZE);
			char[] strkey = new char[len--];
			key.CopyTo(0, strkey, 0, len); 
			strkey[len] = (char)0;
			root = insert(root, strkey, 0, val);
		}

		public void insert(char[] key, int start, char val) {
			int len = strlen(key) + 1;
			if (freenode + len > eq.Length)
				redimNodeArrays(eq.Length + BLOCK_SIZE);
			root = insert(root, key, start, val);
		}

		/**
		 * The actual insertion function, recursive version.
		 */
		private char insert(char p, char[] key, int start, char val) {
			int len = strlen(key, start);
			if (p == 0) {
				// this means there is no branch, this node will start a new branch.
				// Instead of doing that, we store the key somewhere else and create
				// only one node with a pointer to the key
				p = freenode++;
				eq[p] = val;           // holds data
				length++;
				hi[p] = (char)0;
				if (len > 0) {
					sc[p] = (char)0xFFFF;    // indicates branch is compressed
					lo[p] = (char)kv.alloc(len
						+ 1);    // use 'lo' to hold pointer to key
					strcpy(kv.Arr, lo[p], key, start);
				} else {
					sc[p] = (char)0;
					lo[p] = (char)0;
				}
				return p;
			}

			if (sc[p] == 0xFFFF) {
				// branch is compressed: need to decompress
				// this will generate garbage in the external key array
				// but we can do some garbage collection later
				char pp = freenode++;
				lo[pp] = lo[p];    // previous pointer to key
				eq[pp] = eq[p];    // previous pointer to data
				lo[p] = (char)0;
				if (len > 0) {
					sc[p] = kv[lo[pp]];
					eq[p] = pp;
					lo[pp]++;
					if (kv[lo[pp]] == 0) {
						// key completly decompressed leaving garbage in key array
						lo[pp] = (char)0;
						sc[pp] = (char)0;
						hi[pp] = (char)0;
					} else
						sc[pp] =
							(char)0xFFFF;    // we only got first char of key, rest is still there
				} else {
					// In this case we can save a node by swapping the new node
					// with the compressed node
					sc[pp] = (char)0xFFFF;
					hi[p] = pp;
					sc[p] = (char)0;
					eq[p] = val;
					length++;
					return p;
				}
			}
			char s = key[start];
			if (s < sc[p])
				lo[p] = insert(lo[p], key, start, val);
			else if (s == sc[p]) {
				if (s != 0)
					eq[p] = insert(eq[p], key, start + 1, val);
				else {
					// key already in tree, overwrite data
					eq[p] = val;
				}

			} else
				hi[p] = insert(hi[p], key, start, val);
			return p;
		}

		/**
		 * Compares 2 null terminated char arrays
		 */
		public static int strcmp(char[] a, int startA, char[] b, int startB) {
			for (; a[startA] == b[startB]; startA++, startB++)
				if (a[startA] == 0)
					return 0;
			return a[startA] - b[startB];
		}

		/**
		 * Compares a string with null terminated char array
		 */
		public static int strcmp(string str, char[] a, int start) {
			int i, d, len = str.Length;
			for (i = 0; i < len; i++) {
				d = (int)str[i] - a[start + i];
				if (d != 0)
					return d;
				if (a[start + i] == 0)
					return d;
			}
			if (a[start + i] != 0)
				return (int)-a[start + i];
			return 0;

		}

		public static void strcpy(char[] dst, int di, char[] src, int si) {
			while (src[si] != 0)
				dst[di++] = src[si++];
			dst[di] = (char)0;
		}

		public static int strlen(char[] a, int start) {
			int len = 0;
			for (int i = start; i < a.Length && a[i] != 0; i++)
				len++;
			return len;
		}

		public static int strlen(char[] a) {
			return strlen(a, 0);
		}

		public int find(string key) {
			int len = key.Length;
			char[] strkey = new char[len + 1];
			key.CopyTo(0, strkey, 0, len);
			strkey[len] = (char)0;

			return find(strkey, 0);
		}

		public int find(char[] key, int start) {
			int d;
			char p = root;
			int i = start;
			char c;

			while (p != 0) {
				if (sc[p] == 0xFFFF) {
					if (strcmp(key, i, kv.Arr, lo[p]) == 0)
						return eq[p];
					else
						return -1;
				}
				c = key[i];
				d = c - sc[p];
				if (d == 0) {
					if (c == 0)
						return eq[p];
					i++;
					p = eq[p];
				} else if (d < 0)
					p = lo[p];
				else
					p = hi[p];
			}
			return -1;
		}

		public bool knows(string key) {
			return (find(key) >= 0);
		}

		// redimension the arrays
		private void redimNodeArrays(int newsize) {
			int len = newsize < lo.Length ? newsize : lo.Length;
			char[] na = new char[newsize];
			Array.Copy(lo, 0, na, 0, len);
			lo = na;
			na = new char[newsize];
			Array.Copy(hi, 0, na, 0, len);
			hi = na;
			na = new char[newsize];
			Array.Copy(eq, 0, na, 0, len);
			eq = na;
			na = new char[newsize];
			Array.Copy(sc, 0, na, 0, len);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -