📄 ternarytree.cs
字号:
using System;
using System.Collections;
using System.Text;
/*
* $Id: TernaryTree.cs,v 1.1.1.1 2003/02/04 02:58:44 geraldhenson Exp $
* Copyright (C) 2001 The Apache Software Foundation. All rights reserved.
* For details on use and redistribution please refer to the
* LICENSE file included with these sources.
*/
namespace iTextSharp.text.pdf.hyphenation {
/**
* <h2>Ternary Search Tree</h2>
*
* <p>A ternary search tree is a hibrid between a binary tree and
* a digital search tree (trie). Keys are limited to strings.
* A data value of type char is stored in each leaf node.
* It can be used as an index (or pointer) to the data.
* Branches that only contain one key are compressed to one node
* by storing a pointer to the trailer substring of the key.
* This class is intended to serve as base class or helper class
* to implement Dictionary collections or the like. Ternary trees
* have some nice properties as the following: the tree can be
* traversed in sorted order, partial matches (wildcard) can be
* implemented, retrieval of all keys within a given distance
* from the target, etc. The storage requirements are higher than
* a binary tree but a lot less than a trie. Performance is
* comparable with a hash table, sometimes it outperforms a hash
* function (most of the time can determine a miss faster than a hash).</p>
*
* <p>The main purpose of this java port is to serve as a base for
* implementing TeX's hyphenation algorithm (see The TeXBook,
* appendix H). Each language requires from 5000 to 15000 hyphenation
* patterns which will be keys in this tree. The strings patterns
* are usually small (from 2 to 5 characters), but each char in the
* tree is stored in a node. Thus memory usage is the main concern.
* We will sacrify 'elegance' to keep memory requirenments to the
* minimum. Using java's char type as pointer (yes, I know pointer
* it is a forbidden word in java) we can keep the size of the node
* to be just 8 bytes (3 pointers and the data char). This gives
* room for about 65000 nodes. In my tests the english patterns
* took 7694 nodes and the german patterns 10055 nodes,
* so I think we are safe.</p>
*
* <p>All said, this is a map with strings as keys and char as value.
* Pretty limited!. It can be extended to a general map by
* using the string representation of an object and using the
* char value as an index to an array that contains the object
* values.</p>
*
* @author cav@uniscope.co.jp
*/
public class TernaryTree : ICloneable {
/**
* We use 4 arrays to represent a node. I guess I should have created
* a proper node class, but somehow Knuth's pascal code made me forget
* we now have a portable language with memory management and
* automatic garbage collection! And now is kind of late, furthermore,
* if it ain't broken, don't fix it.
*/
/**
* Pointer to low branch and to rest of the key when it is
* stored directly in this node, we don't have unions in java!
*/
protected char[] lo;
/**
* Pointer to high branch.
*/
protected char[] hi;
/**
* Pointer to equal branch and to data when this node is a string terminator.
*/
protected char[] eq;
/**
* <P>The character stored in this node: splitchar
* Two special values are reserved:</P>
* <ul><li>0x0000 as string terminator</li>
* <li>0xFFFF to indicate that the branch starting at
* this node is compressed</li></ul>
* <p>This shouldn't be a problem if we give the usual semantics to
* strings since 0xFFFF is garanteed not to be an Unicode character.</p>
*/
protected char[] sc;
/**
* This vector holds the trailing of the keys when the branch is compressed.
*/
protected CharVector kv;
protected char root;
protected char freenode;
protected int length; // number of items in tree
protected static int BLOCK_SIZE = 2048; // allocation size for arrays
internal TernaryTree() {
init();
}
protected void init() {
root = (char)0;
freenode = (char)1;
length = 0;
lo = new char[BLOCK_SIZE];
hi = new char[BLOCK_SIZE];
eq = new char[BLOCK_SIZE];
sc = new char[BLOCK_SIZE];
kv = new CharVector();
}
/**
* Branches are initially compressed, needing
* one node per key plus the size of the string
* key. They are decompressed as needed when
* another key with same prefix
* is inserted. This saves a lot of space,
* specially for long keys.
*/
public void insert(string key, char val) {
// make sure we have enough room in the arrays
int len = key.Length
+ 1; // maximum number of nodes that may be generated
if (freenode + len > eq.Length)
redimNodeArrays(eq.Length + BLOCK_SIZE);
char[] strkey = new char[len--];
key.CopyTo(0, strkey, 0, len);
strkey[len] = (char)0;
root = insert(root, strkey, 0, val);
}
public void insert(char[] key, int start, char val) {
int len = strlen(key) + 1;
if (freenode + len > eq.Length)
redimNodeArrays(eq.Length + BLOCK_SIZE);
root = insert(root, key, start, val);
}
/**
* The actual insertion function, recursive version.
*/
private char insert(char p, char[] key, int start, char val) {
int len = strlen(key, start);
if (p == 0) {
// this means there is no branch, this node will start a new branch.
// Instead of doing that, we store the key somewhere else and create
// only one node with a pointer to the key
p = freenode++;
eq[p] = val; // holds data
length++;
hi[p] = (char)0;
if (len > 0) {
sc[p] = (char)0xFFFF; // indicates branch is compressed
lo[p] = (char)kv.alloc(len
+ 1); // use 'lo' to hold pointer to key
strcpy(kv.Arr, lo[p], key, start);
} else {
sc[p] = (char)0;
lo[p] = (char)0;
}
return p;
}
if (sc[p] == 0xFFFF) {
// branch is compressed: need to decompress
// this will generate garbage in the external key array
// but we can do some garbage collection later
char pp = freenode++;
lo[pp] = lo[p]; // previous pointer to key
eq[pp] = eq[p]; // previous pointer to data
lo[p] = (char)0;
if (len > 0) {
sc[p] = kv[lo[pp]];
eq[p] = pp;
lo[pp]++;
if (kv[lo[pp]] == 0) {
// key completly decompressed leaving garbage in key array
lo[pp] = (char)0;
sc[pp] = (char)0;
hi[pp] = (char)0;
} else
sc[pp] =
(char)0xFFFF; // we only got first char of key, rest is still there
} else {
// In this case we can save a node by swapping the new node
// with the compressed node
sc[pp] = (char)0xFFFF;
hi[p] = pp;
sc[p] = (char)0;
eq[p] = val;
length++;
return p;
}
}
char s = key[start];
if (s < sc[p])
lo[p] = insert(lo[p], key, start, val);
else if (s == sc[p]) {
if (s != 0)
eq[p] = insert(eq[p], key, start + 1, val);
else {
// key already in tree, overwrite data
eq[p] = val;
}
} else
hi[p] = insert(hi[p], key, start, val);
return p;
}
/**
* Compares 2 null terminated char arrays
*/
public static int strcmp(char[] a, int startA, char[] b, int startB) {
for (; a[startA] == b[startB]; startA++, startB++)
if (a[startA] == 0)
return 0;
return a[startA] - b[startB];
}
/**
* Compares a string with null terminated char array
*/
public static int strcmp(string str, char[] a, int start) {
int i, d, len = str.Length;
for (i = 0; i < len; i++) {
d = (int)str[i] - a[start + i];
if (d != 0)
return d;
if (a[start + i] == 0)
return d;
}
if (a[start + i] != 0)
return (int)-a[start + i];
return 0;
}
public static void strcpy(char[] dst, int di, char[] src, int si) {
while (src[si] != 0)
dst[di++] = src[si++];
dst[di] = (char)0;
}
public static int strlen(char[] a, int start) {
int len = 0;
for (int i = start; i < a.Length && a[i] != 0; i++)
len++;
return len;
}
public static int strlen(char[] a) {
return strlen(a, 0);
}
public int find(string key) {
int len = key.Length;
char[] strkey = new char[len + 1];
key.CopyTo(0, strkey, 0, len);
strkey[len] = (char)0;
return find(strkey, 0);
}
public int find(char[] key, int start) {
int d;
char p = root;
int i = start;
char c;
while (p != 0) {
if (sc[p] == 0xFFFF) {
if (strcmp(key, i, kv.Arr, lo[p]) == 0)
return eq[p];
else
return -1;
}
c = key[i];
d = c - sc[p];
if (d == 0) {
if (c == 0)
return eq[p];
i++;
p = eq[p];
} else if (d < 0)
p = lo[p];
else
p = hi[p];
}
return -1;
}
public bool knows(string key) {
return (find(key) >= 0);
}
// redimension the arrays
private void redimNodeArrays(int newsize) {
int len = newsize < lo.Length ? newsize : lo.Length;
char[] na = new char[newsize];
Array.Copy(lo, 0, na, 0, len);
lo = na;
na = new char[newsize];
Array.Copy(hi, 0, na, 0, len);
hi = na;
na = new char[newsize];
Array.Copy(eq, 0, na, 0, len);
eq = na;
na = new char[newsize];
Array.Copy(sc, 0, na, 0, len);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -