⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 burstsort.cpp

📁 目前最快速的字符串排序算法
💻 CPP
字号:
/* Copyright 2007 Stefan Webb

This file is part of Burstsort.

Burstsort is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

Burstsort is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Burstsort; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */

#include "main.h"

Burstsort::Burstsort() {
	// Initialise the trie to having one level
	trie.numnodes = 27;
	trie.nodes = (Node*) malloc(sizeof(Node)*27);
	memset(&trie.nodes[0], 0, 27 * sizeof(Node));
	// initialise variable
	maxlength = 0;
	sorted = false;
}

Burstsort::~Burstsort() {
	// At the moment, doesn't free buckets. That will be a little more complicated!
	free(trie.nodes);
}

void Burstsort::insert(wchar_t* string) {
	// make sure the string is valid to be inserted into the trie
	// a valid string contains only lower case ascii
	// also record the size of the string
	unsigned int i = 0;
	for (;;i++) {
		// if it's the end of the string we're done
		if (!string[i])
			break;
		// if the character is not a lowercase Latin letter, return
		if (string[i] < 0x0061 || string[i] > 0x007a)
			return;
	}
	if (maxlength < i)
		maxlength = i;
	// this loop examines as many characters in the string as necessary
	unsigned int a = 0, node = 0;
	for(;;) {
		// if we've reached the end of the string then update the empty symbol string count
		if (!string[a]) {
			trie.nodes[node].stringsinbucket++;
			break;
		}
		// determine what bucket we need to insert the string into
		unsigned int symbolnode = node + string[a] - 0x0060;
		// if there is no bucket, create one
		if (trie.nodes[symbolnode].stringsinbucket == 0) {
			trie.nodes[symbolnode].bucket = (char*) malloc(sizeof(char));
			trie.nodes[symbolnode].firstunused = trie.nodes[symbolnode].bucket;
			trie.nodes[symbolnode].bottomofbucket = trie.nodes[symbolnode].firstunused + sizeof(char);
		}
		// if this node points to a bucket and not another node then insert
		if (trie.nodes[symbolnode].stringsinbucket != -1) {
			unsigned int b = a + 1;
			for(;;) {
				// check whether we need to burst or resize the bucket
				if (trie.nodes[symbolnode].firstunused == trie.nodes[symbolnode].bottomofbucket) {
					unsigned int size = trie.nodes[symbolnode].bottomofbucket - trie.nodes[symbolnode].bucket;
					// if the size has not yet reached 8192 then copy the bucket to a new one twice the size
					if(size != 8192) {
						char* bucket = (char*) malloc(size << 1);
						// we cannot use realloc here since it does not guarantee contiguity
						memcpy(bucket, trie.nodes[symbolnode].bucket, size);
						free(trie.nodes[symbolnode].bucket);
						trie.nodes[symbolnode].bucket = bucket;
						trie.nodes[symbolnode].firstunused = bucket + size;
						trie.nodes[symbolnode].bottomofbucket = bucket + (size << 1);
					}
					// otherwise burst the bucket
					else {
						// fixed up a bug here, I think
						burst(symbolnode);
						break;
					}
				}
				// otherwise copy the new character
				// not sure about pointers here, need to use reference to check
				// should be able to combine these two lines into one somehow
				trie.nodes[symbolnode].firstunused[0] = (char) string[b];
				trie.nodes[symbolnode].firstunused++;
				// if the end of the string then increase the number of strings and exit
				if (!string[b++]) {
					trie.nodes[symbolnode].stringsinbucket++;
					return;
				}
			}
		}
		// otherwise, go on to the next node
		node = trie.nodes[symbolnode].node;
		a++;
	}
}

// Debug
/*int scmp(const void *sp1, const void *sp2 )
{
    return( strcmp(*(char **)sp1, *(char **)sp2) );
}*/

// sort is essentially a depth-first search, which calls sort_bucket when it gets to a bucket
void Burstsort::sort() {
	Stack stack(maxlength);
	unsigned int currentnode = 0;
	for (;;) {
		for (;;) {
			// take care of the empty char symbol
			if ((currentnode % 27) == 0) {
				currentnode++;
				continue;
			}
			// if this node points to another node then go further depthwise
			if (trie.nodes[currentnode].stringsinbucket == -1) {
				stack.push(currentnode);
				currentnode = trie.nodes[currentnode].node;
				continue;
			} else if (trie.nodes[currentnode].stringsinbucket != 0) {
				// if we're in here then we've found a nonempty bucket
				if (!make_tailpointers(currentnode)) {
					burst(currentnode);
					continue;
				} else {
					/*if (trie.nodes[currentnode].stringsinbucket > 10) {
						for (unsigned int i = 0; i < trie.nodes[currentnode].stringsinbucket; i++) {
							char* tailstring = ((char**)trie.nodes[currentnode].firstunused)[i];
							printf(tailstring);
							wprintf(L"\n");
						}*/
						ssort2main((char**)trie.nodes[currentnode].firstunused, trie.nodes[currentnode].stringsinbucket);
						//qsort((char**)trie.nodes[currentnode].firstunused, trie.nodes[currentnode].stringsinbucket, sizeof(char*), scmp);
						/*wprintf(L"\n strings are sorted now\n\n");
						for (unsigned int i = 0; i < trie.nodes[currentnode].stringsinbucket; i++) {
							char* tailstring = ((char**)trie.nodes[currentnode].firstunused)[i];
							printf(tailstring);
							wprintf(L"\n");
						}

						return;
					}*/
					//sort_bucket(currentnode);
				}
			}
		if ((++currentnode % 27) == 0)
			break;
		}
		for (;;) {
			// if the stack is empty then we're done
			if (!stack.getsize()) {
				sorted = true;
				return;
			}
			// else retrace our steps by poping the stack
			currentnode = stack.pop() + 1;
			if ((currentnode % 27) != 0)
				break;
		}
	}
}

void Burstsort::burst(unsigned int node) {
	//this->print();
	// create a new set of 27 nodes
	Trie temptrie = {trie.numnodes + 27, (Node*) malloc(sizeof(Node) * (trie.numnodes + 27))};
	memcpy(temptrie.nodes, trie.nodes, sizeof(Node) * temptrie.numnodes);
	trie = temptrie;
	// make sure the new nodes are zeroed
	memset(&trie.nodes[trie.numnodes - 27], 0, 27 * sizeof(Node));
	// insert bucket's strings into new buckets
	int j = 0;
	for (int i = 0; i < trie.nodes[node].stringsinbucket; i++) {
	// **Debug**
	//if (i == 830)
	//	int testabc = 0;
		// if the empty string was in the bucket then update the empty char node
		if (!trie.nodes[node].bucket[j]) {
			trie.nodes[trie.numnodes - 27].stringsinbucket++;
			j++;
			continue;
		}
		unsigned int symbolnode = trie.numnodes - 27 + trie.nodes[node].bucket[j++] - 0x0060;
		// ** Debug **
		//if (symbolnode > trie.numnodes)
		//	int testabde = 0;
		//if (symbolnode == 54)
		//	int aslkxjcv = 0;
		// if there is no bucket, create one
		if (trie.nodes[symbolnode].stringsinbucket == 0) {
			trie.nodes[symbolnode].bucket = (char*) malloc(sizeof(char));
			trie.nodes[symbolnode].firstunused = trie.nodes[symbolnode].bucket;
			trie.nodes[symbolnode].bottomofbucket = trie.nodes[symbolnode].firstunused + sizeof(char);
		}
		// if this node points to a bucket and not another node then insert
		if (trie.nodes[symbolnode].stringsinbucket != -1) {
			for(;;) {
				// check whether we need to burst or resize the bucket
				if (trie.nodes[symbolnode].firstunused == trie.nodes[symbolnode].bottomofbucket) {
					unsigned int size = trie.nodes[symbolnode].bottomofbucket - trie.nodes[symbolnode].bucket;
					// copy the bucket to a new one twice the size
					char* bucket = (char*) malloc(size << 1);
					// we cannot use realloc here since it does not guarantee contiguity
					memcpy(bucket, trie.nodes[symbolnode].bucket, size);
					trie.nodes[symbolnode].firstunused = bucket + (trie.nodes[symbolnode].firstunused - trie.nodes[symbolnode].bucket);
					free(trie.nodes[symbolnode].bucket);
					trie.nodes[symbolnode].bucket = bucket;
					trie.nodes[symbolnode].bottomofbucket = bucket + (size << 1);
					//break;
				}
				// otherwise copy the new character
				// not sure about pointers here, need to use reference to check
				*trie.nodes[symbolnode].firstunused = (char) trie.nodes[node].bucket[j];
				trie.nodes[symbolnode].firstunused++;
				// if the end of the string then increase the number of strings and exit
				if (!trie.nodes[node].bucket[j++]) {
					trie.nodes[symbolnode].stringsinbucket++;
					break;
				}
			}
		}
	}
	// update the old node
	trie.nodes[node].stringsinbucket = -1;
	trie.nodes[node].node = trie.numnodes - 27;
	//this->print();
}

void Burstsort::sort_bucket(unsigned int node) {
	ssort2main((char**) trie.nodes[node].firstunused, trie.nodes[node].stringsinbucket);
}

void Burstsort::print() {
	Stack stack(maxlength);
	wchar_t* string = (wchar_t*) malloc(maxlength * sizeof(wchar_t));
	unsigned int currentnode = 0;
	// this for debug only
	FILE* file = _wfopen(L"debug.txt", L"wb");
	wchar_t unicodeheader[1];
	unicodeheader[0] = 0xfeff;
	fwrite(unicodeheader, sizeof(wchar_t), 1, file);
	for (;;) {
		for (;;) {
			// take care of the empty char symbol
			if ((currentnode % 27) == 0) {
				// make the string zero terminated
				string[stack.getsize()] = 0;
				// and print as many as there are with the empty char termining them
				for (unsigned int i = 0; i < trie.nodes[currentnode].stringsinbucket; i++) {
					wprintf(string);
					wprintf(L"\n");
					// debug lines
					fwrite(string, sizeof(wchar_t), wcslen(string), file);
					fwrite(L"\n", sizeof(wchar_t), wcslen(L"\n"), file);
				}
				currentnode++;
				continue;
			}
			// if this node points to another node then go further depthwise
			if (trie.nodes[currentnode].stringsinbucket == -1) {
				// put the current character in the temporary string buffer
				string[stack.getsize()] = (wchar_t) (currentnode % 27) + 0x0060;
				// push the current node onto the stack so we can retrace our steps
				stack.push(currentnode);
				// set the current node to the one further into the trie 
				currentnode = trie.nodes[currentnode].node;
				continue;
			} else if (trie.nodes[currentnode].stringsinbucket != 0) {
				// if we're in here then we got to a bucket, so print all strings in bucket
				string[stack.getsize()] = (wchar_t) (currentnode % 27) + 0x0060;
				unsigned int j = 0; 
				for (unsigned int i = 0; i < trie.nodes[currentnode].stringsinbucket; i++) {
					unsigned int k = stack.getsize() + 1;
					if(!trie.nodes[currentnode].stringsinbucket)
						string[k] = 0;
					else if(sorted) {
						j = 0;
						do {		
							string[k++] = (char)((char*)((char**)trie.nodes[currentnode].firstunused)[i])[j];
						} while ((char)((char*)((char**)trie.nodes[currentnode].firstunused)[i])[j++]);
					} else {
						do {
							string[k++] = (char) trie.nodes[currentnode].bucket[j];
						} while (trie.nodes[currentnode].bucket[j++]);
					}
					wprintf(string);
					wprintf(L"\n");
					// debug lines
					fwrite(string, sizeof(wchar_t), wcslen(string), file);
					fwrite(L"\n", sizeof(wchar_t), wcslen(L"\n"), file);
				}
			}
		if ((++currentnode % 27) == 0)
			break;
		}
		for (;;) {
			// if the stack is empty then we're done
			if (!stack.getsize()) {
				// debug only
				fclose(file);
				return;
			}
			// else retrace our steps by poping the stack
			currentnode = stack.pop() + 1;
			if ((currentnode % 27) != 0)
				break;
		}
	}
}

bool Burstsort::make_tailpointers(unsigned int node) {
	// ensure this function is only called if this is a nonempty bucket node
	//if (trie.nodes[node].stringsinbucket == 0)
	//	return true;
	//if (trie.nodes[node].stringsinbucket == -1)
	//	return true;
	// if there is not enough room in the bucket then return false
	unsigned int left = (trie.nodes[node].bottomofbucket - trie.nodes[node].firstunused);
	unsigned int needed = trie.nodes[node].stringsinbucket * sizeof(char*);
	unsigned int used = (trie.nodes[node].firstunused - trie.nodes[node].bucket);
	if (left < needed) {
		// if we can expand the bucket then do that
		if ((used + needed) < 8192) {
			char* bucket = (char*) malloc(used + needed);
			memcpy(bucket, trie.nodes[node].bucket, used);
			trie.nodes[node].firstunused = bucket + used;
			free(trie.nodes[node].bucket);
			trie.nodes[node].bucket = bucket;
			trie.nodes[node].bottomofbucket = bucket + used + needed;
		} else {
		// otherwise we will need to burst the bucket
			return false;
		}
	}
	unsigned int j = 0;
	char* i = trie.nodes[node].bucket;
	do {
		// if we're just starting off then insert a pointer to the tail string in the bucket
		if (i == trie.nodes[node].bucket)
			*(((char**) trie.nodes[node].firstunused) + j++) = i;
		// if the last character was an end of string then do the same
		else if (*(i - 1) == 0)
			*(((char**) trie.nodes[node].firstunused) + j++) = i;
	} while(++i < trie.nodes[node].firstunused);
	//for (unsigned int i = 0; i < trie.nodes[node].stringsinbucket; i++)
	//	char* tailstring = ((char**)trie.nodes[node].firstunused)[i];
	return true;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -