📄 burstsort.cpp
字号:
/* Copyright 2007 Stefan Webb
This file is part of Burstsort.
Burstsort is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
Burstsort is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Burstsort; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
#include "main.h"
Burstsort::Burstsort() {
// Initialise the trie to having one level
trie.numnodes = 27;
trie.nodes = (Node*) malloc(sizeof(Node)*27);
memset(&trie.nodes[0], 0, 27 * sizeof(Node));
// initialise variable
maxlength = 0;
sorted = false;
}
Burstsort::~Burstsort() {
// At the moment, doesn't free buckets. That will be a little more complicated!
free(trie.nodes);
}
void Burstsort::insert(wchar_t* string) {
// make sure the string is valid to be inserted into the trie
// a valid string contains only lower case ascii
// also record the size of the string
unsigned int i = 0;
for (;;i++) {
// if it's the end of the string we're done
if (!string[i])
break;
// if the character is not a lowercase Latin letter, return
if (string[i] < 0x0061 || string[i] > 0x007a)
return;
}
if (maxlength < i)
maxlength = i;
// this loop examines as many characters in the string as necessary
unsigned int a = 0, node = 0;
for(;;) {
// if we've reached the end of the string then update the empty symbol string count
if (!string[a]) {
trie.nodes[node].stringsinbucket++;
break;
}
// determine what bucket we need to insert the string into
unsigned int symbolnode = node + string[a] - 0x0060;
// if there is no bucket, create one
if (trie.nodes[symbolnode].stringsinbucket == 0) {
trie.nodes[symbolnode].bucket = (char*) malloc(sizeof(char));
trie.nodes[symbolnode].firstunused = trie.nodes[symbolnode].bucket;
trie.nodes[symbolnode].bottomofbucket = trie.nodes[symbolnode].firstunused + sizeof(char);
}
// if this node points to a bucket and not another node then insert
if (trie.nodes[symbolnode].stringsinbucket != -1) {
unsigned int b = a + 1;
for(;;) {
// check whether we need to burst or resize the bucket
if (trie.nodes[symbolnode].firstunused == trie.nodes[symbolnode].bottomofbucket) {
unsigned int size = trie.nodes[symbolnode].bottomofbucket - trie.nodes[symbolnode].bucket;
// if the size has not yet reached 8192 then copy the bucket to a new one twice the size
if(size != 8192) {
char* bucket = (char*) malloc(size << 1);
// we cannot use realloc here since it does not guarantee contiguity
memcpy(bucket, trie.nodes[symbolnode].bucket, size);
free(trie.nodes[symbolnode].bucket);
trie.nodes[symbolnode].bucket = bucket;
trie.nodes[symbolnode].firstunused = bucket + size;
trie.nodes[symbolnode].bottomofbucket = bucket + (size << 1);
}
// otherwise burst the bucket
else {
// fixed up a bug here, I think
burst(symbolnode);
break;
}
}
// otherwise copy the new character
// not sure about pointers here, need to use reference to check
// should be able to combine these two lines into one somehow
trie.nodes[symbolnode].firstunused[0] = (char) string[b];
trie.nodes[symbolnode].firstunused++;
// if the end of the string then increase the number of strings and exit
if (!string[b++]) {
trie.nodes[symbolnode].stringsinbucket++;
return;
}
}
}
// otherwise, go on to the next node
node = trie.nodes[symbolnode].node;
a++;
}
}
// Debug
/*int scmp(const void *sp1, const void *sp2 )
{
return( strcmp(*(char **)sp1, *(char **)sp2) );
}*/
// sort is essentially a depth-first search, which calls sort_bucket when it gets to a bucket
void Burstsort::sort() {
Stack stack(maxlength);
unsigned int currentnode = 0;
for (;;) {
for (;;) {
// take care of the empty char symbol
if ((currentnode % 27) == 0) {
currentnode++;
continue;
}
// if this node points to another node then go further depthwise
if (trie.nodes[currentnode].stringsinbucket == -1) {
stack.push(currentnode);
currentnode = trie.nodes[currentnode].node;
continue;
} else if (trie.nodes[currentnode].stringsinbucket != 0) {
// if we're in here then we've found a nonempty bucket
if (!make_tailpointers(currentnode)) {
burst(currentnode);
continue;
} else {
/*if (trie.nodes[currentnode].stringsinbucket > 10) {
for (unsigned int i = 0; i < trie.nodes[currentnode].stringsinbucket; i++) {
char* tailstring = ((char**)trie.nodes[currentnode].firstunused)[i];
printf(tailstring);
wprintf(L"\n");
}*/
ssort2main((char**)trie.nodes[currentnode].firstunused, trie.nodes[currentnode].stringsinbucket);
//qsort((char**)trie.nodes[currentnode].firstunused, trie.nodes[currentnode].stringsinbucket, sizeof(char*), scmp);
/*wprintf(L"\n strings are sorted now\n\n");
for (unsigned int i = 0; i < trie.nodes[currentnode].stringsinbucket; i++) {
char* tailstring = ((char**)trie.nodes[currentnode].firstunused)[i];
printf(tailstring);
wprintf(L"\n");
}
return;
}*/
//sort_bucket(currentnode);
}
}
if ((++currentnode % 27) == 0)
break;
}
for (;;) {
// if the stack is empty then we're done
if (!stack.getsize()) {
sorted = true;
return;
}
// else retrace our steps by poping the stack
currentnode = stack.pop() + 1;
if ((currentnode % 27) != 0)
break;
}
}
}
void Burstsort::burst(unsigned int node) {
//this->print();
// create a new set of 27 nodes
Trie temptrie = {trie.numnodes + 27, (Node*) malloc(sizeof(Node) * (trie.numnodes + 27))};
memcpy(temptrie.nodes, trie.nodes, sizeof(Node) * temptrie.numnodes);
trie = temptrie;
// make sure the new nodes are zeroed
memset(&trie.nodes[trie.numnodes - 27], 0, 27 * sizeof(Node));
// insert bucket's strings into new buckets
int j = 0;
for (int i = 0; i < trie.nodes[node].stringsinbucket; i++) {
// **Debug**
//if (i == 830)
// int testabc = 0;
// if the empty string was in the bucket then update the empty char node
if (!trie.nodes[node].bucket[j]) {
trie.nodes[trie.numnodes - 27].stringsinbucket++;
j++;
continue;
}
unsigned int symbolnode = trie.numnodes - 27 + trie.nodes[node].bucket[j++] - 0x0060;
// ** Debug **
//if (symbolnode > trie.numnodes)
// int testabde = 0;
//if (symbolnode == 54)
// int aslkxjcv = 0;
// if there is no bucket, create one
if (trie.nodes[symbolnode].stringsinbucket == 0) {
trie.nodes[symbolnode].bucket = (char*) malloc(sizeof(char));
trie.nodes[symbolnode].firstunused = trie.nodes[symbolnode].bucket;
trie.nodes[symbolnode].bottomofbucket = trie.nodes[symbolnode].firstunused + sizeof(char);
}
// if this node points to a bucket and not another node then insert
if (trie.nodes[symbolnode].stringsinbucket != -1) {
for(;;) {
// check whether we need to burst or resize the bucket
if (trie.nodes[symbolnode].firstunused == trie.nodes[symbolnode].bottomofbucket) {
unsigned int size = trie.nodes[symbolnode].bottomofbucket - trie.nodes[symbolnode].bucket;
// copy the bucket to a new one twice the size
char* bucket = (char*) malloc(size << 1);
// we cannot use realloc here since it does not guarantee contiguity
memcpy(bucket, trie.nodes[symbolnode].bucket, size);
trie.nodes[symbolnode].firstunused = bucket + (trie.nodes[symbolnode].firstunused - trie.nodes[symbolnode].bucket);
free(trie.nodes[symbolnode].bucket);
trie.nodes[symbolnode].bucket = bucket;
trie.nodes[symbolnode].bottomofbucket = bucket + (size << 1);
//break;
}
// otherwise copy the new character
// not sure about pointers here, need to use reference to check
*trie.nodes[symbolnode].firstunused = (char) trie.nodes[node].bucket[j];
trie.nodes[symbolnode].firstunused++;
// if the end of the string then increase the number of strings and exit
if (!trie.nodes[node].bucket[j++]) {
trie.nodes[symbolnode].stringsinbucket++;
break;
}
}
}
}
// update the old node
trie.nodes[node].stringsinbucket = -1;
trie.nodes[node].node = trie.numnodes - 27;
//this->print();
}
void Burstsort::sort_bucket(unsigned int node) {
ssort2main((char**) trie.nodes[node].firstunused, trie.nodes[node].stringsinbucket);
}
void Burstsort::print() {
Stack stack(maxlength);
wchar_t* string = (wchar_t*) malloc(maxlength * sizeof(wchar_t));
unsigned int currentnode = 0;
// this for debug only
FILE* file = _wfopen(L"debug.txt", L"wb");
wchar_t unicodeheader[1];
unicodeheader[0] = 0xfeff;
fwrite(unicodeheader, sizeof(wchar_t), 1, file);
for (;;) {
for (;;) {
// take care of the empty char symbol
if ((currentnode % 27) == 0) {
// make the string zero terminated
string[stack.getsize()] = 0;
// and print as many as there are with the empty char termining them
for (unsigned int i = 0; i < trie.nodes[currentnode].stringsinbucket; i++) {
wprintf(string);
wprintf(L"\n");
// debug lines
fwrite(string, sizeof(wchar_t), wcslen(string), file);
fwrite(L"\n", sizeof(wchar_t), wcslen(L"\n"), file);
}
currentnode++;
continue;
}
// if this node points to another node then go further depthwise
if (trie.nodes[currentnode].stringsinbucket == -1) {
// put the current character in the temporary string buffer
string[stack.getsize()] = (wchar_t) (currentnode % 27) + 0x0060;
// push the current node onto the stack so we can retrace our steps
stack.push(currentnode);
// set the current node to the one further into the trie
currentnode = trie.nodes[currentnode].node;
continue;
} else if (trie.nodes[currentnode].stringsinbucket != 0) {
// if we're in here then we got to a bucket, so print all strings in bucket
string[stack.getsize()] = (wchar_t) (currentnode % 27) + 0x0060;
unsigned int j = 0;
for (unsigned int i = 0; i < trie.nodes[currentnode].stringsinbucket; i++) {
unsigned int k = stack.getsize() + 1;
if(!trie.nodes[currentnode].stringsinbucket)
string[k] = 0;
else if(sorted) {
j = 0;
do {
string[k++] = (char)((char*)((char**)trie.nodes[currentnode].firstunused)[i])[j];
} while ((char)((char*)((char**)trie.nodes[currentnode].firstunused)[i])[j++]);
} else {
do {
string[k++] = (char) trie.nodes[currentnode].bucket[j];
} while (trie.nodes[currentnode].bucket[j++]);
}
wprintf(string);
wprintf(L"\n");
// debug lines
fwrite(string, sizeof(wchar_t), wcslen(string), file);
fwrite(L"\n", sizeof(wchar_t), wcslen(L"\n"), file);
}
}
if ((++currentnode % 27) == 0)
break;
}
for (;;) {
// if the stack is empty then we're done
if (!stack.getsize()) {
// debug only
fclose(file);
return;
}
// else retrace our steps by poping the stack
currentnode = stack.pop() + 1;
if ((currentnode % 27) != 0)
break;
}
}
}
bool Burstsort::make_tailpointers(unsigned int node) {
// ensure this function is only called if this is a nonempty bucket node
//if (trie.nodes[node].stringsinbucket == 0)
// return true;
//if (trie.nodes[node].stringsinbucket == -1)
// return true;
// if there is not enough room in the bucket then return false
unsigned int left = (trie.nodes[node].bottomofbucket - trie.nodes[node].firstunused);
unsigned int needed = trie.nodes[node].stringsinbucket * sizeof(char*);
unsigned int used = (trie.nodes[node].firstunused - trie.nodes[node].bucket);
if (left < needed) {
// if we can expand the bucket then do that
if ((used + needed) < 8192) {
char* bucket = (char*) malloc(used + needed);
memcpy(bucket, trie.nodes[node].bucket, used);
trie.nodes[node].firstunused = bucket + used;
free(trie.nodes[node].bucket);
trie.nodes[node].bucket = bucket;
trie.nodes[node].bottomofbucket = bucket + used + needed;
} else {
// otherwise we will need to burst the bucket
return false;
}
}
unsigned int j = 0;
char* i = trie.nodes[node].bucket;
do {
// if we're just starting off then insert a pointer to the tail string in the bucket
if (i == trie.nodes[node].bucket)
*(((char**) trie.nodes[node].firstunused) + j++) = i;
// if the last character was an end of string then do the same
else if (*(i - 1) == 0)
*(((char**) trie.nodes[node].firstunused) + j++) = i;
} while(++i < trie.nodes[node].firstunused);
//for (unsigned int i = 0; i < trie.nodes[node].stringsinbucket; i++)
// char* tailstring = ((char**)trie.nodes[node].firstunused)[i];
return true;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -