⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sort.cpp

📁 可能是能找到的处理速度最快
💻 CPP
字号:
#include "sort.h"
#include "assert.h"

inline unsigned long random(unsigned long seed)//generate a randomize value between 0 and seed-1, both 0 and seed-1 are porssible.
{
	return (seed)* rand()/(RAND_MAX+1);
}

int TXT_FILE::Sort()
{
	temp=0;
	cprintf("\n");
	TreeMid=TreeHead;
	TreeTial=TreeHead;
	AccessMax=ArrayCount;
	if (AccessMax==0)
	{
		cprintf("\rSorting: %10d",temp);
		return SUCCESS;
	}
	AccessMax--;
	//unsigned long hitedcount=0;
	if (UseCache)
	{
//		CCache Cache;
		while(AccessMax!=0xFFFFFFFF)
		{
			unsigned long location=random(AccessMax);
			AV_TREE *tree1=RandArray[location];
			if (!TreeMid)		// exchange pointers
			{
				RandArray[location]=RandArray[AccessMax];
				RandArray[AccessMax]=tree1;	// backup the pointer
				AccessMax--;
			}
			else
			{
				RandArray[location]=TreeMid->tree;
				TreeMid->tree=tree1;		//	backup the pointer
				TreeMid=TreeMid->next;
			}
			AV_TREE *p=Cache.Add(tree1);
			//assert(p);
			if (p)
			{
				if (p!=tree1)
					SAdd(p,tree1);
				else if (Root)
					SAdd(Root,tree1);
				else
					Root=tree1;
			}
			else
				return NO_ENOUGH_MEM;	
			// end process index tables
			if (temp%10000==0)
				cprintf("\rSorting: %10d",temp);
			temp+=tree1->count;
		}		
	}
	else
	{
		while(AccessMax!=0xFFFFFFFF)
		{
			unsigned long location=random(AccessMax);
			AV_TREE *tree1=RandArray[location];
			if (TreeMid)		// exchange pointers
			{
				RandArray[location]=TreeMid->tree;
				TreeMid->tree=tree1;		//	backup the pointer
				TreeMid=TreeMid->next;
			}
			else
			{
				RandArray[location]=RandArray[AccessMax];
				RandArray[AccessMax]=tree1;	// backup the pointer
				AccessMax--;
			}
			if (Root)
				SAdd(Root, tree1);
			else
				Root=tree1;
			if (temp%10000==0)
				cprintf("\rSorting: %10d",temp);
			temp+=tree1->count;
		}		
	}
	cprintf("\rSorting: %10d",temp);
	//if (temp)
	//	cprintf("\nhit rat:%10d/%10d=%3.2d",hitedcount,temp,100*hitedcount/temp);
	return SUCCESS;
}

inline AV_TREE *TXT_FILE::SAdd(AV_TREE *tree1,AV_TREE *tree2)// tree2 add to tree1
{   
	unsigned char *str=(unsigned char *)tree2->data;
Loop:
	unsigned char *s1=str;
	unsigned char *s2=(unsigned char *)tree1->data;
	while(*s2&&*s1==*s2)
		++s1,++s2;
	int a=*s1-*s2;
	if (a==0)
	{
		if (ShowDulpicateLines)
			printf("%s\n",tree1->data);
		if (DelDuplicate)
			return NULL;
		else	if (tree1->count<0xff-tree2->count)
		{
			tree1->count+=tree2->count;
			return tree1;
		}
	}
	if (a<=0)
	{
		if (tree1->left)
		{
			tree1=tree1->left;
			goto Loop;
		}
		else
		{
			tree1->left=tree2;
			return tree2;
		}
	}
	else
	{
		if ( tree1->right)
		{
			tree1=tree1->right;
			goto Loop;
		}
		else
		{
			tree1->right=tree2;
			return tree2;
		}
	}
}

AV_TREE *TXT_FILE::Add(char *str)
{
/*	if (UseCache)
	{
		CCacheNode *p2=Cache.FindFirst(CCacheNode(NULL,*(unsigned short *)&str[0]));
		AV_TREE *p=p2?p2->tree:NULL;
		if (p)
			return SAdd(p,str);
		else
		{
			if (Root)
				return SAdd(Root,str);
			else
			{
				Root=CreateNewNode(str);
				return Root;
			}
		}
	}
	else*/
	{
		if (Root)
			return SAdd(str);
		else
		{
			Root=CreateNewNode(str);
			return Root;
		}
	}
}

inline int mystrcmp(char *a,char *b)
{
	while(*b&&*a==*b)
		++a,++b;
	return *a-*b;
}

inline AV_TREE *TXT_FILE::SAdd(char *str)// tree2 add to tree1
{   
//	unsigned char *str=(unsigned char *)tree2->data;
	AV_TREE *most_recent=Root,*most_recent_parent=NULL,*parent=NULL,*new_Root=NULL;
	AV_TREE *p=NULL,*tree1=Root;
	while(tree1)
	{
		if (tree1->balance!= 0)
		{
			most_recent = tree1;
			most_recent_parent = parent;
		}
		int a=mystrcmp(str,tree1->data);
		if (a==0)
		{
			if (ShowDulpicateLines)
				printf("%s\n",tree1->data);
			if (DelDuplicate)
				return tree1;
			else	if (tree1->count<0xff-1)
			{
				tree1->count++;
				return tree1;
			}
		}
		if (a<=0)
		{
			if (tree1->left)
			{
				parent=tree1;
				tree1=tree1->left;
			}
			else
			{
				p=CreateNewNode(str);
				tree1->left=p;
				break;
			}
		}
		else
		{
			if ( tree1->right)
			{
				parent=tree1;
				tree1=tree1->right;
			}
			else
			{
				p=CreateNewNode(str);
				tree1->right=p;
				break;
			}
		}
	}
	if(p==NULL)
		return NULL;
	int rebal_direc;
	if ( mystrcmp(str,most_recent->data) > 0)
	{
		tree1 = most_recent->right;
		rebal_direc = -1;
	}
	else
	{
		tree1 = most_recent->left;
		rebal_direc = +1;
	}
	AV_TREE *most_recent_child = tree1;
	while (tree1!= p)
	{
		if (mystrcmp(str,tree1->data) > 0)
		{
			tree1-> balance = -1;// hight of right is increased by 1
			tree1 = tree1->right;
		}
		else
		{
			tree1->balance = +1;				// hight of left is increased by 1
			tree1 = tree1->left;
		}
	}
	// need to check if the balance of
	// the tree is alright
	int unbal_flag = 1;
	if (most_recent-> balance == 0)
	{
		// tree is not unbalanced
		most_recent->balance = rebal_direc;
		unbal_flag = 0;
	}
	if ( (most_recent->balance + rebal_direc) == 0 )
	{
		// tree is not unbalanced
		most_recent->balance = 0;
		unbal_flag = 0;
	}
	if (unbal_flag == 1)
	{
		// Tree is unbalanced. determine
		// rotation direction, and rotate.
		if (rebal_direc == +1)
		{
			// left subtree is imbalanced
			// Rotate left
			if (most_recent_child->balance == +1)
			{
				most_recent->left = most_recent_child->right;
				most_recent_child->right = most_recent;
				most_recent->balance = 0;
				most_recent_child->balance = 0;
			}
			else
			{
				new_Root = most_recent_child->right;
				most_recent_child->right = new_Root->left;
				most_recent->left = new_Root->right;
				new_Root->left = most_recent_child;
				new_Root->right = most_recent;
				switch (new_Root->balance)
				{
				case +1:
					most_recent->balance = -1;
					most_recent_child->balance = 0;
					break;
				case -1:
					most_recent_child->balance = 1;
					most_recent->balance = 0;
					break;
				case 0:
					most_recent_child->balance = 0;
					most_recent->balance = 0;
					break;
				}// end of the switch statement
				new_Root->balance = 0;
				most_recent_child = new_Root;
			}
		}
		else
		{
			// right substree is unbalanced
			if (most_recent_child->balance == -1)
			{
				most_recent->right = most_recent_child->left;
				most_recent_child->left = most_recent;
				most_recent->balance = 0;
				most_recent_child->balance = 0;
			}
			else
			{
				new_Root = most_recent_child->left;
				most_recent_child->left = new_Root->right;
				most_recent->right = new_Root->left;
				new_Root->right = most_recent_child;
				new_Root->left = most_recent;
				switch (new_Root->balance)
				{
				case -1:				// Should be -1
					most_recent->balance = 1;
					most_recent_child->balance = 0;
					break;
				case +1:				// Should be +1
					most_recent_child->balance = -1;
					most_recent->balance = 0;
					break;
				case 0:
					most_recent_child->balance = 0;
					most_recent->balance = 0;
					break;
				}
				new_Root->balance = 0;
				most_recent_child = new_Root;
			}// end of the else
		}  // end of the if rebal_direc = -1

		if (most_recent_parent == NULL)
			Root = most_recent_child;
		else if (most_recent == most_recent_parent->left)
			most_recent_parent->left = most_recent_child;
		else if (most_recent == most_recent_parent->right)
			most_recent_parent->right = most_recent_child;
	} // end of if unbalanced
	return p;
}

int TXT_FILE::Save(AV_TREE *tree, FILE *f)
{
	if (IncreadOrder)
	{
		if (tree->left!=NULL)
			Save(tree->left, f);
		for (int i=tree->count;i>0;i--)
		{
			temp++;
			if (temp%100000==0)
				cprintf("\rSaving:  %10d",temp);
			fputs(tree->data, f);
		}
		if (tree->right!=NULL)
			Save(tree->right, f);
	}
	else
	{
		if (tree->right!=NULL)
			Save(tree->right, f);
		for (unsigned long i=tree->count;i>0;i--)
		{
			temp++;
			if (temp%100000==0)
				cprintf("\rSaving:  %10d",temp);
			fputs(tree->data, f);
		}
		if (tree->left!=NULL)
			Save(tree->left, f);
	}
	return SUCCESS;
}


inline int IsEmptyLine(char *buffer)
{
	register char *p=buffer;
	register unsigned short s=CHAR_TO_WORD((unsigned short)p[0],(unsigned short)p[1]);
	while ((*p!='\0')&&((*p=='\x0d')||(*p=='\x9')||(*p=='\x0a')||(*p==' ')||(s==0x8140)))
	{
		if (*p<0)
			p+=2;
		else 
			p++;
		s=CHAR_TO_WORD((unsigned short)p[0],(unsigned short)p[1]);
	}
	return (*p=='\0');
}

inline int TXT_FILE::RandAdd(char *str)
{
	if (ArrayCount)
	{
		unsigned char *s1=(unsigned char *)str;
		unsigned char *s2=(unsigned char *)MaxPointer->data;
		while(*s2&&*s1==*s2)
			++s1,++s2;
		if (*s1==*s2)
		{
			if (ShowDulpicateLines)
				printf("%s\n",str);
			if (DelDuplicate)
				return SUCCESS;
			else	if (MaxPointer->count<0xff-1)
			{
				MaxPointer->count++;
				return SUCCESS;
			}
		}
	}
	MaxPointer=CreateNewNode(str);
	if (MaxPointer)
	{
		if (ArrayCount<ArraySize)
		{
			RandArray[ArrayCount]=MaxPointer;
			AccessMax=ArrayCount;
			ArrayCount++;
		}
		else
		{
			TREE_LINK *p=TreeHead;
			TreeHead=new TREE_LINK;
			if (TreeHead)
			{
				TreeHead->tree=MaxPointer;
				TreeHead->next=p;
//				TreeHead->MidChar=0;//[0]='\0';
			}
			else
			{
				return NO_ENOUGH_MEM;
			}
		}
	}
	else
		return NO_ENOUGH_MEM;
	return SUCCESS;
}


int TXT_FILE::LoadFromFile( char *filename)
{
	FILE *f=fopen(filename,"rt");
	if (f==NULL)
		return FILE_OPEN_ERROR;
	int fn=fileno(f);
	unsigned long size=filelength(fn);
	if (size==0)
	{
		fclose(f);
		cprintf("\rLoading: %10d",temp);
		return SUCCESS;
	}
	if (!SkipRanding)
	{
		ArraySize=size/10+1;	// at least one unit
		RandArray=new AV_TREE*[ArraySize];
	}
	static char buffer[MAX_CHAR];
	temp=0;
	char *str=&buffer[MAX_CHAR-1];
	*str=0x55;
	while(fgets(buffer,MAX_CHAR,f))
	{
		//int len=strlen(buffer)+1;
		//if (len==1)
		//	continue;
		if (*str!=0x55)
		{
			*str=0x55;
			cprintf("Lines in this file are longer than %d, it's a risk to sort this file\n", MAX_CHAR-1);
		}
		if (NoEmptyLines&&IsEmptyLine(buffer))
		{
			temp++;
			if (temp%100000==0)
				cprintf("\rLoading: %10d",temp);
			continue;
		}
		if (SkipRanding)
		{
			if(Add(buffer)==NULL)
			{
				fclose(f);
				cprintf("\rLoading: %10d",temp);
				return NO_ENOUGH_MEM;
			}
		}
		else if( RandAdd(buffer)!=SUCCESS)
		{
			fclose(f);
			cprintf("\rLoading: %10d",temp);
			return NO_ENOUGH_MEM;
		}
		if (temp%100000==0)
			cprintf("\rLoading: %10d",temp);
		temp++;
	}
	fclose(f);
	cprintf("\rLoading: %10d",temp);
	return SUCCESS;
}
int TXT_FILE::SaveToFile(char *filename)
{
	FILE *f;
	f=fopen(filename,"w+t");
	temp=0;
	cprintf("\n");
	if (f==NULL)
		return FILE_OPEN_ERROR;
	if (Root!=NULL)
		Save(Root, f);
	fclose(f);
	cprintf("\rSaving:  %10d\n",temp);
	return SUCCESS;
}

void TXT_FILE::Delete(AV_TREE *p)
{
	if (p->left)
		Delete(p->left);
	if (p->right)
		Delete(p->right);
	delete p;
}

TXT_FILE::~TXT_FILE(void)
{
	if (!GlobalHeap.MemUseUp)
		return;
	if (SkipRanding)
	{
		if (Root)
			Delete(Root);
	}
	else
	{
		for (unsigned long i=0;i<ArrayCount;i++)
			delete[] RandArray[i];
		delete[] RandArray;
		TREE_LINK *p1=TreeHead;
		TREE_LINK *p2;
		while(p1!=NULL)
		{
			p2=p1->next;
			delete p1;
			p1=p2;
		}
	}
}

COptions &TXT_FILE::operator=(COptions &option)
{
	IncreadOrder=option.IncreadOrder;
	DelDuplicate=option.DelDuplicate;
	NoEmptyLines=option.NoEmptyLines;
	Column=option.Column;
	UseCache=option.UseCache;
	SkipRanding=option.SkipRanding;
	ShowDulpicateLines=option.ShowDulpicateLines;
	return option;
}

TXT_FILE::TXT_FILE(void):COptions()
{
		AccessMax=0;
		ArrayCount=0;
        ArraySize=0;
		MaxPointer=NULL;

		TreeHead=NULL;
		TreeMid=NULL;	// for getting strings to sort
		TreeTial=NULL;

		Root=NULL;
		RandArray=NULL;
		srand((unsigned )time(NULL));
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -