📄 rules.cpp

📁 This is a little tool to generate dependency and it is aimed to achieve BCD-normal form in relationa
💻 CPP
字号:
#include "Rules.h"

void Rules::decomposition()
{
	bool keyFound=false;
	Set dummy;
	findKey();
	canonical();
	for (int i=0; i<ruleNumber; i++)
	{
		dummy=rules[i].lhs+rules[i].rhs;
		cout<<"\ndecomposition #"<<i+1<<":";
		display(dummy);
		cout<<"\ndependency is:\n";
		
		for (int j=0; j<ruleNumber; j++)
		{
			if ((rules[j].lhs<=dummy)&&(rules[j].rhs<=dummy))
			{
				displayRule(j);
				//cout<<endl;
			}			
		}
		cout<<endl;
		for (j=0; j<keyCount; j++)
		{
			if (candidateKey[j]<=dummy)
			{
				keyFound=true;//some decomposition contains some key
			}
		}
	}
	if (!keyFound)
	{
		cout<<"\ndecomposition #"<<ruleNumber+1<<":";
		display(candidateKey[0]);
		cout<<"\nthe key has no particular dependency\n";
	}
	needKey=!keyFound;
}
	
	
//English can be ambiguous: this function really means:
//"this" relation imply "dummy" relation
bool Rules::imply(const Rules& dummy) const
{
	Set left;
	//for all dependency in "dummy" relation...
	for (int i=0; i<dummy.ruleNumber; i++)
	{
		left=dummy.rules[i].lhs;
		//if the closure of that particular dependency under
		//dependency of "this" relation
		//cannot contains its rhs in that dependency in "dummy" 
		//relation, we say...
		closure(left, -1);
		if (!(dummy.rules[i].rhs<=left))
		{
			//we say "dummy" relation is not implied by "this" relation
			return false;
		}
	}
	//can we?????
	return true;
}

//the "secret" name for this function is "equivalent"
bool Rules::operator ==(const Rules& dummy) const
{
	return imply(dummy)&&dummy.imply(*this);
}



void Rules::canonical()
{
	int i;
	bool found;
	split();
	//cout<<"\nafter split";
	//display();
	do
	{
		i=0; 
		found=false;
		while (i<ruleNumber)
		{
			//displayRule(i);
			if (checkAttr(i))
			{
				found=true;				
			}
			i++;
		}
	}while (found);
	//cout<<"\nafter checkattr";
	//display();
	i=0;
	while (i<ruleNumber)
	{	
		if (!checkRule(i))
		{
			i++;
		}		
	}
	//cout<<"\nafter check rule";
	//display();
	//displayRules();
	combine();
	//displayRules();
	
}

void Rules::displayRules()
{
	for (int i=0; i<ruleNumber; i++)
	{
		displayRule(i);
	}
}

void Rules::combine()
{
	int j;
	for (int i=0; i<ruleNumber-1; i++)
	{
		j=i+1;
		while (j<ruleNumber)
		{
			if (rules[i].lhs==rules[j].lhs)
			{	
				/*
				cout<<"\ncombine rules of \n";
				displayRule(i);
				displayRule(j);
				cout<<"\nfinish\n";
				*/
				rules[i].rhs=rules[i].rhs+rules[j].rhs;
				removeRule(j);
				j=i;//restart is safe!!!
				//Please note here: without j++;
				//I will test "j" again in case originally
				//j+1 is also the same rule to combine!!!!!
			}		
			j++;		
		}
	}
}


void Rules::removeRule(int index)
{
	if (index<ruleNumber)
	{
		ruleNumber--;
		for (int i=index; i<ruleNumber; i++)
		{
			rules[i]=rules[i+1];
		}
	}	
}



bool Rules::checkRule(int index)
{
	Set dummy;
	dummy=rules[index].lhs;

	//displayRule(index);
	closure(dummy, index);
	//display(dummy);
	if (rules[index].rhs<=dummy)
	{
		removeRule(index);
		return true;
	}
	return false;
}


bool Rules::checkAttr(int index)
{
	Set dummy;
	//for the out parameter "dummy", you cannot modify it!
	rules[index].lhs.forEachSubSet(dummy);

	while (rules[index].lhs.eachSub(dummy))
	{
		Set old;	
		//so, you have to use a copy of dummy to calc closure of it
		old=dummy;
		closure(old, index);
	
		if (rules[index].rhs<=old)
		{	
			rules[index].lhs=dummy;
			//if this new rule already exists...
			for (int i=0; i<ruleNumber; i++)
			{
				if (i!=index)
				{
					if (rules[i]==rules[index])
					{
						//found repeat
						removeRule(index);
					}
				}
			}
			
			//otherwise do nothing
			return true;
		}
	}
	return false;
}


int Rules::findKey()
{
	Set theLeft, theRight, excluding;
	bool isSub;
	leftSet.setSize(attrCount);
	rightSet.setSize(attrCount);
	excluding.setSize(attrCount);
	excluding.reset();
	leftSet.set();//the universal set
	rightSet.reset();//the empty set
	for (int i=0; i<ruleNumber; i++)
	{
		rightSet= rightSet+rules[i].rhs;
		excluding=excluding+rules[i].lhs;
	}
	//excluding are those that only appear at rhs
	//but never appear at lhs, so, they cannot 
	//determine anybody, they cannot be part of key;
	excluding=rightSet-excluding;
	//rightset are those that donot appear at rhs
	//therefore it must be part of key
	rightSet=leftSet-rightSet;//rightSet is the minimum part of candidate key
	
	//leftset are those not of part of minimum key
	leftSet=leftSet-rightSet;//leftSet is the difference of rightSet,should be added to key
	keyCount=0;
	theRight=rightSet;
	theLeft=leftSet;

	closure(theRight, -1);
	if (theRight.count()==attrCount)//the only key
	{
		candidateKey[keyCount++]=rightSet;		
	}
	else
	{
		leftSet.forEachSubSet(theLeft);
		while (leftSet.eachSub(theLeft, excluding))
		{
			isSub=false;
			theRight=rightSet;
			theRight=theRight+theLeft;
			for (int i=0; i<keyCount; i++)
			{
				//display(theRight);
				//display(candidateKey[i]);
				if (candidateKey[i]<theRight)
				{
					isSub=true;
					break;
				}
			}
			if (isSub)
			{
				continue;//if subset of any candidate key, no need to test
			}
			Set temp;			
			temp=theRight;
			closure(temp, -1);
			if (temp.count()==attrCount)
			{
				candidateKey[keyCount++]=theRight;
			}

		}
	}	
	return keyCount;
}

void Rules::showMatrix(const Set* matrix, int row)
{
	for (int j=0; j<attrCount; j++)
	{
		cout<<"\t"<<attributes[j];
	}
	cout<<"\n";
	for (int i=0; i<row; i++)
	{
		displayRule(i);
		cout<<"\t"<<matrix[i]<<endl;
	}
}

//this is a standard algorithm and it makes sure
//that all dependency agree with each other
//or in good English that there is some common agreement 
//between all dependency. Is this good enough? I doubt it.
bool Rules::checkLossless()
{
	int row=needKey?ruleNumber+1:ruleNumber;
	
	Set* matrix=new Set[row];
	for (int i=0; i<row; i++)
	{
		if (needKey&&i==row-1)
		{
			matrix[i]=candidateKey[0];	
		}
		else
		{
			matrix[i]=rules[i].lhs+rules[i].rhs;
		}
	}
	//showMatrix(matrix, row);
	int index;
	bool found;
	do
	{
		index=0;
		found=false;
		while (index<row)
		{
			for (int j=0; j<ruleNumber; j++)
			{
				if (j!=index)
				{
					if (rules[j].lhs<=matrix[index])
					{
						if (!(rules[j].rhs<=matrix[index]))
						{
							matrix[index]=matrix[index]+rules[j].rhs;
							found=true;
						}
					}
				}
			}
			index++;
		}
	}while(found);
	showMatrix(matrix, row);
	for (index=0; index<row; index++)
	{
		if (matrix[index].count()==attrCount)
		{
			delete []matrix;
			return true;
		}
	}
	delete [] matrix;
	return false;
}


void Rules::calcClosure()
{
	for (int i=0; i<attrCount; i++)
	{
		attrClosure[i].setSize(attrCount);
		attrClosure[i].reset();
		attrClosure[i].set(i);
		closure(attrClosure[i], -1);
	}
}

void  Rules::closure(Set& attrSet, int maskedRule) const
{
	bool found=false;
	int i=0;
	do 
	{
		i=0;
		found=false;
		while (i<ruleNumber)
		{
			if (i!=maskedRule)//this rule will not be calculated
			{
				if (rules[i].lhs<=attrSet)//lhs is contained
				{
					if ((attrSet*rules[i].rhs)!=rules[i].rhs)
					{
						attrSet=attrSet+rules[i].rhs;
						found=true;
					}
				}
			}
			i++;
		}
	}
	while (found);
}

void Rules::display(const Set& attrSet)
{
	bool first=true;
	cout<<"{";
	for (int i=0; i<attrCount; i++)
	{
		
		if (attrSet.test(i))
		{
			if (first)
			{
				first=false;
			}
			else
			{
				cout<<",";
			}
			cout<<attributes[i];
		}
	}
	cout<<"}";
}

void Rules::display()
{
	cout<<"\nnow display\n";
	cout<<relationName<<"(";
	for (int i=0; i<attrCount; i++)
	{
		cout<<attributes[i];
		if (i!=attrCount-1)
		{
			cout<<",";
		}
		else
		{
			cout<<");\n";
		}
	}
	for (i=0; i<ruleNumber; i++)
	{
		displayRule(i);
	}
}


bool Rule::test(int pos, bool isLeft)
{
	if (isLeft)
	{
		return lhs.test(pos);
	}
	else
	{
		return rhs.test(pos);
	}
}


void Rules::displayRule(int index)
{
	for (int i=0; i<attrCount; i++)
	{
		if (rules[index].test(i, true))
		{
			cout<<attributes[i]<<" ";
		}
	}
	cout<<" -> ";
	for (i=0; i<attrCount; i++)
	{
		if (rules[index].test(i, false))
		{
			cout<<attributes[i]<<" ";
		}
	}
	cout<<";\n";
}


void Rule::set(int pos, bool isLeft)
{
	if (isLeft)
	{
		lhsSet(pos);
	}
	else
	{
		rhsSet(pos);
	}
}

void Rule::rhsSet(int pos)
{
	rhs.set(pos);
}

void Rule::lhsSet(int pos)
{
	lhs.set(pos);
}

Rule::Rule()
{
	lhs.reset();
	rhs.reset();
	
}


void Rules::readFile(const char* fileName)
{
	FILE* stream;
	stream=fopen(fileName, "r");
	attrCount=extractNames(stream, ",()\n ", ';');//ignore the first relation name
	ruleReading(stream);

}

int Rules::searchByName(const char* name)
{
	for (int i=0; i<attrCount; i++)
	{
		if (strcmp(name, attributes[i])==0)
		{
			return i;
		}
	}
	return -1;
}

/*
void Rules::ruleReading(FILE* stream)
{
	char ch;
	char buffer[MaxNameLength+1];
	int step=0;
	int result;
	ruleNumber=0;//initialize
	bool isLeft=true;
	rules[ruleNumber].setSize(attrCount);//do twice!!
	while (!feof(stream))
	{		
		ch=fgetc(stream);
		if (ch==';'||ch==',')
		{
			ruleNumber++;
			rules[ruleNumber].setSize(attrCount);//do twice!!
			isLeft=true;
		}
		else
		{
			if (ch=='-'||ch=='>')
			{
				isLeft=false;
			}
			else
			{
				if (isalpha(ch)||ch=='_')
				{
					/*
					buffer[step]=ch;
					searchByChar(ch, step);
					if (numberFound!=1)
					{
						if (numberFound==0)
						{
							cout<<"undefined attributes encountered: ";
							buffer[step+1]='\0';
							cout<<buffer<<endl;
							exit(3);
						}
						if (step<MaxNameLength)
						{
							step++;
						}
						else
						{
							cout<<"ambuiguous attributes between ";
							for (int i=0; i<numberFound; i++)
							{
								cout<<attrFound[i]<<"\t";
							}
							exit(1);
						}
					}
					else
					{
						//first condition is that we look for the shortest
						//matched name
						buffer[step+1]='\0';
						if (searchByName(buffer))
						{
							step=0;							
							rules[ruleNumber].set(attrFound[0], isLeft);
						}
					}
					*/
/*
					buffer[step]=ch;
					step++;
				}
				else
				{
					if (step!=0)
					{
						buffer[step]='\0';
						result=searchByName(buffer);
						if (result>=0)
						{
							step=0;		
							rules[ruleNumber].set(result, isLeft);
							//rules[ruleNumber].set(attrFound[0], isLeft);
						}
						else
						{
							cout<<"undefined attribute: "<<buffer<<endl;
							exit(2);
						}
					}
				}

			}
		

		}
	}
}
*/

void Rules::ruleReading(FILE* stream)
{
	char ch;
	char buffer[MaxNameLength+1];
	int step=0;
	int result;
	ruleNumber=0;//initialize
	bool isLeft=true;
	rules[ruleNumber].setSize(attrCount);//do twice!!
	while (!feof(stream))
	{		
		ch=fgetc(stream);
		if (ch=='-'||ch=='>')
		{
			isLeft=false;
		}
		if (isalpha(ch)||ch=='_')
		{			
			buffer[step]=ch;
			step++;
		}
		else//otherwise it is a new attribute
		{
			if (step!=0)//ignore space or change_new_line
			{
				buffer[step]='\0';
				result=searchByName(buffer);
				if (result>=0)
				{
					step=0;		
					rules[ruleNumber].set(result, isLeft);
					//rules[ruleNumber].set(attrFound[0], isLeft);
				}
				else
				{
					cout<<"undefined attribute: "<<buffer<<endl;
					exit(2);
				}
			}
			if (ch==';'||ch==',')
			{
				ruleNumber++;
				rules[ruleNumber].setSize(attrCount);//do twice!!
				isLeft=true;
			}
		}
	}
}


void Rule::setSize(int theSize)
{
	lhs.setSize(theSize);
	rhs.setSize(theSize);
}
	

int Rules::searchByChar(char ch, int step)
{
	if (step==0)//this is first time, and all attributes are searched
	{
		numberFound=0;
		for (int i=0; i<attrCount; i++)//
		{
			if (ch==attributes[i][step])
			{
				attrFound[numberFound]=i;
				numberFound++;
			}
		}
	}
	else
	{
		int number=0;//new 'attrFound' re-counting
		for (int i=0; i<numberFound; i++)
		{
			if (ch==attributes[attrFound[i]][step])
			{
				attrFound[number]=i;
				number++;
			}
		}
		numberFound=number;
	}
	return numberFound;
}


int findChar(char ch, char* str)
{
	int index=0;
	while (str[index]!='\0')
	{
		if (str[index]==ch)
		{
			return index;
		}
		index++;
	}
	return -1;
}

//extract names from a line delimetered by various chars, like ',','(',')'....
int Rules::extractNames(FILE* stream, char* delimeters, char endChar)
{
	int findChar(char ch, char* str);
	char ch;
	int nameIndex=0;
	char buffer[MaxNameLength+1];
	char* ptr=buffer;

	while (!feof(stream))
	{
		ch=getc(stream);
		if (ch==' '||ch==10||ch==9||ch==13)
		{
			continue;
		}
		if (ch==endChar)
		{
			if (ptr!=buffer)
			{
				*ptr='\0';
				if (searchByName(buffer)<0)
				{
					strcpy(attributes[nameIndex], buffer);
				}
				else
				{
					cout<<"found repeat attribute "<<buffer<<endl;
					exit(2);
				}
			}
			break;
		}

		if (findChar(ch, delimeters)>=0)//deli reached
		{
			//skip the consecutive deli
			if (ptr!=buffer)
			{
				*ptr='\0';	
				if (ch=='(')//the very first one
				{
					strcpy(relationName, buffer);
				}
				else
				{
					if (searchByName(buffer)<0)
					{
						strcpy(attributes[nameIndex], buffer);
						nameIndex++;
					}
					else
					{
						cout<<"found repeat attribute "<<buffer<<endl;
						exit(2);
					}					
				}
				ptr=buffer;//restart
			}
		}
		else
		{
			*ptr=ch;
			ptr++;
		}
	}
	return nameIndex;
}		

Rules::Rules()
{
	numberFound=attrCount=0;
}
	
void Rule::operator =(const Rule& dummy)
{
	lhs=dummy.lhs;
	rhs=dummy.rhs;
}

void Rules::addRule(const Rule& dummy)
{
	rules[ruleNumber++]=dummy;
}

void Rules::addRule(const Set& left, const Set& right)
{
	rules[ruleNumber++].setRule(left, right);
}

void Rule::setRule(const Set& left, const Set& right)
{
	lhs=left;
	rhs=right;
}

void Rules::split()
{
	int index;
	for (int i=0; i<ruleNumber; i++)
	{
		if (rules[i].rhs.count()>1)
		{			
			index=rules[i].rhs.first();
			index=rules[i].rhs.next(index);//starting from second one
			while (index!=-1)
			{
				Set right;				
				right.setSize(rules[i].rhs);
				right.set(index);
				rules[i].rhs.reset(index);//remove from old rule
				addRule(rules[i].lhs, right);
				index=rules[i].rhs.next(index);
			}
		}
	}
}

bool Rule::operator ==(const Rule& dummy)
{
	return (lhs==dummy.lhs&&rhs==dummy.rhs);
}
💿 文件大小 131 K
👤 上传用户 gjq2000
📂 所属分类数据结构
🏷️ 相关标签

#BCD-normal #dependency #relationa #generate
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -