⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fptree.cpp

📁 FP-growth算法的改进C++程序,具有较好的扩展性和应用性,本程序改成用行读取
💻 CPP
📖 第 1 页 / 共 3 页
字号:
 return;
}


/******************************************************************************************
 * Function: buildConTree
 *
 * Description:
 *	Build a conditional FP-tree and the corresponding header table from
 *	a conditional pattern base.
 *
 * Invoked from:	
 *	genConditionalPatternTree()
 *
 * Functions to be invoked:
 *	q_sortA()
 *	insert_tree()
 *	
 * Parameters:
 *	*conRoot	-> Root of the conditional FP-tree
 *	**conHeader	-> Conditional header table for all the frequent items.
 *	conHeaderSize	-> Number of items (frequent) in the conditional header table
 *	*conLargeItem	-> Stores all the frequent items of the conditional pattern base 
 *	*conLargeItemSupprt	-> The conditional support counts of the conditional items
 *				   "Conditional support" of an item is 
 *				    the number of itemsets containing the item and the base items.
 *	T		-> The parent conditional FP-tree
 *	headerTable	-> The parent header table
 *	baseIndex	-> The index position of the parent header table that refer to the item
 *			   contained in the base of this conditional FP-tree
 *	headerSize	-> The number of items in the parent header table
 */
 void Fptree::buildConTree(FPTreeNode *conRoot, FPTreeNode **conHeader, int conHeaderSize, int *conLargeItem,
		int *conLargeItemSupport, FPTreeNode T, FPTreeNode *headerTable, int baseIndex, int headerSize)
{
 FPTreeNode aNode;
 FPTreeNode ancestorNode;
 int *freqItemP;	/* Holds all the frequent items of a transaction of the conditional pattern base */
 int *indexList;	/* Holds the index position of the parent header table for the corresponding item
			   in freqItemP[] */
 int path;
 int count;
 int i;
 /* create conditional header table 
  */
 *conHeader = (FPTreeNode *) malloc (sizeof(FPTreeNode) * conHeaderSize);
 if (*conHeader == NULL) {
        printf("out of memory\n");
        exit(1);
 }
 for (i=0; i < conHeaderSize; i++)
        (*conHeader)[i] = NULL;

 /* create root of the conditional FP-tree 
  */
 (*conRoot) = (FPTreeNode) malloc (sizeof(FPNode));
 if (*conRoot == NULL) {
        printf("out of memory\n");
        exit(1);
 }

 /* Initialize the root of the conditional FP-tree 
  */
 (*conRoot)->numPath = 1;
 (*conRoot)->parent = NULL;
 (*conRoot)->children = NULL;
 (*conRoot)->hlink = NULL;

 freqItemP = (int *) malloc (sizeof(int) * conHeaderSize);
 if (freqItemP == NULL) {
        printf("out of memory\n");
        exit(1);
 }

 indexList = (int *) malloc (sizeof(int) * conHeaderSize);
 if (indexList == NULL) {
        printf("out of memory\n");
        exit(1);
 }

 /* Set aNode to the first node of the parent FP-tree
  * that contains the base item.
  */
 aNode = headerTable[baseIndex];

 /* Visit each path from the base item to the root
  * of the parent FP-tree and
  * extract all the frequent items of the conditional pattern base.
  * Sort this items in descending order of their frequency and
  * insert them to the conditional FP-tree.
  */ 
 while (aNode != NULL) {
	ancestorNode = aNode->parent;
	count = 0;

	/* Identify the frequent items in each path from the
	 * node containing the item in the base to the root.
	 * Each of such path is just like a transaction of the
	 * conditional pattern base.
	 * This identification can be done because
	 * conLargeItem[] contains all the frequent items
	 * of the conditional pattern base.
	 * Store the frequent items to freqItemP[], and
	 * the corresponding index position of the 
	 * conditional header table to indexList[].
	 */
	while (ancestorNode != T) {
		for (i=0; i < conHeaderSize; i++) {
			if (ancestorNode->item == conLargeItem[i]) {
				freqItemP[count] = ancestorNode->item;
				indexList[count] = i;
				count++;
				break;
			}
		}
		ancestorNode = ancestorNode->parent;
	}

	/* Sort the frequent items in this path in ascending order
	 * of indexList[], i.e. sort in descending order of the
	 * frequency of the items in the conditional pattern base.
	 */
	q_sortA(indexList, freqItemP, 0, count-1, count);

	path = 0;

	/* Insert the frequent items to the conditional FP-tree.
	 */
	insert_tree(&(freqItemP[0]), &(indexList[0]), aNode->count, 0, count, *conRoot, *conHeader, &path);

	aNode = aNode->hlink;
 }

 free(freqItemP);
 free(indexList);

 return;
}


/******************************************************************************************
 * Function: genConditionalPatternTree
 *
 * Description:
 *	-- Form the conditional pattern base of each item in the header table.
 *	-- Build the conditional FP-tree for the item.
 *	-- Perfrom FPgrowth() for the conditional FP-tree.
 *
 * Parameters (In):
 *	pattern[]		-> Base items for the conditional FP-tree.
 *	baseSize		-> Number of base items -1.
 *	patternSupport		-> Support of the base items (it is a large itemset).
 *	T			-> FP-tree.
 *	headerIndex		-> Index position in the header table refering
 *				   the base item for the conditional FP-tree.
 *	headerSize		-> Number of items in the header table.
 *	*headerTableLink	-> Header table of the FP-tree (T).
 *
 * Invoked from:	
 *	FPgrowth()
 *
 * Functions to be invoked:
 *	q_sortD()
 *	buildConTree()
 *	FPgrowth()
 *
 * Parameters (Out):
 *	conLargeItem[]		-> To hold frequent items in the conditional pattern base.
 *	conLargeItemSupport[]	-> To hold "conditional support" of each items in the conditional pattern base.
 *				   "Conditional support" of an item is 
 *				   the number of itemsets containing the item and the base items.
 *
 * Global Variables (read only):
 *	threshold		-> Support threshold
 */
 void Fptree::genConditionalPatternTree(int *pattern, int baseSize, int patternSupport,
				int *conLargeItem, int *conLargeItemSupport, FPTreeNode T,
				int headerIndex, int headerSize, FPTreeNode *headerTableLink)
{
 int conHeaderSize;
 FPTreeNode *conHeader;
 FPTreeNode conRoot;
 FPTreeNode aNode, ancestorNode;
 int j;

 for (j=0; j < headerSize; j++)
	conLargeItemSupport[j] = 0;

 aNode = headerTableLink[headerIndex];
 conHeaderSize = 0;

 /* Find all the frequent items in the conditional pattern base.
  * -- Visit, in bottom-up manner, all the ancestor nodes of all the nodes containing this item.
  * -- Count the "conditional supports" of the items in the ancestor nodes 
  *    (i.e. items in conditional pattern base), and store the values to conLargeSupport[].
  *    "Conditional support" of an item is 
  *    the number of itemsets containing the item and the base items.
  * -- Items in the ancestor nodes having conditional supports >= threshold are added to 
  *    conLargeItem[]. 
  */
 while (aNode != NULL) {
	ancestorNode = aNode->parent; 

	while (ancestorNode != T) {

		for (j=0; j < headerSize; j++) {
			if (ancestorNode->item == headerTableLink[j]->item) {

				/* Increment the conditional support count 
				 * for this ancestor item 
				 */
				conLargeItemSupport[j] += aNode->count;  

				if ((conLargeItemSupport[j] >= threshold) &&
				   (conLargeItemSupport[j] - aNode->count < 
					threshold)) {

					/* Add the ancestor item to the conditional pattern base,
					 * and update the number of items in the
					 * conditional header table 
					 */
					conLargeItem[j] = ancestorNode->item;
					conHeaderSize++;	
				}
				break;
			}
		}
		ancestorNode = ancestorNode->parent;
	}

	/* Next node of the FP-tree containing the base item
	 */
	aNode = aNode->hlink;
 }

 /* Sort the items in the conditional pattern base in descending order of their 
  * conditional support count
  */
 q_sortD(conLargeItemSupport, conLargeItem, 0, headerSize-1, headerSize);


 /* Generate the conditional FP-tree and mine recursively 
  */
 if (conHeaderSize > 0) {

	/* Build conditional FP-tree 
	 */
	buildConTree(&conRoot, &conHeader, conHeaderSize, conLargeItem, conLargeItemSupport, 
			T, headerTableLink, headerIndex, headerSize);

	/* Mining 
	 */
	FPgrowth(conRoot, conHeader, conHeaderSize, pattern, baseSize+1);

 	free(conHeader);
 	destroyTree(conRoot);
 }

 return;
}

/******************************************************************************************
 * Function: q_sortD
 *
 * Description:
 * 	Quick sort two arrays, support[] and the corresponding itemset[], 
 *	in descending order of support[].
 *
 * Invoked from:	
 *	pass1()
 *	genConditionalPatternTree()
 *	q_sortD()
 * 
 * Functions to be invoked:
 *	swap()
 *	q_sortD()
 *
 * Input Parameters:
 *      low		-> lower bound index of the array to be sorted
 *      high		-> upper bound index of the array to be sorted
 *      size		-> size of the array
 *	length		-> length of an itemset
 *
 * In/Out Parameters:
 *      support[]	-> array to be sorted
 *      itemset[]	-> array to be sorted
 */
void Fptree::q_sortD(int *support, int *itemset, int low,int high, int size)
{
 int pass;
 int highptr=high++;     /* highptr records the last element */
 /* the first element in list is always served as the pivot */
 int pivot=low;

 if(low>=highptr) return;
 do {
	/* Find out, from the head of support[], 
	 * the 1st element value not larger than the pivot's 
	 */
	pass=1;
	while(pass==1) {
		if(++low<size) {
			if(support[low] > support[pivot])
				pass=1;
			else pass=0;
		} else pass=0;
	} 

	/* Find out, from the tail of support[], 
	 * the 1st element value not smaller than the pivot's 
	 */ 
	pass=1; 
	while(pass==1) {
		if(high-->0) { 
			if(support[high] < support[pivot]) 
				pass=1;
			else pass=0; 
		} else pass=0; 
	}

	/* swap elements pointed by low pointer & high pointer */
	if(low<high)
		swap(support, itemset, low, high);
 } while(low<=high);

 swap(support, itemset, pivot, high);

 /* divide list into two for further sorting */ 
 q_sortD(support, itemset, pivot, high-1, size); 
 q_sortD(support, itemset, high+1, highptr, size);
 
 return;
}
/******************************************************************************************
 * Function: swap
 *
 * Description:
 *	Swap x-th element and i-th element of each of the
 *	two arrays, support[] and itemset[].
 *
 * Invoked from:	
 *	q_sortD()
 *	q_sortA()
 * 
 * Functions to be invoked: None
 *
 * Input Parameters:
 *	support	-> Corresponding supports of the items in itemset.
 *	itemset	-> Array of items.
 *	x, i	-> The two indexes for swapping.
 *
 * Global variable: None
 */
 void Fptree::swap(int *support, int *itemset, int x, int i)
{ 
 int temp; 

 temp = support[x];
 support[x] = support[i];
 support[i] = temp;
 temp = itemset[x];
 itemset[x] = itemset[i];
 itemset[i] = temp;
 
 return;
}
 void Fptree::input(const char *configFile)
 {
	 FILE *fp;
	 
	 if ((fp = fopen(configFile, "r")) == NULL) {
		 printf("Can't open config. file, %s.\n", configFile);
		 exit(1);
	 }
	 
	 fscanf(fp, "%d %f %d %d", &expectedK, &thresholdDecimal, &numItem, &numTrans);
	 fscanf(fp, "%s %s", dataFile, outFile);
	 int state = 0;
	 fscanf(fp, "%d", &state);
	 fclose(fp);
	 
	 printf("expectedK = %d\n", expectedK);
	 printf("thresholdDecimal = %f\n", thresholdDecimal);
	 printf("numItem = %d\n", numItem);
	 printf("numTrans = %d\n", numTrans);
	 printf("dataFile = %s\n", dataFile);
	 printf("outFile = %s\n\n", outFile);
	 threshold = thresholdDecimal * numTrans;
	 if (threshold == 0) threshold = 1;
	 printf("threshold = %ld\n", (long)threshold);
	 if(state){
		 m_sumflag = true;
		printf("sum flag = true\n");			 
	 }else{
		 printf("sum flag = false\n");
	 }
	 m_line.SetSumFlag(m_sumflag);
	 return;
 }
 


int Fptree::Execute(const char *param, const char *subparam)
{
	printf("input\n");
	input(param);
	string paramset = m_line.GetBuf("sup", subparam, ';');
	string sumfile  = m_line.GetBuf("sumfile", subparam, ';');
	string paramtmp;
	paramtmp = m_line.GetBuf("cycle", subparam, ';');
	int cycle = 1;
	if(!paramtmp.empty()){
		cycle = atoi(paramtmp.c_str());
	}
	for(int k=0; k<cycle; k++){
	int i=0;
	paramtmp = m_line.GetBuf(i, paramset.c_str(), ',');
	for(; !paramtmp.empty(); ++i, paramtmp = m_line.GetBuf(i, paramset.c_str(), ',')){
		thresholdDecimal = atof(paramtmp.c_str())/100.0;
		threshold = thresholdDecimal * numTrans;
		printf("第 %d 次运行, 支持度为 %f, 支持度计数为%f\nn", i+1, thresholdDecimal, threshold);
		m_tm1 = m_line.GetTime(true);
		printf("\npass1\n");
		ParseItemSet();
	
		/* Mine the large k-itemsets (k = 2 to realK) -----*/
		if (numLarge[0] > 0) {
		
		/* create FP-tree --------------------------*/
		printf("\nbuildTree\n");
		BuildTree();
		m_tm2 = m_line.GetTime(true);
		/* mining frequent patterns ----------------*/
		printf("\npassK\n");
		m_headerTableSize= numLarge[0];
		numLarge[0] = 0;
		FPgrowth(root, headerTableLink, m_headerTableSize, NULL, 0);
		m_tm3 = m_line.GetTime(true);
		/* output result of large itemsets ---------*/
		printf("\nresult\n");
		displayResult();
	}
	DisplayTime();
	WriteSumFile(sumfile.c_str());
	/* free memory ------------------------------------*/
	printf("\ndestroy\n");
	Destroy();
	}

	}
	return 0;

}

void Fptree::WriteSumFile(const char *FileName)
{
	//fstream fs;
	//fs.open(FileName, 0, ios::app);
	FILE* fp = fopen(FileName, "a+t");
	if(fp){
		fprintf(fp, "%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%ld\t%ld\t%ld\n", thresholdDecimal, threshold, 
			    m_tm2.m_ktime-m_tm1.m_ktime,m_tm2.m_utime-m_tm1.m_utime, 
				m_tm3.m_ktime-m_tm2.m_ktime, m_tm3.m_utime-m_tm2.m_utime,
				m_sysTime, m_userTime, m_tm1.m_vsize, m_tm2.m_vsize, m_tm3.m_vsize);
	}else{
		printf("写统计信息文件出错\n");
	}
	//fprintf(fp, "over...\n");
	
	fclose(fp);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -