📄 fptree.cpp
字号:
return;
}
/******************************************************************************************
* Function: buildConTree
*
* Description:
* Build a conditional FP-tree and the corresponding header table from
* a conditional pattern base.
*
* Invoked from:
* genConditionalPatternTree()
*
* Functions to be invoked:
* q_sortA()
* insert_tree()
*
* Parameters:
* *conRoot -> Root of the conditional FP-tree
* **conHeader -> Conditional header table for all the frequent items.
* conHeaderSize -> Number of items (frequent) in the conditional header table
* *conLargeItem -> Stores all the frequent items of the conditional pattern base
* *conLargeItemSupprt -> The conditional support counts of the conditional items
* "Conditional support" of an item is
* the number of itemsets containing the item and the base items.
* T -> The parent conditional FP-tree
* headerTable -> The parent header table
* baseIndex -> The index position of the parent header table that refer to the item
* contained in the base of this conditional FP-tree
* headerSize -> The number of items in the parent header table
*/
void Fptree::buildConTree(FPTreeNode *conRoot, FPTreeNode **conHeader, int conHeaderSize, int *conLargeItem,
int *conLargeItemSupport, FPTreeNode T, FPTreeNode *headerTable, int baseIndex, int headerSize)
{
FPTreeNode aNode;
FPTreeNode ancestorNode;
int *freqItemP; /* Holds all the frequent items of a transaction of the conditional pattern base */
int *indexList; /* Holds the index position of the parent header table for the corresponding item
in freqItemP[] */
int path;
int count;
int i;
/* create conditional header table
*/
*conHeader = (FPTreeNode *) malloc (sizeof(FPTreeNode) * conHeaderSize);
if (*conHeader == NULL) {
printf("out of memory\n");
exit(1);
}
for (i=0; i < conHeaderSize; i++)
(*conHeader)[i] = NULL;
/* create root of the conditional FP-tree
*/
(*conRoot) = (FPTreeNode) malloc (sizeof(FPNode));
if (*conRoot == NULL) {
printf("out of memory\n");
exit(1);
}
/* Initialize the root of the conditional FP-tree
*/
(*conRoot)->numPath = 1;
(*conRoot)->parent = NULL;
(*conRoot)->children = NULL;
(*conRoot)->hlink = NULL;
freqItemP = (int *) malloc (sizeof(int) * conHeaderSize);
if (freqItemP == NULL) {
printf("out of memory\n");
exit(1);
}
indexList = (int *) malloc (sizeof(int) * conHeaderSize);
if (indexList == NULL) {
printf("out of memory\n");
exit(1);
}
/* Set aNode to the first node of the parent FP-tree
* that contains the base item.
*/
aNode = headerTable[baseIndex];
/* Visit each path from the base item to the root
* of the parent FP-tree and
* extract all the frequent items of the conditional pattern base.
* Sort this items in descending order of their frequency and
* insert them to the conditional FP-tree.
*/
while (aNode != NULL) {
ancestorNode = aNode->parent;
count = 0;
/* Identify the frequent items in each path from the
* node containing the item in the base to the root.
* Each of such path is just like a transaction of the
* conditional pattern base.
* This identification can be done because
* conLargeItem[] contains all the frequent items
* of the conditional pattern base.
* Store the frequent items to freqItemP[], and
* the corresponding index position of the
* conditional header table to indexList[].
*/
while (ancestorNode != T) {
for (i=0; i < conHeaderSize; i++) {
if (ancestorNode->item == conLargeItem[i]) {
freqItemP[count] = ancestorNode->item;
indexList[count] = i;
count++;
break;
}
}
ancestorNode = ancestorNode->parent;
}
/* Sort the frequent items in this path in ascending order
* of indexList[], i.e. sort in descending order of the
* frequency of the items in the conditional pattern base.
*/
q_sortA(indexList, freqItemP, 0, count-1, count);
path = 0;
/* Insert the frequent items to the conditional FP-tree.
*/
insert_tree(&(freqItemP[0]), &(indexList[0]), aNode->count, 0, count, *conRoot, *conHeader, &path);
aNode = aNode->hlink;
}
free(freqItemP);
free(indexList);
return;
}
/******************************************************************************************
* Function: genConditionalPatternTree
*
* Description:
* -- Form the conditional pattern base of each item in the header table.
* -- Build the conditional FP-tree for the item.
* -- Perfrom FPgrowth() for the conditional FP-tree.
*
* Parameters (In):
* pattern[] -> Base items for the conditional FP-tree.
* baseSize -> Number of base items -1.
* patternSupport -> Support of the base items (it is a large itemset).
* T -> FP-tree.
* headerIndex -> Index position in the header table refering
* the base item for the conditional FP-tree.
* headerSize -> Number of items in the header table.
* *headerTableLink -> Header table of the FP-tree (T).
*
* Invoked from:
* FPgrowth()
*
* Functions to be invoked:
* q_sortD()
* buildConTree()
* FPgrowth()
*
* Parameters (Out):
* conLargeItem[] -> To hold frequent items in the conditional pattern base.
* conLargeItemSupport[] -> To hold "conditional support" of each items in the conditional pattern base.
* "Conditional support" of an item is
* the number of itemsets containing the item and the base items.
*
* Global Variables (read only):
* threshold -> Support threshold
*/
void Fptree::genConditionalPatternTree(int *pattern, int baseSize, int patternSupport,
int *conLargeItem, int *conLargeItemSupport, FPTreeNode T,
int headerIndex, int headerSize, FPTreeNode *headerTableLink)
{
int conHeaderSize;
FPTreeNode *conHeader;
FPTreeNode conRoot;
FPTreeNode aNode, ancestorNode;
int j;
for (j=0; j < headerSize; j++)
conLargeItemSupport[j] = 0;
aNode = headerTableLink[headerIndex];
conHeaderSize = 0;
/* Find all the frequent items in the conditional pattern base.
* -- Visit, in bottom-up manner, all the ancestor nodes of all the nodes containing this item.
* -- Count the "conditional supports" of the items in the ancestor nodes
* (i.e. items in conditional pattern base), and store the values to conLargeSupport[].
* "Conditional support" of an item is
* the number of itemsets containing the item and the base items.
* -- Items in the ancestor nodes having conditional supports >= threshold are added to
* conLargeItem[].
*/
while (aNode != NULL) {
ancestorNode = aNode->parent;
while (ancestorNode != T) {
for (j=0; j < headerSize; j++) {
if (ancestorNode->item == headerTableLink[j]->item) {
/* Increment the conditional support count
* for this ancestor item
*/
conLargeItemSupport[j] += aNode->count;
if ((conLargeItemSupport[j] >= threshold) &&
(conLargeItemSupport[j] - aNode->count <
threshold)) {
/* Add the ancestor item to the conditional pattern base,
* and update the number of items in the
* conditional header table
*/
conLargeItem[j] = ancestorNode->item;
conHeaderSize++;
}
break;
}
}
ancestorNode = ancestorNode->parent;
}
/* Next node of the FP-tree containing the base item
*/
aNode = aNode->hlink;
}
/* Sort the items in the conditional pattern base in descending order of their
* conditional support count
*/
q_sortD(conLargeItemSupport, conLargeItem, 0, headerSize-1, headerSize);
/* Generate the conditional FP-tree and mine recursively
*/
if (conHeaderSize > 0) {
/* Build conditional FP-tree
*/
buildConTree(&conRoot, &conHeader, conHeaderSize, conLargeItem, conLargeItemSupport,
T, headerTableLink, headerIndex, headerSize);
/* Mining
*/
FPgrowth(conRoot, conHeader, conHeaderSize, pattern, baseSize+1);
free(conHeader);
destroyTree(conRoot);
}
return;
}
/******************************************************************************************
* Function: q_sortD
*
* Description:
* Quick sort two arrays, support[] and the corresponding itemset[],
* in descending order of support[].
*
* Invoked from:
* pass1()
* genConditionalPatternTree()
* q_sortD()
*
* Functions to be invoked:
* swap()
* q_sortD()
*
* Input Parameters:
* low -> lower bound index of the array to be sorted
* high -> upper bound index of the array to be sorted
* size -> size of the array
* length -> length of an itemset
*
* In/Out Parameters:
* support[] -> array to be sorted
* itemset[] -> array to be sorted
*/
void Fptree::q_sortD(int *support, int *itemset, int low,int high, int size)
{
int pass;
int highptr=high++; /* highptr records the last element */
/* the first element in list is always served as the pivot */
int pivot=low;
if(low>=highptr) return;
do {
/* Find out, from the head of support[],
* the 1st element value not larger than the pivot's
*/
pass=1;
while(pass==1) {
if(++low<size) {
if(support[low] > support[pivot])
pass=1;
else pass=0;
} else pass=0;
}
/* Find out, from the tail of support[],
* the 1st element value not smaller than the pivot's
*/
pass=1;
while(pass==1) {
if(high-->0) {
if(support[high] < support[pivot])
pass=1;
else pass=0;
} else pass=0;
}
/* swap elements pointed by low pointer & high pointer */
if(low<high)
swap(support, itemset, low, high);
} while(low<=high);
swap(support, itemset, pivot, high);
/* divide list into two for further sorting */
q_sortD(support, itemset, pivot, high-1, size);
q_sortD(support, itemset, high+1, highptr, size);
return;
}
/******************************************************************************************
* Function: swap
*
* Description:
* Swap x-th element and i-th element of each of the
* two arrays, support[] and itemset[].
*
* Invoked from:
* q_sortD()
* q_sortA()
*
* Functions to be invoked: None
*
* Input Parameters:
* support -> Corresponding supports of the items in itemset.
* itemset -> Array of items.
* x, i -> The two indexes for swapping.
*
* Global variable: None
*/
void Fptree::swap(int *support, int *itemset, int x, int i)
{
int temp;
temp = support[x];
support[x] = support[i];
support[i] = temp;
temp = itemset[x];
itemset[x] = itemset[i];
itemset[i] = temp;
return;
}
void Fptree::input(const char *configFile)
{
FILE *fp;
if ((fp = fopen(configFile, "r")) == NULL) {
printf("Can't open config. file, %s.\n", configFile);
exit(1);
}
fscanf(fp, "%d %f %d %d", &expectedK, &thresholdDecimal, &numItem, &numTrans);
fscanf(fp, "%s %s", dataFile, outFile);
int state = 0;
fscanf(fp, "%d", &state);
fclose(fp);
printf("expectedK = %d\n", expectedK);
printf("thresholdDecimal = %f\n", thresholdDecimal);
printf("numItem = %d\n", numItem);
printf("numTrans = %d\n", numTrans);
printf("dataFile = %s\n", dataFile);
printf("outFile = %s\n\n", outFile);
threshold = thresholdDecimal * numTrans;
if (threshold == 0) threshold = 1;
printf("threshold = %ld\n", (long)threshold);
if(state){
m_sumflag = true;
printf("sum flag = true\n");
}else{
printf("sum flag = false\n");
}
m_line.SetSumFlag(m_sumflag);
return;
}
int Fptree::Execute(const char *param, const char *subparam)
{
printf("input\n");
input(param);
string paramset = m_line.GetBuf("sup", subparam, ';');
string sumfile = m_line.GetBuf("sumfile", subparam, ';');
string paramtmp;
paramtmp = m_line.GetBuf("cycle", subparam, ';');
int cycle = 1;
if(!paramtmp.empty()){
cycle = atoi(paramtmp.c_str());
}
for(int k=0; k<cycle; k++){
int i=0;
paramtmp = m_line.GetBuf(i, paramset.c_str(), ',');
for(; !paramtmp.empty(); ++i, paramtmp = m_line.GetBuf(i, paramset.c_str(), ',')){
thresholdDecimal = atof(paramtmp.c_str())/100.0;
threshold = thresholdDecimal * numTrans;
printf("第 %d 次运行, 支持度为 %f, 支持度计数为%f\nn", i+1, thresholdDecimal, threshold);
m_tm1 = m_line.GetTime(true);
printf("\npass1\n");
ParseItemSet();
/* Mine the large k-itemsets (k = 2 to realK) -----*/
if (numLarge[0] > 0) {
/* create FP-tree --------------------------*/
printf("\nbuildTree\n");
BuildTree();
m_tm2 = m_line.GetTime(true);
/* mining frequent patterns ----------------*/
printf("\npassK\n");
m_headerTableSize= numLarge[0];
numLarge[0] = 0;
FPgrowth(root, headerTableLink, m_headerTableSize, NULL, 0);
m_tm3 = m_line.GetTime(true);
/* output result of large itemsets ---------*/
printf("\nresult\n");
displayResult();
}
DisplayTime();
WriteSumFile(sumfile.c_str());
/* free memory ------------------------------------*/
printf("\ndestroy\n");
Destroy();
}
}
return 0;
}
void Fptree::WriteSumFile(const char *FileName)
{
//fstream fs;
//fs.open(FileName, 0, ios::app);
FILE* fp = fopen(FileName, "a+t");
if(fp){
fprintf(fp, "%f\t%f\t%f\t%f\t%f\t%f\t%f\t%f\t%ld\t%ld\t%ld\n", thresholdDecimal, threshold,
m_tm2.m_ktime-m_tm1.m_ktime,m_tm2.m_utime-m_tm1.m_utime,
m_tm3.m_ktime-m_tm2.m_ktime, m_tm3.m_utime-m_tm2.m_utime,
m_sysTime, m_userTime, m_tm1.m_vsize, m_tm2.m_vsize, m_tm3.m_vsize);
}else{
printf("写统计信息文件出错\n");
}
//fprintf(fp, "over...\n");
fclose(fp);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -