📄 assoc_sparse.cpp
字号:
// counts number of leaf nodes not using any recursion
long TSparseITree::countLeafNodes() {
long countLeaf = 0;
vector<TSparseINode *> nodeQue;
TSparseINode *currNode;
nodeQue.push_back(root);
while (!nodeQue.empty()) { //repeats until que is empty
currNode = nodeQue.back();
nodeQue.pop_back();
if (!currNode->subNode.empty()) //if node is leaf count++ else count children
RITERATE(TSparseISubNodes,sni,currNode->subNode)
nodeQue.push_back(sni->second);
else countLeaf++; // node is leaf
}
return countLeaf;
};
// counts supports of all aimLength long branches in tree using one example (itemset) data
void TSparseITree::considerItemset(long itemset[], int iLength, float weight, int aimLength) {
typedef pair<int,int> IntPair; // <parent node index, depth>
typedef pair<TSparseINode *,IntPair> NodeDepth;
vector<NodeDepth> nodeQue;
int currDepth;
int currPrIndex; //parent index
TSparseINode *currNode;
int i, end = iLength - aimLength;
nodeQue.push_back(NodeDepth(root,IntPair(-1,0))); // put root in que
while (!nodeQue.empty()) { //repeats until que is empty
currNode = nodeQue.back().first; // node
currPrIndex = nodeQue.back().second.first; // parentIndex
currDepth = nodeQue.back().second.second; // depth
nodeQue.pop_back();
if (currDepth == aimLength) { currNode->weiSupp += weight; continue;} // we found an instance
if (currNode->subNode.empty()) continue; // if node does't have successors
for (i = currDepth + end; i!=currPrIndex; i--) //go through all posible successors of this node
if (currNode->hasNode(itemset[i]))
nodeQue.push_back(NodeDepth((*currNode)[itemset[i]],IntPair(i,currDepth + 1)));
}
};
// counts supports of all aimLength long branches in tree using examples data
void TSparseITree::considerExamples(TSparseExamples *examples, int aimLength) {
ITERATE(vector<TSparseExample*>,ei,examples->transaction)
if (aimLength <= (*ei)->length)
considerItemset((*ei)->itemset, (*ei)->length, (*ei)->weight, aimLength);
}
// deletes all leaves that have weiSupp smaler than given minSupp;
void TSparseITree::delLeafSmall(float minSupp) {
long countLeaf = 0;
vector<TSparseINode *> nodeQue;
TSparseINode *currNode;
nodeQue.push_back(root);
while (!nodeQue.empty()) { //repeats until que is empty
currNode = nodeQue.back();
nodeQue.pop_back();
if (!currNode->subNode.empty()) //if node is not leaf add children else check support
RITERATE(TSparseISubNodes,sni,currNode->subNode)
nodeQue.push_back(sni->second);
else
if (currNode->weiSupp < minSupp) {
currNode->parent->subNode.erase(currNode->value);
delete currNode;
}
}
};
// generates all posible association rules from tree using given confidence
PAssociationRules TSparseITree::genRules(int maxDepth, float minConf, float nOfExamples) {
typedef pair<TSparseINode *,int> NodeDepth; //<node,depth>
int count=0;
vector<NodeDepth> nodeQue;
PAssociationRules rules = mlnew TAssociationRules();
long *itemset = new long[maxDepth];
int currDepth;
TSparseINode *currNode;
nodeQue.push_back(NodeDepth(root,0)); // put root in que
while (!nodeQue.empty()) { //repeats until que is empty
currNode = nodeQue.back().first; // node
currDepth = nodeQue.back().second; // depth
nodeQue.pop_back();
if (currDepth) itemset[currDepth - 1] = currNode->value; // create itemset to check for confidence
if (currDepth > 1)
count += getItemsetRules(itemset, currDepth, minConf, currNode->weiSupp, nOfExamples, rules); //creates rules from itemsets and adds them to rules
RITERATE(TSparseISubNodes,sni,currNode->subNode) //adds subnodes to list
nodeQue.push_back(NodeDepth(sni->second, currDepth + 1));
}
return rules;
};
// checks if itemset generates some rules with enough confidence and adds these rules to resultset
long TSparseITree::getItemsetRules(long itemset[], int iLength, float minConf,
float nAppliesBoth, float nOfExamples,
PAssociationRules rules) {
float nAppliesLeft, nAppliesRight;
long count = 0;
PAssociationRule rule;
TExample exLeft(domain), exRight(domain);
const bool sparseRules = domain->variables->empty();
nAppliesLeft=nAppliesBoth;
nAppliesRight=nAppliesBoth;
typedef pair<int,int> IntPair; // <parent node index, depth>
typedef pair<TSparseINode *,IntPair> NodeDepth;
vector<NodeDepth> nodeQue;
int currDepth, i, j;
int currPrIndex; //parent index
TSparseINode *currNode, *tempNode;
long *leftItemset = new long[iLength - 1];
float thisConf;
nodeQue.push_back(NodeDepth(root,IntPair(-1,0))); // put root in que
while (!nodeQue.empty()) { //repeats until que is empty
currNode = nodeQue.back().first; // node
currPrIndex = nodeQue.back().second.first; // parentIndex
currDepth = nodeQue.back().second.second; // depth
nodeQue.pop_back();
nAppliesLeft = currNode->weiSupp; // support of left side
thisConf = nAppliesBoth/nAppliesLeft;
if (thisConf >= minConf) { // if confidence > min confidence do ... else don't folow this branch
if (currDepth) {
leftItemset[currDepth-1] = currNode->value;
if (sparseRules) {
PExample exLeftS = mlnew TExample(domain, false);
PExample exRightS = mlnew TExample(domain, false);
tempNode = root;
for(i=0, j=0; (i<currDepth) && (j<iLength); ) {
if (itemset[j] < leftItemset[i]) {
exRightS->setMeta(itemset[j], TValue(1.0));
tempNode = (*tempNode)[itemset[j]];
j++;
}
else {
_ASSERT(itemset[j] == leftItemset[i]);
exLeftS->setMeta(leftItemset[i], TValue(1.0));
i++;
j++;
}
}
_ASSERT(i==currDepth);
for(; j<iLength; j++) {
exRightS->setMeta(itemset[j], TValue(1.0));
tempNode = (*tempNode)[itemset[j]];
}
/*
for (i=0; i<currDepth; i++) //generating left and right example and get support of left side
exLeft[leftItemset[i]] = 1.0;
tempNode = root;
for (i=0; i< iLength; i++)
if ( ) {
exRight[itemset[i]] = 1.0;
tempNode = (*tempNode)[itemset[i]];
}
*/
nAppliesRight = tempNode->weiSupp; //get support of left side
//add rules
rule = mlnew TAssociationRule(exLeftS, exRightS, nAppliesLeft, nAppliesRight, nAppliesBoth, nOfExamples, currDepth, iLength-currDepth);
rules->push_back(rule);
count ++;
}
else {
for (i=0; i<currDepth;i++) { //generating left and right example and get support of left side
exLeft[leftItemset[i]].setSpecial(false);
exLeft[leftItemset[i]].varType=0;
}
tempNode = root;
for (i=0; i<iLength;i++)
if (exLeft[itemset[i]].isSpecial()) {
exRight[itemset[i]].setSpecial(false);
exRight[itemset[i]].varType=0;
tempNode = (*tempNode)[itemset[i]];
}
nAppliesRight = tempNode->weiSupp; //get support of left side
//add rules
rule = mlnew TAssociationRule(mlnew TExample(exLeft), mlnew TExample(exRight), nAppliesLeft, nAppliesRight, nAppliesBoth, nOfExamples, currDepth, iLength-currDepth);
rules->push_back(rule);
count ++;
for (i=0; i<iLength;i++) { //deleting left and right example
exLeft[itemset[i]].setSpecial(true);
exLeft[itemset[i]].varType=1;
exRight[itemset[i]].setSpecial(true);
exRight[itemset[i]].varType=1;
}
}
}
if (currDepth < iLength - 1) //if we are not too deep
for (i = iLength - 1; i!=currPrIndex; i--) //go through all posible successors of this node
if (currNode->hasNode(itemset[i])) //if this node exists among childrens
nodeQue.push_back(NodeDepth((*currNode)[itemset[i]],IntPair(i,currDepth + 1)));
}
}
return count;
};
/****************************************************************************************
TAssociationRulesSparseInducer
*****************************************************************************************/
TAssociationRulesSparseInducer::TAssociationRulesSparseInducer(float asupp, float aconf, int awei)
: maxItemSets(15000),
confidence(aconf),
support(asupp),
nOfExamples(0.0)
{}
PAssociationRules TAssociationRulesSparseInducer::operator()(PExampleGenerator examples, const int &weightID)
{ float nMinSupp;
long currItemSets, i,newItemSets;
// reformat examples in sparseExm for better efficacy
TSparseExamples sparseExm(examples, weightID);
// build first level of tree
TSparseITree *tree = new TSparseITree(sparseExm);
newItemSets = tree->buildLevelOne(sparseExm.intDomain);
nMinSupp = support * sparseExm.fullWeight;
//while it is posible to extend tree repeat...
for(i=1;newItemSets;i++) {
tree->considerExamples(&sparseExm,i);
tree->delLeafSmall(nMinSupp);
currItemSets = tree->countLeafNodes();
newItemSets = tree->extendNextLevel(i, maxItemSets - currItemSets);
//test if tree is too large
if (newItemSets + currItemSets >= maxItemSets) {
raiseError("too many itemsets (%i); increase 'maxItemSets'", maxItemSets);
newItemSets = 0;
}
}
return tree->genRules(i, confidence, sparseExm.fullWeight);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -