⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 assoc_sparse.cpp

📁 orange源码 数据挖掘技术
💻 CPP
📖 第 1 页 / 共 2 页
字号:

// counts number of leaf nodes not using any recursion
long TSparseITree::countLeafNodes() {	
	long countLeaf = 0;
	vector<TSparseINode *> nodeQue;
	TSparseINode *currNode;

	nodeQue.push_back(root);

	while (!nodeQue.empty()) {					//repeats until que is empty
		currNode = nodeQue.back();
		nodeQue.pop_back();

		if (!currNode->subNode.empty()) 		//if node is leaf count++ else count children
			RITERATE(TSparseISubNodes,sni,currNode->subNode) 
				nodeQue.push_back(sni->second);
		else countLeaf++;						// node is leaf
	}

	return countLeaf;
};


// counts supports of all aimLength long branches in tree using one example (itemset) data
void TSparseITree::considerItemset(long itemset[], int iLength, float weight, int aimLength) {	
	typedef pair<int,int> IntPair; // <parent node index, depth>
	typedef pair<TSparseINode *,IntPair> NodeDepth;

	vector<NodeDepth> nodeQue;
	
	int currDepth;
	int currPrIndex;								//parent index
	TSparseINode *currNode; 
	int i, end = iLength - aimLength;

	nodeQue.push_back(NodeDepth(root,IntPair(-1,0))); // put root in que

	while (!nodeQue.empty()) {						//repeats until que is empty
		currNode = nodeQue.back().first;			// node
		currPrIndex = nodeQue.back().second.first;	// parentIndex
		currDepth = nodeQue.back().second.second;	// depth
		
		nodeQue.pop_back();

		if (currDepth == aimLength) { currNode->weiSupp += weight; continue;}	// we found an instance
		if (currNode->subNode.empty()) continue;	// if node does't have successors

		for (i = currDepth + end; i!=currPrIndex; i--)		//go through all posible successors of this node
			if (currNode->hasNode(itemset[i]))
				nodeQue.push_back(NodeDepth((*currNode)[itemset[i]],IntPair(i,currDepth + 1)));
	}
};

// counts supports of all aimLength long branches in tree using examples data
void TSparseITree::considerExamples(TSparseExamples *examples, int aimLength) {	
		ITERATE(vector<TSparseExample*>,ei,examples->transaction)
			if (aimLength <= (*ei)->length)
				considerItemset((*ei)->itemset, (*ei)->length, (*ei)->weight, aimLength);
}


// deletes all leaves that have weiSupp smaler than given minSupp;
void TSparseITree::delLeafSmall(float minSupp) {	
	long countLeaf = 0;
	vector<TSparseINode *> nodeQue;
	TSparseINode *currNode;

	nodeQue.push_back(root);

	while (!nodeQue.empty()) {			//repeats until que is empty
		currNode = nodeQue.back();
		nodeQue.pop_back();

		if (!currNode->subNode.empty()) 	//if node is not leaf add children else check support
			RITERATE(TSparseISubNodes,sni,currNode->subNode) 
				nodeQue.push_back(sni->second);
		else 
			if (currNode->weiSupp < minSupp) {
				currNode->parent->subNode.erase(currNode->value);
				delete currNode;
			}
	}
};


// generates all posible association rules from tree using given confidence
PAssociationRules TSparseITree::genRules(int maxDepth, float minConf, float nOfExamples) {
	typedef pair<TSparseINode *,int> NodeDepth; //<node,depth>

	int count=0;
	vector<NodeDepth> nodeQue;
	
	PAssociationRules rules = mlnew TAssociationRules();
	
	long *itemset = new long[maxDepth];
	int currDepth;
	TSparseINode *currNode; 

	nodeQue.push_back(NodeDepth(root,0)); // put root in que

	while (!nodeQue.empty()) {						//repeats until que is empty
		currNode = nodeQue.back().first;			// node
		currDepth = nodeQue.back().second;			// depth

		nodeQue.pop_back();

		if (currDepth) itemset[currDepth - 1] = currNode->value;  // create itemset to check for confidence
	
		if (currDepth > 1)
			count += getItemsetRules(itemset, currDepth, minConf, currNode->weiSupp, nOfExamples, rules);	//creates rules from itemsets and adds them to rules

		RITERATE(TSparseISubNodes,sni,currNode->subNode)		//adds subnodes to list
			nodeQue.push_back(NodeDepth(sni->second, currDepth + 1));
	}
	
	return rules;
};

// checks if itemset generates some rules with enough confidence and adds these rules to resultset
long TSparseITree::getItemsetRules(long itemset[], int iLength, float minConf, 
								   float nAppliesBoth, float nOfExamples, 
								   PAssociationRules rules) {
	
	float nAppliesLeft, nAppliesRight;
	long count = 0;
	PAssociationRule rule;
	TExample exLeft(domain), exRight(domain);
  const bool sparseRules = domain->variables->empty();
	
	nAppliesLeft=nAppliesBoth;
	nAppliesRight=nAppliesBoth;
	
	typedef pair<int,int> IntPair; // <parent node index, depth>
	typedef pair<TSparseINode *,IntPair> NodeDepth;

	vector<NodeDepth> nodeQue;
	
	int currDepth, i, j;
	int currPrIndex; //parent index
	TSparseINode *currNode, *tempNode;
	
	long *leftItemset = new long[iLength - 1];
	float thisConf;
	
	nodeQue.push_back(NodeDepth(root,IntPair(-1,0))); // put root in que

	while (!nodeQue.empty()) {			//repeats until que is empty
		currNode = nodeQue.back().first;			// node
		currPrIndex = nodeQue.back().second.first;	// parentIndex
		currDepth = nodeQue.back().second.second;	// depth
		
		nodeQue.pop_back();

		nAppliesLeft = currNode->weiSupp;			// support of left side
		thisConf = nAppliesBoth/nAppliesLeft;

		if (thisConf >= minConf) {	// if confidence > min confidence do ... else don't folow this branch
			if (currDepth) {
				leftItemset[currDepth-1] = currNode->value;

        if (sparseRules) {
          PExample exLeftS = mlnew TExample(domain, false);
          PExample exRightS = mlnew TExample(domain, false);

   			  tempNode = root;
          for(i=0, j=0; (i<currDepth) && (j<iLength); ) {
            if (itemset[j] < leftItemset[i]) {
              exRightS->setMeta(itemset[j], TValue(1.0));
              tempNode = (*tempNode)[itemset[j]];
              j++;
            }
            else { 
              _ASSERT(itemset[j] == leftItemset[i]);
              exLeftS->setMeta(leftItemset[i], TValue(1.0));
              i++;
              j++;
            }
          }

          _ASSERT(i==currDepth);
          for(; j<iLength; j++) {
            exRightS->setMeta(itemset[j], TValue(1.0));
            tempNode = (*tempNode)[itemset[j]];
          }

/*
  				for (i=0; i<currDepth; i++)		//generating left and right example and get support of left side
					  exLeft[leftItemset[i]] = 1.0;

          tempNode = root;
          for (i=0; i< iLength; i++) 
            if (   ) {
              exRight[itemset[i]] = 1.0;
              tempNode = (*tempNode)[itemset[i]];
            }
*/
				  nAppliesRight = tempNode->weiSupp;	//get support of left side

				  //add rules
				  rule = mlnew TAssociationRule(exLeftS, exRightS, nAppliesLeft, nAppliesRight, nAppliesBoth, nOfExamples, currDepth, iLength-currDepth);
				  rules->push_back(rule);
				  count ++;
        }
        else {
  				for (i=0; i<currDepth;i++) {		//generating left and right example and get support of left side
					  exLeft[leftItemset[i]].setSpecial(false);
					  exLeft[leftItemset[i]].varType=0;
					}
        
				
				  tempNode = root;
				  for (i=0; i<iLength;i++) 
					  if (exLeft[itemset[i]].isSpecial()) {
						  exRight[itemset[i]].setSpecial(false);
						  exRight[itemset[i]].varType=0;
						  tempNode = (*tempNode)[itemset[i]];
					  }

				  nAppliesRight = tempNode->weiSupp;	//get support of left side

				  //add rules
				  rule = mlnew TAssociationRule(mlnew TExample(exLeft), mlnew TExample(exRight), nAppliesLeft, nAppliesRight, nAppliesBoth, nOfExamples, currDepth, iLength-currDepth);
				  rules->push_back(rule);
				  count ++;

				  for (i=0; i<iLength;i++) {					//deleting left and right example
					  exLeft[itemset[i]].setSpecial(true);
					  exLeft[itemset[i]].varType=1;
					  exRight[itemset[i]].setSpecial(true);
					  exRight[itemset[i]].varType=1;
				  }
			  }
      }

			if (currDepth < iLength - 1)							//if we are not too deep
				for (i = iLength - 1; i!=currPrIndex; i--)		//go through all posible successors of this node
					if (currNode->hasNode(itemset[i]))				//if this node exists among childrens
						nodeQue.push_back(NodeDepth((*currNode)[itemset[i]],IntPair(i,currDepth + 1)));
		}
	}
		
	return count;
};

/****************************************************************************************
TAssociationRulesSparseInducer
*****************************************************************************************/

TAssociationRulesSparseInducer::TAssociationRulesSparseInducer(float asupp, float aconf, int awei)
: maxItemSets(15000),
  confidence(aconf),
  support(asupp),
  nOfExamples(0.0)
{}

PAssociationRules TAssociationRulesSparseInducer::operator()(PExampleGenerator examples, const int &weightID)
{	float nMinSupp;
	long currItemSets, i,newItemSets;

	// reformat examples in sparseExm for better efficacy
	TSparseExamples sparseExm(examples, weightID);

	// build first level of tree
	TSparseITree *tree = new TSparseITree(sparseExm);
	newItemSets = tree->buildLevelOne(sparseExm.intDomain);	

	nMinSupp = support * sparseExm.fullWeight;
	
	//while it is posible to extend tree repeat...
	for(i=1;newItemSets;i++) {
		tree->considerExamples(&sparseExm,i);
		tree->delLeafSmall(nMinSupp);
		
		currItemSets = tree->countLeafNodes();

		newItemSets = tree->extendNextLevel(i, maxItemSets - currItemSets);

		//test if tree is too large
		if (newItemSets + currItemSets >= maxItemSets) {
			raiseError("too many itemsets (%i); increase 'maxItemSets'", maxItemSets);
			newItemSets = 0;
		}
	}
	
	return tree->genRules(i, confidence, sparseExm.fullWeight);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -