⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vfbn1.c

📁 数据挖掘方面的源码
💻 C
📖 第 1 页 / 共 5 页
字号:
         srcParentOfDstIndex = BNNodeLookupParentIndex(dstNode, srcNode);

         if(dstParentOfSrcIndex == -1 && srcParentOfDstIndex == -1) {
            /* nodes unrelated, make 2 copies, one with link each way */

            /* copy one */
            preAllocation = MGetTotalAllocation();
            newBN = BNCloneNoCPTs(bn);

            BNFlushStructureCache(newBN);

            srcNode = BNGetNodeByID(newBN, i);
            dstNode = BNGetNodeByID(newBN, j);

            BNNodeAddParent(srcNode, dstNode);
            BNNodeInitCPT(srcNode);

            _InitUserData(newBN, bn);
            data = BNNodeGetUserData(srcNode);
            data->isChangedFromCurrent = 1;
            netData = BNGetUserData(newBN);
            netData->changedOne = srcNode;
            netData->changeComplexity = 2;
            netData->scoreRange = _GetNetScoreRange(newBN);

            isLinkCovered = 0;

            if((gMaxParentsPerNode == -1 ||
                   BNNodeGetNumParents(srcNode) <= gMaxParentsPerNode) &&

                   (gMaxParameterGrowthMult == -1 ||
                    (BNGetNumParameters(newBN) < 
                        (gMaxParameterGrowthMult * gInitialParameterCount))) &&

                   (gMaxParameterCount == -1 ||
                    (BNNodeGetNumParameters(srcNode) <= 
                        gMaxParameterCount)) &&

             (gLimitBytes == -1 ||
              ((MGetTotalAllocation() - preAllocation) <  gMaxBytesPerModel))&&

                   !BNHasCycle(newBN)) {
               VLAppend(list, newBN);
               isLinkCovered = _IsLinkCoveredIn(dstNode, srcNode, newBN);
            } else {
//printf("Preallocation %ld current %ld limit %ld\n", preAllocation, MGetTotalAllocation(), gMaxBytesPerModel);

               _FreeUserData(newBN);
               BNFree(newBN);
            }

            if(!isLinkCovered) {
               /* if it is covered then copy two would be equivilant to
                        copy one, so don't bother adding it */
               /* copy two */
               preAllocation = MGetTotalAllocation();
               newBN = BNCloneNoCPTs(bn);
               BNFlushStructureCache(newBN);

               srcNode = BNGetNodeByID(newBN, i);
               dstNode = BNGetNodeByID(newBN, j);

               BNNodeAddParent(dstNode, srcNode);
               BNNodeInitCPT(dstNode);

               _InitUserData(newBN, bn);
               data = BNNodeGetUserData(dstNode);
               data->isChangedFromCurrent = 1;
               netData = BNGetUserData(newBN);
               netData->changedOne = dstNode;
               netData->changeComplexity = 2;
               netData->scoreRange = _GetNetScoreRange(newBN);

               if((gMaxParentsPerNode == -1 ||
                      BNNodeGetNumParents(dstNode) <= gMaxParentsPerNode) &&

                    (gMaxParameterGrowthMult == -1 ||
                       (BNGetNumParameters(newBN) < 
                        (gMaxParameterGrowthMult * gInitialParameterCount))) &&

                   (gMaxParameterCount == -1 ||
                    (BNNodeGetNumParameters(dstNode) <= 
                        gMaxParameterCount)) &&

             (gLimitBytes == -1 ||
              ((MGetTotalAllocation() - preAllocation) <  gMaxBytesPerModel))&&

                      !BNHasCycle(newBN)) {
                  VLAppend(list, newBN);
               } else {
                  _FreeUserData(newBN);
                  BNFree(newBN);
               }
            }
         } else {
            /* one is a parent of the other, make 2 copies, one with
                     link removed, one with link reversed */

            /* copy one: remove */
            /* removing a link changes the structure and will not
                        be equivilant to any of the candidates */
            newBN = BNCloneNoCPTs(bn);
            BNFlushStructureCache(newBN);

            preAllocation = MGetTotalAllocation();

            srcNode = BNGetNodeByID(newBN, i);
            dstNode = BNGetNodeByID(newBN, j);

            if(dstParentOfSrcIndex != -1) {
               BNNodeRemoveParent(srcNode, dstParentOfSrcIndex);
               BNNodeInitCPT(srcNode);

               _InitUserData(newBN, bn);
               data = BNNodeGetUserData(srcNode);
               data->isChangedFromCurrent = 1;
               netData = BNGetUserData(newBN);
               netData->changedOne = srcNode;
               netData->changeComplexity = -1;
               netData->scoreRange = _GetNetScoreRange(newBN);
            } else {
               BNNodeRemoveParent(dstNode, srcParentOfDstIndex);
               BNNodeInitCPT(dstNode);

               _InitUserData(newBN, bn);
               data = BNNodeGetUserData(dstNode);
               data->isChangedFromCurrent = 1;
               netData = BNGetUserData(newBN);
               netData->changedOne = dstNode;
               netData->changeComplexity = -1;
               netData->scoreRange = _GetNetScoreRange(newBN);
            }

            if(!BNHasCycle(newBN) &&

               (gMaxParameterGrowthMult == -1 ||
                  (BNGetNumParameters(newBN) < 
                  (gMaxParameterGrowthMult * gInitialParameterCount))) &&

               (gMaxParameterCount == -1 ||
                  (BNNodeGetNumParameters(srcNode) <= 
                   gMaxParameterCount  &&
                  BNNodeGetNumParameters(dstNode) <= 
                                         gMaxParameterCount))&&


               (gLimitBytes == -1 ||
                    ((MGetTotalAllocation() - preAllocation) <  
                                                gMaxBytesPerModel))) {
               VLAppend(list, newBN);
            } else {
               _FreeUserData(newBN);
               BNFree(newBN);
            }

            /* copy two: reverse */
            if(!gNoReverse) {
               preAllocation = MGetTotalAllocation();
               newBN = BNCloneNoCPTs(bn);
               BNFlushStructureCache(newBN);

               srcNode = BNGetNodeByID(newBN, i);
               dstNode = BNGetNodeByID(newBN, j);

               if(dstParentOfSrcIndex != -1) {
                  BNNodeRemoveParent(srcNode, dstParentOfSrcIndex);
                  BNNodeAddParent(dstNode, srcNode);
                  BNNodeInitCPT(srcNode);
                  BNNodeInitCPT(dstNode);

                  _InitUserData(newBN, bn);
                  data = BNNodeGetUserData(srcNode);
                  data->isChangedFromCurrent = 1;
                  data = BNNodeGetUserData(dstNode);
                  data->isChangedFromCurrent = 1;
                  netData = BNGetUserData(newBN);
                  netData->changedOne = srcNode;
                  netData->changedTwo = dstNode;
                  netData->changeComplexity = 1;
                  netData->scoreRange = _GetNetScoreRange(newBN);

                  isLinkCovered = _IsLinkCoveredIn(srcNode, dstNode, newBN);
               } else {
                  BNNodeRemoveParent(dstNode, srcParentOfDstIndex);
                  BNNodeAddParent(srcNode, dstNode);
                  BNNodeInitCPT(srcNode);
                  BNNodeInitCPT(dstNode);

                  _InitUserData(newBN, bn);
                  data = BNNodeGetUserData(srcNode);
                  data->isChangedFromCurrent = 1;
                  data = BNNodeGetUserData(dstNode);
                  data->isChangedFromCurrent = 1;
                  netData = BNGetUserData(newBN);
                  netData->changedOne = srcNode;
                  netData->changedTwo = dstNode;
                  netData->changeComplexity = 1;
                  netData->scoreRange = _GetNetScoreRange(newBN);

                  isLinkCovered = _IsLinkCoveredIn(dstNode, srcNode, newBN);
               }

               if((gMaxParentsPerNode == -1 ||
                      (BNNodeGetNumParents(srcNode) <= gMaxParentsPerNode &&
                       BNNodeGetNumParents(dstNode) <= gMaxParentsPerNode)) &&

                !isLinkCovered &&

             (gLimitBytes == -1 ||
              ((MGetTotalAllocation() - preAllocation) <  gMaxBytesPerModel))&&

                (gMaxParameterGrowthMult == -1 ||
                    (BNGetNumParameters(newBN) < 
                        (gMaxParameterGrowthMult * gInitialParameterCount))) &&

               (gMaxParameterCount == -1 ||
                  (BNNodeGetNumParameters(srcNode) <= 
                   gMaxParameterCount  &&
                  BNNodeGetNumParameters(dstNode) <= 
                                         gMaxParameterCount))&&

                !BNHasCycle(newBN)) {
                  VLAppend(list, newBN);
               } else {
                  _FreeUserData(newBN);
                  BNFree(newBN);
               }
            }
         }
      }
   }
}

static void _OptimizedAddSample(BeliefNet current, VoidListPtr newNets, 
                                               ExamplePtr e) {
   BNUserData netData;
   int i;

   BNAddSample(current, e);

   for(i = 0 ; i < VLLength(newNets) ; i++) {
      netData = BNGetUserData(VLIndex(newNets, i));

      if(netData->changedOne) {
         BNNodeAddSample(netData->changedOne, e);
      }
      if(netData->changedTwo) {
         BNNodeAddSample(netData->changedTwo, e);
      }
   }
}

static float _ScoreBN(BeliefNet bn) {
   int numCPTRows;
   int i,j,k;
   BeliefNetNode bnn;
   double score;
   double prob, numSamples;

   /* score is sum over atribs, over parent combos, over attrib value of:
         p_ijk lg P_ijk - p_ij lg p_ij */


   score = 0;

   for(i = 0 ; i < BNGetNumNodes(bn) ; i++) {
      bnn = BNGetNodeByID(bn, i);

      numSamples = BNNodeGetNumSamples(bnn);
      numCPTRows = BNNodeGetNumCPTRows(bnn);

      for(j = 0 ; j < numCPTRows ; j++) {
         /* HACK for efficiency break BNN ADT */
         for(k = 0 ; k < BNNodeGetNumValues(bnn) ; k++) {
            if(numSamples != 0) {
               prob = bnn->eventCounts[j][k] / numSamples;
               DebugMessage(1, 4,
                 "   i: %d j: %d k: %d eventcount: %lf rowcount: %lf\n",
                     i, j, k, bnn->eventCounts[j][k],
                       bnn->rowCounts[j]);
               if(prob != 0) {
                  score += prob * log(prob);
                  DebugMessage(1, 4, "   Score now: %lf\n", score);
               }
            }
         }
         if(numSamples != 0) {
            prob = bnn->rowCounts[j] / BNNodeGetNumSamples(bnn);
            DebugMessage(1, 4,
              "   i: %d j: %d rowcount: %lf numsamples: %lf\n",
                  i, j, bnn->rowCounts[j], 
                            numSamples);
            if(prob != 0) {
               score -= prob * log(prob);
               DebugMessage(1, 4, "   Score now: %lf\n", score);
            }
         }
      }
   }

   return score;
}

static double _MaxPLgP(double p, float numSamples) {
   double maxP, minP, bound;

   if(numSamples == 0) {
      /* return the highest value of the function */
      return 0;
   }

   bound = StatHoeffdingBoundOne(1.0, gDeltaStar, numSamples);
   maxP = min(p + bound, 1.0);
   minP = max(p - bound, 0.0);

   if(minP == 0) {
      /* plgp is 0 when p is 0, this is a max */
      return 0;
   } else {
      return max(minP * log(minP), maxP * log(maxP));
   }
}

static double _MinPLgP(double p, float numSamples) {
   double maxP, minP, bound;

   if(numSamples == 0) {
      /* return the lowest value of the function */
      return kOneOverE * log(kOneOverE);
   }

   bound = StatHoeffdingBoundOne(1.0, gDeltaStar, numSamples);
   maxP = min(p + bound, 1.0);
   minP = max(p - bound, 0.0);

   if(minP < kOneOverE && kOneOverE < maxP) {
      /* this is the min value of the function, if it is range use it */
      return kOneOverE * log(kOneOverE);
   } else if(minP == 0) {
      /* plgp is 0 when p is 0, this is a max but will mess up in log so 
            special case it by returning value of the other (which
            can't be larger than the function's max) */
      return maxP * log(maxP);
   } else {
      return min(minP * log(minP), maxP * log(maxP));
   }
}


int _SortDoublesCompare(const void *one, const void *two) {
   if(*(double *)one == *(double *)two) {
      return 0;
   } else if(*(double *)one > *(double *)two) {
      return 1;
   } else {
      return -1;
   }
}

/* should work as long as no probabilities are greater than 1/e,
   two important observations are that slope increases as move from 1/e
    and it increases faster as p moves towards 0 than towards 1 */
static double _GetMaxPLgPCPTRowSum(BeliefNetNode bnn, double *probs, 
                                  double targetPij, double pijkEpsilon) {
   double *outputValues;
   double sum, plgp, maxDelta;
   int i;

   outputValues = MNewPtr(sizeof(double) * BNNodeGetNumValues(bnn));

   /* do one pass, set output values to minimum & find the current sum */
   sum = 0;
   for(i = 0 ; i < BNNodeGetNumValues(bnn) ; i++) {
      outputValues[i] = max(probs[i] - pijkEpsilon, 0);
      sum += outputValues[i];
   }
   
   /* do another pass, moving the larger probs back up till sum is reached */
   /*   and calculate plgp */
   plgp = 0;
   for(i = BNNodeGetNumValues(bnn) - 1 ; i >= 0 ; i--) {
      if(sum < targetPij) {
         maxDelta = (probs[i] + pijkEpsilon) - outputValues[i];
         if(sum + maxDelta < targetPij) {
            outputValues[i] += maxDelta;
            sum += maxDelta;
         } else {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -