⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vfkm.c

📁 数据挖掘方面的源码
💻 C
📖 第 1 页 / 共 4 页
字号:
         /* see if we are toast, if so don't do the rest of the book keep */
         breakOut = 0;
         lastIs = 0;
         thisIs = VALIndex(gStatsList, VALLength(gStatsList) - 1);
         if(!gBatch && 
                (_CalculateIDKMEarlyBound() > (40000 * gThisErrorTarget) ||
                !thisIs->foundBound)) {
            /* do one last check to see if we're on the last round */
            if(VALLength(gStatsList) > 1) {
               lastIs = VALIndex(gStatsList, VALLength(gStatsList) - 2);
               if(!(lastIs->n >= gDBSize)) {
                  breakOut = 1;
               } else {
                  breakOut = 0;
               }
            } else {
               breakOut = 0;
            }
         }

         if(breakOut) {
            /* IDKM could have stoped and we are a loooong way away */
            if(gMessageLevel > 1) {
               printf("      broke out of a round early foundBound %d current bound %f - Target * 40000: %f\n", thisIs->foundBound,
                    _CalculateIDKMEarlyBound(), 40000 * gThisErrorTarget);
               printf("        dbSize: %ld n: %ld\n", gDBSize, lastIs->n);
            }
            thisIs->foundBound = 0;
            break;
         }

         /* do the book keeping to go on to the next iteration */
         learnTime += endtime.tms_utime - starttime.tms_utime;

         /* reset the file for the next iteration */
         if(!gStdin) {
            /* in stream mode we never close & reopen the file */
            fclose(exampleIn);
            sprintf(fileNames, "%s/%s.data", gSourceDirectory, gFileStem);
            exampleIn = fopen(fileNames, "r");
            DebugError(exampleIn == 0, "Unable to open the .data file");
         }

         if(gDoTests) {
            _doTests(es, thisIs->centroids, gIteration, learnTime, 0);
         }

         gIteration++;

         /* reset the timer for the next iteration */
         times(&starttime);
      } /* end of a round */


      /* test to see if we can get anything else from the file, if not
            then we won't be able to increase n and we are finished */
      if(!gStdin) {
         e = ExampleRead(exampleIn, es);
         if(e == 0) {
            fileDone = 1;
         } else {
            fileDone = 0;
            ExampleFree(e);
         }

         /* close the file for the beginning of the next round can open */
         fclose(exampleIn);
      } else {
         if(gN >= gMaxExamplesPerIteration) {
            fileDone = 1;
         } else {
            fileDone = 0;
         }
      }

      thisIs = VALIndex(gStatsList, VALLength(gStatsList) - 1);

      if(thisIs->foundBound && thisIs->guarenteeIDConverge) {
         /* re-estimate l, update effective delta */
         gEstimatedNumIterations = gIteration * 1.5;

         lastDelta = gNeededDelta;
         gNeededDelta = 1.0 - pow(1.0 - gDelta, 1.0 /
                 (float)(gD * gNumClusters * gEstimatedNumIterations));

         nIncrement = pow(thisIs->maxEkd/gTargetEkd, 2) *
                 (log(2.0 / gNeededDelta) / log(2.0 / lastDelta)) *
                      gN * 1.1;

         //if(gUseNormalApprox) {
         //   /* hueristic to correct for normal being tighter */
         //   nIncrement /= 5;
         //}

         if(gMessageLevel > 1) {
            printf("suggested gN increment: %ld current gN: %ld\n", nIncrement, gN);
         }

         if(nIncrement > gN) {
            gN += nIncrement;
         } else {
            gN *= 2;
         }
         if(gN > gMaxExamplesPerIteration) {
            gN = gMaxExamplesPerIteration;
         }
      } else {
         gN *= 2;
      }

      if(gIterationNs != 0) {
         MFreePtr(gIterationNs);
         gIterationNs = 0;
         gNumIterationNs = 0;
      }
      if(!gBatch && gFancyStop && thisIs->foundBound) {
         if(gMessageLevel > 2) { IterationStatsWrite(thisIs, stdout); }

         CalculateExamplesPerIteration(gStatsList, &gIterationNs,
                         &gNumIterationNs);

         /* normalize & update to # examples per iteration */
         iterationNSum = 0;
         for(i = 0 ; i < gNumIterationNs ; i++) {
            iterationNSum += gIterationNs[i];
         }

         if(iterationNSum > gDBSize * gNumIterationNs) {
            /* go on to a final round */
            gN = gDBSize + 1; /* make sure it's final */
            if(gIterationNs != 0) {
               MFreePtr(gIterationNs);
               gIterationNs = 0;
               gNumIterationNs = 0;
            }
         } else {
            if(iterationNSum < gN * gNumIterationNs) {
               /* use the actual Nis only if their sum is larger than 
                    the number of examples suggested by gN,
                    otherwise use their ratios as the percent of gN * l
                    to use in each iteration */
               for(i = 0 ; i < gNumIterationNs ; i++) {
                  gIterationNs[i] = ((gIterationNs[i] / iterationNSum) * gN) *
                          gNumIterationNs;
               }
            }

            if(gMessageLevel > 1) {
               printf("ni:\n");
               for(i = 0 ; i < gNumIterationNs ; i++) {
                  printf("\t%d: %.4f\n", i, gIterationNs[i]);
               }
            }
         }
      }

      bound = _CalculateErrorBound();
      if(gMessageLevel > 1) {
         printf("   found bound %d bound: %f guarentee converge %d file done %d\n", thisIs->foundBound, bound, thisIs->guarenteeIDConverge, fileDone);
         printf("===");
         fflush(stdout);
      }

   } while(!(thisIs->guarenteeIDConverge && thisIs->foundBound && 
                bound <= gThisErrorTarget) && 
           !(gAllowBadConverge && thisIs->wouldKMConverge && 
                thisIs->foundBound && bound <= gThisErrorTarget) &&
            !fileDone && !gBatch);



   times(&endtime);
   learnTime += endtime.tms_utime - starttime.tms_utime;

   _OutputAllCentroids(bound);


   _doTests(es, 
      ((IterationStatsPtr)VALIndex(gStatsList,
             VALLength(gStatsList) - 1))->centroids,
                   gIteration, learnTime, 0);

   fclose(stdin);
   return 0;
}


/* God help you if you need to comprehend or change this function */
/*  Get our paper and read it */
void CalculateExamplesPerIteration(VoidAListPtr last, float **nextNiOut, int *num) {
   IterationStatsPtr lastIs, currentIs;
   float **ni, **a, **b, **c, *o, **alpha, **f, *nTotal;
   float eLast, thisDelta;
   int i, j, k;
   float tmp;

   *num = VALLength(last) - 1;
   (*nextNiOut) = MNewPtr(sizeof(float) * *num);

   // the (float)(gNumClusters * *num)) part may not supposed to be *, dunno
   thisDelta = 1.0 - pow(1.0 - gDelta, 1.0 / (float)(gNumClusters * *num));

   o     = MNewPtr(sizeof(float) * gNumClusters);

   a     = MNewPtr(sizeof(float *) * gNumClusters);
   b     = MNewPtr(sizeof(float *) * gNumClusters);
   c     = MNewPtr(sizeof(float *) * gNumClusters);
   f     = MNewPtr(sizeof(float *) * gNumClusters);
   ni    = MNewPtr(sizeof(float *) * gNumClusters);
   alpha = MNewPtr(sizeof(float *) * gNumClusters);

   nTotal = MNewPtr(sizeof(float) * *num);
   for(i = 0 ; i < *num; i++) {
      nTotal[i] = 0;
   }

   for(i = 0 ; i < gNumClusters ; i++) {
      a[i] = MNewPtr(sizeof(float) * *num);
      b[i] = MNewPtr(sizeof(float) * *num);
      c[i] = MNewPtr(sizeof(float) * *num);
      f[i] = MNewPtr(sizeof(float) * *num);
      ni[i] = MNewPtr(sizeof(float) * *num);
      alpha[i] = MNewPtr(sizeof(float) * *num);
   }

   for(k = 0 ; k < gNumClusters ; k++) {
      for(i = 0 ; i < *num; i++) {
         lastIs = VALIndex(last, i);
         currentIs = VALIndex(last, i + 1);

         nTotal[i] += lastIs->nHat[k];

         eLast = lastIs->lastBound[k];

         /* calcualte a */
         if(i == 0) {
            a[k][i] = 0;
         } else {
            a[k][i] = 1.0 / ((float)lastIs->nHat[k] * eLast);
            tmp = 0;
            for(j = 0 ; j < gD ; j++) {
               tmp += pow(max(lastIs->deltaPlus[k][j],
                           lastIs->deltaMinus[k][j]), 2);
            }
            a[k][i] *= sqrt(tmp);
         }

         /* calculate b */
         if(i == 0) {
            b[k][i] = 0;
         } else {
            b[k][i] = (float)lastIs->nPlus[k] / 
                   ((float)lastIs->nHat[k] * eLast);
         }

         /* calculate alpha */
         alpha[k][i] = a[k][i] / pow(1 - b[k][i] * eLast, 2);
      }
   }


   for(k = 0 ; k < gNumClusters ; k++) {
      for(i = 0 ; i < *num; i++) {
         /* calculate c */
         lastIs = VALIndex(last, i);
         currentIs = VALIndex(last, i + 1);

         eLast = lastIs->lastBound[k];

         c[k][i] = sqrt((pow(gR, 2) * log(2.0 / thisDelta)) / 
                      (2.0 * (1.0 - b[k][i] * eLast)));
         for(j = i + 1 ; j < *num ; j++) {
            c[k][i] *= alpha[k][j];
         }

         /* calculate the o's */
         o[k] = 0;

         tmp = (a[k][i] * b[k][i] * pow(eLast, 2)) / 
                   pow(1 - b[k][i] * eLast, 2);

         for(j = i + 1 ; j < *num ; j++) {
            tmp *= alpha[k][j];
         }

         o[k] += tmp;
      }

      o[k] *= -1;
   }


   /* calculate the nis */
   for(k = 0 ; k < gNumClusters ; k++) {
      for(i = 0 ; i < *num ; i++) {
         tmp = 0;
         for(j = 0 ; j < *num ; j++) {
            tmp += pow(c[k][i] * pow(c[k][j],2), 1.0/3.0);
         }
         ni[k][i] = pow(tmp, 2) / 
              pow(sqrt(gThisErrorTarget / (float)gNumClusters) - o[k], 2);
      }
   }

   /* calculate Fs */
   for(k = 0 ; k < gNumClusters ; k++) {
      for(i = 0 ; i < *num ; i++) {
         lastIs = VALIndex(last, i);
         currentIs = VALIndex(last, i + 1);

         f[k][i] = (float)(lastIs->nHat[k]) / (float)(nTotal[i]);
      }
   }

   if(gMessageLevel > 1) {
      /* do some printing */
      for(i = 0 ; i < *num ; i++) {
         printf("iteration %d nTotal: %f\n", i, nTotal[i]);
         for(k = 0 ; k < gNumClusters ; k++) {
            printf("c%d: ", k);
            printf("a %.3f b %.3f c %.3f ", a[k][i], b[k][i], c[k][i]);
            printf("o %.3f alp %.3f ", o[k], alpha[k][i]);
            printf("ni %.3f f %.3f", ni[k][i], f[k][i]);
            printf("\n");
         }
      }
   }

   /* find the maximums for each iteration */
   for(i = 0 ; i < *num ; i++) {
      (*nextNiOut)[i] = 0;

      for(k = 0 ; k < gNumClusters ; k++) {
         tmp = ni[k][i] / f[k][i];
         if(tmp > (*nextNiOut)[i]) {
            (*nextNiOut)[i] = tmp;
         }
      }
   }

   /* FREE EVERYTHING */
   MFreePtr(o);
   MFreePtr(nTotal);
   for(i = 0 ; i < gNumClusters ; i++) {
      MFreePtr(a[i]);
      MFreePtr(b[i]);
      MFreePtr(c[i]);
      MFreePtr(f[i]);
      MFreePtr(ni[i]);
      MFreePtr(alpha[i]);
   }

   MFreePtr(a);
   MFreePtr(b);
   MFreePtr(c);
   MFreePtr(f);
   MFreePtr(ni);
   MFreePtr(alpha);
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -