⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 vfem.c

📁 数据挖掘方面的源码
💻 C
📖 第 1 页 / 共 4 页
字号:
            }            foundBound = 1;         } else {            if(gMessageLevel >= 1) {               printf("Have a bound and ID may or may not converge, we don't converge.\n");            }         }      }   }   if(foundBound) {      bound = _CalculateErrorBound();   } else {      bound = -1;   }   if(!gTestOnTrain) {      /* just output the distance between matched centers */      /* load the test centers */      testCenters = VALNew();      sprintf(fileNames, "%s/%s.test", gSourceDirectory, gFileStem);      testCentersIn = fopen(fileNames, "r");      DebugError(testCentersIn == 0, "Unable to open the .test file");            if(gMessageLevel >= 2) {         printf("reading the test centers file...\n");      }      tc = ExampleRead(testCentersIn, es);      while(tc != 0) {         VALAppend(testCenters, tc);         tc = ExampleRead(testCentersIn, es);      }      fclose(testCentersIn);         /* Match learned centers with the test centers */      loss = _MatchCentersGetDistanceSquare(learnedCenters, testCenters);      /* free the test centers */      for(i = 0 ; i < VALLength(testCenters) ; i++) {         ExampleFree(VALIndex(testCenters, i));      }      VALFree(testCenters);   } else { /* Sum Square distance of example to assigned cluster */      loss = 0;      sprintf(fileNames, "%s/%s.data", gSourceDirectory, gFileStem);      exampleIn = fopen(fileNames, "r");      DebugError(exampleIn == 0, "Unable to open the .data file");            if(gMessageLevel >= 1) {         printf("opened test file, starting scan...\n");      }      e = ExampleRead(exampleIn, es);      /* HERE only tests on the first 10k test examples? Parameter?? */      while(e != 0 && tested < gMaxExamplesPerIteration) {         tested++;         lc = _FindClosestCenter(e, learnedCenters);         loss += pow(ExampleDistance(e, lc), 2);         ExampleFree(e);         e = ExampleRead(exampleIn, es);      }      if(e != 0) {         ExampleFree(e);      }      fclose(exampleIn);   }   if(finalOutput) {      printf("%.4f\t0\n", loss);   } else {      if(foundBound) {         if(bound < gThisErrorTarget) {            printf("%d\t%ld\t%d\t%.6f\t%.6f\t%.2lf\n",                gRound, learnCount, gTotalExamplesSeen,                bound, loss, ((double)learnTime) / 100);         } else {            printf("%d\t%ld\t%d\t*%.6f\t%.6f\t%.2lf\n",                gRound, learnCount, gTotalExamplesSeen,                bound, loss, ((double)learnTime) / 100);         }      } else {         if(gMessageLevel > 1) {             printf("   No bound, Current bound estimate is %f guarenteed converge %d\n",                       _CalculateErrorBound(),                       ((IterationStatsPtr)VALIndex(gStatsList,                            VALLength(gStatsList) - 1))->guarenteeIDConverge);         }         printf("%d\t%ld\t%d\t***\t%.6f\t%.2lf\n",                gRound, learnCount, gTotalExamplesSeen,                loss, ((double)learnTime) / 100);      }   }   fflush(stdout);    if(0) {//gOutputCenters) {      sprintf(fileNames, "%s-%lu.centers", gFileStem, learnCount);      centersOut = fopen(fileNames, "w");      for(i = 0 ; i < VALLength(learnedCenters) ; i++) {         ExampleWrite(VALIndex(learnedCenters, i), centersOut);//         ExampleWrite(VALIndex(learnedCenters, i), stdout);      }//      printf("------------------\n");      fclose(centersOut);   }}static int _CheckConverganceUpdateStats(IterationStatsPtr last,                                  IterationStatsPtr current) {   float thisDistance;   float bound, lowerBound, upperBound, clusterBound;   float error;   ExamplePtr eThis, eLast;   int i, j;   bound = 0;   lowerBound = 0;   upperBound = 0;   for(i = 0 ; i < VALLength(last->centroids) ; i++) {      eLast = VALIndex(last->centroids, i);      eThis = VALIndex(current->centroids, i);      clusterBound = 0;      for(j = 0 ; j < ExampleGetNumAttributes(eThis) ; j++) {         /* HERE fix for discrete ?? */         thisDistance = ExampleGetContinuousAttributeValue(eLast, j) -                    ExampleGetContinuousAttributeValue(eThis, j);         if(thisDistance < 0) {            thisDistance *= -1;         }         error = IterationStatsErrorBoundDimension(last, i, j) +	   IterationStatsErrorBoundDimension(current, i, j);         bound += pow(thisDistance, 2);         clusterBound += pow(thisDistance, 2);         lowerBound += pow(max(thisDistance - error, 0), 2);         upperBound += pow(thisDistance + error, 2);         if(gMessageLevel > 3) {           printf("e: %.4f LossDeltas: dim %.4f sum %.4f min %.4f max %.4f\n",                 error, bound, clusterBound, lowerBound, upperBound);         }      }      if(gMessageLevel > 0) {         printf("   cluster %d moved ^2 loss of %f\n", i, clusterBound);      }   }   if(gMessageLevel > 1) {      for(i = 0 ; i < VALLength(current->centroids) ; i++) {         for(j = 0 ; j < VALLength(current->centroids) ; j++) {            printf("%.3f  ", ExampleDistance(VALIndex(current->centroids, i),                                        VALIndex(current->centroids, j)));         }         printf("\n");      }   }   if(gMessageLevel > 0) {      printf("   clusters moved [ %f - %f - %f ] tau %f\n",                              lowerBound, bound, upperBound, gConvergeDelta);   }   if(bound <= gConvergeDelta / 3.0) {      current->convergeVFEM = 1;   }   if(lowerBound <= gConvergeDelta) {      current->possibleIDConverge = 1;      if(bound <= gConvergeDelta) {         current->wouldEMConverge = 1;      }      if(upperBound <= gConvergeDelta) {          current->guarenteeIDConverge = 1;      } else if(gMessageLevel > 0) {         printf("      IDEM may have or may not have converged.\n");      }   }   if(gMessageLevel > 0) {      printf("   converge info guarenteeID: %d possibleID: %d - EM / 3.0 this: %d last: %d\n", current->guarenteeIDConverge, current->possibleIDConverge, current->convergeVFEM, last->convergeVFEM);   }   if(gBatch || gAllowBadConverge) {      if(gMessageLevel > 0 && gAllowBadConverge && current->wouldEMConverge) {         printf("      found a potentially bad converge.\n");      }      return current->wouldEMConverge;   } else {      return current->guarenteeIDConverge ||           (current->convergeVFEM && last->convergeVFEM);   }}float AssignmentScaledDeltaMax(ExamplePtr e,                          ExamplePtr centroid, ExamplePtr min,                            ExamplePtr max, float epsilon) {   float observedDelta;   observedDelta = ExampleDistance(e, centroid);   // maximumDelta = max(ExampleDistance(e, min),   //                     ExampleDistance(e, max));   ///* deal with the assignErrorScale */   //return observedDelta + (gAssignErrorScale * (maximumDelta - observedDelta));   return observedDelta + epsilon;}static int _PointInBox(ExamplePtr e, ExamplePtr cMin, ExamplePtr cMax) {   int i;   for(i = 0 ; i < ExampleGetNumAttributes(e) ; i++) {      if(ExampleGetContinuousAttributeValue(e, i) <           ExampleGetContinuousAttributeValue(cMin, i) ||         ExampleGetContinuousAttributeValue(e, i) >           ExampleGetContinuousAttributeValue(cMax, i)) {         return 0;      }   }   return 1;}float AssignmentScaledDeltaMin(ExamplePtr e,                          ExamplePtr centroid, ExamplePtr min,                            ExamplePtr max, float epsilon) {   float observedDelta;   observedDelta = ExampleDistance(e, centroid);   //if(_PointInBox(e, min, max)) {   //   minimumDelta = 0;   //} else {   //   minimumDelta = min(ExampleDistance(e, min),   //           ExampleDistance(e, max));   //}   //return min(observedDelta - (gAssignErrorScale *    //                 (observedDelta - minimumDelta)), observedDelta);   return max(observedDelta - epsilon, 0);}static void _RecordGeoffBoundInfo(ExamplePtr e, IterationStatsPtr is, ExampleSpecPtr es) {   int i, j;   ExamplePtr centroid, cMin, cMax;   double denominator, numerator, weight;   double *denomValues;   /* HERE modify for negative Xs */   /* do the W-Plusses */   denomValues = MNewPtr(sizeof(double) * VALLength(is->centroids));   denominator = 0;   for(i = 0 ; i < VALLength(is->centroids) ; i++) {      centroid = VLIndex(is->centroids, i);      cMax = VLIndex(is->cMax, i);      cMin = VLIndex(is->cMin, i);      denomValues[i] = exp( (-1.0 / (2.0 * gSigmaSquare)) *        pow(AssignmentScaledDeltaMax(e, centroid, cMin, cMax,                                              is->lastBound[i]), 2));      denominator += denomValues[i];   }   for(i = 0 ; i < VALLength(is->centroids) ; i++) {      centroid = VLIndex(is->centroids, i);      cMax = VLIndex(is->cMax, i);      cMin = VLIndex(is->cMin, i);      numerator = exp( (-1.0 / (2.0 * gSigmaSquare)) *         pow(AssignmentScaledDeltaMin(e, centroid, cMin, cMax,                                             is->lastBound[i]), 2));      denominator -= denomValues[i];      denominator += numerator;      weight = (numerator / denominator);      if(weight > 1.0) {         weight = 1.0;      }      //printf("c%d num: %.4f denom: %.4f w+: %.4f\n", i, numerator,      //        denominator, weight);      // printf("   ob delta: %.4f as deltamin: %.4f\n",      //        ExampleDistance(e, centroid),      //         AssignmentScaledDeltaMin(e, centroid, cMin, cMax));      is->wPlus[i] += (numerator / denominator);      is->wPlusSquare[i] += (numerator / denominator) *                               (numerator / denominator);      for(j = 0 ; j < ExampleSpecGetNumAttributes(es) ; j++) {         if(ExampleGetContinuousAttributeValue(e, j) >= 0) {            is->wxPlus[i][j] += (numerator / denominator) *                ExampleGetContinuousAttributeValue(e, j);         } else {            is->wxMinus[i][j] += (numerator / denominator) *                ExampleGetContinuousAttributeValue(e, j);         }      }      denominator += denomValues[i];      denominator -= numerator;   }   /* do the W-Minuses */   denominator = 0;   for(i = 0 ; i < VALLength(is->centroids) ; i++) {      centroid = VLIndex(is->centroids, i);      cMax = VLIndex(is->cMax, i);      cMin = VLIndex(is->cMin, i);      denomValues[i] = exp( (-1.0 / (2.0 * gSigmaSquare)) *        pow(AssignmentScaledDeltaMin(e, centroid, cMin, cMax,                                 is->lastBound[i]), 2));      denominator += denomValues[i];   }   for(i = 0 ; i < VALLength(is->centroids) ; i++) {      centroid = VLIndex(is->centroids, i);      cMax = VLIndex(is->cMax, i);      cMin = VLIndex(is->cMin, i);      numerator = exp( (-1.0 / (2.0 * gSigmaSquare)) *         pow(AssignmentScaledDeltaMax(e, centroid, cMin, cMax,                                 is->lastBound[i]), 2));      denominator -= denomValues[i];      denominator += numerator;      //printf("c%d num: %.4f denom: %.4f w-: %.4f\n", i, numerator,      //          denominator, (numerator / denominator));      //printf("   ob delta: %.4f as deltamax: %.4f\n",      //          ExampleDistance(e, centroid),      //         AssignmentScaledDeltaMax(e, centroid, cMin, cMax));      is->wMinus[i] += (numerator / denominator);      for(j = 0 ; j < ExampleSpecGetNumAttributes(es) ; j++) {         if(ExampleGetContinuousAttributeValue(e, j) >= 0) {            is->wxMinus[i][j] += (numerator / denominator) *                 ExampleGetContinuousAttributeValue(e, j);         } else {            is->wxMinus[i][j] += (numerator / denominator) *                 ExampleGetContinuousAttributeValue(e, j);         }      }      denominator += denomValues[i];      denominator -= numerator;   }   MFreePtr(denomValues);}static int _DoClusterIterationDidConverge(FILE *data, ExampleSpecPtr es, FILE *boundData) {   int i,j;   ExamplePtr e, centroid;   long seen = 0;   int done;   IterationStatsPtr is, newIs;   double denominator, numerator;   is = VALIndex(gStatsList, VALLength(gStatsList) - 1);   if(gMessageLevel > 1) {      printf("enter iteration %d seen %d\n", gIteration, gTotalExamplesSeen);      fflush(stdout);   }   done = 0;   e = ExampleRead(data, es);   while(e != 0 && !done ) {      seen++;      gTotalExamplesSeen++;      is->n++;      if(gMessageLevel > 3) {         //IterationStatsWrite(is, es, stdout);         printf("-------------------------------\nincorporating: ");         ExampleWrite(e, stdout);         for(i = 0 ; i < VALLength(is->centroids) ; i++) {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -