📄 vfkm.c
字号:
/* see if we are toast, if so don't do the rest of the book keep */
breakOut = 0;
lastIs = 0;
thisIs = VALIndex(gStatsList, VALLength(gStatsList) - 1);
if(!gBatch &&
(_CalculateIDKMEarlyBound() > (40000 * gThisErrorTarget) ||
!thisIs->foundBound)) {
/* do one last check to see if we're on the last round */
if(VALLength(gStatsList) > 1) {
lastIs = VALIndex(gStatsList, VALLength(gStatsList) - 2);
if(!(lastIs->n >= gDBSize)) {
breakOut = 1;
} else {
breakOut = 0;
}
} else {
breakOut = 0;
}
}
if(breakOut) {
/* IDKM could have stoped and we are a loooong way away */
if(gMessageLevel > 1) {
printf(" broke out of a round early foundBound %d current bound %f - Target * 40000: %f\n", thisIs->foundBound,
_CalculateIDKMEarlyBound(), 40000 * gThisErrorTarget);
printf(" dbSize: %ld n: %ld\n", gDBSize, lastIs->n);
}
thisIs->foundBound = 0;
break;
}
/* do the book keeping to go on to the next iteration */
learnTime += endtime.tms_utime - starttime.tms_utime;
/* reset the file for the next iteration */
if(!gStdin) {
/* in stream mode we never close & reopen the file */
fclose(exampleIn);
sprintf(fileNames, "%s/%s.data", gSourceDirectory, gFileStem);
exampleIn = fopen(fileNames, "r");
DebugError(exampleIn == 0, "Unable to open the .data file");
}
if(gDoTests) {
_doTests(es, thisIs->centroids, gIteration, learnTime, 0);
}
gIteration++;
/* reset the timer for the next iteration */
times(&starttime);
} /* end of a round */
/* test to see if we can get anything else from the file, if not
then we won't be able to increase n and we are finished */
if(!gStdin) {
e = ExampleRead(exampleIn, es);
if(e == 0) {
fileDone = 1;
} else {
fileDone = 0;
ExampleFree(e);
}
/* close the file for the beginning of the next round can open */
fclose(exampleIn);
} else {
if(gN >= gMaxExamplesPerIteration) {
fileDone = 1;
} else {
fileDone = 0;
}
}
thisIs = VALIndex(gStatsList, VALLength(gStatsList) - 1);
if(thisIs->foundBound && thisIs->guarenteeIDConverge) {
/* re-estimate l, update effective delta */
gEstimatedNumIterations = gIteration * 1.5;
lastDelta = gNeededDelta;
gNeededDelta = 1.0 - pow(1.0 - gDelta, 1.0 /
(float)(gD * gNumClusters * gEstimatedNumIterations));
nIncrement = pow(thisIs->maxEkd/gTargetEkd, 2) *
(log(2.0 / gNeededDelta) / log(2.0 / lastDelta)) *
gN * 1.1;
//if(gUseNormalApprox) {
// /* hueristic to correct for normal being tighter */
// nIncrement /= 5;
//}
if(gMessageLevel > 1) {
printf("suggested gN increment: %ld current gN: %ld\n", nIncrement, gN);
}
if(nIncrement > gN) {
gN += nIncrement;
} else {
gN *= 2;
}
if(gN > gMaxExamplesPerIteration) {
gN = gMaxExamplesPerIteration;
}
} else {
gN *= 2;
}
if(gIterationNs != 0) {
MFreePtr(gIterationNs);
gIterationNs = 0;
gNumIterationNs = 0;
}
if(!gBatch && gFancyStop && thisIs->foundBound) {
if(gMessageLevel > 2) { IterationStatsWrite(thisIs, stdout); }
CalculateExamplesPerIteration(gStatsList, &gIterationNs,
&gNumIterationNs);
/* normalize & update to # examples per iteration */
iterationNSum = 0;
for(i = 0 ; i < gNumIterationNs ; i++) {
iterationNSum += gIterationNs[i];
}
if(iterationNSum > gDBSize * gNumIterationNs) {
/* go on to a final round */
gN = gDBSize + 1; /* make sure it's final */
if(gIterationNs != 0) {
MFreePtr(gIterationNs);
gIterationNs = 0;
gNumIterationNs = 0;
}
} else {
if(iterationNSum < gN * gNumIterationNs) {
/* use the actual Nis only if their sum is larger than
the number of examples suggested by gN,
otherwise use their ratios as the percent of gN * l
to use in each iteration */
for(i = 0 ; i < gNumIterationNs ; i++) {
gIterationNs[i] = ((gIterationNs[i] / iterationNSum) * gN) *
gNumIterationNs;
}
}
if(gMessageLevel > 1) {
printf("ni:\n");
for(i = 0 ; i < gNumIterationNs ; i++) {
printf("\t%d: %.4f\n", i, gIterationNs[i]);
}
}
}
}
bound = _CalculateErrorBound();
if(gMessageLevel > 1) {
printf(" found bound %d bound: %f guarentee converge %d file done %d\n", thisIs->foundBound, bound, thisIs->guarenteeIDConverge, fileDone);
printf("===");
fflush(stdout);
}
} while(!(thisIs->guarenteeIDConverge && thisIs->foundBound &&
bound <= gThisErrorTarget) &&
!(gAllowBadConverge && thisIs->wouldKMConverge &&
thisIs->foundBound && bound <= gThisErrorTarget) &&
!fileDone && !gBatch);
times(&endtime);
learnTime += endtime.tms_utime - starttime.tms_utime;
_OutputAllCentroids(bound);
_doTests(es,
((IterationStatsPtr)VALIndex(gStatsList,
VALLength(gStatsList) - 1))->centroids,
gIteration, learnTime, 0);
fclose(stdin);
return 0;
}
/* God help you if you need to comprehend or change this function */
/* Get our paper and read it */
void CalculateExamplesPerIteration(VoidAListPtr last, float **nextNiOut, int *num) {
IterationStatsPtr lastIs, currentIs;
float **ni, **a, **b, **c, *o, **alpha, **f, *nTotal;
float eLast, thisDelta;
int i, j, k;
float tmp;
*num = VALLength(last) - 1;
(*nextNiOut) = MNewPtr(sizeof(float) * *num);
// the (float)(gNumClusters * *num)) part may not supposed to be *, dunno
thisDelta = 1.0 - pow(1.0 - gDelta, 1.0 / (float)(gNumClusters * *num));
o = MNewPtr(sizeof(float) * gNumClusters);
a = MNewPtr(sizeof(float *) * gNumClusters);
b = MNewPtr(sizeof(float *) * gNumClusters);
c = MNewPtr(sizeof(float *) * gNumClusters);
f = MNewPtr(sizeof(float *) * gNumClusters);
ni = MNewPtr(sizeof(float *) * gNumClusters);
alpha = MNewPtr(sizeof(float *) * gNumClusters);
nTotal = MNewPtr(sizeof(float) * *num);
for(i = 0 ; i < *num; i++) {
nTotal[i] = 0;
}
for(i = 0 ; i < gNumClusters ; i++) {
a[i] = MNewPtr(sizeof(float) * *num);
b[i] = MNewPtr(sizeof(float) * *num);
c[i] = MNewPtr(sizeof(float) * *num);
f[i] = MNewPtr(sizeof(float) * *num);
ni[i] = MNewPtr(sizeof(float) * *num);
alpha[i] = MNewPtr(sizeof(float) * *num);
}
for(k = 0 ; k < gNumClusters ; k++) {
for(i = 0 ; i < *num; i++) {
lastIs = VALIndex(last, i);
currentIs = VALIndex(last, i + 1);
nTotal[i] += lastIs->nHat[k];
eLast = lastIs->lastBound[k];
/* calcualte a */
if(i == 0) {
a[k][i] = 0;
} else {
a[k][i] = 1.0 / ((float)lastIs->nHat[k] * eLast);
tmp = 0;
for(j = 0 ; j < gD ; j++) {
tmp += pow(max(lastIs->deltaPlus[k][j],
lastIs->deltaMinus[k][j]), 2);
}
a[k][i] *= sqrt(tmp);
}
/* calculate b */
if(i == 0) {
b[k][i] = 0;
} else {
b[k][i] = (float)lastIs->nPlus[k] /
((float)lastIs->nHat[k] * eLast);
}
/* calculate alpha */
alpha[k][i] = a[k][i] / pow(1 - b[k][i] * eLast, 2);
}
}
for(k = 0 ; k < gNumClusters ; k++) {
for(i = 0 ; i < *num; i++) {
/* calculate c */
lastIs = VALIndex(last, i);
currentIs = VALIndex(last, i + 1);
eLast = lastIs->lastBound[k];
c[k][i] = sqrt((pow(gR, 2) * log(2.0 / thisDelta)) /
(2.0 * (1.0 - b[k][i] * eLast)));
for(j = i + 1 ; j < *num ; j++) {
c[k][i] *= alpha[k][j];
}
/* calculate the o's */
o[k] = 0;
tmp = (a[k][i] * b[k][i] * pow(eLast, 2)) /
pow(1 - b[k][i] * eLast, 2);
for(j = i + 1 ; j < *num ; j++) {
tmp *= alpha[k][j];
}
o[k] += tmp;
}
o[k] *= -1;
}
/* calculate the nis */
for(k = 0 ; k < gNumClusters ; k++) {
for(i = 0 ; i < *num ; i++) {
tmp = 0;
for(j = 0 ; j < *num ; j++) {
tmp += pow(c[k][i] * pow(c[k][j],2), 1.0/3.0);
}
ni[k][i] = pow(tmp, 2) /
pow(sqrt(gThisErrorTarget / (float)gNumClusters) - o[k], 2);
}
}
/* calculate Fs */
for(k = 0 ; k < gNumClusters ; k++) {
for(i = 0 ; i < *num ; i++) {
lastIs = VALIndex(last, i);
currentIs = VALIndex(last, i + 1);
f[k][i] = (float)(lastIs->nHat[k]) / (float)(nTotal[i]);
}
}
if(gMessageLevel > 1) {
/* do some printing */
for(i = 0 ; i < *num ; i++) {
printf("iteration %d nTotal: %f\n", i, nTotal[i]);
for(k = 0 ; k < gNumClusters ; k++) {
printf("c%d: ", k);
printf("a %.3f b %.3f c %.3f ", a[k][i], b[k][i], c[k][i]);
printf("o %.3f alp %.3f ", o[k], alpha[k][i]);
printf("ni %.3f f %.3f", ni[k][i], f[k][i]);
printf("\n");
}
}
}
/* find the maximums for each iteration */
for(i = 0 ; i < *num ; i++) {
(*nextNiOut)[i] = 0;
for(k = 0 ; k < gNumClusters ; k++) {
tmp = ni[k][i] / f[k][i];
if(tmp > (*nextNiOut)[i]) {
(*nextNiOut)[i] = tmp;
}
}
}
/* FREE EVERYTHING */
MFreePtr(o);
MFreePtr(nTotal);
for(i = 0 ; i < gNumClusters ; i++) {
MFreePtr(a[i]);
MFreePtr(b[i]);
MFreePtr(c[i]);
MFreePtr(f[i]);
MFreePtr(ni[i]);
MFreePtr(alpha[i]);
}
MFreePtr(a);
MFreePtr(b);
MFreePtr(c);
MFreePtr(f);
MFreePtr(ni);
MFreePtr(alpha);
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -