xcs.c
  /* execute the action and get reward
   * (correct represents a boolean indicating whether the classification was correct or wrong) */
  reward = doAction(state, action, correct);

  /* remember the system error */
  *sysError = (double)(abs((int)(reward - predictionArray[action]))) / getPaymentRange();

  /* Clean up */
  freeSet(&mset);
  free(predictionArray);
}

/*########################## in a multi step environment ##########################*/

/**
 * Start one multi-step experiment.
 */
void startOneMultiStepExperiment(struct XCS *xcs, FILE *tabFile, struct xClassifierSet **pop, double **averages, int expnr)
{
  int counter, trialCounter, exploit=1;
  int *stepToFood;
  double *sysError;
  char *state;

  assert((stepToFood = (int *)calloc(xcs->testFrequency, sizeof(int))) != 0);
  assert((sysError = (double *)calloc(xcs->testFrequency, sizeof(double))) != 0);
  assert((state = (char *)calloc(getConditionLength()+1, sizeof(char))) != 0);

  /* set the \0 char at the end of the state string */
  state[getConditionLength()] = '\0';

  /* Start one experiment; trialCounter counts the number of exploit problems (trials) */
  for( trialCounter=0, counter=0 ; trialCounter < xcs->maxNrSteps ; trialCounter += exploit) {
    /* switch between exploration and exploitation */
    exploit = (exploit+1) % 2;
    if(!exploit)
      /* execute one exploration trial */
      doOneMultiStepProblemExplore(xcs, pop, state, &counter);
    else
      /* execute one exploitation trial, monitoring performance */
      doOneMultiStepProblemExploit(xcs, pop, state, counter,
                                   &stepToFood[trialCounter%(xcs->testFrequency)],
                                   &sysError[trialCounter%(xcs->testFrequency)]);

    /* write out the performance every xcs->testFrequency trials */
    if( trialCounter%(xcs->testFrequency)==0 && exploit && trialCounter>0 ) {
      writePerformance(xcs, tabFile, *pop, stepToFood, sysError, trialCounter, averages, expnr);
    }
  }
  free(stepToFood);
  free(sysError);
  free(state);
}
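/*
 * Note on startOneMultiStepExperiment above: exploration and exploitation
 * trials strictly alternate. 'exploit' is flipped at the top of the loop and
 * 'trialCounter += exploit' advances the counter only after exploitation
 * trials, so xcs->maxNrSteps counts exploit (test) trials. Their results are
 * kept in the ring buffers stepToFood[] and sysError[] via the index
 * trialCounter%(xcs->testFrequency) and averaged by writePerformance().
 * The disabled sketch below models only this bookkeeping; the function name
 * is invented for illustration and is not part of the original file.
 */
#if 0
static void alternationSketch(struct XCS *xcs)
{
  int trialCounter, exploit = 1, slot;

  for( trialCounter=0 ; trialCounter < xcs->maxNrSteps ; trialCounter += exploit) {
    exploit = (exploit+1) % 2;                   /* 0,1,0,1,... -> explore first */
    slot = trialCounter % (xcs->testFrequency);  /* ring-buffer slot for test results */
    if(!exploit) {
      /* exploration trial: learn only; trialCounter does not advance */
    } else {
      /* exploitation trial: record steps-to-food and system error in 'slot' */
    }
  }
}
#endif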
/**
 * Execute one multi-step trial in exploration (learning) mode.
 */
void doOneMultiStepProblemExplore(struct XCS *xcs, struct xClassifierSet **pop, char *state, int *counter)
{
  double reward=0., previousReward=0., *predictionArray, maxPredArray;
  char *lastState, *previousState;
  struct xClassifierSet *mset, *aset, *paset=NULL, *killset=NULL;
  int action, stepCounter, reset=0;

  /* we need room for the prediction array */
  assert((predictionArray = (double *)calloc(getNumberOfActions(), sizeof(double))) != 0);

  /* keep track of the last and the previous state */
  assert((lastState = (char *)calloc(getConditionLength()+1, sizeof(char))) != NULL);
  assert((previousState = (char *)calloc(getConditionLength()+1, sizeof(char))) != NULL);

  /* reset to a new position */
  resetState(state);

  /* Start one problem; stepCounter counts the number of steps executed */
  for( stepCounter=0 ; stepCounter < xcs->teletransportation && !reset ; stepCounter++, (*counter)++) {
    /* get the match set and update the previous action set */
    mset = getMatchSet(xcs, state, pop, &killset, (*counter));

    /* remove classifiers from the previous action set if they were deleted */
    if( paset!=NULL)
      updateSet(&paset, killset);
    freeSet(&killset);

    /* generate the prediction array */
    getPredictionArray(mset, predictionArray, xcs);

    /* get the action that wins considering the prediction array
     * (usually the action is chosen at random) */
    action = learningActionWinner(predictionArray, xcs->exploreProb);

    /* get the action set according to the chosen action */
    aset = getActionSet(action, mset);

    /* execute the action and get reward */
    strcpy(lastState, state);
    reward = doAction(state, action, &reset);

    /* backpropagate the reward to the previous action set and apply the GA */
    if( paset!=NULL) {
      /* get maximum predicted payoff value to backpropagate (similar to the Q-value) */
      maxPredArray = predictionArray[deterministicActionWinner(predictionArray)];

      /* adjust the action set (RL) */
      adjustActionSet(xcs, &paset, maxPredArray, previousReward, pop, &killset);
      /* update the current action set in case a classifier in the set
       * was subsumed (action set subsumption) */
      updateSet(&aset, killset);
      freeSet(&killset);

      /* apply the GA */
      discoveryComponent(&paset, pop, &killset, (*counter), previousState, xcs, previousReward + xcs->gamma*maxPredArray);
      /* update the current action set in case a classifier in the set
       * was subsumed or deleted (action set subsumption, GA deletion) */
      updateSet(&aset, killset);
      freeSet(&killset);
    }

    /* Give immediate reward if a reset will take place, and apply the GA, too */
    if( reset ) {
      /* update with immediate reward before reset */
      adjustActionSet(xcs, &aset, 0, reward, pop, &killset);
      freeSet(&killset);
      /* apply GA */
      discoveryComponent(&aset, pop, &killset, *counter, lastState, xcs, reward);
      freeSet(&killset);
    }

    /* Clean up */
    freeSet(&mset);
    freeSet(&paset);

    /* remember reward, action set and state for backpropagation and GA in the previous action set */
    previousReward = reward;
    paset = aset;
    strcpy(previousState, lastState);
  }
  freeSet(&paset);
  free(previousState);
  free(lastState);
  free(predictionArray);
}
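/*
 * In doOneMultiStepProblemExplore above, the update of the previous action
 * set is Q-learning-like: its classifiers are adjusted towards the payoff
 * previousReward + xcs->gamma * max_a PA[a], where PA is the prediction
 * array of the current match set (the maximum is taken via
 * deterministicActionWinner). The disabled helper below only isolates that
 * target; its name is invented for illustration and it is not part of the
 * original file.
 */
#if 0
static double qTargetSketch(struct XCS *xcs, double *predictionArray, double previousReward)
{
  int a;
  double maxP = predictionArray[0];

  /* maximum of the prediction array over all actions */
  for( a=1 ; a < getNumberOfActions() ; a++)
    if(predictionArray[a] > maxP)
      maxP = predictionArray[a];

  /* one-step discounted payoff target: r_{t-1} + gamma * max_a PA_t[a] */
  return previousReward + xcs->gamma * maxP;
}
#endif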
/**
 * Execute one multi-step trial in exploitation (testing) mode.
 */
void doOneMultiStepProblemExploit(struct XCS *xcs, struct xClassifierSet **pop, char *state, int counter, int *stepToFood, double *sysError)
{
  double reward=0., previousReward=0., *predictionArray, predictionValue, previousPrediction=0.;
  struct xClassifierSet *mset, *aset, *paset=NULL, *killset=NULL;
  int action, stepCounter, reset=0;

  /* we need space for the prediction array */
  assert((predictionArray = (double *)calloc(getNumberOfActions(), sizeof(double))) != 0);

  /* initialize the system error and reset to a new position */
  *sysError = 0;
  resetState(state);

  /* Start one problem; stepCounter counts the number of steps executed */
  for( stepCounter=0 ; stepCounter < xcs->teletransportation && !reset ; stepCounter++) {
    /* get the match set */
    mset = getMatchSet(xcs, state, pop, &killset, counter);

    /* remove classifiers from the previous action set if they were deleted (indicated in killset) */
    if( paset!=NULL)
      updateSet(&paset, killset);
    freeSet(&killset);

    /* generate the prediction array */
    getPredictionArray(mset, predictionArray, xcs);

    /* get the action that wins in the prediction array */
    action = deterministicActionWinner(predictionArray);

    /* determine the maximum expected reward */
    predictionValue = predictionArray[action];

    /* get the action set according to the chosen action */
    aset = getActionSet(action, mset);

    /* execute the action and get reward */
    reward = doAction(state, action, &reset);

    /* backpropagate the reward to the previous action set */
    if( paset!=NULL) {
      adjustActionSet(xcs, &paset, predictionValue, previousReward, pop, &killset);
      updateSet(&aset, killset);
      freeSet(&killset);
      (*sysError) += (double)(abs((int)(previousReward + (xcs->gamma) * predictionValue - previousPrediction))) / (double)getPaymentRange();
    }

    /* Give immediate reward if a reset will take place */
    if( reset ) {
      adjustActionSet(xcs, &aset, 0, reward, pop, &killset);
      freeSet(&killset);
      (*sysError) += (double)(abs((int)(reward - predictionValue))) / getPaymentRange();
    }

    /* remember the prediction for the system error */
    previousPrediction = predictionValue;
    previousReward = reward;

    /* Clean up */
    freeSet(&mset);
    freeSet(&paset);
    paset = aset;
  }
  freeSet(&paset);

  /* record performance */
  *stepToFood = stepCounter;
  (*sysError) /= stepCounter;
  free(predictionArray);
}
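/*
 * In doOneMultiStepProblemExploit above, the reported system error is the
 * absolute one-step prediction error |r_{t-1} + gamma*P_t - P_{t-1}|,
 * normalized by getPaymentRange(), plus |r_t - P_t| for the final (reset)
 * step, averaged over the steps of the trial. The disabled helper below
 * restates that accumulation in isolation; its name and the reward/prediction
 * arrays are invented for illustration (it assumes the trial ended with a
 * reset) and it is not part of the original file.
 */
#if 0
static double sysErrorSketch(struct XCS *xcs, double *rewards, double *predictions, int steps)
{
  int t;
  double err = 0.;

  /* discounted one-step prediction errors for all but the final step */
  for( t=1 ; t < steps ; t++)
    err += (double)(abs((int)(rewards[t-1] + xcs->gamma * predictions[t] - predictions[t-1])))
           / (double)getPaymentRange();

  /* final step: immediate reward against the last prediction */
  err += (double)(abs((int)(rewards[steps-1] - predictions[steps-1]))) / (double)getPaymentRange();

  /* average over the number of steps in the trial */
  return err / steps;
}
#endif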
/*########################## performance monitoring #################################*/

/**
 * Record and print to screen performance (averaged over the last testFrequency trials) and
 * other information about the current population status.
 */
void writePerformance(struct XCS *xcs, FILE *tabFile, struct xClassifierSet *pop, int *correct, double *sysError, int counter, double **averages, int expnr)
{
  double corr=0., serr=0., spec=0., assest=0., perr=0., fit=0., exp=0., *specPos;
  int i, popsize, popnum, pos;
  struct xClassifierSet *popp;

  /* record specificities of each position */
  assert((specPos = (double *)calloc(getConditionLength(), sizeof(double))) != NULL);
  getPosSpecificity(pop, specPos);

  /* record population properties */
  for( popp=pop, popsize=0, popnum=0 ; popp!=NULL ; popp=popp->next ) {
    popnum += popp->cl->num;
    /* Just count the size of the population */
    popsize++;
    spec += ((double)getSpecificity(popp->cl)/(double)getConditionLength()) * (double)popp->cl->num;
    assest += popp->cl->peerssest * popp->cl->num;
    perr += popp->cl->preer * popp->cl->num;
    fit += popp->cl->fit;
    exp += (double)popp->cl->exp * popp->cl->num;
  }
  spec /= (double)popnum;
  assest /= (double)popnum;
  perr /= (double)popnum;
  fit /= (double)popnum;
  exp /= (double)popnum;

  /* record average performance and error over the last 'testFrequency' trials */
  for(i=0; i<xcs->testFrequency; i++) {
    corr += correct[i];
    serr += sysError[i];
  }
  corr /= (xcs->testFrequency);
  serr /= (xcs->testFrequency);

  /* print to screen every 1000th step */
  if(counter%1000==0) {
    printf("t=%d %f %f |[P]|=%d assest=%f exp=%f spec=%f perr=%f fit=%f\n", counter, corr, serr, popsize, assest, exp, spec, perr, fit);
    for(i=0; i<getConditionLength(); i++) {
      printf(" %f", ((double)specPos[i]/popnum));
    }
    printf("\n");
  }

  /* print to file the learning progress */
  fprintf(tabFile, "%d %f %f %d %f %f %f %f %f", counter, corr, serr, popsize, assest, exp, spec, perr, fit);

  /* write specificities of each attribute (if desired) */
  /*
   * fprintf(tabFile, "\t");
   * for(i=0; i<getConditionLength(); i++){
   *   fprintf(tabFile, " %f",((double)specPos[i]/popnum));
   * }
   */

  /* record the learning progress to determine averages and standard deviations */
  pos = (8+getConditionLength()) * (int)((counter/(xcs->testFrequency))-1);
  averages[expnr][pos]   = corr;
  averages[expnr][pos+1] = serr;
  averages[expnr][pos+2] = popsize;
  averages[expnr][pos+3] = assest;
  averages[expnr][pos+4] = exp;
  averages[expnr][pos+5] = spec;
  averages[expnr][pos+6] = perr;
  averages[expnr][pos+7] = fit;
  for(i=0; i<getConditionLength(); i++) {
    averages[expnr][pos+8+i] = ((double)specPos[i]/popnum);
  }
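  /* Each checkpoint thus occupies 8+getConditionLength() consecutive entries of
   * averages[expnr]: corr, serr, popsize, assest, exp, spec, perr, fit, followed by
   * the per-attribute specificities; at the k-th checkpoint (counter/(xcs->testFrequency) = k)
   * the block starts at pos = (8+getConditionLength())*(k-1). */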