xcs.c
  /* execute the action and get reward
   * (correct represents a boolean indicating whether the classification was correct or wrong) */
  reward = doAction(state, action, correct);

  /* remember the system error */
  *sysError = (double)(abs((int)(reward - predictionArray[action]))) / getPaymentRange();

  /* Clean up */
  freeSet(&mset);
  free(predictionArray);
}

/*########################## in a multi step environment ##########################*/

/**
 * Start one multi-step experiment.
 */
void startOneMultiStepExperiment(struct XCS *xcs, FILE *tabFile, struct xClassifierSet **pop, double **averages, int expnr)
{
  int counter, trialCounter, exploit=1;
  int *stepToFood;
  double *sysError;
  char *state;

  assert((stepToFood = (int *)calloc(xcs->testFrequency, sizeof(int))) != 0);
  assert((sysError = (double *)calloc(xcs->testFrequency, sizeof(double))) != 0);
  assert((state = (char *)calloc(getConditionLength()+1, sizeof(char))) != 0);

  /* set the \0 char at the end of the state string */
  state[getConditionLength()] = '\0';

  /* Start one experiment; trialCounter counts the number of exploit problems (trials) */
  for( trialCounter=0, counter=0 ; trialCounter < xcs->maxNrSteps ; trialCounter += exploit) {
    /* switch between exploration and exploitation */
    exploit = (exploit+1) % 2;
    if(!exploit)
      /* execute one exploration trial */
      doOneMultiStepProblemExplore(xcs, pop, state, &counter);
    else
      /* execute one exploitation trial, monitoring performance */
      doOneMultiStepProblemExploit(xcs, pop, state, counter,
                                   &stepToFood[trialCounter%(xcs->testFrequency)],
                                   &sysError[trialCounter%(xcs->testFrequency)]);

    /* write out the performance every xcs->testFrequency trials */
    if( trialCounter%(xcs->testFrequency)==0 && exploit && trialCounter>0 ) {
      writePerformance(xcs, tabFile, *pop, stepToFood, sysError, trialCounter, averages, expnr);
    }
  }
  free(stepToFood);
  free(sysError);
  free(state);
}
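/*
 * Note on startOneMultiStepExperiment above: exploration and exploitation
 * trials strictly alternate. 'exploit' is flipped at the top of the loop and
 * 'trialCounter += exploit' advances the counter only after exploitation
 * trials, so xcs->maxNrSteps counts exploit (test) trials. Their results are
 * kept in the ring buffers stepToFood[] and sysError[] via the index
 * trialCounter%(xcs->testFrequency) and averaged by writePerformance().
 * The disabled sketch below models only this bookkeeping; the function name
 * is invented for illustration and is not part of the original file.
 */
#if 0
static void alternationSketch(struct XCS *xcs)
{
  int trialCounter, exploit = 1, slot;

  for( trialCounter=0 ; trialCounter < xcs->maxNrSteps ; trialCounter += exploit) {
    exploit = (exploit+1) % 2;                   /* 0,1,0,1,... -> explore first */
    slot = trialCounter % (xcs->testFrequency);  /* ring-buffer slot for test results */
    if(!exploit) {
      /* exploration trial: learn only; trialCounter does not advance */
    } else {
      /* exploitation trial: record steps-to-food and system error in 'slot' */
    }
  }
}
#endif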
/**
 * Execute one multi-step trial in exploration (learning) mode.
 */
void doOneMultiStepProblemExplore(struct XCS *xcs, struct xClassifierSet **pop, char *state, int *counter)
{
  double reward=0., previousReward=0., *predictionArray, maxPredArray;
  char *lastState, *previousState;
  struct xClassifierSet *mset, *aset, *paset=NULL, *killset=NULL;
  int action, stepCounter, reset=0;

  /* we need room for the prediction array */
  assert((predictionArray = (double *)calloc(getNumberOfActions(), sizeof(double))) != 0);

  /* keep track of the last and the previous state */
  assert((lastState = (char *)calloc(getConditionLength()+1, sizeof(char))) != NULL);
  assert((previousState = (char *)calloc(getConditionLength()+1, sizeof(char))) != NULL);

  /* reset to a new position */
  resetState(state);

  /* Start one problem; stepCounter counts the number of steps executed */
  for( stepCounter=0 ; stepCounter < xcs->teletransportation && !reset ; stepCounter++, (*counter)++) {
    /* get the match set and update the previous action set */
    mset = getMatchSet(xcs, state, pop, &killset, (*counter));

    /* remove classifiers from the previous action set if they were deleted */
    if( paset!=NULL)
      updateSet(&paset, killset);
    freeSet(&killset);

    /* generate the prediction array */
    getPredictionArray(mset, predictionArray, xcs);

    /* get the action that wins considering the prediction array
     * (usually the action is chosen at random) */
    action = learningActionWinner(predictionArray, xcs->exploreProb);

    /* get the action set according to the chosen action */
    aset = getActionSet(action, mset);

    /* execute the action and get reward */
    strcpy(lastState, state);
    reward = doAction(state, action, &reset);

    /* backpropagate the reward to the previous action set and apply the GA */
    if( paset!=NULL) {
      /* get maximum predicted payoff value to backpropagate (similar to the Q-value) */
      maxPredArray = predictionArray[deterministicActionWinner(predictionArray)];

      /* adjust the action set (RL) */
      adjustActionSet(xcs, &paset, maxPredArray, previousReward, pop, &killset);
      /* update the current action set in case a classifier in the set
       * was subsumed (action set subsumption) */
      updateSet(&aset, killset);
      freeSet(&killset);

      /* apply the GA */
      discoveryComponent(&paset, pop, &killset, (*counter), previousState, xcs, previousReward + xcs->gamma*maxPredArray);
      /* update the current action set in case a classifier in the set
       * was subsumed or deleted (action set subsumption, GA deletion) */
      updateSet(&aset, killset);
      freeSet(&killset);
    }

    /* Give immediate reward if a reset will take place, and apply the GA, too */
    if( reset ) {
      /* update with immediate reward before reset */
      adjustActionSet(xcs, &aset, 0, reward, pop, &killset);
      freeSet(&killset);
      /* apply GA */
      discoveryComponent(&aset, pop, &killset, *counter, lastState, xcs, reward);
      freeSet(&killset);
    }

    /* Clean up */
    freeSet(&mset);
    freeSet(&paset);

    /* remember reward, action set and state for backpropagation and GA in the previous action set */
    previousReward = reward;
    paset = aset;
    strcpy(previousState, lastState);
  }
  freeSet(&paset);
  free(previousState);
  free(lastState);
  free(predictionArray);
}
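/*
 * In doOneMultiStepProblemExplore above, the update of the previous action
 * set is Q-learning-like: its classifiers are adjusted towards the payoff
 * previousReward + xcs->gamma * max_a PA[a], where PA is the prediction
 * array of the current match set (the maximum is taken via
 * deterministicActionWinner). The disabled helper below only isolates that
 * target; its name is invented for illustration and it is not part of the
 * original file.
 */
#if 0
static double qTargetSketch(struct XCS *xcs, double *predictionArray, double previousReward)
{
  int a;
  double maxP = predictionArray[0];

  /* maximum of the prediction array over all actions */
  for( a=1 ; a < getNumberOfActions() ; a++)
    if(predictionArray[a] > maxP)
      maxP = predictionArray[a];

  /* one-step discounted payoff target: r_{t-1} + gamma * max_a PA_t[a] */
  return previousReward + xcs->gamma * maxP;
}
#endif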
/**
 * Execute one multi-step trial in exploitation (testing) mode.
 */
void doOneMultiStepProblemExploit(struct XCS *xcs, struct xClassifierSet **pop, char *state, int counter, int *stepToFood, double *sysError)
{
  double reward=0., previousReward=0., *predictionArray, predictionValue, previousPrediction=0.;
  struct xClassifierSet *mset, *aset, *paset=NULL, *killset=NULL;
  int action, stepCounter, reset=0;

  /* we need space for the prediction array */
  assert((predictionArray = (double *)calloc(getNumberOfActions(), sizeof(double))) != 0);

  /* initialize the system error and reset to a new position */
  *sysError = 0;
  resetState(state);

  /* Start one problem; stepCounter counts the number of steps executed */
  for( stepCounter=0 ; stepCounter < xcs->teletransportation && !reset ; stepCounter++) {
    /* get the match set */
    mset = getMatchSet(xcs, state, pop, &killset, counter);

    /* remove classifiers from the previous action set if they were deleted (indicated in killset) */
    if( paset!=NULL)
      updateSet(&paset, killset);
    freeSet(&killset);

    /* generate the prediction array */
    getPredictionArray(mset, predictionArray, xcs);

    /* get the action that wins in the prediction array */
    action = deterministicActionWinner(predictionArray);

    /* determine the maximum expected reward */
    predictionValue = predictionArray[action];

    /* get the action set according to the chosen action */
    aset = getActionSet(action, mset);

    /* execute the action and get reward */
    reward = doAction(state, action, &reset);

    /* backpropagate the reward to the previous action set */
    if( paset!=NULL) {
      adjustActionSet(xcs, &paset, predictionValue, previousReward, pop, &killset);
      updateSet(&aset, killset);
      freeSet(&killset);
      (*sysError) += (double)(abs((int)(previousReward + (xcs->gamma) * predictionValue - previousPrediction))) / (double)getPaymentRange();
    }

    /* Give immediate reward if a reset will take place */
    if( reset ) {
      adjustActionSet(xcs, &aset, 0, reward, pop, &killset);
      freeSet(&killset);
      (*sysError) += (double)(abs((int)(reward - predictionValue))) / getPaymentRange();
    }

    /* remember the prediction for the system error */
    previousPrediction = predictionValue;
    previousReward = reward;

    /* Clean up */
    freeSet(&mset);
    freeSet(&paset);
    paset = aset;
  }
  freeSet(&paset);

  /* record performance */
  *stepToFood = stepCounter;
  (*sysError) /= stepCounter;
  free(predictionArray);
}
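/*
 * In doOneMultiStepProblemExploit above, the reported system error is the
 * absolute one-step prediction error |r_{t-1} + gamma*P_t - P_{t-1}|,
 * normalized by getPaymentRange(), plus |r_t - P_t| for the final (reset)
 * step, averaged over the steps of the trial. The disabled helper below
 * restates that accumulation in isolation; its name and the reward/prediction
 * arrays are invented for illustration (it assumes the trial ended with a
 * reset) and it is not part of the original file.
 */
#if 0
static double sysErrorSketch(struct XCS *xcs, double *rewards, double *predictions, int steps)
{
  int t;
  double err = 0.;

  /* discounted one-step prediction errors for all but the final step */
  for( t=1 ; t < steps ; t++)
    err += (double)(abs((int)(rewards[t-1] + xcs->gamma * predictions[t] - predictions[t-1])))
           / (double)getPaymentRange();

  /* final step: immediate reward against the last prediction */
  err += (double)(abs((int)(rewards[steps-1] - predictions[steps-1]))) / (double)getPaymentRange();

  /* average over the number of steps in the trial */
  return err / steps;
}
#endif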
/*########################## performance monitoring #################################*/

/**
 * Record and print to screen performance (averaged over the last testFrequency trials) and
 * other information about the current population status.
 */
void writePerformance(struct XCS *xcs, FILE *tabFile, struct xClassifierSet *pop, int *correct, double *sysError, int counter, double **averages, int expnr)
{
  double corr=0., serr=0., spec=0., assest=0., perr=0., fit=0., exp=0., *specPos;
  int i, popsize, popnum, pos;
  struct xClassifierSet *popp;

  /* record specificities of each position */
  assert((specPos = (double *)calloc(getConditionLength(), sizeof(double))) != NULL);
  getPosSpecificity(pop, specPos);

  /* record population properties */
  for( popp=pop, popsize=0, popnum=0 ; popp!=NULL ; popp=popp->next ) {
    popnum += popp->cl->num;
    /* Just count the size of the population */
    popsize++;
    spec += ((double)getSpecificity(popp->cl)/(double)getConditionLength()) * (double)popp->cl->num;
    assest += popp->cl->peerssest * popp->cl->num;
    perr += popp->cl->preer * popp->cl->num;
    fit += popp->cl->fit;
    exp += (double)popp->cl->exp * popp->cl->num;
  }
  spec /= (double)popnum;
  assest /= (double)popnum;
  perr /= (double)popnum;
  fit /= (double)popnum;
  exp /= (double)popnum;

  /* record average performance and error over the last 'testFrequency' trials */
  for(i=0; i<xcs->testFrequency; i++) {
    corr += correct[i];
    serr += sysError[i];
  }
  corr /= (xcs->testFrequency);
  serr /= (xcs->testFrequency);

  /* print to screen every 1000th step */
  if(counter%1000==0) {
    printf("t=%d %f %f |[P]|=%d assest=%f exp=%f spec=%f perr=%f fit=%f\n", counter, corr, serr, popsize, assest, exp, spec, perr, fit);
    for(i=0; i<getConditionLength(); i++) {
      printf(" %f", ((double)specPos[i]/popnum));
    }
    printf("\n");
  }

  /* print to file the learning progress */
  fprintf(tabFile, "%d %f %f %d %f %f %f %f %f", counter, corr, serr, popsize, assest, exp, spec, perr, fit);

  /* write specificities of each attribute (if desired) */
  /*
   * fprintf(tabFile, "\t");
   * for(i=0; i<getConditionLength(); i++){
   *   fprintf(tabFile, " %f",((double)specPos[i]/popnum));
   * }
   */

  /* record the learning progress to determine averages and standard deviations */
  pos = (8+getConditionLength()) * (int)((counter/(xcs->testFrequency))-1);
  averages[expnr][pos]   = corr;
  averages[expnr][pos+1] = serr;
  averages[expnr][pos+2] = popsize;
  averages[expnr][pos+3] = assest;
  averages[expnr][pos+4] = exp;
  averages[expnr][pos+5] = spec;
  averages[expnr][pos+6] = perr;
  averages[expnr][pos+7] = fit;
  for(i=0; i<getConditionLength(); i++) {
    averages[expnr][pos+8+i] = ((double)specPos[i]/popnum);
  }
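  /* Each checkpoint thus occupies 8+getConditionLength() consecutive entries of
   * averages[expnr]: corr, serr, popsize, assest, exp, spec, perr, fit, followed by
   * the per-attribute specificities; at the k-th checkpoint (counter/(xcs->testFrequency) = k)
   * the block starts at pos = (8+getConditionLength())*(k-1). */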