📄 xcs.c

📁 XCS is a new algorithm for artificial intelligence
💻 C
字号:
/*
/       (XCS)
/	------------------------------------
/	Learning Classifier System based on accuracy
/
/     by Martin Butz
/     University of Wuerzburg / University of Illinois at Urbana/Champaign
/     butz@illigal.ge.uiuc.edu
/     Last modified: 10-17-99
/
/     Main program
*/

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

//#include <unistd.h>
#include <time.h>
//#include <resource.h>

#include "classifierList.h"
#include "actionSelection.h"
#include "xcs.h"
#include "env.h"
#include "xcsMacros.h"

/*
 * Program entry point.
 *
 * args - number of command line arguments (including the program name)
 * argv - argument vector; argv[1], when present, names the environment file
 *
 * Advances the pseudo random generator, optionally opens the environment
 * file, initializes the environment from it, runs all experiments and
 * finally releases the environment.
 *
 * Returns 1 after the experiments have run, and 0 on every early exit
 * (wrong usage, unreadable file, failed environment initialization).
 * NOTE(review): this is the reverse of the usual process exit status
 * convention; it is kept unchanged because callers may rely on it.
 */
int main(int args,char *argv[])
{
  FILE *env_file=NULL;

  /* set the priority */
//  setpriority(PRIO_PROCESS, getpid(), 5);

  /* randomize the pseudo-number generator */
  randomize();

  /* a multi step run cannot start without an environment file */
  if(args!=2 && IS_MULTI_STEP){
    printf("Usage: xcs.out FILE\n");
    return 0;
  }

  if(args==2){
    env_file = fopen(argv[1], "rt");
    if (env_file == NULL){
      fprintf(stderr, "Cannot open file %s.\n",argv[1]);
      return 0;
    }
  }

  /* Initialize the environment (not always necessary).
   * env_file stays NULL when no file name was given. */
  if(!initEnv(env_file)){
    /* do not leak the stream when the initialization fails */
    if(env_file!=NULL)
      fclose(env_file);
    return 0;
  }

  /* the file is not touched again by this function after initialization */
  if(env_file!=NULL)
    fclose(env_file);

  /* start the experiments */
  startExperiments();

  freeEnv();

  return 1;
}

/*
 * Runs NR_EXPS experiments in a row and writes their statistics to the
 * file named TABOUTFILE.
 *
 * For each experiment a line "Next Experiment" is written, the population
 * is started empty (NULL) or, if INITIALIZE_POP is set, created randomly,
 * then one multi step or one single step experiment is executed depending
 * on IS_MULTI_STEP, and the population is freed again.
 *
 * If the statistics file cannot be opened, an error is reported on stderr
 * and no experiment is run.
 */
void startExperiments()
{
  int expcounter;
  struct xClassifierSet *pop;
  FILE *tabFile;

  /*Open files for statistics*/
  tabFile = fopen(TABOUTFILE, "wt");
  if (tabFile == NULL)
    {
      /* the file name is passed as an argument, never as the format string */
      fprintf(stderr, "Cannot open file %s.\n", TABOUTFILE);
      return;
    }

  /* start the experiments */
  for( expcounter=0 ; expcounter<NR_EXPS ; expcounter++ ) {
    fprintf(tabFile,"Next Experiment\n");

    /* Initialize the population */
    pop=NULL;
    if(INITIALIZE_POP)
      pop=createRandomClassifierSet(CONDITION_LENGTH, ACTION_LENGTH);

    if(IS_MULTI_STEP)
      startOneMultiStepExperiment(tabFile, &pop);
    else
      startOneSingleStepExperiment(tabFile, &pop);

    freeClassifierSet(&pop);
  }

  /* closing flushes the buffered statistics; a failure here means the
   * written data may be incomplete */
  if (fclose(tabFile) != 0)
    fprintf(stderr, "Error while closing file %s.\n", TABOUTFILE);
}


/* ########################## in a single step environment ########################## */

/*
 * Runs one single step experiment on the population *pop.
 *
 * tabFile - open statistics file handed on to writePerformance
 * pop     - address of the population pointer
 *
 * Explore and exploit problems alternate, beginning with an explore
 * problem. The trial number only advances after an exploit problem, so
 * MAX_NR_STEPS bounds the number of exploit problems. The results of the
 * exploit problems are kept in two ring buffers of 50 entries.
 */
void startOneSingleStepExperiment(FILE *tabFile, struct xClassifierSet **pop)
{
  int trial=0, exploitPhase=1;
  int correct[50];
  double sysError[50];
  char state[CONDITION_LENGTH+1];

  /* terminate the state string once; the buffer is reused for every problem */
  state[CONDITION_LENGTH]='\0';

  while( trial<MAX_NR_STEPS ){
    /* flip between explore (0) and exploit (1) */
    exploitPhase = !exploitPhase;

    resetState(state);

    if(exploitPhase){
      int slot = trial%50;

      doOneSingleStepProblemExploit(pop, state, trial, &correct[slot], &sysError[slot]);

      /* report the performance whenever the ring buffers wrapped around */
      if( slot==0 && trial>0 )
	writePerformance(tabFile, *pop, correct, sysError, trial);

      /* show the progress on stdout every 1000 trials */
      if( trial%1000==0 )
	printf("%d\n",trial);
    }else{
      doOneSingleStepProblemExplore(pop, state, trial);
    }

    /* only exploit problems count as trials */
    trial+=exploitPhase;
  }
}

/*
 * Executes one explore problem in a single step environment.
 *
 * pop          - address of the population pointer
 * state        - current problem instance
 * trialCounter - trial number handed on to getMatchSet and
 *                discoveryComponent
 *
 * Builds the match set, selects an action with actionWinner, executes it,
 * applies the received reward to the action set and then runs the
 * discovery component on that action set.
 */
void doOneSingleStepProblemExplore(struct xClassifierSet **pop, char *state, int trialCounter)
{
  struct xClassifierSet *matchSet, *actionSet, *removed=NULL;
  double predictions[NUMBER_OF_ACTIONS];
  char chosen[ACTION_LENGTH+1];
  double payoff=0.;
  int wasCorrect;

  chosen[ACTION_LENGTH]='\0';

  /* Match set first; the kill set it fills is not used by any other set
   * here and is released right away. */
  matchSet=getMatchSet(state,pop,&removed,trialCounter);
  freeSet(&removed);

  /* Predictions for the match set, then the action picked from them. */
  getPredictionArray(matchSet, predictions);
  actionWinner(chosen, predictions);

  /* The classifiers of the match set that carry the chosen action. */
  actionSet = getActionSet(chosen, matchSet);

  /* Execute the action of the first classifier in the action set;
   * wasCorrect receives the right/wrong flag, which is not used further. */
  payoff = doAction(state, actionSet->cl->act, &wasCorrect);

  /* Immediate reward. */
  adjustActionSet(actionSet,0,payoff);

  /* Discovery mechanism; its kill set is released without further use. */
  discoveryComponent(&actionSet,pop,&removed,trialCounter);
  freeSet(&removed);

  /* Release both sets. */
  freeSet(&matchSet);
  freeSet(&actionSet);
}

/*
 * Executes one exploit problem in a single step environment.
 *
 * pop          - address of the population pointer
 * state        - current problem instance
 * trialCounter - trial number handed on to getMatchSet
 * correct      - out: flag written by doAction for the executed action
 * sysError     - out: |reward - prediction of the chosen action|
 *                divided by PAYMENT_RANGE
 *
 * The action is chosen with deterministicActionWinner. No reward update
 * and no discovery step is applied to the action set here.
 */
void doOneSingleStepProblemExploit(struct xClassifierSet **pop, char *state, int trialCounter, 
				   int *correct, double *sysError)
{        
  struct xClassifierSet *mset, *aset, *killset=NULL;
  char action[ACTION_LENGTH+1];
  double reward=0., predictionArray[NUMBER_OF_ACTIONS];
  double deviation;

  /* get the match set*/
  mset=getMatchSet(state,pop,&killset,trialCounter);
  /* no updates are necessary in this case */
  freeSet(&killset);
    
  /* get the Prediction array */
  getPredictionArray(mset, predictionArray);
  
  /* Get the action, that wins in the prediction array */
  action[ACTION_LENGTH]='\0';
  deterministicActionWinner(action, predictionArray);
    
  /* Get the action set according to the chosen action aw */
  aset = getActionSet(action, mset);

  /* execute the action and get reward
   * correct represents a boolean for the right or wrong action */
  reward = doAction(state, aset->cl->act, correct);
    
  /* remember the system error; the magnitude is taken on the double value
   * so that the fractional part of the deviation is not cut off */
  deviation = reward - predictionArray[getActInt(action)];
  if(deviation<0.)
    deviation = -deviation;
  *sysError = deviation/(double)PAYMENT_RANGE;

  /* Clean up */
  freeSet(&mset);
  freeSet(&aset);
}


/* ########################## in a multi step environment ########################## */

/*
 * Runs one multi step experiment on the population *pop.
 *
 * tabFile - open statistics file handed on to writePerformance
 * pop     - address of the population pointer
 *
 * Exploit and explore problems alternate, beginning with an exploit
 * problem. The trial number only advances after an exploit problem, so
 * MAX_NR_STEPS bounds the number of exploit problems. A separate step
 * counter is shared by all problems through its address.
 */
void startOneMultiStepExperiment(FILE *tabFile, struct xClassifierSet **pop)
{
  int steps=0, trial=0, exploitPhase=0;
  int stepToFood[50];
  double sysError[50];
  char state[CONDITION_LENGTH+1];

  /* terminate the state string once; the buffer is reused for every problem */
  state[CONDITION_LENGTH]='\0';

  while( trial<MAX_NR_STEPS ){
    /* flip between explore (0) and exploit (1) */
    exploitPhase = !exploitPhase;

    if(exploitPhase){
      int slot = trial%50;

      doOneMultiStepProblemExploit(pop, state, &steps, &stepToFood[slot], &sysError[slot]);

      /* report the performance whenever the ring buffers wrapped around */
      if( slot==0 && trial>0 )
	writePerformance(tabFile, *pop, stepToFood, sysError, trial);

      /* show the progress on stdout every 500 trials */
      if( trial%500==0 )
	printf("%d\n",trial);
    }else{
      doOneMultiStepProblemExplore(pop, state, &steps);
    }

    /* only exploit problems count as trials */
    trial+=exploitPhase;
  }
}

/*
 * Executes one explore problem in a multi step environment.
 *
 * pop     - address of the population pointer
 * state   - state buffer; reset at the start and handed to doAction
 * counter - step counter, incremented once per executed step and handed
 *           on to getMatchSet and discoveryComponent
 *
 * The problem ends when doAction signals a reset or after
 * TELETRANSPORTATION steps. In every step the previous action set is
 * updated with the best prediction of the current step, and the current
 * action set receives the reward directly when a reset was signalled. The
 * discovery component is applied after each of these updates.
 */
void doOneMultiStepProblemExplore(struct xClassifierSet **pop, char *state, int *counter)
{
  struct xClassifierSet *matchSet, *actionSet, *prevActionSet=NULL, *removed=NULL;
  double payoff=0., predictions[NUMBER_OF_ACTIONS];
  char chosen[ACTION_LENGTH+1];
  int step=0, done=0;

  chosen[ACTION_LENGTH]='\0';
  resetState(state);

  while( step<TELETRANSPORTATION && !done ){

    /* Match set for the current state; the kill set it fills is applied
     * to the previous action set, if there is one. */
    matchSet=getMatchSet(state,pop,&removed,(*counter));
    if( prevActionSet!=NULL )
      updateSet(&prevActionSet,removed);
    freeSet(&removed);

    /* Predictions, chosen action and the matching action set. */
    getPredictionArray(matchSet, predictions);
    actionWinner(chosen, predictions);
    actionSet = getActionSet(chosen, matchSet);

    /* Execute the action of the first classifier in the action set;
     * done is set by doAction when the problem is over. */
    payoff = doAction(state, actionSet->cl->act, &done);

    /* Previous action set: update with the best current prediction, run
     * the GA on it and apply the resulting kill set to the current
     * action set. */
    if( prevActionSet!=NULL ){
      adjustActionSet(prevActionSet,predictions[detActWinInt(predictions)],0);
      discoveryComponent(&prevActionSet,pop,&removed,(*counter));
      updateSet(&actionSet,removed);
      freeSet(&removed);
    }

    /* Last step of the problem: the current action set gets the reward
     * immediately and the GA runs on it as well. */
    if( done ){
      adjustActionSet(actionSet,0,payoff);
      discoveryComponent(&actionSet,pop,&removed,(*counter));
      updateSet(&actionSet,removed);
      freeSet(&removed);
    }

    /* The current action set becomes the previous one for the next step. */
    freeSet(&matchSet);
    freeSet(&prevActionSet);
    prevActionSet=actionSet;

    step++;
    (*counter)++;
  }
  freeSet(&prevActionSet);
}

/*
 * Executes one exploit problem in a multi step environment.
 *
 * pop        - address of the population pointer
 * state      - state buffer; reset at the start and handed to doAction
 * counter    - step counter, only read here and handed on to getMatchSet
 * stepToFood - out: number of steps executed in this problem
 * sysError   - out: accumulated prediction error divided by the number
 *              of executed steps
 *
 * The problem ends when doAction signals a reset or after
 * TELETRANSPORTATION steps. Actions are chosen with
 * deterministicActionWinner. The action sets are still adjusted, but the
 * discovery component is not applied.
 */
void doOneMultiStepProblemExploit(struct xClassifierSet **pop, char *state, int *counter, int *stepToFood, double *sysError )
{
  double reward=0., predictionArray[NUMBER_OF_ACTIONS], predictionValue, previousPrediction=0.;
  double deviation;
  char action[ACTION_LENGTH+1];
  struct xClassifierSet *mset, *aset, *paset=NULL, *killset=NULL;
  int stepCounter, reset=0;

  /* set the \0 char at the end of action and init the sysError*/
  action[ACTION_LENGTH]='\0';
  *sysError=0;
  resetState(state);

  /* Start one problem, stepCounter counts the number of steps executed */
  for( stepCounter=0 ; stepCounter<TELETRANSPORTATION && !reset ; stepCounter++) {

    /* get the match set and update the previous action set*/
    mset=getMatchSet(state,pop,&killset,(*counter));
    if( paset!=NULL)
      updateSet(&paset,killset);
    freeSet(&killset);

    /* get the Prediction array */
    getPredictionArray(mset, predictionArray);

    /* Get the action, that wins in the prediction array */
    deterministicActionWinner(action, predictionArray);
    predictionValue= predictionArray[detActWinInt(predictionArray)];

    /* Get the action set according to the chosen action aw */
    aset = getActionSet(action, mset);

    /* execute the action and get reward */
    reward = doAction(state, aset->cl->act, &reset);

    /* Give immediate reward, if a reset will take place */
    if( reset ){
      adjustActionSet(aset,0,reward);
      /* the magnitude is taken on the double value so that the fractional
       * part of the deviation is not cut off */
      deviation = reward - predictionValue;
      if(deviation<0.)
	deviation = -deviation;
      (*sysError) += deviation/(double)PAYMENT_RANGE;
    }

    /* Backpropagate the reward to the previous action set */
    if( paset!=NULL){
      adjustActionSet(paset,predictionArray[detActWinInt(predictionArray)],0);
      deviation = GAMMA*predictionValue - previousPrediction;
      if(deviation<0.)
	deviation = -deviation;
      (*sysError) += deviation/(double)PAYMENT_RANGE;
    }
    /* remember the prediction for the system error of the next step */
    previousPrediction=predictionValue;

    /* Clean up */
    freeSet(&mset);
    freeSet(&paset);
    paset=aset;
  }
  freeSet(&paset);
  *stepToFood=stepCounter;
  /* average over the executed steps; with no step at all the error stays 0
   * instead of becoming 0/0 */
  if(stepCounter>0)
    (*sysError)/=stepCounter;
}



/*
 * Writes one line of statistics to tabFile.
 *
 * tabFile  - open output file
 * pop      - head of the population list; only its length is used
 * correct  - 50 integer results of the last exploit problems
 * sysError - 50 system errors of the last exploit problems
 * counter  - trial number printed in the first column
 *
 * Line format: counter;mean of correct;mean of sysError;population size
 * divided by PAYMENT_RANGE
 */
void writePerformance(FILE *tabFile,struct xClassifierSet *pop, int *correct,double *sysError,int counter)
{
  struct xClassifierSet *node=pop;
  double corrSum=0., errSum=0.;
  int popsize=0, k;

  /* walk the list once to obtain the population size */
  while( node!=NULL ){
    popsize++;
    node=node->next;
  }

  /* sum both buffers front to back */
  for(k=0;k<50;k++){
    corrSum+=correct[k];
    errSum+=sysError[k];
  }

  /* turn the sums into averages over the 50 entries */
  corrSum/=50.;
  errSum/=50.;

  fprintf(tabFile,"%d;%f;%f;%f\n",counter,corrSum,errSum,(double)popsize/PAYMENT_RANGE);
}



/*
 * Randomize the pseudo random generator.
 *
 * Seeds rand() from the current calendar time. The clock is read exactly
 * once, and the seed is not limited to 1000 different values as it would
 * be when only a time dependent number of rand() results is discarded.
 */
void randomize(void)
{
  srand((unsigned int)time(NULL));
}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -