⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bootstrap.java

📁 java数据挖掘算法
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
package fss;import shared.*;import shared.Error;import java.lang.*;

/***************************************************************************
  The Bootstrap is an error estimation method developed by Bradley Efron 
(Efron, An Introduction to the Bootstrap, 1993). This class implements a 
simplified version of bootstrap (simple bootstrap), along with the usual 
.632 bootstrap method.  Optionally, the user may choose a weighting for the 
bootstrap sample error other than .632.

  Assumptions  : Assumes that the order and contents of the training
                    list returned by the inducer is the same as the
		    order and contents of the training list passed to
		    the inducer.
		 The training list may be altered by the inducer
                    during training, but it must be returned to the
		    same state.

  Complexity   : estimate_performance takes O(numTimes *
                       O(Inducer.train_and_test())).
  Enhancements : Keep the relative order of instances intact.  This
		    seems to be hard if the inducer is allowed to
		    modify its training set (e.g. when it does cross
		    validation) since references to the list will change.

@author James Louis 8/13/2001	Ported to Java.
@author Dan Sommerfield 10/4/94 
			Refitted to connect with new PerfEstimator class.
			Added estimate_performance using files.
@author Dan Sommerfield 6/01/94
			Initial revision as CS229B project.
***************************************************************************/

public class Bootstrap extends PerfEstimator {


   // we want to make this enum public, but we have to declare it before
   // all private/protected declarations which depend on it.
   // Hence the unusual public/private/protected ordering.
/* BootstrapType ENUM*/
   // Simple bootstrap: train_and_test records the sample error unchanged.
   public static final byte simple = 0;
   // Fractional bootstrap: train_and_test records
   //    error * bsFract + apparentError * (1 - bsFract).
   // Note the values are not contiguous; set_type rejects anything other
   // than simple or fractional (so 1 is not a legal type).
   public static final byte fractional = 2;
/* End BootstrapType ENUM*/

   // Number of bootstrap repetitions; set_times requires it to be > 0.
   private int numTimes;
   // Which bootstrap variant is in use: simple or fractional.
   private byte bsType; //BootstrapType enum
   // Weight given to the bootstrap sample error in the fractional formula;
   // set_fraction validates it against the range 0.0 .. 1.0.
   private double bsFract;

   // Defaults applied by the constructors for any option not supplied.
   protected static int BootstrapDefaultNumTimes = 50;
   protected static byte BootstrapDefaultType = fractional;//BootstrapType enum
   protected static double BootstrapDefaultFraction = 0.632;
   // Error obtained by training and testing on the same list; assigned at
   // the start of estimate_performance and read by train_and_test when the
   // fractional formula is applied.
   protected double apparentError;

// constants for dump suffixes used by refined bootstrap when estimating
// from dumped files.  This suffix is used specially to get apparent
// error from a dumped file.
// estimate_performance passes it as the dumpSuffix of the apparent-error run.
   protected static String BOOTSTRAP_AA_SUFFIX = "-A";



/***************************************************************************
  This class has no access to a copy constructor.
***************************************************************************/
   private Bootstrap(Bootstrap source) {
      // Intentionally empty: kept private so that a Bootstrap cannot be
      // copy-constructed from outside this class.
      super();
   }

/***************************************************************************
  This class has no access to an assign method.
***************************************************************************/
   private void assign(Bootstrap source) {
      // Deliberately a no-op: assignment from another Bootstrap is not
      // supported, and the method is private so callers cannot reach it.
      return;
   }



   /** Returns the number of bootstrap repetitions currently configured. */
   public int get_times() {
      return this.numTimes;
   }
   /** Returns the bootstrap type in use: {@code simple} or {@code fractional}. */
   public byte get_type() {
      return this.bsType;
   }
   /** Returns the weight applied to the sample error by the fractional bootstrap. */
   public double get_fraction() {
      return this.bsFract;
   }

/****************************************************************************
  Train_and_test, modified to apply the bootstrap formula for the 
appropriate type of bootstrap at each sample.
****************************************************************************/
   /**
    * Runs one train/test pass through the superclass and, for a fractional
    * bootstrap, blends the resulting error with the apparent error.
    *
    * @param inducer       inducer handed on to the superclass train_and_test
    * @param trainList     list to train on
    * @param testList      list to test on
    * @param dumpSuffix    dump suffix handed on to the superclass
    * @param localPerfData receives the (possibly blended) error; when null
    *                      nothing is recorded and the raw error is returned
    *                      unblended (this is how the apparent error itself
    *                      is obtained)
    * @return the error that was logged: blended when the type is fractional
    *         and localPerfData is non-null, raw otherwise
    */
   protected double train_and_test(BaseInducer inducer,
         InstanceList trainList,
         InstanceList testList,
         String dumpSuffix,
         PerfData localPerfData)
   {
      // Silence dribble output for the duration of the superclass call.  The
      // flag is global, so restore it in a finally block: otherwise an
      // exception escaping the call would leave dribbling switched off.
      final boolean saveDribble = GlobalOptions.dribble;
      GlobalOptions.dribble = false;
      double error;
      try {
         // null is passed as the perf data here; the error is inserted below
         // instead.  NOTE(review): presumably this stops the superclass from
         // recording the unblended error itself -- confirm in PerfEstimator.
         error = super.train_and_test(inducer, trainList, testList,
               dumpSuffix, null);
      } finally {
         GlobalOptions.dribble = saveDribble;
      }

      if(localPerfData != null) {
         // now apply the bootstrap formula if we're doing a fractional
         // bootstrap.  Skipped when localPerfData is null, because then we
         // are computing the apparent error itself.
         // error = error*bsFract + apparent * (1 - bsFract)
         if(bsType == fractional)
            error = error * bsFract + apparentError * (1.0 - bsFract);

         // insert the error now
         localPerfData.insert_error(error);
      }

      // log the error
      logOptions.LOG(2, "Individual error: "+error+"\n");
      return error;
   }

/*****************************************************************************
 Default constructor.  Uses the default bootstrap type, number of times and
fraction.  Sets statistical data to NULL.
*****************************************************************************/
public Bootstrap() {
   // The superclass no-argument constructor runs implicitly first.
   // Every option falls back to its class-wide default; the setters
   // validate the values, in the order type, times, fraction.
   this.set_type(BootstrapDefaultType);
   this.set_times(BootstrapDefaultNumTimes);
   this.set_fraction(BootstrapDefaultFraction);
}

/*****************************************************************************
 Constructor.  Allows immediate setting of the number of times; the bootstrap
type and fraction take their default values.
Sets statistical data to NULL.
*****************************************************************************/
public Bootstrap(int nTimes) {
   // The superclass no-argument constructor runs implicitly first.
   // Only the repetition count comes from the caller; type and fraction
   // use the class-wide defaults.  Setter order: type, times, fraction.
   this.set_type(BootstrapDefaultType);
   this.set_times(nTimes);
   this.set_fraction(BootstrapDefaultFraction);
}

/*****************************************************************************
 Constructor.  Allows immediate setting of the number of times and the
bootstrap type (simple or fractional); the fraction takes its default value.
Sets statistical data to NULL.
*****************************************************************************/
public Bootstrap(int nTimes, byte type /*BootstrapType enum*/) {
   // The superclass no-argument constructor runs implicitly first.
   // Caller supplies repetition count and type; the fraction uses the
   // class-wide default.  Setter order: type, times, fraction.
   this.set_type(type);
   this.set_times(nTimes);
   this.set_fraction(BootstrapDefaultFraction);
}

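/*****************************************************************************
 Constructor.  Allows immediate setting of all options: the number of times,
the bootstrap type (simple or fractional) and the fraction used to weight the
bootstrap sample error.
Sets statistical data to NULL.
*****************************************************************************/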
public Bootstrap(int nTimes, byte type /*BootstrapType enum*/, double fraction) {
   // The superclass no-argument constructor runs implicitly first.
   // All three options come from the caller and are validated by their
   // setters, in the order type, times, fraction.
   this.set_type(type);
   this.set_times(nTimes);
   this.set_fraction(fraction);
}

   
   // options
   /**
    * Sets the number of bootstrap repetitions used by estimate_performance.
    * A value that is not strictly positive is reported via Error.fatalErr.
    *
    * @param num requested number of repetitions; must be at least 1
    */
   public void set_times(int num) {
      final boolean positive = num >= 1;
      if (!positive) {
         Error.fatalErr("Bootstrap.set_times: illegal number of times: " + num);
      }
      this.numTimes = num;
   }


/**
 * Selects the bootstrap variant.  Only {@code simple} and {@code fractional}
 * are accepted; any other value is reported via Error.fatalErr and the
 * current type is left unchanged.
 *
 * @param type one of the BootstrapType constants
 */
public void set_type(byte type /*BootstrapType enum*/) {
   switch (type) {
      case simple:
      case fractional:
         this.bsType = type;
         break;
      default:
         Error.fatalErr("Bootstrap.set_type: illegal type:" + type);
         break;
   }
}

/**
 * Sets the weight given to the bootstrap sample error by the fractional
 * bootstrap formula (error * fract + apparentError * (1 - fract)).
 *
 * The accepted range is 0.0 exclusive to 1.0 inclusive.  Anything else,
 * including NaN, is reported via Error.fatalErr.
 *
 * @param fract the weighting to use
 */
public void set_fraction(double fract) {
   // Written as a negated "is in range" test rather than two "is out of
   // range" tests: every ordered comparison with NaN is false, so the
   // original form (fract <= 0.0 || fract > 1.0) silently accepted NaN.
   final boolean inRange = fract > 0.0 && fract <= 1.0;
   if(!inRange)
      Error.fatalErr("Bootstrap.set_fraction: Illegal value: "+fract
          +".  Must be between 0.0 and 1.0");
   bsFract = fract;
}

/*****************************************************************************
 Description:  This function returns a full description of the error
               estimation method, including the specific type (i.e. type of
               bootstrap)
*****************************************************************************/

public String description() {
   // Name of the bootstrap variant; the fractional variant is prefixed
   // with its weighting.
   final String typeName;
   if (bsType == simple) {
      typeName = "simple";
   } else if (bsType == fractional) {
      typeName = bsFract + " fractional";
   } else {
      // should never get here: set_type only stores the two legal values
      MLJ.ASSERT(false, "Bootstrap.description: BootstrapType invalid.");
      typeName = "";
   }

   // Result has the form "<numTimes>x <typeName> Bootstrap".
   return numTimes + "x " + typeName + " Bootstrap";
}


/****************************************************************************
 Description : estimate_performance for Bootstrap does all the work.  It builds
               bootstrap sample, then calls train_performance to get train for
	       the appropriate type of bootstrap method.
 Comments    :
*****************************************************************************/
public double estimate_performance(BaseInducer inducer,
				  InstanceList dataList)
{
  
   logOptions.LOG(2, "Inducer: " + inducer.description() + "\n");
   logOptions.LOG(2, "Number of times: " + numTimes + "\n");
   logOptions.LOG(3, "Training list: " + dataList + "\n");

   if (dataList.get_schema().has_loss_matrix())
      Error.fatalErr("Bootstrap.estimate_performance: Bootstrap can only be used "
	 +"on instance lists without loss matricies. Remove or disable the "
	 +"loss matrix and try again");
   
   // clear out statistical data
//obs   delete perfData;
   perfData = null;
   perfData = new PerfData();
   
   // we will need the "apparent error"--that is, the error of
   // the inducer when trained AND tested on the training set.
   // Apparent error tends to be very low, even 0.0 for many inducers
   // assuming there are no conflicting instances.
   InstanceList dataPtr = dataList;
   boolean saveDribble = GlobalOptions.dribble;
   GlobalOptions.dribble = false;
   apparentError = train_and_test(inducer, dataPtr, dataList, 
				  BOOTSTRAP_AA_SUFFIX, null);
   GlobalOptions.dribble = saveDribble;

   MLJ.ASSERT(numTimes != 0,"Bootstrap.estimate_performance: numTimes == 0.");
   logOptions.DRIBBLE("Bootstrapping " + numTimes + " times: ");
   for(int time = 0; time < numTimes; time++) {
      logOptions.DRIBBLE(time + 1 +" ");    
      // create a bootstrap sample
      InstanceList testList = new InstanceList(dataList.get_schema());
      InstanceList trainList;
      
      trainList =
	 dataList.sample_with_replacement(dataList.num_instances(), testList,
					  rand_num_gen());
      logOptions.LOG(2, "\nTotal samples: " +trainList.num_instances()
	  +"\tUnique Used Samples: "
	  +(dataList.num_instances() - testList.num_instances())
	  +"\tUnused Samples: " + testList.num_instances() +"\n");
      logOptions.LOG(3, "bootstrap sample:\n" + trainList + "\n"
	  +"unused sample:\n"+testList+"\n");
      // make sure test sample has elements if we're using fractional
      // bootstrap.
      // An empty test set occurs if the sampling process happens to
      // sample the entire dataset (very rare for all but very small
      // datasets)
      if(bsType == fractional && testList.no_instances()) {
       logOptions.LOG(2, "Bootstrap: empty test list at time: "+time+"\n");
	 time--;   // force repeat of this run of the loop
      }    
      else {

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -