⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 randomprojection.java

📁 一个数据挖掘软件ALPHAMINER的整个过程的JAVA版源代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:


  /**
   * Builds the tip text describing the distribution property.
   * The text spells out the two sparse distributions (Sparse1 and Sparse2)
   * together with the probabilities of their outcomes.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String  distributionTipText() {
      StringBuffer tip = new StringBuffer();

      tip.append("The distribution to use for calculating the random matrix.\n");

      // Sparse1: three outcomes scaled by sqrt(3)
      tip.append("Sparse1 is:\n");
      tip.append(" sqrt(3) * { -1 with prob(1/6), \n");
      tip.append("               0 with prob(2/3),  \n");
      tip.append("              +1 with prob(1/6) } \n");

      // Sparse2: two equally likely outcomes
      tip.append("Sparse2 is:\n");
      tip.append(" { -1 with prob(1/2), \n");
      tip.append("   +1 with prob(1/2) } ");

      return tip.toString();
  }
  /**
   * Sets the distribution to use for calculating the random matrix.
   * The request is silently ignored unless the tag array carried by the
   * argument is the TAGS_DSTRS_TYPE array itself (identity comparison).
   *
   * @param newDstr the selected tag whose ID identifies the distribution
   */
  public void setDistribution(SelectedTag newDstr) {
      if (newDstr.getTags() != TAGS_DSTRS_TYPE) {
          return;   // tag comes from a different tag set; keep the current value
      }
      m_distribution = newDstr.getSelectedTag().getID();
  }

  /**
   * Returns the distribution that will be used for calculating the
   * random matrix.
   *
   * @return a SelectedTag built from the current distribution ID and
   * TAGS_DSTRS_TYPE
   */
  public SelectedTag getDistribution() {
      final SelectedTag selected = new SelectedTag(m_distribution, TAGS_DSTRS_TYPE);
      return selected;
  }

  /**
   * Builds the tip text describing the replaceMissingValues property.
   *
   * @return tip text for this property suitable for
   * displaying in the explorer/experimenter gui
   */
  public String replaceMissingValuesTipText() {
    final String filterName = "weka.filters.unsupervised.attribute.ReplaceMissingValues";
    final String tip = "If set the filter uses " + filterName
        + " to replace the missing values";
    return tip;
  }

  /**
   * Sets whether the ReplaceMissingValues filter should be used.
   *
   * @param t true to use the replace missing values filter, false otherwise
   */
  public void setReplaceMissingValues(boolean t) {
      this.m_replaceMissing = t;
  }

  /**
   * Gets the current setting for using the ReplaceMissingValues filter.
   *
   * @return the value last stored by setReplaceMissingValues
   */
  public boolean getReplaceMissingValues() {
      return this.m_replaceMissing;
  }

  /**
   * Sets the format of the input instances and prepares the helper filters.
   * <p>
   * A NominalToBinary filter is created when at least one non-class
   * attribute is nominal (the supervised variant if a class index is set,
   * the unsupervised one otherwise). A ReplaceMissingValues filter is
   * created only when the replaceMissingValues option is switched on.
   * Helper filters that are not needed for this format are cleared so that
   * a filter left over from an earlier call is never applied to it.
   *
   * @param instanceInfo an Instances object containing the input 
   * instance structure (any instances contained in the object are 
   * ignored - only the structure is required).
   * @return true if the outputFormat may be collected immediately
   * @exception Exception if the input format can't be set 
   * successfully
   */
  public boolean setInputFormat(Instances instanceInfo) throws Exception {      
    super.setInputFormat(instanceInfo);

    // Decide whether nominal attributes have to be binarised first.
    // Start from a clean slate so a stale filter is not reused.
    ntob = null;
    final int classIdx = instanceInfo.classIndex();
    for (int i = 0; i < instanceInfo.numAttributes(); i++) {
      if (i != classIdx && instanceInfo.attribute(i).isNominal()) {
        if (classIdx >= 0) {
          ntob = new weka.filters.supervised.attribute.NominalToBinary();
        } else {
          ntob = new weka.filters.unsupervised.attribute.NominalToBinary();
        }
        break;
      }
    }

    // The missing-value filter is governed by the user-visible option.
    // (Previously the code tested the filter field itself, so the option
    // set through setReplaceMissingValues() was never consulted.)
    boolean replaceReady = true;
    if (m_replaceMissing) {
      replaceMissing = new weka.filters.unsupervised.attribute.ReplaceMissingValues();
      replaceReady = replaceMissing.setInputFormat(instanceInfo);
    } else {
      replaceMissing = null;
    }

    // If NominalToBinary cannot fix its output format yet, neither can we;
    // setOutputFormat() is then called later from input()/batchFinished().
    if (ntob != null && !ntob.setInputFormat(instanceInfo)) {
      return false;
    }

    setOutputFormat();
    return replaceReady;
  }

   
  /**
   * Input an instance for filtering. The instance is passed through the
   * ReplaceMissingValues filter (if one is active), then through the
   * NominalToBinary filter (if one is active), and is finally projected
   * and pushed onto the output queue. If either helper filter does not
   * yield an output immediately, nothing is pushed.
   *
   * @param instance the input instance
   * @return true if the filtered instance may now be
   * collected with output().
   * @exception IllegalStateException if no input format has been set
   */
  public boolean input(Instance instance) throws Exception {

    if (getInputFormat() == null) {
      throw new IllegalStateException("No input instance format defined");
    }
    if (m_NewBatch) {
      resetQueue();
      m_NewBatch = false;
    }

    // The instance as it travels through the optional pre-processing stages
    Instance staged = instance;

    // Stage 1: missing value replacement
    if (replaceMissing != null) {
      if (!replaceMissing.input(instance)) {
        return false;
      }
      if (!m_OutputFormatDefined) {
        setOutputFormat();
      }
      staged = replaceMissing.output();
    }

    // Stage 2: nominal to binary conversion
    if (ntob != null) {
      if (!ntob.input(staged)) {
        return false;
      }
      if (!m_OutputFormatDefined) {
        setOutputFormat();
      }
      staged = ntob.output();
    }

    // Stage 3: the random projection itself
    push(convertInstance(staged));
    return true;
  }


  /**
   * Signify that this batch of input to the filter is finished.
   * Instances still buffered in the ReplaceMissingValues filter are
   * forwarded to the NominalToBinary filter (or projected directly when
   * there is none); instances buffered in the NominalToBinary filter are
   * then projected and queued. A helper filter whose batchFinished()
   * reported pending output is discarded afterwards.
   *
   * @return true if there are instances pending output
   * @exception NullPointerException if no input structure has been defined,
   * @exception Exception if there was a problem finishing the batch.
   */
  public boolean batchFinished() throws Exception {
      if (getInputFormat() == null) {
          throw new NullPointerException("No input instance format defined");
      }

      // Step 1: flush the missing value filter, if it has anything to give
      final boolean missingFlushed =
          (replaceMissing != null) && replaceMissing.batchFinished();
      if (missingFlushed) {
          for (Instance pending = replaceMissing.output();
               pending != null;
               pending = replaceMissing.output()) {
              if (!m_OutputFormatDefined) {
                  setOutputFormat();
              }
              if (ntob != null) {
                  ntob.input(pending);
              } else {
                  push(convertInstance(pending));
              }
          }
      }

      // Step 2: flush the nominal-to-binary filter (asked exactly once)
      if (ntob != null && ntob.batchFinished()) {
          for (Instance pending = ntob.output();
               pending != null;
               pending = ntob.output()) {
              if (!m_OutputFormatDefined) {
                  setOutputFormat();
              }
              push(convertInstance(pending));
          }
          ntob = null;
      }

      // Step 3: the missing value filter is dropped only after step 2
      if (missingFlushed) {
          replaceMissing = null;
      }

      m_OutputFormatDefined = false;
      return super.batchFinished();
  }
    

  /**
   * Sets the output format and (re)generates the random matrix.
   * The output consists of m_k numeric attributes named K1..Km_k, followed
   * by the class attribute when the current format has a class index.
   * When m_percent is positive, m_k is first recomputed from the number
   * of attributes of the input format. The random matrix has m_k rows and
   * one column per attribute of the current format (the NominalToBinary
   * output format when that filter is active, the input format otherwise),
   * and is filled from a generator seeded with m_rndmSeed.
   */
  private void setOutputFormat() {
      final Instances currentFormat =
          (ntob != null) ? ntob.getOutputFormat() : getInputFormat();

      if (m_percent > 0) {
          m_k = (int) ((getInputFormat().numAttributes() - 1) * m_percent);
      }

      // Projected attributes K1 .. Km_k
      FastVector attributes = new FastVector();
      for (int n = 1; n <= m_k; n++) {
          attributes.addElement(new Attribute("K" + n));
      }

      // The class attribute, when present, goes last
      int newClassIndex = -1;
      final int oldClassIndex = currentFormat.classIndex();
      if (oldClassIndex != -1) {
          attributes.addElement(currentFormat.attribute(oldClassIndex));
          newClassIndex = attributes.size() - 1;
      }

      Instances newFormat = new Instances(currentFormat.relationName(), attributes, 0);
      if (newClassIndex != -1) {
          newFormat.setClassIndex(newClassIndex);
      }
      m_OutputFormatDefined = true;

      // Fresh generator so the same seed always yields the same matrix
      r = new Random(m_rndmSeed);

      rmatrix = new double[m_k][currentFormat.numAttributes()];
      final boolean gaussian = (m_distribution == GAUSSIAN);
      final boolean useDstrWithZero = (m_distribution == SPARSE1);
      for (int row = 0; row < rmatrix.length; row++) {
          final double[] cells = rmatrix[row];
          for (int col = 0; col < cells.length; col++) {
              cells[col] = gaussian ? r.nextGaussian() : rndmNum(useDstrWithZero);
          }
      }

      setOutputFormat(newFormat);
  }


  /**
   * Converts a single instance to the output format by multiplying its
   * non-class attribute values with the random matrix. Missing values are
   * skipped, i.e. they contribute nothing to the projected values. The
   * class value, if a class index is set, is copied unchanged into the
   * last position.
   * <p>
   * The stored values are walked by position and mapped back to their
   * attribute index, so sparse instances are projected with the correct
   * matrix columns (for dense instances position and attribute index
   * coincide).
   *
   * @param currentInstance the instance to project; it must match the
   * format the random matrix was built for
   * @return the projected instance, attached to the output format; a
   * SparseInstance if the input was one
   */
  private Instance convertInstance(Instance currentInstance) {

      final double[] projected = new double[getOutputFormat().numAttributes()];
      final int classIndex = (ntob == null)
          ? getInputFormat().classIndex()
          : ntob.getOutputFormat().classIndex();
      final int stored = currentInstance.numValues();

      for (int i = 0; i < m_k; i++) {
          for (int pos = 0; pos < stored; pos++) {
              // numValues() counts stored values, not attributes, so the
              // attribute index has to be looked up for each position
              final int attIndex = currentInstance.index(pos);
              if (classIndex != -1 && attIndex == classIndex) {
                  continue;   // the class value is copied separately below
              }
              if (!currentInstance.isMissingSparse(pos)) {
                  projected[i] += rmatrix[i][attIndex] * currentInstance.valueSparse(pos);
              }
          }
      }
      if (classIndex != -1) {
          projected[m_k] = currentInstance.value(classIndex);
      }

      Instance newInstance;
      if (currentInstance instanceof SparseInstance) {
          newInstance = new SparseInstance(currentInstance.weight(), projected);
      }
      else {
          newInstance = new Instance(currentInstance.weight(), projected);
      }
      newInstance.setDataset(getOutputFormat());

      return newInstance;
  }



  /** Relative weights of the outcomes in vals: 1:1:4, i.e. probabilities 1/6, 1/6 and 2/3 */
  private static final int weights[] = {1, 1, 4};
  /** Outcomes of the distribution with zero, before scaling by sqrt3 in rndmNum */
  private static final int vals[] = {-1, 1, 0};
  /** Relative weights of the outcomes in vals2: 1:1, i.e. probability 1/2 each */
  private static final int weights2[] = {1, 1};
  /** Outcomes of the distribution without zero */
  private static final int vals2[] = {-1, 1};
  /** Square root of 3, the scaling factor applied to the outcomes in vals */
  private static final double sqrt3 = Math.sqrt(3);

  /**
   * Draws one entry for the random matrix from one of the two sparse
   * distributions.
   *
   * @param useDstrWithZero if true the result is
   * sqrt(3) * { -1 with prob. 1/6, 0 with prob. 2/3, 1 with prob. 1/6 };
   * otherwise it is { -1 with prob. 1/2, 1 with prob. 1/2 }
   * @return the drawn value
   */
  private double rndmNum(boolean useDstrWithZero) {
      if (!useDstrWithZero) {
          return vals2[weightedDistribution(weights2)];
      }
      return sqrt3 * vals[weightedDistribution(weights)];
  }

  /**
   * Picks an index at random, where each index is chosen with a
   * probability proportional to its weight. One double is drawn from the
   * generator r per call.
   *
   * @param weights the integer weight of each index
   * @return the chosen index, or -1 if no index was selected
   */
  private int weightedDistribution(int [] weights) {
      int total = 0;
      for (int k = 0; k < weights.length; k++) {
          total += weights[k];
      }

      // Integer in [0, total) that is walked down through the weights
      int remaining = (int) Math.floor(r.nextDouble() * total);

      int slot = 0;
      while (slot < weights.length) {
          remaining -= weights[slot];
          if (remaining < 0) {
              return slot;
          }
          slot++;
      }
      return -1;
  }  

  /**
   * Main method for testing this class. Runs the filter over files in
   * batch mode when the -b flag is present and in normal mode otherwise;
   * the message of any exception is printed to standard output.
   *
   * @param argv should contain arguments to the filter: 
   * use -h for help
   */
  public static void main(String [] argv) {

    try {
      final boolean batchMode = Utils.getFlag('b', argv);
      final RandomProjection filter = new RandomProjection();
      if (batchMode) {
        Filter.batchFilterFile(filter, argv);
      } else {
        Filter.filterFile(filter, argv);
      }
    } catch (Exception ex) {
      System.out.println(ex.getMessage());
    }
  }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -