⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 schema.java

📁 决策树分类中经典算法的ID3和C4.5代码公共包!
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
    */
   public boolean is_labelled(boolean fatalOnFalse)
   {
      // Label information present: the schema is labelled, nothing to report.
      if (labelInfo != null) {
         return true;
      }

      // No label information. The error is reported only when the caller
      // asked for a missing label to be fatal; the answer is false either way.
      if (fatalOnFalse) {
         Error.err("Schema::is_labelled(): labelInfo "
            + "is NULL --> fatal_error");
      }
      return false;
   }

   /** Checks if there are any duplicate attributes in this Schema and displays
    * an error message listing them if they do exist.
    * @return TRUE if duplicate attributes exist, FALSE otherwise.
    */
   private boolean check_duplicate_attrs()
   {
      // This check always treats duplicates as an error condition.
      final boolean fatalOnTrue = true;

      final String[] duplicateNames = find_duplicate_attrs();
      final boolean duplicatesFound = duplicateNames.length != 0;

      // Only a generic message is emitted; the duplicate names themselves
      // are not written out here.
      if (duplicatesFound && fatalOnTrue) {
         Error.err("Schema::check_duplicate_attrs: Duplicate"
            + " attributes found... fatal_error");
      }
      return duplicatesFound;
   }

   /** Finds any duplicate attributes in this Schema.
    * @return The names of the duplicate attributes.
    */
   private String[] find_duplicate_attrs()
   {
      // Gather every attribute name, then sort so equal names become neighbours.
      String[] sortedNames = new String[num_attr()];
      for (int idx = 0; idx < sortedNames.length; idx++) {
         sortedNames[idx] = attr_name(idx);
      }
      Arrays.sort(sortedNames);

      // Compare each name with its predecessor; a repeated name is recorded
      // once, no matter how many copies of it exist.
      String[] found = new String[0];
      for (int idx = 1; idx < sortedNames.length; idx++) {
         final String previous = sortedNames[idx - 1];
         final String current = sortedNames[idx];
         if (!current.equals(previous)) {
            continue;
         }

         final int count = found.length;
         if (count > 0 && previous.equals(found[count - 1])) {
            continue; // third or later copy of a name already recorded
         }

         // Grow the result by one slot and append the duplicate name.
         String[] grown = new String[count + 1];
         System.arraycopy(found, 0, grown, 0, count);
         grown[count] = current;
         found = grown;
      }

      // Emit a log entry when at least one duplicate was found.
      if (found.length > 0) {
         Error.err("GLOBALLOG-->Schema::find_diplicate_attrs : "
             + " Schema containing duplicates");
      }

      return found;
   }
   
   /** Returns the name of the specified attribute.
    * @return The name of the specified attribute.
    * @param attrNum	The index number of the specified attribute.
    */
   public String attr_name(int attrNum)
   {
      // Look up the attribute's info object first, then ask it for its name.
      final AttrInfo info = attr_info(attrNum);
      return info.name();
   }

   /** Returns the number of attributes in this Schema object.
    * @return The number of attributes in this Schema object.
    */
   public int num_attr()
   {
      // The attribute count is simply the length of the backing array.
      final int count = attr.length;
      return count;
   }

   /** Returns the AttrInfo object containing the specified attribute's
    * information.
    * @return The AttrInfo object containing the specified attribute's
    * information.
    * @param num	The index of the specified attribute.
    */
   public AttrInfo attr_info(int num)
   {
      // Direct array access: an out-of-range index fails with the usual
      // array bounds exception.
      final AttrInfo info = attr[num];
      return info;
   }

   /** Returns the name of the specified category label.
    * @return The name of the specified category label.
    * @param cat	The specified category.
    */
   public String category_to_label_string(int cat)
   {
      // The label is looked up through its nominal view, which maps the
      // category number to its value string.
      final NominalAttrInfo labelInfoAsNominal = nominal_label_info();
      return labelInfoAsNominal.get_value(cat);
   }

   /** Displays the names file associated with the instances. The reason we
    * don't call labelInfo.display_values() is to avoid printing the name of the
    * label since a label doesn't have a name.
    * @param stream The Writer to which the Schema will be displayed.
    * @param protectChars TRUE if characters should be protected for display.
    * @param header The String to use as a header to the display.
    */
   public void display_names(Writer stream,
         boolean protectChars, String header)
   {
      try {
         // Header line, introduced by a '|' character.
         stream.write("|"+header+"\n");

         // Label values, or a "nolabel" marker when the schema has no label.
         if (!is_labelled()) {
            stream.write("nolabel\n");
         } else {
            label_info().display_attr_values(stream, protectChars);
         }
         stream.write("\n"); // blank separator line for clarity

         // One entry per attribute: its name followed by its values.
         int attrNum = 0;
         while (attrNum < num_attr()) {
            stream.write(attr[attrNum].name()+": ");
            attr[attrNum].display_attr_values(stream, protectChars);
            attrNum++;
         }
      } catch (IOException e) {
         // A write failure is treated as unrecoverable: print the trace and exit.
         e.printStackTrace();
         System.exit(1);
      }
   }

   /** Returns TRUE if all attributes in the schema are nominals
    * (or can be cast to nominals).
    * @return TRUE if all of the attributes are nominal, FALSE otherwise.
    */
  public boolean is_nominal() {
    // Every attribute is queried (there is no early exit); the individual
    // answers are combined with a non-short-circuit AND.
    boolean allNominal = true;
    int index = 0;
    while (index < num_attr()) {
      allNominal &= attr_info(index).can_cast_to_nominal();
      index++;
    }
    return allNominal;
  }

  /** Creates the permutation array for the labels if sorting is requested.
   * The array is newly allocated and is owned by the caller.
   * @return An array of index values of the label information after sorting.
   */
  public int[] sort_labels() {
    NominalAttrInfo nai = nominal_label_info();
    int numLabelValues = nai.num_values();
    int[] permutation = new int[numLabelValues];

    // Find the label permutation for sorted labels.
    // Sorting is hard-wired on here; the option lookup is not used:
    //   get_option_bool("SORT_LABELS", TRUE, "Sort the labels", TRUE);
    boolean sortLabels = true;

    // Pair every label value with its original index. Each slot has to be
    // populated explicitly: a freshly created object array holds only nulls,
    // so writing to labels[i].str without constructing the element first
    // fails with a NullPointerException.
    StringNum[] labels = new StringNum[numLabelValues];
    for (int i = 0; i < numLabelValues; i++) {
      labels[i] = new StringNum(nai.get_value(i));
      labels[i].num = i;
      // convert() parses the label text as a double and throws
      // NumberFormatException when the text is not numeric.
      labels[i].convert();
    }

    // Linear label attributes keep their existing order.
    if (sortLabels && !nai.is_linear()) {
      labels = sort(labels);
    }

    // Position i of the result holds the original index of the label that
    // ends up at position i.
    for (int i = 0; i < numLabelValues; i++) {
      permutation[i] = labels[i].num;
    }
    return permutation;
  }



  /** String to Double transformation device.
   */  
public class StringNum {
  /** An integer carried alongside the string; this class never reads or
   * changes it.
   */
  public int num;

  /** The text to be converted. */
  public String str;

  /** The numeric value parsed from str; set by convert(). */
  public double dstr;

  /** TRUE once convert() has parsed the text, FALSE otherwise. */
  public boolean isconverted = false;

  /** Constructor.
   * @param s The text to be converted.
   */
  public StringNum(String s) {
    this.str = s;
  }

  /** Parses the stored text as a double and stores the result in dstr.
   * Only the first successful call does any work; later calls leave dstr
   * untouched even if str has been changed in the meantime.
   */
  public void convert() {
    if (isconverted) {
      return;
    }
    dstr = Double.parseDouble(str);
    isconverted = true;
  }
}

/** Sorts an array of StringNums from smallest to greatest value.
 * @param sn An array of StringNum values to be sorted.
 * @return The sorted array of StringNum values.
 */
  public static StringNum[] sort(StringNum[] sn) {
    final int length = sn.length;

    // Selection sort, in place: each slot receives the smallest remaining
    // element (by dstr), swapped in from wherever it currently sits.
    for (int slot = 0; slot < length; slot++) {
      final int smallest = findindexofmin(sn, slot);
      final StringNum displaced = sn[slot];
      sn[slot] = sn[smallest];
      sn[smallest] = displaced;
    }

    // Sanity pass: any adjacent pair in descending order is reported as fatal.
    for (int left = 0, right = 1; right < length; left++, right++) {
      if (sn[left].dstr > sn[right].dstr) {
        Error.fatalErr("Schema.sort(): Sort error at index "+left+".");
      }
    }
    return sn;
  }

  /** Finds the index of the minimum value in an array of StringNum values.
   * @param sn The array of StringNum values to be searched.
   * @param start The start index.
   * @return The index of the smallest value at or after the start index.
   */  
  public static int findindexofmin(StringNum[] sn, int start) {
    // The element at start is the initial candidate; reading it up front
    // also means an out-of-range start fails immediately.
    int best = start;
    double smallest = sn[best].dstr;

    // Scan the rest; only a strictly smaller value replaces the candidate,
    // so the first of several equal minima wins.
    int probe = start + 1;
    while (probe < sn.length) {
      final double candidate = sn[probe].dstr;
      if (candidate < smallest) {
        smallest = candidate;
        best = probe;
      }
      probe++;
    }
    return best;
  }

  /** Removes a loss matrix from this Schema object. This occurs during reading if any
   * extra values get added to the label
   */  
  public void remove_loss_matrix() {
      // Dropping the reference is all that is needed to remove the matrix.
      this.lossMatrix = null;
  }
  
  /** Returns a Schema with the given AttrInfo removed and the infos renumbered to be
   * sequential starting at 0.
   *
   * @param attrNum The number of the attribute to be removed.
   * @return A copy of this Schema with the specified attribute removed.
   */  
  public Schema remove_attr(int attrNum) {
      try {
          // Report indices outside 0 .. num_attr()-1.
          if ( !(attrNum >= 0 && attrNum < num_attr()) ) {
              Error.fatalErr("Schema.remove_attr(const int): illegal attrNum \n"
              +attrNum+" is passed but the proper range is \n 0 to "+(num_attr() - 1)
              +".");
          }

          LinkedList attrInfos = new LinkedList();

          // Clones of the attributes in front of the removed one keep their positions.
          int i = 0;
          while (i < attrNum) {
              attrInfos.add((AttrInfo)attr_info(i).clone());
              i++;
          }

          // Clones of the attributes behind it each move down one position.
          i = attrNum + 1;
          while (i < num_attr()) {
              attrInfos.add((AttrInfo)attr_info(i).clone());
              i++;
          }

          // An unlabelled schema stays unlabelled; otherwise the label info
          // is cloned as well and handed to the new schema.
          if (!is_labelled()) {
              return new Schema(attrInfos);
          }
          AttrInfo label = (AttrInfo)label_info().clone();
          return new Schema(attrInfos, label);
      } catch (CloneNotSupportedException e) {
          // A failed clone is reported on stderr and the caller receives null.
          e.printStackTrace();
          return null;
      }
  }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -