⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instancelist.java

📁 java数据挖掘算法
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
     *
     * @param mask Boolean array of the same length as the number of attributes. TRUE values
     * indicate that attribute should have a RealAndLabelColumn object created for it,
     * FALSE otherwise.
     * @return An array of RealAndLabelColumns generated from the attribute values for the
     * Instances stored in this InstanceList.
     */    
    public RealAndLabelColumn[] transpose(boolean[] mask) {
        // Asking for the instance count up front means walking the whole
        // instance list, but it lets each column be allocated as one piece,
        // which is meant to keep later splitting efficient.
        final int attrCount = schema.num_attr();
        // One slot per attribute; slots of attributes that are not selected
        // below keep their default null value.
        RealAndLabelColumn[] columns = new RealAndLabelColumn[attrCount];

        // Phase 1: create a column only for attributes that can be cast to
        // real and that are switched on in the mask.
        final int instCount = num_instances();
        int createdColumns = 0;
        for (int attr = 0; attr < attrCount; attr++) {
            AttrInfo info = schema.attr_info(attr);
            if (!info.can_cast_to_real() || !mask[attr]) {
                continue;
            }
            columns[attr] = schema.is_labelled()
                ? new RealAndLabelColumn(instCount,
                                         schema.num_label_values(),
                                         info.cast_to_real(),
                                         nominal_label_info())
                : new RealAndLabelColumn(instCount, info.cast_to_real());
            createdColumns++;
        }

        // Phase 2: walk the instances once and append every selected
        // attribute value (plus label and weight when labelled) to its column.
        if (!schema.is_labelled()) {
            ListIterator it = instances.listIterator();
            while (it.hasNext()) {
                Instance current = (Instance) it.next();
                double instWeight = current.get_weight();
                for (int attr = 0; attr < attrCount; attr++) {
                    RealAndLabelColumn target = columns[attr];
                    if (target != null) { // null means the attribute was not selected
                        RealAttrInfo realInfo = target.attr_info();
                        AttrValue value = current.get_value(attr);
                        if (!realInfo.is_unknown(value)) {
                            target.add_known((float) (realInfo.get_real_val(value)), instWeight);
                        } else {
                            target.add_unknown(instWeight);
                        }
                    }
                }
            }
        } else {
            NominalAttrInfo labelInfo = schema.label_info().cast_to_nominal();
            ListIterator it = instances.listIterator();
            while (it.hasNext()) {
                Instance current = (Instance) it.next();
                AttrValue labelValue = current.get_label();
                MLJ.ASSERT(!labelInfo.is_unknown(labelValue),"InstanceList.transpose: label is unknown");
                // The +1 offsets the schema's nominal value (original note by JL);
                // the two Globals base values are then subtracted to obtain the
                // label index handed to the column.
                int shiftedLabel = labelInfo.get_nominal_val(labelValue) + 1;
                int labelIndex = shiftedLabel - Globals.FIRST_CATEGORY_VAL - Globals.FIRST_NOMINAL_VAL;
                double instWeight = current.get_weight();
                for (int attr = 0; attr < attrCount; attr++) {
                    RealAndLabelColumn target = columns[attr];
                    if (target != null) { // null means the attribute was not selected
                        RealAttrInfo realInfo = target.attr_info();
                        AttrValue value = current.get_value(attr);
                        if (!realInfo.is_unknown(value)) {
                            target.add_known((float) (realInfo.get_real_val(value)),
                                             labelIndex, instWeight);
                        } else {
                            target.add_unknown(labelIndex, instWeight);
                        }
                    }
                }
            }
        }

        logOptions.LOG(6, "Instance list transposed into " + createdColumns + " columns." + '\n');
        return columns;
    }
    
    /** Tests whether the cached total weight of this InstanceList is
     * approximately zero. The weight is narrowed to float before being
     * compared against 0.0 with MLJ.approx_equal.
     * @return TRUE if the total weight is approximately equal to 0, FALSE
     * otherwise.
     */
    public boolean no_weight() {
        final float narrowedWeight = (float) totalWeight;
        return MLJ.approx_equal(narrowedWeight, 0.0);
    }
    
    /** Returns the category of the label that carries the most weight in
     * this InstanceList.
     * <p>
     * The per-label weights are taken from the bag counters when they are
     * present; otherwise they are accumulated here by visiting every
     * instance once, which is why the call is considerably faster with
     * counters. Without counters the label must be described by a
     * NominalAttrInfo (it is obtained through nominal_label_info()).
     * <p>
     * The winner, including the handling of ties, is chosen by
     * CatDist.majority_category from the label counts and the given
     * tie-breaking order. The original documentation states that ties go to
     * the preferred tie breaker if it is among those tied, otherwise to the
     * label that occurs first in the NominalAttrInfo, and that the running
     * time is proportional to the number of categories plus the number of
     * instances.
     *
     * @param tieBreakingOrder Array indicating the order in which ties are
     * broken; lower values are more favorable than higher values. It is
     * passed unchanged to CatDist.majority_category.
     * NOTE(review): earlier documentation described this array as having one
     * element per attribute; confirm the expected indexing against CatDist.
     * @return The category that occurs the most in this InstanceList; the
     * original documentation states UNKNOWN_CATEGORY_VAL is returned if
     * there are no instances.
     */
    public int majority_category(int[] tieBreakingOrder) {
        double[] labelCounts;
        if (bagCounters == null) {
            // No counters available: tally the weight of each label by hand.
            NominalAttrInfo labelInfo = nominal_label_info();
            // One extra slot because indexing starts at
            // Globals.UNKNOWN_CATEGORY_VAL rather than Globals.FIRST_CATEGORY_VAL.
            labelCounts = new double[labelInfo.num_values() + 1];
            ListIterator it = instance_list().listIterator();
            while (it.hasNext()) {
                Instance current = (Instance) it.next();
                int slot = labelInfo.get_nominal_val(current.get_label());
                labelCounts[slot] += current.get_weight();
            }
        } else {
            labelCounts = bagCounters.label_counts();
        }
        return CatDist.majority_category(labelCounts, tieBreakingOrder);
    }
    
    /** Builds a new InstanceList that contains only the attributes selected
     * by the given mask. The schema is projected first, then every instance
     * is projected onto the new schema and added to the result.
     * InstanceList.project() is documented as taking
     * O(num attributes * (num instances + num attributes)) time.
     * @param attrMask A boolean array with the same number of values as there are
     * attributes (this is asserted). Each boolean element corresponds to an
     * attribute in the order they were input. True values represent
     * attributes that are kept.
     * @return An InstanceList with a new Schema that includes only the attributes
     * with a mask value true, or null if a CloneNotSupportedException occurs
     * (its stack trace is printed).
     */
    public InstanceList project(boolean[] attrMask) {
        // The slow debug-level OK() check of the original is not performed here.
        MLJ.ASSERT(attrMask.length == num_attr(),"InstanceList.project: attrMask's length does not match number of attributes");
        InstanceList projected = null;
        try {
            Schema projectedSchema = get_schema().project(attrMask);
            InstanceList result = new InstanceList(projectedSchema);
            ListIterator it = instances.listIterator();
            while (it.hasNext()) {
                Instance source = (Instance) it.next();
                result.add_instance(source.project(projectedSchema, attrMask));
            }
            // Only publish the list once every instance has been projected.
            projected = result;
        } catch (CloneNotSupportedException e) {
            e.printStackTrace();
        }
        return projected;
    }
    
    /** Switches number parsing on the given StreamTokenizer on or off.
     * When switched off, the digits '0' through '9' are first reset to
     * ordinary characters and then registered as word characters, so digit
     * runs are tokenized as words.
     * NOTE(review): only the digits are reset here; if the tokenizer had
     * number parsing enabled, '.' and '-' presumably keep their numeric
     * attribute - confirm this is intended for tokens such as "-5" or ".5".
     * @param stream StreamTokenizer tokenizing information.
     * @param ifYes TRUE if numbers should be parsed as double values, FALSE otherwise.
     */
    private void parseNumbers(StreamTokenizer stream, boolean ifYes) {
        if (!ifYes) {
            final int firstDigit = '0';
            final int lastDigit = '9';
            stream.ordinaryChars(firstDigit, lastDigit);
            stream.wordChars(firstDigit, lastDigit);
            return;
        }
        stream.parseNumbers();
    }
    
    /** Returns a String representation of this InstanceList object: the
     * output of every instance, in list order, concatenated together. If
     * the list reports no instances, the text
     * "InstanceList.display: No instances" is appended.
     * @param normalizeReal TRUE if real values in an Instance object should be normalized.
     * @return A String representation of this InstanceList object.
     */
    public String out(boolean normalizeReal) {
        // Accumulate in a buffer: repeated String concatenation copies the
        // whole text on every iteration, which is quadratic in the output size.
        // StringBuffer is used because it is available on every Java version.
        StringBuffer text = new StringBuffer();
        ListIterator pix = instances.listIterator(0);
        while (pix.hasNext()) {
            Instance inst = (Instance) pix.next();
            text.append(inst.out(is_weighted(), normalizeReal));
        }
        if (no_instances()) {
            text.append("InstanceList.display: No instances");
        }
        return text.toString();
    }
    
    /** Returns a copy of this InstanceList object, built with the
     * InstanceList(InstanceList, boolean) constructor.
     * @param preserveCounters TRUE if counters of values should be copied, FALSE otherwise.
     * @return A new object with a copy of the data stored in this InstanceList.
     */
    public Object clone(boolean preserveCounters) {
        InstanceList copy = new InstanceList(this, preserveCounters);
        return copy;
    }
    
    
    /** Returns a copy of this InstanceList object, built with the
     * InstanceList(InstanceList, boolean) constructor. Counters are not
     * preserved.
     * @return A new object with a copy of the data stored in this InstanceList.
     */
    public Object clone() {
        final boolean preserveCounters = false;
        InstanceList copy = new InstanceList(this, preserveCounters);
        return copy;
    }
    
    /** Runs the integrity check at level 0 by delegating to OK(int).
     * Comments    : The intent of the level-0 check is that all instances
     * share the same schema as the list. Because the schema has attrinfo's
     * that are updated, everyone must share the EXACT representation, not
     * just logical equivalence.  Specifically, if the schema is updated, we
     * want to make sure all instances see the exact same min/max for
     * RealAttrInfo's.
     *
     */
    public void OK() {
        final int defaultLevel = 0;
        OK(defaultLevel);
    }
    
    /** Integrity-check hook. In its current form this method performs no
     * checks: the whole body is a commented-out, C++-style reference
     * implementation, so the call returns immediately and the level
     * argument is ignored.
     * <p>
     * The reference implementation describes the intended checks:
     * at level 0, that all instances share the same schema as the list;
     * that the counters (if any) agree with the total weight; that the
     * cached total weight matches the sum of the instance weights (weighted
     * lists) or the number of instances (unweighted lists); and that the
     * total weight is not negative.
     * Comments    : Because the schema has attrinfo's that are updated,
     * everyone must share the EXACT representation, not
     * just logical equivalence.  Specifically, if the schema
     * is updated, we want to make sure all instances see
     * the exact same min/max for RealAttrInfo's
     *
     * @param level Level of checking done (currently unused).
     */
    public void OK(int level) {
        // Everything below is kept only as a porting reference; none of it
        // is executed.
/*   if (level < 1 && schema != null && instances) {
      Schema schemaRep = get_schema().read_rep();
      for (ILPix pix(*this); pix; ++pix)
         if ((*pix).get_schema().read_rep() != schemaRep)
            err << "InstanceList.OK mismatch in schemas for list and "
               " instance.   Instance is:\n" << *pix <<
               " with schema " << (void *)(*pix).get_schema().read_rep() <<
               " = " << (*pix).get_schema() <<
               "\nList schema is: " << (void *)schemaRep <<
               " = " << *schemaRep << fatal_error;
   }
 
   // Check that the counters agree with the actual number of instances
   //   if we have counters
   // WARNING:  we need to use totalWeight here instead of total_weight().
   //   total_weight() calls OK() in DBG level 2, which would lead
   //   to infinite recursion!!!
   if(bagCounters) {
      double num = counters().OK(); // @@ Dan, change num to weight?
      MLJ.verify_approx_equal(StoredReal(num), StoredReal(totalWeight),
                              "InstanceList.OK: "
                              "Counters claim of weight does not match "
                              "list's total weight");
   }
   ASSERT(instances);
 
   // Check for numerical inaccuracy in the totalWeight cache.
   // DO NOT refresh the cache here because we don't want results
   //   to differ in high debug levels!
   // Check that totalWeight is correct (if we're weighted)
   // Passing StoredReals to approx_equal means using coarse granularity.
   if(is_weighted()) {
      Real compTotalWeight = 0;
      for (ILPix pix(*this); pix; ++pix)
         compTotalWeight += get_weight(pix);
      mlc.verify_approx_equal(StoredReal(compTotalWeight),
                              StoredReal(totalWeight),
                              "InstanceList.OK: computed weight "
                              "fails to match totalWeight");
   }
 
   // Check that totalWeight is close to number of instances
   //   (if we're not weighted)
   // Passing StoredReals to approx_equal means using coarse granularity.
   if(!is_weighted()) {
      mlc.verify_approx_equal((StoredReal) num_instances(),
                              (StoredReal) totalWeight,
                              "InstanceList.OK: List is unweighted, "
                              "but total weight fails to match whole "
                              "number of instances");
   }
 
   // Total weight may not be negative
   if (totalWeight < -MLC.stored_real_epsilon())
      err << "InstanceList.OK: total weight (" << totalWeight
          << ") is negative" << fatal_error;
 */
    }
    
    /** Displays the names file associated with the InstanceList.
     * @param stream Writer object to which the names file will be displayed.
     * @param protectChars TRUE if protected characters are used, FALSE otherwise.
     * @param header A String to use for the header to the display.
    

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -