⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instancelist.java

📁 java数据挖掘算法
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
    
    /** Adds a new instance to the list, using the structures maintained by
     * InstanceReader.  Properly updates both schemas so that automatic instance
     * removal will work.
     * @param vals The values of the instance to be added.
     * @param labelVal The label value of the instance to be added.
     * @param weight The weight of the instance to be added.
     * @param allowUnknownLabels TRUE if unknown label values are allowed for the instance to be added.
     * @return A new Instance object containing the supplied information.
     */
    public Instance reader_add_instance(AttrValue[] vals, AttrValue labelVal, double weight, boolean allowUnknownLabels) {
        // The supplied values must line up one-to-one with the schema's attributes.
        if(schema.num_attr() != vals.length)
            Error.err("InstanceList.reader_add_instance: "
            +"schema has "+schema.num_attr()+" attributes, while supplied "
            +"array has "+vals.length+" -->fatal_error");

        // Build the instance from the supplied values; the label is optional.
        Instance newInst = new Instance(schema);
        for(int i=0;i<vals.length;i++)
            newInst.values[i] = vals[i];
        if(labelVal!=null)
            newInst.set_label(labelVal);

        // Set the weight.  A weight near zero is silently clamped to exactly
        // zero.  A negative weight is never added: the instance is returned
        // with weight 0 and a (rate-limited) warning is printed.
        if(MLJ.approx_equal(weight,0))
            weight = 0; //silent clamp
        else if(weight < 0) {
            newInst.set_weight(0.0);
            // The warning count lives in a static field so that it survives
            // between calls; a per-call local would restart at zero each time
            // and the suppression below would never take effect.
            if(negWeightWarningsIssued < MAX_NEG_WEIGHT_WARNINGS) {
                negWeightWarningsIssued++;
                System.out.println("Instance has a negative weight and will "
                +"be ignored");
                if(negWeightWarningsIssued == MAX_NEG_WEIGHT_WARNINGS)
                    System.out.println("There have been max amount of warning on "
                    +"negative weights.  Further warnings will be suppressed");
            }
            return newInst;
        }
        newInst.set_weight(weight);

        // Do not add the instance if the weight is zero and removeZeroWeights
        // is set.  Exact comparison is safe here because near-zero weights
        // were clamped to 0 above.
        if(weight == 0.0 && removeZeroWeights)
            return newInst;

        // Unlabelled instances need no label checks: add and return.
        if(labelVal == null) {
            add_instance(newInst);
            return newInst;
        }

        // Report when the label has more values than the configured limit.
        // NOTE(review): the message ends in "-->fatal_error" but is only
        // printed, so execution continues past this point -- confirm whether
        // this was meant to go through Error.err like the check at the top.
        if(schema.nominal_label_info().num_values() > get_max_label_vals()){
            if(mineset)
                System.out.println("MINESET clause encountered in InstanceList");
            else
                System.out.println("InstanceList.reader_add_instance: the"
                +" selected label '"+schema.label_info().name() +"' has more "
                +"than the current limit of "+get_max_label_vals() +" label "
                +"values.  It is highly recommeded that you do not use an "
                +"attribute with many label values, but you may increase "
                +"the paramter MAX_LAVEL_VALS to allow this operation "
                +"-->fatal_error");
        }

        // An unknown label is only accepted when the caller allows it;
        // otherwise the instance is not added and a rate-limited warning is
        // printed.
        if(schema.label_info().is_unknown(labelVal) && !allowUnknownLabels) {
            if(unknownLabelWarningsIssued < MAX_UNKNOWN_LABEL_WARNING) {
                unknownLabelWarningsIssued++;
                System.out.println("Warning: instance has an unknown label"
                +" value and will be ignored!");
                if(unknownLabelWarningsIssued == MAX_UNKNOWN_LABEL_WARNING)
                    System.out.println("There have been max amount of label "
                    +"warnings on unknown labels, further warnings will be "
                    +"suppressed ");
            }
        }else
            add_instance(newInst);
        return newInst;
    }

    /** Number of negative-weight warnings printed so far by
     * reader_add_instance.  Shared by all lists so that the cap on warnings
     * holds across calls.
     */
    private static int negWeightWarningsIssued = 0;

    /** Number of unknown-label warnings printed so far by
     * reader_add_instance.  Shared by all lists so that the cap on warnings
     * holds across calls.
     */
    private static int unknownLabelWarningsIssued = 0;
    
    /** Adds the specified Instance to this InstanceList.
     * @return A ListIterator of all Instances in this InstanceList.
     * @param instance	The Instance to be added.
     */
    public ListIterator add_instance(Instance instance) {
        // Note: the schema-equality assertion and the step that re-points the
        // instance's schema at the list's schema are not performed here.

        // Keep the cached weight total in step with the list contents; one
        // non-unit weight is enough to mark the whole list as weighted.
        final double addedWeight = instance.get_weight();
        totalWeight += addedWeight;
        if(addedWeight != 1.0) {
            weighted = true;
        }

        // Counters are optional; feed them only when they exist.
        if(bagCounters != null) {
            bagCounters.add_instance(instance);
        }

        // The list stores a clone of the supplied Instance, not the object
        // that was passed in.
        try {
            instances.add(instance.clone());
        } catch(CloneNotSupportedException e) {
            Error.err("InstanceList.add_instance:CloneNotSupportedException caught");
        }

        // Hand back an iterator positioned at the start of the list.
        return instances.listIterator(0);
    }
    
    /** Updates the list by removing specified attributes. This is similar to
     * the project() call, except that it is designed to be used WHILE READING.
     * The size of the projMask may be larger than the number of attributes in
     * the schema. This is to allow InstanceReader to maintain a single copy of
     * the projMask even as the schema shrinks.
     * @param projMask A boolean array with at least as many values as there are
     * attributes. Each boolean element corresponds to an attribute,
     * in the order they were input. True values represent
     * attributes that are used.
     */
    public void update_for_overflows(boolean[] projMask) {
        // Only the leading schema-sized portion of the mask is relevant; look
        // there for the first attribute that is switched off.
        final int attrCount = schema.num_attr();
        boolean anyExcluded = false;
        for(int idx = 0; idx < attrCount && !anyExcluded; idx++) {
            anyExcluded = !projMask[idx];
        }

        // Every attribute is still in use: nothing to project.
        if(!anyExcluded) {
            return;
        }

        // project_in_place is handed a mask of exactly the schema's size, so
        // copy the relevant prefix.  Projection is rare, so the copy is cheap
        // enough.
        boolean[] sizedMask = new boolean[attrCount];
        int pos = 0;
        while(pos < attrCount) {
            sizedMask[pos] = projMask[pos];
            pos++;
        }

        try {
            project_in_place(sizedMask);
        } catch(CloneNotSupportedException e) {
            Error.err("InstanceList.update_for_overflows:"
            +" clone not supported exception was caught");
        }
    }
    
    /** Returns the tiebreaking order that CatDist derives from the label
     * counts of this InstanceList.
     * @return The tiebreaking order.
     */
    public int[] get_distribution_order() {
        // Delegates to CatDist, passing it the label counts taken from this
        // list's counters().
        return CatDist.tiebreaking_order(counters().label_counts());
    }
    
    /** Returns the sum of the weights of all Instances in the InstanceList.
     * This value is cached for faster access.
     * @return The sum of weights for all Instances stored in this InstanceList.
     */
    public double total_weight(){return total_weight(false);} // false: return the cached sum without recalculating
    
    /** Returns the sum of the weights of all Instances in the InstanceList.
     * This value is cached for faster access, but can be recalculated to
     * avoid the numerical instabilities involved in weight updates.
     * @return The sum of weights for all Instances stored in this InstanceList.
     * @param recalculate	TRUE if the sum should be recalculated, FALSE if
     * the cached value should be used.
     */
    public double total_weight(boolean recalculate) {
        // When asked, rebuild the cached total from the individual instance
        // weights; otherwise the cached value is returned untouched.
        if(recalculate){
            double newTotalWeight = 0;
            // Fetch each element before using it.  Advancing the iterator in
            // the for-update clause would run the first pass with a null
            // instance and would never sum the last element.
            for(ListIterator pix = instances.listIterator(); pix.hasNext(); ) {
                Instance inst = (Instance)pix.next();
                newTotalWeight += get_weight(inst);
            }
            totalWeight = newTotalWeight;
        }
        return totalWeight;
    }
    
    /** Returns the weight for the specified Instance.
     * @return The weight for the Instance supplied.
     * @param instance	The Instance for which weight is questioned.
     */
    public double get_weight(Instance instance) {
        final double instanceWeight = instance.get_weight();
        // Debug-only sanity check: a list not flagged as weighted may only
        // hold instances whose weight is exactly 1.0.
        if (Globals.DBG) {
            MLJ.ASSERT(weighted || instanceWeight == 1.0,
                "InstanceList.get_weight: InstanceList is not weighted");
        }
        return instanceWeight;
    }
    
    /** Deletes the counters stored for Instances in this InstanceList.
     */
    public void drop_counters() {
        // Releasing the reference is all that is needed.  Assigning null to a
        // field that is already null changes nothing, so no guard is required.
        bagCounters = null;
    }
    
    /** Normalize all weights by the number of instances in the list.
     * After this operation, totalWeight should equal the number of instances.
     * The normalization factor is 1 and zeros are allowed for Instance weights.
     */
    public void normalize_weights() {
        // Delegate to the two-argument overload with a factor of 1.0 and
        // zero weights allowed.
        normalize_weights(1.0,true);
    }
    
    /** Normalize all weights by the number of instances in the list, times
     * an optional normalization factor. After this operation, totalWeight
     * should equal the number of instances * the normalization factor. Zeros are
     * allowed for Instance weights.
     * @param normFactor	The normalization factor.
     */
    public void normalize_weights(double normFactor) {
        // Delegate to the two-argument overload, allowing zero weights.
        normalize_weights(normFactor,true);
    }
    
    /** Normalize all weights by the number of instances in the list, times
     * an optional normalization factor. After this operation, totalWeight
     * should equal the number of instances * the normalization factor.
     * @param normFactor	The normalization factor.
     * @param allowZeros TRUE if zeros are allowed for Instance weights. If FALSE,
     * any Instance weight that is approximately equal to 0 is
     * automatically reset to a lower bound.
     */
    public void normalize_weights(double normFactor, boolean allowZeros) {
        // The counters are discarded up front: keeping them through a
        // rescaling produces too many precision errors.
        drop_counters();

        // Flag the list as weighted before touching any weight, because
        // get_weight() asserts (when debugging is on) that a non-unit weight
        // only occurs in a weighted list.
        weighted = true;

        // Factor that rescales the cached total to num_instances() * normFactor.
        // NOTE(review): a cached total of zero on a non-empty list makes this
        // factor infinite or NaN -- confirm callers never normalize such a list.
        final double scale = normFactor * num_instances() / totalWeight;

        // Replacement value for near-zero weights when zeros are not allowed.
        final double minWeight = MLJ.storedRealEpsilon * 2;

        // Weights are set directly on each Instance, bypassing the cached
        // total; that is fine because the total is rebuilt from scratch here.
        double runningTotal = 0;
        ListIterator it = instances.listIterator();
        while(it.hasNext()) {
            Instance current = (Instance)it.next();
            double scaled = get_weight(current) * scale;
            if(MLJ.approx_equal(scaled,0.0) && !allowZeros) {
                scaled = minWeight;
            }
            current.set_weight(scaled);
            // Sum the weight as read back from the instance, not the local value.
            runningTotal += get_weight(current);
        }

        // No check is made that the new total is close to
        // num_instances() * normFactor; that check is disabled because of
        // the precision problems this routine introduces.
        totalWeight = runningTotal;
    }
    
    //PtrArray<RealAndLabelColumn*>*
    /** Splits the InstanceList into several RealAndLabelColumn structures for the
     * parallel discretization.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -