⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instancelist.java

📁 java数据挖掘算法
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
            //obs	 //ListIterator temp = pix;
            //obs	 //pix.next();
            
            //Change instance, add to end.  Add instance by hand so we can avoid
            // checking the schema (whch we know won't match at this point)
            //Don't touch the weight, because we're effectively adding
            // and removing the same instance here.
            
            Instance temp = (Instance)instance_list().get(index); //temp.next();
            Instance instance = temp.project(newSchema, projMask);
            MLJ.ASSERT(instance.get_weight() == temp.get_weight(),"InstanceList.project_in_place: ");
            MLJ.ASSERT(bagCounters == null,"InstanceList.project_in_place: bagCounters not equal to null.");
            instance_list().set(index++, instance);
            
            
            //obs	 Instance instance = (Instance)instance_list().get(index++); //temp.next();
            //obs	 instance.project(newSchema, projMask);
            //obs	 //MLJ.ASSERT(instance.get_weight == instance.get_weight()),"InstanceList.project_in_place: ");
            //obs	 MLJ.ASSERT(bagCounters == null,"InstanceList.project_in_place: bagCounters not equal to null.");
            //obs	 int place = instance_list().indexOf(instance);
            //obs         instance_list().set(place, instance);
            //obs         //instance_list().add(instance);
            //obs	 //instance_list().remove(instance);
        }
        
        //We should not have changed the number of instance
        if(numInstBefore != num_instances())
            Error.err("Assert Error, num instances changes");
        
        //swap schemas
        schema = null;
        schema = newSchema;
        if (Globals.DBG) newSchema = null;
        
        //DBG(OK())
    }
    
    /** Clones the supplied Schema and sets this InstanceList object to use it.
     * @param schemaRC The Schema object to be cloned into this InstanceList object.
     * @throws CloneNotSupportedException if the cloning process of the Schema object encounters an error.
     */    
    public void set_schema(Schema schemaRC) throws CloneNotSupportedException {
        // The replacement used to be performed inside a loop over the stored
        // instances. Two consequences of that were wrong: a list with no
        // instances never received a schema at all, and a list with N
        // instances copy-constructed schemaRC N times (each fresh copy is a
        // different object from schemaRC, so the test kept succeeding).
        // The decision depends only on the two schema references, so it is
        // taken exactly once here.

        //If we already have the schema set, save time.
        //This happens when normalizing assigns a get_unique_copy()
        //schema
        if (schemaRC == schema) {
            return;
        }

        // Store a copy of the supplied Schema rather than the caller's object.
        schema = new Schema(schemaRC);
        //DBG(OK());
    }
    
    /** Change the value of an attribute value in each instance to unknown with the
     * given probability.
     * @param rate A number ranging from 0..1 indicating the probability of changing a specific
     * Instance's value to unknown.
     * @param mrandom Random number generator for randomly determining if Instance value should be
     * changed.
     */    
    private void corrupt_values_to_unknown(double rate, Random mrandom) {
        //mlc.clamp_to_range(rate, 0, 1,"InstanceList.corrupt_values_to_unknown"
        //   +": rate outside [0,1]");

        // Take the instances from a copy of this list and freeze them in an
        // array BEFORE this list is emptied. The earlier code iterated over
        // this list after remove_all_instances(), so the loop body never ran
        // and the method simply discarded every instance. Its for-header also
        // fetched the element after the body, which would have left
        // 'instance' null on the first pass and skipped the last element.
        // NOTE(review): assumes the InstanceList copy constructor carries the
        // instances over - confirm against its definition.
        InstanceList tempInstList = new InstanceList(this);
        Object[] snapshot = tempInstList.instance_list().toArray();
        remove_all_instances();

        // Iterating the array (not a live list) means add_instance() below
        // cannot disturb the traversal.
        for (int k = 0; k < snapshot.length; k++) {
            Instance instance = (Instance) snapshot[k];
            for (int i = 0; i < num_attr(); i++) {
                if (rate > mrandom.nextDouble()) { //not equal for zero to work
                    attr_info(i).set_unknown(instance.get_value(i));
                }
            }
            add_instance(instance);
        }
    }
    
    /** Returns the information about a specific attribute stored in this
     * InstanceList.
     * @return An AttrInfo containing the information about the attribute.
     * @param attrNum	The number of the attribute about which information is
     * requested.
     */
    public AttrInfo attr_info(int attrNum) {
        // Goes through get_schema() rather than the field, so a missing
        // schema is reported there before the lookup is attempted.
        return get_schema().attr_info(attrNum);
    }
    
    /** Returns the number of attributes in the InstanceList.
     * @return An integer representing the number of attributes used for each
     * instance in this InstanceList.
     */
    public int num_attr() {
        // The attribute count is owned by the schema; this list only forwards
        // the question to it.
        return get_schema().num_attr();
    }
    
    /** Returns the maximum number of attributes that can be used for Instances
     * in this InstanceList.
     * @return The maximum number of attributes.
     */
    public static int get_max_attr_vals() {
        // Class-wide setting, read straight from the static field.
        return maxAttrVals;
    }
    
    /** Returns the maximum number of labels that can be used to categorize
     * Instances in this InstanceList.
     * @return The maximum number of labels.
     */
    public static int get_max_label_vals() {
        // Class-wide setting, read straight from the static field.
        return maxLabelVals;
    }
    
    /** Sets the maximum number of attributes for Instances in this InstanceList.
     * @param maxVals	The maximum number of attributes allowed for Instances
     * in this InstanceList.
     */
    public static void set_max_attr_vals(int maxVals) {
        // Stored in a static field, so the new limit applies class-wide.
        maxAttrVals = maxVals;
    }
    
    /** Sets the maximum number of labels that Instances in this InstanceList
     * may be categorized as.
     * @param maxVals	The maximum number of labels that instances may be
     * categorized as.
     */
    public static void set_max_label_vals(int maxVals) {
        // Stored in a static field, so the new limit applies class-wide.
        maxLabelVals = maxVals;
    }
    
    /** Returns the Schema for this InstanceList.
     * @return The Schema of Instances in this InstanceList.
     */
    public Schema get_schema() {
        // Normal case first: a schema is present, hand it back.
        if (schema != null) {
            return schema;
        }

        // No schema yet: report it through Error.err. Should that call
        // return, the (still unset) field is returned as before.
        Error.err("InstanceList.get_schema: schema has not been set --> fatal_error");
        return schema;
    }
    
    /** Returns the FileSchema loaded into this InstanceList.
     * @return The FileSchema for this InstanceList.
     */
    public FileSchema get_original_schema() {
        // Normal case first: a FileSchema is attached, hand it back.
        if (fileSchema != null) {
            return fileSchema;
        }

        // Nothing attached: report it through Error.err. Should that call
        // return, the (still unset) field is returned as before.
        Error.err("InstanceList.get_original_schema: there is no FileSchema associated with this list-->fatal_error");
        return fileSchema;
    }
    
    /** InstanceList.read_data_line() takes time proportional to the number
     * of characters in the portion of the file that it reads + the total number
     * of possible attribute values for the _Instance.
     * @param dataFile BufferedReader that reads the file containing data.
     * @param isTest TRUE if this file contains testing data.
     * @param reader InstanceReader for reading Instances from the file.
     */
    
    private void read_data_line(BufferedReader dataFile, boolean isTest,InstanceReader reader) {
        // isTest is part of the signature but is not consulted in this method.

        // Check fileSchema BEFORE its first use. The assertion used to sit
        // after the skip_white_comments_same_line() call below, so a null
        // fileSchema was dereferenced before the check could report it.
        MLJ.ASSERT(fileSchema != null,"InstanceList.read_data_line: fileSchema is null");

        fileSchema.skip_white_comments_same_line(dataFile);
        try {
            for (int i = 0; i < fileSchema.num_attr(); i++) {
                // Let the reader take value i from the file, then peek one
                // character (mark/read/reset) and consume it only if it is
                // the ',' separator.
                reader.set_from_file(i, dataFile);
                dataFile.mark(1);
                char next = (char) dataFile.read();
                dataFile.reset();
                if (next == ',') {
                    dataFile.read();
                }
            }

            // Peek at the character following the last value.
            // NOTE(review): at end of stream read() returns -1, which the
            // cast turns into '\uFFFF' and which therefore lands in the
            // "illegal file format" branch - confirm that is intended for a
            // final line without a line terminator.
            dataFile.mark(1);
            char afterLast = (char) dataFile.read();
            dataFile.reset();

            if (afterLast == '.') {
                // Consume the terminating '.'; a true result from
                // skip_white_comments_same_line() is treated as an error
                // (presumably it signals leftover non-comment text - verify
                // against FileSchema).
                dataFile.read();
                if (fileSchema.skip_white_comments_same_line(dataFile)) {
                    Error.err("InstanceList.read_data_line: Illegal"
                    +" file format, Only comments or whitespace may follow a '.' "
                    +" on a line in data file");
                }
            } else if (afterLast != '\n' && afterLast != '|') {
                Error.err("InstanceList.read_data_line: Illegal"
                +" file format, Only comments or whitespace or a '.' may "
                +" follow the last value on a line in data file");
            } else {
                // Newline or '|': skip the remainder of the line.
                fileSchema.skip_white_comments_same_line(dataFile);
            }

            reader.add_instance();
        } catch (IOException e) {
            Error.err("InstanceList."
            +"read_data_line: can't read file");
        }
    }
    
    /** InstanceList.read_data_line() takes time proportional to the number
     * of characters in the portion of the file that it reads + the total number
     * of possible attribute values for the _Instance.
     * @param dataStream StreamTokenizer that reads data from the file.
     * @param isTest TRUE if this file contains testing data.
     * @param reader InstanceReader for reading Instances from the file.
     */    
    private void read_data_line(StreamTokenizer dataStream, boolean isTest,InstanceReader reader) {
        // Feeds one value per fileSchema attribute from the tokenizer into
        // 'reader', checks what follows the last value, and then asks the
        // reader to add the instance. isTest is not consulted in this method.
        // This is the StreamTokenizer counterpart of the BufferedReader
        // overload; the commented-out BufferedReader peek logic it replaced
        // has been summarised in the notes below.
        int h;
        try{
            MLJ.ASSERT(fileSchema != null,"InstanceList.read_data_line: fileSchema is null");
            for(int i=0;i<fileSchema.num_attr();i++){
                // h is the index of the attribute after i, wrapping to 0
                // after the last attribute.
                h = i+1;
                if (h == fileSchema.num_attr()) h = 0;
                // parseNumbers(...) is defined outside this block; it is
                // passed true when attribute h is a RealAttrInfo and false
                // otherwise. Presumably this prepares the tokenizer for the
                // token(s) fetched below - TODO confirm.
                if(fileSchema.attrInfos[h] instanceof RealAttrInfo)
                {parseNumbers(dataStream, true);}
                else {parseNumbers(dataStream, false);}
                reader.set_from_file(i,dataStream);
                // Fetch the next token; if it is a ',' fetch the one after
                // it as well.
                dataStream.nextToken();
                if((char)dataStream.ttype == ',')
                    dataStream.nextToken();
            }
            // When the current token is '.', tokens are consumed until
            // TT_EOL; any token seen before TT_EOL is reported as an error.
            if((char)dataStream.ttype == '.')
                for( ;dataStream.nextToken() != StreamTokenizer.TT_EOL; )
                    // Always true at this point: the loop condition has
                    // already excluded TT_EOL.
                    if (dataStream.ttype != StreamTokenizer.TT_EOL)
                        Error.err("InstanceList.read_data_line: Illegal"
                        +" file format, Only comments or whitespace may follow a '.' "
                        +" on a line in data file");
            // NOTE(review): dangling else. Java binds the 'else' below to the
            // nearest 'if' - the one inside the for loop - not to the
            // "ttype == '.'" test. Its condition repeats the inner test, so
            // this branch can never run, and a token that is neither '.' nor
            // TT_EOL after the last value is not reported. The superseded
            // BufferedReader logic ("else if (c != '\n' && c != '|')")
            // suggests it was meant to pair with the '.' test; confirm how
            // parseNumbers tokenizes a trailing '.' and how end-of-file is
            // handled before re-binding it.
                    else if(dataStream.ttype != StreamTokenizer.TT_EOL)
                        Error.err("InstanceList.read_data_line: Illegal"
                        +" file format, Only comments or whitespace or a '.' may "
                        +" follow the last value on a line in data file");
            reader.add_instance();
        }catch(IOException e){Error.err("InstanceList."
        +"read_data_line: can't read file");}
    }
    
    /** Sets the maximum values for attributes and labels in this InstanceList
     * according to the MLJ options stored in the MLJ-options file. These options
     * are MAX_ATTR_VALS and MAX_LABEL_VALS.
     */
    public void init_max_vals() {
        // Only the first call does any work. The guard used to be preceded by
        // "initialized = false;", which made the early return unreachable, so
        // every call re-read the options and overwrote the current maxima.
        // NOTE(review): relies on the 'initialized' field (declared outside
        // this block) starting out false - confirm.
        if (initialized) {
            return;
        }
        initialized = true;

        // Created only when the options are actually going to be read.
        GetEnv getenv = new GetEnv();

        //changed the signature of these functions - possible adverse effects
        InstanceList.set_max_attr_vals(getenv.get_option_int("MAX_ATTR_VALS"));
        InstanceList.set_max_label_vals(getenv.get_option_int("MAX_LABEL_VALS"));
    }
    
    /** Displays the Instances stored in this InstanceList.
     * InstanceList.display() takes time proportional to the
     * number of instances * the number of attributes per instance.
     *
     * @param normalizeReal	TRUE if the Instances should be normalized according
     * to the min/max stored for real attributes. If
     * min equals max, values are normalized to .5.
     */
    public void display(boolean normalizeReal) {
        // Walk the stored instances front to back; each one displays itself.
        // is_weighted() is asked again for every instance, as before.
        for (ListIterator cursor = instances.listIterator(0); cursor.hasNext(); ) {
            ((Instance) cursor.next()).display(is_weighted(), normalizeReal);
        }

        // Nothing more to say unless the list reports itself as empty.
        if (!no_instances()) {
            return;
        }
        System.out.println("InstanceList.display: No instances");
    }
    
    /** Checks if Instances stored in this InstanceList are weighted.
     * @return TRUE if the Instances are weighted, FALSE otherwise.
     */
    public boolean is_weighted() {
        // Plain accessor for the list's weighted flag.
        return this.weighted;
    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -