⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instancereader.java

📁 id3的java实现
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
                }
            }
        }
        
        //if the weight is set and ignoreWeightColumn is specified, the weight
        //column MUST be unmapped
        if(fileSchema.get_weight_column() != unmapped &&
        assimMap[fileSchema.get_weight_column()] != unmapped &&
        fileSchema.get_weight_column() != 0) {
            Error.err("InstanceReader::constuct_assim_map: the "
            +"column "+fileSchema.attrInfos[fileSchema.get_weight_column()].name() + " is mapped "
            +"to both the weight and attribute "
            + assimMap[fileSchema.get_weight_column()] +" yet WEIGHT_IS"
            +"_ATTRIBUTE is falst -->fatal_error");
        }
        
        //if the label exists. replace with the list's label schema
        if(fileSchema.get_label_column() != unmapped){
            if(is_labelled() == false)
                Error.err("InstanceReader::construct_assim_map:"
                +" attempting to assimilate labelled data to unlabelled data "
                +" -->fatal_error");
            fileSchema.set_attr_info(fileSchema.get_label_column(),
            get_schema().label_info());
        }
        
        //make sure all attributes in the destination schema were accounted for.
        //Its ok to leave the lavel or weight unmapped, in this case you can use
        //the set_..._label() and set_weight() functions.
        if(checkCount < checklist.length){
            Error.err("InstanceReader::constuct_assim_map: the"
            +" following required attributes were unaccounted for: ");
            for(int i =0;i<checklist.length;i++){
                if(!checklist[i]){
                    System.out.print("\"" + get_schema().attr_info(i).name() + "\" ");
                }
            }
            Error.err("fatal_error");
        }
    }
    
    /**
     * Hands the list under construction over to the caller and detaches it
     * from this reader. Afterwards the reader holds neither the list nor the
     * file schema.
     *
     * Releasing while an instance is only partially set is reported through
     * Error.err as a fatal error.
     *
     * @return The InstanceList that was being built by this InstanceReader.
     */
    public InstanceList release_list() {
        // Projected columns only deserve a warning when extra values are
        // NOT being turned into unknowns.
        if (makeUnknowns == false) {
            warn_projected_columns();
        }

        // A half-built instance has to be completed before the list can be
        // given away.
        if (anySet) {
            Error.err("InstanceReader::release_list: cannot release a list "
                    + "with a partially built instance.  Use set_complete() "
                    + "to finish off the instance with unknown values "
                    + "-->fatal_error");
        }

        // Give up ownership: remember the list, clear our references, return it.
        InstanceList released = instList;
        fileSchema = null;
        instList = null;
        return released;
    }
    
    /** Sets the value of an attribute from an MLJ format data file.
     *
     * The value is always read from the file, even when the attribute is
     * unmapped. For an unmapped attribute the value is discarded, unless the
     * file schema ignores its weight column and this attribute is that
     * column; in that case the value becomes the pending instance weight.
     * Mapped attributes are forwarded to set_unknown(), set_real() or
     * set_nominal() depending on the value and the attribute type.
     *
     * @param attrNum The number of the attribute being read. Must satisfy
     * 0 <= attrNum < fileSchema.num_attr().
     * @param dataFile The BufferedReader reading the file.
     */
    public void set_from_file(int attrNum, BufferedReader dataFile) {
        //check range on the incoming attrNum. Valid indices stop at
        //num_attr()-1, so num_attr() itself must be rejected as well.
        if(attrNum < 0 || attrNum >= fileSchema.num_attr())
            Error.err("InstanceReader::set_from_file: "
            +"attribute number "+attrNum+" is out of range-->fatal_error");
        
        //map attribute number, but keep original for later calls
        int mapNum = assimMap[attrNum];
        
        //read the attribute value from the file
        AttrInfo ai = fileSchema.attrInfos[attrNum];
        AttrValue attrVal = ai.read_attr_value(dataFile, makeUnknowns, fileSchema);
        if(mapNum==unmapped){
            if(fileSchema.get_ignore_weight_column() &&
            attrNum==fileSchema.get_weight_column()){
                //the ignored weight column supplies the instance weight
                weight = ai.get_real_val(attrVal);
            }
            return; //ignore unmapped attributes from here on
        }
        
        //determine type of attribute, and call the appropriate function instead.
        if(ai.is_unknown(attrVal)){
            set_unknown(attrNum);
        }
        else if(ai.can_cast_to_real()){
            double val = ai.get_real_val(attrVal);
            set_real(attrNum, val);
        }
        else if(ai.can_cast_to_nominal()){
            String strVal = ai.attrValue_to_string(attrVal);
            set_nominal(attrNum, strVal);
        }
        else
            Error.err("InstanceReader::set_from_file: reader "
            +"only supports real and nominal types -->fatal_error");
    }
    
    /** Sets the value of an attribute from an MLJ format data file.
     *
     * The value is always read from the tokenizer, even when the attribute
     * is unmapped. For an unmapped attribute the value is discarded, unless
     * the file schema ignores its weight column and this attribute is that
     * column; in that case the value becomes the pending instance weight.
     * Mapped attributes are forwarded to set_unknown(), set_real() or
     * set_nominal() depending on the value and the attribute type.
     *
     * @param attrNum The number of the attribute being read. Must satisfy
     * 0 <= attrNum < fileSchema.num_attr().
     * @param dataFile The StreamTokenizer reading from the file.
     */
    public void set_from_file(int attrNum, StreamTokenizer dataFile) {
        //check range on the incoming attrNum. Valid indices stop at
        //num_attr()-1, so num_attr() itself must be rejected as well.
        if(attrNum < 0 || attrNum >= fileSchema.num_attr())
            Error.err("InstanceReader::set_from_file: "
            +"attribute number "+attrNum+" is out of range-->fatal_error");
        
        //map attribute number, but keep original for later calls
        int mapNum = assimMap[attrNum];
        
        //read the attribute value from the file
        AttrInfo ai = fileSchema.attrInfos[attrNum];
        AttrValue attrVal = ai.read_attr_value(dataFile, makeUnknowns, fileSchema);
        if(mapNum==unmapped){
            if(fileSchema.get_ignore_weight_column() &&
            attrNum==fileSchema.get_weight_column()){
                //the ignored weight column supplies the instance weight
                weight = ai.get_real_val(attrVal);
            }
            return; //ignore unmapped attributes from here on
        }
        
        //determine type of attribute, and call the appropriate function instead.
        if(ai.is_unknown(attrVal)){
            set_unknown(attrNum);
        }
        else if(ai.can_cast_to_real()){
            double val = ai.get_real_val(attrVal);
            set_real(attrNum, val);
        }
        else if(ai.can_cast_to_nominal()){
            String strVal = ai.attrValue_to_string(attrVal);
            set_nominal(attrNum, strVal);
        }
        else
            Error.err("InstanceReader::set_from_file: reader "
            +"only supports real and nominal types -->fatal_error");
    }
    
    /** Adds the instance to the list. The instance must be fully constructed and must
     * have its label set. Also, you may not add the same instance twice.
     *
     * Every mapped attribute must have been set since the previous add;
     * a missing one is reported through Error.err. After the instance has
     * been handed to the list, the per-attribute "set" flags are cleared
     * and the weight is reset to 1.0 for the next instance.
     *
     * @return The Instance being added.
     */
    public Instance add_instance() {
        //every mapped attribute has to be set before the instance is added
        for(int i=0;i<setAttr.length;i++) {
            if(assimMap[i]!=unmapped && !setAttr[i]) {
                //the marker is spelled "fatal_error" like every other fatal
                //message in this class (it used to read "fatal_Error");
                //NOTE(review): presumably Error.err keys on this marker - confirm
                Error.err("InstanceReader::add_instance: "
                +"you forgot to set attribute "+i+" ("
                +fileSchema.attrInfos[i].name()+")\n Use the set_complete() "
                +"to give unknown values to extra attributes -->fatal_error");
            }
        }
        
        //set up small array of values for list, placing each file value at
        //its mapped position and picking out the label value separately
        AttrValue labelVal = null;
        AttrValue[] listVals = new AttrValue[get_schema().num_attr()];
        for(int i=0;i<assimMap.length;i++) {
            int mapNum = assimMap[i];
            if(mapNum>=0)
                listVals[mapNum] = vals[i];
            else if(mapNum == mapToLabel){
                //ASSERT(labelVal == null);
                labelVal = vals[i];
            }
        }
        
        //reset status bits for the next add
        //can't reset the weight before its used, so it is reset only after
        //the instance has been handed to the list below.
        for(int i=0;i<setAttr.length;i++)
            setAttr[i] = false;
        anySet = false;
        
        //labelled lists get the label value and the unknown-label policy;
        //unlabelled lists get no label and never allow unknown labels
        Instance inst;
        if(is_labelled()){
            //ASSERT(labelVal);
            inst = instList.reader_add_instance(listVals,labelVal,weight,
            allowUnknownLabels);
        }
        else {
            //ASSERT(labelVal==null)
            inst = instList.reader_add_instance(listVals,null,weight,false);
        }
        
        if(attrValueLimit!=0)
            update_for_overflows();
        weight = 1.0;
        return inst;
    }
    
    /** Fills in the projection maps for the reader's file schema and for the
     * list's schema. Both maps start out all true. An entry is set to false
     * for every mapped nominal attribute whose number of values exceeds
     * attrValueLimit, and for every mapped nominal attribute marked as
     * ignored. Each such attribute is cleared in both maps: at its file
     * index in readerProjMap and at its mapped index in listProjMap.
     *
     * @param readerProjMap Projection map indexed by file-schema attribute
     * number; overwritten by this call.
     * @param listProjMap Projection map indexed by list-schema attribute
     * number; its first get_schema().num_attr() entries are overwritten.
     */
    private void build_proj_maps(boolean[] readerProjMap, boolean[] listProjMap) {
        //ASSERT(readerProjMap.lenght == assimMap.length);
        Schema schema = get_schema();
        int numAttr = schema.num_attr();
        //ASSERT(listProjMap.length >= numAttr);
        
        //Both projection maps begin as all true;
        for(int i=0;i<readerProjMap.length;i++)
            readerProjMap[i] = true;
        for(int i=0;i<numAttr;i++)
            listProjMap[i] = true;
        
        //Run through the attributes in the names file. Check the number of
        //values for each nominal. If anything exceeds the limit, add it to the
        //map.  Also add attributes which are being deliberately ignored.
        for(int i=0;i<fileSchema.num_attr();i++){
            //only attributes mapped to a list attribute are of interest
            int listIndex = assimMap[i];
            if(listIndex < 0)
                continue;
            if(!fileSchema.attrInfos[i].can_cast_to_nominal())
                continue;
            
            int numVals = fileSchema.attrInfos[i].cast_to_nominal().num_values();
            
            //check the number of values here against the counterpart attr
            // info in the list.  If these don't match, we have failed
            //to update our attr infos correctly
            int listNumVals = schema.attr_info(listIndex).cast_to_nominal().num_values();
            if(numVals != listNumVals)
                //the list schema is indexed by the mapped number, not by
                //the file-schema number i
                Error.err("InstanceReader::build_proj_maps:"
                +" number of values for attribute " + schema.attr_name(listIndex)
                +" is inconsistent; reader's FileSchema has "+numVals
                +" while list has "+listNumVals+" -->fatal_error");
            
            //if we've exceeded the number of values, set entries in
            // each map to false;
            if(numVals > attrValueLimit) {
                readerProjMap[i] = false;
                listProjMap[listIndex] = false;
            }
            
            //if the attribute is marked to be ignored, set to false
            if(fileSchema.attrInfos[i].cast_to_nominal().get_ignore()) {
                readerProjMap[i] = false;
                listProjMap[listIndex] = false;
            }
        }
    }
    
    private void update_assim_map(boolean[] projMap) {
        int displacement = 0;
        
        //ASSERT(projMap.length == asimMap.length);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -