⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 instancereader.java

📁 java数据挖掘算法
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
package shared;
import java.io.*;

/** Provide a set of functions for reading a list of instances from a source which
 * provides a single instance at a time, attribute by attribute. Supports the
 * exclusion of nominal attributes which have more than a set limit on the number
 * of values.
 * @author James Louis Java Implementation.
 * @author Dan Sommerfield 5/03/96 Initial revision (.h, .c)
 *
 */
public class InstanceReader {
    /** The InstanceList in which are stored Instances that are read.
     * Assigned from the constructor's ownerList argument.
     */
    private InstanceList instList;
    /** TRUE if unknown values for attributes are possible, FALSE otherwise.
     */
    private boolean makeUnknowns;
    /** TRUE if unknown labels are possible, FALSE otherwise.
     */
    private boolean allowUnknownLabels;
    /** The FileSchema detailing the data being read by this InstanceReader.
     * Obtained from the owner list's original schema in the constructor.
     */
    private FileSchema fileSchema;
    
    /** Attribute value holders, one slot per attribute of the original schema.
     * Every slot is filled with a fresh AttrValue by the constructor.
     */
    private AttrValue[] vals;
    /** One flag per attribute of the original schema; all FALSE after construction.
     * NOTE(review): presumably records which attributes have already been set for
     * the instance currently being built -- confirm against the set functions.
     */
    private boolean[] setAttr;
    /** FALSE after construction.
     * NOTE(review): presumably TRUE once any attribute of the current instance has
     * been set -- confirm against the set functions.
     */
    private boolean anySet;
    /** Limit on the number of distinct values per attribute, as passed to the
     * constructor. The constructor reports an error for a negative value.
     */
    private int attrValueLimit;
    /** Weight value; set to 1.0 by the constructor.
     * NOTE(review): the original comment described this as "the total weight of
     * Instances" -- confirm against the code that reads it.
     */
    private double weight;
    /** Set to TRUE by the constructor.
     * NOTE(review): presumably controls a warning issued when an instance is
     * completed -- confirm against the code that reads it.
     */
    private boolean warnOnSetComplete;
    /** Assimilation map, indexed by attribute number in the file schema. Each entry
     * is the matching attribute number in the list's schema, {@link #unmapped} while
     * no mapping has been established, or {@link #mapToLabel} for the label column.
     */
    private int[] assimMap;
    /** One flag per attribute of the original schema.
     * NOTE(review): presumably marks attributes that are projected out of the
     * incoming data -- confirm against the code that reads it.
     */
    private boolean[] projMap;
    /** One flag per attribute of the original schema.
     * NOTE(review): presumably the projection flags relative to the list's schema --
     * confirm against the code that reads it.
     */
    private boolean[] listProjMap;
    //private QuarkTable[] quarkTables;
    
    /** Special value for mapping operations: marks an assimMap entry that has no
     * mapping. The file schema's label column is also compared against this value
     * before it is marked in the map. Negative, so it cannot collide with a real
     * attribute index.
     */
    public static final int unmapped = -1;
    /** Special value for mapping operations: marks the assimMap entry of the label
     * column. Negative, so it cannot collide with a real attribute index.
     */
    public static final int mapToLabel = -2;
    
    /** Convenience constructor. Builds an InstanceReader which can be used to
     * construct instances for ownerList. OwnerList MUST have a FileSchema associated
     * with it; this defines the form of all incoming data.                       <BR>
     * Delegates to the four-argument constructor with an attribute value limit of 0,
     * makeUnknown FALSE and allowUnknownLab FALSE. (Added by JL.)
     * @param ownerList The InstanceList in which Instances will be stored.
     */
    public InstanceReader(InstanceList ownerList) {
        this(ownerList, 0, false, false);
    }
    /** Convenience constructor. Builds an InstanceReader which can be used to
     * construct instances for ownerList. OwnerList MUST have a FileSchema associated
     * with it; this defines the form of all incoming data.                       <BR>
     * The limit parameter specifies an optional limit on the number of distinct
     * attribute values which are allowed on any given attribute.                 <BR>
     * Delegates to the four-argument constructor with makeUnknown FALSE and
     * allowUnknownLab FALSE. (Added by JL.)
     * @param ownerList The InstanceList in which Instances will be stored.
     * @param limit The limit number of how many attribute values are possible.
     */
    public InstanceReader(InstanceList ownerList, int limit) {
        this(ownerList, limit, false, false);
    }
    /** Convenience constructor. Builds an InstanceReader which can be used to
     * construct instances for ownerList. OwnerList MUST have a FileSchema associated
     * with it; this defines the form of all incoming data.                       <BR>
     * The limit parameter specifies an optional limit on the number of distinct
     * attribute values which are allowed on any given attribute.                 <BR>
     * The makeUnknown parameter, if TRUE, will cause all attribute values not present
     * in ownerList's schema to be converted to UNKNOWN.                          <BR>
     * Delegates to the four-argument constructor with allowUnknownLab FALSE.
     * (Added by JL.)
     * @param ownerList The InstanceList in which Instances will be stored.
     * @param limit The limit number of how many attribute values are possible.
     * @param makeUnknown TRUE if unknown values for attributes are possible, FALSE otherwise.
     */
    public InstanceReader(InstanceList ownerList, int limit, boolean makeUnknown) {
        this(ownerList, limit, makeUnknown, false);
    }
    
    /** Constructor. Builds an InstanceReader which can be used to construct instances
     * for ownerList. OwnerList MUST have a FileSchema associated with it; this
     * defines the form of all incoming data. The data will be ASSIMILATED to the form
     * of ownerList's schema as it is read.                                        <BR>
     * The limit parameter specifies an optional limit on the number of distinct
     * attribute values which are allowed on any given attribute.  If this limit is
     * exceeded, the attribute in question will be projected out, and future incoming
     * data for that attribute will be ignored.                                    <BR>
     * The makeUnknown parameter, if TRUE, will cause all attribute values not present
     * in ownerList's schema to be converted to UNKNOWN.                           <BR>
     * NOTE: for reading test data, limit should be set to 0 and makeUnknown should be
     * TRUE.
     * @param ownerList The InstaceList in which Instances will be stored.
     * @param limit The limit number of how many attribute values are possible.
     * @param makeUnknown TRUE if unknown values for attributes are possible, FALSE otherwise.
     * @param allowUnknownLab TRUE if unknown labels are possible, FALSE otherwise.
     */
    public InstanceReader(InstanceList ownerList, int limit, boolean makeUnknown, boolean allowUnknownLab) {
        instList = ownerList;
        makeUnknowns = makeUnknown;
        allowUnknownLabels = allowUnknownLab;
        
        setAttr = new boolean[ownerList.get_original_schema().num_attr()];
        for(int i=0;i<setAttr.length;i++)setAttr[i]=false;
        
        anySet = false;
        attrValueLimit = limit;
        weight = 1.0;
        fileSchema = ownerList.get_original_schema();
        
        vals = new AttrValue[ownerList.get_original_schema().num_attr()];
        for(int j=0;j<vals.length;j++)vals[j]=new AttrValue();///ADDED BY JL
        
        assimMap = new int[ownerList.get_original_schema().num_attr()];
        for(int i=0;i<assimMap.length;i++)assimMap[i]=-1;
        
        projMap = new boolean[ownerList.get_original_schema().num_attr()];
        listProjMap = new boolean[ownerList.get_original_schema().num_attr()];
        //quarkTables(0,ownerList.get_original_schema().num_attr(),null)
        warnOnSetComplete = true;
        
        //fileSchema.OK();
        if(attrValueLimit < 0)
            Error.err("InstanceReader::InstanceReader: negative"
            + " value is not allowed for attrValueLimit->fatal_error");
        
        //construct the assimilation map to be used during set functions
        construct_assim_map();
        
        //take ownership of the list we're building
        ownerList = null;
        //OK();
    }
    
    /** Attempts to match values for two fixed value set nominals. The two nominals
     * match when they have the same number of values and the value names at every
     * compared index are equal as strings. On a mismatch an error message is
     * reported through Error.err and both value sets are displayed.
     * @param name The name of the attribute.
     * @param a1 The first nominal being compared (the training version).
     * @param a2 The second nominal being compared (the testing version).
     */
    public void match_values(String name, NominalAttrInfo a1, NominalAttrInfo a2) {
        //ASSERT(a1.is_fixed());
        //ASSERT(a2.is_fixed());
        boolean mismatch = a1.num_values() != a2.num_values();
        if (!mismatch) {
            // Start index changed from 0 for the ZOO test set. -JL
            // NOTE(review): the upper bound is num_values(), not
            // FIRST_NOMINAL_VAL + num_values(); if FIRST_NOMINAL_VAL is non-zero the
            // last value is never compared -- confirm against Globals.
            for (int i = Globals.FIRST_NOMINAL_VAL; i < a1.num_values(); i++) {
                String trainValue = a1.get_value(i);
                String testValue = a2.get_value(i);
                // Compare by content: identical names held in different String
                // objects must not be reported as a mismatch.
                boolean same = (trainValue == null)
                        ? testValue == null
                        : trainValue.equals(testValue);
                if (!same) {
                    mismatch = true;
                    break;
                }
            }
        }
        if (mismatch) {
            Error.err("InstanceReader::match_values: mismatch"
            +" in fixed nominals for attribute \"" + name + "\": ");
            Error.err("training version: ");
            a1.display_attr_values();
            Error.err("testing version: ");
            a2.display_attr_values();
            Error.err(" -->fatal_error");
        }
    }
    
    /** Constructs the assimilation map used to map attribute numbers used in the
     * assimilation schema (set functions) into numbers used in the list's schema.
     *
     */
    private void construct_assim_map() {
        //mark the label column as mapped to the label
        if(fileSchema.get_label_column() != unmapped)
            assimMap[fileSchema.get_label_column()] = mapToLabel;
        
        //for each attribute name in the file schema (test data), find the
        //same name in the list's schema(training data) and establish the mapping
        //No attributes in the list's schema may be left unaccounted for.
        
        int numDestAttr = get_schema().num_attr();
        boolean[] checklist = new boolean[numDestAttr];
        for(int i=0;i<checklist.length;i++)checklist[i]=false;
        int checkCount = 0;
        for(int i=0;i<fileSchema.num_attr();i++){
            for(int j=0;j<numDestAttr;j++){
                String name = fileSchema.attrInfos[i].name();
                if(name.equals(get_schema().attr_name(j))) {
                    //make sure the column is not mapped to some other column.
                    //if it is mapped to the label or weight, then ignore it.
                    //ASSERT(assimMap[i] != false);
                    if(assimMap[i] == unmapped){
                        assimMap[i] = j;
                        checkCount++;
                        checklist[j] = true;
                        
                        //assimilate attribute infos. Thre are some rules here:
                        // 1. if the types don't match, it is an error.
                        // 2. if both are fixed nominals, the exact values must match
                        // 3. if the list's schema is an unfixed nominal, use it
                        // 4. if the list's schema is a fixed nominl, but the file
                        //    schema specifies an unfixed noinal, create an unfixed
                        //    nominal with the values from the list's schema's
                        //    fixed nominal.
                        AttrInfo testAI = fileSchema.attrInfos[i];
                        AttrInfo trainAI = get_schema().attr_info(j);
                        if(trainAI.can_cast_to_nominal()) {
                            //make sure the nominal types match
                            if(!testAI.can_cast_to_nominal())
                                Error.err("InstanceReader::constuct_"
                                + "assim_map: training schema requires a nominal "
                                + "for attribute \"" +name+"\" -->fatal_error");
                            
                            // other nominal checks
                            NominalAttrInfo testNAI = testAI.cast_to_nominal();
                            NominalAttrInfo trainNAI = trainAI.cast_to_nominal();
                            
                            //check fixed/unfixed status
                            if(trainNAI.is_fixed()) {
                                if(testNAI.is_fixed()) {
                                    //by rule#2, the exact values must match
                                    match_values(name, trainNAI, testNAI);
                                }
                                else{
                                    //replace attribute info for test data with the
                                    //training version, but make unfixed(rule #4)
                                    fileSchema.set_attr_info(i,trainAI);
                                    fileSchema.attrInfos[i].cast_to_nominal().fix_values(false);
                                }
                            }
                            else {
                                //just use the training version (rule #3)
                                fileSchema.set_attr_info(i,trainAI);
                            }
                        }
                        else if(trainAI.can_cast_to_real()) {
                            if(!testAI.can_cast_to_real())
                                Error.err("InstanceReader::construct_"
                                +"assim_map: training schema requires a numerical "
                                +"value for attribute \"" +name+"\" -->fatal_error");
                        }
                        else
                            Error.err("InstanceReader::construct_"
                            +"assim_map: training schema contains an attribute \""
                            +name+"\" which is neither real nor nominal-->fatal_error");
                    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -