📄 instancelist.java
字号:
//obs //ListIterator temp = pix;
//obs //pix.next();
//Change instance, add to end. Add instance by hand so we can avoid
// checking the schema (which we know won't match at this point).
//Don't touch the weight, because we're effectively adding
// and removing the same instance here.
Instance temp = (Instance)instance_list().get(index); //temp.next();
Instance instance = temp.project(newSchema, projMask);
MLJ.ASSERT(instance.get_weight() == temp.get_weight(),"InstanceList.project_in_place: ");
MLJ.ASSERT(bagCounters == null,"InstanceList.project_in_place: bagCounters not equal to null.");
instance_list().set(index++, instance);
//obs Instance instance = (Instance)instance_list().get(index++); //temp.next();
//obs instance.project(newSchema, projMask);
//obs //MLJ.ASSERT(instance.get_weight == instance.get_weight()),"InstanceList.project_in_place: ");
//obs MLJ.ASSERT(bagCounters == null,"InstanceList.project_in_place: bagCounters not equal to null.");
//obs int place = instance_list().indexOf(instance);
//obs instance_list().set(place, instance);
//obs //instance_list().add(instance);
//obs //instance_list().remove(instance);
}
//We should not have changed the number of instances.
if(numInstBefore != num_instances())
Error.err("Assert Error, num instances changes");
//swap schemas
schema = null;
schema = newSchema;
if (Globals.DBG) newSchema = null;
//DBG(OK())
}
/**
 * Makes this InstanceList use its own copy of the supplied Schema.
 * <p>
 * If {@code schemaRC} is the very object this list already holds (identity
 * comparison), the call does nothing. Otherwise the held schema is replaced
 * by {@code new Schema(schemaRC)}. The replacement is performed exactly once
 * and does not depend on how many instances the list contains, so it also
 * takes effect for an empty list.
 *
 * @param schemaRC The Schema object to be copied into this InstanceList object.
 * @throws CloneNotSupportedException if copying the Schema object encounters an error.
 */
public void set_schema(Schema schemaRC) throws CloneNotSupportedException {
    // If we already hold exactly this schema object, save time.
    // This happens when normalizing assigns a get_unique_copy() schema.
    if (schemaRC == schema) {
        return;
    }
    // NOTE(review): the schema references held by the stored instances are
    // not touched here; confirm whether callers expect them to be updated.
    schema = new Schema(schemaRC);
    //DBG(OK());
}
/**
 * Changes attribute values to unknown at random.
 * <p>
 * A copy of this list is taken, this list is emptied, and every instance of
 * the copy is then re-added after each of its attribute values has been set
 * to unknown with probability {@code rate}.
 *
 * @param rate    A number ranging from 0..1 indicating the probability of changing a
 *                specific attribute value to unknown. The range is not checked here.
 * @param mrandom Random number generator used to decide, per attribute value,
 *                whether the value is changed.
 */
private void corrupt_values_to_unknown(double rate, Random mrandom) {
    //mlc.clamp_to_range(rate, 0, 1,"InstanceList.corrupt_values_to_unknown"
    //                   +": rate outside [0,1]");

    // Assumes the InstanceList(InstanceList) constructor yields a list holding
    // the same instances as this one - TODO confirm.
    InstanceList snapshot = new InstanceList(this);
    remove_all_instances();

    // Walk the snapshot, not this list: this list has just been emptied and is
    // being refilled inside the loop.
    ListIterator pix = snapshot.instance_list().listIterator();
    while (pix.hasNext()) {
        Instance instance = (Instance) pix.next();
        for (int i = 0; i < num_attr(); i++) {
            // Strict '>' so that a rate of zero never corrupts a value.
            if (rate > mrandom.nextDouble()) {
                attr_info(i).set_unknown(instance.get_value(i));
            }
        }
        add_instance(instance);
    }
}
/**
 * Looks up the description of one attribute in this list's Schema.
 * The request is delegated to {@code get_schema().attr_info(attrNum)}.
 *
 * @param attrNum The number of the attribute about which information is requested.
 * @return An AttrInfo containing the information about the attribute.
 */
public AttrInfo attr_info(int attrNum) {
    Schema current = get_schema();
    return current.attr_info(attrNum);
}
/**
 * Reports how many attributes this list's Schema defines.
 * The request is delegated to {@code get_schema().num_attr()}.
 *
 * @return An integer representing the number of attributes in the Schema
 *         of this InstanceList.
 */
public int num_attr() {
    Schema current = get_schema();
    return current.num_attr();
}
/**
 * Returns the class-wide {@code maxAttrVals} setting. The method is static, so
 * the value is shared by all InstanceList objects.
 * <p>
 * NOTE(review): judging by the name, this is presumably the maximum number of
 * values an attribute may take - confirm.
 *
 * @return The current {@code maxAttrVals} value.
 */
public static int get_max_attr_vals() {
    return maxAttrVals;
}
/**
 * Returns the class-wide {@code maxLabelVals} setting. The method is static, so
 * the value is shared by all InstanceList objects.
 * <p>
 * NOTE(review): judging by the name, this is presumably the maximum number of
 * labels that can be used to categorize instances - confirm.
 *
 * @return The current {@code maxLabelVals} value.
 */
public static int get_max_label_vals() {
    return maxLabelVals;
}
/**
 * Stores a new class-wide {@code maxAttrVals} setting. The method is static, so
 * the value applies to all InstanceList objects. No range check is performed.
 *
 * @param maxVals The new {@code maxAttrVals} value.
 */
public static void set_max_attr_vals(int maxVals) {
    maxAttrVals = maxVals;
}
/**
 * Stores a new class-wide {@code maxLabelVals} setting. The method is static, so
 * the value applies to all InstanceList objects. No range check is performed.
 *
 * @param maxVals The new {@code maxLabelVals} value.
 */
public static void set_max_label_vals(int maxVals) {
    maxLabelVals = maxVals;
}
/**
 * Returns the Schema of this InstanceList.
 * <p>
 * When no schema has been set, the problem is reported through
 * {@code Error.err} first; if that call returns, {@code null} is returned.
 *
 * @return The Schema of Instances in this InstanceList.
 */
public Schema get_schema() {
    if (schema != null) {
        return schema;
    }
    Error.err("InstanceList.get_schema: schema "
        + "has not been set --> fatal_error");
    return schema;
}
/**
 * Returns the FileSchema associated with this InstanceList.
 * <p>
 * When there is none, the problem is reported through {@code Error.err}
 * first; if that call returns, {@code null} is returned.
 *
 * @return The FileSchema for this InstanceList.
 */
public FileSchema get_original_schema() {
    if (fileSchema != null) {
        return fileSchema;
    }
    Error.err("InstanceList.get_original_schema: there"
        + " is no FileSchema associated with this list-->fatal_error");
    return fileSchema;
}
/**
 * Reads one data line from a BufferedReader and hands the resulting instance
 * to the InstanceReader.
 * <p>
 * For each attribute of the file schema, the value is read through
 * {@code reader.set_from_file} and a directly following ',' is consumed.
 * After the last value the line may end with a '.', a newline or a '|';
 * anything else is reported through {@code Error.err}.
 * Finally {@code reader.add_instance()} is called.
 * <p>
 * Per the original documentation, the time taken is proportional to the number
 * of characters read plus the total number of possible attribute values for
 * the instance.
 *
 * @param dataFile BufferedReader that reads the file containing data.
 * @param isTest   TRUE if this file contains testing data. Not used by this method.
 * @param reader   InstanceReader for reading Instances from the file.
 */
private void read_data_line(BufferedReader dataFile, boolean isTest, InstanceReader reader) {
    //isTest;
    try {
        // Verify fileSchema before it is first dereferenced, so that a missing
        // schema is reported by the assertion and not by a NullPointerException.
        MLJ.ASSERT(fileSchema != null,"InstanceList.read_data_line: fileSchema is null");
        fileSchema.skip_white_comments_same_line(dataFile);

        for (int i = 0; i < fileSchema.num_attr(); i++) {
            reader.set_from_file(i, dataFile);
            // Peek at the next character without consuming it; only a
            // separating comma is actually consumed.
            dataFile.mark(1);
            char c = (char) dataFile.read();
            dataFile.reset();
            if (c == ',') {
                dataFile.read();
            }
        }

        // Peek at whatever follows the last value on the line.
        // NOTE(review): at end of file read() yields -1, which becomes
        // '\uffff' here and therefore takes the "illegal format" branch.
        dataFile.mark(1);
        char c = (char) dataFile.read();
        dataFile.reset();
        if (c == '.') {
            dataFile.read();
            if (fileSchema.skip_white_comments_same_line(dataFile)) {
                Error.err("InstanceList.read_data_line: Illegal"
                    + " file format, Only comments or whitespace may follow a '.' "
                    + " on a line in data file");
            }
        } else if (c != '\n' && c != '|') {
            Error.err("InstanceList.read_data_line: Illegal"
                + " file format, Only comments or whitespace or a '.' may "
                + " follow the last value on a line in data file");
        } else {
            fileSchema.skip_white_comments_same_line(dataFile);
        }
        reader.add_instance();
    } catch (IOException e) {
        Error.err("InstanceList."
            + "read_data_line: can't read file");
    }
}
/**
 * Reads one data line from a StreamTokenizer and hands the resulting instance
 * to the InstanceReader.
 * <p>
 * For each attribute of the file schema, the tokenizer's number parsing is
 * configured, the value is taken through {@code reader.set_from_file}, the
 * tokenizer is advanced and a ',' token is skipped. After the last value:
 * <ul>
 *   <li>if the current token is '.', every further token up to the end of the
 *       line is reported through {@code Error.err};</li>
 *   <li>otherwise the current token must be the end of the line, or it is
 *       reported through {@code Error.err}.</li>
 * </ul>
 * End of file is accepted wherever end of line is, so a final line without a
 * line terminator is not rejected and the scan cannot spin on
 * {@code TT_EOF}. Finally {@code reader.add_instance()} is called.
 * <p>
 * Per the original documentation, the time taken is proportional to the number
 * of characters read plus the total number of possible attribute values for
 * the instance.
 *
 * @param dataStream StreamTokenizer that reads data from the file.
 * @param isTest     TRUE if this file contains testing data. Not used by this method.
 * @param reader     InstanceReader for reading Instances from the file.
 */
private void read_data_line(StreamTokenizer dataStream, boolean isTest, InstanceReader reader) {
    //isTest;
    try {
        MLJ.ASSERT(fileSchema != null,"InstanceList.read_data_line: fileSchema is null");
        for (int i = 0; i < fileSchema.num_attr(); i++) {
            // The number-parsing mode is chosen from the attribute AFTER i
            // (wrapping around to attribute 0 for the last one).
            // NOTE(review): presumably because the setting affects the token
            // fetched by nextToken() below, which belongs to the following
            // attribute - confirm.
            int h = i + 1;
            if (h == fileSchema.num_attr()) {
                h = 0;
            }
            parseNumbers(dataStream, fileSchema.attrInfos[h] instanceof RealAttrInfo);

            reader.set_from_file(i, dataStream);
            dataStream.nextToken();
            if (dataStream.ttype == ',') {
                dataStream.nextToken();
            }
        }

        // Explicit braces are required here: without them the 'else' would
        // attach to an 'if' inside the scanning loop and the trailing-garbage
        // check below would never run.
        if (dataStream.ttype == '.') {
            int token = dataStream.nextToken();
            while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF) {
                Error.err("InstanceList.read_data_line: Illegal"
                    + " file format, Only comments or whitespace may follow a '.' "
                    + " on a line in data file");
                token = dataStream.nextToken();
            }
        } else if (dataStream.ttype != StreamTokenizer.TT_EOL
                && dataStream.ttype != StreamTokenizer.TT_EOF) {
            Error.err("InstanceList.read_data_line: Illegal"
                + " file format, Only comments or whitespace or a '.' may "
                + " follow the last value on a line in data file");
        }
        reader.add_instance();
    } catch (IOException e) {
        Error.err("InstanceList."
            + "read_data_line: can't read file");
    }
}
/**
 * Loads the maximum attribute and label value settings from the MLJ options
 * MAX_ATTR_VALS and MAX_LABEL_VALS and stores them via
 * {@code set_max_attr_vals} and {@code set_max_label_vals}.
 * <p>
 * The work is done only while the {@code initialized} flag is still false;
 * the flag is set to true before the options are read, so later calls return
 * immediately.
 * <p>
 * NOTE(review): {@code initialized} is declared outside this method; confirm
 * that nothing else sets it to true before the first call.
 */
public void init_max_vals() {
    // Run-once guard. The flag must not be reset here, otherwise the guard
    // can never trigger and the options are re-read on every call.
    if (initialized) {
        return;
    }
    initialized = true;

    GetEnv getenv = new GetEnv();
    //changed the signature of these functions - possible adverse effects
    InstanceList.set_max_attr_vals(getenv.get_option_int("MAX_ATTR_VALS"));
    InstanceList.set_max_label_vals(getenv.get_option_int("MAX_LABEL_VALS"));
}
/**
 * Displays every Instance stored in this InstanceList, in list order, by
 * calling {@code Instance.display(is_weighted(), normalizeReal)} on each.
 * Afterwards, if {@code no_instances()} is true, a "No instances" line is
 * printed to standard output.
 * <p>
 * Per the original documentation, the time taken is proportional to the number
 * of instances times the number of attributes per instance.
 *
 * @param normalizeReal Passed through to each Instance's display call. Per the
 *                      original documentation, TRUE means real attributes are
 *                      normalized according to their stored min/max, with .5
 *                      used when min equals max (not verifiable from this method).
 */
public void display(boolean normalizeReal) {
    for (ListIterator cursor = instances.listIterator(0); cursor.hasNext(); ) {
        Instance current = (Instance) cursor.next();
        current.display(is_weighted(), normalizeReal);
    }
    if (no_instances()) {
        System.out.println("InstanceList.display: No instances");
    }
}
/**
 * Reports the value of this list's {@code weighted} flag.
 *
 * @return TRUE if the Instances are weighted, FALSE otherwise.
 */
public boolean is_weighted() {
    final boolean flag = weighted;
    return flag;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -