📄 fileschema.java
字号:
* encountered, the correct AttrInfo type will be created.
* @param namesFile Reader allowing access to the namesfile.
* @param attrName The name of the attribute.
* @return The newly created AttrInfo whose values were read.
*/
private AttrInfo read_attr_info(BufferedReader namesFile, String attrName)
{
    // Reads the value list (or type keyword) that follows an attribute name
    // and builds the matching AttrInfo: "continuous" gives a RealAttrInfo,
    // "ignore-attribute" an ignored NominalAttrInfo, "discrete [n]" a
    // NominalAttrInfo with hint n, anything else a NominalAttrInfo holding
    // the listed values.
    LinkedList attrVals = new LinkedList();
    AttrInfo ai = null;
    int discreteHint = -1;
    // A comma is expected before each value other than the first, so we
    // keep a flag telling us whether this is the first pass of the loop.
    boolean firstVal = true;
    try
    {
        // Peek at the next character without consuming it. It is kept as an
        // int: casting to char first would turn the end-of-stream marker
        // (-1) into '\uffff', and the EOF test below could never succeed.
        namesFile.mark(1);
        int c = namesFile.read();
        namesFile.reset();
        while (c != '.' && c != '\n' && c != '|' && c != -1)
        {
            if (!firstVal && c == ',')
                namesFile.read(); // consume the separating comma
            boolean[] sameLine = new boolean[1];
            sameLine[0] = false;
            String attrValue = read_word(namesFile, false, sameLine);
            attrVals.add(attrValue);
            firstVal = false;
            namesFile.mark(1);
            c = namesFile.read();
            namesFile.reset();
        }
        if (attrVals.size() == 0)
            Error.err("Missing values or type specifier for "
                + "attribute " + attrName);
        if (attrVals.size() == 1 && attrVals.getFirst().equals("continuous"))
        {
            ai = new RealAttrInfo(attrName);
        }
        else if (attrVals.size() == 1 &&
                 attrVals.getFirst().equals("ignore-attribute"))
        {
            ai = new NominalAttrInfo(attrName, 0);
            ai.set_ignore(true);
        }
        else if (attrVals.size() == 1 &&
                 (discreteHint = read_discrete_and_hint((String)attrVals.getFirst())) != -1)
        {
            ai = new NominalAttrInfo(attrName, discreteHint);
        }
        else
        {
            ai = new NominalAttrInfo(attrName, attrVals);
        }
        // Consume a terminating period if there is one; otherwise leave
        // the stream positioned where it was.
        namesFile.mark(1);
        if (namesFile.read() != '.')
            namesFile.reset();
        return ai;
    } catch (IOException e)
    {
        Error.err("FileSchema::read_attr_info - if reached here!");
    }
    return null;
}
/** Determine if the given name is 'discrete', possibly followed by a space
 * and an optional hint number. If the name is 'discrete', the hint number
 * (0 if none given) is returned. If not, -1 is returned. Placing anything
 * other than a nonnegative integer after 'discrete' and a space is reported
 * through Error.err.
 *
 * @param str String to be read.
 * @return The hint number if discrete, -1 otherwise.
 */
private int read_discrete_and_hint(String str)
{
    final String prefix = "discrete ";
    if (str.equals("discrete"))
        return 0;
    // startsWith() compares contents (and covers strings shorter than the
    // prefix). Comparing a substring() result against a literal with != is
    // a reference comparison and would reject every "discrete n" string.
    if (!str.startsWith(prefix))
        return -1;
    // Everything after "discrete " is the hint. Java's two-argument
    // substring takes an END INDEX, not a length, so the one-argument form
    // is the correct way to take the tail.
    String hintString = str.substring(prefix.length());
    long val = -1;
    try
    {
        val = Long.parseLong(hintString);
    } catch (NumberFormatException e)
    {
        // Not a number: leave val negative so the error below is raised.
    }
    if (val < 0)
        Error.err("read_discrete_and_hint: "
            + "illegal value given for "
            + "\"discrete n\" syntax; word following \"discrete\" must be "
            + "a nonnegative integer. You supplied \"" + hintString + "\"");
    return (int) val;
}
/** Skips white space (newlines included) and '|' comments, leaving the
 * stream positioned at the first character that is neither. A comment runs
 * from '|' up to and including the next newline, or to the end of the
 * stream if no newline follows.
 * @param stream Reader allowing access to the namesfile; mark/reset is used
 * to peek one character ahead.
 * @return TRUE if neither a newline nor a comment was skipped, FALSE
 * otherwise.
 */
public boolean skip_white_comments_same_line(BufferedReader stream)
{
    boolean sameLine = true;
    try
    {
        for (;;)
        {
            stream.mark(1);
            // Keep the raw int so end-of-stream (-1) stays distinguishable.
            int c = stream.read();
            if (c == '|')
            {
                sameLine = false;
                // Discard the rest of the comment line. Stopping at EOF as
                // well prevents an endless loop when the comment is the
                // last thing in the stream and has no trailing newline.
                while (c != '\n' && c != -1)
                    c = stream.read();
            }
            else if (c != -1 && Character.isWhitespace((char) c))
            {
                // The whitespace character was already consumed by read().
                if (c == '\n')
                    sameLine = false;
            }
            else
            {
                // Not ours to skip (or EOF): un-read it and stop.
                stream.reset();
                break;
            }
        }
    } catch (IOException e)
    {
        // Best effort, as before: on an I/O failure stop skipping and
        // report what has been observed so far.
    }
    return sameLine;
}
/** Reads a complete loss specification from the stream "in": first the
 * default line, then override lines until read_loss_override_spec reports
 * the end of the specification (EOF or "endloss").
 * @param in Reader from which loss specification is read.
 */
private void read_loss_spec(BufferedReader in)
{
    // The first meaningful line holds the default specification.
    skip_blank_space(in);
    read_loss_default_spec(in);

    // Each pass skips blank space and then reads one override line;
    // read_loss_override_spec returns FALSE once it is done.
    do
    {
        skip_blank_space(in);
    } while (read_loss_override_spec(in));

    // If there is more to the file, skip whitespace/comments for the rest
    // of the last line. A failure to query the stream is ignored.
    boolean moreInput = false;
    try
    {
        moreInput = in.ready();
    } catch (IOException ignored)
    {
    }
    if (moreInput)
        skip_white_comments_same_line(in);
}
/** Reads a single line of the loss specification. The single line specifies
 * a pair of two label values and an associated loss, in the format: <BR>
 * &lt;predicted value&gt;, &lt;actual value&gt;: &lt;loss&gt; <BR>
 * A line holding only the word "endloss", or the end of the stream, ends
 * the specification. Malformed lines are reported through Error.err.
 * @param in Reader from which line of specification will be read.
 * @return FALSE if we just encountered the end of the specification, TRUE
 * otherwise.
 */
private boolean read_loss_override_spec(BufferedReader in)
{
    try
    {
        String predName = read_section(in, "|:,.\n", " \t\r");
        // Keep the raw int so that end-of-stream (-1) can be recognised;
        // after a cast to char it would look like an ordinary character.
        int c = in.read();
        // End of stream where a line would start, or right after "endloss",
        // ends the specification instead of being reported as a missing
        // comma. NOTE(review): assumes read_section returns the text read
        // so far (possibly empty) at EOF - confirm against read_section.
        if (c == -1 && (predName == null || predName.length() == 0
                        || predName.equals("endloss")))
            return false;
        // check if the line ends after one word
        if (c == '\n' || c == '|')
        {
            if (predName.equals("endloss"))
                return false;
            else
                Error.err("in loss specification; line contains a "
                    + "single word: " + predName);
        }
        else if (c != ',')
            Error.err("in loss specification; a comma (,) must follow "
                + "the predicted value " + predName);
        // read the actual value
        String actName = read_section(in, "|:,.\n", " \t\r");
        c = in.read();
        // error if the line ends here
        if (c == '\n' || c == '|')
            Error.err("in loss specification; missing loss value");
        else if (c != ':')
            Error.err("in loss specification; a colon (:) must follow "
                + "the predicted/actual value pair");
        // read the loss
        String lossStr = read_section(in, "|\n", " \t\r");
        // An unparsable number throws NumberFormatException rather than
        // yielding NaN, so map that case to NaN to reach the intended
        // "illegal loss value" report below.
        double loss = Double.NaN;
        try
        {
            loss = Double.parseDouble(lossStr);
        } catch (NumberFormatException e)
        {
            // fall through with loss == NaN
        }
        if (Double.isNaN(loss))
            Error.err("in loss specification: illegal loss value "
                + lossStr);
        // Call add_loss_entry to store this line for later use
        add_loss_entry(predName, actName, loss);
        return true;
    } catch (IOException e)
    {
        // An I/O failure is treated like the end of the specification.
    }
    return false;
}
/** Records one loss override (predicted value, actual value, loss) in
 * lossEntries. If lossKeyword is still nomatrix it is switched to nodefault
 * first.
 * @param predVal Name of the predicted label value.
 * @param actVal Name of the actual label value.
 * @param loss Loss associated with this predicted/actual pair.
 */
private void add_loss_entry(String predVal,String actVal, double loss)
{
// if the keyword is set to "nomatrix", set it to "nodefault"
// instead, to signal an undefined matrix
if (lossKeyword == nomatrix)
lossKeyword = nodefault;
// NOTE(review): lossEntries is indexed with [] and exposes .length, which
// suggests a plain Java array. If so, index `size` (== length) is one past
// the last valid slot and the stores below would throw
// ArrayIndexOutOfBoundsException; the array probably needs to be grown and
// a new entry allocated first. Confirm against the field's declaration.
int size = lossEntries.length;
lossEntries[size].predName = predVal;
lossEntries[size].actName = actVal;
lossEntries[size].loss = loss;
}
/** Skips white space, comments and any newline characters that follow
 * them, leaving the stream positioned at the next character of real input.
 * @param in Reader whose blank space is skipped; mark/reset is used to peek
 * one character ahead.
 */
private void skip_blank_space(BufferedReader in)
{
    try
    {
        skip_white_comments_same_line(in);
        // Peek at the next character. Reading it without a mark/reset would
        // swallow the first character of whatever follows the blank space
        // whenever that character is not a newline.
        in.mark(1);
        while (in.read() == '\n')
        {
            skip_white_comments_same_line(in);
            in.mark(1);
        }
        // The character just read was not a newline: put it back.
        in.reset();
    } catch (IOException e)
    {
        // Best effort, as before: stop skipping on an I/O failure.
    }
}
/** Reads the first line of the loss specification: a keyword (nodefault,
 * default or distance) followed by a colon and an argument list, or by a
 * period. The result is handed to set_loss_default. Problems with the line
 * are reported through Error.err; an IOException ends the read silently.
 * @param in Reader from which the default specification is read.
 */
private void read_loss_default_spec(BufferedReader in)
{
    try
    {
        // Step one: the keyword, then a peek at the character after it.
        String keyword = read_section(in, "|\n:,.-0123456789", " \t\r");
        in.mark(1);
        final char next = (char) in.read();
        in.reset();

        final boolean isPeriod = (next == '.');
        if (!(next == ':' || isPeriod))
        {
            Error.err("in loss specification; "
                + "first line must begin with a keyword "
                + "{nodefault, default, distance} followed by a colon "
                + "(:) or period (.)");
        }

        byte key = 0;
        if (keyword.equals("nodefault"))
        {
            key = nodefault;
        }
        else if (keyword.equals("default"))
        {
            key = adefault;
        }
        else if (keyword.equals("distance"))
        {
            key = distance;
        }
        else
        {
            Error.err("in loss specification; "
                + "unrecognized keyword \"" + keyword + "\". Keyword "
                + "must be one of {nodefault, default, distance}");
        }

        // Step two: the argument array, unless the keyword is followed by
        // a period or the end of the line, in which case there are none
        // and the single terminating character is consumed.
        double[] args;
        if (isPeriod || next == '\n')
        {
            in.read();
            args = new double[0];
        }
        else
        {
            args = process_loss_args(in, ",", "|\n");
        }
        set_loss_default(key, args);
    } catch (IOException e)
    {
    }
}
/** Stores the loss keyword and fills lossArgs[0..2] from the supplied
 * arguments, applying the per-keyword defaults for arguments that were not
 * given. Too many arguments, the nomatrix keyword, or an unknown keyword
 * are reported through Error.err.
 * @param keyword One of nomatrix, nodefault, adefault or distance.
 * @param args Arguments given with the keyword; may be empty.
 */
private void set_loss_default(byte keyword, double[] args)
{
    lossKeyword = keyword;

    if (keyword == nomatrix)
    {
        Error.err("FileSchema::set_loss_default: keyword may not be set "
            + "to nomatrix");
    }
    else if (keyword == nodefault)
    {
        if (args.length != 0)
            Error.err("FileSchema::set_loss_default: nodefault takes no "
                + "arguments");
        lossArgs[0] = 0;
        lossArgs[1] = 0;
        lossArgs[2] = 0;
    }
    else if (keyword == adefault)
    {
        final int count = args.length;
        if (count > 2)
        {
            Error.err("FileSchema::set_loss_default: default takes up to "
                + "two arguments. You supplied " + count);
        }
        else
        {
            // Missing arguments default to 0 and 1 respectively.
            lossArgs[0] = (count >= 1) ? args[0] : 0;
            lossArgs[1] = (count == 2) ? args[1] : 1;
        }
        lossArgs[2] = 0;
    }
    else if (keyword == distance)
    {
        final int count = args.length;
        if (count > 3)
        {
            Error.err("FileSchema::set_loss_default: distance takes up to "
                + "three arguments. You supplied " + count);
        }
        else
        {
            // Missing arguments default to 0, 1, and "same as the second".
            lossArgs[0] = (count > 0) ? args[0] : 0;
            lossArgs[1] = (count > 1) ? args[1] : 1;
            lossArgs[2] = (count > 2) ? args[2] : lossArgs[1];
        }
    }
    else
    {
        Error.err("ABORT_IF_REACHED");
    }
}
/***************************************************************************
***************************************************************************/
private double[] process_loss_args(BufferedReader in, String sepChars,
String termChars)
{
double[] arr = new double[0];
try
{
for(; ;)
{
in.read();
String arg = read_section(in, sepChars + termChars,
" \t\r\n");
// check for a termination character
in.mark(1);
char c =(char)in.read();
in.reset();
boolean isTerm =
(termChars.indexOf(new Character(c).toString())!= -1);
// allow termination immediately on a termChar
if (arg == "" && arr.length == 0 && isTerm == true)
return arr;
double argReal;
argReal = new Double(arg).doubleValue();
if (new Double(arg).isNaN()== false)
arr[arr.length] = argReal;
else
Error.err("FileSchema::process_loss_args: in loss specification; "
+ "argument \"" + arg + "\" in list cannot be "
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -