instances.java
// Check if token is valid.
if (tokenizer.ttype != StreamTokenizer.TT_WORD) {
errms(tokenizer,"not a valid value");
}
if (attribute(i).isNominal()) {
// Check if value appears in header.
index = attribute(i).indexOfValue(tokenizer.sval);
if (index == -1) {
errms(tokenizer,"nominal value not declared in header");
}
instance[i] = (double)index;
} else if (attribute(i).isNumeric()) {
// Check if value is really a number.
try{
instance[i] = Double.valueOf(tokenizer.sval).doubleValue();
} catch (NumberFormatException e) {
errms(tokenizer,"number expected");
}
} else {
instance[i] = attribute(i).addStringValue(tokenizer.sval);
}
}
}
if (flag) {
getLastToken(tokenizer,true);
}
/*
// Add instance to dataset
for (int i=0; i<instance.length; i++){
System.out.println(Double.toString(instance[i]));
}
*/
// Instance neoInstance = ;
// System.out.println(neoInstance.dataset().toString());
Instance neoInstance = new Instance(1,instance);
add(neoInstance);
return true;
}
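/*
 * A sketch of the XML layout that readHeader(Document) below appears to
 * expect, inferred from the element names used in the code. The concrete
 * relation, attribute and value names in this example are illustrative
 * assumptions, not part of the original source:
 *
 *   <INSTANCES title="weather">
 *     <ATTRIBUTES>
 *       <ATTRIBUTE>
 *         <NAME>temperature</NAME>
 *         <TYPE>numeric</TYPE>
 *       </ATTRIBUTE>
 *       <ATTRIBUTE>
 *         <NAME>outlook</NAME>
 *         <TYPE>nominal</TYPE>
 *         <VALUE-SET>
 *           <VALUE>sunny</VALUE>
 *           <VALUE>rainy</VALUE>
 *         </VALUE-SET>
 *       </ATTRIBUTE>
 *     </ATTRIBUTES>
 *   </INSTANCES>
 */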
/**
 * Reads and stores the header of an XML document
 * (added by asymeon to process the header part of the XML format).
 * Errors are logged rather than thrown.
 *
 * @param document the org.jdom.Document object reference
 */
protected void readHeader (Document document) {
try {
String valueSetListString = "";
String documentName;
String attributeName;
String attributeType;
FastVector attributeValues = null;
//Get name of relation.
Document xmlDocument = document;
Element docElement = xmlDocument.getRootElement();
if (docElement.getName().compareTo("INSTANCES")==0){
m_RelationName = (docElement.getAttributeValue("title"));
// System.out.println(m_RelationName);
}
else {
log.error("An error has occured: Document root relation expected");
}
// Create vectors to hold information temporarily.
m_Attributes = new FastVector ();
// Get attribute declarations.
List childrenList = docElement.getChildren("ATTRIBUTES");
if (childrenList.size()!=1) {
log.error("An error has occured : Bad Attributes of the document");
}
Element attributesElement = docElement.getChild("ATTRIBUTES");
List attributeList = attributesElement.getChildren("ATTRIBUTE");
if (attributeList.size() == 0) {
log.error("An error has occured: No attribute is defined");
}
else {
// Get attribute name and type
Iterator childrenIterator = attributeList.iterator();
while (childrenIterator.hasNext()){
Element child = (Element) childrenIterator.next ();
attributeName = child.getChildTextTrim("NAME");
attributeType = child.getChildTextTrim("TYPE");
if (attributeType.equalsIgnoreCase("real") ||
attributeType.equalsIgnoreCase("integer") ||
attributeType.equalsIgnoreCase("numeric")) {
Attribute numericAttribute = new org.agentacademy.modules.dataminer.core.Attribute(attributeName, numAttributes());
m_Attributes.addElement(numericAttribute);
}
else if (attributeType.equalsIgnoreCase("string")) {
Attribute stringAttribute = new org.agentacademy.modules.dataminer.core.Attribute(attributeName, (FastVector)null, numAttributes());
m_Attributes.addElement(stringAttribute);
}
else if (attributeType.equalsIgnoreCase("nominal")) {
attributeValues = new FastVector();
List valueSetList = child.getChildren("VALUE-SET");
valueSetListString = valueSetList.toString();
Element valueSetElement = child.getChild("VALUE-SET");
if (valueSetList.size()!=1) {
log.error("An error has occured: Invalid value-set defined");
}
else {
List valuesList = valueSetElement.getChildren("VALUE");
if (valuesList.size()<1) {
log.error("An error has occured: No value defined");
}
//Get the values one by one.
Iterator valueIterator = valuesList.iterator();
while (valueIterator.hasNext()){
Element attrValue = (Element)valueIterator.next();
attributeValues.addElement(attrValue.getText());
}
Attribute nominalAttribute = new org.agentacademy.modules.dataminer.core.Attribute(attributeName, attributeValues, numAttributes());
m_Attributes.addElement(nominalAttribute);
}
}
else {
log.error(" An error has occured: No valid attribute type or invalid ");
}
}//end of while
}//end of if-else
printVector(m_Attributes);
}
catch (Exception e){
log.error("Error on function readHeader@Instances has occured:" + e);
}
}
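/*
 * A minimal ARFF header of the kind readHeader(StreamTokenizer) below
 * accepts; the relation and attribute names here are illustrative only:
 *
 *   @relation weather
 *   @attribute outlook {sunny, overcast, rainy}
 *   @attribute temperature numeric
 *   @attribute description string
 *   @data
 */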
/**
* Reads and stores header of an ARFF file.
*
* @param tokenizer the stream tokenizer
* @exception IOException if the information is not read
* successfully
*/
protected void readHeader(StreamTokenizer tokenizer)
throws IOException{
String attributeName;
FastVector attributeValues;
int i;
// Get name of relation.
getFirstToken(tokenizer);
if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
errms(tokenizer,"premature end of file");
}
if (tokenizer.sval.equalsIgnoreCase("@relation")){
getNextToken(tokenizer);
m_RelationName = tokenizer.sval;
// System.out.println(m_RelationName);
getLastToken(tokenizer,false);
} else {
errms(tokenizer,"keyword @relation expected");
}
// Create vectors to hold information temporarily.
m_Attributes = new FastVector();
// Get attribute declarations.
getFirstToken(tokenizer);
if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
errms(tokenizer,"premature end of file");
}
while (tokenizer.sval.equalsIgnoreCase("@attribute")) {
// Get attribute name.
getNextToken(tokenizer);
attributeName = tokenizer.sval;
getNextToken(tokenizer);
// Check if attribute is nominal.
if (tokenizer.ttype == StreamTokenizer.TT_WORD) {
// Attribute is real, integer, or string.
if (tokenizer.sval.equalsIgnoreCase("real") ||
tokenizer.sval.equalsIgnoreCase("integer") ||
tokenizer.sval.equalsIgnoreCase("numeric")) {
m_Attributes.addElement(new Attribute(attributeName, numAttributes()));
readTillEOL(tokenizer);
} else if (tokenizer.sval.equalsIgnoreCase("string")) {
m_Attributes.addElement(new Attribute(attributeName, (FastVector)null, numAttributes()));
readTillEOL(tokenizer);
} else {
errms(tokenizer,"no valid attribute type or invalid "+
"enumeration");
}
} else {
// Attribute is nominal.
attributeValues = new FastVector();
tokenizer.pushBack();
// Get values for nominal attribute.
if (tokenizer.nextToken() != '{') {
errms(tokenizer,"{ expected at beginning of enumeration");
}
while (tokenizer.nextToken() != '}') {
if (tokenizer.ttype == StreamTokenizer.TT_EOL) {
errms(tokenizer,"} expected at end of enumeration");
} else {
attributeValues.addElement(tokenizer.sval);
}
}
if (attributeValues.size() == 0) {
errms(tokenizer,"no nominal values found");
}
m_Attributes.addElement(new Attribute(attributeName, attributeValues, numAttributes()));
}
getLastToken(tokenizer,false);
getFirstToken(tokenizer);
if (tokenizer.ttype == StreamTokenizer.TT_EOF)
errms(tokenizer,"premature end of file");
}
// Check if data part follows. We can't easily check for EOL.
if (!tokenizer.sval.equalsIgnoreCase("@data")) {
errms(tokenizer,"keyword @data expected");
}
// Check if any attributes have been declared.
if (m_Attributes.size() == 0) {
errms(tokenizer,"no attributes declared");
}
// Allocate buffers in case sparse instances have to be read
m_ValueBuffer = new double[numAttributes()];
m_IndicesBuffer = new int[numAttributes()];
}
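// Usage sketch for the helper below: appending every instance of this set
// to another set could be done with copyInstances(0, dest, numInstances()),
// assuming dest has been created with a compatible header.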
/**
 * Copies instances from this set to the end of another
 * one.
 *
 * @param from the position of the first instance to be copied
 * @param dest the destination for the instances
 * @param num the number of instances to be copied
 */
private void copyInstances(int from, Instances dest, int num) {
for (int i = 0; i < num; i++) {
dest.add(instance(from + i));
}
}
/**
 * Throws an error message with the line number and last token read.
 *
 * @param tokenizer the stream tokenizer
 * @param theMsg the error message to be thrown
 * @throws IOException containing the error message
 */
private void errms(StreamTokenizer tokenizer, String theMsg)
throws IOException {
throw new IOException(theMsg + ", read " + tokenizer.toString());
}
/**
* Replaces the attribute information by a clone of
* itself.
*/
private void freshAttributeInfo() {
m_Attributes = (FastVector) m_Attributes.copyElements();
}
/**
* Gets next token, skipping empty lines.
*
* @param tokenizer the stream tokenizer
* @exception IOException if reading the next token fails
*/
private void getFirstToken(StreamTokenizer tokenizer)
throws IOException{
while (tokenizer.nextToken() == StreamTokenizer.TT_EOL){};
if ((tokenizer.ttype == '\'') ||
(tokenizer.ttype == '"')) {
tokenizer.ttype = StreamTokenizer.TT_WORD;
} else if ((tokenizer.ttype == StreamTokenizer.TT_WORD) &&
(tokenizer.sval.equals("?"))){
tokenizer.ttype = '?';
}
}
/**
* Gets index, checking for a premature end of line.
*
* @param tokenizer the stream tokenizer
* @exception IOException if it finds a premature end of line
*/
private void getIndex(StreamTokenizer tokenizer) throws IOException{
if (tokenizer.nextToken() == StreamTokenizer.TT_EOL) {
errms(tokenizer,"premature end of line");
}
if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
errms(tokenizer,"premature end of file");
}
}
/**
 * Gets the next token and checks whether it is the end of line.
 *
 * @param tokenizer the stream tokenizer
 * @param endOfFileOk true if an end of file instead of an end of line is acceptable
 * @exception IOException if it doesn't find an end of line
 */
private void getLastToken(StreamTokenizer tokenizer, boolean endOfFileOk)
throws IOException{
if ((tokenizer.nextToken() != StreamTokenizer.TT_EOL) &&
((tokenizer.ttype != StreamTokenizer.TT_EOF) || !endOfFileOk)) {
errms(tokenizer,"end of line expected");
}
}
/**
* Gets next token, checking for a premature end of line.
*
* @param tokenizer the stream tokenizer
* @exception IOException if it finds a premature end of line
*/
private void getNextToken(StreamTokenizer tokenizer)
throws IOException{
if (tokenizer.nextToken() == StreamTokenizer.TT_EOL) {
errms(tokenizer,"premature end of line");
}
if (tokenizer.ttype == StreamTokenizer.TT_EOF) {
errms(tokenizer,"premature end of file");
} else if ((tokenizer.ttype == '\'') ||
(tokenizer.ttype == '"')) {
tokenizer.ttype = StreamTokenizer.TT_WORD;
} else if ((tokenizer.ttype == StreamTokenizer.TT_WORD) &&
(tokenizer.sval.equals("?"))){
tokenizer.ttype = '?';
}
}
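// The tokenizer configuration below treats whitespace and commas as
// delimiters, '%' as a comment character, single and double quotes as
// string quotes, and '{' and '}' as ordinary single-character tokens, and
// it makes end-of-line significant, which matches the line-oriented ARFF
// parsing above.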
/**
* Initializes the StreamTokenizer used for reading the ARFF file.
*
* @param tokenizer the stream tokenizer
*/
private void initTokenizer(StreamTokenizer tokenizer){
tokenizer.resetSyntax();
tokenizer.whitespaceChars(0, ' ');
tokenizer.wordChars(' '+1,'\u00FF');
tokenizer.whitespaceChars(',',',');
tokenizer.commentChar('%');
tokenizer.quoteChar('"');
tokenizer.quoteChar('\'');
tokenizer.ordinaryChar('{');
tokenizer.ordinaryChar('}');
tokenizer.eolIsSignificant(true);
}
/**
* Returns string including all instances, their weights and
* their indices in the original dataset.
*
* @return description of instance and its weight as a string
*/
private String instancesAndWeights(){
StringBuffer text = new StringBuffer();
for (int i = 0; i < numInstances(); i++) {
text.append(instance(i) + " " + instance(i).weight());
if (i < numInstances() - 1) {
text.append("\n");
}
}
return text.toString();
}
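// Note on quickSort below: the pivot is the attribute value of the middle
// instance, widened into a small tolerance band (midMinus, midPlus); values
// within 1e-6 of the pivot are presumably treated as equal to it during the
// partitioning scan.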
/**
* Implements quicksort.
*
* @param attIndex the attribute's index
* @param lo0 the first index of the subset to be sorted
* @param hi0 the last index of the subset to be sorted
*/
private void quickSort(int attIndex, int lo0, int hi0) {
int lo = lo0, hi = hi0;
double mid, midPlus, midMinus;
if (hi0 > lo0) {
// Arbitrarily establishing partition element as the
// midpoint of the array.
mid = instance((lo0 + hi0) / 2).value(attIndex);
midPlus = mid + 1e-6;
midMinus = mid - 1e-6;
// loop through the ar