📄 fileschema.java

📁 java数据挖掘算法
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
                   + "converted to a Real");

            // if we hit a termination character, stop here
            if (isTerm == true)
               return arr;
         }
      } catch(IOException e)
      {
      }
      Error.err("Shouldn\'t read here!");
      return null;
   }

   /** Reads a single word from the supplied BufferedReader.
    * @param stream The BufferedReader to be read from.
    * @param qMark TRUE if question marks are an acceptable name, FALSE otherwise.
    * @param sameLine Set to TRUE if the line has not changed in the process of reading this word, FALSE
    * otherwise.
    * @return The word read.
    */
   public String read_word(BufferedReader stream, boolean qMark, boolean[] sameLine)
   {
      // Periods are never accepted as word characters by this reader, so a
      // '.' always terminates the word.
      final boolean periodAllowed = false;

      try
      {
         char[] buffer = new char[MAX_INPUT_STRING_SIZE + 1];
         int length = 0;
         boolean pendingSpace = false;   // a run of blanks was seen since the last stored character
         char[] peeked = new char[1];    // filled in by legal_attr_char with the next (unread) character

         // The result of the leading skip is handed back to the caller.
         sameLine[0] = skip_white_comments_same_line(stream);

         while (legal_attr_char(stream, peeked, periodAllowed))
         {
            boolean blank = peeked[0] == ' ' || peeked[0] == '\t' || peeked[0] == '\r';
            if (blank)
            {
               // Consume the blank; it is stored later as a single space,
               // and only if another word character follows.
               pendingSpace = true;
               stream.skip(1);
            }
            else
            {
               if (pendingSpace)
               {
                  buffer[length] = ' ';
                  length = inc_word_len(length);
                  pendingSpace = false;
               }
               buffer[length] = (char) stream.read();
               // A backslash escapes the following character: keep that one instead.
               if (buffer[length] == '\\')
                  buffer[length] = (char) stream.read();
               length = inc_word_len(length);
            }

            // indicates problem w/legal_attr_char()
            if (!stream.ready())
               Error.err("FileSchema::read_word: stream not ready");
         }

         // The terminating character was only peeked, never read, so
         // sameLine is deliberately left untouched here.
         if (!periodAllowed && length < 1)
            System.out.println("FileSchema::read_word: Unable to read word.  Perhaps you forgot to supply it.");
         if (length == 1 && buffer[0] == '?' && !qMark)
            System.out.println("Illegal name \'?\'");

         // trim() also removes the unused NUL tail of the buffer.
         return String.valueOf(buffer).trim();
      }
      catch (IOException ignored)
      {
         // I/O failures are not reported; the caller receives null.
         return null;
      }
   }

   /** Reads a single word from the supplied BufferedReader without crossing lines.
    * @param stream The BufferedReader to be read from.
    * @param qMark TRUE if question marks are an acceptable name, FALSE otherwise.
    * @param periodAllowed Ignored. The word is always read with periods disallowed; the
    * parameter is kept only so that existing callers continue to compile.
    * @return The word read.
    */
   public String read_word_on_same_line(BufferedReader stream, boolean qMark, boolean periodAllowed)
   {
      // periodAllowed is not forwarded: read_word always reads with periods
      // disallowed, so the argument has no effect on the result.
      boolean[] stayedOnLine = { false };
      String result = read_word(stream, qMark, stayedOnLine);

      if (stayedOnLine[0])
         return result;

      // The word was only found after a line change: report it, then still
      // hand back whatever was read.
      Error.err("Parse Error->FileSchema:read_word_same_line Another word expected");
      return result;
   }

/***************************************************************************
***************************************************************************/
   /**
    * Peeks at the next character of the stream without consuming it and tells
    * whether it may be part of a word.
    *
    * @param stream The BufferedReader to peek into.
    * @param c One-element array that receives the peeked character.
    * @param periodAllowed Value returned when the peeked character is a period.
    * @return FALSE for ',', ':', '|' and newline, periodAllowed for '.', TRUE for
    * every other character (including backslash); FALSE if an IOException occurs.
    */
   private boolean legal_attr_char(BufferedReader stream, char[] c, boolean periodAllowed)
   {
      try
      {
         // Reported through Error.err; the peek below is still attempted afterwards.
         if (!stream.ready())
            Error.err("Unexpected end of file.");

         // Look one character ahead, then rewind so the caller can still read it.
         stream.mark(1);
         final char peeked = (char) stream.read();
         stream.reset();
         c[0] = peeked;

         if (peeked == '.')
            return periodAllowed;

         boolean terminator = peeked == ',' || peeked == ':' || peeked == '|' || peeked == '\n';
         return !terminator;
      }
      catch (IOException ignored)
      {
         // An I/O failure is treated like hitting an illegal character.
         return false;
      }
   }

/***************************************************************************
***************************************************************************/
   /**
    * Advances a word-length counter by one and prints an overflow message to
    * standard output when the new length exceeds MAX_INPUT_STRING_SIZE.
    *
    * NOTE(review): the message ends in "fatal_error" but execution continues
    * and the oversized length is returned; callers that then store at that
    * index of a (MAX_INPUT_STRING_SIZE + 1)-element buffer will run past its end.
    *
    * @param wordlen Current length.
    * @return wordlen + 1, whether or not the limit was exceeded.
    */
   private int inc_word_len(int wordlen)
   {
      final int next = wordlen + 1;
      if (next > MAX_INPUT_STRING_SIZE)
         System.out.println("mlcIO::read_word: word overflow.  More than "
                + MAX_INPUT_STRING_SIZE + " characters for word fatal_error");
      return next;
   }

/***************************************************************************
***************************************************************************/
   /**
    * Reads characters from the stream up to (but not including) the next
    * delimiter, collapsing every run of whitespace characters into a single
    * space. Leading and trailing whitespace is removed from the result.
    *
    * @param stream The BufferedReader to be read from.
    * @param delims Characters that end the section.
    * @param wsChars Characters treated as whitespace.
    * @return The section text read so far; if an IOException interrupts reading,
    * the text collected up to that point is returned.
    */
   private String read_section_ws(BufferedReader stream, String delims, String wsChars)
   {
      boolean allowEOF = false;
      char[] word = new char[MAX_INPUT_STRING_SIZE + 1];
      int wordLen = 0;
      boolean whitespace = false;   // a whitespace run is pending and not yet stored
      char[] c = new char[1];       // receives the character peeked by is_section_char

      try
      {
         while (is_section_char(stream, c, delims, allowEOF) != sectionDelimiter)
         {
            if (wsChars.indexOf(c[0]) != -1)
            {
               // Consume the whitespace character; it is emitted later as one
               // space, and only if a non-whitespace character follows.
               whitespace = true;
               stream.skip(1);
            }
            else
            {
               if (whitespace)
               {
                  word[wordLen] = ' ';
                  // The advanced length must be kept. Previously the result was
                  // discarded, so the next character overwrote the space and
                  // inner whitespace was dropped instead of collapsed.
                  wordLen = inc_word_len(wordLen);
                  whitespace = false;
               }
               word[wordLen] = (char) stream.read();
               wordLen = inc_word_len(wordLen);
            }

            if (!stream.ready())
               Error.err("FileSchema::read_section_ws: unexpected end of file");
         }
      }
      catch (IOException ignored)
      {
         // Best effort, as before: an I/O failure ends the section early and
         // whatever was collected is returned.
      }
      return new String(word, 0, wordLen).trim();
   }

/***************************************************************************
***************************************************************************/
   /**
    * Classifies the next character of the stream for the section readers.
    * An ordinary or delimiter character is only peeked and left in the stream.
    * For a backslash escape, the backslash itself is consumed and the character
    * after it is peeked.
    *
    * @param stream The BufferedReader to inspect.
    * @param c One-element array receiving the peeked character (the escaped
    * character for an escape; 0 at an allowed end of file).
    * @param delims Characters that count as delimiters.
    * @param allowEOF TRUE if running out of input is acceptable, FALSE if it
    * should be reported through Error.err.
    * @return sectionDelimiter when the stream is not ready or the character is in
    * delims, sectionEscape for a backslash escape, sectionCharacter otherwise
    * (also after an IOException has been reported).
    */
   private byte is_section_char(BufferedReader stream, char[] c, String delims, boolean allowEOF)
   {
      try
      {
         // No input left: either way this ends the section.
         if (!stream.ready())
         {
            if (allowEOF)
               c[0] = 0;
            else
               Error.err("FileSchema::is_section_char:unexpected end of file");
            return sectionDelimiter;
         }

         // Peek one character and rewind.
         stream.mark(1);
         char peeked = (char) stream.read();
         stream.reset();
         c[0] = peeked;

         if (peeked != '\\')
         {
            if (delims.indexOf(peeked) != -1)
               return sectionDelimiter;
            return sectionCharacter;
         }

         // Escape sequence: drop the backslash, then peek at what it escapes.
         stream.read();
         stream.mark(1);
         peeked = (char) stream.read();
         stream.reset();
         if (!stream.ready())
            Error.err("FileSchema::is_section_char:unexpected end of file after backslash");
         c[0] = peeked;
         return sectionEscape;
      }
      catch (IOException e)
      {
         Error.err("shouldn\'t get here in FileSchema.java");
         return sectionCharacter;
      }
   }

/***************************************************************************
***************************************************************************/
   /**
    * Reads characters from the stream up to (but not including) the next
    * delimiter, dropping any unescaped character listed in ignoreChars.
    * Escaped characters are always kept. The result is trimmed.
    *
    * @param stream The BufferedReader to be read from.
    * @param delims Characters that end the section.
    * @param ignoreChars Characters to discard unless they were escaped.
    * @return The section text; after an IOException (reported through
    * Error.err) the text collected so far.
    */
   private String read_section(BufferedReader stream, String delims, String ignoreChars)
   {
      final boolean allowEOF = false;
      char[] buffer = new char[MAX_INPUT_STRING_SIZE + 1];
      int length = 0;
      char[] peeked = new char[1];   // receives the character seen by is_section_char

      try
      {
         for (;;)
         {
            byte kind = is_section_char(stream, peeked, delims, allowEOF);
            if (kind == sectionDelimiter)
               break;

            boolean ignorable = ignoreChars.indexOf(peeked[0]) != -1;
            boolean keep = !ignorable || kind == sectionEscape;

            // The character is consumed in either case; it is stored only when kept.
            int raw = stream.read();
            if (keep)
            {
               buffer[length] = (char) raw;
               length = inc_word_len(length);
            }

            if (!stream.ready())
               Error.err("FileSchema::read_section: stream not ready!");
         }
      }
      catch (IOException e)
      {
         Error.err("Error :)");
      }

      // trim() also removes the unused NUL tail of the buffer.
      return String.valueOf(buffer).trim();
   }

/***************************************************************************
***************************************************************************/
   /**
    * Applies the "label" and "weight" configuration options to this schema by
    * resolving the named attributes to column indices (labelColumn and
    * weightColumn) and checking their types.
    *
    * @param configOptions Option source queried for "label" and "weight";
    * presumably get_option stores the option value in element 0 of the supplied
    * array and returns TRUE when the option is present.
    */
   private void apply_config(OptionServer configOptions)
   {
      String[] labelName = new String[1];
      if (configOptions.get_option("label" , labelName))
      {
         labelColumn = find_attribute(labelName, false);
         // The type check is only meaningful (and only safe to index) once
         // the attribute has actually been found.
         if (labelColumn < 0)
            Error.err("FileSchema::apply_config: The "
                   + "requested label attribute \"" + labelName[0] + "\" was never"
                   + " declared --> fatal error");
         else if (attrInfos[labelColumn].can_cast_to_real())
            Error.err("FileSchema::apply_config: The "
                   + "requested label attribute \"" + labelName[0] + "\" must be "
                   + "a Nominal type --> fatal error");
      }

      String[] weightName = new String[1];
      if (configOptions.get_option("weight" , weightName))
      {
         weightColumn = find_attribute(weightName, false);
         if (weightColumn < 0)
            Error.err("FileSchema::apply_config: The "
                   + "requested weight attribute \"" + weightName[0] + "\" was never"
                   + " declared --> fatal error");
         // Validate the weight attribute itself. The previous code re-tested
         // attrInfos[labelColumn] here (copied from the label branch), so the
         // weight attribute was never inspected and an unset labelColumn could
         // be used as an index.
         // NOTE(review): this assumes a weight must be a Real (continuous)
         // attribute, which is why the test is negated and the message says
         // "Real" - confirm against the .names format definition.
         else if (!attrInfos[weightColumn].can_cast_to_real())
            Error.err("FileSchema::apply_config: The "
                   + "requested weight attribute \"" + weightName[0] + "\" must be "
                   + "a Real type --> fatal error");
      }
   }

   /** Find an attribute in the file schema by name. If the attribute is not found,
    * aborts if fatalOnNotFound is set.  Otherwise returns -1. Assumes the schema has
    * no duplicate attributes.
    *
    * @param name Name of the attribute.
    * @param fatalOnNotFound TRUE if an error message should be displayed if there is no attribute matching
    * that name, FALSE otherwise.
    * @return The integer value corresponding to the attribute with the specified name or -1
    * if an attribute with a matching name is not found.
    */
   public int find_attribute(String[] name, boolean fatalOnNotFound)
   {
      // Only element 0 of the array carries the attribute name.
      for (int i = 0; i < attrInfos.length; i++)
         if (attrInfos[i].name().equals(name[0]))
            return i;

      // Report the name itself. Concatenating the array printed its identity
      // (e.g. "[Ljava.lang.String;@...") and ran straight into "does not exist".
      if (fatalOnNotFound)
         Error.err("FileSchema::find_attribute "
                + name[0] + " does not exist in this schema --> fatal error");
      return -1;
   }

   /** Create an MLJ style schema from all the information stored in this class.
    * This schema is used to create lists, use InstanceReaders, etc.
    *
    * @return Schema object containing information generated from this FileSchema object.
    */
   public Schema create_schema()
   {
      LinkedList schemaNames = new LinkedList();
      AttrInfo labelInfo = null;

      // Diagnostic only: the loop below still dereferences attrInfos.
      if (attrInfos == null)
         System.out.println("attrInfos is null");

      for (int col = 0; col < attrInfos.length; col++)
      {
         // The label attribute is kept apart from the ordinary attributes.
         if (col == labelColumn)
         {
            labelInfo = attrInfos[col];
            continue;
         }

         // The weight column is left out only when it is flagged as ignored.
         boolean skippedWeight = (col == weightColumn) && ignoreWeightColumn;
         if (!skippedWeight)
            schemaNames.add(attrInfos[col]);
      }

      if (labelInfo == null)
         return new Schema(schemaNames);
      return new Schema(schemaNames, labelInfo);
   }


   /** Display this FileSchema.  This is done in .names file format so this can be
    * used for file conversion.
    *
    */
   public void display()
   {
      // --- config section: label / weight attribute names, when set ---
      System.out.println("config");
      if (labelColumn != -1)
         System.out.println("label: " +attrInfos[labelColumn].name());
      if (weightColumn != -1)
         System.out.println("weight: " +attrInfos[weightColumn].name());
      System.out.println("endconfig\n");

      // --- loss section: omitted entirely when the keyword is nomatrix ---
      if (lossKeyword != nomatrix)
      {
         System.out.println("loss");

         if (lossKeyword == nodefault)
            System.out.println("nodefault");
         else if (lossKeyword == adefault)
            System.out.println("default: " +lossArgs[0]+ ", " +lossArgs[1]);
         else if (lossKeyword == distance)
            System.out.println("distance: " +lossArgs[0]+ ", " +
                   lossArgs[1]+ ", " + lossArgs[2]);
         // any other keyword prints no header line

         for (int entry = 0; entry < lossEntries.length; entry++)
            System.out.println(lossEntries[entry].predName+ ", "
                   +lossEntries[entry].actName+ ": "
                   +lossEntries[entry].loss);

         System.out.println("endloss");
      }

      // --- one line per attribute: "name: " followed by its type or values ---
      for (AttrInfo info : attrInfos)
      {
         System.out.print(info.name()+ ": ");

         if (info.can_cast_to_real())
         {
            System.out.println("continuous");
            continue;
         }
         // Attributes that are neither real nor nominal get only the "name: " prefix.
         if (!info.can_cast_to_nominal())
            continue;

         NominalAttrInfo nominal = info.cast_to_nominal();
         if (!nominal.is_fixed())
         {
            System.out.println("discrete");
            continue;
         }

         // Fixed value set: comma-separated list with no trailing separator.
         for (int val = Globals.FIRST_NOMINAL_VAL; val <= nominal.num_values(); val++)
         {
            System.out.print(nominal.get_value(val));
            if (val < nominal.num_values())
               System.out.print(", ");
         }
         System.out.println();
      }
   }
}
上一页 1 23
💿 文件大小 441 K
👤 上传用户 l2335800
📂 所属分类人工智能/神经网络
🏷️ 相关标签

#java #数据挖掘算法
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -