⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fileschema.java

📁 java数据挖掘算法
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
    * encountered, the correct AttrInfo type will be created.
    * @param namesFile Reader allowing access to the namesfile.
    * @param attrName The name of the attribute.
    * @return The newly created AttrInfo whose values were read.
    */
   private AttrInfo read_attr_info(BufferedReader namesFile, String attrName)
   {
      // Values (or the single type keyword) read for this attribute.
      LinkedList attrVals = new LinkedList();
      AttrInfo ai = null;
      int discreteHint = -1;

      // A comma is required before each value other than the first, so we
      // keep a flag telling us whether this is the first pass of the loop.
      boolean firstVal = true;

      try
      {
         // Peek at the next character without consuming it.  The result is
         // kept as an int: casting it to char would turn the end-of-stream
         // value -1 into '\uffff', and the EOF test below could never match.
         namesFile.mark(1);
         int c = namesFile.read();
         namesFile.reset();

         // The value list ends at a period, end of line, comment, or EOF.
         while(c != '.' && c != '\n' && c != '|' && c != -1)
         {
            if (!firstVal && c == ',')
               namesFile.read();   // consume the separating comma

            boolean[] sameLine = new boolean[1];
            sameLine[0] = false;
            String attrValue = read_word(namesFile, false, sameLine);
            attrVals.add(attrValue);
            firstVal = false;

            namesFile.mark(1);
            c = namesFile.read();
            namesFile.reset();
         }

         if (attrVals.size() == 0)
            Error.err("Missing values or type specifier for attribute "
                   + attrName);

         // A single word may be a type keyword rather than a nominal value.
         if (attrVals.size() == 1 && "continuous".equals(attrVals.getFirst()))
         {
            ai = new RealAttrInfo(attrName);
         }
         else if (attrVals.size() == 1
                && "ignore-attribute".equals(attrVals.getFirst()))
         {
            ai = new NominalAttrInfo(attrName, 0);
            ai.set_ignore(true);
         }
         else if (attrVals.size() == 1
                && (discreteHint =
                    read_discrete_and_hint((String)attrVals.getFirst())) != -1)
         {
            ai = new NominalAttrInfo(attrName, discreteHint);
         }
         else
         {
            // Otherwise the words read are the attribute's nominal values.
            ai = new NominalAttrInfo(attrName, attrVals);
         }

         // Consume a terminating period if one is present; anything else is
         // pushed back for the caller.
         namesFile.mark(1);
         if (namesFile.read() != '.')
            namesFile.reset();
         return ai;

      } catch(IOException e)
      {
         Error.err("FileSchema::read_attr_info - if reached here!");
      }

      // Only reached if Error.err returns after an I/O failure.
      return null;
   }

   /** Determine if the given name is 'discrete', possibly with an optional hint
    * number. If the name is 'discrete', then the hint number (0 if none given) is
    * returned.  If not, then -1 is returned. Placing anything other than a
    * nonnegative integer that fits in an int after 'discrete' and a space is
    * reported through Error.err.
    *
    * @param str String to be read.
    * @return The hint number if discrete, -1 otherwise.
    */
   private int read_discrete_and_hint(String str)
   {
      final String keyword = "discrete";
      final String prefix = keyword + " ";

      if (str.equals(keyword))
         return 0;

      // Compare by content: == on Strings compares references, and a
      // substring is never the same object as a literal.
      if (!str.startsWith(prefix))
         return -1;

      // Everything after "discrete " is the hint.  Java's substring takes an
      // end index (not a length), so the one-argument form is used.
      String hintString = str.substring(prefix.length()).trim();

      // A hint that is not a number is treated like a negative one so that
      // it is reported below instead of escaping as NumberFormatException.
      long val = -1;
      try
      {
         val = Long.parseLong(hintString);
      } catch(NumberFormatException e)
      {
         val = -1;
      }

      // Values that do not fit in an int are rejected too; truncating them
      // could silently produce a different (even negative) hint.
      if (val < 0 || val > Integer.MAX_VALUE)
         Error.err("read_discrete_and_hint: " 
                + "illegal value given for " 
                + "\"discrete n\" syntax; word following \"discrete\" must be " 
                + "a nonnegative integer.  You supplied \"" + hintString + "\"");

      // NOTE(review): whether Error.err returns is not visible here; if it
      // does, the (possibly invalid) value is still returned, as before.
      return (int)val;
   }

    /** Skips white space and comments. A comment starts with '|' and runs
     * to the end of its line (or to the end of the stream). On return the
     * reader is positioned at the first character that is neither white
     * space nor part of a comment, or at end of stream.
     * @param stream Reader allowing access to the namesfile.
     * @return TRUE if neither a newline nor a comment was skipped, FALSE
     * otherwise.
     */
   public boolean skip_white_comments_same_line(BufferedReader stream)
   {
      boolean sameLine = true;
      try
      {
         // Peek at the next character.  It is kept as an int so that the
         // end-of-stream value -1 stays distinguishable from real characters.
         stream.mark(1);
         int c = stream.read();
         stream.reset();

         while(c != -1 && (Character.isWhitespace((char)c) || c == '|'))
         {
            if (c == '|')
            {
               sameLine = false;
               // Consume the comment up to and including its newline.  Also
               // stop at end of stream: a comment on the last line need not
               // be newline-terminated, and looping on -1 would never end.
               do
               {
                  c = stream.read();
               } while(c != '\n' && c != -1);
            }
            else
            {
               if (c == '\n')
                  sameLine = false;
               stream.skip(1);   // consume the white-space character
            }

            stream.mark(1);
            c = stream.read();
            stream.reset();
         }
      } catch(IOException e)
      {
         // Best effort, as before: an I/O failure simply ends the scan and
         // the flag computed so far is returned.
      }
      return sameLine;
   }

   /** Reads a complete loss specification from the stream "in": first the
    * default line, then override lines until read_loss_override_spec reports
    * the end of the specification.
    * @param in Reader from which loss specification is read.
    */
   private void read_loss_spec(BufferedReader in)
   {
      // The first line of the specification holds the default keyword.
      skip_blank_space(in);
      read_loss_default_spec(in);
      skip_blank_space(in);

      // Read override lines one at a time; read_loss_override_spec returns
      // FALSE once it is done.  Blank space is skipped after every line
      // that was read successfully.
      boolean moreLines = read_loss_override_spec(in);
      while(moreLines)
      {
         skip_blank_space(in);
         moreLines = read_loss_override_spec(in);
      }

      // If the reader reports more input, skip white space and comments
      // that follow the last line read.
      try
      {
         if (in.ready())
         {
            skip_white_comments_same_line(in);
         }
      } catch(IOException e)
      {
      }
   }

   /** Reads a single line of the loss specification. The single line specifies a pair
    * of two label values and an associated loss, in the format:                  <BR>
    * &lt;predicted value&gt;, &lt;actual value&gt;: &lt;loss&gt;                 <BR>
    * A line consisting of the single word "endloss", or the end of the stream,
    * ends the specification. Malformed lines are reported through Error.err.
    * @param in Reader from which line of specification will be read.
    * @return FALSE if we just encountered the end of the specification, TRUE otherwise.
    */
   private boolean read_loss_override_spec(BufferedReader in)
   {
      try
      {
         String predName = read_section(in, "|:,.\n" , " \t\r");

         // Keep the delimiter as an int: casting to char would turn the
         // end-of-stream value -1 into '\uffff' and hide EOF.
         int c = in.read();

         // Nothing left to read: EOF ends the specification.
         if (c == -1 && (predName == null || predName.length() == 0))
            return false;

         // check if the line ends after one word
         if (c == '\n' || c == '|' || c == -1)
         {
            if (predName.equals("endloss"))
               return false;
            else
               Error.err("in loss specification; line contains a " 
                      + "single word: " + predName);
         }
         else if (c != ',')
            Error.err("in loss specification; a comma (,) must follow " 
                   + "the predicted value " + predName);

         // read the actual value
         String actName = read_section(in, "|:,.\n" , " \t\r");
         c = in.read();

         // error if the line (or the stream) ends here
         if (c == '\n' || c == '|' || c == -1)
            Error.err("in loss specification; missing loss value");
         else if (c != ':')
            Error.err("in loss specification; a colon (:) must follow " 
                   + "the predicted/actual value pair");

         // read the loss
         String lossStr = read_section(in, "|\n" , " \t\r");

         // Text that is not a number is mapped to NaN so that it is reported
         // below instead of escaping as NumberFormatException.
         double loss;
         try
         {
            loss = Double.parseDouble(lossStr);
         } catch(NumberFormatException e)
         {
            loss = Double.NaN;
         }

         if (Double.isNaN(loss))
            Error.err("in loss specification: illegal loss value " 
                   + lossStr);

         // Call add_loss_entry to store this line for later use
         add_loss_entry(predName, actName, loss);
         return true;
      } catch(IOException ignored)
      {
         // Best effort, as before: an I/O failure is treated as the end of
         // the specification.
      }

      return false;
   }

/***************************************************************************
***************************************************************************/
   /** Records one loss override (predicted value, actual value, loss).
    * If the loss keyword is still "nomatrix" it is first changed to
    * "nodefault", to signal an undefined matrix.
    *
    * NOTE(review): the entry is written at index lossEntries.length, which is
    * one past the last valid index of a Java array, so the first assignment
    * below throws ArrayIndexOutOfBoundsException. The array presumably has to
    * be grown by one element first; the element type is not visible from
    * here, so this needs to be fixed alongside the field's declaration.
    *
    * @param predVal Name of the predicted label value.
    * @param actVal Name of the actual label value.
    * @param loss Loss associated with the pair.
    */
   private void add_loss_entry(String predVal,String actVal, double loss)
   {
      if (lossKeyword == nomatrix)
      {
         lossKeyword = nodefault;
      }

      // Slot intended for the new entry (see the review note above).
      final int slot = lossEntries.length;
      lossEntries[slot].predName = predVal;
      lossEntries[slot].actName = actVal;
      lossEntries[slot].loss = loss;
   }

/***************************************************************************
***************************************************************************/
   /** Skips white space, comments and newline characters, leaving the reader
    * positioned at the next character that is none of these (or at end of
    * stream).
    * @param in Reader whose blank space is skipped.
    */
   private void skip_blank_space(BufferedReader in)
   {
      try
      {
         for(; ;)
         {
            skip_white_comments_same_line(in);

            // Peek at the next character.  Reading it outright would also
            // consume the first character of the following token whenever
            // that character is not a newline.
            in.mark(1);
            if (in.read() != '\n')
            {
               in.reset();   // not a newline: push it back and stop
               break;
            }
            // A newline was consumed; keep skipping on the next line.
         }
      } catch(IOException e)
      {
         // Best effort, as before: an I/O failure simply ends the skipping.
      }
   }

/***************************************************************************
***************************************************************************/
   /** Reads the first line of the loss specification: a keyword (one of
    * nodefault, default, distance) followed by a colon or a period, and an
    * optional list of numeric arguments. The result is handed to
    * set_loss_default.
    * @param in Reader from which the line is read.
    */
   private void read_loss_default_spec(BufferedReader in)
   {
      try
      {
         // Step one: the keyword, followed by a peek at its delimiter.
         String keyword = read_section(in, "|\n:,.-0123456789" , " \t\r");
         in.mark(1);
         char next =(char)in.read();
         in.reset();

         boolean followedByColon = (next == ':');
         boolean followedByPeriod = (next == '.');
         if (!followedByColon && !followedByPeriod)
         {
            Error.err("in loss specification; " 
                   + "first line must begin with a keyword " 
                   + "{nodefault, default, distance} followed by a colon " 
                   + "(:) or period (.)");
         }

         // Map the keyword text to its constant; key stays 0 if the keyword
         // is not recognized and Error.err returns.
         byte key = 0;
         if (keyword.equals("nodefault"))
         {
            key = nodefault;
         }
         else if (keyword.equals("default"))
         {
            key = adefault;
         }
         else if (keyword.equals("distance"))
         {
            key = distance;
         }
         else
         {
            Error.err("in loss specification; " 
                   + "unrecognized keyword \"" + keyword + "\".  Keyword " 
                   + "must be one of {nodefault, default, distance}");
         }

         // Step two: the argument list, unless the keyword is followed by a
         // period or end of line, in which case that character is consumed
         // and there are no arguments.
         double[] args;
         if (followedByPeriod || next == '\n')
         {
            in.read();
            args = new double[0];
         }
         else
         {
            args = process_loss_args(in, "," , "|\n");
         }
         set_loss_default(key, args);
      } catch(IOException e)
      {
      }
   }

/***************************************************************************
***************************************************************************/
   /** Stores the loss keyword and fills lossArgs[0..2] from the supplied
    * arguments, substituting defaults for arguments that were not given:
    * nodefault takes no arguments and zeroes all three slots; default takes
    * up to two (missing ones become 0 and 1, third slot is 0); distance takes
    * up to three (missing ones become 0, 1, and the second value or 1).
    * Too many arguments, the nomatrix keyword, or an unknown keyword are
    * reported through Error.err.
    * @param keyword One of the loss keyword constants.
    * @param args Arguments read after the keyword; may be empty.
    */
   private void set_loss_default(byte keyword, double[] args)
   {
      lossKeyword = keyword;

      if (keyword == nomatrix)
      {
         Error.err("FileSchema::set_loss_default: keyword may not be set " 
                + "to nomatrix");
      }
      else if (keyword == nodefault)
      {
         if (args.length != 0)
            Error.err("FileSchema::set_loss_default: nodefault takes no " 
                   + "arguments");
         lossArgs[0] = 0;
         lossArgs[1] = 0;
         lossArgs[2] = 0;
      }
      else if (keyword == adefault)
      {
         switch (args.length)
         {
         case 0:
               lossArgs[0] = 0;
               lossArgs[1] = 1;
               break;
         case 1:
               lossArgs[0] = args[0];
               lossArgs[1] = 1;
               break;
         case 2:
               lossArgs[0] = args[0];
               lossArgs[1] = args[1];
               break;
         default:
               Error.err("FileSchema::set_loss_default: default takes up to " 
                      + "two arguments.  You supplied " + args.length);
               break;
         }
         // The third slot is not used by "default".
         lossArgs[2] = 0;
      }
      else if (keyword == distance)
      {
         switch (args.length)
         {
         case 0:
               lossArgs[0] = 0;
               lossArgs[1] = 1;
               lossArgs[2] = 1;
               break;
         case 1:
               lossArgs[0] = args[0];
               lossArgs[1] = 1;
               lossArgs[2] = 1;
               break;
         case 2:
               // With two arguments the second one fills both remaining slots.
               lossArgs[0] = args[0];
               lossArgs[1] = args[1];
               lossArgs[2] = args[1];
               break;
         case 3:
               lossArgs[0] = args[0];
               lossArgs[1] = args[1];
               lossArgs[2] = args[2];
               break;
         default:
               Error.err("FileSchema::set_loss_default: distance takes up to " 
                      + "three arguments.  You supplied " + args.length);
               break;
         }
      }
      else
      {
         Error.err("ABORT_IF_REACHED");
      }
   }

/***************************************************************************
***************************************************************************/
   private double[] process_loss_args(BufferedReader in, String sepChars, 
          String termChars)
   {
      double[] arr = new double[0];
      try
      {
         for(; ;)
         {
            in.read();
            String arg = read_section(in, sepChars + termChars, 
                   " \t\r\n");
            // check for a termination character
            in.mark(1);
            char c =(char)in.read();
            in.reset();
            boolean isTerm =
               (termChars.indexOf(new Character(c).toString())!= -1);

            // allow termination immediately on a termChar
            if (arg == "" && arr.length == 0 && isTerm == true)
               return arr;
            double argReal;
            argReal = new Double(arg).doubleValue();
            if (new Double(arg).isNaN()== false)
               arr[arr.length] = argReal;
            else
            Error.err("FileSchema::process_loss_args: in loss specification; " 
                   + "argument \"" + arg + "\" in list cannot be " 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -