⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bm.java

📁 java 实现的BM算法。 BM算法是一种字符串匹配算法。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
     * @param pattern String to search for. May be "" but not null.
     *
     * @return 0-based offset in text, just like String.indexOf.
     * -1 means not found.
     */
   public int indexOf( String pattern )
      {
      return indexOf(pattern, 0);
      } // end indexOf

   /**
    * Search for given pattern in String or char array.
    * Presume Pattern and Text have been previously set.
    *
    * @param index index at which to start search
    *
    * @return 0-based offset in text, just like String.indexOf.
    * -1 means not found.
    */
   protected final int indexOf(int index)
      {
      if ( text != null )
         {
         return indexOfViaText(index);
         }
      else
         {
         return indexOfViaTextArray(index);
         }
      } // end indexOf

   /**
    * Search for given pattern in String.
    * Presume Pattern and Text have been previously set.
    *
    * @param index  index at which to start search
    *
    * @return 0-based offset in text, just like String.indexOf.
    *         -1 means not found.
    */
   private final int indexOfViaText(int index)
      {
      // Deal with cases that don't rate the full
      // Boyer-Moore treatment.
      if ( (lenText <= breakEvenLenText/2 || lenPat <= breakEvenLenPat ) )
         {
         // this way we are consistent with
         // String.indexOf for "".indexOf("")
         // which is -1 in JDK 1.1
         // and 0 in JDK 1.2. See Bug Parade entry 4096273.
         // "".indexOf("abc") is always -1
         return text.indexOf(pattern, index);
         } // end if

      analysePattern();

      // At this point we have the pattern, and have skip[] calculated
      // We are commited to calculating the indexOf via Boyer-Moore.

      // tforward works left to right through the text, skipping depending
      // on what char it found in the text corresponding to the end of the pattern,
      // not to the place of the mismatch.
      char testChar = 0;
      final int lastPatChar = patternArray[lenPat-1];
      outer:
      for ( int tforward=index+lenPat-1; tforward<lenText; tforward+=skip[testChar & 0xff ] )
         {
         // compare working right to left through both pattern and text
         testChar = text.charAt(tforward);
         if ( testChar != lastPatChar )
            {
            continue outer;
            }
         // step back through pattern
         // step back through text
         inner:
         for ( int tback=tforward-1,pback=lenPat-2 ; pback>=0 ; tback--,pback-- )
            {
            if ( text.charAt(tback) != patternArray[pback] ) continue outer;
            } // end inner for
         // we stepped all the way back through the pattern comparing
         // without finding a mismatch. We found it!
         return tforward-lenPat+1;
         } // end outer for
      // stepped through entire text without finding it.
      return -1;
      } // end indexOf

   /**
    * Search for given pattern in charArray.
    * presume Pattern and Text have been previously set.
    *
    * @param index  index at which to start search
    *
    * @return 0-based offset in text, just like String.indexOf.
    *         -1 means not found.
    */
   private final int indexOfViaTextArray(int index)
      {
      // Deal with cases that don't rate the full
      // Boyer-Moore treatment.
      // !! Actually this has a different break even point to cover the overhead
      // of going back to String. We probably should adjust slightly.
      if ( (lenText <= breakEvenLenText/2 || lenPat <= breakEvenLenPat ) )
         {
         // this way we are consistent with
         // String.indexOf for "".indexOf("")
         // which is -1 in JDK 1.1
         // and 0 in JDK 1.2
         // "".indexOf("abc") is always -1
         return new String (textArray).indexOf(pattern, index);
         } // end if

      analysePattern();

      // At this point we have the pattern, and have skip[] calculated
      // We are commited to calculating the indexOf via Boyer-Moore.

      // tforward works left to right through the text, skipping depending
      // on what char it found in the text corresponding to the end of the pattern,
      // not to the place of the mismatch.
      char testChar = 0;
      final int lastPatChar = patternArray[lenPat-1];
      outer:
      for ( int tforward=index+lenPat-1; tforward<lenText; tforward+=skip[testChar & 0xff ] )
         {
         // compare working right to left through both pattern and text
         testChar = textArray[tforward];
         if ( testChar != lastPatChar )
            {
            continue outer;
            }
         // step back through pattern
         // step back through text
         inner:
         for ( int tback=tforward-1,pback=lenPat-2 ; pback>=0 ; tback--,pback-- )
            {
            if ( textArray[tback] != patternArray[pback] ) continue outer;
            } // end inner for
         // we stepped all the way back through the pattern comparing
         // without finding a mismatch. We found it!
         return tforward-lenPat+1;
         } // end outer for
      // stepped through entire text without finding it.
      return -1;
      } // end indexOf

   /**
    * Set pattern to use for subsequent indexOf searches.
    * Note this is protected.  Clients of this package
    * set the pattern using indexOf.
    *
    * @param pattern String to search for. May be "" but not null..
    */
   protected void setPattern (String pattern)
      {
      if ( pattern == null )
         {
         throw new NullPointerException();
         }
      this.pattern = pattern;
      lenPat = pattern.length();
      }

   /**
    * Set text to search in for subsequent indexOf searches.
    *
    * @param text String to search in. May be "" but not null.
    */
   public void setText (String text)
      {
      if ( text == null )
         {
         throw new NullPointerException();
         }
      this.text = text;
      lenText = text.length();
      // we don't yet have this is efficient char[] form.
      this.textArray = null;
      }

   /**
    * Set text to search in for subsequent indexOf searches.
    *
    * @param text   char array to search in. May be empty but not null.
    *               This is more efficient that providing the equivalent String.
    */
   public void setText (char[] text)
      {
      if ( text == null )
         {
         throw new NullPointerException();
         }
      // we have efficient char[] form, but not the string form.
      this.textArray = text;
      lenText = textArray.length;
      this.text = null;
      }

   /**
    * test harness
    *
    * @param args   command line arguments.  Not used.
    */
   public static void main( String[] args)
      {
      if ( DEBUGGING )
         {
         System.out.println(BM.indexOf("dogcatwombat","cat"));
         System.out.println("dogcatwombat".indexOf("cat"));

         System.out.println(BM.indexOf("crtcamccmcarogcatwombat","cat"));
         System.out.println("crtcamccmcarogcatwombat".indexOf("cat"));

         System.out.println(BM.indexOf("dogcatwombat",""));
         System.out.println("dogcatwombat".indexOf(""));

         System.out.println(BM.indexOf("",""));
         System.out.println("".indexOf(""));

         System.out.println(BM.indexOf("","abcde"));
         System.out.println("".indexOf("abcde"));

         System.out.println(BM.indexOf("dogcatwombat","cow"));
         System.out.println("dogcatwombat".indexOf("cow"));

         String s = "create table foo (created_date datetime default sysdate not null)";
         System.out.println(s.indexOf("create", 10));
         System.out.println(BM.indexOf(s, "create", 10));

         try
            {

            // O P E N
            // Any file of sample characters
            File f = new File ("C:/temp/temp.txt");
            int size = (int) f.length();
            FileReader fr;
            fr = new FileReader(f);

            // R E A D
            char[] ca = new char[size];
            int charsRead = fr.read(ca);
            s = new String(ca);

            long start;
            long stop;
            int result = 0;

            start = System.currentTimeMillis();
            for ( int i=0; i<1000; i++ )
               {
               // Need to make different so optimiser will actually do
               // the work repeatedly.
               // search for strings like "efficiency9" that probably won't be there.
               result = BM.indexOf(ca,"efficiency"+i%10);
               }
            System.out.println("Boyer.indexOf(char[]): " + result);
            stop = System.currentTimeMillis();
            System.out.println("Elapsed:"
                               + (stop-start));

            // benchmark Boyer.indexOf
            start = System.currentTimeMillis();
            
            System.out.println("Boyer.indexOf(String): " + result);
            stop = System.currentTimeMillis();
            System.out.println("Elapsed:"
                               + (stop-start));

            // Benchmark String.indexOf
            start = System.currentTimeMillis();
            for ( int i=0; i<1000; i++ )
               {
               result = s.indexOf("efficiency"+i%10);
               }
            System.out.println("String.indexOf: " + result);
            stop = System.currentTimeMillis();
            System.out.println("Elapsed:"
                               + (stop-start));

            // C L O S E
            fr.close();

            }
         catch ( IOException e )
            {
            return;
            }

         } // end if debugging
      } // end main
   } // end class Boyer

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -