⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 testmultianalyzer.java

📁 Lucene a java open-source SearchEngine Framework
💻 JAVA
字号:
package org.apache.lucene.queryParser;/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements.  See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License.  You may obtain a copy of the License at * *     http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import java.io.Reader;import org.apache.lucene.util.LuceneTestCase;import org.apache.lucene.search.Query;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.LowerCaseFilter;import org.apache.lucene.analysis.Token;import org.apache.lucene.analysis.TokenFilter;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.standard.StandardTokenizer;/** * Test QueryParser's ability to deal with Analyzers that return more * than one token per position or that return tokens with a position * increment > 1. *  * @author Daniel Naber */public class TestMultiAnalyzer extends LuceneTestCase {  private static int multiToken = 0;  public void testMultiAnalyzer() throws ParseException {        QueryParser qp = new QueryParser("", new MultiAnalyzer());    // trivial, no multiple tokens:    assertEquals("foo", qp.parse("foo").toString());    assertEquals("foo", qp.parse("\"foo\"").toString());    assertEquals("foo foobar", qp.parse("foo foobar").toString());    assertEquals("\"foo foobar\"", qp.parse("\"foo foobar\"").toString());    assertEquals("\"foo foobar blah\"", qp.parse("\"foo foobar blah\"").toString());    // two tokens at the same position:    assertEquals("(multi multi2) foo", qp.parse("multi foo").toString());    assertEquals("foo (multi multi2)", qp.parse("foo multi").toString());    assertEquals("(multi multi2) (multi multi2)", qp.parse("multi multi").toString());    assertEquals("+(foo (multi multi2)) +(bar (multi multi2))",        qp.parse("+(foo multi) +(bar multi)").toString());    assertEquals("+(foo (multi multi2)) field:\"bar (multi multi2)\"",        qp.parse("+(foo multi) field:\"bar multi\"").toString());    // phrases:    assertEquals("\"(multi multi2) foo\"", qp.parse("\"multi foo\"").toString());    assertEquals("\"foo (multi multi2)\"", qp.parse("\"foo multi\"").toString());    assertEquals("\"foo (multi multi2) foobar (multi multi2)\"",        qp.parse("\"foo multi foobar multi\"").toString());    // fields:    assertEquals("(field:multi field:multi2) field:foo", qp.parse("field:multi field:foo").toString());    assertEquals("field:\"(multi multi2) foo\"", qp.parse("field:\"multi foo\"").toString());    // three tokens at one position:    assertEquals("triplemulti multi3 multi2", qp.parse("triplemulti").toString());    assertEquals("foo (triplemulti multi3 multi2) foobar",        qp.parse("foo triplemulti foobar").toString());    // phrase with non-default slop:    assertEquals("\"(multi multi2) foo\"~10", qp.parse("\"multi foo\"~10").toString());    // phrase with non-default boost:    assertEquals("\"(multi multi2) foo\"^2.0", qp.parse("\"multi foo\"^2").toString());    // phrase after changing default slop    qp.setPhraseSlop(99);    assertEquals("\"(multi multi2) foo\"~99 bar",                 qp.parse("\"multi foo\" bar").toString());    assertEquals("\"(multi multi2) foo\"~99 \"foo bar\"~2",                 qp.parse("\"multi foo\" \"foo bar\"~2").toString());    qp.setPhraseSlop(0);    // non-default operator:    qp.setDefaultOperator(QueryParser.AND_OPERATOR);    assertEquals("+(multi multi2) +foo", qp.parse("multi foo").toString());  }      public void testMultiAnalyzerWithSubclassOfQueryParser() throws ParseException {    DumbQueryParser qp = new DumbQueryParser("", new MultiAnalyzer());    qp.setPhraseSlop(99); // modified default slop    // direct call to (super's) getFieldQuery to demonstrate differnce    // between phrase and multiphrase with modified default slop    assertEquals("\"foo bar\"~99",                 qp.getSuperFieldQuery("","foo bar").toString());    assertEquals("\"(multi multi2) bar\"~99",                 qp.getSuperFieldQuery("","multi bar").toString());        // ask sublcass to parse phrase with modified default slop    assertEquals("\"(multi multi2) foo\"~99 bar",                 qp.parse("\"multi foo\" bar").toString());      }      public void testPosIncrementAnalyzer() throws ParseException {    QueryParser qp = new QueryParser("", new PosIncrementAnalyzer());    assertEquals("quick brown", qp.parse("the quick brown").toString());    assertEquals("\"quick brown\"", qp.parse("\"the quick brown\"").toString());    assertEquals("quick brown fox", qp.parse("the quick brown fox").toString());    assertEquals("\"quick brown fox\"", qp.parse("\"the quick brown fox\"").toString());  }    /**   * Expands "multi" to "multi" and "multi2", both at the same position,   * and expands "triplemulti" to "triplemulti", "multi3", and "multi2".     */  private class MultiAnalyzer extends Analyzer {    public MultiAnalyzer() {    }    public TokenStream tokenStream(String fieldName, Reader reader) {      TokenStream result = new StandardTokenizer(reader);      result = new TestFilter(result);      result = new LowerCaseFilter(result);      return result;    }  }  private final class TestFilter extends TokenFilter {        private org.apache.lucene.analysis.Token prevToken;        public TestFilter(TokenStream in) {      super(in);    }    public final org.apache.lucene.analysis.Token next() throws java.io.IOException {      if (multiToken > 0) {        org.apache.lucene.analysis.Token token =           new org.apache.lucene.analysis.Token("multi"+(multiToken+1), prevToken.startOffset(),          prevToken.endOffset(), prevToken.type());        token.setPositionIncrement(0);        multiToken--;        return token;      } else {        org.apache.lucene.analysis.Token t = input.next();        prevToken = t;        if (t == null)          return null;        String text = t.termText();        if (text.equals("triplemulti")) {          multiToken = 2;          return t;        } else if (text.equals("multi")) {          multiToken = 1;          return t;        } else {          return t;        }      }    }  }  /**   * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1).   * Does not work correctly for input other than "the quick brown ...".   */  private class PosIncrementAnalyzer extends Analyzer {    public PosIncrementAnalyzer() {    }    public TokenStream tokenStream(String fieldName, Reader reader) {      TokenStream result = new StandardTokenizer(reader);      result = new TestPosIncrementFilter(result);      result = new LowerCaseFilter(result);      return result;    }  }  private final class TestPosIncrementFilter extends TokenFilter {        public TestPosIncrementFilter(TokenStream in) {      super(in);    }    public final org.apache.lucene.analysis.Token next() throws java.io.IOException {      for (Token t = input.next(); t != null; t = input.next()) {        if (t.termText().equals("the")) {          // stopword, do nothing        } else if (t.termText().equals("quick")) {          org.apache.lucene.analysis.Token token =             new org.apache.lucene.analysis.Token(t.termText(), t.startOffset(),                t.endOffset(), t.type());          token.setPositionIncrement(2);          return token;        } else {          org.apache.lucene.analysis.Token token =             new org.apache.lucene.analysis.Token(t.termText(), t.startOffset(),                t.endOffset(), t.type());          token.setPositionIncrement(1);          return token;        }      }      return null;    }  }    /** a very simple subclass of QueryParser */    private final static class DumbQueryParser extends QueryParser {                public DumbQueryParser(String f, Analyzer a) {            super(f, a);        }        /** expose super's version */        public Query getSuperFieldQuery(String f, String t)             throws ParseException {            return super.getFieldQuery(f,t);        }        /** wrap super's version */        protected Query getFieldQuery(String f, String t)            throws ParseException {            return new DumbQueryWrapper(getSuperFieldQuery(f,t));        }    }        /**     * A very simple wrapper to prevent instanceof checks but uses     * the toString of the query it wraps.     */    private final static class DumbQueryWrapper extends Query {        private Query q;        public DumbQueryWrapper(Query q) {            super();            this.q = q;        }        public String toString(String f) {            return q.toString(f);        }    }    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -