📄 testmultianalyzer.java
字号:
package org.apache.lucene.queryParser;/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */import java.io.Reader;import org.apache.lucene.util.LuceneTestCase;import org.apache.lucene.search.Query;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.LowerCaseFilter;import org.apache.lucene.analysis.Token;import org.apache.lucene.analysis.TokenFilter;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.standard.StandardTokenizer;/** * Test QueryParser's ability to deal with Analyzers that return more * than one token per position or that return tokens with a position * increment > 1. * * @author Daniel Naber */public class TestMultiAnalyzer extends LuceneTestCase { private static int multiToken = 0; public void testMultiAnalyzer() throws ParseException { QueryParser qp = new QueryParser("", new MultiAnalyzer()); // trivial, no multiple tokens: assertEquals("foo", qp.parse("foo").toString()); assertEquals("foo", qp.parse("\"foo\"").toString()); assertEquals("foo foobar", qp.parse("foo foobar").toString()); assertEquals("\"foo foobar\"", qp.parse("\"foo foobar\"").toString()); assertEquals("\"foo foobar blah\"", qp.parse("\"foo foobar blah\"").toString()); // two tokens at the same position: assertEquals("(multi multi2) foo", qp.parse("multi foo").toString()); assertEquals("foo (multi multi2)", qp.parse("foo multi").toString()); assertEquals("(multi multi2) (multi multi2)", qp.parse("multi multi").toString()); assertEquals("+(foo (multi multi2)) +(bar (multi multi2))", qp.parse("+(foo multi) +(bar multi)").toString()); assertEquals("+(foo (multi multi2)) field:\"bar (multi multi2)\"", qp.parse("+(foo multi) field:\"bar multi\"").toString()); // phrases: assertEquals("\"(multi multi2) foo\"", qp.parse("\"multi foo\"").toString()); assertEquals("\"foo (multi multi2)\"", qp.parse("\"foo multi\"").toString()); assertEquals("\"foo (multi multi2) foobar (multi multi2)\"", qp.parse("\"foo multi foobar multi\"").toString()); // fields: assertEquals("(field:multi field:multi2) field:foo", qp.parse("field:multi field:foo").toString()); assertEquals("field:\"(multi multi2) foo\"", qp.parse("field:\"multi foo\"").toString()); // three tokens at one position: assertEquals("triplemulti multi3 multi2", qp.parse("triplemulti").toString()); assertEquals("foo (triplemulti multi3 multi2) foobar", qp.parse("foo triplemulti foobar").toString()); // phrase with non-default slop: assertEquals("\"(multi multi2) foo\"~10", qp.parse("\"multi foo\"~10").toString()); // phrase with non-default boost: assertEquals("\"(multi multi2) foo\"^2.0", qp.parse("\"multi foo\"^2").toString()); // phrase after changing default slop qp.setPhraseSlop(99); assertEquals("\"(multi multi2) foo\"~99 bar", qp.parse("\"multi foo\" bar").toString()); assertEquals("\"(multi multi2) foo\"~99 \"foo bar\"~2", qp.parse("\"multi foo\" \"foo bar\"~2").toString()); qp.setPhraseSlop(0); // non-default operator: qp.setDefaultOperator(QueryParser.AND_OPERATOR); assertEquals("+(multi multi2) +foo", qp.parse("multi foo").toString()); } public void testMultiAnalyzerWithSubclassOfQueryParser() throws ParseException { DumbQueryParser qp = new DumbQueryParser("", new MultiAnalyzer()); qp.setPhraseSlop(99); // modified default slop // direct call to (super's) getFieldQuery to demonstrate differnce // between phrase and multiphrase with modified default slop assertEquals("\"foo bar\"~99", qp.getSuperFieldQuery("","foo bar").toString()); assertEquals("\"(multi multi2) bar\"~99", qp.getSuperFieldQuery("","multi bar").toString()); // ask sublcass to parse phrase with modified default slop assertEquals("\"(multi multi2) foo\"~99 bar", qp.parse("\"multi foo\" bar").toString()); } public void testPosIncrementAnalyzer() throws ParseException { QueryParser qp = new QueryParser("", new PosIncrementAnalyzer()); assertEquals("quick brown", qp.parse("the quick brown").toString()); assertEquals("\"quick brown\"", qp.parse("\"the quick brown\"").toString()); assertEquals("quick brown fox", qp.parse("the quick brown fox").toString()); assertEquals("\"quick brown fox\"", qp.parse("\"the quick brown fox\"").toString()); } /** * Expands "multi" to "multi" and "multi2", both at the same position, * and expands "triplemulti" to "triplemulti", "multi3", and "multi2". */ private class MultiAnalyzer extends Analyzer { public MultiAnalyzer() { } public TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new StandardTokenizer(reader); result = new TestFilter(result); result = new LowerCaseFilter(result); return result; } } private final class TestFilter extends TokenFilter { private org.apache.lucene.analysis.Token prevToken; public TestFilter(TokenStream in) { super(in); } public final org.apache.lucene.analysis.Token next() throws java.io.IOException { if (multiToken > 0) { org.apache.lucene.analysis.Token token = new org.apache.lucene.analysis.Token("multi"+(multiToken+1), prevToken.startOffset(), prevToken.endOffset(), prevToken.type()); token.setPositionIncrement(0); multiToken--; return token; } else { org.apache.lucene.analysis.Token t = input.next(); prevToken = t; if (t == null) return null; String text = t.termText(); if (text.equals("triplemulti")) { multiToken = 2; return t; } else if (text.equals("multi")) { multiToken = 1; return t; } else { return t; } } } } /** * Analyzes "the quick brown" as: quick(incr=2) brown(incr=1). * Does not work correctly for input other than "the quick brown ...". */ private class PosIncrementAnalyzer extends Analyzer { public PosIncrementAnalyzer() { } public TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new StandardTokenizer(reader); result = new TestPosIncrementFilter(result); result = new LowerCaseFilter(result); return result; } } private final class TestPosIncrementFilter extends TokenFilter { public TestPosIncrementFilter(TokenStream in) { super(in); } public final org.apache.lucene.analysis.Token next() throws java.io.IOException { for (Token t = input.next(); t != null; t = input.next()) { if (t.termText().equals("the")) { // stopword, do nothing } else if (t.termText().equals("quick")) { org.apache.lucene.analysis.Token token = new org.apache.lucene.analysis.Token(t.termText(), t.startOffset(), t.endOffset(), t.type()); token.setPositionIncrement(2); return token; } else { org.apache.lucene.analysis.Token token = new org.apache.lucene.analysis.Token(t.termText(), t.startOffset(), t.endOffset(), t.type()); token.setPositionIncrement(1); return token; } } return null; } } /** a very simple subclass of QueryParser */ private final static class DumbQueryParser extends QueryParser { public DumbQueryParser(String f, Analyzer a) { super(f, a); } /** expose super's version */ public Query getSuperFieldQuery(String f, String t) throws ParseException { return super.getFieldQuery(f,t); } /** wrap super's version */ protected Query getFieldQuery(String f, String t) throws ParseException { return new DumbQueryWrapper(getSuperFieldQuery(f,t)); } } /** * A very simple wrapper to prevent instanceof checks but uses * the toString of the query it wraps. */ private final static class DumbQueryWrapper extends Query { private Query q; public DumbQueryWrapper(Query q) { super(); this.q = q; } public String toString(String f) { return q.toString(f); } } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -