📄 nutchanalysis.java
字号:
/* Generated By:JavaCC: Do not edit this line. NutchAnalysis.java */
package net.nutch.analysis;
import net.nutch.searcher.Query;
import net.nutch.searcher.QueryFilters;
import net.nutch.searcher.Query.Clause;
import org.apache.lucene.analysis.StopFilter;
import java.io.*;
import java.util.*;
/** The JavaCC-generated Nutch lexical analyzer and query parser. */
public class NutchAnalysis implements NutchAnalysisConstants {
// Words that isStopWord() reports as stop words (via STOP_SET below).
private static final String[] STOP_WORDS = {
"a", "and", "are", "as", "at", "be", "but", "by",
"for", "if", "in", "into", "is", "it",
"no", "not", "of", "on", "or", "s", "such",
"t", "that", "the", "their", "then", "there", "these",
"they", "this", "to", "was", "will", "with"
};
// Lookup set built from STOP_WORDS by Lucene's StopFilter.makeStopSet.
private static final Set STOP_SET = StopFilter.makeStopSet(STOP_WORDS);
// The query text being parsed. Assigned by parseQuery(); phrase() and
// compound() take substrings of it when the field is a raw field.
private String queryString;
/** Reports whether the given word is in the stop-word set. Stop words are
 * only removed from queries; every word is indexed.
 * @param word the word to look up
 * @return true iff STOP_SET contains the word */
public static boolean isStopWord(String word) {
  final boolean listed = STOP_SET.contains(word);
  return listed;
}
/** Parses the given query text into a Query.
 * A new parser is built over the text, the text is remembered on the parser
 * (phrase() and compound() read it back for raw fields), and parse() is run.
 * @param queryString the query text to parse
 * @return the Query produced by parse()
 * @throws IOException as declared; propagated from parsing */
public static Query parseQuery(String queryString) throws IOException {
  StringReader reader = new StringReader(queryString);
  FastCharStream stream = new FastCharStream(reader);
  NutchAnalysis parser = new NutchAnalysis(stream);
  parser.queryString = queryString;
  return parser.parse();
}
/** For debugging. Prompts on standard output, reads one query per line from
 * standard input, and prints the parsed form of each query. Stops when
 * standard input reaches end of stream.
 * @param args unused
 * @throws Exception if reading input or parsing a query fails */
public static void main(String[] args) throws Exception {
  BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
  while (true) {
    System.out.print("Query: ");
    String line = in.readLine();
    if (line == null) {
      // readLine() returns null at end of stream; without this check the
      // null would reach new StringReader(null) and throw a
      // NullPointerException.
      break;
    }
    System.out.println(parseQuery(line));
  }
}
/** Parse a query.
 * Reads clauses for as long as the next token is a WORD, ACRONYM, SIGRAM,
 * PLUS, MINUS or QUOTE. Each clause is an optional PLUS or MINUS, an optional
 * "WORD COLON" field prefix, and then either a quoted phrase or a compound
 * term. A clause is added to the query as a required phrase, or as a
 * prohibited phrase when it was preceded by MINUS. A clause consisting of a
 * single stop word with no PLUS, MINUS or quotes is dropped.
 * @return the query built from the clauses
 * @throws ParseException if a clause does not start with an expected token */
final public Query parse() throws ParseException {
Query query = new Query();
ArrayList terms;
Token token;
String field;
boolean stop;
boolean prohibited;
// Skip anything before the first clause that is neither an operator nor a term.
nonOpOrTerm();
label_1:
while (true) {
// Continue only while the next token can start a clause.
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case WORD:
case ACRONYM:
case SIGRAM:
case PLUS:
case MINUS:
case QUOTE:
;
break;
default:
jj_la1[0] = jj_gen;
break label_1;
}
// Per-clause defaults: stop-word removal allowed, clause required, default field.
stop=true; prohibited=false; field = Clause.DEFAULT_FIELD;
// Optional leading operator.
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case PLUS:
case MINUS:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case PLUS:
jj_consume_token(PLUS);
// An explicit plus disables stop-word removal for this clause.
stop=false;
break;
case MINUS:
jj_consume_token(MINUS);
// A minus disables stop-word removal and marks the clause prohibited.
stop=false;prohibited=true;
break;
default:
jj_la1[1] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
break;
default:
jj_la1[2] = jj_gen;
;
}
// Optional field prefix: when lookahead jj_2_1 succeeds, consume WORD and
// COLON and use the word's text as the field name.
if (jj_2_1(2147483647)) {
token = jj_consume_token(WORD);
jj_consume_token(COLON);
field = token.image;
} else {
;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case QUOTE:
// Explicitly quoted phrase; quoting also disables stop-word removal.
terms = phrase(field);
stop=false;
break;
case WORD:
case ACRONYM:
case SIGRAM:
// Unquoted term or compound (implicit phrase).
terms = compound(field);
break;
default:
jj_la1[3] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
// Skip anything after the clause that is neither an operator nor a term.
nonOpOrTerm();
String[] array = (String[])terms.toArray(new String[terms.size()]);
if (stop && terms.size()==1 && isStopWord(array[0])) {
// ignore stop words only when single, unadorned terms
} else {
if (prohibited)
query.addProhibitedPhrase(array, field);
else
query.addRequiredPhrase(array, field);
}
}
{if (true) return query;}
throw new Error("Missing return statement in function");
}
/** Parse an explicitly quoted phrase query. Note that this may return a single
 * term, a trivial phrase.
 * Consumes an opening QUOTE, then any number of terms separated by non-term
 * tokens, then a closing QUOTE.
 * @param field the field the phrase applies to; when
 *   QueryFilters.isRawField(field) is true the collected terms are replaced by
 *   one element holding the substring of queryString between the recorded
 *   start and end positions
 * @return the list of term strings (or the single raw substring)
 * @throws ParseException if an expected QUOTE is not found */
final public ArrayList phrase(String field) throws ParseException {
int start;
int end;
ArrayList result = new ArrayList();
String term;
jj_consume_token(QUOTE);
// End position of the opening quote token; used below as the start index
// into queryString.
start = token.endColumn;
// Skip non-term tokens that precede the first term.
label_2:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 0:
case PLUS:
case MINUS:
case COLON:
case SLASH:
case DOT:
case ATSIGN:
case APOSTROPHE:
case WHITE:
;
break;
default:
jj_la1[4] = jj_gen;
break label_2;
}
nonTerm();
}
// Collect each term, skipping the non-term tokens that follow it.
label_3:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case WORD:
case ACRONYM:
case SIGRAM:
;
break;
default:
jj_la1[5] = jj_gen;
break label_3;
}
term = term();
result.add(term);
label_4:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case 0:
case PLUS:
case MINUS:
case COLON:
case SLASH:
case DOT:
case ATSIGN:
case APOSTROPHE:
case WHITE:
;
break;
default:
jj_la1[6] = jj_gen;
break label_4;
}
nonTerm();
}
}
// End position of the last token consumed before the closing quote; used
// below as the end index into queryString.
end = token.endColumn;
jj_consume_token(QUOTE);
if (QueryFilters.isRawField(field)) {
// Raw fields keep the original text instead of the individual terms.
result.clear();
result.add(queryString.substring(start, end));
}
{if (true) return result;}
throw new Error("Missing return statement in function");
}
/** Parse a compound term that is interpreted as an implicit phrase query.
 * Compounds are a sequence of terms separated by infix characters. Note that
 * this may return a single term, a trivial compound.
 * @param field the field the compound applies to; when
 *   QueryFilters.isRawField(field) is true the collected terms are replaced by
 *   one element holding the substring of queryString from the recorded start
 *   position to the end of the last consumed token
 * @return the list of term strings (or the single raw substring)
 * @throws ParseException if a term is expected but not found */
final public ArrayList compound(String field) throws ParseException {
int start;
ArrayList result = new ArrayList();
String term;
// Recorded before the first term is consumed, so this is the end position
// of the previously consumed token.
start = token.endColumn;
term = term();
result.add(term);
label_5:
while (true) {
// Continue only while lookahead jj_2_2 succeeds.
if (jj_2_2(2147483647)) {
;
} else {
break label_5;
}
// Consume one or more infix tokens between two terms.
label_6:
while (true) {
infix();
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case PLUS:
case MINUS:
case COLON:
case SLASH:
case DOT:
case ATSIGN:
case APOSTROPHE:
;
break;
default:
jj_la1[7] = jj_gen;
break label_6;
}
}
term = term();
result.add(term);
}
if (QueryFilters.isRawField(field)) {
// Raw fields keep the original text instead of the individual terms.
result.clear();
result.add(queryString.substring(start, token.endColumn));
}
{if (true) return result;}
throw new Error("Missing return statement in function");
}
/** Parse a single term: consumes the next token, which must be a WORD,
 * ACRONYM or SIGRAM, and returns its text.
 * @return the image of the consumed token
 * @throws ParseException if the next token is none of those kinds */
final public String term() throws ParseException {
  final int kind = (jj_ntk == -1) ? jj_ntk() : jj_ntk;
  if (kind == WORD || kind == ACRONYM || kind == SIGRAM) {
    // kind equals the matched constant, so this consumes exactly that token.
    return jj_consume_token(kind).image;
  }
  jj_la1[8] = jj_gen;
  jj_consume_token(-1);
  throw new ParseException();
}
/** Parse anything but a term or a quote: consumes one WHITE token, one infix
 * token, or the token of kind 0.
 * @throws ParseException if the next token is none of those */
final public void nonTerm() throws ParseException {
  final int kind = (jj_ntk == -1) ? jj_ntk() : jj_ntk;
  switch (kind) {
    case 0:
      jj_consume_token(0);
      return;
    case WHITE:
      jj_consume_token(WHITE);
      return;
    case PLUS:
    case MINUS:
    case COLON:
    case SLASH:
    case DOT:
    case ATSIGN:
    case APOSTROPHE:
      infix();
      return;
    default:
      break;
  }
  jj_la1[9] = jj_gen;
  jj_consume_token(-1);
  throw new ParseException();
}
/** Parse anything but a term or an operator (plus or minus or quote).
 * Repeats for as long as lookahead jj_2_3(2) succeeds. Each iteration consumes
 * a WHITE token, a non-operator infix token, or a PLUS/MINUS followed by one
 * non-term.
 * @throws ParseException if the next token is none of those */
final public void nonOpOrTerm() throws ParseException {
label_7:
while (true) {
if (jj_2_3(2)) {
;
} else {
break label_7;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case WHITE:
jj_consume_token(WHITE);
break;
case COLON:
case SLASH:
case DOT:
case ATSIGN:
case APOSTROPHE:
nonOpInfix();
break;
case PLUS:
case MINUS:
// A plus or minus is consumed here only together with the non-term that
// follows it.
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case PLUS:
jj_consume_token(PLUS);
break;
case MINUS:
jj_consume_token(MINUS);
break;
default:
jj_la1[10] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
nonTerm();
break;
default:
jj_la1[11] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
}
/** Consumes one infix token, i.e. one of the characters that can be used to
 * form compound terms: PLUS or MINUS directly, or COLON, SLASH, DOT, ATSIGN
 * or APOSTROPHE via nonOpInfix().
 * @throws ParseException if the next token is not an infix token */
final public void infix() throws ParseException {
  final int kind = (jj_ntk == -1) ? jj_ntk() : jj_ntk;
  if (kind == PLUS || kind == MINUS) {
    // kind equals the matched constant, so this consumes exactly that token.
    jj_consume_token(kind);
  } else if (kind == COLON || kind == SLASH || kind == DOT
      || kind == ATSIGN || kind == APOSTROPHE) {
    nonOpInfix();
  } else {
    jj_la1[12] = jj_gen;
    jj_consume_token(-1);
    throw new ParseException();
  }
}
/** Parse infix characters except plus and minus: consumes one COLON, SLASH,
 * DOT, ATSIGN or APOSTROPHE token.
 * @throws ParseException if the next token is none of those */
final public void nonOpInfix() throws ParseException {
  final int kind = (jj_ntk == -1) ? jj_ntk() : jj_ntk;
  switch (kind) {
    case COLON:
    case SLASH:
    case DOT:
    case ATSIGN:
    case APOSTROPHE:
      // kind equals the matched constant, so this consumes exactly that token.
      jj_consume_token(kind);
      return;
    default:
      jj_la1[13] = jj_gen;
      jj_consume_token(-1);
      throw new ParseException();
  }
}
/** Lookahead check number 1. Stores xla in jj_la, resets both scan positions
 * to the current token, and returns true unless jj_3_1() returns true; a
 * LookaheadSuccess thrown during the scan also yields true. jj_save(0, xla)
 * is always called before returning. */
final private boolean jj_2_1(int xla) {
  jj_la = xla;
  jj_scanpos = token;
  jj_lastpos = jj_scanpos;
  boolean matched;
  try {
    matched = !jj_3_1();
  } catch (LookaheadSuccess ls) {
    matched = true;
  } finally {
    jj_save(0, xla);
  }
  return matched;
}
/** Lookahead check number 2. Stores xla in jj_la, resets both scan positions
 * to the current token, and returns true unless jj_3_2() returns true; a
 * LookaheadSuccess thrown during the scan also yields true. jj_save(1, xla)
 * is always called before returning. */
final private boolean jj_2_2(int xla) {
  jj_la = xla;
  jj_scanpos = token;
  jj_lastpos = jj_scanpos;
  boolean matched;
  try {
    matched = !jj_3_2();
  } catch (LookaheadSuccess ls) {
    matched = true;
  } finally {
    jj_save(1, xla);
  }
  return matched;
}
/** Lookahead check number 3. Stores xla in jj_la, resets both scan positions
 * to the current token, and returns true unless jj_3_3() returns true; a
 * LookaheadSuccess thrown during the scan also yields true. jj_save(2, xla)
 * is always called before returning. */
final private boolean jj_2_3(int xla) {
  jj_la = xla;
  jj_scanpos = token;
  jj_lastpos = jj_scanpos;
  boolean matched;
  try {
    matched = !jj_3_3();
  } catch (LookaheadSuccess ls) {
    matched = true;
  } finally {
    jj_save(2, xla);
  }
  return matched;
}
/** Lookahead helper: returns whatever jj_3R_18() returns. */
final private boolean jj_3R_24() {
  return jj_3R_18();
}
final private boolean jj_3R_11() {
Token xsp;
xsp = jj_scanpos;
if (jj_scan_token(1)) {
jj_scanpos = xsp;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -