⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 query.java

📁 nutch搜索的改进型工具和优化爬虫的相关工具
💻 JAVA
字号:
/* Copyright (c) 2003 The Nutch Organization.  All rights reserved.   */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */

package net.nutch.searcher;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.logging.Logger;

import net.nutch.util.LogFormatter;
import net.nutch.analysis.NutchAnalysis;

import net.nutch.io.Writable;

/** A Nutch query. */
public final class Query implements Writable, Cloneable {
  /** Logger for this class, obtained from LogFormatter under the class's full name. */
  public static final Logger LOG =
    LogFormatter.getLogger("net.nutch.searcher.Query");
  
  /** Query string attached to this query; null until set or read from a stream. */
  private String queryStr = null;
  /**
   * Summary length attached to this query; 0 until set or read from a stream.
   * NOTE(review): presumably the desired length of result summaries -- units
   * are not visible in this file, confirm against callers.
   */
  private int summaryLen = 0;
  
  /** Stores the given query string on this query. */
  public void setQueryStr(String queryStr) {
    this.queryStr = queryStr;
  }
  
  /** Returns the query string stored on this query, or null if none was set. */
  public String getQueryStr() {
    return this.queryStr;
  }

  /** Returns the summary length stored on this query (0 if never set). */
  public int getSummaryLen() {
    return this.summaryLen;
  }

  /** Stores the given summary length on this query. */
  public void setSummaryLen(int summaryLen) {
    this.summaryLen = summaryLen;
  }

  /**
   * A query clause: either a single {@link Term} or a {@link Phrase}, bound
   * to a field name and carrying required/prohibited flags and a weight.
   */
  public static class Clause implements Cloneable {
    /** Field name used by clauses that do not name a field explicitly. */
    public static final String DEFAULT_FIELD = "DEFAULT";

    // Flag bits packed into the first byte of the serialized form.
    private static final byte REQUIRED_BIT = 1;
    private static final byte PROHIBITED_BIT = 2;
    private static final byte PHRASE_BIT = 4;

    private boolean isRequired;
    private boolean isProhibited;
    private String field = DEFAULT_FIELD;
    private float weight = 1.0f;
    private Object termOrPhrase;                  // holds a Term or a Phrase

    /** Constructs a term clause in the specified field. */
    public Clause(Term term, String field,
                  boolean isRequired, boolean isProhibited) {
      this(term, isRequired, isProhibited);
      this.field = field;
    }

    /** Constructs a term clause in the default field. */
    public Clause(Term term, boolean isRequired, boolean isProhibited) {
      this.isRequired = isRequired;
      this.isProhibited = isProhibited;
      this.termOrPhrase = term;
    }

    /** Constructs a phrase clause in the specified field. */
    public Clause(Phrase phrase, String field,
                  boolean isRequired, boolean isProhibited) {
      this(phrase, isRequired, isProhibited);
      this.field = field;
    }

    /** Constructs a phrase clause in the default field. */
    public Clause(Phrase phrase, boolean isRequired, boolean isProhibited) {
      this.isRequired = isRequired;
      this.isProhibited = isProhibited;
      this.termOrPhrase = phrase;
    }

    public boolean isRequired() { return isRequired; }
    public boolean isProhibited() { return isProhibited; }

    public String getField() { return field; }

    public float getWeight() { return weight; }
    public void setWeight(float weight) {  this.weight = weight; }

    /** Returns true if this clause holds a {@link Phrase} rather than a {@link Term}. */
    public boolean isPhrase() { return termOrPhrase instanceof Phrase; }

    /** Returns the phrase; throws ClassCastException if this is a term clause. */
    public Phrase getPhrase() { return (Phrase)termOrPhrase; }
    /** Returns the term; throws ClassCastException if this is a phrase clause. */
    public Term getTerm() { return (Term)termOrPhrase; }

    /**
     * Serializes this clause: a flag byte (phrase/required/prohibited bits),
     * the field name, the weight, then the term or phrase itself.
     */
    public void write(DataOutput out) throws IOException {
      byte bits = 0;
      if (isPhrase())
        bits |= PHRASE_BIT;
      if (isRequired)
        bits |= REQUIRED_BIT;
      if (isProhibited)
        bits |= PROHIBITED_BIT;
      out.writeByte(bits);
      out.writeUTF(field);
      out.writeFloat(weight);

      if (isPhrase())
        getPhrase().write(out);
      else
        getTerm().write(out);
    }

    /** Reads a clause in the format produced by {@link #write(DataOutput)}. */
    public static Clause read(DataInput in) throws IOException {
      byte bits = in.readByte();
      boolean required = ((bits & REQUIRED_BIT) != 0);
      boolean prohibited = ((bits & PROHIBITED_BIT) != 0);

      String field = in.readUTF();
      float weight = in.readFloat();

      Clause clause;
      if ((bits & PHRASE_BIT) == 0) {
        clause = new Clause(Term.read(in), field, required, prohibited);
      } else {
        clause = new Clause(Phrase.read(in), field, required, prohibited);
      }
      clause.weight = weight;
      return clause;
    }

    /**
     * Renders this clause as text: an optional "-" for prohibited clauses, an
     * optional "field:" prefix for non-default fields, then the term or
     * phrase.  No "+" prefix is emitted for required clauses.
     */
    public String toString() {
      StringBuffer buffer = new StringBuffer();
      if (isProhibited)
        buffer.append ("-");

      if (!DEFAULT_FIELD.equals(field)) {
        buffer.append(field);
        buffer.append(":");
      }

      if (!isPhrase() && QueryFilters.isRawField(field)) {
        buffer.append('"');                        // quote raw terms
        buffer.append(termOrPhrase.toString());
        buffer.append('"');
      } else {
        buffer.append(termOrPhrase.toString());
      }

      return buffer.toString();
    }

    /**
     * Two clauses are equal when their flags, weight, field and term or
     * phrase are all equal.
     */
    public boolean equals(Object o) {
      if (!(o instanceof Clause)) return false;
      Clause other = (Clause)o;
      return
        (this.isRequired == other.isRequired) &&
        (this.isProhibited == other.isProhibited) &&
        // compare weights bitwise so that equals() agrees with hashCode()
        (Float.floatToIntBits(this.weight)
         == Float.floatToIntBits(other.weight)) &&
        // the field is part of a clause's identity: a:x differs from b:x
        (this.field == null ? other.field == null :
         this.field.equals(other.field)) &&
        (this.termOrPhrase == null ? other.termOrPhrase == null :
         this.termOrPhrase.equals(other.termOrPhrase));
    }

    /** Hash code over the same state that {@link #equals(Object)} compares. */
    public int hashCode() {
      int hash = (this.isRequired ? 0 : 1) ^ (this.isProhibited ? 2 : 4);
      hash = 31 * hash + Float.floatToIntBits(this.weight);
      hash = 31 * hash + (this.field != null ? this.field.hashCode() : 0);
      hash = 31 * hash
        + (this.termOrPhrase != null ? this.termOrPhrase.hashCode() : 0);
      return hash;
    }

    /** Returns a field-by-field copy; the term or phrase object is shared. */
    public Object clone() {
      try {
        return super.clone();
      } catch (CloneNotSupportedException e) {
        throw new RuntimeException(e);
      }
    }
  }

  /** A single-term query clause: a thin wrapper around the term's text. */
  public static class Term {
    private String text;

    /** Wraps the given text as a term. */
    public Term(String text) {
      this.text = text;
    }

    /** Writes the term's text to the stream as a single UTF string. */
    public void write(DataOutput out) throws IOException {
      out.writeUTF(this.text);
    }

    /** Reads one UTF string from the stream and wraps it as a term. */
    public static Term read(DataInput in) throws IOException {
      return new Term(in.readUTF());
    }

    /** Returns the term's text. */
    public String toString() {
      return this.text;
    }

    /** Terms are equal when their text is equal (two null texts also match). */
    public boolean equals(Object o) {
      if (o instanceof Term) {
        String theirs = ((Term)o).text;
        if (this.text == null)
          return theirs == null;
        return this.text.equals(theirs);
      }
      return false;
    }

    /** Hash of the text, or 0 when the text is null. */
    public int hashCode() {
      if (this.text == null)
        return 0;
      return this.text.hashCode();
    }
  }

  /** A phrase query clause: an ordered sequence of terms. */
  public static class Phrase {
    private Term[] terms;

    /** Wraps the given term array (the array is not copied). */
    public Phrase(Term[] terms) {
      this.terms = terms;
    }

    /** Builds a phrase with one term per string, in the same order. */
    public Phrase(String[] terms) {
      this.terms = new Term[terms.length];
      for (int i = 0; i < terms.length; i++) {
        this.terms[i] = new Term(terms[i]);
      }
    }

    /** Returns the backing term array (not a copy). */
    public Term[] getTerms() { return terms; }

    /**
     * Serializes this phrase as a one-byte term count followed by each term.
     *
     * @throws IOException if the phrase has more than 255 terms, which cannot
     *         be represented in the one-byte count, or if the stream fails
     */
    public void write(DataOutput out) throws IOException {
      // writeByte keeps only the low 8 bits; refuse rather than corrupt
      if (terms.length > 255)
        throw new IOException("Phrase has too many terms to serialize: "
                              + terms.length + " (max 255)");
      out.writeByte(terms.length);
      for (int i = 0; i < terms.length; i++)
        terms[i].write(out);
    }

    /** Reads a phrase in the format produced by {@link #write(DataOutput)}. */
    public static Phrase read(DataInput in) throws IOException {
      // the count is an unsigned byte: a signed read would turn 128..255
      // into a negative array size
      int length = in.readUnsignedByte();
      Term[] terms = new Term[length];
      for (int i = 0; i < length; i++)
        terms[i] = Term.read(in);
      return new Phrase(terms);
    }

    /** Renders the terms separated by single spaces, in double quotes. */
    public String toString() {
      StringBuffer buffer = new StringBuffer();
      buffer.append("\"");
      for (int i = 0; i < terms.length; i++) {
        buffer.append(terms[i].toString());
        if (i != terms.length-1)
          buffer.append(" ");
      }
      buffer.append("\"");
      return buffer.toString();
    }

    /** Phrases are equal when they hold equal terms in the same order. */
    public boolean equals(Object o) {
      if (!(o instanceof Phrase)) return false;
      Phrase other = (Phrase)o;
      // lengths must match before comparing element by element
      if (this.terms.length != other.terms.length)
        return false;
      for (int i = 0; i < terms.length; i++) {
        if (!this.terms[i].equals(other.terms[i]))
          return false;
      }
      return true;
    }

    /** Combines the term count with the hash code of every term. */
    public int hashCode() {
      int hashCode = terms.length;
      for (int i = 0; i < terms.length; i++) {
        hashCode ^= terms[i].hashCode();
      }
      return hashCode;
    }

  }


  /** The clauses of this query, in the order they were added (Clause instances). */
  private ArrayList clauses = new ArrayList();

  /** Empty array passed to toArray() so that the result is typed as Clause[]. */
  private static final Clause[] CLAUSES_PROTO = new Clause[0];

  /** Returns every clause of this query, in order, as a newly built array. */
  public Clause[] getClauses() {
    Object[] snapshot = clauses.toArray(CLAUSES_PROTO);
    return (Clause[])snapshot;
  }

  /** Appends a required term clause that targets the default field. */
  public void addRequiredTerm(String term) {
    String defaultField = Clause.DEFAULT_FIELD;
    addRequiredTerm(term, defaultField);
  }

  /** Appends a required, non-prohibited term clause in the given field. */
  public void addRequiredTerm(String term, String field) {
    addRequiredTerm(term, field, true);
  }
  
  /**
   * Appends a non-prohibited term clause in the given field, flagged as
   * required or not according to <code>isRequired</code>.
   */
  public void addRequiredTerm(String term, String field, boolean isRequired) {
    Term single = new Term(term);
    clauses.add(new Clause(single, field, isRequired, false));
  }

  /** Appends a prohibited term clause that targets the default field. */
  public void addProhibitedTerm(String term) {
    String defaultField = Clause.DEFAULT_FIELD;
    addProhibitedTerm(term, defaultField);
  }

  /** Appends a prohibited, non-required term clause in the given field. */
  public void addProhibitedTerm(String term, String field) {
    Term single = new Term(term);
    Clause prohibited = new Clause(single, field, false, true);
    clauses.add(prohibited);
  }

  /** Appends a required phrase that targets the default field. */
  public void addRequiredPhrase(String[] terms) {
    String defaultField = Clause.DEFAULT_FIELD;
    addRequiredPhrase(terms, defaultField);
  }

  /**
   * Appends a required phrase in the given field.  An empty array adds
   * nothing, and a single-element array is added as a plain required term.
   */
  public void addRequiredPhrase(String[] terms, String field) {
    switch (terms.length) {
    case 0:                                       // ignore empty phrase
      break;
    case 1:                                       // optimize to term query
      addRequiredTerm(terms[0], field);
      break;
    default:
      clauses.add(new Clause(new Phrase(terms), field, true, false));
      break;
    }
  }

  /** Appends a prohibited phrase that targets the default field. */
  public void addProhibitedPhrase(String[] terms) {
    String defaultField = Clause.DEFAULT_FIELD;
    addProhibitedPhrase(terms, defaultField);
  }

  /**
   * Appends a prohibited phrase in the given field.  An empty array adds
   * nothing, and a single-element array is added as a plain prohibited term.
   */
  public void addProhibitedPhrase(String[] terms, String field) {
    if (terms.length == 0) {
      return;                                     // ignore empty phrase
    }
    if (terms.length == 1) {
      addProhibitedTerm(terms[0], field);         // optimize to term query
      return;
    }
    Phrase phrase = new Phrase(terms);
    clauses.add(new Clause(phrase, field, false, true));
  }

  /**
   * Serializes this query: a one-byte clause count, each clause in order,
   * then the query string and the summary length.
   *
   * <p>A null query string is written as the empty string, because
   * <code>writeUTF</code> cannot encode null; it therefore reads back as "".
   *
   * @throws IOException if the query has more than 255 clauses, which cannot
   *         be represented in the one-byte count, or if the stream fails
   */
  public void write(DataOutput out) throws IOException {
    int count = clauses.size();
    // writeByte keeps only the low 8 bits; refuse rather than corrupt
    if (count > 255)
      throw new IOException("Query has too many clauses to serialize: "
                            + count + " (max 255)");
    out.writeByte(count);
    for (int i = 0; i < count; i++)
      ((Clause)clauses.get(i)).write(out);
    // Add by Xie shuqiang 2005.11.19
    // queryStr defaults to null, which writeUTF would reject with an NPE
    out.writeUTF(queryStr == null ? "" : queryStr);
    //  add by dinglf
    out.writeInt(summaryLen);
  }
  
  /** Creates a new query and populates it from the stream via readFields. */
  public static Query read(DataInput in) throws IOException {
    Query fresh = new Query();
    fresh.readFields(in);
    return fresh;
  }

  /**
   * Replaces this query's state with data read from the stream: a one-byte
   * clause count, that many clauses, then the query string and the summary
   * length.  Any clauses held before the call are discarded.
   */
  public void readFields(DataInput in) throws IOException {
    clauses.clear();
    // the count is an unsigned byte: a signed read would turn 128..255 into
    // a negative number, skip the loop and misalign every following read
    int length = in.readUnsignedByte();
    for (int i = 0; i < length; i++)
      clauses.add(Clause.read(in));
    // Add by Xie shuqiang 2005.11.19
    this.queryStr = in.readUTF();
    // add by dinglf
    this.summaryLen = in.readInt();
  }

  /** Renders the clauses in order, separated by single spaces. */
  public String toString() {
    StringBuffer text = new StringBuffer();
    int count = clauses.size();
    for (int i = 0; i < count; i++) {
      if (i > 0)
        text.append(" ");                         // separator between clauses
      text.append(clauses.get(i).toString());
    }
    return text.toString();
  }

  /**
   * Queries are equal when their clause lists are equal.  The query string
   * and summary length do not take part in the comparison.
   */
  public boolean equals(Object o) {
    if (o instanceof Query) {
      Query that = (Query)o;
      return this.clauses.equals(that.clauses);
    }
    return false;
  }
  
  /** Hash code of the clause list, the only state that equals() compares. */
  public int hashCode() {
    return clauses.hashCode();
  }

  /**
   * Returns a copy of this query with its own clause list.  The list itself
   * is copied; the Clause objects in it are shared with the original.
   */
  public Object clone() {
    try {
      Query copy = (Query)super.clone();
      copy.clauses = (ArrayList)this.clauses.clone();
      return copy;
    } catch (CloneNotSupportedException e) {
      throw new RuntimeException(e);
    }
  }


  /** Flattens a query into the text of the terms that it contains, skipping
   * prohibited clauses.  These are terms which should be highlighted in
   * matching documents. */
  public String[] getTerms() {
    ArrayList texts = new ArrayList();
    int count = clauses.size();
    for (int i = 0; i < count; i++) {
      Clause clause = (Clause)clauses.get(i);
      if (clause.isProhibited()) {
        continue;                                 // prohibited: contributes nothing
      }
      if (!clause.isPhrase()) {
        texts.add(clause.getTerm().toString());
        continue;
      }
      Term[] phraseTerms = clause.getPhrase().getTerms();
      for (int j = 0; j < phraseTerms.length; j++) {
        texts.add(phraseTerms[j].toString());
      }
    }
    String[] flattened = new String[texts.size()];
    return (String[])texts.toArray(flattened);
  }


  /** Parses a query string with NutchAnalysis, then passes the result through
   * fixup, which folds clauses in unknown fields into the default field. */
  public static Query parse(String queryString) throws IOException {
    Query parsed = NutchAnalysis.parseQuery(queryString);
    return fixup(parsed);
  }

  /**
   * Convert clauses in unknown fields to the default field.
   *
   * <p>Returns a new query; the input is not modified.  A clause whose field
   * is not recognized by QueryFilters.isField is replaced by a copy in the
   * default field, holding a phrase made of the field name followed by the
   * clause's original term(s).  All other clauses are carried over as-is.
   * The input's query string and summary length are carried over as well.
   */
  private static Query fixup(Query input) {
    Query output = new Query();
    // carry over non-clause state, which a fresh Query would otherwise reset
    output.queryStr = input.queryStr;
    output.summaryLen = input.summaryLen;

    // walk the query
    Clause[] clauses = input.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];
      if (!QueryFilters.isField(c.getField())) {  // unknown field
        ArrayList terms = new ArrayList();        // add name to query
        if (c.isPhrase()) {
          terms.addAll(Arrays.asList(c.getPhrase().getTerms()));
        } else {
          terms.add(c.getTerm());
        }
        terms.add(0, new Term(c.getField()));     // add to front of phrase
        c = (Clause)c.clone();                    // leave the input's clause intact
        c.field = Clause.DEFAULT_FIELD;           // use default field instead
        c.termOrPhrase
          = new Phrase((Term[])terms.toArray(new Term[terms.size()]));
      }
      output.clauses.add(c);                      // copy clause to output
    }
    return output;
  }

  /**
   * For debugging.  Reads queries from standard input, one per line, and
   * prints each one as parsed and as translated by QueryFilters.filter.
   * Stops when standard input reaches end of file.
   */
  public static void main(String[] args) throws Exception {
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
    while (true) {
      System.out.print("Query: ");
      String line = in.readLine();
      if (line == null) {                         // end of input
        break;
      }
      Query query = parse(line);
      System.out.println("Parsed: " + query);
      System.out.println("Translated: " + QueryFilters.filter(query));
    }
  }


}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -