⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 rawfieldqueryfilter.java

📁 nutch搜索的改进型工具和优化爬虫的相关工具
💻 JAVA
字号:
/* Copyright (c) 2003 The Nutch Organization.  All rights reserved.   */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */

package net.nutch.searcher;

import java.util.StringTokenizer;

import net.nutch.searcher.Query.Clause;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

/**
 * Translates raw query fields to search the same-named field, as indexed by an
 * IndexingFilter.
 *
 * <p>A clause value may carry several alternatives separated by {@code ';'}
 * (for example {@code cid:14458539;14458636}). Each alternative becomes its own
 * {@link TermQuery}; when there is more than one, they are grouped as optional
 * (neither required nor prohibited) clauses of a nested {@link BooleanQuery},
 * and that nested query inherits the required/prohibited flags of the original
 * clause.
 */
public abstract class RawFieldQueryFilter implements QueryFilter {
  /** Separator between alternative values inside a single clause value. */
  private static final String VALUE_SEPARATOR = ";";

  /** Name of the query field this filter handles (also the Lucene field searched). */
  private final String field;
  /** Whether clause values are lowercased before building terms. */
  private final boolean lowerCase;
  /** Boost applied to every TermQuery this filter creates. */
  private final float boost;

  /** Construct for the named field, lowercasing query values, with a boost
   * of 0.0f. */
  protected RawFieldQueryFilter(String field) {
    this(field, true);
  }

  /** Construct for the named field with the given boost, lowercasing query
   * values. */
  protected RawFieldQueryFilter(String field, float boost) {
    this(field, true, boost);
  }

  /** Construct for the named field, potentially lowercasing query values,
   * with a boost of 0.0f. */
  protected RawFieldQueryFilter(String field, boolean lowerCase) {
    this(field, lowerCase, 0.0f);
  }

  /** Construct for the named field with the given boost, potentially
   * lowercasing query values. */
  protected RawFieldQueryFilter(String field, boolean lowerCase, float boost) {
    this.field = field;
    this.lowerCase = lowerCase;
    this.boost = boost;
  }

  /**
   * Adds a Lucene query to <code>output</code> for every clause of
   * <code>input</code> whose field equals this filter's field. Clauses for
   * other fields are ignored.
   *
   * @param input the Nutch query whose clauses are examined
   * @param output the Lucene query being built; modified in place
   * @return <code>output</code>, after the matching clauses have been added
   * @throws QueryException declared by the QueryFilter contract; not thrown
   *         directly by this implementation
   */
  public BooleanQuery filter(Query input, BooleanQuery output)
    throws QueryException {

    // examine each clause in the Nutch query
    Clause[] clauses = input.getClauses();
    for (int i = 0; i < clauses.length; i++) {
      Clause c = clauses[i];

      // skip non-matching clauses
      if (!c.getField().equals(field))
        continue;

      // get the field value from the clause
      // raw fields are guaranteed to be Terms, not Phrases
      String value = c.getTerm().toString();
      if (lowerCase)
        value = value.toLowerCase();

      // Modified by Xie Shuqiang. 2006.11.15
      // Supports multi-value syntax such as cid:14458539;14458636
      StringTokenizer st = new StringTokenizer(value, VALUE_SEPARATOR);

      if (!st.hasMoreTokens()) {
        // The value is empty or consists only of separators. Calling
        // nextToken() here would throw NoSuchElementException, so search for
        // the unsplit value instead; the clause keeps its required/prohibited
        // meaning rather than being dropped or aborting the whole query.
        output.add(newTermQuery(value), c.isRequired(), c.isProhibited());
        continue;
      }

      TermQuery first = newTermQuery(st.nextToken());
      if (!st.hasMoreTokens()) {
        // single value: add the term query exactly as specified in the query
        output.add(first, c.isRequired(), c.isProhibited());
        continue;
      }

      // several values: group them as optional clauses of a nested query and
      // attach that group with the original clause's flags
      BooleanQuery alternatives = new BooleanQuery();
      alternatives.add(first, false, false);
      while (st.hasMoreTokens()) {
        alternatives.add(newTermQuery(st.nextToken()), false, false);
      }
      output.add(alternatives, c.isRequired(), c.isProhibited());
    }

    // return the modified Lucene query
    return output;
  }

  /** Builds a TermQuery on this filter's field for the given text, with this
   * filter's boost applied. */
  private TermQuery newTermQuery(String text) {
    TermQuery query = new TermQuery(new Term(field, text));
    query.setBoost(boost);
    return query;
  }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -