⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dosearch.java

📁 改写的小爬虫
💻 JAVA
字号:
//import org.mira.lucene.analysis.IK_CAnalyzer;
import jeasy.analysis.MMAnalyzer;
import org.apache.lucene.analysis.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.document.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
//import org.apache.lucene.analysis.cn.ChineseAnalyzer;
//import org.apache.lucene.search.highlight.Scorer;
//import org.apache.lucene.search.highlight.QueryScorer;
//import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.Query;
import java.io.*;
import java.util.*;
//import java.text.*;
/**
 * Command-line search tool: parses a query string, runs it against the
 * "desc" field of a Lucene index, and writes the requested slice of the hit
 * list to <code>result.html</code> in the current working directory.
 *
 * <p>Accepted command lines:
 * <ul>
 *   <li><code>indexDir query</code> - show hits 0..10</li>
 *   <li><code>indexDir query end</code> - show hits 0..end</li>
 *   <li><code>indexDir query begin end</code> - show hits begin..end</li>
 * </ul>
 * The begin index is inclusive and the end index is exclusive.
 *
 * <p>Exit codes (kept as they historically were, because scripts may rely on
 * them): 1 for a bad command line or an unparsable query, 4 when
 * <code>result.html</code> cannot be created, and 0 both on success and when
 * the index cannot be opened, searched, or read.
 */
public class doSearch
{
   /** Usage text printed whenever the command line cannot be understood. */
   private static final String USAGE="usage: java doSearch <index filename> <query string> <begin index> <end index>";

   /** Number of hits shown when no usable end index is supplied. */
   private static final int DEFAULT_PAGE_SIZE=10;

   /**
    * Program entry point; see the class comment for the argument forms and
    * exit codes.
    *
    * @param args index directory, query string, and optional range bounds
    */
   public static void main(String args[])
   {
      int[] range=parseRange(args);
      if(range==null)
      {
         System.out.println(USAGE);
         System.exit(1);
         return;
      }
      String indexDir=args[0];
      String lookstring=args[1];
      int begin=range[0];
      int end=range[1];

      IndexReader reader=null;
      try{
         reader=IndexReader.open(new File(indexDir));
      }catch(IOException e)
      {
         System.out.println("error occurred in open index-file");
         System.exit(0);
         return;
      }

      QueryParser parser=new QueryParser("desc",new MMAnalyzer());
      Query query=null;
      try{
         query=parser.parse(lookstring);
      }catch(ParseException e)
      {
         System.out.println("parse error!");
         closeIndex(null,reader);
         System.exit(1);
         return;
      }
      System.out.println("the query is: "+query);

      IndexSearcher searcher=new IndexSearcher(reader);
      Hits hits=null;
      // Only the search call itself is timed, not parsing or rendering.
      long starts=System.currentTimeMillis();
      try{
         hits=searcher.search(query);
      }catch(IOException e)
      {
         System.out.print("error in search: "+e);
         closeIndex(searcher,reader);
         System.exit(0);
         return;
      }
      long ends=System.currentTimeMillis();

      // Clamp the requested window to the hits that actually exist. A begin
      // index past the last hit falls back to the first hit.
      int len=hits.length();
      begin=begin<len?begin:0;
      end=end<len?end:len;

      PrintWriter fileOut=null;
      try{
         fileOut=new PrintWriter(new FileOutputStream("result.html"));
      }catch(FileNotFoundException e)
      {
         System.out.println("file not found error!");
         closeIndex(searcher,reader);
         System.exit(4);
         return;
      }
      fileOut.print("<html><body>");
      fileOut.print("took "+(ends-starts)+" milliseconds");
      if(len==0)
      {
         // Report the query text itself; concatenating the args array would
         // only print its identity string.
         fileOut.print("no file matched "+escapeHtml(lookstring));
      }
      try{
         for(int i=begin;i<end;i++)
         {
            // Load each stored document once and reuse it for both fields.
            Document doc=hits.doc(i);
            // Stored fields come from crawled pages, so they are escaped
            // before being placed into markup.
            fileOut.println("<p><a href=\""+escapeHtml(doc.get("url"))+"\" target=\"_blank\">");
            fileOut.print((i+1)+"、 title: "+escapeHtml(doc.get("title"))+"</a>&nbsp;");
            fileOut.println("<i>score: "+hits.score(i)+"</i><br>");
         }
      }catch(Exception e)
      {
         // Close the document so that the partial result is still well formed.
         System.out.println("error in get details: "+e);
         fileOut.println("</body></html>");
         fileOut.close();
         closeIndex(searcher,reader);
         System.exit(0);
         return;
      }
      fileOut.print("</body></html>");
      fileOut.close();
      closeIndex(searcher,reader);
   }

   /**
    * Derives the hit window from the command line.
    *
    * @param args the raw command-line arguments
    * @return a two-element array {begin, end}, or null when the argument
    *         count is not 2, 3 or 4 or a bound is not an integer
    */
   private static int[] parseRange(String[] args)
   {
      int begin=0;
      int end=DEFAULT_PAGE_SIZE;
      try{
         if(args.length==3)
         {
            end=Integer.parseInt(args[2]);
         }
         else if(args.length==4)
         {
            begin=Integer.parseInt(args[2]);
            end=Integer.parseInt(args[3]);
         }
         else if(args.length!=2)
         {
            return null;
         }
      }catch(NumberFormatException e)
      {
         return null;
      }
      // A negative begin index is treated as the first hit.
      if(begin<0)
      {
         begin=0;
      }
      // Keep an end index that lies after begin; otherwise fall back to one
      // default-sized page starting at begin.
      if(end<=begin)
      {
         end=begin+DEFAULT_PAGE_SIZE;
      }
      return new int[]{begin,end};
   }

   /**
    * Escapes the characters that are significant in HTML text and in
    * double- or single-quoted attribute values.
    *
    * @param text the raw text, possibly null
    * @return the escaped text, or an empty string when text is null
    */
   private static String escapeHtml(String text)
   {
      if(text==null)
      {
         return "";
      }
      StringBuffer escaped=new StringBuffer(text.length()+16);
      for(int i=0;i<text.length();i++)
      {
         char c=text.charAt(i);
         switch(c)
         {
            case '&': escaped.append("&amp;"); break;
            case '<': escaped.append("&lt;"); break;
            case '>': escaped.append("&gt;"); break;
            case '"': escaped.append("&quot;"); break;
            case '\'': escaped.append("&#39;"); break;
            default: escaped.append(c);
         }
      }
      return escaped.toString();
   }

   /**
    * Closes the searcher and the reader, reporting but not propagating any
    * failure, since the program is finishing either way.
    *
    * @param searcher the searcher to close, or null if none was created
    * @param reader the index reader to close, or null if none was opened
    */
   private static void closeIndex(IndexSearcher searcher,IndexReader reader)
   {
      try{
         if(searcher!=null)
         {
            searcher.close();
         }
      }catch(IOException e)
      {
         System.out.println("error in close searcher: "+e);
      }
      try{
         if(reader!=null)
         {
            reader.close();
         }
      }catch(IOException e)
      {
         System.out.println("error in close index: "+e);
      }
   }
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -