📄 searchbean.java
字号:
if (segmentName.equals((String) segmentNames.get(i))) {
System.out
.println("**********have this segment:" + segmentName);
return true;
}
}
return client.addSegment(segmentName, host, port);
}
/**
 * Asks the client to delete the given segments and, when the client reports
 * success, drops the first matching entry for each of them from
 * {@code segmentNames}.
 *
 * @param segments names of the segments to delete
 * @param host passed through to {@code client.delSegments}
 * @param port passed through to {@code client.delSegments}
 * @return {@code true} if the client call succeeded, {@code false} otherwise
 *         (in which case {@code segmentNames} is left untouched)
 * @throws IOException if the client call throws it
 */
public boolean delSegments(String[] segments, String host, String port)
        throws IOException {
    if (!client.delSegments(segments, host, port)) {
        return false;
    }
    for (int s = 0; s < segments.length; s++) {
        int count = this.segmentNames.size();
        int idx = 0;
        while (idx < count) {
            String known = (String) this.segmentNames.get(idx);
            if (segments[s].equals(known)) {
                System.out.println("*******delete " + segments[s]
                        + " from segmentNames");
                // Only the first match is removed; the scan stops right after.
                this.segmentNames.remove(idx);
                break;
            }
            idx++;
        }
    }
    return true;
}
/**
 * Asks the client to merge the given segments into {@code newSegment} and,
 * when the client reports success, replaces the merged names in
 * {@code segmentNames} with the new one.
 *
 * @param segments names of the segments being merged
 * @param newSegment name appended to {@code segmentNames} after a successful merge
 * @param host passed through to {@code client.mergeSegments}
 * @param port passed through to {@code client.mergeSegments}
 * @return {@code true} if the client call succeeded, {@code false} otherwise
 *         (in which case {@code segmentNames} is left untouched)
 * @throws IOException if the client call throws it
 */
public boolean mergeSegments(String[] segments, String newSegment,
        String host, String port) throws IOException {
    if (!client.mergeSegments(segments, newSegment, host, port)) {
        return false;
    }
    for (int s = 0; s < segments.length; s++) {
        int count = this.segmentNames.size();
        int idx = 0;
        while (idx < count) {
            String known = (String) this.segmentNames.get(idx);
            if (segments[s].equals(known)) {
                System.out.println("*******delete " + segments[s]
                        + " from segmentNames");
                // Only the first match is removed; the scan stops right after.
                this.segmentNames.remove(idx);
                break;
            }
            idx++;
        }
    }
    this.segmentNames.add(newSegment);
    return true;
}
/**
 * Copies the current contents of {@code segmentNames} into a new array.
 *
 * @return a freshly allocated array holding every entry of
 *         {@code segmentNames}, in list order
 */
public String[] getSegmentNames() {
    int count = segmentNames.size();
    String[] snapshot = new String[count];
    int idx = 0;
    while (idx < snapshot.length) {
        snapshot[idx] = (String) this.segmentNames.get(idx);
        idx++;
    }
    return snapshot;
}
/*
* public boolean reloadSegmentNames(String host) throws IOException{ return
* client.reloadSegmentNames(host); }
*/
/**
 * Runs a search by delegating directly to
 * {@code searcher.search(query, numHits, mode)}.
 *
 * @param query the query to run
 * @param numHits passed through to the searcher unchanged
 * @param mode passed through to the searcher unchanged
 * @return whatever the searcher returns
 * @throws IOException if the searcher call throws it
 */
public Hits search(Query query, int numHits, String mode) throws IOException {
return searcher.search(query, numHits, mode);
}
/**
 * Runs a search bounded by {@code start} and {@code end}, delegating to the
 * searcher with a literal 0 in the position where the other overloads of this
 * class pass their sort type.
 * <p>
 * Modified by liubin. 2006-02-10
 *
 * @param query the query to run
 * @param numHits passed through to the searcher unchanged
 * @param start passed through to the searcher unchanged
 * @param end passed through to the searcher unchanged
 * @param mode passed through to the searcher unchanged
 * @return whatever the searcher returns
 * @throws IOException if the searcher call throws it
 */
public Hits search(Query query, int numHits, long start, long end, String mode) throws IOException {
    final int sortType = 0;
    return searcher.search(query, numHits, sortType, start, end, mode);
}
/**
 * Searches with an explicit sort type, forwarding to the
 * {@code (query, numHits, sortType, groupType, mode)} overload with a group
 * type of 0.
 * <p>
 * Comment by liubin, 2006-02-10:
 * sortType: 0 - sort by relevance, 1 - sort by time.
 * searchFrom: 0 - full-text search, 1 - title search
 * (this method has no searchFrom parameter; the note is kept from the original comment).
 *
 * @see com.netease.search.searcher.Searcher#search(net.nutch.searcher.Query,
 *      int, int, int)
 */
public Hits search(Query query, int numHits, int sortType, String mode)
        throws IOException {
    final int groupType = 0;
    return search(query, numHits, sortType, groupType, mode);
}
/**
 * Searches with an explicit sort type and {@code start}/{@code end} bounds,
 * forwarding to the seven-argument overload with a group type of 0.
 *
 * @param query the query to run
 * @param numHits forwarded unchanged
 * @param sortType forwarded unchanged
 * @param start forwarded unchanged
 * @param end forwarded unchanged
 * @param mode forwarded unchanged
 * @return the result of the seven-argument overload
 * @throws IOException if the forwarded call throws it
 */
public Hits search(Query query, int numHits, int sortType, long start, long end, String mode) throws IOException {
    final int groupType = 0;
    return search(query, numHits, sortType, groupType, start, end, mode);
}
/**
 * Searches with explicit sort and group types, forwarding to the
 * seven-argument overload with both {@code start} and {@code end} set to 0.
 *
 * @param query the query to run
 * @param numHits forwarded unchanged
 * @param sortType forwarded unchanged
 * @param groupType forwarded unchanged
 * @param mode forwarded unchanged
 * @return the result of the seven-argument overload
 * @throws IOException if the forwarded call throws it
 */
public Hits search(Query query, int numHits, int sortType, int groupType, String mode) throws IOException {
    final int start = 0;
    final int end = 0;
    return search(query, numHits, sortType, groupType, start, end, mode);
}
/**
 * Runs a search and, when grouping applies, folds the raw hits so that each
 * group number is represented by a single hit.
 * <p>
 * Grouping is on when {@code groupType} is 1, with two overrides taken from the
 * first matching query clause: a clause on field "gid" turns grouping off, and a
 * clause on field "cid" turns grouping on and forces group type 1. Without
 * grouping the searcher result for {@code numHits} is returned as is.
 * <p>
 * With grouping, a larger raw result is fetched and scanned in order. The first
 * hit of each group claims the group's slot in the output; a later hit of the
 * same group replaces it only if it has a higher site rank, or an equal site
 * rank and a higher sort value. Either way the surviving hit's group-document
 * count is incremented.
 *
 * @param query the query to run
 * @param numHits number of hits requested by the caller
 * @param sortType sort type passed to the searcher; replaced by 5 when
 *        {@code groupType} is 2
 * @param groupType 0: no folding, 1: fold by gid, 2: fold by cid
 *        (as described by the original comment)
 * @param start passed through to the searcher unchanged
 * @param end passed through to the searcher unchanged
 * @param mode passed through to the searcher unchanged
 * @return the searcher's hits when not grouping, otherwise a new {@code Hits}
 *         holding one representative hit per group in first-seen order
 * @throws IOException if a searcher call throws it
 */
public Hits search(Query query, int numHits, int sortType,
        int groupType, long start, long end, String mode) throws IOException {
    boolean needGroup = (groupType == 1);

    // Only the first "gid" or "cid" clause is considered; the scan stops there.
    Clause[] clause = query.getClauses();
    for (int i = 0; i < clause.length; i++) {
        if (clause[i].getField().equals("gid")) {
            needGroup = false;
            break;
        }
        if (clause[i].getField().equals("cid")) {
            needGroup = true;
            groupType = 1;
            break;
        }
    }
    LOG.info("SearchBean needGroup : "+needGroup);

    // Round the request up to the next whole multiple of one "search block"
    // (HITS_PER_PAGE * PAGES_PER_SEARCH), then over-fetch by RAW_HITS_FACTOR.
    int maxHitsRaw = HITS_PER_PAGE * PAGES_PER_SEARCH
            * (numHits / HITS_PER_PAGE / PAGES_PER_SEARCH + 1);
    int numHitsRaw = (int) (maxHitsRaw * RAW_HITS_FACTOR);
    // Added by LiuBin, 2006-03-22: never ask for more than MAX_RESULT_HITS.
    if (numHitsRaw > MAX_RESULT_HITS) {
        numHitsRaw = MAX_RESULT_HITS;
    }
    LOG.info("searching for " + numHitsRaw + " raw hits");

    // Added by xie Shuqiang, 2006/09/19.
    if (groupType == 2) {
        sortType = 5;
    }
    if (!needGroup) {
        return searcher.search(query, numHits, sortType, start, end, mode);
    }

    // Grouping required: fetch the enlarged raw result.
    Hits hits = searcher.search(query, numHitsRaw, sortType, start, end, mode);
    // NOTE(review): needGroup can only be true here when groupType is 1, so this
    // branch looks unreachable; kept to preserve the original control flow.
    if (groupType == 2) {
        return hits;
    }

    long total = hits.getTotal();
    Map grpToHits = new HashMap();   // group key -> current representative hit
    List resultList = new ArrayList(); // group keys in first-seen order
    for (int rawHitNum = 0; rawHitNum < total; rawHitNum++) {
        if (rawHitNum == hits.getLength()) {
            // Ran out of fetched hits before reaching the reported total:
            // retry once with the maximum fetch size and start over.
            if (numHitsRaw >= MAX_RESULT_HITS)
                break;
            numHitsRaw = MAX_RESULT_HITS;
            LOG.info("re-searching for " + numHitsRaw + " raw hits");
            hits = searcher.search(query, numHitsRaw, sortType, start, end, mode);
            LOG.info("found " + hits.getTotal() + " raw hits");
            rawHitNum = -1; // the loop increment brings this back to 0
            grpToHits.clear();
            resultList.clear();
            total = hits.getTotal();
            continue;
        }
        Hit hit = hits.getHit(rawHitNum);
        int grpNo = 0;
        if (groupType == 1)
            grpNo = hit.getGrpNo();
        String grpKey = String.valueOf(grpNo);
        Hit grpHit = (Hit) grpToHits.get(grpKey);
        if (grpHit == null) {
            grpToHits.put(grpKey, hit);
            resultList.add(grpKey);
            if (resultList.size() > maxHitsRaw)
                break;
        } else if (hit.getSiteRank() > grpHit.getSiteRank() ||
                (hit.getSiteRank() == grpHit.getSiteRank() && hit.getSortInt() > grpHit.getSortInt())) {
            // The new hit wins the group; it inherits the running count.
            hit.setGrpDocs(grpHit.getGrpDocs() + 1);
            grpToHits.put(grpKey, hit);
        } else {
            grpHit.setGrpDocs(grpHit.getGrpDocs() + 1);
        }
    }

    // De-duplication: emit one representative per group, in first-seen order.
    // Indexed reads avoid the quadratic cost of repeatedly removing the head
    // of an ArrayList.
    int resultListSize = resultList.size();
    Hit[] resultHits = new Hit[resultListSize];
    for (int i = 0; i < resultListSize; i++) {
        resultHits[i] = (Hit) grpToHits.get(resultList.get(i));
    }
    grpToHits.clear();

    // If total < maxHitsRaw, report the folded count as the total.
    if (total < maxHitsRaw) {
        total = resultListSize;
    }
    return new Hits(total, resultListSize, resultHits);
}
/*******************
private class ClusterComparator implements Comparator{
public int compare(Object o1, Object o2){
Hit h1 = (Hit)o1;
Hit h2 = (Hit)o2;
if (h1.getGrpDocs() > 1 && h2.getGrpDocs() == 1)
return -1;
if (h1.getGrpDocs() == 1 && h2.getGrpDocs() > 1)
return 1;
if (h1.getSiteRankSum() < h2.getSiteRankSum())
return -1;
if (h1.getSiteRankSum() > h2.getSiteRankSum())
return 1;
if (h1.getGrpDocs() < h2.getGrpDocs())
return -1;
if (h1.getGrpDocs() > h2.getGrpDocs())
return 1;
if (h1.getGrpNo() < h2.getGrpNo())
return -1;
else
return 1;
}
}
****************************/
private class SiteHits extends ArrayList {
private boolean maxSizeExceeded;
}
/**
* Search for pages matching a query, eliminating excessive hits from sites.
* Hits for a site in excess of <code>maxHitsPerSite</code> are removed
* from the results. The remaining hits for such sites have {@link
* Hit#moreFromSiteExcluded()} set.
* <p>
* If maxHitsPerSite is zero then all hits are returned.
*
* @param query
* query
* @param numHits
* number of requested hits
* @param maxHitsPerSite
* the maximum hits returned per site, or zero
* @return Hits the matching hits
* @throws IOException
*/
/*
* public Hits search(Query query, int numHits, int maxHitsPerSite) throws
* IOException { if (maxHitsPerSite <= 0) // disable site checking return
* searcher.search(query, numHits);
*
* int numHitsRaw = (int)(numHits * RAW_HITS_FACTOR); LOG.info("searching
* for "+numHitsRaw+" raw hits"); Hits hits = searcher.search(query,
* numHitsRaw); long total = hits.getTotal(); Map siteToHits = new
* HashMap(); List resultList = new ArrayList(); Set seen = new HashSet();
* List excludedSites = new ArrayList(); boolean totalIsExact = true; for
* (int rawHitNum = 0; rawHitNum < hits.getTotal(); rawHitNum++) { // get
* the next raw hit if (rawHitNum >= hits.getLength()) { // optimize query
* by prohibiting more matches on some excluded sites Query optQuery =
* (Query)query.clone(); for (int i = 0; i < excludedSites.size(); i++) { if
* (i == MAX_PROHIBITED_TERMS) break;
* optQuery.addProhibitedTerm(((String)excludedSites.get(i)), "site"); }
* numHitsRaw = (int)(numHitsRaw * RAW_HITS_FACTOR); LOG.info("re-searching
* for "+numHitsRaw+" raw hits, query: "+optQuery); hits =
* searcher.search(optQuery, numHitsRaw); LOG.info("found
* "+hits.getTotal()+" raw hits"); rawHitNum = 0; continue; }
*
* Hit hit = hits.getHit(rawHitNum); if (seen.contains(hit)) continue;
* seen.add(hit);
* // get site hits for its site String site = hit.getSite(); SiteHits
* siteHits = (SiteHits)siteToHits.get(site); if (siteHits == null)
* siteToHits.put(site, siteHits = new SiteHits());
* // does this hit exceed maxHitsPerSite? if (siteHits.size() ==
* maxHitsPerSite) { // yes -- ignore the hit if (!siteHits.maxSizeExceeded) {
* // mark prior hits with moreFromSiteExcluded for (int i = 0; i <
* siteHits.size(); i++) {
* ((Hit)siteHits.get(i)).setMoreFromSiteExcluded(true); }
* siteHits.maxSizeExceeded = true;
*
* excludedSites.add(site); // exclude site } totalIsExact = false; } else { //
* no -- collect the hit resultList.add(hit); siteHits.add(hit);
* // are we done? // we need to find one more than asked for, so that we
* can tell if // there are more hits to be shown if (resultList.size() >
* numHits) break; } }
*
* Hits results = new Hits(total, (Hit[])resultList.toArray(new
* Hit[resultList.size()])); results.setTotalIsExact(totalIsExact); return
* results; }
*
*/
/**
 * Delegates to {@code searcher.getExplanation(query, hit)}.
 *
 * @param query passed through to the searcher
 * @param hit passed through to the searcher
 * @return whatever the searcher returns
 * @throws IOException if the searcher call throws it
 */
public String getExplanation(Query query, Hit hit) throws IOException {
return searcher.getExplanation(query, hit);
}
/**
 * Delegates to {@code detailer.getDetails(hit)} for a single hit.
 *
 * @param hit passed through to the detailer
 * @return whatever the detailer returns
 * @throws IOException if the detailer call throws it
 */
public HitDetails getDetails(Hit hit) throws IOException {
return detailer.getDetails(hit);
}
/**
 * Delegates to {@code detailer.getDetails(hits)} for an array of hits.
 *
 * @param hits passed through to the detailer
 * @return whatever the detailer returns
 * @throws IOException if the detailer call throws it
 */
public HitDetails[] getDetails(Hit[] hits) throws IOException {
return detailer.getDetails(hits);
}
/**
 * Returns the summary for one hit by calling
 * {@link #getSummaryNew(HitDetails, Query, int)} with a summary type of 1.
 *
 * @param hit forwarded unchanged
 * @param query forwarded unchanged
 * @return the result of {@code getSummaryNew(hit, query, 1)}
 * @throws IOException if the forwarded call throws it
 */
public String getSummary(HitDetails hit, Query query) throws IOException {
return getSummaryNew(hit, query, 1);
}
/**
 * Delegates to {@code summarizer.getSummary(hit, query, fullTextSummary)}.
 *
 * @param hit passed through to the summarizer
 * @param query passed through to the summarizer
 * @param fullTextSummary passed through to the summarizer
 * @return whatever the summarizer returns
 * @throws IOException if the summarizer call throws it
 */
public String getSummary(HitDetails hit, Query query, boolean fullTextSummary) throws IOException {
return summarizer.getSummary(hit, query, fullTextSummary);
}
/**
 * Delegates to {@code summarizer.getSummaryNew(hit, query, summaryType)}.
 *
 * @param hit passed through to the summarizer
 * @param query passed through to the summarizer
 * @param summaryType passed through to the summarizer
 * @return whatever the summarizer returns
 * @throws IOException if the summarizer call throws it
 */
public String getSummaryNew(HitDetails hit, Query query, int summaryType) throws IOException {
return summarizer.getSummaryNew(hit, query, summaryType);
}
/**
 * Returns the summaries for several hits by calling
 * {@link #getSummaryNew(HitDetails[], Query, int)} with a summary type of 1.
 *
 * @param hits forwarded unchanged
 * @param query forwarded unchanged
 * @return the result of {@code getSummaryNew(hits, query, 1)}
 * @throws IOException if the forwarded call throws it
 */
public String[] getSummary(HitDetails[] hits, Query query) throws IOException {
return getSummaryNew(hits, query, 1);
}
/**
 * Delegates to {@code summarizer.getSummary(hits, query, fullTextSummary)}.
 *
 * @param hits passed through to the summarizer
 * @param query passed through to the summarizer
 * @param fullTextSummary passed through to the summarizer
 * @return whatever the summarizer returns
 * @throws IOException if the summarizer call throws it
 */
public String[] getSummary(HitDetails[] hits, Query query, boolean fullTextSummary)
throws IOException {
return summarizer.getSummary(hits, query, fullTextSummary);
}
/**
 * Delegates to {@code summarizer.getSummaryNew(hits, query, summaryType)}.
 *
 * @param hits passed through to the summarizer
 * @param query passed through to the summarizer
 * @param summaryType passed through to the summarizer
 * @return whatever the summarizer returns
 * @throws IOException if the summarizer call throws it
 */
public String[] getSummaryNew(HitDetails[] hits, Query query, int summaryType)
throws IOException {
return summarizer.getSummaryNew(hits, query, summaryType);
}
/**
 * Delegates to {@code content.getContent(hit)}.
 *
 * @param hit passed through to the content source
 * @return whatever the content source returns
 * @throws IOException if the delegated call throws it
 */
public byte[] getContent(HitDetails hit) throws IOException {
return content.getContent(hit);
}
/**
 * Delegates to {@code content.getParseData(hit)}.
 *
 * @param hit passed through to the content source
 * @return whatever the content source returns
 * @throws IOException if the delegated call throws it
 */
public ParseData getParseData(HitDetails hit) throws IOException {
return content.getParseData(hit);
}
/**
 * Delegates to {@code content.getParseText(hit)}.
 *
 * @param hit passed through to the content source
 * @return whatever the content source returns
 * @throws IOException if the delegated call throws it
 */
public ParseText getParseText(HitDetails hit) throws IOException {
return content.getParseText(hit);
}
/**
 * Delegates to {@code content.getAnchors(hit)}.
 *
 * @param hit passed through to the content source
 * @return whatever the content source returns
 * @throws IOException if the delegated call throws it
 */
public String[] getAnchors(HitDetails hit) throws IOException {
return content.getAnchors(hit);
}
/**
 * Delegates to {@code content.getFetchDate(hit)}.
 *
 * @param hit passed through to the content source
 * @return whatever the content source returns
 * @throws IOException if the delegated call throws it
 */
public long getFetchDate(HitDetails hit) throws IOException {
return content.getFetchDate(hit);
}
/**
 * For debugging. Parses the first command-line argument as a query, runs it
 * with {@code DistributedSearch.MODE_RAM}, prints the reported total and then
 * the details of at most ten returned hits.
 *
 * @param args command-line arguments; {@code args[0]} is the query string
 * @throws Exception if bean construction, query parsing or any search call fails
 */
public static void main(String[] args) throws Exception {
    String usage = "SearchBean query";
    if (args.length == 0) {
        System.err.println(usage);
        System.exit(-1);
    }
    SearchBean bean = new SearchBean(DistributedSearch.MODE_RAM);
    Query query = Query.parse(args[0]);
    Hits hits = bean.search(query, 10, 0, 0, 0, DistributedSearch.MODE_RAM);
    System.out.println("Total hits: " + hits.getTotal());
    // Size the slice by the hits actually returned, not by the reported total:
    // after folding, the total can be larger than the number of hits held.
    int length = (int) Math.min(hits.getLength(), 10);
    Hit[] show = hits.getHits(0, length);
    HitDetails[] details = bean.getDetails(show);
    // Summaries are still fetched as before, but their output is not printed.
    String[] summaries = bean.getSummaryNew(details, query, 1);
    // Bound the loop by the array being indexed.
    for (int i = 0; i < details.length; i++) {
        System.out.println(" " + i + " " + details[i]);
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -