📄 adddocument.java
字号:
* * @see IndexerText * @see IndexerDocument * @see IndexerDocument#getText * @see IndexerDocument#getTerms * @see IndexerDocument#getExtra * @see IndexerDocument#getText */ public void CustomizeDocumentInformation() { /* Add text blocks to be indexed. Ultraseek server will process the text blocks using the IndexerDocument's Locale, and add the following terms to the search index: "Here", "is", "a", "recipe", "for", "Nutria", "Gumbo" "here", "nutria", "gumbo", insertedby:Inserted insertedby:inserted insertedby:insert (English stem of "inserted") insertedby:AddDocument.java ... The text can be searched for with or without the "insertedby:" field as we've added two IndexerText objects. A search for "Nutria" or "nutria" will find this document, even though the term does not appear in the original document content. */ Collection text = parsedDoc.getText(); text.add( new IndexerText( "Here is a recipe for Nutria Gumbo", "" ) ); text.add( new IndexerText( "Inserted by AddDocument.java", "insertedby:" ) ); text.add( new IndexerText( "Inserted by AddDocument.java", "" ) ); /* Add only these precise terms to the index: cuisine:cajun Cajun insertedby:AddDocument.class insertedby:AddDocument Sample File Only a search for "Cajun" will find this document, a search for "cajun" will fail as only the term "Cajun" was inserted. To use automatic case mapping and stemming, insert the terms using getText() instead of getTerms(). */ Collection terms = parsedDoc.getTerms(); terms.add( "cuisine:cajun" ); terms.add( "Cajun" ); terms.add( "insertedby:AddDocument.class" ); terms.add( "insertedby:AddDocument Sample File" ); /* Add meta-data that is returned with a SearchResult, but is not searchable in the search index. For any search that returns this document's SearchResult, the meta-data element "ingredients:" will contain the text we specify below. A search for "Okra" or "ingredients:Okra" will not match this document. To make that information searchable you must add it to the searchable term list accessed by getTerms(). Similarly, "cuisine:" will NOT be in the SearchResult Meta-Data - you must add it to the Map accessed by getExtra() to have it returned with a SearchResult. */ Map extra = parsedDoc.getExtra(); extra.put("origin:", "Inserted by AddDocument.java sample file" ); extra.put("ingredients:", "Nutria, Okra, Onions, Celery, Bay Leaf" ); extra.put("meta:", "This value is not indexed" ); } /** * Delete the prior version of the document. * <p> * When re-indexing documents, you must delete the prior version of the document's * information - otherwise SearchResults could be returned that matched search terms * from an older document (and don't match the current version). * <p> * Your application should handle errors, retries, etc. * <p> * This sample deletes documents having the same URL as the document * we are about to insert. It does not retry after errors. * @see IndexerAdmin#deleteMatchingDocuments */ public void DeletePriorDocumentInformation() throws IOException { System.out.print( "\nDeleting prior document information: ... " ); long start = System.currentTimeMillis(); indexerAdmin.deleteMatchingDocuments(parsedDoc.getURL()); System.out.println( "" + (System.currentTimeMillis() - start) + " ms."); } /** * Insert the document information to the search collection. * <p> * Inserting the document is relatively simple, all the real work is * done while fetching and parsing the document. * <p> * Your application should retry after errors. * * @see IndexerAdmin#insert */ public void InsertDocumentInformation() throws IOException { /* Finally, insert the parsed document into the search index */ try { int quality = 0; System.out.print( "\nInserting document: ... " ); long start = System.currentTimeMillis(); indexerAdmin.insert(parsedDoc, quality); System.out.println( "" + (System.currentTimeMillis() - start) + " ms."); } catch (IOException e) { System.out.println( "Unable to insert document: " + e ); throw e; } } /** * Display the <code>SearchResult</code> for the inserted document. * <p> * Your application does not need to check the <code>SearchResult</code>, * but this example displays it to show the impact of customizing * the document information. * @see #CustomizeDocumentInformation * @see #formatSearchResult * @see SearchResult */ public void ShowDocumentSearchResult() throws IOException { try { System.out.println( "\nReview the SearchResult for the inserted document.\n" ); Query query = Query.parse("+url:" + parsedDoc.getURL().toString() + " +insertedby:AddDocument.class +Nutria"); SearchResultList srl = collection.search(query); Iterator it = srl.iterator(); boolean found = false; try { while (it.hasNext()) { SearchResult sr = (SearchResult) it.next(); found = true; System.out.println( formatSearchResult(sr) ); } } catch (NoSuchElementException ignored) { /* Normal termination condition for SearchResultList */ } if (!found) { System.out.println("Unable to find the document using the query:\n" + query); } } catch (QueryNotSupportedException e) { System.out.println( "Unable to query for document: " + e ); return; } catch (IOException e) { System.out.println( "Unable to search for document: " + e ); return; } } /** * Clean up after the sample application demonstration. * <p> * This is only a demonstration, so clean up afterwards. * Your application, of course, would leave the document * in the search index. */ public void MaybeCleanupInsertedDocument() throws IOException { try { System.out.print("Delete the inserted document (yes/no)? "); String yesno = bufferedReader.readLine(); if (yesno != null) yesno = yesno.trim(); if (yesno != null && yesno.length() > 0 && (yesno.charAt(0) == 'y' || yesno.charAt(0) == 'Y')) { Query query = Query.parse("+url:" + parsedDoc.getURL().toString() + " +insertedby:AddDocument.class"); /* Clean up by deleting the document */ System.out.print("Deleting the inserted document ... " ); long start = System.currentTimeMillis(); indexerAdmin.deleteMatchingDocuments(query); System.out.println( "" + (System.currentTimeMillis() - start) + " ms."); } } catch (QueryNotSupportedException e) { System.out.println( "Unable to delete document: " + e ); return; } catch (IOException e) { System.out.println( "Unable to delete document: " + e ); return; } } class MyURLConnection { URLConnection parent; public MyURLConnection(URL url) throws IOException { parent = url.openConnection(); } /** * Return the content of the document as an array of byte. */ public byte[] getDocumentContent() throws IOException { InputStream in = parent.getInputStream(); ByteArrayOutputStream out; int contentLength = parent.getContentLength(); if (contentLength==-1) out = new ByteArrayOutputStream(); else out = new ByteArrayOutputStream(contentLength); for (int c=in.read(); c>=0; c=in.read()) out.write(c); in.close(); return out.toByteArray(); } /** * Return a guess of the Content-Type of the document. * Many Web-Servers do a poor job of returning the correct Content-Type * for documents with extentions unknown to the web server. * This method detects suspect Content-Types * returned by the document source server and tries to guess the * type based on the filename of the URL. * <p> * Ultraseek needs the Content-Type to correctly parse the document * for indexing. * @see URLConnection#getContentType * @see URLConnection#guessContentTypeFromName * @see URLConnection#guessContentTypeFromStream */ public String getDocumentContentType() { String parentsGuess = parent.getContentType(); if (parentsGuess==null || parentsGuess.equals("") || parentsGuess.equals("application/octet-stream")) { System.out.println("----------------------------------------------------------------"); System.out.println("Document server returned unparsable Content-Type: " + parentsGuess); String filenameGuess = null; /* If using JDK 1.4 or later, uncomment the following line: filenameGuess = parent.guessContentTypeFromName(parent.getURL().getFile()); */ if (filenameGuess != null && !filenameGuess.equals("") && !filenameGuess.equals(parentsGuess)) { System.out.println("Guessing from filename Content-Type : " + filenameGuess); System.out.println("----------------------------------------------------------------"); return filenameGuess; } System.out.println("----------------------------------------------------------------"); } return parentsGuess; } /** * Return an initial guess of the document's date. * This is either what the remote server returned for a * "last-modified" setting, or the date of the fetch. * <p> * This guess may be refined during document parsing. * @see URLConnection#getLastModified * @see URLConnection#getDate */ public Date getDocumentDate() { long dateGuess = parent.getLastModified(); if (dateGuess == 0) dateGuess = parent.getDate(); if (dateGuess == 0) dateGuess = System.currentTimeMillis(); return new Date(dateGuess); } } /** * Format a <code>SearchResult</code> for display. * @return a String suitable for display in a text window. * @param sr The <code>SearchResult</code> to format. */ static public String formatSearchResult(SearchResult sr) throws IOException { StringBuffer sb = new StringBuffer(2048); sb.append("Title: " + sr.getTitle() + "\n" ); sb.append("URL: " + sr.getURLString() + "\n" ); sb.append(" Score=" + sr.getScore() + ", " ); sb.append("Quality=" + sr.getQuality() + ", " ); sb.append("Size=" + sr.getSize() + ", " ); sb.append("LinkCount=" + sr.getRemoteLinkCount() + ", " ); sb.append("Flags=" + sr.getFlags() + "\n" ); sb.append("Date: " + sr.getDate() + "\n" ); sb.append("Indexed: " + sr.getIndexed() + "\n" ); sb.append("Server: " + sr.getSearchServer() + "\n" ); sb.append("Collection: " + sr.getSearchCollection().getName() + "\n" ); sb.append("Extra: \n" ); Map extra = sr.getExtra(); Iterator i = extra.keySet().iterator(); while (i.hasNext()) { String key = (String) i.next().toString(); String val = (String) extra.get(key).toString(); sb.append(" "); sb.append(key); sb.append(" "); for (int j = 15 - key.length(); j > 0; j--) sb.append(" "); sb.append(val); sb.append("\n"); } sb.append("TermDFs: \n" ); Map terms = sr.getTermTFs(); i = terms.keySet().iterator(); while (i.hasNext()) { String key = (String) i.next().toString(); String val = (String) terms.get(key).toString(); sb.append(" "); sb.append(val); sb.append(" "); for (int j = 6 - val.length(); j > 0; j--) sb.append(" "); sb.append(key); sb.append("\n"); } sb.append("Description:\n" + sr.getDescription() + "\n" ); return sb.toString(); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -