📄 adddocument.java

📁 关于Ultraseek的一些用法,刚初学,所以都是比较简单
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
   *   * @see IndexerText   * @see IndexerDocument   * @see IndexerDocument#getText   * @see IndexerDocument#getTerms   * @see IndexerDocument#getExtra   * @see IndexerDocument#getText   */  public void CustomizeDocumentInformation() {    /* Add text blocks to be indexed.  Ultraseek server will process the text       blocks using the IndexerDocument's Locale, and add the following terms       to the search index:         "Here", "is", "a", "recipe", "for", "Nutria", "Gumbo"         "here", "nutria", "gumbo",         insertedby:Inserted         insertedby:inserted         insertedby:insert             (English stem of "inserted")         insertedby:AddDocument.java         ...      The text can be searched for with or without the "insertedby:" field      as we've added two IndexerText objects.             A search for "Nutria" or "nutria" will find this document, even though       the term does not appear in the original document content.    */    Collection text = parsedDoc.getText();    text.add( new IndexerText( "Here is a recipe for Nutria Gumbo", "" ) );    text.add( new IndexerText( "Inserted by AddDocument.java", "insertedby:" ) );    text.add( new IndexerText( "Inserted by AddDocument.java", "" ) );    /* Add only these precise terms to the index:         cuisine:cajun         Cajun         insertedby:AddDocument.class         insertedby:AddDocument Sample File      Only a search for "Cajun" will find this document, a search for "cajun"      will fail as only the term "Cajun" was inserted.  To use automatic      case mapping and stemming, insert the terms using getText() instead of getTerms().    */    Collection terms = parsedDoc.getTerms();    terms.add( "cuisine:cajun" );    terms.add( "Cajun" );    terms.add( "insertedby:AddDocument.class" );    terms.add( "insertedby:AddDocument Sample File" );    /* Add meta-data that is returned with a SearchResult,       but is not searchable in the search index.       
For any search that returns this document's SearchResult,       the meta-data element "ingredients:" will contain        the text we specify below.       A search for "Okra" or "ingredients:Okra" will        not match this document.  To make that information       searchable you must add it to the searchable term       list accessed by getTerms().       Similarly, "cuisine:" will NOT be in the SearchResult       Meta-Data - you must add it to the Map accessed by       getExtra() to have it returned with a SearchResult.    */    Map extra = parsedDoc.getExtra();    extra.put("origin:",      "Inserted by AddDocument.java sample file" );    extra.put("ingredients:", "Nutria, Okra, Onions, Celery, Bay Leaf" );    extra.put("meta:",        "This value is not indexed" );  }  /**   * Delete the prior version of the document.   * <p>   * When re-indexing documents, you must delete the prior version of the document's   * information - otherwise SearchResults could be returned that matched search terms   * from an older document (and don't match the current version).   * <p>   * Your application should handle errors, retries, etc.   * <p>   * This sample deletes documents having the same URL as the document   * we are about to insert.  It does not retry after errors.   * @see IndexerAdmin#deleteMatchingDocuments   */  public void DeletePriorDocumentInformation()    throws IOException {    System.out.print( "\nDeleting prior document information: ...  " );    long start = System.currentTimeMillis();    indexerAdmin.deleteMatchingDocuments(parsedDoc.getURL());    System.out.println( "" + (System.currentTimeMillis() - start) + " ms.");  }  /**   * Insert the document information to the search collection.   * <p>   * Inserting the document is relatively simple, all the real work is   * done while fetching and parsing the document.   * <p>   * Your application should retry after errors.   
*   * @see IndexerAdmin#insert   */  public void InsertDocumentInformation()    throws IOException {    /* Finally, insert the parsed document into the search index */    try {      int quality = 0;      System.out.print( "\nInserting document: ...  " );      long start = System.currentTimeMillis();      indexerAdmin.insert(parsedDoc, quality);      System.out.println( "" + (System.currentTimeMillis() - start) + " ms.");    } catch (IOException e) {      System.out.println( "Unable to insert document: " + e );      throw e;    }  }  /**   * Display the <code>SearchResult</code> for the inserted document.   * <p>   * Your application does not need to check the <code>SearchResult</code>,   * but this example displays it to show the impact of customizing   * the document information.   * @see #CustomizeDocumentInformation   * @see #formatSearchResult   * @see SearchResult   */  public void ShowDocumentSearchResult()    throws IOException {    try {      System.out.println( "\nReview the SearchResult for the inserted document.\n" );      Query query = Query.parse("+url:" + parsedDoc.getURL().toString() +                                 " +insertedby:AddDocument.class +Nutria");      SearchResultList srl = collection.search(query);      Iterator it = srl.iterator();      boolean found = false;      try {        while (it.hasNext()) {          SearchResult sr = (SearchResult) it.next();          found = true;          System.out.println( formatSearchResult(sr) );        }      } catch (NoSuchElementException ignored) {        /* Normal termination condition for SearchResultList */      }      if (!found) {        System.out.println("Unable to find the document using the query:\n" + query);      }    } catch (QueryNotSupportedException e) {      System.out.println( "Unable to query for document: " + e );      return;    } catch (IOException e) {      System.out.println( "Unable to search for document: " + e );      return;    }  }  /**   * Clean up after the sample 
application demonstration.
   * <p>
   * This is only a demonstration, so clean up afterwards.
   * Your application, of course, would leave the document
   * in the search index.
   * <p>
   * Prompts on standard output and reads one line from
   * <code>bufferedReader</code>.  The document is deleted only when the
   * trimmed answer starts with 'y' or 'Y'; an empty answer or end of input
   * leaves the document in place.  Failures are reported on standard
   * output and are not rethrown.
   */
  public void MaybeCleanupInsertedDocument()
    throws IOException {
    try {
      System.out.print("Delete the inserted document (yes/no)? ");
      String yesno = bufferedReader.readLine();

      /* readLine() yields null at end of input; that counts as "no". */
      boolean confirmed = false;
      if (yesno != null) {
        yesno = yesno.trim();
        confirmed = yesno.length() > 0 &&
                    (yesno.charAt(0) == 'y' || yesno.charAt(0) == 'Y');
      }

      if (confirmed) {
        Query query = Query.parse("+url:" + parsedDoc.getURL().toString() +
                                  " +insertedby:AddDocument.class");

        /* Clean up by deleting the document */
        System.out.print("Deleting the inserted document ... " );
        long start = System.currentTimeMillis();
        indexerAdmin.deleteMatchingDocuments(query);
        System.out.println( "" + (System.currentTimeMillis() - start) + " ms.");
      }
    } catch (QueryNotSupportedException e) {
      System.out.println( "Unable to delete document: " + e );
    } catch (IOException e) {
      System.out.println( "Unable to delete document: " + e );
    }
  }

  /**
   * Thin wrapper around a <code>URLConnection</code> that exposes the
   * document's content bytes, a Content-Type guess and a date guess.
   */
  class MyURLConnection {
    URLConnection parent;

    /**
     * Open a connection to the given URL.
     * @param url location of the document to fetch.
     * @throws IOException if <code>url.openConnection()</code> fails.
     */
    public MyURLConnection(URL url)
      throws IOException {
      parent = url.openConnection();
    }

    /**
     * Return the content of the document as an array of byte.
     * <p>
     * The stream is read in chunks and is always closed, including when a
     * read fails part-way through.
     * @return every byte read from the connection's input stream.
     * @throws IOException if the stream cannot be opened, read or closed.
     */
    public byte[] getDocumentContent() throws IOException {
      InputStream in = parent.getInputStream();
      try {
        /* The reported length is only a sizing hint for the buffer; any
           negative value means "unknown" and must not reach the
           ByteArrayOutputStream(int) constructor, which rejects it. */
        int contentLength = parent.getContentLength();
        ByteArrayOutputStream out;
        if (contentLength < 0) out = new ByteArrayOutputStream();
        else out = new ByteArrayOutputStream(contentLength);

        byte[] chunk = new byte[8192];
        for (int n = in.read(chunk); n >= 0; n = in.read(chunk)) {
          out.write(chunk, 0, n);
        }
        return out.toByteArray();
      } finally {
        /* Close on every path so a failed read does not leak the stream. */
        in.close();
      }
    }

    /**
     * Return a guess of the Content-Type of the document.
     * Many Web-Servers do a poor job of returning the correct Content-Type
     * for documents with extensions unknown to the web server.
     * This method detects suspect Content-Types (null, empty, or
     * "application/octet-stream") returned by the document source server
     * and tries to guess the type based on the filename of the URL.
     * <p>
     * As shipped, the filename-based guess is commented out, so a suspect
     * type is reported on standard output and then returned unchanged.
     * <p>
     * Ultraseek needs the Content-Type to correctly parse the document
     * for indexing.
     * @return the filename-based guess when one is available and differs
     *         from the server's value, otherwise the server's value
     *         (which may be null).
     * @see URLConnection#getContentType
     * @see URLConnection#guessContentTypeFromName
     * @see URLConnection#guessContentTypeFromStream
     */
    public String getDocumentContentType() {
      String parentsGuess = parent.getContentType();
      if (parentsGuess==null ||
          parentsGuess.equals("") ||
          parentsGuess.equals("application/octet-stream")) {
        System.out.println("----------------------------------------------------------------");
        System.out.println("Document server returned unparsable Content-Type:  " + parentsGuess);
        String filenameGuess = null;
        /* If using JDK 1.4 or later, uncomment the following line:
           filenameGuess = parent.guessContentTypeFromName(parent.getURL().getFile());
        */
        if (filenameGuess != null &&
            !filenameGuess.equals("") &&
            !filenameGuess.equals(parentsGuess)) {
          System.out.println("Guessing from filename Content-Type    :  " + filenameGuess);
          System.out.println("----------------------------------------------------------------");
          return filenameGuess;
        }
        System.out.println("----------------------------------------------------------------");
      }
      return parentsGuess;
    }

    /**
     * Return an initial guess of the document's date.
     * This is either what the remote server returned for a
     * "last-modified" setting, or the date of the fetch.
     * <p>
     * The fallbacks are tried in order: <code>getLastModified()</code>,
     * then <code>getDate()</code>, then the current system time; a value
     * of 0 from either connection call triggers the next fallback.
     * <p>
     * This guess may be refined during document parsing.
     * @return the best available date, never null.
     * @see URLConnection#getLastModified
     * @see URLConnection#getDate
     */
    public Date getDocumentDate() {
      long dateGuess = parent.getLastModified();
      if (dateGuess == 0)
        dateGuess = parent.getDate();
      if (dateGuess == 0)
        dateGuess = System.currentTimeMillis();
      return new Date(dateGuess);
    }
  }

  /**
   * Format a <code>SearchResult</code> for display.
   * <p>
   * The output lists the title, URL, score/quality/size/link-count/flags,
   * dates, server and collection, followed by the "extra" meta-data map,
   * the term map and the description.  Map entries are rendered with
   * <code>String.valueOf</code>, so non-String keys and null values are
   * printed instead of causing a <code>NullPointerException</code>.
   * @return a String suitable for display in a text window.
   * @param sr The <code>SearchResult</code> to format.
   * @throws IOException if one of the <code>SearchResult</code> accessors
   *         throws it.
   */
  static public String formatSearchResult(SearchResult sr)
    throws IOException {
    StringBuffer sb = new StringBuffer(2048);
    sb.append("Title:    " + sr.getTitle() + "\n" );
    sb.append("URL:      " + sr.getURLString() + "\n" );
    sb.append("          Score=" + sr.getScore() + ", " );
    sb.append("Quality=" + sr.getQuality() + ", " );
    sb.append("Size=" + sr.getSize() + ", " );
    sb.append("LinkCount=" + sr.getRemoteLinkCount() + ", " );
    sb.append("Flags=" + sr.getFlags() + "\n" );
    sb.append("Date:     " + sr.getDate() + "\n" );
    sb.append("Indexed:  " + sr.getIndexed() + "\n" );
    sb.append("Server:   " + sr.getSearchServer() + "\n" );
    sb.append("Collection: " + sr.getSearchCollection().getName() + "\n" );

    /* Extra meta-data: key, padded to a 16-character column, then value. */
    sb.append("Extra:    \n" );
    Map extra = sr.getExtra();
    if (extra != null) {
      /* Walk the entries directly: looking values up again by the key's
         toString() would miss any key that is not itself a String. */
      Iterator i = extra.entrySet().iterator();
      while (i.hasNext()) {
        Map.Entry entry = (Map.Entry) i.next();
        String key = String.valueOf(entry.getKey());
        String val = String.valueOf(entry.getValue());
        sb.append("     ");
        sb.append(key);
        sb.append(" ");
        for (int j = 15 - key.length(); j > 0; j--)
          sb.append(" ");
        sb.append(val);
        sb.append("\n");
      }
    }

    /* Term map: value, padded to a 7-character column, then the term.
       NOTE(review): the heading reads "TermDFs" although the data comes
       from getTermTFs() - confirm the intended wording before changing. */
    sb.append("TermDFs:   \n" );
    Map terms = sr.getTermTFs();
    if (terms != null) {
      Iterator i = terms.entrySet().iterator();
      while (i.hasNext()) {
        Map.Entry entry = (Map.Entry) i.next();
        String key = String.valueOf(entry.getKey());
        String val = String.valueOf(entry.getValue());
        sb.append("     ");
        sb.append(val);
        sb.append(" ");
        for (int j = 6 - val.length(); j > 0; j--)
          sb.append(" ");
        sb.append(key);
        sb.append("\n");
      }
    }

    sb.append("Description:\n" + sr.getDescription() + "\n" );
    return sb.toString();
  }
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -