📄 adddocument.java

📁 关于Ultraseek的一些用法,刚初学,所以都是比较简单
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* -*- mode:java; indent-tabs-mode:nil; c-basic-offset:2 -*- * *  $RCSFile$ $Revision: 1.26 $ $Date: 2006/01/25 16:56:17 $ * *  Copyright (c) 2000-2004 Autonomy Corp.  All Rights Reserved. *  Permission to use, copy, modify, and distribute this file is hereby *  granted without fee, provided that the above copyright notice appear *  in all copies. */import java.io.*;import java.net.*;import java.util.*;import com.ultraseek.xpa.search.*;import com.ultraseek.xpa.server.*;/** * Add documents to an Ultraseek Indexer collection. * <p> * This sample application demonstrates how to: * <ul> * <li>parse and insert a document. * <li>add additional text to the search index for a document. * <li>add additional terms to the search index for a document. * <li>add non-indexed text, which is returned with SearchResults, to the document metadata. * </ul> * The steps to add a document to the Ultraseek search index are: * <blockquote> * <nl> * <li>Fetch the document. * <li>Parse the text from the document. * <li>Customize the information to add to the search index. * <li>Delete any previous version of the document from the search index. * <li>Insert the document's information to the search index. * </nl> * </blockquote> * This sample application does the following: * <blockquote> * <table border=1> * <tr><td>Fetch the Document</td> * <td>The document is fetched using a <code>URLConnection</code>. * It attempts to guess the real document type when the source server * returns <code>"application/octet-stream"</code>. * </td></tr> * <tr><td>Parse the text from the document</td> * <td>The document is sent to the Ultraseek server using <code>IndexerAdmin.parse()</code>, * and Ultraseek returns the indexable information it found in the document.</td></tr> * <tr><td>Customize Index information</td> * <td>This sample adds the following information: *   <blockquote> *   <table border=1><tr><td>Indexable text</td><td>Here is a recipe for Nutria Gumbo.</td></tr> *          <tr><td>Indexable terms</td><td>cuisine:cajun</td></tr> *          <tr><td>Non indexed Metadata</td><td>ingredients = "Nutria, Okra, Rice, Tomatoes"</td></tr> *   </table> *   </blockquote> * </td></tr> * <tr><td>Deletes the prior version of the document</td> * <td>Using <code>CollectionAdmin.deleteMatchingDocuments()</code></td></tr> * <tr><td>Insert the new document's information</td><td>Using <code>CollectionAdmin.insert()</code></td></tr> * </table> * </blockquote> * This sample application then displays the <code>SearchResult</code> for the inserted * document, and (optionally) removes the document from the index. * <p> * Afterwards, this sample application prompts for the next URL.   * An empty String will end the demonstration. * * @see IndexerAdmin#parse * @see IndexerDocument * @see IndexerAdmin#deleteMatchingDocuments * @see IndexerAdmin#insert * @since XPA1.0 */public class AddDocument {  public static void main(String[] args) throws IOException {    new AddDocument().runDemo(args);  }  AddDocument() { }  /**   * Run the entire demo of how to Add a Document to an Ultraseek collection.   */  public void runDemo(String[] args) throws IOException {    try {      InitSampleApplication();    } catch (IOException e) {      System.out.println( "Sample application encountered: " + e );      return;    }    while (true) {      try {        if (!GetDocumentURL()) break;        FetchTheDocument();        ParseTheDocument();        CustomizeDocumentInformation();        DeletePriorDocumentInformation();        InsertDocumentInformation();        /* See the result of the sample application's actions */        ShowDocumentSearchResult();        MaybeCleanupInsertedDocument();      } catch (IOException e) {        System.out.println( "Sample application encountered: " + e );      }    }    System.out.println( "Done." );  }  IndexerAdmin indexerAdmin = null;  UltraseekCollection collection = null;  URL url = null;  InputStreamReader inputStreamReader = new InputStreamReader(System.in);  BufferedReader bufferedReader = new BufferedReader(inputStreamReader);  /**   * Initialize the Sample application.   * Asks for Ultraseek server location, admin username, and collection to modify.   */  public void InitSampleApplication()     throws IOException {    System.out.println();    System.out.println("This demo application adds documents");    System.out.println("to an Ultraseek \"direct indexing\" collection.");    System.out.println();    UltraseekServer server = null;    while (true) {      try {        server = null;        System.out.println("Specify the URL (with port) of your Ultraseek instance.");        System.out.print("URL: ");        URL ultra = null;        String input = bufferedReader.readLine();        try {          ultra = new URL(input);        } catch (MalformedURLException e) {          if (input != null && !input.startsWith("http")) {            input = "http://" + input;            ultra = new URL(input);          } else {            throw e;          }        }        server = new UltraseekServer(ultra);        System.out.print("Connecting... ");        String version = server.getVersionString();        System.out.println("\nUltraseek Server at " + ultra +                            " is version " + version );        break;      } catch (MalformedURLException e) {        System.out.println("Unable to parse URL: " + e );      } catch (IOException e) {        if (server == null) throw e;        System.out.println("Connect failure: " + e);      }    }    ServerAdmin serverAdmin = null;    while (true) {      String username = null;      String password = null;      try {        username = null;        password = null;        System.out.print("Admin username: ");        username = bufferedReader.readLine();        System.out.print("Admin password: ");        password = bufferedReader.readLine();        serverAdmin = server.admin(username,password);        serverAdmin.login();        break;      } catch (IOException e) {        if ((username == null) || (password == null)) throw e;        System.out.println("Login failure: " + e + "\n");      }    }        while (true) {      System.out.println("Enter the internal name of the collection to insert into.");      System.out.print("collection: ");      String id = bufferedReader.readLine();      collection =         (UltraseekCollection)server.getSearchCollection(id);      if (collection==null) {        System.out.println("Cannot find a collection named: " + id + "\n");        continue;      }      CollectionAdmin collectionAdmin = collection.admin(serverAdmin);      /* REMIND: this test should be simplier ... */      if (!(collectionAdmin instanceof IndexerAdmin)          || (collectionAdmin instanceof SpiderAdmin)          || (collectionAdmin instanceof ScannerAdmin)          || (collectionAdmin instanceof ExchangeAdmin)          || (collectionAdmin instanceof NetnewsAdmin)          || (collectionAdmin instanceof DatabaseAdmin)) {        String collectionType = collection.getClass().getName();        collectionType = collectionType.substring(collectionType.lastIndexOf(".")+1);        System.out.println(collection.getID() + " is a " + collectionType );        System.out.println("Documents cannot be inserted into that kind of collection.\n");        continue;      }      indexerAdmin = (IndexerAdmin)collectionAdmin;      break;    }  }  /** Get a URL for the document to insert.   * @return true We have a new URL   * @return false Input is ended, time to quit.   */  public boolean GetDocumentURL() {    while (true) {      try {        System.out.println();        System.out.println("Enter the URL of the document you want added.");        System.out.print("URL: ");        String input = bufferedReader.readLine();        if ("".equals(input)) return false;        url = new URL(input);        return true;      } catch (MalformedURLException e) {        System.out.println("Exception: " + e );      } catch (IOException e) {        return false;      }    }   }  String contentType = null;  Date date = null;  int size = 0;  byte[] content = null;  /**   * Fetch the content of the document to insert into the search collection.   * <p>   * Your application could fetch the document content using HTTP, FTP,   * from a Database, etc.  Your application should also handle   * errors, retries, filtering of documents, etc.   * <p>   * This sample uses a simple <code>URLConnection</code> to   * fetch the document.  The document information is stored   * in <code>content, contentType, date,</code> and <code>size</code>.   * If the document source server returns a non-indexable   * <code>Content-Type</code>, this sample tries to guess   * a content type based on the filename part of the URL.   */  public void FetchTheDocument()    throws IOException {    MyURLConnection conn = null;    try {      System.out.print( "\nFetching document: ...  " );      long start = System.currentTimeMillis();      conn = new MyURLConnection(url);      content = conn.getDocumentContent();      System.out.println( "" + (System.currentTimeMillis() - start) + " ms.");      contentType = conn.getDocumentContentType();      date = conn.getDocumentDate();      size = content.length;      System.out.println( "Document Content-Type:  " + contentType );      System.out.println( "Document Date:          " + date );      System.out.println( "Document size in bytes  " + size );    } catch (IOException e) {      System.out.println( "Unable to fetch document: " + e );      throw e;    }  }  IndexerDocument parsedDoc = null;  /**   * Parse the document to discover indexable content.   * <p>   * Unless the document is <code>text/plain</code>, it must be parsed   * to determine the appropriate text content to index.  Your application   * can parse the document itself, but it is recommended you use the   * Ultraseek server's document parser - and then modify the results from   * that parse if you need to customize information about the document.   * <p>   * This sample uses the Ultraseek server's document parser, the result is   * stored in <code>parsedDoc</code>.   */  public void ParseTheDocument()    throws IOException {    /* Second, send the document to the Ultraseek Server for parsing */    try {      short flags = 0;      System.out.print( "\nParsing document: ...  " );      long start = System.currentTimeMillis();      parsedDoc = indexerAdmin.parse(url, size, date, flags,                                     contentType, content);      System.out.println( "" + (System.currentTimeMillis() - start) + " ms.");    } catch (IOException e) {      System.out.println( "Unable to parse document: " + e );      throw e;    }    System.out.println( "Parsed Title:       " + parsedDoc.getTitle() );    System.out.println( "Parsed URL:         " + parsedDoc.getURL() );    System.out.println( "Parsed Date:        " + parsedDoc.getDate() );    System.out.println( "Parsed Flags:       " + parsedDoc.getFlags() );    System.out.println( "Parsed Locale:      " + parsedDoc.getLocale() );    System.out.println( "Parsed Publisher:   " + parsedDoc.getPublisher() );    System.out.println( "Parsed Size:        " + parsedDoc.getSize() );    System.out.println( "Parsed Date:        " + parsedDoc.getDate() );    System.out.println( "Number of Terms:    " + parsedDoc.getTerms().size() );    System.out.println( "Number of Text:     " + parsedDoc.getText().size() );    System.out.println( "Number of Extra:    " + parsedDoc.getExtra().size() );    String description = parsedDoc.getDescription();    final int MAX_DESCRIPTION = 800;    if (description.length() > MAX_DESCRIPTION)      description = (description.substring(0,(MAX_DESCRIPTION/2-1))                     + "\n\n   ... display truncated ...  \n\n"                      + description.substring(description.length()-(MAX_DESCRIPTION/2-1)-1,description.length()-1));    System.out.println( "Parsed Description:\n" + description );    System.out.println( "------------------------------------------------" );  }  /**   * Customize the information to insert into the search index.   * <p>   * Your application could add meta-data, additional index terms, remove   * index terms, add text to be indexed, change the title, etc.   * <p>   * This sample adds some text to be indexed, adds some specific   * terms to be indexed, and adds some meta-data to be returned   * with the SearchResult (and which is not added to the search index).
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -