📄 filescanner.java

📁 关于Ultraseek的一些用法,刚初学,所以都是比较简单
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
    // Done, join with threads as they finish    for (Iterator iter = threads.iterator(); iter.hasNext(); ) {      try {        Thread th = ((Thread)(iter.next()));        th.join();        log(1,"joined thread: "+th.getName());      } catch (InterruptedException ex) {}    }          long endTime = System.currentTimeMillis();    long elapsedTime = endTime - startTime;    double indexRate = numberOfIndexedFiles/(elapsedTime/1000.0);    double kbytes = numberOfIndexedBytes/1024.0;    double kbytesRate = numberOfIndexedBytes/(elapsedTime/1000.0);    System.out.println("Total scanning time: "+elapsedTime/1000.0 +" seconds");    System.out.println("Found "+numberOfFiles+" files");    System.out.println("Rejected "+numberOfRejectedFiles+" files");    System.out.println("Indexed "+numberOfIndexedFiles+" files");    System.out.println(" "+indexRate+" files per second");    System.out.println("Indexed "+kbytes+" Kbytes");    System.out.println(" "+kbytesRate+" kbytes per second");  }  // Some utility routines.  static public String guessDoctype(String name)  {    int dot = name.lastIndexOf(".");    if (dot == -1) return defaultMime;    String ext = name.substring(dot);    log(2,"guessDoctype: extension is "+ext);    if (!doctypes.containsKey(ext)) return defaultMime;    return (String)(doctypes.get(ext));  }  static public boolean indexableDocType(String type) {    return allowedDoctypes.contains(type);  }  static synchronized final void log(int level,String msg)  {    if (level <= verbose) {      System.out.println(msg);    }  }  // From Acme.Utils.parseInt(). See http://www.acme.com/java/  // Parse an integer, returning a default value on errors.  // Avoids try blocks all over the code.  
static int parseInt(String s, int default_value) {
  try {
    return Integer.parseInt(s);
  } catch (NumberFormatException e) {
    // Integer.parseInt reports every failure (including null) this way.
    return default_value;
  }
}

/** Prints the command-line options to standard output. */
static void usage() {
  System.out.println("arguments are:");
  System.out.println(" -c col     internal name of search collection");
  System.out.println(" -d dir     directory to scan (can be repeated)");
  System.out.println(" -m mime    default MIME content-type for files");
  System.out.println(" -p port    port number of search server (integer)");
  System.out.println(" -s host    hostname of search server");
  System.out.println(" -t n       number of worker (indexing) threads");
  System.out.println(" -u user    admin username for search collection");
  System.out.println(" -v         more verbose message (repeat for even more)");
  System.out.println(" -z pass    admin password for search collection");
}

/** Creates a scanner; all work happens in run(). */
FileScanner() {
}

/**
 * Worker loop: takes file names off the shared files queue, filters them
 * by guessed content-type and size, reads each file into memory, maps its
 * path to an HTTP URL and inserts it through indexer, retrying on
 * IOException with exponential backoff.
 *
 * The loop ends once the queue is empty and done is set.
 */
public void run() {
  long startWait = System.currentTimeMillis();
  // NOTE(review): this is a single check, not a loop, so the thread sleeps
  // at most once here. If SCANNER_SLEEP was meant as a timeout for waiting
  // until finding becomes true, this may have been intended as a `while`;
  // confirm against the code that sets `finding` before changing it.
  if (!finding) {
    try { Thread.sleep(100); }
    catch (InterruptedException ex) {}
    log(1,"waiting for work "+Thread.currentThread().getName());
    if (System.currentTimeMillis() - startWait > SCANNER_SLEEP) {
      return;
    }
  }

  // Default values for insert.
  short flags = 0;
  int quality = 0;
  String desc = null;
  String publisher = null;
  String doctype = null;
  Map extra = null;
  Locale locale = null;
  List text = null;
  List terms = null;

  log(3,"starting indexing: "+Thread.currentThread().getName());

  // Termination condition is subtle.
  // If the files queue is empty, but the boss thread
  // is not done, don't quit yet.
  while (!files.isEmpty() || !done) {
    while (files.isEmpty() && !done) {
      // Wait for more files to be found
      try { Thread.sleep(100); }
      catch (InterruptedException ex) {}
    }
    if (files.isEmpty()) continue;

    File file;
    try {
      file = new File((String)(files.removeFirst()));
    } catch (NoSuchElementException ex) {
      // Someone else grabbed the entry between isEmpty() and
      // removeFirst(). Go round the loop again instead of letting
      // the exception end this worker.
      continue;
    }
    String path = file.getPath();
    String name = file.getName();
    log(1,"thread "+Thread.currentThread().getName()+" reading "+path);

    // NOTE(review): the numberOf* counters below are updated with ++/+=
    // and no locking; if several workers share them, counts can be lost.
    // Their declarations are not visible here - confirm.
    doctype = guessDoctype(name);
    if (!indexableDocType(doctype)) {
      log (0,doctype+" is not indexable, "+path);
      numberOfRejectedFiles++;
      continue;
    }

    long size = file.length();
    if (size > 10000000) {
      log (0,"file too big ("+size+" bytes) "+path);
      numberOfRejectedFiles++;
      continue;
    }

    // Read file into memory. The cast is safe: size is at most 10000000.
    byte[] doc;
    try {
      doc = readFileBytes(path, (int)size);
    } catch (FileNotFoundException ex) {
      continue;             // Must have been deleted, go to next filename.
    } catch (IOException ex) {
      log(0,"IO exception "+ex+" reading "+path);
      continue;             // Some other problem, go to next filename.
    }

    // Map the filename into a URL that can be accessed by search clients
    // (a "file:" URL is usually inappropriate as it is specific to this host)
    // For this example, we're assuming the files will be served to clients
    // by the host "webhost" via HTTP, with starting path "/FileScanner"
    URL url;
    try {
      String canon = file.getCanonicalPath();
      StringBuffer sb = new StringBuffer(canon.length() * 2);
      int offset = 0;
      int found = canon.indexOf(File.separator, offset);
      // Encode each directory for URL.
      // The one-argument URLEncoder.encode is for JDK 1.3 or below;
      // on JDK 1.4 or above use URLEncoder.encode(segment, "UTF-8").
      while (found != -1) {
        sb.append( "/" );
        sb.append( URLEncoder.encode(canon.substring(offset,found)) );
        offset = found + 1;
        found = canon.indexOf(File.separator, offset);
      }
      sb.append( "/" );
      sb.append( URLEncoder.encode(canon.substring(offset)) );
      url = new URL( "http", "webhost",
                     "/FileScanner" + sb.toString() );
      log(1,"thread "+Thread.currentThread().getName()+" mapping to URL "+url);
    } catch (MalformedURLException ex) {
      log(0,"Malformed URL for "+path);
      continue;             // Move on to next file.
    } catch (IOException ex) {
      log(0,"Unable to find Canonical path for "+path);
      continue;             // Move on to next file.
    }

    // Insert file, with retries for network errors.
    // Retries use binary exponential backoff: 2, 4, 8, 16, 32, ... seconds
    // This allows for network congestion to improve, or for the
    // server process to restart and recover.
    final int INITIAL_WAIT = 2;
    final int MAX_WAIT = 256; // Combined wait time is 510s (8.5 minutes)
    boolean retry = true;
    int trys = 0;
    for (long waitSecs=INITIAL_WAIT; retry; waitSecs *= 2) {
      try {
        trys++;
        if (trys>1) log(1,"try #"+trys+" for "+url);
        // Remove previous copy of this document, if it was there
        indexer.deleteMatchingDocuments(url);
        // Add the new one
        indexer.insert(url, (int)size, new Date(file.lastModified()),
                       flags, quality, doctype, doc, path, desc,
                       publisher, extra, locale, text, terms);
        numberOfIndexedFiles++;
        numberOfIndexedBytes += size;
        retry = false;
      } catch (IOException ex) {
        if (waitSecs > MAX_WAIT) {
          log(0,"Indexing failure in "+Thread.currentThread().getName()+
              " when handling "+path);
          retry = false;    // skip this document
          continue;         // loop condition now fails, ending the retries
        }
        log(1,"Indexing problem in "+Thread.currentThread().getName()+
            " sleeping for "+waitSecs+"s before retry, "+
            ex.getMessage());
        try { Thread.sleep(waitSecs * 1000); }
        catch (InterruptedException ex2) {}
        retry = true;
      }
    }
  }
}

/**
 * Reads up to length bytes from the start of a file and always closes
 * the stream. A single InputStream.read call may return fewer bytes than
 * requested, so this loops until the buffer is full or end of file.
 *
 * @param path   file to read
 * @param length size of the buffer to fill
 * @return a buffer of exactly length bytes; if the file turned out to be
 *         shorter, the remaining bytes are left as zero
 * @throws IOException if the file cannot be opened or read
 */
private static byte[] readFileBytes(String path, int length) throws IOException {
  byte[] buf = new byte[length];
  FileInputStream in = new FileInputStream(path);
  try {
    int filled = 0;
    while (filled < length) {
      int n = in.read(buf, filled, length - filled);
      if (n < 0) break;     // End of file reached early.
      filled += n;
    }
  } finally {
    try { in.close(); }
    catch (IOException ignored) {}  // Nothing useful to do for a read-only stream.
  }
  return buf;
}

// The Collections framework offers a synchronized List,
// but that doesn't implement the operations specific to
// LinkedList. We could implement an entire synchronized
// LinkedList here, or we could make the client code
// use the List calls in a queue-like manner. But this
// seems more obvious for the above code, and it is
// only four calls.
static final class Queue {
  // Backing list; every access goes through the synchronized methods below.
  LinkedList q = new LinkedList();

  /** Returns true when the queue currently holds no elements. */
  public synchronized boolean isEmpty() {
    boolean empty = q.isEmpty();
    return empty;
  }

  /** Returns the number of elements currently queued. */
  public synchronized int size() {
    int count = q.size();
    return count;
  }

  /** Appends an element at the tail of the queue. */
  public synchronized void addLast(Object o) {
    q.addLast(o);
  }

  /**
   * Removes and returns the element at the head of the queue.
   * LinkedList.removeFirst throws NoSuchElementException when empty,
   * and that exception propagates to the caller.
   */
  public synchronized Object removeFirst() {
    Object head = q.removeFirst();
    return head;
  }
}
} // closes the enclosing class
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -