📄 filescanner.java
字号:
/* -*- mode:java; indent-tabs-mode:nil; c-basic-offset:2 -*- * $RCSFile$ $Revision: 1.27 $ $Date: 2006/02/01 00:20:31 $ * Copyright (c) 2001-2002 Autonomy Corp. All Rights Reserved. */import java.io.*;import java.net.*;import java.util.*;import com.ultraseek.xpa.server.*;/** * A program to scan a directory tree and index the * files found there, using Ultraseek. * <p> * This is an example of a boss/worker threading design. * A "boss" thread finds files and adds them to a queue. * One or more "worker" threads remove files from the * queue and process them. The worker threads don't * quit until the work queue is empty and the boss * thread has signaled that it is "done", that is, * nothing more will be added to the queue. * <p> * Before using this, create an indexer collection on * an Ultraseek instance. Pass in the server hostname, * port number, collection internal name, admin username, * and admin password as arguments. * * @see com.ultraseek.xpa.server.ServerAdmin * @see com.ultraseek.xpa.server.UltraseekServer * @see com.ultraseek.xpa.server.IndexerCollection * @see com.ultraseek.xpa.server.IndexerAdmin#insert */class FileScanner implements Runnable { static final int MAX_QUEUE_SIZE = 5000; static final int SCANNER_SLEEP = 10000; // 10 seconds, in milliseconds static final String DEFAULT_MIME = "text/plain"; static int verbose = 0; static boolean finding = false; static boolean done = false; static Queue files = new Queue(); static IndexerAdmin indexer; static Map doctypes = new HashMap(); static Set allowedDoctypes = new HashSet(); static int numberOfIndexedFiles = 0; static int numberOfRejectedFiles = 0; static long numberOfIndexedBytes = 0; static String defaultMime = DEFAULT_MIME; static { // Add extension to doctype mappings here. // Alphabetical by mimetype doctypes.put(".doc", "application/msword"); doctypes.put(".pdf", "application/pdf"); doctypes.put(".rtf", "application/rtf"); doctypes.put(".xls", "application/vnd.ms-excel"); doctypes.put(".ppt", "application/vnd.ms-powerpoint"); doctypes.put(".mp3", "video/mpeg"); doctypes.put(".mpg", "video/mpeg"); doctypes.put(".mpeg", "video/mpeg"); doctypes.put(".qt", "video/quicktime"); doctypes.put(".mov", "video/quicktime"); doctypes.put(".gif", "image/gif"); doctypes.put(".jpg", "image/jpeg"); doctypes.put(".png", "image/png"); doctypes.put(".tif", "image/tiff"); doctypes.put(".tiff", "image/tiff"); doctypes.put(".html", "text/html"); doctypes.put(".htm", "text/html"); doctypes.put(".txt", "text/plain"); doctypes.put(".readme", "text/plain"); doctypes.put(".wml", "text/vnd.wap.wml"); doctypes.put(".xml", "text/xml"); // List the ones we want to index allowedDoctypes.add("application/msword"); allowedDoctypes.add("application/pdf"); allowedDoctypes.add("application/rtf"); allowedDoctypes.add("application/vnd.ms-excel"); allowedDoctypes.add("application/vnd.ms-powerpoint"); allowedDoctypes.add("message/rfc822"); allowedDoctypes.add("text/html"); allowedDoctypes.add("text/plain"); allowedDoctypes.add("text/vnd.wap.wml"); allowedDoctypes.add("text/xml"); } public static void main(String[] args) { String searchHost = null; int searchPort = 80; String searchCol = null; String searchUser = null; String searchPass = null; int numberOfFiles = 0; List threads = new ArrayList(); List roots = new LinkedList(); int nThreads = 1; if (args.length == 0) { usage(); return; } for (int i = 0; i < args.length; i++) { // Options handled in alphabetical order. if (args[i].equals("-c") && // collection internal name on search server i+1 < args.length) { searchCol = args[i+1]; i++; } if (args[i].equals("-d") && // a directory root i+1 < args.length) { roots.add(args[i+1]); i++; } if (args[i].equals("-m") && // default MIME content-type i+1 < args.length) { defaultMime = args[i+1]; i++; } if (args[i].equals("-p") && // port for search server i+1 < args.length) { searchPort = parseInt(args[i+1], 80); if (searchPort < 1 || searchPort > 0xffff) { log(0,"illegal value for -p (port), should be from "+ "1 to 65535, is "+searchPort); usage(); return; } i++; } if (args[i].equals("-s") && // hostname for search server i+1 < args.length) { searchHost = args[i+1]; i++; } if (args[i].equals("-t") && // number of threads i+1 < args.length) { nThreads = parseInt(args[i+1], 1); if (nThreads < 1) { log(0,"illegal value for -t (number of threads), should be "+ "greater than 0, is "+nThreads); usage(); return; } i++; } if (args[i].equals("-u") && // user name for search server i+1 < args.length) { searchUser = args[i+1]; i++; } if (args[i].equals("-v")) { // verbose verbose++; } if (args[i].equals("-z") && // password for search server i+1 < args.length) { searchPass = args[i+1]; i++; } } if (searchHost == null || searchCol == null || searchUser == null || searchPass == null) { usage(); return; } // No directories specified on command line, start in current. if (roots.isEmpty()) { roots.add("."); } log(1,"server: "+searchHost+":"+searchPort); log(1,"collection: "+searchCol); log(1,"user: "+searchUser+":"+searchPass); // Connect to search server try { UltraseekServer server = new UltraseekServer(searchHost, searchPort); IndexerCollection col = (IndexerCollection)server.getSearchCollection(searchCol); if (col == null) throw new IOException("Cannot find collection " + searchCol + " on " + searchHost + ":" + searchPort ); ServerAdmin admin = server.admin(searchUser, searchPass); indexer = (IndexerAdmin)col.admin(admin); } catch (IOException ex) { // Couldn't connect, give up. System.out.println(ex.getMessage()); return; } long startTime = System.currentTimeMillis(); // Create worker threads for (int n = 0; n < nThreads; n++) { Thread th = new Thread(new FileScanner()); th.setName("FileScanner#" + n); log(2,"created thread: "+th.getName()); threads.add(th); th.start(); log(1,"started thread: "+th.getName()); } // Start finding files. // Do a breadth-first walk of directories, adding files // to the end of the queue for the scanning threads. LinkedList dirs = new LinkedList(roots); while (!dirs.isEmpty()) { String dirname = (String)(dirs.removeFirst()); log(2,"opening directory "+dirname); File dir = new File(dirname); String[] names = dir.list(); if (names == null) continue; for (int j = 0; j<names.length; j++) { log(3,"checking "+names[j]); File file = new File(dirname,names[j]); if (!file.exists()) { log(1,"doesn't exist "+file); continue; } if (file.isDirectory()) { dirs.addLast(file.getPath()); log(1,"added directory "+file); } else { files.addLast(file.getPath()); numberOfFiles++; finding = true; log(3,"added file "+file); } } // No point in letting the queue get huge. // Wait until the worker threads have done // some work before continuing. if (files.size() > MAX_QUEUE_SIZE) { log(1,"pausing file finding, so worker threads can catch up"); while (files.size() > MAX_QUEUE_SIZE) { try { Thread.sleep(SCANNER_SLEEP); } catch (InterruptedException ex) {} } log(1,"resuming file finding"); } } done = true;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -