📄 indexer.groovy
字号:
import org.apache.lucene.analysis.standard.StandardAnalyzer
import org.apache.lucene.document.Document
import org.apache.lucene.document.Field
import org.apache.lucene.index.IndexWriter

/**
 * Indexer: traverses a file system and indexes .txt files
 *
 * @author Jeremy Rayner <groovy@ross-rayner.com>
 * based on examples in the wonderful 'Lucene in Action' book
 * by Erik Hatcher and Otis Gospodnetic ( http://www.lucenebook.com )
 *
 * requires a lucene-1.x.x.jar from http://lucene.apache.org
 */

// The script expects exactly two arguments: the index directory and the data directory.
if (args.size() != 2) {
    throw new Exception("Usage: groovy -cp lucene-1.4.3.jar Indexer <index dir> <data dir>")
}

def indexDir = new File(args[0]) // Create Lucene index in this directory
def dataDir = new File(args[1])  // Index files in this directory

// Time the whole indexing run in wall-clock milliseconds.
def start = new Date().time
def numIndexed = index(indexDir, dataDir)
def end = new Date().time

println "Indexing $numIndexed files took ${end - start} milliseconds"

/**
 * Recursively indexes every file under dataDir whose name ends in ".txt"
 * into a newly created Lucene index at indexDir.
 *
 * @param indexDir directory in which the Lucene index is created
 * @param dataDir  directory whose files are walked recursively
 * @return the writer's document count after all files have been added
 * @throws IOException if dataDir does not exist or is not a directory
 */
def index(indexDir, dataDir) {
    if (!dataDir.exists() || !dataDir.directory) {
        throw new IOException("$dataDir does not exist or is not a directory")
    }

    // Third argument 'true' asks the writer to create the index.
    def writer = new IndexWriter(indexDir, new StandardAnalyzer(), true)
    try {
        writer.useCompoundFile = false

        dataDir.eachFileRecurse { file ->
            // Index .txt files only. A literal suffix check is used because the
            // pattern /.txt$/ has an unescaped dot and also accepts names such as "footxt".
            if (file.name.endsWith('.txt')) {
                indexFile(writer, file)
            }
        }

        def numIndexed = writer.docCount()
        writer.optimize()
        return numIndexed
    } finally {
        // Close the index even when a file fails to index, so the writer is not left open.
        writer.close()
    }
}

/**
 * Adds a single file to the index as one document with two fields:
 * "contents" (read from the file) and "filename" (the canonical path).
 * Hidden, missing, unreadable files and directories are skipped silently.
 *
 * @param writer the open IndexWriter the document is added to
 * @param f      the file to index
 */
void indexFile(writer, f) {
    if (f.hidden || !f.exists() || !f.canRead() || f.directory) {
        return
    }

    println "Indexing $f.canonicalPath"

    // NOTE(review): FileReader decodes with the platform default charset — confirm this suits the data.
    def reader = new FileReader(f)
    try {
        def doc = new Document()

        // Construct a Field that is tokenized and indexed, but is not stored in the index verbatim.
        doc.add(Field.Text("contents", reader))

        // Construct a Field that is not tokenized, but is indexed and stored.
        doc.add(Field.Keyword("filename", f.canonicalPath))

        writer.addDocument(doc) // Add document to Lucene index
    } finally {
        // Release the file handle even if adding the document fails;
        // closing a Reader that is already closed has no effect.
        reader.close()
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -