📄 dsindexer.java
字号:
} } /** * write index record for a community */ private static void writeCommunityIndex(Context c, IndexWriter writer, Community target) throws SQLException, IOException { // build a hash for the metadata HashMap textvalues = new HashMap(); // get the handle String myhandle = HandleManager.findHandle(c, target); // and populate it String name = target.getMetadata("name"); // String description = target.getMetadata("short_description"); // String intro_text = target.getMetadata("introductory_text"); textvalues.put("name", name); // textvalues.put("description", description); // textvalues.put("intro_text", intro_text ); textvalues.put("handletext", myhandle); writeIndexRecord(writer, Constants.COMMUNITY, myhandle, textvalues, ""); } /** * write an index record for a collection */ private static void writeCollectionIndex(Context c, IndexWriter writer, Collection target) throws SQLException, IOException { String location_text = buildCollectionLocationString(c, target); // get the handle String myhandle = HandleManager.findHandle(c, target); // build a hash for the metadata HashMap textvalues = new HashMap(); // and populate it String name = target.getMetadata("name"); // String description = target.getMetadata("short_description"); // String intro_text = target.getMetadata("introductory_text"); textvalues.put("name", name); // textvalues.put("description",description ); // textvalues.put("intro_text", intro_text ); textvalues.put("location", location_text); textvalues.put("handletext", myhandle); writeIndexRecord(writer, Constants.COLLECTION, myhandle, textvalues, ""); } /** * writes an index record - the index record is a set of name/value hashes, * which are sent to Lucene. */ private static void writeItemIndex(Context c, IndexWriter writer, Item myitem) throws SQLException, IOException { // FIXME: config reading should happen just once & be cached? // get the location string (for searching by collection & community) String location_text = buildItemLocationString(c, myitem); // read in indexes from the config ArrayList indexes = new ArrayList(); // read in search.index.1, search.index.2.... for (int i = 1; ConfigurationManager.getProperty("search.index." + i) != null; i++) { indexes.add(ConfigurationManager.getProperty("search.index." + i)); } int j; int k = 0; // initialize hash to be built HashMap textvalues = new HashMap(); if (indexes.size() > 0) { ArrayList fields = new ArrayList(); ArrayList content = new ArrayList(); DCValue[] mydc; for (int i = 0; i < indexes.size(); i++) { String index = (String) indexes.get(i); String[] configLine = index.split(":"); String indexName = configLine[0]; String schema; String element; String qualifier = null; // Get the schema, element and qualifier for the index // TODO: Should check valid schema, element, qualifier? String[] parts = configLine[1].split("\\."); switch (parts.length) { case 3: qualifier = parts[2]; case 2: schema = parts[0]; element = parts[1]; break; default: log.warn("Malformed configuration line: search.index." + i); // FIXME: Can't proceed here, no suitable exception to throw throw new RuntimeException( "Malformed configuration line: search.index." + i); } // extract metadata (ANY is wildcard from Item class) if (qualifier!= null && qualifier.equals("*")) { mydc = myitem.getMetadata(schema, element, Item.ANY, Item.ANY); } else { mydc = myitem.getMetadata(schema, element, qualifier, Item.ANY); } // put them all from an array of strings to one string for // writing out pack all of the arrays of DCValues into plain // text strings for the indexer String content_text = ""; for (j = 0; j < mydc.length; j++) { content_text = new String(content_text + mydc[j].value + " "); } // arranges content with fields in ArrayLists with same index to // put // into hash later k = fields.indexOf(indexName); if (k < 0) { fields.add(indexName); content.add(content_text); } else { content_text = new String(content_text + (String) content.get(k) + " "); content.set(k, content_text); } } // build the hash for (int i = 0; i < fields.size(); i++) { textvalues.put((String) fields.get(i), (String) content.get(i)); } textvalues.put("location", location_text); } else // if no search indexes found in cfg file, for backward compatibility { // extract metadata (ANY is wildcard from Item class) DCValue[] authors = myitem.getDC("contributor", Item.ANY, Item.ANY); DCValue[] creators = myitem.getDC("creator", Item.ANY, Item.ANY); DCValue[] titles = myitem.getDC("title", Item.ANY, Item.ANY); DCValue[] keywords = myitem.getDC("subject", Item.ANY, Item.ANY); DCValue[] abstracts = myitem.getDC("description", "abstract", Item.ANY); DCValue[] sors = myitem.getDC("description", "statementofresponsibility", Item.ANY); DCValue[] series = myitem.getDC("relation", "ispartofseries", Item.ANY); DCValue[] tocs = myitem.getDC("description", "tableofcontents", Item.ANY); DCValue[] mimetypes = myitem.getDC("format", "mimetype", Item.ANY); DCValue[] sponsors = myitem.getDC("description", "sponsorship", Item.ANY); DCValue[] identifiers = myitem.getDC("identifier", Item.ANY, Item.ANY); // put them all from an array of strings to one string for writing // out String author_text = ""; String title_text = ""; String keyword_text = ""; String abstract_text = ""; String series_text = ""; String mime_text = ""; String sponsor_text = ""; String id_text = ""; // pack all of the arrays of DCValues into plain text strings for // the // indexer for (j = 0; j < authors.length; j++) { author_text = new String(author_text + authors[j].value + " "); } for (j = 0; j < creators.length; j++) //also authors { author_text = new String(author_text + creators[j].value + " "); } for (j = 0; j < sors.length; j++) //also authors { author_text = new String(author_text + sors[j].value + " "); } for (j = 0; j < titles.length; j++) { title_text = new String(title_text + titles[j].value + " "); } for (j = 0; j < keywords.length; j++) { keyword_text = new String(keyword_text + keywords[j].value + " "); } for (j = 0; j < abstracts.length; j++) { abstract_text = new String(abstract_text + abstracts[j].value + " "); } for (j = 0; j < tocs.length; j++) { abstract_text = new String(abstract_text + tocs[j].value + " "); } for (j = 0; j < series.length; j++) { series_text = new String(series_text + series[j].value + " "); } for (j = 0; j < mimetypes.length; j++) { mime_text = new String(mime_text + mimetypes[j].value + " "); } for (j = 0; j < sponsors.length; j++) { sponsor_text = new String(sponsor_text + sponsors[j].value + " "); } for (j = 0; j < identifiers.length; j++) { id_text = new String(id_text + identifiers[j].value + " "); } // build the hash textvalues.put("author", author_text); textvalues.put("title", title_text); textvalues.put("keyword", keyword_text); textvalues.put("location", location_text); textvalues.put("abstract", abstract_text); textvalues.put("series", series_text); textvalues.put("mimetype", mime_text); textvalues.put("sponsor", sponsor_text); textvalues.put("identifier", id_text); } // now get full text of any bitstreams in the TEXT bundle String extractedText = ""; // trundle through the bundles Bundle[] myBundles = myitem.getBundles(); for (int i = 0; i < myBundles.length; i++) { if ((myBundles[i].getName() != null) && myBundles[i].getName().equals("TEXT")) { // a-ha! grab the text out of the bitstreams Bitstream[] myBitstreams = myBundles[i].getBitstreams(); for (j = 0; j < myBitstreams.length; j++) { try { InputStreamReader is = new InputStreamReader( myBitstreams[j].retrieve()); // get input // stream StringBuffer sb = new StringBuffer(); char[] charBuffer = new char[1024]; while (true) { int bytesIn = is.read(charBuffer); if (bytesIn == -1) { break; } if (bytesIn > 0) { sb.append(charBuffer, 0, bytesIn); } } // now sb has the full text - tack on to fullText string extractedText = extractedText.concat(new String(sb)); // System.out.println("Found extracted text!\n" + new // String(sb)); } catch (AuthorizeException e) { // this will never happen, but compiler is now happy. } } } } // lastly, get the handle String itemhandle = HandleManager.findHandle(c, myitem); textvalues.put("handletext", itemhandle); if (log.isDebugEnabled()) { log.debug(LogManager.getHeader(c, "write_index", "handle=" +itemhandle)); log.debug(textvalues.toString()); } // write out the metatdata (for scalability, using hash instead of // individual strings) writeIndexRecord(writer, Constants.ITEM, itemhandle, textvalues, extractedText); } /** * writeIndexRecord() creates a document from its args and writes it out to * the index that is opened */ private static void writeIndexRecord(IndexWriter iw, int type, String handle, HashMap textvalues, String extractedText) throws IOException { Document doc = new Document(); Integer ty = new Integer(type); String fulltext = ""; // do id, type, handle first doc.add(Field.UnIndexed("type", ty.toString())); // want to be able to search for handle, so use keyword // (not tokenized, but it is indexed) if (handle != null) { doc.add(Field.Keyword("handle", handle)); } // now iterate through the hash, building full text string // and index all values Iterator i = textvalues.keySet().iterator(); while (i.hasNext()) { String key = (String) i.next(); String value = (String) textvalues.get(key); fulltext = fulltext + " " + value; if (value != null) { doc.add(Field.Text(key, value)); } } fulltext = fulltext.concat(extractedText); // System.out.println("Full Text:\n" + fulltext + "------------\n\n"); // add the full text doc.add(Field.Text("default", fulltext)); // index the document iw.addDocument(doc); }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -