📄 dsindexer.java

📁 dspace 用j2ee架构的一个数字图书馆.开源程序
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
        }
    }

    /**
     * Write the index record for a community.
     *
     * Only the community's "name" metadata and its handle are indexed; the
     * short description and introductory text are not.
     *
     * @param c
     *            context used to look up the handle
     * @param writer
     *            open index writer the record is added to
     * @param target
     *            community to index
     */
    private static void writeCommunityIndex(Context c, IndexWriter writer,
            Community target) throws SQLException, IOException
    {
        String myhandle = HandleManager.findHandle(c, target);

        // values to index, keyed by index field name
        HashMap textvalues = new HashMap();
        textvalues.put("name", target.getMetadata("name"));
        textvalues.put("handletext", myhandle);

        writeIndexRecord(writer, Constants.COMMUNITY, myhandle, textvalues, "");
    }

    /**
     * Write the index record for a collection.
     *
     * Indexes the collection's "name" metadata, its location string and its
     * handle; the short description and introductory text are not indexed.
     *
     * @param c
     *            context used to look up the handle and location
     * @param writer
     *            open index writer the record is added to
     * @param target
     *            collection to index
     */
    private static void writeCollectionIndex(Context c, IndexWriter writer,
            Collection target) throws SQLException, IOException
    {
        String location_text = buildCollectionLocationString(c, target);
        String myhandle = HandleManager.findHandle(c, target);

        // values to index, keyed by index field name
        HashMap textvalues = new HashMap();
        textvalues.put("name", target.getMetadata("name"));
        textvalues.put("location", location_text);
        textvalues.put("handletext", myhandle);

        writeIndexRecord(writer, Constants.COLLECTION, myhandle, textvalues, "");
    }

    /**
     * Write the index record for an item - the index record is a set of
     * name/value pairs which are sent to Lucene.
     *
     * The metadata fields to index are read from the configuration properties
     * search.index.1, search.index.2, ... each of the form
     * <code>indexName:schema.element[.qualifier]</code>; a qualifier of
     * <code>*</code> matches any qualifier. Several properties may feed the
     * same index name, in which case their values are concatenated. If no such
     * property exists, a fixed set of Dublin Core fields is indexed instead
     * (backward compatibility). The text of every bitstream in a bundle named
     * "TEXT" is passed on as the full text of the item.
     *
     * @param c
     *            context used to look up the handle and location
     * @param writer
     *            open index writer the record is added to
     * @param myitem
     *            item to index
     * @throws RuntimeException
     *             if a search.index.N property is malformed
     */
    private static void writeItemIndex(Context c, IndexWriter writer,
            Item myitem) throws SQLException, IOException
    {
        // FIXME: config reading should happen just once & be cached?

        // get the location string (for searching by collection & community)
        String location_text = buildItemLocationString(c, myitem);

        // read in search.index.1, search.index.2.... from the config
        ArrayList indexes = new ArrayList();
        for (int n = 1; ConfigurationManager.getProperty("search.index." + n) != null; n++)
        {
            indexes.add(ConfigurationManager.getProperty("search.index." + n));
        }

        // values to index, keyed by index field name
        HashMap textvalues = new HashMap();

        if (indexes.size() > 0)
        {
            for (int i = 0; i < indexes.size(); i++)
            {
                String index = (String) indexes.get(i);
                String[] configLine = index.split(":");

                // Get the schema, element and qualifier for the index
                // TODO: Should check valid schema, element, qualifier?
                String[] parts = (configLine.length > 1)
                        ? configLine[1].split("\\.")
                        : new String[0];

                if (parts.length != 2 && parts.length != 3)
                {
                    // list position i holds property number i + 1, because
                    // the properties are numbered starting from 1
                    String message = "Malformed configuration line: search.index."
                            + (i + 1);
                    log.warn(message);

                    // FIXME: Can't proceed here, no suitable exception to throw
                    throw new RuntimeException(message);
                }

                String indexName = configLine[0];
                String schema = parts[0];
                String element = parts[1];
                String qualifier = (parts.length == 3) ? parts[2] : null;

                // extract metadata (ANY is wildcard from Item class)
                DCValue[] mydc;
                if ("*".equals(qualifier))
                {
                    mydc = myitem.getMetadata(schema, element, Item.ANY, Item.ANY);
                }
                else
                {
                    mydc = myitem.getMetadata(schema, element, qualifier, Item.ANY);
                }

                // pack the array of DCValues into one plain text string
                String content_text = concatDCValues(mydc);

                // an index name may be fed by several config lines: the new
                // values go in front of what has been collected so far
                if (textvalues.containsKey(indexName))
                {
                    content_text = content_text
                            + (String) textvalues.get(indexName) + " ";
                }
                textvalues.put(indexName, content_text);
            }

            textvalues.put("location", location_text);
        }
        else
        // if no search indexes found in cfg file, for backward compatibility
        {
            // extract metadata (ANY is wildcard from Item class)
            DCValue[] authors = myitem.getDC("contributor", Item.ANY, Item.ANY);
            DCValue[] creators = myitem.getDC("creator", Item.ANY, Item.ANY);
            DCValue[] titles = myitem.getDC("title", Item.ANY, Item.ANY);
            DCValue[] keywords = myitem.getDC("subject", Item.ANY, Item.ANY);
            DCValue[] abstracts = myitem.getDC("description", "abstract",
                    Item.ANY);
            DCValue[] sors = myitem.getDC("description",
                    "statementofresponsibility", Item.ANY);
            DCValue[] series = myitem.getDC("relation", "ispartofseries",
                    Item.ANY);
            DCValue[] tocs = myitem.getDC("description", "tableofcontents",
                    Item.ANY);
            DCValue[] mimetypes = myitem.getDC("format", "mimetype", Item.ANY);
            DCValue[] sponsors = myitem.getDC("description", "sponsorship",
                    Item.ANY);
            DCValue[] identifiers = myitem.getDC("identifier", Item.ANY,
                    Item.ANY);

            // build the hash; creators and statements of responsibility are
            // indexed as authors, tables of contents as part of the abstract
            textvalues.put("author", concatDCValues(authors)
                    + concatDCValues(creators) + concatDCValues(sors));
            textvalues.put("title", concatDCValues(titles));
            textvalues.put("keyword", concatDCValues(keywords));
            textvalues.put("location", location_text);
            textvalues.put("abstract", concatDCValues(abstracts)
                    + concatDCValues(tocs));
            textvalues.put("series", concatDCValues(series));
            textvalues.put("mimetype", concatDCValues(mimetypes));
            textvalues.put("sponsor", concatDCValues(sponsors));
            textvalues.put("identifier", concatDCValues(identifiers));
        }

        // now get full text of any bitstreams in the TEXT bundle
        String extractedText = readTextBundles(myitem);

        // lastly, get the handle
        String itemhandle = HandleManager.findHandle(c, myitem);
        textvalues.put("handletext", itemhandle);

        if (log.isDebugEnabled())
        {
            log.debug(LogManager.getHeader(c, "write_index", "handle=" + itemhandle));
            log.debug(textvalues.toString());
        }

        // write out the metadata (for scalability, using hash instead of
        // individual strings)
        writeIndexRecord(writer, Constants.ITEM, itemhandle, textvalues,
                extractedText);
    }

    /**
     * Concatenate the values of an array of DCValues into one string, each
     * value followed by a single space.
     *
     * @param values
     *            metadata values to pack
     * @return the packed text, empty if the array is empty
     */
    private static String concatDCValues(DCValue[] values)
    {
        StringBuffer packed = new StringBuffer();

        for (int n = 0; n < values.length; n++)
        {
            packed.append(values[n].value).append(" ");
        }

        return packed.toString();
    }

    /**
     * Read the text of every bitstream held in a bundle named "TEXT" of the
     * given item and return it as one string, in bundle/bitstream order with
     * no separator in between.
     *
     * @param myitem
     *            item whose TEXT bundles are read
     * @return the concatenated text, empty if there is none
     */
    private static String readTextBundles(Item myitem) throws SQLException,
            IOException
    {
        StringBuffer extracted = new StringBuffer();
        Bundle[] myBundles = myitem.getBundles();

        for (int i = 0; i < myBundles.length; i++)
        {
            if (!"TEXT".equals(myBundles[i].getName()))
            {
                continue;
            }

            // a-ha! grab the text out of the bitstreams
            Bitstream[] myBitstreams = myBundles[i].getBitstreams();

            for (int j = 0; j < myBitstreams.length; j++)
            {
                try
                {
                    // NOTE(review): the stream is decoded with the platform
                    // default charset - confirm this matches how the
                    // extracted text was written
                    InputStreamReader is = new InputStreamReader(
                            myBitstreams[j].retrieve());

                    try
                    {
                        char[] charBuffer = new char[1024];
                        int charsIn;

                        while ((charsIn = is.read(charBuffer)) != -1)
                        {
                            extracted.append(charBuffer, 0, charsIn);
                        }
                    }
                    finally
                    {
                        // always release the underlying bitstream stream
                        is.close();
                    }
                }
                catch (AuthorizeException e)
                {
                    // not expected to happen; the bitstream is skipped, but
                    // leave a trace instead of failing silently
                    log.warn("Not authorized to read extracted text bitstream: "
                            + e);
                }
            }
        }

        return extracted.toString();
    }

    /**
     * writeIndexRecord() creates a document from its args and writes it out to
     * the index that is opened.
     *
     * Every non-null entry of <code>textvalues</code> becomes a text field of
     * its own; all of those values plus <code>extractedText</code> are also
     * combined into the "default" field. Null values are left out entirely.
     *
     * @param iw
     *            open index writer the document is added to
     * @param type
     *            object type constant, stored in the unindexed "type" field
     * @param handle
     *            handle of the object; no "handle" field is added if null
     * @param textvalues
     *            map of index field name (String) to text (String)
     * @param extractedText
     *            additional full text for the "default" field; may be empty
     */
    private static void writeIndexRecord(IndexWriter iw, int type,
            String handle, HashMap textvalues, String extractedText)
            throws IOException
    {
        Document doc = new Document();

        // do id, type, handle first
        doc.add(Field.UnIndexed("type", Integer.toString(type)));

        // want to be able to search for handle, so use keyword
        // (not tokenized, but it is indexed)
        if (handle != null)
        {
            doc.add(Field.Keyword("handle", handle));
        }

        // now iterate through the hash, building full text string
        // and index all values
        StringBuffer fulltext = new StringBuffer();
        Iterator i = textvalues.keySet().iterator();

        while (i.hasNext())
        {
            String key = (String) i.next();
            String value = (String) textvalues.get(key);

            // a missing value must not put the literal word "null" into
            // the full text
            if (value != null)
            {
                fulltext.append(" ").append(value);
                doc.add(Field.Text(key, value));
            }
        }

        if (extractedText != null)
        {
            // the separator keeps the last metadata value and the first word
            // of the extracted text from running together
            fulltext.append(" ").append(extractedText);
        }

        // add the full text
        doc.add(Field.Text("default", fulltext.toString()));

        // index the document
        iw.addDocument(doc);
    }
}
上一页 12
💿 文件大小 9847 K
👤 上传用户 LiuRong
📂 所属分类 Java编程
🏷️ 相关标签

#dspace #j2ee #架构 #数字图书馆
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -