📄 abstractmetsingester.java
字号:
/* * AbstractMETSIngester * * Version: $Revision: 1.1 $ * * Date: $Date: 2006/03/17 00:04:38 $ * * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts * Institute of Technology. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * - Neither the name of the Hewlett-Packard Company nor the name of the * Massachusetts Institute of Technology nor the names of their * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. */package org.dspace.content.packager;import java.io.IOException;import java.io.InputStream;import java.sql.SQLException;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.List;import java.util.Map;import java.util.Set;import java.util.zip.ZipEntry;import java.util.zip.ZipInputStream;import org.apache.log4j.Logger;import org.dspace.app.mediafilter.MediaFilter;import org.dspace.authorize.AuthorizeException;import org.dspace.content.Bitstream;import org.dspace.content.BitstreamFormat;import org.dspace.content.Bundle;import org.dspace.content.Collection;import org.dspace.content.FormatIdentifier;import org.dspace.content.Item;import org.dspace.content.WorkspaceItem;import org.dspace.content.crosswalk.CrosswalkException;import org.dspace.content.crosswalk.MetadataValidationException;import org.dspace.core.ConfigurationManager;import org.dspace.core.Constants;import org.dspace.core.Context;import org.dspace.core.LogManager;import org.dspace.core.PluginManager;import org.jdom.Element;/** * Base class for package ingester of * METS (Metadata Encoding & Transmission Standard) Package.<br> * See <a href="http://www.loc.gov/standards/mets/">http://www.loc.gov/standards/mets/</a> * <p> * This is a generic packager framework intended to be subclassed to create * ingesters for more specific METS "profiles". METS is an * abstract and flexible framework that can encompass many * different kinds of metadata and inner package structures. * <p> * <b>Configuration:</b> * If the property <code>mets.submission.preserveManifest</code> is <em>true</em>, * the METS manifest itself is preserved in a bitstream named * <code>mets.xml</code> in the <code>METADATA</code> bundle. If it is * <em>false</em> (the default), the manifest is discarded after ingestion. * * @author Larry Stone * @version $Revision: 1.1 $ * @see org.dspace.content.packager.METSManifest */public abstract class AbstractMETSIngester implements PackageIngester{ /** log4j category */ private static Logger log = Logger.getLogger(AbstractMETSIngester.class); /** Filename of manifest, relative to package toplevel. */ public static final String MANIFEST_FILE = "mets.xml"; // bitstream format name of magic METS SIP format.. private static final String MANIFEST_BITSTREAM_FORMAT = "DSpace METS SIP"; // value of mets.submission.preserveManifest config key private static final boolean preserveManifest = ConfigurationManager.getBooleanProperty("mets.submission.preserveManifest", false); /** * An instance of MdrefManager holds the state needed to * retrieve the contents (or bitstream corresponding to) an * external metadata stream referenced by an <code>mdRef</code> * element in the METS manifest. * <p> * Initialize it with the DSpace Bundle containing all of the * metadata bitstreams. Match an mdRef by finding the bitstream * with the same name. */ protected class MdrefManager implements METSManifest.Mdref { private Bundle mdBundle = null; // constructor initializes metadata bundle. private MdrefManager(Bundle mdBundle) { super(); this.mdBundle = mdBundle; } /** * Find the local Bitstream referenced in * an <code>mdRef</code> element. * @param mdref the METS mdRef element to locate the bitstream for. * @return bitstream or null if none found. */ public Bitstream getBitstreamForMdRef(Element mdref) throws MetadataValidationException, IOException, SQLException, AuthorizeException { String path = METSManifest.getFileName(mdref); if (mdBundle == null) throw new MetadataValidationException("Failed referencing mdRef element, because there were no metadata files."); return mdBundle.getBitstreamByName(path); } /** * Make the contents of an external resource mentioned in * an <code>mdRef</code> element available as an <code>InputStream</code>. * See the <code>METSManifest.MdRef</code> interface for details. * @param mdref the METS mdRef element to locate the input for. * @return the input stream of its content. */ public InputStream getInputStream(Element mdref) throws MetadataValidationException, IOException, SQLException, AuthorizeException { Bitstream mdbs = getBitstreamForMdRef(mdref); if (mdbs == null) throw new MetadataValidationException("Failed dereferencing bitstream for mdRef element="+mdref.toString()); return mdbs.retrieve(); } } /** * Create a new DSpace item out of a METS content package. * All contents are dictated by the METS manifest. * Package is a ZIP archive, all files relative to top level * and the manifest (as per spec) in mets.xml. * * @param context - DSpace context. * @param collection - collection under which to create new item. * @param pkg - input stream containing package to ingest. * @param license - may be null, which takes default license. * @return workspace item created by ingest. * @throws PackageValidationException if package is unacceptable or there is * a fatal error turning it into an Item. */ public WorkspaceItem ingest(Context context, Collection collection, InputStream pkg, PackageParameters params, String license) throws PackageValidationException, CrosswalkException, AuthorizeException, SQLException, IOException { ZipInputStream zip = new ZipInputStream(pkg); HashMap fileIdToBitstream = new HashMap(); WorkspaceItem wi = null; boolean success = false; HashSet packageFiles = new HashSet(); boolean validate = params.getBooleanProperty("validate", true); try { /* 1. Read all the files in the Zip into bitstreams first, * because we only get to take one pass through a Zip input * stream. Give them temporary bitstream names corresponding * to the same names they had in the Zip, since those MUST * match the URL references in <Flocat> and <mdRef> elements. */ METSManifest manifest = null; wi = WorkspaceItem.create(context, collection, false); Item item = wi.getItem(); Bundle contentBundle = item.createBundle(Constants.CONTENT_BUNDLE_NAME); Bundle mdBundle = null; ZipEntry ze; while ((ze = zip.getNextEntry()) != null) { if (ze.isDirectory()) continue; Bitstream bs = null; String fname = ze.getName(); if (fname.equals(MANIFEST_FILE)) { if (preserveManifest) { mdBundle = item.createBundle(Constants.METADATA_BUNDLE_NAME); bs = mdBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip)); bs.setName(fname); bs.setSource(fname); // Get magic bitstream format to identify manifest. BitstreamFormat manifestFormat = null; manifestFormat = PackageUtils.findOrCreateBitstreamFormat(context, MANIFEST_BITSTREAM_FORMAT, "application/xml", MANIFEST_BITSTREAM_FORMAT+" package manifest"); bs.setFormat(manifestFormat); manifest = METSManifest.create(bs.retrieve(), validate); } else { manifest = METSManifest.create(new PackageUtils.UnclosableInputStream(zip), validate); continue; } } else { bs = contentBundle.createBitstream(new PackageUtils.UnclosableInputStream(zip)); bs.setSource(fname); bs.setName(fname); } packageFiles.add(fname); bs.setSource(fname); bs.update(); } zip.close(); if (manifest == null) throw new PackageValidationException("No METS Manifest found (filename="+MANIFEST_FILE+"). Package is unacceptable."); // initial sanity checks on manifest (in subclass) checkManifest(manifest); /* 2. Grovel a file list out of METS Manifest and compare * it to the files in package, as an integrity test. */ List manifestContentFiles = manifest.getContentFiles(); // Compare manifest files with the ones found in package: // a. Start with content files (mentioned in <fileGrp>s) HashSet missingFiles = new HashSet(); for (Iterator mi = manifestContentFiles.iterator(); mi.hasNext(); ) { // First locate corresponding Bitstream and make // map of Bitstream to <file> ID. Element mfile = (Element)mi.next(); String mfileId = mfile.getAttributeValue("ID"); if (mfileId == null) throw new PackageValidationException("Invalid METS Manifest: file element without ID attribute."); String path = METSManifest.getFileName(mfile); Bitstream bs = contentBundle.getBitstreamByName(path); if (bs == null) { log.warn("Cannot find bitstream for filename=\""+path+ "\", skipping it..may cause problems later."); missingFiles.add(path); } else { fileIdToBitstream.put(mfileId, bs); // Now that we're done using Name to match to <file>, // set default bitstream Name to last path element; // Zip entries all have '/' pathname separators // NOTE: set default here, hopefully crosswalk of // a bitstream techMD section will override it. String fname = bs.getName(); int lastSlash = fname.lastIndexOf('/'); if (lastSlash >= 0 && lastSlash+1 < fname.length()) bs.setName(fname.substring(lastSlash+1)); // Set Default bitstream format: // 1. attempt to guess from MIME type // 2. if that fails, guess from "name" extension. String mimeType = mfile.getAttributeValue("MIMETYPE"); BitstreamFormat bf = (mimeType == null) ? null : BitstreamFormat.findByMIMEType(context, mimeType); if (bf == null) bf = FormatIdentifier.guessFormat(context, bs); bs.setFormat(bf); // if this bitstream belongs in another Bundle, move it:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -