📄 eaf24parser.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* * File:     EAF24Parser.java * Project:  MPI Linguistic Application * Date:     02 May 2007 * * Copyright (C) 2001-2007  Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */package mpi.eudico.server.corpora.clomimpl.dobes;import mpi.eudico.server.corpora.clom.TimeSlot;import mpi.eudico.server.corpora.clomimpl.abstr.LinkedFileDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.MediaDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.Parser;import mpi.eudico.server.corpora.clomimpl.abstr.PropertyImpl;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.ErrorHandler;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXParseException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.XMLReaderFactory;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.Locale;/** * A (SAX2) Parser for Elan Annotation Format (EAF) compliant XML files. 
*
 * @author Hennie Brugman
 * @author Han Sloetjes
 * @version 1-Dec-2003
 * @version jun 2004 addition of ControlledVocabularies
 * @version sep 2005 the constructor is now public giving up the singleton pattern
 * the path parameter of all getter methods can be removed in the next parser version
 * (replace by a public parse(String path) method)
 * @version Feb 2006 support for LinkedFileDescriptors and for stereotype
 * Included In is added. For compatibility reasons the filename parameter to the getters is maintained.
 * @version Dec 2006 element PROPERTY has been added to the HEADER element, attribute
 * ANNOTATOR has been added to element TIER
 */
public class EAF24Parser extends Parser {
    /**
     * NOTE(review): presumably toggles diagnostic output; no use of this flag
     * is visible in this part of the file - confirm before relying on it.
     */
    private boolean verbose = false;

    /** The SAX reader; assigned in the constructor and used by parse(String). */
    private XMLReader reader;

    /** stores tiername - tierrecord pairs (values are cast to TierRecord by the tier getters) */
    private final HashMap tierMap = new HashMap();

    /** a map with tiername - ArrayList with Annotation Records pairs */
    private final HashMap tiers = new HashMap();

    /** The tier names; returned by getTierNames(String). Kept in a list so the names stay in order. */
    private final ArrayList tierNames = new ArrayList();

    /**
     * The linguistic types; returned by getLinguisticTypes(String).
     * NOTE(review): the element type is not visible in this part of the file.
     */
    private final ArrayList linguisticTypes = new ArrayList();

    /** The locales of the document; the elements are cast to java.util.Locale by getDefaultLanguageOf. */
    private final ArrayList locales = new ArrayList();

    /**
     * The time slots; returned by getTimeSlots(String).
     * NOTE(review): presumably keyed by time slot id (see timeOrder) - confirm against the content handler.
     */
    private final HashMap timeSlots = new HashMap();

    /** stores the ControlledVocabulary objects by their ID */
    private final HashMap controlledVocabularies = new HashMap();

    /** The document properties; returned by getTranscriptionProperties(String) as a list of PropertyImpl objects. */
    private final ArrayList docProperties = new ArrayList();

    /** stores the time slots ordered by id */
    private final ArrayList timeOrder = new ArrayList(); // since a HashMap is not ordered, all time_slot_ids have to be stored in order separately.
private String mediaFile;    private ArrayList mediaDescriptors = new ArrayList();    private ArrayList linkedFileDescriptors = new ArrayList();    private String svgFile;    private String author;    private String currentTierId;    private String currentAnnotationId;    private AnnotationRecord currentAnnRecord;    private String currentCVId;    private CVEntryRecord currentEntryRecord;    private String content = "";    private String lastParsed = "";    private String currentFileName;    private PropertyImpl currentProperty;    private boolean parseError;    /**     * Constructor, creates a new XMLReader     *     */    public EAF24Parser() {        try {            reader = XMLReaderFactory.createXMLReader(                    "org.apache.xerces.parsers.SAXParser");            reader.setFeature("http://xml.org/sax/features/namespaces", true);            reader.setFeature("http://xml.org/sax/features/validation", true);            reader.setFeature("http://apache.org/xml/features/validation/schema",                true);            reader.setFeature("http://apache.org/xml/features/validation/dynamic",                true);            reader.setProperty("http://java.sun.com/xml/jaxp/properties/schemaSource",                this.getClass().getResource("/mpi/eudico/resources/EAFv2.4.xsd")                    .openStream());            //reader.setProperty("http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation",            //		"http://www.mpi.nl/tools/elan/EAFv2.4.xsd");            reader.setContentHandler(new EAFContentHandler());            //reader.setErrorHandler(new EAFErrorHandler());        } catch (SAXException se) {            se.printStackTrace();        } catch (IOException ioe) {            ioe.printStackTrace();        }    }    /**     * For backward compatibility; not used anymore     *     * @param fileName the eaf filename, parameter also for historic reasons     *     * @return media file name     */    public String 
getMediaFile(String fileName) {        parse(fileName);        return mediaFile;    }    /**     * Returns the media descriptors     *     * @param fileName the eaf filename, parameter also for historic reasons     *     * @return the media descriptors     */    public ArrayList getMediaDescriptors(String fileName) {        parse(fileName);        return mediaDescriptors;    }    /**     * Returns the linked file descriptors     *     * @param fileName the eaf file name, for historic reasons     *     * @return a list of linked file descriptors     */    public ArrayList getLinkedFileDescriptors(String fileName) {        parse(fileName);        return linkedFileDescriptors;    }    /**     * DOCUMENT ME!     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public String getSVGFile(String fileName) {        parse(fileName);        return svgFile;    }    /**     * DOCUMENT ME!     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public String getAuthor(String fileName) {        parse(fileName);        return author;    }    /**     * Returns a list of PropertyImpl objects that have been retrieved from the eaf.     *     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTranscriptionProperties(java.lang.String)     */    public ArrayList getTranscriptionProperties(String fileName) {        parse(fileName);        return docProperties;    }    /**     * DOCUMENT ME!     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public ArrayList getLinguisticTypes(String fileName) {        parse(fileName);        return linguisticTypes;    }    /**     * DOCUMENT ME!     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public ArrayList getTimeOrder(String fileName) {        parse(fileName);        return timeOrder;    }    /**     * DOCUMENT ME!     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     
*/
    public HashMap getTimeSlots(String fileName) {
        parse(fileName);

        return timeSlots;
    }

    /**
     * Returns a map of ArrayLists with the cv id's as keys.<br>
     * Each ArrayList can contain one String, the description, and an
     * unknown number of CVEntryRecords.
     *
     * @param fileName the eaf file name; the file is parsed first unless it is
     *        the file that was parsed most recently
     *
     * @return the controlled vocabularies map (the parser's own map, not a copy)
     */
    public HashMap getControlledVocabularies(String fileName) {
        parse(fileName);

        return controlledVocabularies;
    }

    /**
     * Returns the names of the tiers that are present in the transcription
     * file.
     *
     * @param fileName the eaf file name; the file is parsed first unless it is
     *        the file that was parsed most recently
     *
     * @return the list of tier names (the parser's own list, not a copy)
     */
    public ArrayList getTierNames(String fileName) {
        parse(fileName);

        return tierNames;
    }

    /**
     * Returns the participant attribute of a tier.
     * The tier record is not used in TranscriptionStore yet.
     *
     * @param tierName name of the tier
     * @param fileName the eaf file name; the file is parsed first unless it is
     *        the file that was parsed most recently
     *
     * @return the participant, or an empty string when the tier is unknown or
     *         has no participant
     */
    public String getParticipantOf(String tierName, String fileName) {
        parse(fileName);

        TierRecord record = (TierRecord) tierMap.get(tierName);

        if (record == null) {
            return "";
        }

        String participant = record.getParticipant();

        return (participant != null) ? participant : "";
    }

    /**
     * Returns the annotator attribute of a tier.
     * The tier record is not used in TranscriptionStore yet.
*     * @param tierName name of tier     * @param fileName the eaf     *     * @return the annotator of the tier     */    public String getAnnotatorOf(String tierName, String fileName) {        parse(fileName);        if (tierMap.get(tierName) != null) {            if (((TierRecord) tierMap.get(tierName)).getAnnotator() != null) {                return ((TierRecord) tierMap.get(tierName)).getAnnotator();            }        }        return "";    }    /**     * Returns the name of the linguistic type of a tier.     * The tier record is not used in TranscriptionStore yet.     *     * @param tierName the name of the tier     * @param fileName the eaf     *     * @return name of the type     */    public String getLinguisticTypeIDOf(String tierName, String fileName) {        parse(fileName);        if (tierMap.get(tierName) != null) {            if (((TierRecord) tierMap.get(tierName)).getLinguisticType() != null) {                return ((TierRecord) tierMap.get(tierName)).getLinguisticType();            }        }        return "";    }    /**     * Returns the Locale object for a tier.     *     * @param tierName the name of the tier     * @param fileName the eaf     *     * @return the default Locale object     */    public Locale getDefaultLanguageOf(String tierName, String fileName) {        parse(fileName);        Locale resultLoc = null;        String localeId = null;        if (tierMap.get(tierName) != null) {            localeId = ((TierRecord) tierMap.get(tierName)).getDefaultLocale();        }        Iterator locIter = locales.iterator();        while (locIter.hasNext()) {            Locale l = (Locale) locIter.next();            if (l.getLanguage().equals(localeId)) {                resultLoc = l;            }        }        return resultLoc;    }    /**     * Returns the name of the parent tier, if any.     
*     * @param tierName the name of the tier     * @param fileName the eaf     *     * @return the name of the parent tier, or null     */    public String getParentNameOf(String tierName, String fileName) {        parse(fileName);        if (tierMap.get(tierName) != null) {            return ((TierRecord) tierMap.get(tierName)).getParentTier();        }        return null;    }    /**     * Returns a ArrayList with the Annotations for this Tier. Each     * AnnotationRecord contains begin time, end time and text values     *     * @param tierName the name of the tier     * @param fileName the eaf     *     * @return ArrayList of AnnotationRecord objects for the tier     */    public ArrayList getAnnotationsOf(String tierName, String fileName) {        // make sure that the correct file has been parsed        parse(fileName);        return (ArrayList) tiers.get(tierName);    }    /**     * Parses a EAF v2.4 (or <) xml file.     *     * @param fileName the EAF v2.4 xml file that must be parsed.     
*/    private void parse(String fileName) {        //long start = System.currentTimeMillis();        //		System.out.println("Parse : " + fileName);        //		System.out.println("Free memory : " + Runtime.getRuntime().freeMemory());        // only parse the same file once        if (lastParsed.equals(fileName)) {            return;        }        // (re)set everything to null for each parse        tiers.clear();        tierNames.clear(); // HB, 2-1-02, to store name IN ORDER        //tierAttributes.clear();        mediaFile = "";        linguisticTypes.clear();        locales.clear();        timeSlots.clear();        timeOrder.clear();        mediaDescriptors.clear();        linkedFileDescriptors.clear();        controlledVocabularies.clear();        // parse the file        lastParsed = fileName;        currentFileName = fileName;        try {            reader.parse(fileName);        } catch (SAXException e) {            System.out.println("Parsing error: " + e.getMessage());            // the SAX parser can have difficulties with certain characters in             // the filepath: try to create an InputSource for the parser             // HS Mar 2007: depending on Xerces version a SAXException or an IOException
12 下一页
💿 文件大小 23621 K
👤 上传用户 ccuading
📂 所属分类 Java编程
🏷️ 相关标签

#编辑 #视频
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -