⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 eaf23parser.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* * File:     EAF23Parser.java * Project:  MPI Linguistic Application * Date:     02 May 2007 * * Copyright (C) 2001-2007  Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */package mpi.eudico.server.corpora.clomimpl.dobes;import mpi.eudico.server.corpora.clom.TimeSlot;import mpi.eudico.server.corpora.clomimpl.abstr.LinkedFileDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.MediaDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.Parser;import org.apache.xerces.parsers.SAXParser;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXNotRecognizedException;import org.xml.sax.SAXNotSupportedException;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.Locale;import java.util.Vector;/** * A Parser for Eudico Annotation Format (EAF) compliant XML files. * In version 2.3 support for LinkedFleDescrptors and for stereotype * Included In is added. * * @author Hennie Brugman * @author Han Sloetjes * @version 1-Dec-2003 * @version jun 2004 addition of ControlledVocabularies * @version sep 2005 the constructor is now public giving up the singleton pattern * the path parameter of all getter methods can be removed in the next parser version * (replace by a public parse(String path) method) * @version Feb 2006 support for LinkedFleDescrptors and for stereotype * Included In is added. For compatibility reasons the filename parameter to the getters is maintained. */public class EAF23Parser extends Parser {    /** The EAF v2.3 XML file is parsed. */    private static boolean verbose = false;    /** Holds value of property DOCUMENT ME! */    private final SAXParser saxParser;    /** stores tiername - tierrecord pairs */    private final HashMap tierMap = new HashMap();    /** a map with tiername - ArrayList with Annotation Records pairs */    private final HashMap tiers = new HashMap();    /** Holds value of property DOCUMENT ME! */    private final ArrayList tierNames = new ArrayList();    /** Holds value of property DOCUMENT ME! */    //private final HashMap tierAttributes = new HashMap();    /** Holds value of property DOCUMENT ME! */    private final ArrayList linguisticTypes = new ArrayList();    /** Holds value of property DOCUMENT ME! */    private final ArrayList locales = new ArrayList();    /** Holds value of property DOCUMENT ME! */    private final HashMap timeSlots = new HashMap();    /** stores the ControlledVocabulary objects by their ID */    private final HashMap controlledVocabularies = new HashMap();    /** Holds value of property DOCUMENT ME! */    private final ArrayList timeOrder = new ArrayList(); // since a HashMap is not ordered, all time_slot_ids have to be stored in order separately.    private String mediaFile;    private ArrayList mediaDescriptors = new ArrayList();    private ArrayList linkedFileDescriptors = new ArrayList();    private String svgFile;    private String author;    private String currentTierId;    private String currentAnnotationId;    private AnnotationRecord currentAnnRecord;    private String currentCVId;    private CVEntryRecord currentEntryRecord;    private String content = "";    private String lastParsed = "";    private String currentFileName;    private boolean parseError;    /**     * Singleton pattern is removed in version 2.2.     */    public EAF23Parser() {        saxParser = new SAXParser();        try {            saxParser.setFeature("http://xml.org/sax/features/validation", true);            saxParser.setFeature("http://apache.org/xml/features/validation/dynamic",                true);            saxParser.setProperty("http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation",                "http://www.mpi.nl/tools/elan/EAFv2.3.xsd");            saxParser.setContentHandler(new EAFContentHandler());        } catch (SAXNotRecognizedException e) {            e.printStackTrace();        } catch (SAXNotSupportedException e) {            e.printStackTrace();        }    }    /**     * For backward compatibility; not used anymore     *     * @param fileName the eaf filename, parameter also for historic reasons     *     * @return media file name     */    public String getMediaFile(String fileName) {        parse(fileName);        return mediaFile;    }    /**     * Returns the media descriptors     *     * @param fileName the eaf filename, parameter also for historic reasons     *     * @return the media descriptors     */    public ArrayList getMediaDescriptors(String fileName) {        parse(fileName);        return mediaDescriptors;    }    /**     * Returns the linked file descriptors     *     * @param fileName the eaf file name, for historic reasons     *     * @return a list of linked file descriptors     */    public ArrayList getLinkedFileDescriptors(String fileName) {        parse(fileName);        return linkedFileDescriptors;    }    /**     * DOCUMENT ME!     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public String getSVGFile(String fileName) {        parse(fileName);        return svgFile;    }    /**     * DOCUMENT ME!     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public String getAuthor(String fileName) {        parse(fileName);        return author;    }    /**     * DOCUMENT ME!     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public ArrayList getLinguisticTypes(String fileName) {        parse(fileName);        return linguisticTypes;    }    /**     * DOCUMENT ME!     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public ArrayList getTimeOrder(String fileName) {        parse(fileName);        return timeOrder;    }    /**     * DOCUMENT ME!     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public HashMap getTimeSlots(String fileName) {        parse(fileName);        return timeSlots;    }    /**     * Returns a Hastable of ArrayLists with the cv id's as keys.<br>     * Each ArrayList can contain one String, the description and an     * unknown number of CVEntryRecords.     *     * @param fileName the eaf filename     *     * @return a Hastable of ArrayLists with the cv id's as keys     */    public HashMap getControlledVocabularies(String fileName) {        parse(fileName);        return controlledVocabularies;    }    /**     * Returns the names of the Tiers that are present in the Transcription     * file     *     * @param fileName DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public ArrayList getTierNames(String fileName) {        parse(fileName);        return tierNames;    }    /**     * Returns participant attribute of a tier.     * The tier record is not used in TranscriptionStore yet.     *     * @param tierName name of tier     * @param fileName the eaf     *     * @return the participant     */    public String getParticipantOf(String tierName, String fileName) {        parse(fileName);        //String part = "";        if (tierMap.get(tierName) != null) {            if (((TierRecord) tierMap.get(tierName)).getParticipant() != null) {                return ((TierRecord) tierMap.get(tierName)).getParticipant();            }        }        /*        if (((HashMap) tierAttributes.get(tierName)).get("PARTICIPANT") != null) {            part = (String) ((HashMap) tierAttributes.get(tierName)).get(                    "PARTICIPANT");        }        */        return "";    }    /**     * Returns the name of the linguistic type of a tier.     * The tier record is not used in TranscriptionStore yet.     *     * @param tierName the name of the tier     * @param fileName the eaf     *     * @return name of the type     */    public String getLinguisticTypeIDOf(String tierName, String fileName) {        parse(fileName);        //String lType = ""; // name of type        if (tierMap.get(tierName) != null) {            if (((TierRecord) tierMap.get(tierName)).getLinguisticType() != null) {                return ((TierRecord) tierMap.get(tierName)).getLinguisticType();            }        }        /*        if (((HashMap) tierAttributes.get(tierName)).get(                    "LINGUISTIC_TYPE_REF") != null) {            lType = (String) ((HashMap) tierAttributes.get(tierName)).get(                    "LINGUISTIC_TYPE_REF");        }        */        return "";    }    /**     * Returns the Locale object for a tier.     *     * @param tierName the name of the tier     * @param fileName the eaf     *     * @return the default Locale object     */    public Locale getDefaultLanguageOf(String tierName, String fileName) {        parse(fileName);        Locale resultLoc = null;        String localeId = null;        //String localeId = (String) ((HashMap) tierAttributes.get(tierName)).get(        //        "DEFAULT_LOCALE");        if (tierMap.get(tierName) != null) {            localeId = ((TierRecord) tierMap.get(tierName)).getDefaultLocale();        }        Iterator locIter = locales.iterator();        while (locIter.hasNext()) {            Locale l = (Locale) locIter.next();            if (l.getLanguage().equals(localeId)) {                resultLoc = l;            }        }        return resultLoc;    }    /**     * Returns the name of the parent tier, if any.     *     * @param tierName the name of the tier     * @param fileName the eaf     *     * @return the name of the parent tier, or null     */    public String getParentNameOf(String tierName, String fileName) {        parse(fileName);        if (tierMap.get(tierName) != null) {            return ((TierRecord) tierMap.get(tierName)).getParentTier();        }        //return (String) ((HashMap) tierAttributes.get(tierName)).get(        //    "PARENT_REF");        return null;    }    /**     * Returns a ArrayList with the Annotations for this Tier. Each     * AnnotationRecord contains begin time, end time and text values     *     * @param tierName the name of the tier     * @param fileName the eaf     *     * @return ArrayList of AnnotationRecord objects for the tier     */    public ArrayList getAnnotationsOf(String tierName, String fileName) {        // make sure that the correct file has been parsed        parse(fileName);        //long start = System.currentTimeMillis();        return (ArrayList) tiers.get(tierName);        /*        ArrayList annotationList = new ArrayList();        // get the tags from the tiers HashMap        HashMap annotations = (HashMap) tiers.get(tierName);        // get an iterator that iterates over the tags in the right order.        Iterator iter = annotations.keySet().iterator();        while (iter.hasNext()) {            Object key = iter.next();            annotationList.add(annotations.get(key));        }        //long duration = System.currentTimeMillis() - start;        //    System.out.println("Extracting Annotations took " + duration + " milli seconds");        return annotationList;        */    }    /**     * Parses a EAF v2.3 xml file.     *     * @param fileName the EAF v2.3 xml file that must be parsed.     */    private void parse(String fileName) {        //long start = System.currentTimeMillis();        //		System.out.println("Parse : " + fileName);        //		System.out.println("Free memory : " + Runtime.getRuntime().freeMemory());        // only parse the same file once        if (lastParsed.equals(fileName)) {            return;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -