📄 eaf23parser.java
字号:
/* * File: EAF23Parser.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package mpi.eudico.server.corpora.clomimpl.dobes;import mpi.eudico.server.corpora.clom.TimeSlot;import mpi.eudico.server.corpora.clomimpl.abstr.LinkedFileDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.MediaDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.Parser;import org.apache.xerces.parsers.SAXParser;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXNotRecognizedException;import org.xml.sax.SAXNotSupportedException;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.Locale;import java.util.Vector;/** * A Parser for Eudico Annotation Format (EAF) compliant XML files. * In version 2.3 support for LinkedFleDescrptors and for stereotype * Included In is added. * * @author Hennie Brugman * @author Han Sloetjes * @version 1-Dec-2003 * @version jun 2004 addition of ControlledVocabularies * @version sep 2005 the constructor is now public giving up the singleton pattern * the path parameter of all getter methods can be removed in the next parser version * (replace by a public parse(String path) method) * @version Feb 2006 support for LinkedFleDescrptors and for stereotype * Included In is added. For compatibility reasons the filename parameter to the getters is maintained. */public class EAF23Parser extends Parser { /** The EAF v2.3 XML file is parsed. */ private static boolean verbose = false; /** Holds value of property DOCUMENT ME! */ private final SAXParser saxParser; /** stores tiername - tierrecord pairs */ private final HashMap tierMap = new HashMap(); /** a map with tiername - ArrayList with Annotation Records pairs */ private final HashMap tiers = new HashMap(); /** Holds value of property DOCUMENT ME! */ private final ArrayList tierNames = new ArrayList(); /** Holds value of property DOCUMENT ME! */ //private final HashMap tierAttributes = new HashMap(); /** Holds value of property DOCUMENT ME! */ private final ArrayList linguisticTypes = new ArrayList(); /** Holds value of property DOCUMENT ME! */ private final ArrayList locales = new ArrayList(); /** Holds value of property DOCUMENT ME! */ private final HashMap timeSlots = new HashMap(); /** stores the ControlledVocabulary objects by their ID */ private final HashMap controlledVocabularies = new HashMap(); /** Holds value of property DOCUMENT ME! */ private final ArrayList timeOrder = new ArrayList(); // since a HashMap is not ordered, all time_slot_ids have to be stored in order separately. private String mediaFile; private ArrayList mediaDescriptors = new ArrayList(); private ArrayList linkedFileDescriptors = new ArrayList(); private String svgFile; private String author; private String currentTierId; private String currentAnnotationId; private AnnotationRecord currentAnnRecord; private String currentCVId; private CVEntryRecord currentEntryRecord; private String content = ""; private String lastParsed = ""; private String currentFileName; private boolean parseError; /** * Singleton pattern is removed in version 2.2. */ public EAF23Parser() { saxParser = new SAXParser(); try { saxParser.setFeature("http://xml.org/sax/features/validation", true); saxParser.setFeature("http://apache.org/xml/features/validation/dynamic", true); saxParser.setProperty("http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation", "http://www.mpi.nl/tools/elan/EAFv2.3.xsd"); saxParser.setContentHandler(new EAFContentHandler()); } catch (SAXNotRecognizedException e) { e.printStackTrace(); } catch (SAXNotSupportedException e) { e.printStackTrace(); } } /** * For backward compatibility; not used anymore * * @param fileName the eaf filename, parameter also for historic reasons * * @return media file name */ public String getMediaFile(String fileName) { parse(fileName); return mediaFile; } /** * Returns the media descriptors * * @param fileName the eaf filename, parameter also for historic reasons * * @return the media descriptors */ public ArrayList getMediaDescriptors(String fileName) { parse(fileName); return mediaDescriptors; } /** * Returns the linked file descriptors * * @param fileName the eaf file name, for historic reasons * * @return a list of linked file descriptors */ public ArrayList getLinkedFileDescriptors(String fileName) { parse(fileName); return linkedFileDescriptors; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getSVGFile(String fileName) { parse(fileName); return svgFile; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getAuthor(String fileName) { parse(fileName); return author; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public ArrayList getLinguisticTypes(String fileName) { parse(fileName); return linguisticTypes; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public ArrayList getTimeOrder(String fileName) { parse(fileName); return timeOrder; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public HashMap getTimeSlots(String fileName) { parse(fileName); return timeSlots; } /** * Returns a Hastable of ArrayLists with the cv id's as keys.<br> * Each ArrayList can contain one String, the description and an * unknown number of CVEntryRecords. * * @param fileName the eaf filename * * @return a Hastable of ArrayLists with the cv id's as keys */ public HashMap getControlledVocabularies(String fileName) { parse(fileName); return controlledVocabularies; } /** * Returns the names of the Tiers that are present in the Transcription * file * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public ArrayList getTierNames(String fileName) { parse(fileName); return tierNames; } /** * Returns participant attribute of a tier. * The tier record is not used in TranscriptionStore yet. * * @param tierName name of tier * @param fileName the eaf * * @return the participant */ public String getParticipantOf(String tierName, String fileName) { parse(fileName); //String part = ""; if (tierMap.get(tierName) != null) { if (((TierRecord) tierMap.get(tierName)).getParticipant() != null) { return ((TierRecord) tierMap.get(tierName)).getParticipant(); } } /* if (((HashMap) tierAttributes.get(tierName)).get("PARTICIPANT") != null) { part = (String) ((HashMap) tierAttributes.get(tierName)).get( "PARTICIPANT"); } */ return ""; } /** * Returns the name of the linguistic type of a tier. * The tier record is not used in TranscriptionStore yet. * * @param tierName the name of the tier * @param fileName the eaf * * @return name of the type */ public String getLinguisticTypeIDOf(String tierName, String fileName) { parse(fileName); //String lType = ""; // name of type if (tierMap.get(tierName) != null) { if (((TierRecord) tierMap.get(tierName)).getLinguisticType() != null) { return ((TierRecord) tierMap.get(tierName)).getLinguisticType(); } } /* if (((HashMap) tierAttributes.get(tierName)).get( "LINGUISTIC_TYPE_REF") != null) { lType = (String) ((HashMap) tierAttributes.get(tierName)).get( "LINGUISTIC_TYPE_REF"); } */ return ""; } /** * Returns the Locale object for a tier. * * @param tierName the name of the tier * @param fileName the eaf * * @return the default Locale object */ public Locale getDefaultLanguageOf(String tierName, String fileName) { parse(fileName); Locale resultLoc = null; String localeId = null; //String localeId = (String) ((HashMap) tierAttributes.get(tierName)).get( // "DEFAULT_LOCALE"); if (tierMap.get(tierName) != null) { localeId = ((TierRecord) tierMap.get(tierName)).getDefaultLocale(); } Iterator locIter = locales.iterator(); while (locIter.hasNext()) { Locale l = (Locale) locIter.next(); if (l.getLanguage().equals(localeId)) { resultLoc = l; } } return resultLoc; } /** * Returns the name of the parent tier, if any. * * @param tierName the name of the tier * @param fileName the eaf * * @return the name of the parent tier, or null */ public String getParentNameOf(String tierName, String fileName) { parse(fileName); if (tierMap.get(tierName) != null) { return ((TierRecord) tierMap.get(tierName)).getParentTier(); } //return (String) ((HashMap) tierAttributes.get(tierName)).get( // "PARENT_REF"); return null; } /** * Returns a ArrayList with the Annotations for this Tier. Each * AnnotationRecord contains begin time, end time and text values * * @param tierName the name of the tier * @param fileName the eaf * * @return ArrayList of AnnotationRecord objects for the tier */ public ArrayList getAnnotationsOf(String tierName, String fileName) { // make sure that the correct file has been parsed parse(fileName); //long start = System.currentTimeMillis(); return (ArrayList) tiers.get(tierName); /* ArrayList annotationList = new ArrayList(); // get the tags from the tiers HashMap HashMap annotations = (HashMap) tiers.get(tierName); // get an iterator that iterates over the tags in the right order. Iterator iter = annotations.keySet().iterator(); while (iter.hasNext()) { Object key = iter.next(); annotationList.add(annotations.get(key)); } //long duration = System.currentTimeMillis() - start; // System.out.println("Extracting Annotations took " + duration + " milli seconds"); return annotationList; */ } /** * Parses a EAF v2.3 xml file. * * @param fileName the EAF v2.3 xml file that must be parsed. */ private void parse(String fileName) { //long start = System.currentTimeMillis(); // System.out.println("Parse : " + fileName); // System.out.println("Free memory : " + Runtime.getRuntime().freeMemory()); // only parse the same file once if (lastParsed.equals(fileName)) { return;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -