📄 eaf20parser.java
字号:
/* * File: EAF20Parser.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package mpi.eudico.server.corpora.clomimpl.dobes;import mpi.eudico.server.corpora.clom.TimeSlot;import mpi.eudico.server.corpora.clomimpl.type.Constraint;import mpi.eudico.server.corpora.clomimpl.type.LinguisticType;import mpi.eudico.server.corpora.clomimpl.type.SymbolicAssociation;import mpi.eudico.server.corpora.clomimpl.type.SymbolicSubdivision;import mpi.eudico.server.corpora.clomimpl.type.TimeSubdivision;import org.apache.xerces.parsers.SAXParser;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXNotRecognizedException;import org.xml.sax.SAXNotSupportedException;import java.io.IOException;import java.util.Hashtable;import java.util.Iterator;import java.util.Locale;import java.util.Vector;/** * A Parser for Eudico Annotation Format (EAF) compliant XML files. MAYBE THIS * CLASS MUST BE MADE THREAD SAFE BY ADDING SOME SYNCHRONIZED BLOCKS OR BY * GIVING UP THE SINGLETON PATTERN. * * @author Hennie Brugman * @version 18-Jul-2002 */public class EAF20Parser { /** The EAF v2.0 XML file is parsed. 
*/ private static boolean verbose = false; private static EAF20Parser parser; /** Holds value of property DOCUMENT ME! */ private final SAXParser saxParser; /** Holds value of property DOCUMENT ME! */ private final Hashtable tiers = new Hashtable(); /** Holds value of property DOCUMENT ME! */ private final Vector tierNames = new Vector(); /** Holds value of property DOCUMENT ME! */ private final Hashtable tierAttributes = new Hashtable(); /** Holds value of property DOCUMENT ME! */ private final Vector linguisticTypes = new Vector(); /** Holds value of property DOCUMENT ME! */ private final Vector locales = new Vector(); /** Holds value of property DOCUMENT ME! */ private final Hashtable timeSlots = new Hashtable(); /** Holds value of property DOCUMENT ME! */ private final Vector timeOrder = new Vector(); // since a Hashtable is not ordered, all time_slot_ids have to be stored in order separately. private String mediaFile; private String svgFile; private String author; private String currentTierId; private String currentAnnotationId; private String content = ""; private String lastParsed = ""; private String currentFileName; private boolean parseError; /** * Private constructor for EAFParser because the Singleton pattern is * applied here. */ private EAF20Parser() { saxParser = new SAXParser(); try { saxParser.setFeature("http://xml.org/sax/features/validation", true); saxParser.setFeature("http://apache.org/xml/features/validation/dynamic", true); saxParser.setProperty("http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation", "http://www.mpi.nl/tools/elan/EAFv2.0.xsd"); saxParser.setContentHandler(new EAFContentHandler()); } catch (SAXNotRecognizedException e) { e.printStackTrace(); } catch (SAXNotSupportedException e) { e.printStackTrace(); } } /** * The instance method returns the single incarnation of EAFParser to the * caller. * * @return DOCUMENT ME! 
*/
    public static EAF20Parser Instance() {
        if (parser == null) {
            parser = new EAF20Parser();
        }

        return parser;
    }

    /**
     * Parses the given file and returns the media file value held by this
     * parser afterwards.
     *
     * @param fileName file handed to parse before the value is read
     *
     * @return the current media file value
     */
    public String getMediaFile(String fileName) {
        parse(fileName);

        return mediaFile;
    }

    /**
     * Parses the given file and returns the SVG file value held by this
     * parser afterwards.
     *
     * @param fileName file handed to parse before the value is read
     *
     * @return the current SVG file value
     */
    public String getSVGFile(String fileName) {
        parse(fileName);

        return svgFile;
    }

    /**
     * Parses the given file and returns the author value held by this parser
     * afterwards.
     *
     * @param fileName file handed to parse before the value is read
     *
     * @return the current author value
     */
    public String getAuthor(String fileName) {
        parse(fileName);

        return author;
    }

    /**
     * Parses the given file and returns the linguistic types.
     *
     * @param fileName file handed to parse before the value is read
     *
     * @return this parser's own Vector of LinguisticType objects (not a copy)
     */
    public Vector getLinguisticTypes(String fileName) {
        parse(fileName);

        return linguisticTypes;
    }

    /**
     * Parses the given file and returns the time slot ids in their stored
     * order.
     *
     * @param fileName file handed to parse before the value is read
     *
     * @return this parser's own time order Vector (not a copy)
     */
    public Vector getTimeOrder(String fileName) {
        parse(fileName);

        return timeOrder;
    }

    /**
     * Parses the given file and returns the time slots.
     *
     * @param fileName file handed to parse before the value is read
     *
     * @return this parser's own time slot Hashtable (not a copy)
     */
    public Hashtable getTimeSlots(String fileName) {
        parse(fileName);

        return timeSlots;
    }

    /**
     * Returns the names of the Tiers that are present in the Transcription
     * file
     *
     * @param fileName file handed to parse before the value is read
     *
     * @return this parser's own Vector of tier names (not a copy)
     */
    public Vector getTierNames(String fileName) {
        parse(fileName);

        return tierNames;
    }

    /**
     * Returns the "PARTICIPANT" attribute of a tier.
     *
     * <p>
     * A tier without that attribute yields the empty string; the same now
     * holds for a null or unknown tier name, which used to end in a
     * NullPointerException.
     * </p>
     *
     * @param tierName name of the tier to look up
     * @param fileName file handed to parse before the lookup
     *
     * @return the participant, or "" when it cannot be determined
     */
    public String getParticipantOf(String tierName, String fileName) {
        parse(fileName);

        // Hashtable.get rejects a null key, so answer that case up front
        if (tierName == null) {
            return "";
        }

        Hashtable attributes = (Hashtable) tierAttributes.get(tierName);

        if (attributes == null) {
            return "";
        }

        Object participant = attributes.get("PARTICIPANT");

        if (participant == null) {
            return "";
        }

        return (String) participant;
    }

    /**
     * Returns the LinguisticType whose name equals the "LINGUISTIC_TYPE_REF"
     * attribute of the given tier.
     *
     * @param tierName name of the tier to look up
     * @param fileName file handed to parse before the lookup
     *
     * @return the matching type, or null when no parsed type has that name
*/ public LinguisticType getLinguisticTypeOf(String tierName, String fileName) { LinguisticType lt = null; parse(fileName); String lType = ""; // name of type if (((Hashtable) tierAttributes.get(tierName)).get( "LINGUISTIC_TYPE_REF") != null) { lType = (String) ((Hashtable) tierAttributes.get(tierName)).get( "LINGUISTIC_TYPE_REF"); } Iterator ltIter = linguisticTypes.iterator(); while (ltIter.hasNext()) { LinguisticType l = (LinguisticType) ltIter.next(); if (l.getLinguisticTypeName().equals(lType)) { lt = l; break; } } return lt; } /** * DOCUMENT ME! * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public Locale getDefaultLanguageOf(String tierName, String fileName) { parse(fileName); Locale resultLoc = null; String localeId = (String) ((Hashtable) tierAttributes.get(tierName)).get( "DEFAULT_LOCALE"); Iterator locIter = locales.iterator(); while (locIter.hasNext()) { Locale l = (Locale) locIter.next(); if (l.getLanguage().equals(localeId)) { resultLoc = l; } } return resultLoc; } /** * DOCUMENT ME! * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getParentNameOf(String tierName, String fileName) { parse(fileName); return (String) ((Hashtable) tierAttributes.get(tierName)).get( "PARENT_REF"); } /** * Returns a Vector with the Annotations for this Tier. Each * AnnotationRecord contains begin time, end time and text values * * <p> * MK:02/06/10<br> Elements of Vector are no CLOM/ACM Annotations but yet * another Vector of String . The inner Vector is interpreted as variant * record in DAFTranscriptionStore.loadTranscription * </p> * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! 
* * @return Vector of Vector of String */ public Vector getAnnotationsOf(String tierName, String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } long start = System.currentTimeMillis(); Vector annotationVector = new Vector(); // get the tags from the tiers Hashtable Hashtable annotations = (Hashtable) tiers.get(tierName); // get an iterator that iterates over the tags in the right order. Iterator iter = annotations.keySet().iterator(); while (iter.hasNext()) { Vector annotationRecord = new Vector(); Object key = iter.next(); annotationRecord.add(key);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -