📄 eaf21parser.java
字号:
/* * File: EAF21Parser.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package mpi.eudico.server.corpora.clomimpl.dobes;import mpi.eudico.server.corpora.clom.TimeSlot;import mpi.eudico.server.corpora.clomimpl.abstr.MediaDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.Parser;import org.apache.xerces.parsers.SAXParser;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXNotRecognizedException;import org.xml.sax.SAXNotSupportedException;import java.io.IOException;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.Locale;/** * A Parser for Eudico Annotation Format (EAF) compliant XML files. MAYBE THIS * CLASS MUST BE MADE THREAD SAFE BY ADDING SOME SYNCHRONIZED BLOCKS OR BY * GIVING UP THE SINGLETON PATTERN. * * @author Hennie Brugman * @version 1-Dec-2003 * * @version sep 2005 the constructor is now public giving up the singleton pattern. * The path parameter of all getter methods could be removed in the next parser version * (add a public parse(String path) method) * Hashtable and Vector in Parser have been replaced by HashMap and ArrayList */public class EAF21Parser extends Parser { /** The EAF v2.1 XML file is parsed. */ private static boolean verbose = false; private static EAF21Parser parser; /** Holds value of property DOCUMENT ME! */ private final SAXParser saxParser; /** Holds value of property DOCUMENT ME! */ private final HashMap tiers = new HashMap(); /** Holds value of property DOCUMENT ME! */ private final ArrayList tierNames = new ArrayList(); /** Holds value of property DOCUMENT ME! */ private final HashMap tierAttributes = new HashMap(); /** Holds value of property DOCUMENT ME! */ private final ArrayList linguisticTypes = new ArrayList(); /** Holds value of property DOCUMENT ME! */ private final ArrayList locales = new ArrayList(); /** Holds value of property DOCUMENT ME! */ private final HashMap timeSlots = new HashMap(); /** Holds value of property DOCUMENT ME! */ private final ArrayList timeOrder = new ArrayList(); // since a HashMap is not ordered, all time_slot_ids have to be stored in order separately. private String mediaFile; private ArrayList mediaDescriptors = new ArrayList(); private String svgFile; private String author; private String currentTierId; private String currentAnnotationId; private String content = ""; private String lastParsed = ""; private String currentFileName; private boolean parseError; /** * Private constructor for EAFParser because the Singleton pattern is * applied here. */ public EAF21Parser() { saxParser = new SAXParser(); try { saxParser.setFeature("http://xml.org/sax/features/validation", true); saxParser.setFeature("http://apache.org/xml/features/validation/dynamic", true); saxParser.setProperty("http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation", "http://www.mpi.nl/tools/elan/EAFv2.1.xsd"); saxParser.setContentHandler(new EAFContentHandler()); } catch (SAXNotRecognizedException e) { e.printStackTrace(); } catch (SAXNotSupportedException e) { e.printStackTrace(); } } /** * The instance method returns the single incarnation of EAFParser to the * caller. * * @return DOCUMENT ME! */ /* public static EAF21Parser Instance() { if (parser == null) { parser = new EAF21Parser(); } return parser; } */ /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getMediaFile(String fileName) { parse(fileName); return mediaFile; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public ArrayList getMediaDescriptors(String fileName) { parse(fileName); return mediaDescriptors; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getSVGFile(String fileName) { parse(fileName); return svgFile; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getAuthor(String fileName) { parse(fileName); return author; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public ArrayList getLinguisticTypes(String fileName) { parse(fileName); return linguisticTypes; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public ArrayList getTimeOrder(String fileName) { parse(fileName); return timeOrder; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public HashMap getTimeSlots(String fileName) { parse(fileName); return timeSlots; } /** * Returns the names of the Tiers that are present in the Transcription * file * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public ArrayList getTierNames(String fileName) { parse(fileName); return tierNames; } /** * DOCUMENT ME! * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getParticipantOf(String tierName, String fileName) { parse(fileName); String part = ""; if (((HashMap) tierAttributes.get(tierName)).get("PARTICIPANT") != null) { part = (String) ((HashMap) tierAttributes.get(tierName)).get( "PARTICIPANT"); } return part; } /** * DOCUMENT ME! * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getLinguisticTypeIDOf(String tierName, String fileName) { parse(fileName); String lType = ""; // name of type if (((HashMap) tierAttributes.get(tierName)).get("LINGUISTIC_TYPE_REF") != null) { lType = (String) ((HashMap) tierAttributes.get(tierName)).get( "LINGUISTIC_TYPE_REF"); } return lType; } /** * DOCUMENT ME! * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public Locale getDefaultLanguageOf(String tierName, String fileName) { parse(fileName); Locale resultLoc = null; String localeId = (String) ((HashMap) tierAttributes.get(tierName)).get( "DEFAULT_LOCALE"); Iterator locIter = locales.iterator(); while (locIter.hasNext()) { Locale l = (Locale) locIter.next(); if (l.getLanguage().equals(localeId)) { resultLoc = l; } } return resultLoc; } /** * DOCUMENT ME! * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getParentNameOf(String tierName, String fileName) { parse(fileName); return (String) ((HashMap) tierAttributes.get(tierName)).get( "PARENT_REF"); } /** * Returns a ArrayList with the Annotations for this Tier. Each * AnnotationRecord contains begin time, end time and text values * * <p> * MK:02/06/10<br> Elements of ArrayList are no CLOM/ACM Annotations but yet * another ArrayList of String . The inner ArrayList is interpreted as variant * record in DAFTranscriptionStore.loadTranscription * </p> * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return ArrayList of ArrayList of String */ public ArrayList getAnnotationsOf(String tierName, String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } long start = System.currentTimeMillis(); ArrayList annotationArrayList = new ArrayList(); // get the tags from the tiers HashMap HashMap annotations = (HashMap) tiers.get(tierName); // get an iterator that iterates over the tags in the right order. Iterator iter = annotations.keySet().iterator(); while (iter.hasNext()) { Object key = iter.next(); // annotation parameters have the following format, all params are Strings. Either: // id, "alignable", time_slot_id1, time_slot_id2, value, or // id, "reference", annotation_ref_id, previous_annotation, value, or // id, "alignable_svg", time_slot_id1, time_slot_id2, svg_ref, value AnnotationRecord annotationRecord = new AnnotationRecord(); annotationRecord.setAnnotationId((String) key); ArrayList annotationParams = (ArrayList) annotations.get(key); String annotType = (String) annotationParams.get(0); annotationRecord.setAnnotationType(annotType); if (annotType.equals(AnnotationRecord.REFERENCE)) { String referredAnnotId = (String) annotationParams.get(1); String previousAnnotId = (String) annotationParams.get(2); annotationRecord.setReferredAnnotId(referredAnnotId); annotationRecord.setPreviousAnnotId(previousAnnotId); } else { // ALIGNABLE or ALIGNABLE_SVG String beginTimeSlotId = (String) annotationParams.get(1);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -