📄 dafparser.java
字号:
/* * File: DAFParser.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package mpi.eudico.server.corpora.clomimpl.dobes;import mpi.eudico.server.corpora.clom.TimeSlot;import mpi.eudico.server.corpora.clomimpl.type.LinguisticType;import org.xml.sax.AttributeList;import org.xml.sax.HandlerBase;import org.xml.sax.InputSource;import org.xml.sax.SAXParseException;import java.io.File;import java.util.Hashtable;import java.util.Iterator;import java.util.Locale;import java.util.Vector;import javax.xml.parsers.SAXParser;import javax.xml.parsers.SAXParserFactory;/** * A Parser for Dobes Annotation Format (DAF) compliant XML files. MAYBE THIS * CLASS MUST BE MADE THREAD SAFE BY ADDING SOME SYNCHRONIZED BLOCKS OR BY * GIVING UP THE SINGLETON PATTERN. * * @author Hennie Brugman * @version 6-Jul-2001 */public class DAFParser extends HandlerBase { private static DAFParser parser; /** The DOBES DAF XML file is parsed. */ private final Float ORDERED_KEYS_KEY = new Float(0.12345); private boolean verbose; private SAXParser saxParser; private String lastParsed; private String currentFileName; private File xmlFile; private boolean parseError; private Hashtable tiers; private Vector tierNames; private Hashtable tierAttributes; private String mediaFile; private String author; private Vector linguisticTypes; private Vector locales; private Hashtable timeSlots; private Vector timeOrder; // since a Hashtable is not ordered, all time_slot_ids have to be stored in order separately. private String currentTierId; private String currentAnnotationId; private String currentSpeakerId; private String currentStart; private String currentEnd; private String content; /** * Private constructor for DAFParser because the Singleton pattern is * applied here. */ private DAFParser() { try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setValidating(false); saxParser = factory.newSAXParser(); lastParsed = ""; verbose = false; } catch (Exception e) { e.printStackTrace(); } } /** * The instance method returns the single incarnation of DAFParser to the * caller. * * @return DOCUMENT ME! */ public static DAFParser Instance() { if (parser == null) { parser = new DAFParser(); } return parser; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getMediaFile(String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } return mediaFile; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getAuthor(String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } return author; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public Vector getLinguisticTypes(String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } return linguisticTypes; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public Vector getTimeOrder(String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } return timeOrder; } /** * DOCUMENT ME! * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public Hashtable getTimeSlots(String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } return timeSlots; } /** * Returns the names of the Tiers that are present in the Transcription * file * * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public Vector getTierNames(String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } // Vector tierNames = new Vector(tiers.keySet()); // Collections.sort(tierNames); return tierNames; } /** * DOCUMENT ME! * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getParticipantOf(String tierName, String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } String part = ""; if (((Hashtable) tierAttributes.get(tierName)).get("PARTICIPANT") != null) { part = (String) ((Hashtable) tierAttributes.get(tierName)).get( "PARTICIPANT"); } return part; } /** * DOCUMENT ME! * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public LinguisticType getLinguisticTypeOf(String tierName, String fileName) { LinguisticType lt = null; // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } String lType = ""; // name of type if (((Hashtable) tierAttributes.get(tierName)).get( "LINGUISTIC_TYPE_REF") != null) { lType = (String) ((Hashtable) tierAttributes.get(tierName)).get( "LINGUISTIC_TYPE_REF"); } Iterator ltIter = linguisticTypes.iterator(); while (ltIter.hasNext()) { LinguisticType l = (LinguisticType) ltIter.next(); if (l.getLinguisticTypeName().equals(lType)) { lt = l; break; } } return lt; } /** * DOCUMENT ME! * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public Locale getDefaultLanguageOf(String tierName, String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } Locale resultLoc = null; String localeId = (String) ((Hashtable) tierAttributes.get(tierName)).get( "DEFAULT_LOCALE"); Iterator locIter = locales.iterator(); while (locIter.hasNext()) { Locale l = (Locale) locIter.next(); if (l.getLanguage().equals(localeId)) { resultLoc = l; } } return resultLoc; } /** * DOCUMENT ME! * * @param tierName DOCUMENT ME! * @param fileName DOCUMENT ME! * * @return DOCUMENT ME! */ public String getParentNameOf(String tierName, String fileName) { // make sure that the correct file has been parsed if (!lastParsed.equals(fileName)) { parse(fileName); } return (String) ((Hashtable) tierAttributes.get(tierName)).get( "PARENT_REF"); } /** * Returns a Vector with the Annotations for this Tier. Each * AnnotationRecord contains begin time, end time and text values * * <p>
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -