📄 transcriber14parser.java
字号:
/* * File: Transcriber14Parser.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package mpi.eudico.server.corpora.clomimpl.transcriber;import mpi.eudico.server.corpora.clom.DecoderInfo;import mpi.eudico.server.corpora.clomimpl.abstr.MediaDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.Parser;import mpi.eudico.server.corpora.clomimpl.dobes.AnnotationRecord;import mpi.eudico.server.corpora.clomimpl.dobes.CVEntryRecord;import mpi.eudico.server.corpora.clomimpl.dobes.LingTypeRecord;import mpi.eudico.server.corpora.clomimpl.type.Constraint;// import org.apache.xerces.parsers.SAXParser;import org.xml.sax.Attributes;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.helpers.DefaultHandler;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.io.StringReader;import java.util.ArrayList;import java.util.Collections;import java.util.Comparator;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.StringTokenizer;import java.util.TreeSet;import javax.xml.parsers.*;/** * A Parser for Transcriber 1.4 compliant XML files. 
MAYBE THIS * CLASS MUST BE MADE THREAD SAFE BY ADDING SOME SYNCHRONIZED BLOCKS OR BY * GIVING UP THE SINGLETON PATTERN. * * @version sep 2005 the constructor is now public giving up the singleton pattern. * The path parameter of all getter methods could be removed in the next parser version * (add a public parse(String path) method) * Hashtable and Vector in Parser have been replaced by HashMap and ArrayList * * @author Hennie Brugman */public class Transcriber14Parser extends Parser { private static boolean verbose = false; //private static Transcriber14Parser parser; private static int annotationCounter = 0; private static int timeSlotCounter = 0; /** Holds value of property DOCUMENT ME! */ private static final String ANN_PREFIX = "a"; /** Holds value of property DOCUMENT ME! */ private static final String TS_PREFIX = "ts"; /** Holds value of property DOCUMENT ME! */ private static final String SP_PREFIX = "Sp"; /** Holds value of property DOCUMENT ME! */ private static final String SECTION_TIER_NAME = "Sections"; /** Holds value of property DOCUMENT ME! */ private static final String TURN_TIER_NAME = "Turns"; /** Holds value of property DOCUMENT ME! */ private static final String BACKGROUND_TIER_NAME = "Background"; /** Holds value of property DOCUMENT ME! */ private static final String COMMENT_TIER_NAME = "Comments"; /** Holds value of property DOCUMENT ME! */ private static final String MODE_TIER_NAME = "Mode"; /** Holds value of property DOCUMENT ME! */ private static final String FIDELITY_TIER_NAME = "Fidelity"; /** Holds value of property DOCUMENT ME! */ private static final String CHANNEL_TIER_NAME = "Channel"; /** Holds value of property DOCUMENT ME! */ private static final String SINGLE_TIER_NAME = "Speech"; /** Holds value of property DOCUMENT ME! */ private static final String UTTERANCE_TYPE = "UtteranceType"; /** Holds value of property DOCUMENT ME! */ private static final String SECTION_TYPE = "SectionType"; /** Holds value of property DOCUMENT ME! 
*/ private static final String TURN_TYPE = "TurnType"; /** Holds value of property DOCUMENT ME! */ private static final String MODE_TYPE = "ModeType"; /** Holds value of property DOCUMENT ME! */ private static final String FIDELITY_TYPE = "FidelityType"; /** Holds value of property DOCUMENT ME! */ private static final String CHANNEL_TYPE = "ChannelType"; /** Holds value of property DOCUMENT ME! */ private static final String BACKGROUND_TYPE = "BackgroundType"; /** Holds value of property DOCUMENT ME! */ private static final String COMMENT_TYPE = "CommentType"; /** Holds value of property DOCUMENT ME! */ private static final String SECTION_TYPE_CV = "SectionTypeCV"; /** Holds value of property DOCUMENT ME! */ private static final String TURN_MODE_CV = "TurnModeCV"; /** Holds value of property DOCUMENT ME! */ private static final String TURN_FIDELITY_CV = "TurnFidelityCV"; /** Holds value of property DOCUMENT ME! */ private static final String TURN_CHANNEL_CV = "TurnChannelCV"; /** Holds value of property DOCUMENT ME! */ private static final String SPEAKER_UNSPECIFIED = "Unspecified"; /** Holds value of property DOCUMENT ME! 
*/ private SAXParser saxParser; private DefaultHandler transcriberHandler; private TranscriberDecoderInfo decoderInfo; private boolean singleSpeechTier = false; private String lastParsed = ""; private String currentFileName; private boolean parseError; // members to store parse results private String audioFileName; private String scribe; private String language; private String date; private HashMap speakersHash = new HashMap(); private ArrayList sectionArrayList = new ArrayList(); private ArrayList utteranceRecords = new ArrayList(); private ArrayList backgroundArrayList = new ArrayList(); private HashMap timeSlots = new HashMap(); private HashMap annotRecordToTierMap = new HashMap(); private TreeSet tierNameSet = new TreeSet(); private HashMap topicHash = new HashMap(); private ArrayList commentRecords = new ArrayList(); private HashMap controlledVocabularies = null; // members to remember current parse state private SectionRecord currentSectionRecord; private SectionRecord lastSectionRecord; private TurnRecord currentTurnRecord; private TurnRecord lastTurnRecord; private String currentSpeakerId; private String speakersForCurrentTurn; private String lastSyncTime; private HashMap currentSpeakerContents = new HashMap(); // contains utterances under construction private BackgroundRecord lastBackgroundRecord; private String currentComments = ""; /** * Private constructor for Transcriber14Parser because the Singleton pattern is * applied here. */ public Transcriber14Parser() { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setValidating(false); factory.setNamespaceAware(false); try { saxParser = factory.newSAXParser(); } catch (Exception e) { e.printStackTrace(); } transcriberHandler = new Transcriber14Handler(); } /** * The instance method returns the single incarnation of TranscriberParser to the * caller. * * @return DOCUMENT ME! 
*/
    /*
       public static Transcriber14Parser Instance() {
           if (parser == null) {
               parser = new Transcriber14Parser();
           }
           return parser;
       }
     */

    /**
     * Triggers a parse of the given file and returns {@code null}: this parser
     * reports media through {@link #getMediaDescriptors(String)} instead.
     *
     * @param fileName path of the Transcriber file, passed on to parse(String)
     *
     * @return always {@code null}
     */
    public String getMediaFile(String fileName) {
        parse(fileName);

        return null;
    }

    /**
     * Returns the media descriptors for the given Transcriber file.
     *
     * <p>After parse(String) has run (defined outside this part of the file;
     * presumably it fills {@code audioFileName}), a single WAV descriptor is built
     * when an audio file name is known. Its URL is the directory part of the
     * transcription's URL followed by the audio file name, with ".wav" appended
     * unless the name already ends in that extension (compared case-insensitively,
     * so "x.WAV" is not turned into "x.WAV.wav").</p>
     *
     * @param fileName path of the Transcriber file
     *
     * @return a list holding one MediaDescriptor, or an empty list if no audio
     *         file name is available
     */
    public ArrayList getMediaDescriptors(String fileName) {
        parse(fileName);

        ArrayList mediaDescriptors = new ArrayList();

        if (audioFileName != null) {
            // compose audio file url
            String urlString = pathToURLString(fileName);
            String mediaURL = urlString.substring(0,
                    urlString.lastIndexOf("/") + 1) + audioFileName;

            // regionMatches returns false for a negative offset, so URLs shorter
            // than the extension are handled without an extra length check.
            String wavExtension = ".wav";
            boolean hasWavExtension = mediaURL.regionMatches(true,
                    mediaURL.length() - wavExtension.length(), wavExtension, 0,
                    wavExtension.length());

            if (!hasWavExtension) {
                mediaURL += wavExtension;
            }

            MediaDescriptor md = new MediaDescriptor(mediaURL,
                    MediaDescriptor.WAV_MIME_TYPE);
            mediaDescriptors.add(md);
        }

        return mediaDescriptors;
    }

    /**
     * Triggers a parse of the given file and returns {@code null}; no SVG file is
     * reported by this parser.
     *
     * @param fileName path of the Transcriber file, passed on to parse(String)
     *
     * @return always {@code null}
     */
    public String getSVGFile(String fileName) {
        parse(fileName);

        return null;
    }

    /**
     * Returns the author of the transcription.
     *
     * @param fileName path of the Transcriber file, passed on to parse(String)
     *
     * @return the value of the {@code scribe} field after parsing; may be
     *         {@code null} (presumably when the file names no scribe)
     */
    public String getAuthor(String fileName) {
        parse(fileName);

        return scribe;
    }

    /**
     * Returns the fixed set of linguistic types used for Transcriber files, as
     * LingTypeRecord objects.
     *
     * @param fileName path of the Transcriber file, passed on to parse(String)
     *
     * @return a list of LingTypeRecord objects
     */
    public ArrayList getLinguisticTypes(String fileName) {
        ArrayList lingTypeRecords = new ArrayList();

        parse(fileName);

        // Transcriber uses a number of fixed linguistic types.
LingTypeRecord lt = new LingTypeRecord(); lt.setLingTypeId(UTTERANCE_TYPE); lt.setTimeAlignable("true"); lt.setStereoType(null); lt.setControlledVocabulary(null); lt.setGraphicReferences("false"); lingTypeRecords.add(lt); lt = new LingTypeRecord(); lt.setLingTypeId(SECTION_TYPE); lt.setTimeAlignable("true"); lt.setStereoType(null); lt.setControlledVocabulary(SECTION_TYPE_CV); lt.setGraphicReferences("false"); lingTypeRecords.add(lt); lt = new LingTypeRecord(); lt.setLingTypeId(TURN_TYPE); lt.setTimeAlignable("true"); lt.setStereoType(Constraint.stereoTypes[Constraint.TIME_SUBDIVISION]); lt.setControlledVocabulary(null); lt.setGraphicReferences("false"); lingTypeRecords.add(lt); lt = new LingTypeRecord(); lt.setLingTypeId(MODE_TYPE); lt.setTimeAlignable("false"); lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_ASSOCIATION]); lt.setControlledVocabulary(null); lt.setGraphicReferences("false"); lingTypeRecords.add(lt); lt = new LingTypeRecord(); lt.setLingTypeId(FIDELITY_TYPE); lt.setTimeAlignable("false"); lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_ASSOCIATION]); lt.setControlledVocabulary(null); lt.setGraphicReferences("false"); lingTypeRecords.add(lt); lt = new LingTypeRecord(); lt.setLingTypeId(CHANNEL_TYPE); lt.setTimeAlignable("false"); lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_ASSOCIATION]); lt.setControlledVocabulary(null); lt.setGraphicReferences("false"); lingTypeRecords.add(lt); lt = new LingTypeRecord(); lt.setLingTypeId(BACKGROUND_TYPE); lt.setTimeAlignable("true"); lt.setStereoType(null); lt.setControlledVocabulary(null); lt.setGraphicReferences("false"); lingTypeRecords.add(lt); lt = new LingTypeRecord(); lt.setLingTypeId(COMMENT_TYPE); lt.setTimeAlignable("true"); lt.setStereoType(null); lt.setControlledVocabulary(null); lt.setGraphicReferences("false"); lingTypeRecords.add(lt); return lingTypeRecords;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -