📄 eafskeletonparser.java
字号:
/* * File: EAFSkeletonParser.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package mpi.eudico.server.corpora.clomimpl.dobes;import mpi.eudico.server.corpora.clomimpl.abstr.ParseException;import mpi.eudico.server.corpora.clomimpl.abstr.TierImpl;import mpi.eudico.server.corpora.clomimpl.type.Constraint;import mpi.eudico.server.corpora.clomimpl.type.LinguisticType;import mpi.eudico.server.corpora.clomimpl.type.SymbolicAssociation;import mpi.eudico.server.corpora.clomimpl.type.SymbolicSubdivision;import mpi.eudico.server.corpora.clomimpl.type.TimeSubdivision;import mpi.util.CVEntry;import mpi.util.ControlledVocabulary;import org.apache.xerces.parsers.SAXParser;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.InputSource;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXNotRecognizedException;import org.xml.sax.SAXNotSupportedException;import java.io.File;import java.io.FileInputStream;import java.io.IOException;import java.util.ArrayList;import java.util.HashMap;import java.util.Iterator;import java.util.Locale;/** * Parses an eaf file, creating objects of ControlledVocabularies, * LinguisticTypes and Tiers only. The rest is skipped. * * @author Han Sloetjes * @version 1.0 jan 2006: reflects EAFv2.2.xsd * @version 2.0 jan 2007: reflects EAFv2.4.xsd, attribute "ANNOTATOR" added to * element "TIER" */public class EAFSkeletonParser { /** the sax parser */ private final SAXParser saxParser; /** the currently supported eaf version */ private final String version = "2.4"; private String fileName; /** stores tiername - tierrecord pairs */ private final HashMap tierMap = new HashMap(); private ArrayList tiers; private ArrayList tierOrder = new ArrayList(); /** stores linguistic types records! */ private final ArrayList lingTypeRecords = new ArrayList(); private ArrayList linguisticTypes; /** stores the Locales */ private final ArrayList locales = new ArrayList(); /** stores the ControlledVocabulary objects */ private final ArrayList cvList = new ArrayList(); private String currentTierId; private String currentCVId; private ControlledVocabulary currentCV; private String currentEntryDesc; private String content = ""; /** * Creates a new EAFSkeletonParser instance * * @param fileName the file to be parsed * * @throws ParseException any exception that can occur when creating * a parser * @throws NullPointerException thrown when the filename is null */ public EAFSkeletonParser(String fileName) throws ParseException { if (fileName == null) { throw new NullPointerException(); } this.fileName = fileName; saxParser = new SAXParser(); try { saxParser.setFeature("http://xml.org/sax/features/validation", true); saxParser.setFeature("http://apache.org/xml/features/validation/dynamic", true); saxParser.setProperty("http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation", "http://www.mpi.nl/tools/elan/EAFv2.4.xsd"); saxParser.setContentHandler(new EAFSkeletonHandler()); } catch (SAXNotRecognizedException e) { e.printStackTrace(); throw new ParseException(e.getMessage()); } catch (SAXNotSupportedException e) { e.printStackTrace(); throw new ParseException(e.getMessage()); } } /* public ArrayList getMediaDescriptors() { return null; } */ /** * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getLinguisticTypes(java.lang.String) */ public ArrayList getLinguisticTypes() { return linguisticTypes; } /** * Returns a list of tier objects. * * @return a list of tiers */ public ArrayList getTiers() { return tiers; } /** * Returns a list of the tiernames in the same order as in the file. * * @return a list of the tiernames in the same order as in the .eaf file */ public ArrayList getTierOrder() { return tierOrder; } /** * Returns a list of CVs. * * @return a list of Controlled Vocabularies */ public ArrayList getControlledVocabularies() { return cvList; } /** * Returns the current version of the skeleton parser. * * @return the current version */ public String getVersion() { return version; } /** * Starts the actual parsing. * * @throws ParseException any parse exception */ public void parse() throws ParseException { // init maps and lists try { saxParser.parse(fileName); createObjects(); } catch (SAXException sax) { System.out.println("Parsing error: " + sax.getMessage()); // the SAX parser can have difficulties with certain characters in // the filepath: try to create an InputSource for the parser // HS Mar 2007: depending on Xerces version a SAXException or an IOException // is thrown in such case File f = new File(fileName); if (f.exists()) { try { FileInputStream fis = new FileInputStream(f); InputSource source = new InputSource(fis); saxParser.parse(source); createObjects(); // just catch any exception } catch (Exception ee) { System.out.println("Parsing retry error: " + ee.getMessage()); throw new ParseException(ee.getMessage(), ee.getCause()); } } } catch (IOException ioe) { System.out.println("IO error: " + ioe.getMessage()); // the SAX parser can have difficulties with certain characters in // the filepath: try to create an InputSource for the parser // HS Mar 2007: depending on Xerces version a SAXException or an IOException // is thrown in such case File f = new File(fileName); if (f.exists()) { try { FileInputStream fis = new FileInputStream(f); InputSource source = new InputSource(fis); saxParser.parse(source); createObjects(); // just catch any exception } catch (Exception ee) { System.out.println("Parsing retry error: " + ee.getMessage()); throw new ParseException(ee.getMessage(), ee.getCause()); } } } catch (Exception e) { throw new ParseException(e.getMessage(), e.getCause()); } } /** * After parsing create objects from the records; tiers and linguistic * types. CV's + CVEntries and Locales have already been made. */ private void createObjects() { linguisticTypes = new ArrayList(lingTypeRecords.size()); for (int i = 0; i < lingTypeRecords.size(); i++) { LingTypeRecord ltr = (LingTypeRecord) lingTypeRecords.get(i); LinguisticType lt = new LinguisticType(ltr.getLingTypeId()); boolean timeAlignable = true; if (ltr.getTimeAlignable().equals("false")) { timeAlignable = false; } lt.setTimeAlignable(timeAlignable); boolean graphicReferences = false; if (ltr.getGraphicReferences().equals("true")) { graphicReferences = true; } lt.setGraphicReferences(graphicReferences); String stereotype = ltr.getStereoType(); Constraint c = null; if (stereotype != null) { stereotype = stereotype.replace('_', ' '); // for backwards compatibility if (stereotype.equals( Constraint.stereoTypes[Constraint.TIME_SUBDIVISION])) { c = new TimeSubdivision();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -