📄 shoeboxparser.java
字号:
/* * File: ShoeboxParser.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *//* * Created on Aug 23, 2004 * * To change the template for this generated file go to * Window>Preferences>Java>Code Generation>Code and Comments */package mpi.eudico.server.corpora.clomimpl.shoebox;import mpi.eudico.server.corpora.clom.DecoderInfo;import mpi.eudico.server.corpora.clom.TimeSlot;import mpi.eudico.server.corpora.clomimpl.abstr.Parser;import mpi.eudico.server.corpora.clomimpl.dobes.AnnotationRecord;import mpi.eudico.server.corpora.clomimpl.dobes.LingTypeRecord;import mpi.eudico.server.corpora.clomimpl.shoebox.utr22.SimpleConverter;import mpi.eudico.server.corpora.clomimpl.type.Constraint;import mpi.eudico.server.util.ServerLogger;import java.io.BufferedReader;import java.io.File;import java.io.FileReader;import java.util.ArrayList;import java.util.Collections;import java.util.Enumeration;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.TreeSet;import java.util.Vector;import javax.swing.tree.DefaultMutableTreeNode;/** * @author hennie * * @version sep 2005 the constructor is now public giving up the singleton pattern. * The path parameter of all getter methods could be removed in the next parser version * (add a public parse(String path) method) * Hashtable and Vector in Parser have been replaced by HashMap and ArrayList * @version may 2006 Shoebox Unicode tiers are now pre-edited in ShoeboxArray, lifting the need for * special treatment in the methods where single words are extracted from the marker lines. */public class ShoeboxParser extends Parser implements ServerLogger { //private static ShoeboxParser parser; /** Holds value of property DOCUMENT ME! */ private final static String ANN_ID_PREFIX = "ann"; /** Holds value of property DOCUMENT ME! */ private final static String TS_ID_PREFIX = "ts"; private long annotId = 0; private long tsId = 0; private SimpleConverter simpleConverter; private ShoeboxArray sbxfile; // shoebox transcription file private ShoeboxTypFile typfile; // shoebox typ file private ToolboxDecoderInfo decoderInfo; /** * Hierachical structure of the tags in the shoebox file. Elements are of * type String. */ DefaultMutableTreeNode tiertree = new DefaultMutableTreeNode(); private ArrayList lingTypeRecords = new ArrayList(); private ArrayList participantOrder = new ArrayList(); private TreeSet tierNameSet = new TreeSet(); private HashMap parentHash = new HashMap(); private ArrayList timeOrder = new ArrayList(); // of long[2] private ArrayList timeSlots = new ArrayList(); // of long[2], {id,time} private ArrayList annotationRecords = new ArrayList(); private HashMap annotRecordToTierMap = new HashMap(); private String lastParsed = ""; // for calculation of 'root annotation' times private ArrayList rootSlots = new ArrayList(); // of long[2], {id,time} private boolean fixImproperAlign = true; /** * Public constructor: the Singleton pattern is no longer applied to the parsers. * Create a new Parser for every file to parse. */ public ShoeboxParser() { try { simpleConverter = new SimpleConverter(null); } catch (Exception e) { e.printStackTrace(); } } /** * The instance method returns the single incarnation of CHATParser to the * caller. * * @return DOCUMENT ME! */ /* public static ShoeboxParser Instance() { if (parser == null) { try { parser = new ShoeboxParser(); } catch (Exception e) { e.printStackTrace(); } } return parser; } */ /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getMediaDescriptors(java.lang.String) */ public ArrayList getMediaDescriptors(String fileName) { parse(fileName); return sbxfile.getMediaDescriptors(); } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getLinguisticTypes(java.lang.String) */ public ArrayList getLinguisticTypes(String fileName) { parse(fileName); Enumeration en = sbxfile.getLabels(); while (en.hasMoreElements()) { String label = (String) en.nextElement(); if (!(label.equals(ShoeboxArray.label_eudicoparticipant)) && !(label.equals(ShoeboxArray.label_eudicot0)) && !(label.equals(ShoeboxArray.label_eudicot1)) && !(label.equals(ShoeboxEncoder.elanParticipantLabel)) && !(label.equals(ShoeboxEncoder.elanBeginLabel)) && !(label.equals(ShoeboxEncoder.elanEndLabel)) && !(label.equals(ShoeboxEncoder.elanBlockStart)) && !(label.equals(ShoeboxEncoder.elanELANLabel))) { String ltName = label.substring(1); // cut off backslash LingTypeRecord lt = new LingTypeRecord(); lt.setLingTypeId(ltName); lt.setGraphicReferences("false"); // set defaults lt.setTimeAlignable("false"); lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_ASSOCIATION]); // set default for root tier if ((typfile.interlinearRootMarker != null) && typfile.interlinearRootMarker.equals(ltName)) { lt.setStereoType(null); lt.setTimeAlignable("true"); } if (typfile.getDatabaseType().equals(ShoeboxEncoder.defaultDBType) && !typfile.tofromHash.containsKey(ltName)) { // root tiers for import of ELAN exported Toolbox files lt.setStereoType(null); lt.setTimeAlignable("true"); } // make first marker under recordMarker a symbolic subdivision of record marker // if not already set by user defined shoebox markers // if ( !typfile.interlinearRootMarker.equals(ltName) && if (typfile.tofromHash.containsKey(ltName) && // if not root tier typfile.fromArray.contains(label) && !typfile.procedureTypeHash.contains(label)) { lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_SUBDIVISION]); lt.setTimeAlignable("false"); } String procType = (String) typfile.procedureTypeHash.get(label); if (procType != null) { if (procType.equals("Lookup")) { lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_ASSOCIATION]); lt.setTimeAlignable("false"); } else if (procType.equals("Parse")) { lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_SUBDIVISION]); lt.setTimeAlignable("false"); } else if (procType.equals("TimeSubdivision")) { lt.setStereoType(Constraint.stereoTypes[Constraint.TIME_SUBDIVISION]); lt.setTimeAlignable("true"); } else if (procType.equals("IncludedIn")) { lt.setStereoType(Constraint.stereoTypes[Constraint.INCLUDED_IN]); lt.setTimeAlignable("true"); } } lingTypeRecords.add(lt); } } return lingTypeRecords; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTimeOrder(java.lang.String) */ public ArrayList getTimeOrder(String fileName) { parse(fileName); ArrayList resultTimeOrder = new ArrayList(); for (int i = 0; i < timeOrder.size(); i++) { resultTimeOrder.add(TS_ID_PREFIX + ((long[]) (timeOrder.get(i)))[0]); } return resultTimeOrder; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTimeSlots(java.lang.String) */ public HashMap getTimeSlots(String fileName) { parse(fileName); // generate HashMap from ArrayList with long[2]'s HashMap resultSlots = new HashMap(); Iterator timeSlotIter = timeSlots.iterator(); while (timeSlotIter.hasNext()) { long[] timeSlot = (long[]) timeSlotIter.next(); String tsId = TS_ID_PREFIX + ((long) timeSlot[0]); String timeValue = Long.toString(((long) timeSlot[1])); resultSlots.put(tsId, timeValue); } return resultSlots; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTierNames(java.lang.String) */ public ArrayList getTierNames(String fileName) { parse(fileName); // strip begin backslashes ArrayList result = new ArrayList(); // add in same order as in shoebox file ArrayList allNames = new ArrayList(tierNameSet); ArrayList markerOrder = sbxfile.getMarkerOrder(); String spk = null; String marker = null; for (int i = 0; i < participantOrder.size(); i++) { spk = (String) participantOrder.get(i); for (int j = 0; j < markerOrder.size(); j++) { marker = (String) markerOrder.get(j); if (allNames.contains(marker + "@" + spk)) { result.add((marker + "@" + spk).substring(1)); } } } /* Iterator iter = tierNameSet.iterator(); while (iter.hasNext()) { String tierName = (String) iter.next(); tierName = tierName.substring(1); result.add(tierName); } */ return result; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getParticipantOf(java.lang.String, java.lang.String) */ public String getParticipantOf(String tierName, String fileName) { String result = ""; parse(fileName); int index = tierName.indexOf("@"); if ((index > 0) && (tierName.length() > (index + 1))) { result = tierName.substring(index + 1); } return result; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getLinguisticTypeIDOf(java.lang.String, java.lang.String) */ public String getLinguisticTypeIDOf(String tierName, String fileName) { String result = tierName; parse(fileName); int index = tierName.indexOf("@"); if (index > 0) { result = tierName.substring(0, index); } return result; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getParentNameOf(java.lang.String, java.lang.String) */ public String getParentNameOf(String tierName, String fileName) { String parentName = null; parse(fileName); String labelPart = "\\" + tierName; String spkr = "@"; int index = tierName.indexOf("@"); if ((index > 0) && (tierName.length() > index)) { labelPart = "\\" + tierName.substring(0, index); spkr = tierName.substring(index); } // use typfile.tofromHash, or parent is typfile.recordMarker if (typfile.tofromHash.keySet().contains(labelPart)) { parentName = ((String) typfile.tofromHash.get(labelPart)).substring(1); } else if ((typfile.interlinearRootMarker != null) && (!tierName.equals(typfile.interlinearRootMarker + spkr))) { parentName = typfile.interlinearRootMarker; } if (parentName != null) { return parentName + spkr; } else { return null; } } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getAnnotationsOf(java.lang.String, java.lang.String) */ public ArrayList getAnnotationsOf(String tierName, String fileName) { parse(fileName); ArrayList resultAnnotRecords = new ArrayList(); tierName = "\\" + tierName; Iterator it = annotRecordToTierMap.keySet().iterator(); while (it.hasNext()) { AnnotationRecord annRec = (AnnotationRecord) it.next();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -