📄 chatparser.java
字号:
/* * File: CHATParser.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *//* * Created on Jun 11, 2004 * * To change the template for this generated file go to * Window>Preferences>Java>Code Generation>Code and Comments */package mpi.eudico.server.corpora.clomimpl.chat;import mpi.eudico.server.corpora.clom.TimeSlot;import mpi.eudico.server.corpora.clomimpl.abstr.MediaDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.Parser;import mpi.eudico.server.corpora.clomimpl.dobes.AnnotationRecord;import mpi.eudico.server.corpora.clomimpl.dobes.LingTypeRecord;import mpi.eudico.server.corpora.clomimpl.type.Constraint;import mpi.util.MimeType;import java.io.BufferedReader;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.IOException;import java.io.InputStreamReader;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.Set;import java.util.StringTokenizer;import java.util.Vector;/** * @author hennie * * To change the template for this generated type comment go to * Window>Preferences>Java>Code Generation>Code and Comments * * @version sep 2005 the constructor is now public giving up the singleton pattern. * The path parameter of all getter methods could be removed in the next parser version * (add a public parse(String path) method) * Hashtable and Vector in Parser have been replaced by HashMap and ArrayList */public class CHATParser extends Parser { //private static CHATParser parser; /** Holds value of property DOCUMENT ME! */ private final static String MAIN_TYPE = "orthography"; /** Holds value of property DOCUMENT ME! */ private final static char TIER_NAME_SEPARATOR = '@'; /** Holds value of property DOCUMENT ME! */ private final static String TS_ID_PREFIX = "ts"; /** Holds value of property DOCUMENT ME! */ private final char BULLET = '\u0015'; private String participantLine = null; private String mediaFileName = null; private ArrayList chatBlocks = new ArrayList(); private ArrayList lingTypeRecords = new ArrayList(); private ArrayList tierNames = new ArrayList(); private HashMap parentHash = new HashMap(); private ArrayList timeOrder = new ArrayList(); private ArrayList timeSlots = new ArrayList(); // of long[2], {id,time} private ArrayList annotationRecords = new ArrayList(); private HashMap annotRecordToTierMap = new HashMap(); private String lastParsed = ""; private BufferedReader br; /** * Private constructor for EAFParser because the Singleton pattern is * applied here. */ public CHATParser() { } /** * The instance method returns the single incarnation of CHATParser to the * caller. * * @return DOCUMENT ME! */ /* public static CHATParser Instance() { if (parser == null) { parser = new CHATParser(); } return parser; } */ /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getMediaDescriptors(java.lang.String) */ public ArrayList getMediaDescriptors(String fileName) { ArrayList mediaDescriptors = new ArrayList(); parse(fileName); String mediaURL = null; if (mediaFileName != null) { mediaURL = pathToURLString(mediaFileName); String mimeType = MimeType.getMimeTypeStringFromExtension(mediaFileName); // String mimeType = MediaDescriptor.WAV_MIME_TYPE; MediaDescriptor md = new MediaDescriptor(mediaURL, mimeType); mediaDescriptors.add(md); } return mediaDescriptors; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getLinguisticTypes(java.lang.String) */ public ArrayList getLinguisticTypes(String fileName) { parse(fileName); if (lingTypeRecords.size() != 0) { return lingTypeRecords; } Set labels = new HashSet(); Iterator blockIter = chatBlocks.iterator(); while (blockIter.hasNext()) { ArrayList block = (ArrayList) blockIter.next(); Iterator lineIter = block.iterator(); while (lineIter.hasNext()) { String[] line = (String[]) lineIter.next(); String lbl = line[0]; if (!lbl.equals("%snd") && !((lbl.length() > 1) && lbl.substring(1).startsWith("%"))) { labels.add(lbl); } } } // create main "orthography" ling type for participant tiers LingTypeRecord orthoType = new LingTypeRecord(); orthoType.setLingTypeId(MAIN_TYPE); orthoType.setTimeAlignable("true"); orthoType.setGraphicReferences("false"); lingTypeRecords.add(orthoType); // for each label, create a matching lingtype Iterator lblIter = labels.iterator(); while (lblIter.hasNext()) { String label = (String) lblIter.next(); if (!label.startsWith("*")) { LingTypeRecord lt = new LingTypeRecord(); lt.setLingTypeId(label); lt.setTimeAlignable("false"); // all symbolic associations of ortho tier lt.setGraphicReferences("false"); lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_ASSOCIATION]); lingTypeRecords.add(lt); } } return lingTypeRecords; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTimeOrder(java.lang.String) */ public ArrayList getTimeOrder(String fileName) { parse(fileName); // compose ordered list of timeslot ids from timeSlots // algorithm: // find first time after t = 0, put slot id in result list // find first time after t or equal to time of last result slot id // terminate when no time found // // handling of unaligned slots: // put all unaligned slots immediately preceding a result slot // immediately before this slot in the result ArrayList unalignedSlots = new ArrayList(); long[] firstSlotAfter = firstTimeSlotAfter(null, unalignedSlots); if (firstSlotAfter == null) { timeOrder.addAll(unalignedSlots); } while (firstSlotAfter != null) { timeOrder.addAll(unalignedSlots); timeOrder.add(firstSlotAfter); unalignedSlots.clear(); firstSlotAfter = firstTimeSlotAfter(firstSlotAfter, unalignedSlots); } // add trailing unaligned timeslots, if any long[] lastAddedSlot = (long[]) timeOrder.get(timeOrder.size() - 1); if (timeSlots.indexOf(lastAddedSlot) != (timeSlots.size() - 1)) { // not last for (int i = timeSlots.indexOf(lastAddedSlot); i < timeSlots.size(); i++) { timeOrder.add(timeSlots.get(i)); if (i == (timeSlots.size() - 1)) { // align last slot manually ((long[]) timeSlots.get(i))[1] = lastAddedSlot[1] + 1000; } } } ArrayList resultTimeOrder = new ArrayList(); for (int i = 0; i < timeOrder.size(); i++) { resultTimeOrder.add(TS_ID_PREFIX + ((long[]) (timeOrder.get(i)))[0]); } return resultTimeOrder; } private long[] firstTimeSlotAfter(long[] afterTimeSlot, ArrayList unalignedSlots) { long[] firstSlot = null; long firstTimeAfter = Long.MAX_VALUE; ArrayList unalignedStore = new ArrayList(); long afterTime = 0; long afterTimeId = -1; if (afterTimeSlot != null) { afterTime = afterTimeSlot[1]; afterTimeId = afterTimeSlot[0]; } Iterator tsIter = timeSlots.iterator(); while (tsIter.hasNext()) { long[] ts = (long[]) tsIter.next(); long time = ts[1]; if (time < 0) { // unaligned unalignedStore.add(ts); } else if ((time >= afterTime) && (time < firstTimeAfter) && (!(ts[0] == afterTimeId)) && (!(timeOrder.contains(ts)))) { firstTimeAfter = time; firstSlot = ts; unalignedSlots.clear(); unalignedSlots.addAll(unalignedStore); unalignedStore.clear(); } else if (time > 0) { // not 'first time after', also not unaligned, so reset unalignedStore.clear(); } } if (firstSlot == null) { // none found unalignedSlots.addAll(unalignedStore); } return firstSlot; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTimeSlots(java.lang.String) */ public HashMap getTimeSlots(String fileName) { parse(fileName); // generate HashMap from ArrayList with long[2]'s HashMap resultSlots = new HashMap(); Iterator timeSlotIter = timeSlots.iterator(); while (timeSlotIter.hasNext()) { long[] timeSlot = (long[]) timeSlotIter.next(); String tsId = TS_ID_PREFIX + ((long) timeSlot[0]);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -