📄 shoeboxarray.java
字号:
/* * File: ShoeboxArray.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *//* * $Id: ShoeboxArray.java,v 1.26 2007/02/27 10:27:47 hasloe Exp $ */package mpi.eudico.server.corpora.clomimpl.shoebox;import mpi.eudico.server.corpora.clomimpl.abstr.MediaDescriptor;import org.w3c.dom.Document;import org.w3c.dom.Element;import org.w3c.dom.NodeList;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.InputStreamReader;import java.io.Reader;import java.util.ArrayList;import java.util.Enumeration;import java.util.HashSet;import java.util.Hashtable;import java.util.Iterator;import java.util.StringTokenizer;import java.util.Vector;import javax.swing.tree.DefaultMutableTreeNode;import javax.xml.parsers.DocumentBuilder;import javax.xml.parsers.DocumentBuilderFactory;/** * For parsing the shoeboxfile, I first read it into an array because: - Each * line of the shoeboxfile has to be adressed over and over again - * shoeboxfile is organized into blocks and tiers, which translate to rows and * columns. - Shoebox wraps a a line at 80 characters. As a parse * preprocessing, the hardwrap of the shoeboxfile is removed. For * inter-tier processing of annotation, the ShoeboxArray is a better starting * point as a shoebox file. */public class ShoeboxArray { /** Holds value of property DOCUMENT ME! */ public static String label_eudicoparticipant = "\\EUDICOp"; /** Holds value of property DOCUMENT ME! */ public static String label_eudicot0 = "\\EUDICOt0"; /** Holds value of property DOCUMENT ME! */ public static String label_eudicot1 = "\\EUDICOt1"; // private static Logger logger = Logger.getLogger(ShoeboxArray.class.getName()); private String shoeboxheader = ""; private String label_ref; /** Holds value of property DOCUMENT ME! */ // public static String label_eudicot2 = "\\EUDICOt2"; // private String lastlabel = ""; private int lastt0 = 0; private int lastt1 = 1; private File file = null; // store the marker order as encountered in the file private ArrayList markerOrder = new ArrayList(); // store the previous label name to determine the placee in the list private String prevLabel = null; private Vector labelList = new Vector(); private Vector labelNodeList = new Vector(); private String[][] shoeboxArray; private int[] shoeboxArrayMaxLength; private int currentIndexBlock = -1; // increments *before* \ref private int currentIndexLabel = -1; // initial no-sense value private int maxIndexBlocks; private boolean isShoeboxArrayPreparation = true; private boolean strict1; private HashSet interlinearTierMarkers; private Hashtable lineCounts = new Hashtable(); // stores number of lines in block for each interlinear tier private boolean readingWAC = false; //private boolean completelyUnaligned = true; //private boolean treatAsUnaligned = true; // temp solution, until interpolated times are supported private ArrayList mediaDescriptors = new ArrayList(); private ShoeboxTypFile typFile; /** * Reads a Shoebox file into an array. * * @param file the shoeboxfile * @param label_ref record marker (including leading "\") * @param theTypFile the ShoeboxTypeFile object * * @throws Exception DOCUMENT ME! */ public ShoeboxArray(File file, String label_ref, ShoeboxTypFile theTypFile) throws Exception { //this(file, true, label_ref); this(file, false, label_ref, theTypFile); // HB, 23 jul 02, set strict1 to false // HB, 30 jul 02, added ilTierMarkers } /** * Reads a Shoebox file into an array. * * @param file the shoebofile * @param strict1 line must start with label (e.g. \ref). ALWAYS TRUE * @param label_ref record marker (including leading "\") * @param theTypFile the ShoeboxTypeFile object * * @throws Exception DOCUMENT ME! */ public ShoeboxArray(File file, boolean strict1, String label_ref, // HashSet theInterlinearTierMarkers) throws Exception { ShoeboxTypFile theTypFile) throws Exception { if (!file.canRead()) { throw new Exception("cannot read \"" + file + "\""); } this.file = file; this.typFile = theTypFile; //strict1 = true; //strict1; this.strict1 = strict1; // HB, 23 jul 02: why else have extra argument? define_default_labels(label_ref); if (label_ref == null) { // set to default label_ref = ShoeboxEncoder.elanBlockStart; } this.label_ref = label_ref; this.interlinearTierMarkers = theTypFile.getInterlinearTierMarkers(); // define_default_labels(label_ref); // logger.log(Level.FINE, "preparation START"); readSbx(); maxIndexBlocks = currentIndexBlock; // logger.log(Level.FINE, // "preparation STOP, found " + getNumberOfBlocks() + " blocks"); /*for (int xx = 0; xx < getNumberOfLabels(); xx++) { logger.log(Level.FINE, "label " + xx + " = " + getLabel(xx)); }*/ shoeboxArray = new String[getNumberOfLabels()][getNumberOfBlocks()]; //logger.log(Level.FINE, getNumberOfLabels()+ " ]creating array[ " + getNumberOfBlocks()); shoeboxArrayMaxLength = new int[getNumberOfBlocks()]; currentIndexBlock = -1; // reset // logger.log(Level.FINE, "reading START"); readSbx(); // logger.log(Level.FINE, "reading STOP"); } /** * Reads a WAC file into the array. * * @param wacfile the wacfile * * @throws Exception DOCUMENT ME! */ public ShoeboxArray(File wacfile) throws Exception { readingWAC = true; define_default_labels("\\ref"); DocumentBuilder db = DocumentBuilderFactory.newInstance() .newDocumentBuilder(); Document doc = db.parse(wacfile); readWac(doc); // prepare 1/2 prepare_or_finish_block(); // prepare 2/2 isShoeboxArrayPreparation = false; // shoeboxArray = new String[getNumberOfLabels()][getNumberOfBlocks()]; shoeboxArrayMaxLength = new int[getNumberOfBlocks()]; currentIndexBlock = -1; // reset readWac(doc); // store prepare_or_finish_block(); // ... } ///////////////// final public String getShoeboxHeader() { return shoeboxheader; } /** * get value of array * * @param label DOCUMENT ME! * @param block DOCUMENT ME! * * @return DOCUMENT ME! */ final public String getCell(int label, int block) { String result = null; // logger.log(Level.FINE, " --- getCell(" + label + ", " + block); result = shoeboxArray[label][block]; // logger.log(Level.FINE, " --- getCell == '" + result + "'"); //alway return the max length padded result if ((label > 3) && (result != null)) { // HB, 23 jul 02: added null check while (result.length() < shoeboxArrayMaxLength[block]) { result = result + " "; } } return result; } /** * DOCUMENT ME! * * @param label DOCUMENT ME! * @param block DOCUMENT ME! * * @return DOCUMENT ME! */ final public String getCell(String label, int block) { //throws Exception{ // logger.log(Level.FINE, " --- getCell(" + label + ", " + block); int x = labelList.indexOf(label); if (x < 0) { /* JOptionPane.showMessageDialog(null, (getClass() + ".getCell(" + label + ", " + block + ") \n FATAL ERROR"), "", JOptionPane.ERROR_MESSAGE); */ System.out.println(getClass() + ".getCell(" + label + ", " + block + ") \n FATAL ERROR"); return ""; } //if (x < 0) throw new Exception("label '"+ label + "' does not exist."); return getCell(x, block); } /** * DOCUMENT ME! * * @param label DOCUMENT ME! * @param block DOCUMENT ME! * @param value DOCUMENT ME! */ final private void setC(int label, int block, String value) { shoeboxArray[label][block] = value; } /** * DOCUMENT ME! * * @param block the block/row * * @return the name of the speaker of given block */ final public String getSpeaker(int block) { String result = getCell(ShoeboxEncoder.elanParticipantLabel, block); if ((result == null) || (result.length() == 0)) { result = "unknown"; } result = result.trim(); // HB, 24-8-04 return result; } /** * Returns the begin time value as read from the file, or * the default of -1, if no time information was found. * * @param block the block/row * * @return t0 of given block in milliseconds */ final public long getT0(int block) { long t0 = getTX(ShoeboxEncoder.elanBeginLabel, block); /* if (treatAsUnaligned) { //t0 = block * 1000; t0 = block * ShoeboxPreferences.preferredBlockDuration; } */ if ((block == 0) && (t0 < 0)) { t0 = 0; } return t0; } /** * Returns the end time value as read from the file, or * the default of -1, if no time information was found. * * @param block the block/row * * @return t0 of given block in milliseconds */ final public long getT1(int block) { long t1 = getTX(ShoeboxEncoder.elanEndLabel, block); /* if (treatAsUnaligned) { //t1 = (block + 1) * 1000; t1 = (block + 1) * ShoeboxPreferences.preferredBlockDuration; } */ return t1; } /** * DOCUMENT ME! * * @return DOCUMENT ME! */ public ArrayList getMediaDescriptors() { return mediaDescriptors; } /** * DOCUMENT ME! * * @param label DOCUMENT ME! * @param block the block/row * * @return t0 of given block in milliseconds */ final private long getTX(String label, int block) { String sresult = getCell(label, block); long result = 0; try { double d = Double.parseDouble(sresult); // seconds to milliseconds d = d * 1000d; if (d == -1000) { // correct unaligned d = -1; } //Double dd = new Double(d); //result = dd.longValue(); // Double.longValue just casts the double to long result = (long) d; //result = Long.parseLong(sresult); } catch (NumberFormatException e) { //System.out.println(" ======= getTX " + block + " found " + sresult); return toMilliSeconds(sresult, block); } return result; } /** * Converts a time definition in the format hh:mm:ss.sss into a long that * contains the time in milli seconds. * Copied from mpi.eudico.client.util.TimeFormatter * * @param timeString the string that contains the time in the format * hh:mm:ss.sss * @param block the block index that the time belongs to (for error report) * * @return the time in seconds, -1.0 if the time string has an illegal * format */ public long toMilliSeconds(String timeString, int block) { try { String hourString = new String("0.0"); String minuteString = new String("0.0"); String secondString = new String("0.0"); int mark1 = timeString.indexOf(':', 0); if (mark1 == -1) { // no :, so interpret string as sss.ss secondString = timeString; } else { int mark2 = timeString.indexOf(':', mark1 + 1); if (mark2 == -1) { // only one :, so interpret string as mm:ss.sss minuteString = timeString.substring(0, mark1); secondString = timeString.substring(mark1 + 1, timeString.length()); } else { // two :, so interpret string as hh:mm:ss.sss hourString = timeString.substring(0, mark1); minuteString = timeString.substring(mark1 + 1, mark2); secondString = timeString.substring(mark2 + 1, timeString.length()); } } double hours = Double.valueOf(hourString).doubleValue(); double minutes = Double.valueOf(minuteString).doubleValue(); double seconds = Double.valueOf(secondString).doubleValue(); return (long) (1000 * ((hours * 3600.0) + (minutes * 60.0) + seconds)); } catch (Exception e) { // the timeString was not parseable System.out.println("TX: " + block + " unknown time format: " + timeString); return -1; } } /** * Returns the ordered list of markers. * @return the list of markers */ public final ArrayList getMarkerOrder() { return markerOrder; } /** * DOCUMENT ME! * * @return DOCUMENT ME! */ final public int getNumberOfLabels() { return labelList.size(); } /** * DOCUMENT ME! * * @return DOCUMENT ME! */ final public Enumeration getLabels() { return labelList.elements(); } /** * DOCUMENT ME! * * @param i DOCUMENT ME! * * @return DOCUMENT ME!
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -