⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 shoeboxarray.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
/* * File:     ShoeboxArray.java * Project:  MPI Linguistic Application * Date:     02 May 2007 * * Copyright (C) 2001-2007  Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *//* * $Id: ShoeboxArray.java,v 1.26 2007/02/27 10:27:47 hasloe Exp $ */package mpi.eudico.server.corpora.clomimpl.shoebox;import mpi.eudico.server.corpora.clomimpl.abstr.MediaDescriptor;import org.w3c.dom.Document;import org.w3c.dom.Element;import org.w3c.dom.NodeList;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.InputStreamReader;import java.io.Reader;import java.util.ArrayList;import java.util.Enumeration;import java.util.HashSet;import java.util.Hashtable;import java.util.Iterator;import java.util.StringTokenizer;import java.util.Vector;import javax.swing.tree.DefaultMutableTreeNode;import javax.xml.parsers.DocumentBuilder;import javax.xml.parsers.DocumentBuilderFactory;/** * For parsing the shoeboxfile, I first read it into an array because:  - Each * line of the shoeboxfile has to be adressed over and over again - * shoeboxfile is organized into blocks and tiers, which translate to rows and * columns. - Shoebox wraps a a line at 80 characters. As a parse * preprocessing, the hardwrap of the shoeboxfile is removed.     For * inter-tier processing of annotation, the ShoeboxArray is a better starting * point as a shoebox file. */public class ShoeboxArray {    /** Holds value of property DOCUMENT ME! */    public static String label_eudicoparticipant = "\\EUDICOp";    /** Holds value of property DOCUMENT ME! */    public static String label_eudicot0 = "\\EUDICOt0";    /** Holds value of property DOCUMENT ME! */    public static String label_eudicot1 = "\\EUDICOt1";    //    private static Logger logger = Logger.getLogger(ShoeboxArray.class.getName());    private String shoeboxheader = "";    private String label_ref;    /** Holds value of property DOCUMENT ME! */    //    public static String label_eudicot2 = "\\EUDICOt2";    //    private String lastlabel = "";    private int lastt0 = 0;    private int lastt1 = 1;    private File file = null;    // store the marker order as encountered in the file    private ArrayList markerOrder = new ArrayList();    // store the previous label name to determine the placee in the list    private String prevLabel = null;    private Vector labelList = new Vector();    private Vector labelNodeList = new Vector();    private String[][] shoeboxArray;    private int[] shoeboxArrayMaxLength;    private int currentIndexBlock = -1; // increments *before* \ref    private int currentIndexLabel = -1; // initial no-sense value    private int maxIndexBlocks;    private boolean isShoeboxArrayPreparation = true;    private boolean strict1;    private HashSet interlinearTierMarkers;    private Hashtable lineCounts = new Hashtable(); // stores number of lines in block for each interlinear tier    private boolean readingWAC = false;    //private boolean completelyUnaligned = true;    //private boolean treatAsUnaligned = true;	// temp solution, until interpolated times are supported    private ArrayList mediaDescriptors = new ArrayList();    private ShoeboxTypFile typFile;    /**     * Reads a Shoebox file into an array.     *     * @param file the shoeboxfile     * @param label_ref record marker (including leading "\")     * @param theTypFile the ShoeboxTypeFile object     *     * @throws Exception DOCUMENT ME!     */    public ShoeboxArray(File file, String label_ref, ShoeboxTypFile theTypFile)        throws Exception {        //this(file, true, label_ref);        this(file, false, label_ref, theTypFile); // HB, 23 jul 02, set strict1 to false        // HB, 30 jul 02, added ilTierMarkers    }    /**     * Reads a Shoebox file into an array.     *     * @param file the shoebofile     * @param strict1 line must start with label (e.g. \ref). ALWAYS TRUE     * @param label_ref record marker (including leading "\")     * @param theTypFile the ShoeboxTypeFile object     *     * @throws Exception DOCUMENT ME!     */    public ShoeboxArray(File file, boolean strict1, String label_ref,            //       HashSet theInterlinearTierMarkers) throws Exception {    ShoeboxTypFile theTypFile) throws Exception {        if (!file.canRead()) {            throw new Exception("cannot read \"" + file + "\"");        }        this.file = file;        this.typFile = theTypFile;        //strict1 = true; //strict1;        this.strict1 = strict1; // HB, 23 jul 02: why else have extra argument?        define_default_labels(label_ref);        if (label_ref == null) { // set to default            label_ref = ShoeboxEncoder.elanBlockStart;        }        this.label_ref = label_ref;        this.interlinearTierMarkers = theTypFile.getInterlinearTierMarkers();        //       define_default_labels(label_ref);        //       logger.log(Level.FINE, "preparation START");        readSbx();        maxIndexBlocks = currentIndexBlock;        //        logger.log(Level.FINE,        //            "preparation STOP, found " + getNumberOfBlocks() + " blocks");        /*for (int xx = 0; xx < getNumberOfLabels(); xx++) {           logger.log(Level.FINE, "label " + xx + " = " + getLabel(xx));           }*/        shoeboxArray = new String[getNumberOfLabels()][getNumberOfBlocks()];        //logger.log(Level.FINE, getNumberOfLabels()+ " ]creating array[ " + getNumberOfBlocks());        shoeboxArrayMaxLength = new int[getNumberOfBlocks()];        currentIndexBlock = -1; // reset        //        logger.log(Level.FINE, "reading START");        readSbx();        //       logger.log(Level.FINE, "reading STOP");    }    /**     * Reads a WAC file into the array.     *     * @param wacfile the wacfile     *     * @throws Exception DOCUMENT ME!     */    public ShoeboxArray(File wacfile) throws Exception {        readingWAC = true;        define_default_labels("\\ref");        DocumentBuilder db = DocumentBuilderFactory.newInstance()                                                   .newDocumentBuilder();        Document doc = db.parse(wacfile);        readWac(doc); // prepare 1/2        prepare_or_finish_block(); // prepare 2/2         isShoeboxArrayPreparation = false; //         shoeboxArray = new String[getNumberOfLabels()][getNumberOfBlocks()];        shoeboxArrayMaxLength = new int[getNumberOfBlocks()];        currentIndexBlock = -1; // reset        readWac(doc); // store        prepare_or_finish_block(); // ...     }    /////////////////    final public String getShoeboxHeader() {        return shoeboxheader;    }    /**     * get value of array     *     * @param label DOCUMENT ME!     * @param block DOCUMENT ME!     *     * @return DOCUMENT ME!     */    final public String getCell(int label, int block) {        String result = null;        //       logger.log(Level.FINE, " --- getCell(" + label + ", " + block);        result = shoeboxArray[label][block];        //        logger.log(Level.FINE, " --- getCell == '" + result + "'");        //alway return the max length padded result        if ((label > 3) && (result != null)) { // HB, 23 jul 02: added null check	            while (result.length() < shoeboxArrayMaxLength[block]) {                result = result + " ";            }        }        return result;    }    /**     * DOCUMENT ME!     *     * @param label DOCUMENT ME!     * @param block DOCUMENT ME!     *     * @return DOCUMENT ME!     */    final public String getCell(String label, int block) { //throws Exception{        //        logger.log(Level.FINE, " --- getCell(" + label + ", " + block);        int x = labelList.indexOf(label);        if (x < 0) {            /*            JOptionPane.showMessageDialog(null,                (getClass() + ".getCell(" + label + ", " + block +                ") \n FATAL ERROR"), "", JOptionPane.ERROR_MESSAGE);            */            System.out.println(getClass() + ".getCell(" + label + ", " + block +                ") \n FATAL ERROR");            return "";        }        //if (x < 0) throw new Exception("label '"+ label + "' does not exist.");        return getCell(x, block);    }    /**     * DOCUMENT ME!     *     * @param label DOCUMENT ME!     * @param block DOCUMENT ME!     * @param value DOCUMENT ME!     */    final private void setC(int label, int block, String value) {        shoeboxArray[label][block] = value;    }    /**     * DOCUMENT ME!     *     * @param block the block/row     *     * @return the name of the speaker of given block     */    final public String getSpeaker(int block) {        String result = getCell(ShoeboxEncoder.elanParticipantLabel, block);        if ((result == null) || (result.length() == 0)) {            result = "unknown";        }        result = result.trim(); // HB, 24-8-04        return result;    }    /**     * Returns the begin time value as read from the file, or     * the default of -1, if no time information was found.     *     * @param block the block/row     *     * @return t0 of given block in milliseconds     */    final public long getT0(int block) {        long t0 = getTX(ShoeboxEncoder.elanBeginLabel, block);        /*        if (treatAsUnaligned) {            //t0 = block * 1000;            t0 = block * ShoeboxPreferences.preferredBlockDuration;        }        */        if ((block == 0) && (t0 < 0)) {            t0 = 0;        }        return t0;    }    /**     * Returns the end time value as read from the file, or     * the default of -1, if no time information was found.     *     * @param block the block/row     *     * @return t0 of given block in milliseconds     */    final public long getT1(int block) {        long t1 = getTX(ShoeboxEncoder.elanEndLabel, block);        /*        if (treatAsUnaligned) {            //t1 = (block + 1) * 1000;            t1 = (block + 1) * ShoeboxPreferences.preferredBlockDuration;        }        */        return t1;    }    /**     * DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public ArrayList getMediaDescriptors() {        return mediaDescriptors;    }    /**     * DOCUMENT ME!     *     * @param label DOCUMENT ME!     * @param block the block/row     *     * @return t0 of given block in milliseconds     */    final private long getTX(String label, int block) {        String sresult = getCell(label, block);        long result = 0;        try {            double d = Double.parseDouble(sresult);            // seconds to milliseconds            d = d * 1000d;            if (d == -1000) { // correct unaligned                d = -1;            }            //Double dd = new Double(d);            //result = dd.longValue();            // Double.longValue just casts the double to long            result = (long) d;            //result = Long.parseLong(sresult);        } catch (NumberFormatException e) {            //System.out.println(" ======= getTX " + block + " found " + sresult);            return toMilliSeconds(sresult, block);        }        return result;    }    /**     * Converts a time definition in the format hh:mm:ss.sss into a long that     * contains the time in milli seconds.     * Copied from mpi.eudico.client.util.TimeFormatter     *     * @param timeString the string that contains the time in the format     *        hh:mm:ss.sss     * @param block the block index that the time belongs to (for error report)     *     * @return the time in seconds, -1.0 if the time string has an illegal     *         format     */    public long toMilliSeconds(String timeString, int block) {        try {            String hourString = new String("0.0");            String minuteString = new String("0.0");            String secondString = new String("0.0");            int mark1 = timeString.indexOf(':', 0);            if (mark1 == -1) { // no :, so interpret string as sss.ss                secondString = timeString;            } else {                int mark2 = timeString.indexOf(':', mark1 + 1);                if (mark2 == -1) { // only one :, so interpret string as mm:ss.sss                    minuteString = timeString.substring(0, mark1);                    secondString = timeString.substring(mark1 + 1,                            timeString.length());                } else { // two :, so interpret string as hh:mm:ss.sss                    hourString = timeString.substring(0, mark1);                    minuteString = timeString.substring(mark1 + 1, mark2);                    secondString = timeString.substring(mark2 + 1,                            timeString.length());                }            }            double hours = Double.valueOf(hourString).doubleValue();            double minutes = Double.valueOf(minuteString).doubleValue();            double seconds = Double.valueOf(secondString).doubleValue();            return (long) (1000 * ((hours * 3600.0) + (minutes * 60.0) +            seconds));        } catch (Exception e) { // the timeString was not parseable            System.out.println("TX: " + block + " unknown time format: " +                timeString);            return -1;        }    }    /**     * Returns the ordered list of markers.     * @return the list of markers     */    public final ArrayList getMarkerOrder() {        return markerOrder;    }    /**     * DOCUMENT ME!     *     * @return DOCUMENT ME!     */    final public int getNumberOfLabels() {        return labelList.size();    }    /**     * DOCUMENT ME!     *     * @return DOCUMENT ME!     */    final public Enumeration getLabels() {        return labelList.elements();    }    /**     * DOCUMENT ME!     *     * @param i DOCUMENT ME!     *     * @return DOCUMENT ME!

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -