📄 shoeboxparser.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
12 3 4 5 下一页
/* * File:     ShoeboxParser.java * Project:  MPI Linguistic Application * Date:     02 May 2007 * * Copyright (C) 2001-2007  Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *//* * Created on Aug 23, 2004 * * To change the template for this generated file go to * Window&gt;Preferences&gt;Java&gt;Code Generation&gt;Code and Comments */package mpi.eudico.server.corpora.clomimpl.shoebox;import mpi.eudico.server.corpora.clom.DecoderInfo;import mpi.eudico.server.corpora.clom.TimeSlot;import mpi.eudico.server.corpora.clomimpl.abstr.Parser;import mpi.eudico.server.corpora.clomimpl.dobes.AnnotationRecord;import mpi.eudico.server.corpora.clomimpl.dobes.LingTypeRecord;import mpi.eudico.server.corpora.clomimpl.shoebox.utr22.SimpleConverter;import mpi.eudico.server.corpora.clomimpl.type.Constraint;import mpi.eudico.server.util.ServerLogger;import java.io.BufferedReader;import java.io.File;import java.io.FileReader;import java.util.ArrayList;import java.util.Collections;import java.util.Enumeration;import java.util.HashMap;import java.util.Iterator;import java.util.List;import java.util.TreeSet;import java.util.Vector;import javax.swing.tree.DefaultMutableTreeNode;/** * @author hennie * * @version sep 2005 the constructor is now public giving up the singleton pattern. * The path parameter of all getter methods could be removed in the next parser version * (add a public parse(String path) method) * Hashtable and Vector in Parser have been replaced by HashMap and ArrayList * @version may 2006 Shoebox Unicode tiers are now pre-edited in ShoeboxArray, lifting the need for * special treatment in the methods where single words are extracted from the marker lines. */public class ShoeboxParser extends Parser implements ServerLogger {    //private static ShoeboxParser parser;    /** Holds value of property DOCUMENT ME! */    private final static String ANN_ID_PREFIX = "ann";    /** Holds value of property DOCUMENT ME! */    private final static String TS_ID_PREFIX = "ts";    private long annotId = 0;    private long tsId = 0;    private SimpleConverter simpleConverter;    private ShoeboxArray sbxfile; // shoebox transcription file    private ShoeboxTypFile typfile; // shoebox typ file    private ToolboxDecoderInfo decoderInfo;    /**     * Hierachical structure of the tags in the shoebox file. Elements are of     * type String.     */    DefaultMutableTreeNode tiertree = new DefaultMutableTreeNode();    private ArrayList lingTypeRecords = new ArrayList();    private ArrayList participantOrder = new ArrayList();    private TreeSet tierNameSet = new TreeSet();    private HashMap parentHash = new HashMap();    private ArrayList timeOrder = new ArrayList(); // of long[2]    private ArrayList timeSlots = new ArrayList(); // of long[2], {id,time}    private ArrayList annotationRecords = new ArrayList();    private HashMap annotRecordToTierMap = new HashMap();    private String lastParsed = "";    // for calculation of 'root annotation' times    private ArrayList rootSlots = new ArrayList(); // of long[2], {id,time}    private boolean fixImproperAlign = true;    /**     * Public constructor: the Singleton pattern is no longer applied to the parsers.     * Create a new Parser for every file to parse.     */    public ShoeboxParser() {        try {            simpleConverter = new SimpleConverter(null);        } catch (Exception e) {            e.printStackTrace();        }    }    /**     * The instance method returns the single incarnation of CHATParser to the     * caller.     *     * @return DOCUMENT ME!     */    /*    public static ShoeboxParser Instance() {        if (parser == null) {            try {                parser = new ShoeboxParser();            } catch (Exception e) {                e.printStackTrace();            }        }        return parser;    }    */    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getMediaDescriptors(java.lang.String)     */    public ArrayList getMediaDescriptors(String fileName) {        parse(fileName);        return sbxfile.getMediaDescriptors();    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getLinguisticTypes(java.lang.String)     */    public ArrayList getLinguisticTypes(String fileName) {        parse(fileName);        Enumeration en = sbxfile.getLabels();        while (en.hasMoreElements()) {            String label = (String) en.nextElement();            if (!(label.equals(ShoeboxArray.label_eudicoparticipant)) &&                    !(label.equals(ShoeboxArray.label_eudicot0)) &&                    !(label.equals(ShoeboxArray.label_eudicot1)) &&                    !(label.equals(ShoeboxEncoder.elanParticipantLabel)) &&                    !(label.equals(ShoeboxEncoder.elanBeginLabel)) &&                    !(label.equals(ShoeboxEncoder.elanEndLabel)) &&                    !(label.equals(ShoeboxEncoder.elanBlockStart)) &&                    !(label.equals(ShoeboxEncoder.elanELANLabel))) {                String ltName = label.substring(1); // cut off backslash                LingTypeRecord lt = new LingTypeRecord();                lt.setLingTypeId(ltName);                lt.setGraphicReferences("false");                // set defaults                lt.setTimeAlignable("false");                lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_ASSOCIATION]);                // set default for root tier                if ((typfile.interlinearRootMarker != null) &&                        typfile.interlinearRootMarker.equals(ltName)) {                    lt.setStereoType(null);                    lt.setTimeAlignable("true");                }                if (typfile.getDatabaseType().equals(ShoeboxEncoder.defaultDBType) &&                        !typfile.tofromHash.containsKey(ltName)) { // root tiers for import of ELAN exported Toolbox files                    lt.setStereoType(null);                    lt.setTimeAlignable("true");                }                // make first marker under recordMarker a symbolic subdivision of record marker                // if not already set by user defined shoebox markers                //		if (	!typfile.interlinearRootMarker.equals(ltName) &&                if (typfile.tofromHash.containsKey(ltName) && // if not root tier                        typfile.fromArray.contains(label) &&                        !typfile.procedureTypeHash.contains(label)) {                    lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_SUBDIVISION]);                    lt.setTimeAlignable("false");                }                String procType = (String) typfile.procedureTypeHash.get(label);                if (procType != null) {                    if (procType.equals("Lookup")) {                        lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_ASSOCIATION]);                        lt.setTimeAlignable("false");                    } else if (procType.equals("Parse")) {                        lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_SUBDIVISION]);                        lt.setTimeAlignable("false");                    } else if (procType.equals("TimeSubdivision")) {                        lt.setStereoType(Constraint.stereoTypes[Constraint.TIME_SUBDIVISION]);                        lt.setTimeAlignable("true");                    } else if (procType.equals("IncludedIn")) {                        lt.setStereoType(Constraint.stereoTypes[Constraint.INCLUDED_IN]);                        lt.setTimeAlignable("true");                    }                }                lingTypeRecords.add(lt);            }        }        return lingTypeRecords;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTimeOrder(java.lang.String)     */    public ArrayList getTimeOrder(String fileName) {        parse(fileName);        ArrayList resultTimeOrder = new ArrayList();        for (int i = 0; i < timeOrder.size(); i++) {            resultTimeOrder.add(TS_ID_PREFIX +                ((long[]) (timeOrder.get(i)))[0]);        }        return resultTimeOrder;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTimeSlots(java.lang.String)     */    public HashMap getTimeSlots(String fileName) {        parse(fileName);        // generate HashMap from ArrayList with long[2]'s        HashMap resultSlots = new HashMap();        Iterator timeSlotIter = timeSlots.iterator();        while (timeSlotIter.hasNext()) {            long[] timeSlot = (long[]) timeSlotIter.next();            String tsId = TS_ID_PREFIX + ((long) timeSlot[0]);            String timeValue = Long.toString(((long) timeSlot[1]));            resultSlots.put(tsId, timeValue);        }        return resultSlots;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTierNames(java.lang.String)     */    public ArrayList getTierNames(String fileName) {        parse(fileName);        // strip begin backslashes        ArrayList result = new ArrayList();        // add in same order as in shoebox file        ArrayList allNames = new ArrayList(tierNameSet);        ArrayList markerOrder = sbxfile.getMarkerOrder();        String spk = null;        String marker = null;        for (int i = 0; i < participantOrder.size(); i++) {            spk = (String) participantOrder.get(i);            for (int j = 0; j < markerOrder.size(); j++) {                marker = (String) markerOrder.get(j);                if (allNames.contains(marker + "@" + spk)) {                    result.add((marker + "@" + spk).substring(1));                }            }        }        /*        Iterator iter = tierNameSet.iterator();        while (iter.hasNext()) {            String tierName = (String) iter.next();            tierName = tierName.substring(1);            result.add(tierName);        }        */        return result;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getParticipantOf(java.lang.String, java.lang.String)     */    public String getParticipantOf(String tierName, String fileName) {        String result = "";        parse(fileName);        int index = tierName.indexOf("@");        if ((index > 0) && (tierName.length() > (index + 1))) {            result = tierName.substring(index + 1);        }        return result;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getLinguisticTypeIDOf(java.lang.String, java.lang.String)     */    public String getLinguisticTypeIDOf(String tierName, String fileName) {        String result = tierName;        parse(fileName);        int index = tierName.indexOf("@");        if (index > 0) {            result = tierName.substring(0, index);        }        return result;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getParentNameOf(java.lang.String, java.lang.String)     */    public String getParentNameOf(String tierName, String fileName) {        String parentName = null;        parse(fileName);        String labelPart = "\\" + tierName;        String spkr = "@";        int index = tierName.indexOf("@");        if ((index > 0) && (tierName.length() > index)) {            labelPart = "\\" + tierName.substring(0, index);            spkr = tierName.substring(index);        }        // use typfile.tofromHash, or parent is typfile.recordMarker        if (typfile.tofromHash.keySet().contains(labelPart)) {            parentName = ((String) typfile.tofromHash.get(labelPart)).substring(1);        } else if ((typfile.interlinearRootMarker != null) &&                (!tierName.equals(typfile.interlinearRootMarker + spkr))) {            parentName = typfile.interlinearRootMarker;        }        if (parentName != null) {            return parentName + spkr;        } else {            return null;        }    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getAnnotationsOf(java.lang.String, java.lang.String)     */    public ArrayList getAnnotationsOf(String tierName, String fileName) {        parse(fileName);        ArrayList resultAnnotRecords = new ArrayList();        tierName = "\\" + tierName;        Iterator it = annotRecordToTierMap.keySet().iterator();        while (it.hasNext()) {            AnnotationRecord annRec = (AnnotationRecord) it.next();
12 3 4 5 下一页
💿 文件大小 23621 K
👤 上传用户 ccuading
📂 所属分类 Java编程
🏷️ 相关标签

#编辑 #视频
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -