⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chatparser.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
/* * File:     CHATParser.java * Project:  MPI Linguistic Application * Date:     02 May 2007 * * Copyright (C) 2001-2007  Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *//* * Created on Jun 11, 2004 * * To change the template for this generated file go to * Window&gt;Preferences&gt;Java&gt;Code Generation&gt;Code and Comments */package mpi.eudico.server.corpora.clomimpl.chat;import mpi.eudico.server.corpora.clom.TimeSlot;import mpi.eudico.server.corpora.clomimpl.abstr.MediaDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.Parser;import mpi.eudico.server.corpora.clomimpl.dobes.AnnotationRecord;import mpi.eudico.server.corpora.clomimpl.dobes.LingTypeRecord;import mpi.eudico.server.corpora.clomimpl.type.Constraint;import mpi.util.MimeType;import java.io.BufferedReader;import java.io.FileInputStream;import java.io.FileNotFoundException;import java.io.FileReader;import java.io.IOException;import java.io.InputStreamReader;import java.util.ArrayList;import java.util.HashMap;import java.util.HashSet;import java.util.Iterator;import java.util.Set;import java.util.StringTokenizer;import java.util.Vector;/** * @author hennie * * To change the template for this generated type comment go to * Window&gt;Preferences&gt;Java&gt;Code Generation&gt;Code and Comments * * @version sep 2005 the constructor is now public giving up the singleton pattern. * The path parameter of all getter methods could be removed in the next parser version * (add a public parse(String path) method) * Hashtable and Vector in Parser have been replaced by HashMap and ArrayList */public class CHATParser extends Parser {    //private static CHATParser parser;    /** Holds value of property DOCUMENT ME! */    private final static String MAIN_TYPE = "orthography";    /** Holds value of property DOCUMENT ME! */    private final static char TIER_NAME_SEPARATOR = '@';    /** Holds value of property DOCUMENT ME! */    private final static String TS_ID_PREFIX = "ts";    /** Holds value of property DOCUMENT ME! */    private final char BULLET = '\u0015';    private String participantLine = null;    private String mediaFileName = null;    private ArrayList chatBlocks = new ArrayList();    private ArrayList lingTypeRecords = new ArrayList();    private ArrayList tierNames = new ArrayList();    private HashMap parentHash = new HashMap();    private ArrayList timeOrder = new ArrayList();    private ArrayList timeSlots = new ArrayList(); // of long[2], {id,time}    private ArrayList annotationRecords = new ArrayList();    private HashMap annotRecordToTierMap = new HashMap();    private String lastParsed = "";    private BufferedReader br;    /**     * Private constructor for EAFParser because the Singleton pattern is     * applied here.     */    public CHATParser() {    }    /**     * The instance method returns the single incarnation of CHATParser to the     * caller.     *     * @return DOCUMENT ME!     */    /*    public static CHATParser Instance() {        if (parser == null) {            parser = new CHATParser();        }        return parser;    }     */    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getMediaDescriptors(java.lang.String)     */    public ArrayList getMediaDescriptors(String fileName) {        ArrayList mediaDescriptors = new ArrayList();        parse(fileName);        String mediaURL = null;        if (mediaFileName != null) {            mediaURL = pathToURLString(mediaFileName);            String mimeType = MimeType.getMimeTypeStringFromExtension(mediaFileName);            //		String mimeType = MediaDescriptor.WAV_MIME_TYPE;            MediaDescriptor md = new MediaDescriptor(mediaURL, mimeType);            mediaDescriptors.add(md);        }        return mediaDescriptors;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getLinguisticTypes(java.lang.String)     */    public ArrayList getLinguisticTypes(String fileName) {        parse(fileName);        if (lingTypeRecords.size() != 0) {            return lingTypeRecords;        }        Set labels = new HashSet();        Iterator blockIter = chatBlocks.iterator();        while (blockIter.hasNext()) {            ArrayList block = (ArrayList) blockIter.next();            Iterator lineIter = block.iterator();            while (lineIter.hasNext()) {                String[] line = (String[]) lineIter.next();                String lbl = line[0];                if (!lbl.equals("%snd") &&                        !((lbl.length() > 1) &&                        lbl.substring(1).startsWith("%"))) {                    labels.add(lbl);                }            }        }        // create main "orthography" ling type for participant tiers        LingTypeRecord orthoType = new LingTypeRecord();        orthoType.setLingTypeId(MAIN_TYPE);        orthoType.setTimeAlignable("true");        orthoType.setGraphicReferences("false");        lingTypeRecords.add(orthoType);        // for each label, create a matching lingtype        Iterator lblIter = labels.iterator();        while (lblIter.hasNext()) {            String label = (String) lblIter.next();            if (!label.startsWith("*")) {                LingTypeRecord lt = new LingTypeRecord();                lt.setLingTypeId(label);                lt.setTimeAlignable("false"); // all symbolic associations of ortho tier                lt.setGraphicReferences("false");                lt.setStereoType(Constraint.stereoTypes[Constraint.SYMBOLIC_ASSOCIATION]);                lingTypeRecords.add(lt);            }        }        return lingTypeRecords;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTimeOrder(java.lang.String)     */    public ArrayList getTimeOrder(String fileName) {        parse(fileName);        // compose ordered list of timeslot ids from timeSlots        // algorithm:        // find first time after t = 0, put slot id in result list        // find first time after t or equal to time of last result slot id        // terminate when no time found        //        // handling of unaligned slots:        // put all unaligned slots immediately preceding a result slot         // immediately before this slot in the result        ArrayList unalignedSlots = new ArrayList();        long[] firstSlotAfter = firstTimeSlotAfter(null, unalignedSlots);        if (firstSlotAfter == null) {            timeOrder.addAll(unalignedSlots);        }        while (firstSlotAfter != null) {            timeOrder.addAll(unalignedSlots);            timeOrder.add(firstSlotAfter);            unalignedSlots.clear();            firstSlotAfter = firstTimeSlotAfter(firstSlotAfter, unalignedSlots);        }        // add trailing unaligned timeslots, if any        long[] lastAddedSlot = (long[]) timeOrder.get(timeOrder.size() - 1);        if (timeSlots.indexOf(lastAddedSlot) != (timeSlots.size() - 1)) { // not last            for (int i = timeSlots.indexOf(lastAddedSlot);                    i < timeSlots.size(); i++) {                timeOrder.add(timeSlots.get(i));                if (i == (timeSlots.size() - 1)) { // align last slot manually                    ((long[]) timeSlots.get(i))[1] = lastAddedSlot[1] + 1000;                }            }        }        ArrayList resultTimeOrder = new ArrayList();        for (int i = 0; i < timeOrder.size(); i++) {            resultTimeOrder.add(TS_ID_PREFIX +                ((long[]) (timeOrder.get(i)))[0]);        }        return resultTimeOrder;    }    private long[] firstTimeSlotAfter(long[] afterTimeSlot,        ArrayList unalignedSlots) {        long[] firstSlot = null;        long firstTimeAfter = Long.MAX_VALUE;        ArrayList unalignedStore = new ArrayList();        long afterTime = 0;        long afterTimeId = -1;        if (afterTimeSlot != null) {            afterTime = afterTimeSlot[1];            afterTimeId = afterTimeSlot[0];        }        Iterator tsIter = timeSlots.iterator();        while (tsIter.hasNext()) {            long[] ts = (long[]) tsIter.next();            long time = ts[1];            if (time < 0) { // unaligned                unalignedStore.add(ts);            } else if ((time >= afterTime) && (time < firstTimeAfter) &&                    (!(ts[0] == afterTimeId)) && (!(timeOrder.contains(ts)))) {                firstTimeAfter = time;                firstSlot = ts;                unalignedSlots.clear();                unalignedSlots.addAll(unalignedStore);                unalignedStore.clear();            } else if (time > 0) { // not 'first time after', also not unaligned, so reset                unalignedStore.clear();            }        }        if (firstSlot == null) { // none found            unalignedSlots.addAll(unalignedStore);        }        return firstSlot;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTimeSlots(java.lang.String)     */    public HashMap getTimeSlots(String fileName) {        parse(fileName);        // generate HashMap from ArrayList with long[2]'s        HashMap resultSlots = new HashMap();        Iterator timeSlotIter = timeSlots.iterator();        while (timeSlotIter.hasNext()) {            long[] timeSlot = (long[]) timeSlotIter.next();            String tsId = TS_ID_PREFIX + ((long) timeSlot[0]);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -