📄 chatparser.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
            String timeValue = Long.toString(((long) timeSlot[1]));            resultSlots.put(tsId, timeValue);        }        return resultSlots;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTierNames(java.lang.String)     */    public ArrayList getTierNames(String fileName) {        // tierNames in ELAN are either the main tier '*PAR' labels, or        // the combination of tier label plus participant, like '%mor@PAR'        parse(fileName);        return tierNames;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getParticipantOf(java.lang.String, java.lang.String)     */    public String getParticipantOf(String tierName, String fileName) {        String participant = "";        if (tierName.startsWith("*")) {            participant = tierName.substring(1); // main tier label without *        } else {            int i = tierName.indexOf(TIER_NAME_SEPARATOR); // part of tier name after @            if ((i > 0) && (tierName.length() > (i + 2))) {                participant = tierName.substring(i + 1);            }        }        return participant;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getLinguisticTypeOf(java.lang.String, java.lang.String)     */    public String getLinguisticTypeIDOf(String tierName, String fileName) {        String lingTypeId = "";        if (tierName.startsWith("*")) {            lingTypeId = MAIN_TYPE; // main tier label without *        } else {            int i = tierName.indexOf(TIER_NAME_SEPARATOR); // part of tier name after @            if (i > 0) {                lingTypeId = tierName.substring(0, i);            }        }        return lingTypeId;    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getParentNameOf(java.lang.String, java.lang.String)     */    public String getParentNameOf(String tierName, String fileName) {        parse(fileName);        return (String) parentHash.get(tierName);    }    /* (non-Javadoc)     * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getAnnotationsOf(java.lang.String, java.lang.String)     */    public ArrayList getAnnotationsOf(String tierName, String fileName) {        parse(fileName);        ArrayList resultAnnotRecords = new ArrayList();        Iterator it = annotRecordToTierMap.keySet().iterator();        while (it.hasNext()) {            AnnotationRecord annRec = (AnnotationRecord) it.next();            if (annotRecordToTierMap.get(annRec).equals(tierName)) {                resultAnnotRecords.add(annRec);            }        }        return resultAnnotRecords;    }    private void parse(String fileName) {        if (lastParsed.equals(fileName)) {            return;        }        // (re)set everything to null for each parse        participantLine = null;        mediaFileName = null;        chatBlocks.clear();        lingTypeRecords.clear();        tierNames.clear();        parentHash.clear();        timeOrder.clear();        timeSlots.clear();        annotationRecords.clear();        annotRecordToTierMap.clear();        br = null;        // parse the file        lastParsed = fileName;        // do actual parsing        try {            br = new BufferedReader(new FileReader(fileName));        } catch (Exception fnf) {            fnf.printStackTrace();        }        String line = null;        try {            if ((line = br.readLine()) != null) {                if (line.startsWith("@UTF8")) { // CHAT UTF-8                    br.close();                    br = new BufferedReader(new InputStreamReader(                                new FileInputStream(fileName), "UTF-8"));                }            }        } catch (IOException iox) {            iox.printStackTrace();        }        parseLines();        processBlocks();        try {            br.close();        } catch (IOException io) {            io.printStackTrace();        }    }    private void parseLines() {        String line = null;        String outputLine = "";        boolean recordingParticipant = false;        ArrayList chatBlock = null;        try {            while ((line = br.readLine()) != null) {                // Participant lines from header                if (line.startsWith("@Participants:")) {                    recordingParticipant = true;                    participantLine = line;                } else if (recordingParticipant == true) {                    if (!(line.startsWith("@") || line.startsWith("*") ||                            line.startsWith("%"))) {                        // continuation of participants line                        participantLine += line;                    } else { // new header line or block line, end recording                        recordingParticipant = false;                    }                }                // CHAT "blocks"                if (line.startsWith("*")) { // new block                    // finish last line of previous block                    if (!outputLine.equals("") && (chatBlock != null)) {                        addLineToBlock(outputLine, chatBlock);                    }                    // output block                    if (chatBlock != null) {                        chatBlocks.add(chatBlock);                    }                    // start new recording                    chatBlock = new ArrayList();                    // add line to new recording                    outputLine = line;                } else if (line.startsWith("%") ||                        ((line.length() > 1) &&                        line.substring(1).startsWith("%"))) { // other lines                    // finish last line                    if (!outputLine.equals("") && (chatBlock != null)) {                        addLineToBlock(outputLine, chatBlock);                    }                    outputLine = line;                    if ((mediaFileName == null) &&                            (startsWithMediaLabel(line) ||                            ((line.length() > 1) &&                            startsWithMediaLabel(line.substring(1))))) { // bullet in chat-utf8                        // parse this line, second token is media file name.                        StringTokenizer st = new StringTokenizer(line);                        if (st.hasMoreTokens()) { // 'eat' %snd label                            st.nextToken();                        }                        if (st.hasMoreTokens()) {                            mediaFileName = st.nextToken();                        }                        // strip off possible double quotes                        if (mediaFileName.startsWith("\"")) {                            mediaFileName = mediaFileName.substring(1);                        }                        if (mediaFileName.endsWith("\"")) {                            mediaFileName = mediaFileName.substring(0,                                    mediaFileName.length() - 1);                        }                    }                } else if (!line.startsWith("@")) { // no label, continuation of previous line                    outputLine += line;                }            }            // finish last line            if (!outputLine.equals("") && (chatBlock != null)) {                addLineToBlock(outputLine, chatBlock);            }            // output last block            if (chatBlock != null) {                chatBlocks.add(chatBlock);            }        } catch (FileNotFoundException fex) {            fex.printStackTrace();        } catch (IOException iex) {            iex.printStackTrace();        }    }    /**     * Helper method to avoid copy and paste     *     * @param file DOCUMENT ME!     *     * @return DOCUMENT ME!     *     * @throws IOException DOCUMENT ME!     */    //	private final BufferedReader file2br(File file) throws IOException {    /*       A file is opened from the operating system.       This stream of bytes could be a UTF-8 encoded unicode stream.       If a file interpreted as UTF-8 contains isolatin-1, the file       cannot be read. An Exception is thrown.       Therefore, special care has to be taken when reading in UTF-8.       As a first measure, the filename is used to decide if to read as UTF-8.       This has to be changend in a future version.       This is just done in order to include Unicode characters into Eudico.     */    /*        Reader filereader;            if (-1 != file.getName().lastIndexOf(".utf8.")) { // this means 'contains'                filereader = new InputStreamReader(new FileInputStream(file),                        "UTF-8");            } else {                // use the locale encoding.                filereader = new FileReader(file);            }            BufferedReader br = new BufferedReader(filereader);            return br;        }    */    private void addLineToBlock(String theLine, ArrayList theBlock) {        String label = null;        String value = null;        label = getLabelPart(theLine);        value = getValuePart(theLine);        if ((label != null) && (value != null)) {            String[] line = { label, value };            theBlock.add(line);        } else if ((label != null) && (value == null)) {            // maybe a valid tierlabel with empty annotation content            String[] line = { label, "" };            theBlock.add(line);        }    }    private String getLabelPart(String theLine) {        String label = null;        int index = theLine.indexOf(':');        if (index > 0) {            label = theLine.substring(0, index);        }        return label;    }    private String getValuePart(String theLine) {        String value = null;        int index = theLine.indexOf(':');        if (index < (theLine.length() - 2)) {            value = theLine.substring(index + 1).trim();        }
💿 文件大小 23621 K
👤 上传用户 ccuading
📂 所属分类 Java编程
🏷️ 相关标签

#编辑 #视频
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -