📄 chatparser.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 23
        return value;    }    private void processBlocks() {        Set tNames = new HashSet();        String annotationIdPrefix = "ann";        long annotId = 0;        long tsId = 0;        // store last end per root annot, to prevent overlaps within tier        HashMap lastEndTimes = new HashMap();        long[] firstSlotAfterSync = null; // store first slot for a range of unaligned blocks,                                          // time interval may apply to a sequence of blocks        Iterator blockIter = chatBlocks.iterator();        while (blockIter.hasNext()) {            String participantLabel = "";            String tierName = null;            String[] mediaLine = null;            String rootAnnotId = "";            long beginTSId = 0;            long endTSId = 0;            long begin = TimeSlot.TIME_UNALIGNED;            long end = TimeSlot.TIME_UNALIGNED;            ArrayList block = (ArrayList) blockIter.next();            Iterator lineIter = block.iterator();            while (lineIter.hasNext()) {                // (compose and) collect tier names                String[] line = (String[]) lineIter.next();                String lbl = line[0];                String value = line[1];                if (lbl.startsWith("*")) {                    participantLabel = lbl;                    tierName = lbl;                } else if (!startsWithMediaLabel(lbl) &&                        !(((lbl.length() > 1) &&                        startsWithMediaLabel(lbl.substring(1))))) {                    tierName = lbl +                        participantLabel.replace('*', TIER_NAME_SEPARATOR);                    parentHash.put(tierName, participantLabel);                }                tNames.add(tierName);                // create AnnotationRecord for main and dependent tiers                // create time slots per block                if (lbl.startsWith("*")) { // main utterance tier                    AnnotationRecord annRec = new AnnotationRecord();                    rootAnnotId = annotationIdPrefix + annotId; // store annot id for parent referencing                    annRec.setAnnotationId(annotationIdPrefix + annotId++);                    annRec.setAnnotationType(AnnotationRecord.ALIGNABLE);                    beginTSId = tsId++;                    endTSId = tsId++;                    annRec.setBeginTimeSlotId(TS_ID_PREFIX +                        Long.toString(beginTSId));                    annRec.setEndTimeSlotId(TS_ID_PREFIX +                        Long.toString(endTSId));                    // Feb 2006: add support for media and time information on the main                     // utterance tier: format BULLET%snd:"2MEHT10"_392998_397665BULLET                    if (value.indexOf(BULLET) > -1) {                        mediaLine = extractMediaAndTime(value.substring(                                    value.indexOf(BULLET)));                        if (mediaLine != null) {                            if ((mediaLine[0] != null) &&                                    startsWithMediaLabel(mediaLine[0])) {                                if ((mediaLine[1] != null) &&                                        (mediaFileName == null)) {                                    mediaFileName = mediaLine[1];                                }                                if (mediaLine[2] != null) {                                    try {                                        begin = Long.parseLong(mediaLine[2]);                                    } catch (NumberFormatException nfe) {                                        System.out.println(                                            "Invalid time value: " +                                            mediaLine[2]);                                    }                                }                                if (mediaLine[3] != null) {                                    try {                                        end = Long.parseLong(mediaLine[3]);                                    } catch (NumberFormatException nfe) {                                        System.out.println(                                            "Invalid time value: " +                                            mediaLine[3]);                                    }                                }                            }                        }                        annRec.setValue(value.substring(0, value.indexOf(BULLET))                                             .trim());                    } else {                        annRec.setValue(value);                    }                    annotationRecords.add(annRec);                    annotRecordToTierMap.put(annRec, tierName);                } else if ((startsWithMediaLabel(lbl)) ||                        ((lbl.length() > 1) &&                        startsWithMediaLabel(lbl.substring(1)))) {                    String timeString = value;                    if (timeString != null) {                        StringTokenizer st = new StringTokenizer(timeString);                        if (st.hasMoreTokens()) { // skip first token, the sound file name                            st.nextToken();                        }                        if (st.hasMoreTokens()) { // second token is begin time                            String bString = st.nextToken();                            int positionOfDot = bString.indexOf("."); // for MED/X-Waves alined CHAT data                            if (positionOfDot > 0) {                                bString = bString.substring(0, positionOfDot);                            }                            begin = (new Long(bString)).longValue();                        }                        if (st.hasMoreTokens()) { // third token is end time                            String eString = st.nextToken();                            int positionOfDot = eString.indexOf("."); // for MED/X-Waves alined CHAT data                            if (positionOfDot > 0) {                                eString = eString.substring(0, positionOfDot);                            }                            end = (new Long(eString)).longValue();                        }                    }                } else { // consider reference annotation on dependent tier                    AnnotationRecord annRec = new AnnotationRecord();                    annRec.setAnnotationId(annotationIdPrefix + annotId++);                    annRec.setAnnotationType(AnnotationRecord.REFERENCE);                    annRec.setReferredAnnotId(rootAnnotId);                    annRec.setValue(value);                    annotationRecords.add(annRec);                    annotRecordToTierMap.put(annRec, tierName);                }            }            long beginMsec = TimeSlot.TIME_UNALIGNED;            if (begin != TimeSlot.TIME_UNALIGNED) {                beginMsec = (long) begin;                // prevent overlaps within one tier                long lastEnd = 0;                if (lastEndTimes.get(participantLabel) != null) {                    lastEnd = ((Long) lastEndTimes.get(participantLabel)).longValue();                }                if (lastEnd > beginMsec) {                    beginMsec = lastEnd;                }            }            long endMsec = TimeSlot.TIME_UNALIGNED;            if (end != TimeSlot.TIME_UNALIGNED) {                endMsec = (long) end;                lastEndTimes.put(participantLabel, new Long(endMsec));            }            long[] bSlot = { beginTSId, beginMsec };            // in case %snd time intervale applies to a sequence of blocks, store first slot for later alignment            if ((firstSlotAfterSync == null) &&                    (begin == TimeSlot.TIME_UNALIGNED)) { // store                 firstSlotAfterSync = bSlot;            }            long[] eSlot = { endTSId, endMsec };            if ((firstSlotAfterSync != null) &&                    (begin != TimeSlot.TIME_UNALIGNED) &&                    (end != TimeSlot.TIME_UNALIGNED)) {                firstSlotAfterSync[1] = beginMsec;                bSlot[1] = TimeSlot.TIME_UNALIGNED;                firstSlotAfterSync = null;            }            timeSlots.add(bSlot);            timeSlots.add(eSlot);        }        tierNames = new ArrayList(tNames);    }    private boolean startsWithMediaLabel(String line) {        boolean start = false;        if (line.startsWith("%snd") || line.startsWith("%mov")) {            start = true;        }        return start;    }    /**     * Extracts label, medianame, begintime and endtime from a CHAT formatted     * media string. This is the media string that follows an utterance on the     * same line (as opposed to media information in a separate tier)     * Format: {BULLET}%snd:"1MEHT10"_8742_10762{BULLET}     * @param value the formatted string     * @return a String array with the single tokens     */    private String[] extractMediaAndTime(String value) {        if (value == null) {            return null;        }        String[] result = new String[4];        StringBuffer buf = new StringBuffer(value);        // remove bullets        if (buf.charAt(0) == BULLET) {            buf.delete(0, 1);        }        if (buf.charAt(buf.length() - 1) == BULLET) {            buf.delete(buf.length() - 1, buf.length());        }        int colon = buf.indexOf(":");        int quot = buf.indexOf("\"");        int quot2 = buf.lastIndexOf("\"");        if (colon > -1) {            result[0] = buf.substring(0, colon); //%snd or %mov        }        if ((quot > -1) && (quot2 > (quot + 1))) {            // media filename, without extension!            result[1] = buf.substring(quot + 1, quot2);            int under = buf.indexOf("_", quot2);            int under2 = buf.indexOf("_", under + 1);            if (under > -1) {                if (under2 > (under + 1)) {                    result[2] = buf.substring(under + 1, under2);                    if (under2 < (buf.length() - 1)) {                        result[3] = buf.substring(under2 + 1);                    }                } else {                    // only begintime?                    result[2] = buf.substring(under + 1);                }            }        }        return result;    }    /*     * This method should be in a Utility class or a URL class     * Convert a path to a file URL string. Takes care of Samba related problems     * file:///path works for all files except for samba file systems, there we need file://machine/path,     * i.e. 2 slashes insteda of 3     *     * What's with relative paths?     */    private String pathToURLString(String path) {        // replace all back slashes by forward slashes        path = path.replace('\\', '/');        // remove leading slashes and count them        int n = 0;        while (path.charAt(0) == '/') {            path = path.substring(1);            n++;        }        // add the file:// or file:/// prefix        if (n == 2) {            return "file://" + path;        } else {            return "file:///" + path;        }    }}
上一页 1 23
💿 文件大小 23621 K
👤 上传用户 ccuading
📂 所属分类 Java编程
🏷️ 相关标签

#编辑 #视频
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -