📄 chatparser.java
字号:
return value; }

    /**
     * Converts the collected CHAT blocks into annotation records, tier names
     * and time slots.
     *
     * For every block in {@code chatBlocks}:
     * - the line whose label starts with "*" becomes an ALIGNABLE annotation
     *   with two newly numbered time slots; a trailing bullet-delimited media
     *   string on that line supplies media file name, begin and end time;
     * - a line whose label is (or, after one leading character, is) a
     *   %snd / %mov label supplies begin and end time for the block;
     * - every other line becomes a REFERENCE annotation pointing at the
     *   block's main annotation, on a tier named label + participant label
     *   (with '*' replaced by TIER_NAME_SEPARATOR), registered in parentHash.
     *
     * After the lines of a block are processed, one begin and one end slot
     * ({id, time}) are appended to {@code timeSlots}. Finally
     * {@code tierNames} is replaced by the collected set of tier names.
     *
     * Malformed time values are reported on System.out and leave the
     * corresponding time at its previous value instead of aborting the parse.
     */
    private void processBlocks() {
        Set tNames = new HashSet();
        String annotationIdPrefix = "ann";
        long annotId = 0;
        long tsId = 0;

        // store last end per root annot, to prevent overlaps within tier
        HashMap lastEndTimes = new HashMap();

        // store first slot for a range of unaligned blocks,
        // time interval may apply to a sequence of blocks
        long[] firstSlotAfterSync = null;

        Iterator blockIter = chatBlocks.iterator();

        while (blockIter.hasNext()) {
            String participantLabel = "";
            String tierName = null;
            String[] mediaLine = null;

            String rootAnnotId = "";
            long beginTSId = 0;
            long endTSId = 0;

            long begin = TimeSlot.TIME_UNALIGNED;
            long end = TimeSlot.TIME_UNALIGNED;

            ArrayList block = (ArrayList) blockIter.next();
            Iterator lineIter = block.iterator();

            while (lineIter.hasNext()) {
                String[] line = (String[]) lineIter.next();

                String lbl = line[0];
                String value = line[1];

                // classify the line once; the "*" test always takes precedence
                boolean mainTier = lbl.startsWith("*");
                boolean mediaTier = startsWithMediaLabel(lbl) ||
                    ((lbl.length() > 1) &&
                    startsWithMediaLabel(lbl.substring(1)));

                // (compose and) collect tier names
                if (mainTier) {
                    participantLabel = lbl;
                    tierName = lbl;
                } else if (!mediaTier) {
                    tierName = lbl +
                        participantLabel.replace('*', TIER_NAME_SEPARATOR);
                    parentHash.put(tierName, participantLabel);
                }

                tNames.add(tierName);

                // create AnnotationRecord for main and dependent tiers
                // create time slots per block
                if (mainTier) { // main utterance tier

                    AnnotationRecord annRec = new AnnotationRecord();

                    // store annot id for parent referencing
                    rootAnnotId = annotationIdPrefix + annotId;

                    annRec.setAnnotationId(annotationIdPrefix + annotId++);
                    annRec.setAnnotationType(AnnotationRecord.ALIGNABLE);

                    beginTSId = tsId++;
                    endTSId = tsId++;

                    annRec.setBeginTimeSlotId(TS_ID_PREFIX +
                        Long.toString(beginTSId));
                    annRec.setEndTimeSlotId(TS_ID_PREFIX +
                        Long.toString(endTSId));

                    // Feb 2006: add support for media and time information on the main
                    // utterance tier: format BULLET%snd:"2MEHT10"_392998_397665BULLET
                    // (a null value is treated like a value without a bullet)
                    int bulletIndex = (value == null) ? (-1)
                                                      : value.indexOf(BULLET);

                    if (bulletIndex > -1) {
                        mediaLine = extractMediaAndTime(value.substring(
                                    bulletIndex));

                        if ((mediaLine != null) && (mediaLine[0] != null) &&
                                startsWithMediaLabel(mediaLine[0])) {
                            // only the first media file name encountered is kept
                            if ((mediaLine[1] != null) &&
                                    (mediaFileName == null)) {
                                mediaFileName = mediaLine[1];
                            }

                            if (mediaLine[2] != null) {
                                try {
                                    begin = Long.parseLong(mediaLine[2]);
                                } catch (NumberFormatException nfe) {
                                    System.out.println("Invalid time value: " +
                                        mediaLine[2]);
                                }
                            }

                            if (mediaLine[3] != null) {
                                try {
                                    end = Long.parseLong(mediaLine[3]);
                                } catch (NumberFormatException nfe) {
                                    System.out.println("Invalid time value: " +
                                        mediaLine[3]);
                                }
                            }
                        }

                        // the annotation value is the text before the first bullet
                        annRec.setValue(value.substring(0, bulletIndex).trim());
                    } else {
                        annRec.setValue(value);
                    }

                    annotationRecords.add(annRec);
                    annotRecordToTierMap.put(annRec, tierName);
                } else if (mediaTier) {
                    String timeString = value;

                    if (timeString != null) {
                        StringTokenizer st = new StringTokenizer(timeString);

                        if (st.hasMoreTokens()) { // skip first token, the sound file name
                            st.nextToken();
                        }

                        if (st.hasMoreTokens()) { // second token is begin time
                            begin = parseMediaTierTime(st.nextToken(), begin);
                        }

                        if (st.hasMoreTokens()) { // third token is end time
                            end = parseMediaTierTime(st.nextToken(), end);
                        }
                    }
                } else { // consider reference annotation on dependent tier

                    AnnotationRecord annRec = new AnnotationRecord();

                    annRec.setAnnotationId(annotationIdPrefix + annotId++);
                    annRec.setAnnotationType(AnnotationRecord.REFERENCE);
                    annRec.setReferredAnnotId(rootAnnotId);
                    annRec.setValue(value);

                    annotationRecords.add(annRec);
                    annotRecordToTierMap.put(annRec, tierName);
                }
            }

            long beginMsec = TimeSlot.TIME_UNALIGNED;

            if (begin != TimeSlot.TIME_UNALIGNED) {
                beginMsec = begin;

                // prevent overlaps within one tier
                long lastEnd = 0;
                Object storedEnd = lastEndTimes.get(participantLabel);

                if (storedEnd != null) {
                    lastEnd = ((Long) storedEnd).longValue();
                }

                if (lastEnd > beginMsec) {
                    beginMsec = lastEnd;
                }
            }

            long endMsec = TimeSlot.TIME_UNALIGNED;

            if (end != TimeSlot.TIME_UNALIGNED) {
                endMsec = end;
                lastEndTimes.put(participantLabel, Long.valueOf(endMsec));
            }

            long[] bSlot = { beginTSId, beginMsec };

            // in case %snd time interval applies to a sequence of blocks,
            // store first slot for later alignment
            if ((firstSlotAfterSync == null) &&
                    (begin == TimeSlot.TIME_UNALIGNED)) { // store
                firstSlotAfterSync = bSlot;
            }

            long[] eSlot = { endTSId, endMsec };

            // a fully aligned block closes a pending run of unaligned blocks:
            // its begin time moves to the first slot of that run and its own
            // begin slot is left unaligned
            if ((firstSlotAfterSync != null) &&
                    (begin != TimeSlot.TIME_UNALIGNED) &&
                    (end != TimeSlot.TIME_UNALIGNED)) {
                firstSlotAfterSync[1] = beginMsec;
                bSlot[1] = TimeSlot.TIME_UNALIGNED;

                firstSlotAfterSync = null;
            }

            timeSlots.add(bSlot);
            timeSlots.add(eSlot);
        }

        tierNames = new ArrayList(tNames);
    }

    /**
     * Parses one time token of a separate %snd / %mov tier.
     * Anything from the first '.' onwards is dropped before parsing (for
     * MED/X-Waves aligned CHAT data), unless the dot is the first character.
     *
     * @param token the raw time token
     * @param fallback the value to return when the token is not a valid number
     * @return the parsed time, or {@code fallback} after reporting the
     *         invalid token on System.out
     */
    private long parseMediaTierTime(String token, long fallback) {
        String digits = token;
        int positionOfDot = digits.indexOf(".");

        if (positionOfDot > 0) {
            digits = digits.substring(0, positionOfDot);
        }

        try {
            return Long.parseLong(digits);
        } catch (NumberFormatException nfe) {
            // same reporting as for time values in a main tier media bullet
            System.out.println("Invalid time value: " + token);

            return fallback;
        }
    }

    /**
     * Tells whether a label starts with one of the media labels.
     *
     * @param line the label (or line) to test
     * @return true if it starts with "%snd" or "%mov"
     */
    private boolean startsWithMediaLabel(String line) {
        return line.startsWith("%snd") || line.startsWith("%mov");
    }

    /**
     * Extracts label, medianame, begintime and endtime from a CHAT formatted
     * media string. This is the media string that follows an utterance on the
     * same line (as opposed to media information in a separate tier)
     * Format: {BULLET}%snd:"1MEHT10"_8742_10762{BULLET}
     *
     * Parts that cannot be found are left {@code null}; an empty string or a
     * string consisting of a single bullet yields an array of four nulls.
     *
     * @param value the formatted string
     * @return a String array with the single tokens: [0] label (the text
     *         before the first colon), [1] media name (the text between the
     *         first and the last double quote), [2] begin time, [3] end time;
     *         {@code null} if value is {@code null}
     */
    private String[] extractMediaAndTime(String value) {
        if (value == null) {
            return null;
        }

        String[] result = new String[4];
        String media = value;

        // remove bullets; the length checks keep an empty string or a lone
        // bullet from being indexed out of range
        if ((media.length() > 0) && (media.charAt(0) == BULLET)) {
            media = media.substring(1);
        }

        if ((media.length() > 0) &&
                (media.charAt(media.length() - 1) == BULLET)) {
            media = media.substring(0, media.length() - 1);
        }

        int colon = media.indexOf(":");
        int quot = media.indexOf("\"");
        int quot2 = media.lastIndexOf("\"");

        if (colon > -1) {
            result[0] = media.substring(0, colon); //%snd or %mov
        }

        if ((quot > -1) && (quot2 > (quot + 1))) {
            // media filename, without extension!
            result[1] = media.substring(quot + 1, quot2);

            // times follow the closing quote, each introduced by an underscore
            int under = media.indexOf("_", quot2);
            int under2 = media.indexOf("_", under + 1);

            if (under > -1) {
                if (under2 > (under + 1)) {
                    result[2] = media.substring(under + 1, under2);

                    if (under2 < (media.length() - 1)) {
                        result[3] = media.substring(under2 + 1);
                    }
                } else {
                    // only begintime?
                    result[2] = media.substring(under + 1);
                }
            }
        }

        return result;
    }

    /*
     * This method should be in a Utility class or a URL class
     * Convert a path to a file URL string. Takes care of Samba related problems
     * file:///path works for all files except for samba file systems, there we need file://machine/path,
     * i.e. 2 slashes instead of 3
     *
     * An empty path, or a path consisting only of slashes, yields just the prefix.
     *
     * What's with relative paths?
     */
    private String pathToURLString(String path) {
        // replace all back slashes by forward slashes
        path = path.replace('\\', '/');

        // remove leading slashes and count them; the bound check keeps an
        // empty or slash-only path from being indexed past its end
        int n = 0;

        while ((n < path.length()) && (path.charAt(n) == '/')) {
            n++;
        }

        path = path.substring(n);

        // add the file:// or file:/// prefix
        if (n == 2) {
            return "file://" + path;
        } else {
            return "file:///" + path;
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -