📄 transcriber14parser.java
字号:
tierNameSet.add(SECTION_TIER_NAME); tierNameSet.add(TURN_TIER_NAME); for (int i = 0; i < sectionArrayList.size(); i++) { SectionRecord sectionR = (SectionRecord) sectionArrayList.get(i); long sectionBegin = new Double((new Double(sectionR.startTime)).doubleValue() * 1000).longValue(); long sectionEnd = new Double((new Double(sectionR.endTime)).doubleValue() * 1000).longValue(); String sectionBeginSlotId = TS_PREFIX + timeSlotCounter++; String sectionEndSlotId = TS_PREFIX + timeSlotCounter++; String value = sectionR.type; if ((sectionR.topicId != null) && !sectionR.topicId.equals("")) { value += (" - " + topicHash.get(sectionR.topicId)); } addAnnotRecordAndTimeSlots(sectionBegin, sectionEnd, sectionBeginSlotId, sectionEndSlotId, value, SECTION_TIER_NAME); // turns String turnBeginSlotId = ""; String turnEndSlotId = ""; for (int j = 0; j < sectionR.turnRecords.size(); j++) { TurnRecord turnR = (TurnRecord) sectionR.turnRecords.get(j); long turnBegin = new Double((new Double(turnR.startTime)).doubleValue() * 1000).longValue(); long turnEnd = new Double((new Double(turnR.endTime)).doubleValue() * 1000).longValue(); if (turnBegin == sectionBegin) { turnBeginSlotId = sectionBeginSlotId; } else if (!turnEndSlotId.equals("")) { turnBeginSlotId = turnEndSlotId; } else { turnBeginSlotId = TS_PREFIX + timeSlotCounter++; } if (turnEnd == sectionEnd) { turnEndSlotId = sectionEndSlotId; } else { turnEndSlotId = TS_PREFIX + timeSlotCounter++; } String speakerString = ""; if (turnR.speakers != null) { StringTokenizer tokenizer = new StringTokenizer(turnR.speakers); while (tokenizer.hasMoreTokens()) { speakerString += (String) speakersHash.get(tokenizer.nextToken()); if (tokenizer.hasMoreTokens()) { speakerString += " + "; } } } else { speakerString = "(no speaker)"; } String parentId = addAnnotRecordAndTimeSlots(turnBegin, turnEnd, turnBeginSlotId, turnEndSlotId, speakerString, TURN_TIER_NAME); if ((turnR.mode != null) && !turnR.mode.equals("")) { tierNameSet.add(MODE_TIER_NAME); addAnnotRecord(parentId, turnR.mode, MODE_TIER_NAME); } if ((turnR.fidelity != null) && !turnR.fidelity.equals("")) { tierNameSet.add(FIDELITY_TIER_NAME); addAnnotRecord(parentId, turnR.fidelity, FIDELITY_TIER_NAME); } if ((turnR.channel != null) && !turnR.channel.equals("")) { tierNameSet.add(CHANNEL_TIER_NAME); addAnnotRecord(parentId, turnR.channel, CHANNEL_TIER_NAME); } } } } private void processUtterances() { // add tier names to tierNameSet Iterator speakerNameIter = speakersHash.values().iterator(); while (speakerNameIter.hasNext()) { tierNameSet.add(speakerNameIter.next()); } // iterate over utteranceRecords for (int i = 0; i < utteranceRecords.size(); i++) { UtteranceRecord utteranceR = (UtteranceRecord) utteranceRecords.get(i); long uttBegin = new Double((new Double(utteranceR.startTime)).doubleValue() * 1000).longValue(); long uttEnd = new Double((new Double(utteranceR.endTime)).doubleValue() * 1000).longValue(); String beginSlotId = TS_PREFIX + timeSlotCounter++; String endSlotId = TS_PREFIX + timeSlotCounter++; addAnnotRecordAndTimeSlots(uttBegin, uttEnd, beginSlotId, endSlotId, utteranceR.text, utteranceR.speaker); } } private void processBackgrounds() { tierNameSet.add(BACKGROUND_TIER_NAME); long bgBeginTime = 0; long bgEndTime = 0; for (int i = 0; i < backgroundArrayList.size(); i++) { BackgroundRecord backgroundR = (BackgroundRecord) backgroundArrayList.get(i); bgEndTime = new Double((new Double(backgroundR.time)).doubleValue() * 1000).longValue(); if ((lastBackgroundRecord != null) && !lastBackgroundRecord.level.equals("off")) { // not first background bgBeginTime = new Double((new Double(lastBackgroundRecord.time)).doubleValue() * 1000).longValue(); String beginSlotId = TS_PREFIX + timeSlotCounter++; String endSlotId = TS_PREFIX + timeSlotCounter++; addAnnotRecordAndTimeSlots(bgBeginTime, bgEndTime, beginSlotId, endSlotId, lastBackgroundRecord.type, BACKGROUND_TIER_NAME); } lastBackgroundRecord = backgroundR; } if ((lastBackgroundRecord != null) && !lastBackgroundRecord.level.equals("off") && (sectionArrayList.size() > 0)) { // handle last background record, end at last section end SectionRecord sectionR = (SectionRecord) sectionArrayList.get(sectionArrayList.size() - 1); if (sectionR != null) { bgBeginTime = new Double((new Double(lastBackgroundRecord.time)).doubleValue() * 1000).longValue(); bgEndTime = new Double((new Double(sectionR.endTime)).doubleValue() * 1000).longValue(); String beginSlotId = TS_PREFIX + timeSlotCounter++; String endSlotId = TS_PREFIX + timeSlotCounter++; addAnnotRecordAndTimeSlots(bgBeginTime, bgEndTime, beginSlotId, endSlotId, lastBackgroundRecord.type, BACKGROUND_TIER_NAME); } } } private void processComments() { tierNameSet.add(COMMENT_TIER_NAME); // iterate over commentRecords for (int i = 0; i < commentRecords.size(); i++) { CommentRecord commentR = (CommentRecord) commentRecords.get(i); long commBegin = new Double((new Double(commentR.begin)).doubleValue() * 1000).longValue(); long commEnd = new Double((new Double(commentR.end)).doubleValue() * 1000).longValue(); String beginSlotId = TS_PREFIX + timeSlotCounter++; String endSlotId = TS_PREFIX + timeSlotCounter++; addAnnotRecordAndTimeSlots(commBegin, commEnd, beginSlotId, endSlotId, commentR.desc, COMMENT_TIER_NAME); } } private String addAnnotRecordAndTimeSlots(long begin, long end, String beginSlotId, String endSlotId, String value, String tierName) { timeSlots.put(beginSlotId, new Long(begin)); timeSlots.put(endSlotId, new Long(end)); String annId = ANN_PREFIX + annotationCounter++; AnnotationRecord annRec = new AnnotationRecord(); annRec.setAnnotationId(annId); annRec.setAnnotationType(AnnotationRecord.ALIGNABLE); annRec.setBeginTimeSlotId(beginSlotId); annRec.setEndTimeSlotId(endSlotId); annRec.setValue(value); annotRecordToTierMap.put(annRec, tierName); return annId; } private void addAnnotRecord(String parentId, String value, String tierName) { AnnotationRecord annRec = new AnnotationRecord(); annRec.setAnnotationId(ANN_PREFIX + annotationCounter++); annRec.setAnnotationType(AnnotationRecord.REFERENCE); annRec.setReferredAnnotId(parentId); annRec.setValue(value); annotRecordToTierMap.put(annRec, tierName); } private void println(String s) { if (verbose) { System.out.println(s); } } private void printErrorLocationInfo(String message) { System.out.println(message); System.out.println("Exception for " + currentFileName); } /* * This method should be in a Utility class or a URL class * Convert a path to a file URL string. Takes care of Samba related problems * file:///path works for all files except for samba file systems, there we need file://machine/path, * i.e. 2 slashes insteda of 3 * * What's with relative paths? */ private String pathToURLString(String path) { // replace all back slashes by forward slashes path = path.replace('\\', '/'); // remove leading slashes and count them int n = 0; while (path.charAt(0) == '/') { path = path.substring(1); n++; } // add the file:// or file:/// prefix if (n == 2) { return "file://" + path; } else { return "file:///" + path; } } /** * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#setDecoderInfo(mpi.eudico.server.corpora.clom.DecoderInfo) */ public void setDecoderInfo(DecoderInfo decoderInfo) { if (decoderInfo instanceof TranscriberDecoderInfo) { this.decoderInfo = (TranscriberDecoderInfo) decoderInfo; singleSpeechTier = this.decoderInfo.isSingleSpeakerTier(); } } /** * DOCUMENT ME! */ class Transcriber14Handler extends DefaultHandler { /** * ContentHandler method * * @throws SAXException DOCUMENT ME! */ public void startDocument() throws SAXException { parseError = false; } /** * ContentHandler method * * @throws SAXException DOCUMENT ME! */ public void endDocument() throws SAXException { } /** * ContentHandler method * * @param nameSpaceURI DOCUMENT ME! * @param name DOCUMENT ME! * @param rawName DOCUMENT ME! * @param attributes DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void startElement(String nameSpaceURI, String name, String qName, Attributes attributes) throws SAXException { // System.out.println("startElement called for name:" + name); if (qName.equals("Trans")) { audioFileName = attributes.getValue("audio_filename"); scribe = attributes.getValue("scribe"); language = attributes.getValue("xml:lang"); date = attributes.getValue("version_date"); } else if (qName.equals("Speaker")) { speakersHash.put(attributes.getValue("id"), attributes.getValue("name")); if (!singleSpeechTier) { currentSpeakerContents.put(attributes.getValue("id"), ""); } } else if (qName.equals("Section")) { currentSectionRecord = new SectionRecord(attributes.getValue( "type"), attributes.getValue("startTime"), attributes.getValue("endTime"), attributes.getValue("topic")); sectionArrayList.add(currentSectionRecord); } else if (qName.equals("Turn")) { currentTurnRecord = new TurnRecord(attributes.getValue( "startTime"), attributes.getValue("endTime"), attributes.getValue("speaker"), attributes.getValue("mode"), attributes.getValue("fidelity"), attributes.getValue("channel")); if (currentSectionRecord != null) { currentSectionRecord.turnRecords.add(currentTurnRecord); } speakersForCurrentTurn = attributes.getValue("speaker"); if (!singleSpeechTier) { if (speakersForCurrentTurn != null) { StringTokenizer tokenizer = new StringTokenizer(speakersForCurrentTurn); if (tokenizer.hasMoreTokens()) { currentSpeakerId = tokenizer.nextToken(); } else { currentSpeakerId = ""; } } else { currentSpeakerId = SPEAKER_UNSPECIFIED; speakersHash.put(SPEAKER_UNSPECIFIED, SPEAKER_UNSPECIFIED); currentSpeakerContents.put(SPEAKER_UNSPECIFIED, ""); } } } else if (qName.equals("Sync")) { String time = attributes.getValue("time"); storeUtterances(time); storeComments(time); // store lastSyncTime = time; } else if (qName.equals("Event")) { String desc = attributes.getValue("desc"); String extent = attributes.getValue("extent"); String eventString = TranscriberEvent.getEventString(desc, extent); if ((currentSpeakerContents != null) && (currentSpeakerId != null)) { String content = (String) currentSpeakerContents.get(currentSpeakerId); if (content != null) { content += eventString; } currentSpeakerContents.put(currentSpeakerId, content); } } else if (qName.equals("Who")) { String nb = attributes.getValue("nb"); if (singleSpeechTier) { if ((currentSpeakerContents != null) && (currentSpeakerId != null)) { String content = (String) currentSpeakerContents.get(currentSpeakerId); String whoStr = "(" + SP_PREFIX + ":" + nb + ")"; if (content != null) { if (content.length() == 0) { content = whoStr; } else { content += (" " + whoStr); } } else { content = whoStr; } currentSpeakerContents.put(currentSpeakerId, content); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -