📄 transcriber14parser.java
字号:
}

    /**
     * Returns the ids of the time slots found in the given file, in sorted
     * order.
     *
     * <p>Each slot id is paired with its time value and the pairs are sorted
     * with a {@code PairComparator} (presumably ascending by time; confirm
     * against the comparator's implementation). Only the ids are returned.</p>
     *
     * @param fileName the Transcriber file to read the time slots from
     *
     * @return an ArrayList of time slot ids (Strings) in sorted order
     */
    public ArrayList getTimeOrder(String fileName) {
        parse(fileName);

        // build (id, time) pairs so the comparator can order them
        ArrayList tempOrder = new ArrayList();
        Iterator it = timeSlots.keySet().iterator();

        while (it.hasNext()) {
            String id = (String) it.next();
            Long time = (Long) timeSlots.get(id);

            ArrayList pair = new ArrayList(2);
            pair.add(id);
            pair.add(time);
            tempOrder.add(pair);
        }

        Collections.sort(tempOrder, new PairComparator());

        // strip the time values again, keeping only the ids
        ArrayList timeOrder = new ArrayList(tempOrder.size());

        for (int i = 0; i < tempOrder.size(); i++) {
            timeOrder.add(((ArrayList) tempOrder.get(i)).get(0));
        }

        return timeOrder;
    }

    /**
     * Returns the time slots of the given file as a map from slot id to the
     * slot's time value, rendered as a decimal String.
     *
     * @param fileName the Transcriber file to read the time slots from
     *
     * @return a new HashMap with slot ids (String) as keys and time values
     *         (String) as values
     */
    public HashMap getTimeSlots(String fileName) {
        parse(fileName);

        HashMap resultSlots = new HashMap();
        Iterator it = timeSlots.keySet().iterator();

        while (it.hasNext()) {
            String id = (String) it.next();
            Long time = (Long) timeSlots.get(id);
            resultSlots.put(id, time.toString());
        }

        return resultSlots;
    }

    /**
     * Returns a HashMap of ArrayLists with the cv id's as keys.<br>
     * Each ArrayList contains one String, the description, followed by a
     * number of CVEntryRecords.
     *
     * <p>The vocabularies are fixed; they are created on the first call and
     * the same map instance is returned on every later call.</p>
     *
     * @param fileName the Transcriber filename
     *
     * @return a HashMap of ArrayLists with the cv id's as keys
     */
    public HashMap getControlledVocabularies(String fileName) {
        parse(fileName);

        if (controlledVocabularies == null) {
            // fixed cv's, only has to be done once
            controlledVocabularies = new HashMap();

            controlledVocabularies.put(SECTION_TYPE_CV,
                createFixedCv(
                    "All values that are allowed for Transcriber Section.type attribute",
                    new String[] { "report", "nontrans", "filler" }));

            controlledVocabularies.put(TURN_MODE_CV,
                createFixedCv(
                    "All values that are allowed for Transcriber Turn.mode attribute",
                    new String[] { "spontaneous", "planned" }));

            controlledVocabularies.put(TURN_FIDELITY_CV,
                createFixedCv(
                    "All values that are allowed for Transcriber Turn.fidelity attribute",
                    new String[] { "high", "medium", "low" }));

            controlledVocabularies.put(TURN_CHANNEL_CV,
                createFixedCv(
                    "All values that are allowed for Transcriber Turn.channel attribute",
                    new String[] { "telephone", "studio" }));
        }

        return controlledVocabularies;
    }

    /**
     * Builds the list representation of one fixed controlled vocabulary: the
     * description String first, then one CVEntryRecord per value, in the
     * given order.
     *
     * @param description the description of the vocabulary
     * @param values the allowed values
     *
     * @return the ArrayList holding the description and the entry records
     */
    private ArrayList createFixedCv(String description, String[] values) {
        ArrayList cvArrayList = new ArrayList();
        cvArrayList.add(description);

        for (int i = 0; i < values.length; i++) {
            CVEntryRecord cvEntry = new CVEntryRecord();
            cvEntry.setValue(values[i]);
            cvArrayList.add(cvEntry);
        }

        return cvArrayList;
    }

    /**
     * Returns the names of the Tiers that are present in the Transcription
     * file. Every tier name occurs only once in the result.
     *
     * @param fileName the Transcriber file to read the tiers from
     *
     * @return an ArrayList with the distinct tier names, in no particular
     *         order
     */
    public ArrayList getTierNames(String fileName) {
        parse(fileName);

        // many annotation records map to the same tier; a set removes the
        // duplicates
        HashSet tierNames = new HashSet();
        Iterator tierIter = annotRecordToTierMap.values().iterator();

        while (tierIter.hasNext()) {
            tierNames.add(tierIter.next());
        }

        return new ArrayList(tierNames);
    }

    /**
     * Returns the participant of the given tier. For the section tier and the
     * turn tier this is the empty String; for any other tier the tier name
     * itself is returned.
     *
     * @param tierName the name of the tier, not null
     * @param fileName the Transcriber file the tier belongs to
     *
     * @return the participant, or an empty String
     */
    public String getParticipantOf(String tierName, String fileName) {
        parse(fileName);

        if (tierName.equals(SECTION_TIER_NAME) ||
                tierName.equals(TURN_TIER_NAME)) {
            return "";
        }

        return tierName;
    }

    /**
     * Returns the name of the linguistic type of the given tier. Each of the
     * special tiers (section, turn, mode, fidelity, channel, background and
     * comment) has its own type; every other tier gets the utterance type.
     *
     * @param tierName the name of the tier, not null
     * @param fileName the Transcriber file the tier belongs to
     *
     * @return the name of the linguistic type
     */
    public String getLinguisticTypeIDOf(String tierName, String fileName) {
        parse(fileName);

        if (tierName.equals(SECTION_TIER_NAME)) {
            return SECTION_TYPE;
        }

        if (tierName.equals(TURN_TIER_NAME)) {
            return TURN_TYPE;
        }

        if (tierName.equals(MODE_TIER_NAME)) {
            return MODE_TYPE;
        }

        if (tierName.equals(FIDELITY_TIER_NAME)) {
            return FIDELITY_TYPE;
        }

        if (tierName.equals(CHANNEL_TIER_NAME)) {
            return CHANNEL_TYPE;
        }

        if (tierName.equals(BACKGROUND_TIER_NAME)) {
            return BACKGROUND_TYPE;
        }

        if (tierName.equals(COMMENT_TIER_NAME)) {
            return COMMENT_TYPE;
        }

        // all remaining tiers hold utterances
        return UTTERANCE_TYPE;
    }

    /**
     * Returns the name of the parent tier of the given tier. The turn tier is
     * a child of the section tier; the mode, fidelity and channel tiers are
     * children of the turn tier. All other tiers have no parent.
     *
     * @param tierName the name of the tier, not null
     * @param fileName the Transcriber file the tier belongs to
     *
     * @return the name of the parent tier, or an empty String if the tier has
     *         no parent
     */
    public String getParentNameOf(String tierName, String fileName) {
        parse(fileName);

        if (tierName.equals(TURN_TIER_NAME)) {
            return SECTION_TIER_NAME;
        }

        if (tierName.equals(MODE_TIER_NAME) ||
                tierName.equals(FIDELITY_TIER_NAME) ||
                tierName.equals(CHANNEL_TIER_NAME)) {
            return TURN_TIER_NAME;
        }

        return "";
    }

    /**
     * Returns an ArrayList with the AnnotationRecords that are mapped to the
     * given tier.
     *
     * @param tierName the name of the tier
     * @param fileName the Transcriber file the tier belongs to
     *
     * @return an ArrayList of AnnotationRecord objects; empty if no record is
     *         mapped to the tier
     */
    public ArrayList getAnnotationsOf(String tierName, String fileName) {
        parse(fileName);

        ArrayList resultAnnotRecords = new ArrayList();
        Iterator it = annotRecordToTierMap.keySet().iterator();

        while (it.hasNext()) {
            AnnotationRecord annRec = (AnnotationRecord) it.next();

            if (annotRecordToTierMap.get(annRec).equals(tierName)) {
                resultAnnotRecords.add(annRec);
            }
        }

        return resultAnnotRecords;
    }

    /**
     * Parses a Transcriber v1.4 xml file.
     *
     * <p>Nothing happens when the file name equals the one of the previous
     * call. Otherwise all parse state is reset, the file is parsed and the
     * collected sections, turns, utterances, backgrounds and comments are
     * processed.</p>
     *
     * <p>NOTE: the file name is recorded as "last parsed" before the parser
     * runs, so a file that failed to parse is not parsed again on the next
     * call with the same name.</p>
     *
     * @param fileName the Transcriber 1.4 xml file that must be parsed.
     */
    private void parse(String fileName) {
        // only parse the same file once
        if (lastParsed.equals(fileName)) {
            return;
        }

        // (re)set everything for each parse
        speakersHash.clear();
        sectionArrayList.clear();
        currentSpeakerContents.clear();
        currentSpeakerContents.put(SINGLE_TIER_NAME, "");
        utteranceRecords.clear();
        timeSlots.clear();
        annotRecordToTierMap.clear();
        commentRecords.clear();
        lastBackgroundRecord = null;
        speakersForCurrentTurn = "";
        annotationCounter = 0;
        timeSlotCounter = 0;
        currentSectionRecord = null;
        currentSpeakerId = null;
        currentTurnRecord = null;
        lastSectionRecord = null;
        lastSyncTime = null;
        lastTurnRecord = null;
        currentComments = "";
        tierNameSet.clear();
        topicHash.clear();

        if (singleSpeechTier) {
            currentSpeakerId = SINGLE_TIER_NAME;
            speakersHash.put(SINGLE_TIER_NAME, SINGLE_TIER_NAME);
        }

        // parse the file
        lastParsed = fileName;
        currentFileName = fileName;

        try {
            saxParser.parse(fileName, transcriberHandler);
        } catch (SAXException e) {
            System.out.println("Parsing error: " + e.getMessage());

            // the SAX parser can have difficulties with certain characters in
            // the filepath: try to create an InputSource for the parser
            File f = new File(fileName);

            if (f.exists()) {
                FileInputStream fis = null;

                try {
                    fis = new FileInputStream(f);

                    InputSource source = new InputSource(fis);
                    saxParser.parse(source, transcriberHandler);

                    // just catch any exception
                } catch (Exception ee) {
                    System.out.println("Parsing retry error: " +
                        ee.getMessage());
                } finally {
                    // the parser is not guaranteed to close the stream when
                    // it fails, so always release the file handle here
                    if (fis != null) {
                        try {
                            fis.close();
                        } catch (IOException ignored) {
                            // nothing useful can be done if closing fails
                        }
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            printErrorLocationInfo("Fatal(?) Error! " + e.getMessage());
        }

        // go through Sections, Turns, Utterances and Backgrounds and compose
        // TimeOrder, time slots and annotation records.
        processSectionsAndTurns();
        processUtterances();
        processBackgrounds();
        processComments();
    }

    private void processSectionsAndTurns() {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -