📄 chatparser.java
字号:
String timeValue = Long.toString(((long) timeSlot[1])); resultSlots.put(tsId, timeValue); } return resultSlots; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getTierNames(java.lang.String) */ public ArrayList getTierNames(String fileName) { // tierNames in ELAN are either the main tier '*PAR' labels, or // the combination of tier label plus participant, like '%mor@PAR' parse(fileName); return tierNames; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getParticipantOf(java.lang.String, java.lang.String) */ public String getParticipantOf(String tierName, String fileName) { String participant = ""; if (tierName.startsWith("*")) { participant = tierName.substring(1); // main tier label without * } else { int i = tierName.indexOf(TIER_NAME_SEPARATOR); // part of tier name after @ if ((i > 0) && (tierName.length() > (i + 2))) { participant = tierName.substring(i + 1); } } return participant; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getLinguisticTypeOf(java.lang.String, java.lang.String) */ public String getLinguisticTypeIDOf(String tierName, String fileName) { String lingTypeId = ""; if (tierName.startsWith("*")) { lingTypeId = MAIN_TYPE; // main tier label without * } else { int i = tierName.indexOf(TIER_NAME_SEPARATOR); // part of tier name after @ if (i > 0) { lingTypeId = tierName.substring(0, i); } } return lingTypeId; } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getParentNameOf(java.lang.String, java.lang.String) */ public String getParentNameOf(String tierName, String fileName) { parse(fileName); return (String) parentHash.get(tierName); } /* (non-Javadoc) * @see mpi.eudico.server.corpora.clomimpl.abstr.Parser#getAnnotationsOf(java.lang.String, java.lang.String) */ public ArrayList getAnnotationsOf(String tierName, String fileName) { parse(fileName); ArrayList resultAnnotRecords = new ArrayList(); Iterator it = annotRecordToTierMap.keySet().iterator(); while (it.hasNext()) { AnnotationRecord annRec = (AnnotationRecord) it.next(); if (annotRecordToTierMap.get(annRec).equals(tierName)) { resultAnnotRecords.add(annRec); } } return resultAnnotRecords; } private void parse(String fileName) { if (lastParsed.equals(fileName)) { return; } // (re)set everything to null for each parse participantLine = null; mediaFileName = null; chatBlocks.clear(); lingTypeRecords.clear(); tierNames.clear(); parentHash.clear(); timeOrder.clear(); timeSlots.clear(); annotationRecords.clear(); annotRecordToTierMap.clear(); br = null; // parse the file lastParsed = fileName; // do actual parsing try { br = new BufferedReader(new FileReader(fileName)); } catch (Exception fnf) { fnf.printStackTrace(); } String line = null; try { if ((line = br.readLine()) != null) { if (line.startsWith("@UTF8")) { // CHAT UTF-8 br.close(); br = new BufferedReader(new InputStreamReader( new FileInputStream(fileName), "UTF-8")); } } } catch (IOException iox) { iox.printStackTrace(); } parseLines(); processBlocks(); try { br.close(); } catch (IOException io) { io.printStackTrace(); } } private void parseLines() { String line = null; String outputLine = ""; boolean recordingParticipant = false; ArrayList chatBlock = null; try { while ((line = br.readLine()) != null) { // Participant lines from header if (line.startsWith("@Participants:")) { recordingParticipant = true; participantLine = line; } else if (recordingParticipant == true) { if (!(line.startsWith("@") || line.startsWith("*") || line.startsWith("%"))) { // continuation of participants line participantLine += line; } else { // new header line or block line, end recording recordingParticipant = false; } } // CHAT "blocks" if (line.startsWith("*")) { // new block // finish last line of previous block if (!outputLine.equals("") && (chatBlock != null)) { addLineToBlock(outputLine, chatBlock); } // output block if (chatBlock != null) { chatBlocks.add(chatBlock); } // start new recording chatBlock = new ArrayList(); // add line to new recording outputLine = line; } else if (line.startsWith("%") || ((line.length() > 1) && line.substring(1).startsWith("%"))) { // other lines // finish last line if (!outputLine.equals("") && (chatBlock != null)) { addLineToBlock(outputLine, chatBlock); } outputLine = line; if ((mediaFileName == null) && (startsWithMediaLabel(line) || ((line.length() > 1) && startsWithMediaLabel(line.substring(1))))) { // bullet in chat-utf8 // parse this line, second token is media file name. StringTokenizer st = new StringTokenizer(line); if (st.hasMoreTokens()) { // 'eat' %snd label st.nextToken(); } if (st.hasMoreTokens()) { mediaFileName = st.nextToken(); } // strip off possible double quotes if (mediaFileName.startsWith("\"")) { mediaFileName = mediaFileName.substring(1); } if (mediaFileName.endsWith("\"")) { mediaFileName = mediaFileName.substring(0, mediaFileName.length() - 1); } } } else if (!line.startsWith("@")) { // no label, continuation of previous line outputLine += line; } } // finish last line if (!outputLine.equals("") && (chatBlock != null)) { addLineToBlock(outputLine, chatBlock); } // output last block if (chatBlock != null) { chatBlocks.add(chatBlock); } } catch (FileNotFoundException fex) { fex.printStackTrace(); } catch (IOException iex) { iex.printStackTrace(); } } /** * Helper method to avoid copy and paste * * @param file DOCUMENT ME! * * @return DOCUMENT ME! * * @throws IOException DOCUMENT ME! */ // private final BufferedReader file2br(File file) throws IOException { /* A file is opened from the operating system. This stream of bytes could be a UTF-8 encoded unicode stream. If a file interpreted as UTF-8 contains isolatin-1, the file cannot be read. An Exception is thrown. Therefore, special care has to be taken when reading in UTF-8. As a first measure, the filename is used to decide if to read as UTF-8. This has to be changend in a future version. This is just done in order to include Unicode characters into Eudico. */ /* Reader filereader; if (-1 != file.getName().lastIndexOf(".utf8.")) { // this means 'contains' filereader = new InputStreamReader(new FileInputStream(file), "UTF-8"); } else { // use the locale encoding. filereader = new FileReader(file); } BufferedReader br = new BufferedReader(filereader); return br; } */ private void addLineToBlock(String theLine, ArrayList theBlock) { String label = null; String value = null; label = getLabelPart(theLine); value = getValuePart(theLine); if ((label != null) && (value != null)) { String[] line = { label, value }; theBlock.add(line); } else if ((label != null) && (value == null)) { // maybe a valid tierlabel with empty annotation content String[] line = { label, "" }; theBlock.add(line); } } private String getLabelPart(String theLine) { String label = null; int index = theLine.indexOf(':'); if (index > 0) { label = theLine.substring(0, index); } return label; } private String getValuePart(String theLine) { String value = null; int index = theLine.indexOf(':'); if (index < (theLine.length() - 2)) { value = theLine.substring(index + 1).trim(); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -