📄 shoeboxparser.java
字号:
if (wc < (wb.size() - 2)) { e = ((Integer) wb.get(wc + 2)).intValue(); } } if (val.length() < e) { e = val.length(); } if (val.length() < b) { b = val.length(); } if ((b > 0) && (val.charAt(b - 1) != ' ')) { // ignore, if taken with previous word b = e; } } if (b > e) { //System.out.println("Val: " + val); LOG.warning("begin > end: " + b + " - " + e + " value: " + val); e = val.length(); } if (b >= val.length()) { //System.out.println("Error b >= l: " + b + " " + e + " l: " + val.length()); //System.out.println("Val: " + val); LOG.warning("begin >= length: " + b + " - " + val.length() + " value: " + val); return result; } if (e >= val.length()) { //System.out.println("Error e >= l: " + b + " " + e + " l: " + val.length()); //System.out.println("Val: " + val); if (e > val.length()) { LOG.warning("end > length: " + e + " - " + val.length() + " value: " + val); } result = val.substring(b); } else { result = val.substring(b, e); } if (trim) { result = result.trim(); } return result; } /** * DOCUMENT ME! * * @param tname shoebox tier (with or without leading \) * * @return names of direct childs of given tier * * @throws Exception up */ private final Vector childs(String tname) throws Exception { if (!tname.startsWith("\\")) { tname = "\\" + tname; } Vector result = new Vector(); //MK:02/06/19 I have to find the tree element by linear search?! DefaultMutableTreeNode found = null; Enumeration all = tiertree.postorderEnumeration(); while ((found == null) && all.hasMoreElements()) { DefaultMutableTreeNode node = (DefaultMutableTreeNode) all.nextElement(); String name = (String) node.getUserObject(); //logger.log(Level.FINE, " ," + name); if ((name != null) && name.equals(tname)) { found = node; } } if (found == null) { return result; } Enumeration kids = found.children(); while (kids.hasMoreElements()) { DefaultMutableTreeNode kid = (DefaultMutableTreeNode) kids.nextElement(); String name = (String) kid.getUserObject(); if (name == null) { continue; } if (name.startsWith("\\EUDICO")) { continue; } if (name.startsWith("\\ELAN")) { continue; } if (name.startsWith(ShoeboxArray.label_eudicoparticipant)) { continue; } result.add(name); } return result; } /** * MK:02/06/10<br> For a single row, create the RefAnno for this dad. If * there are no depended Annos, ignore the word-counter, otherwise, use * the wordcounter and create all children Annos. * * @param dad the annotation above me * @param brothers tiers on the same level of myself * @param row Sbxfile row * @param wordboundaries list of row-character count from file * @param wordcount actual row-character count from file * * @return true if there are more words in the given wordboundaries * * @throws Exception DOCUMENT ME! */ //private static int ccib = 0; private boolean createChildsInBlock(AnnotationRecord dad, Enumeration brothers, int row, ArrayList wordboundaries, int wordcount) throws Exception { String dadtierName = (String) annotRecordToTierMap.get(dad); boolean result = true; if (!brothers.hasMoreElements()) { // logger.log(Level.INFO, "== ending recursion"); return false; } String name = (String) brothers.nextElement(); String spk = sbxfile.getSpeaker(row); // logger.log(Level.INFO, // "== createChildsInBlock(" + dad.getValue() + ", '" + name + "', " + // row + ", " + wordboundaries + ", " + wordcount); ArrayList mywordboundaries = null; String val = sbxfile.getCell(name, row); /* if (typfile.isUnicodeTier(name)) { byte[] bytes = val.getBytes("UTF-8"); //System.out.println("ol: " + val.length()); val = new String(bytes, "UTF-8"); //System.out.println("by: " + bytes.length + " nl: " + val.length()); } */ if ((val == null) || (val.length() == 0)) { // skip this value, but there might be more brothers return createChildsInBlock(dad, brothers, row, wordboundaries, wordcount); } if ((simpleConverter != null) && typfile.isIPAtier(name)) { val = simpleConverter.toUnicode(val); } Vector vdcs = childs(name); Enumeration dcs = vdcs.elements(); boolean iHaveKids = dcs.hasMoreElements(); if ((wordboundaries == null) && !iHaveKids) { // append only once if (wordcount > 0) { return false; } // logger.log(Level.INFO, "== ("+val.substring(0, 8)+") is hanging childless under \\ref "); // logger.log(Level.INFO, // "== (" + val + ") is hanging childless under \\ref "); } else { // tier is not under ref, kids or not // use the wordcounter if (wordboundaries == null) { // ref is my dad and I have kids, I set the wordboundaries and the wordcounter. mywordboundaries = wbound(val, typfile.isUnicodeTier(name)); result = wordcount < (mywordboundaries.size() - 2); } else { // my dad set some wordboundaries // I have to get the right word mywordboundaries = wordboundaries; // If I have (inner )wordboundaries myself, // I have to ignore dads bounds and set a new ones. String xval = snap(val, mywordboundaries, wordcount, true, typfile.isUnicodeTier(name)); int index = ((Integer) mywordboundaries.get(wordcount)).intValue(); // pad xval with spaces until endIndex (hb, 3 sept 04) int endIndex = ((Integer) mywordboundaries.get(wordcount + 1)).intValue(); int xvalLength = xval.length(); for (int i = 0; i < (endIndex - index - xvalLength - 1); i++) { xval += " "; } ArrayList xmywordboundaries = wbound(xval, typfile.isUnicodeTier(name), index); // logger.log(Level.INFO, // " (" + xval + ") " + index + "/" + // xmywordboundaries); if (xmywordboundaries.size() > 2) { ///////////////////////////////// // recursion over trees within words (bern -e) int ww_wordcount = 0; boolean ww_hasMoreWords = true; while (ww_hasMoreWords) { String ww_val = snap(val, xmywordboundaries, ww_wordcount, true, typfile.isUnicodeTier(name)); // logger.log(Level.INFO, // " ........... '" + ww_val + "', of " + // ww_wordcount); // logger.log(Level.INFO, // " brothers.... '" + vdcs); AnnotationRecord annRec = new AnnotationRecord(); annRec.setAnnotationId(ANN_ID_PREFIX + annotId++); annRec.setValue(ww_val); if ((typfile.procedureTypeHash.get(name) != null) && (typfile.procedureTypeHash.get(name).equals("TimeSubdivision") || typfile.procedureTypeHash.get(name).equals("IncludedIn"))) { // alignable annot annRec.setAnnotationType(AnnotationRecord.ALIGNABLE); annRec.setReferredAnnotId(dad.getAnnotationId()); // used to create and connect timeslots // NOTE: order of these 3 statements important annotRecordToTierMap.put(annRec, name + "@" + spk); createAndConnectTimeSlots(annRec); annotationRecords.add(annRec); } else { // ref annotation annRec.setAnnotationType(AnnotationRecord.REFERENCE); annRec.setReferredAnnotId(dad.getAnnotationId()); // NOTE: order of these 3 statements important annotRecordToTierMap.put(annRec, name + "@" + spk); fillInPrevAnnotRef(annRec); annotationRecords.add(annRec); } if (!participantOrder.contains(spk)) { participantOrder.add(spk); } tierNameSet.add(name + "@" + spk); // logger.log(Level.INFO, "ww "); createChildsInBlock(annRec, vdcs.elements(), row, xmywordboundaries, ww_wordcount); ww_hasMoreWords = ww_wordcount < (xmywordboundaries.size() - 2); ww_wordcount += 1; } return createChildsInBlock(dad, brothers, row, wordboundaries, wordcount); ////////////////////////////////////////////// } } // snap sentence to word val = snap(val, mywordboundaries, wordcount, true, typfile.isUnicodeTier(name)); } AnnotationRecord aRec = new AnnotationRecord(); aRec.setAnnotationId(ANN_ID_PREFIX + annotId++); aRec.setValue(val); if ((typfile.procedureTypeHash.get(name) != null) && (typfile.procedureTypeHash.get(name).equals("TimeSubdivision") || typfile.procedureTypeHash.get(name).equals("IncludedIn"))) { // alignable annot aRec.setAnnotationType(AnnotationRecord.ALIGNABLE); aRec.setReferredAnnotId(dad.getAnnotationId()); // used to create and connect timeslots // NOTE: order of these 3 statements important annotRecordToTierMap.put(aRec, name + "@" + spk); createAndConnectTimeSlots(aRec); annotationRecords.add(aRec); } else { // ref annot aRec.setAnnotationType(AnnotationRecord.REFERENCE); aRec.setReferredAnnotId(dad.getAnnotationId()); // NOTE: order of next 3 statements important annotRecordToTierMap.put(aRec, name + "@" + spk); fillInPrevAnnotRef(aRec); annotationRecords.add(aRec); } if (!participantOrder.contains(spk)) { participantOrder.add(spk); } tierNameSet.add(name + "@" + spk); // System.out.println("added annot: " + me.getValue()); if (iHaveKids) { createChildsInBlock(aRec, dcs, row, mywordboundaries, wordcount); } createChildsInBlock(dad, brothers, row, wordboundaries, wordcount); return result; } private void fillInPrevAnnotRef(AnnotationRecord annRec) { long highestIndex = -1; String onTier = (String) annotRecordToTierMap.get(annRec); Iterator annIter = annotationRecords.iterator(); while (annIter.hasNext()) { AnnotationRecord aRec = (AnnotationRecord) annIter.next(); if ((((String) annotRecordToTierMap.get(aRec)).equals(onTier)) && // on same tier (aRec.getReferredAnnotId().equals(annRec.getReferredAnnotId()))) { // with same parent annot // remember the annot id with the highest index part String idString = aRec.getAnnotationId(); long annotIndex = Long.valueOf(idString.substring( ANN_ID_PREFIX.length())).longValue(); // get rid of prefix part if (annotIndex > highestIndex) { highestIndex = annotIndex; } } } if (highestIndex >= 0) { annRec.setPreviousAnnotId(ANN_ID_PREFIX + highestIndex); } } private void createAndConnectTimeSlots(AnnotationRecord annRec) { long highestIndex = -1; AnnotationRecord highestAnnot = null; AnnotationRecord parentRec = null; AnnotationRecord aRec = null; String onTier = (String) annotRecordToTierMap.get(annRec); Iterator annIter = annotationRecords.iterator(); while (annIter.hasNext()) { aRec = (AnnotationRecord) annIter.next(); if ((parentRec == null) && annRec.getReferredAnnotId().equals(aRec.getAnnotationId())) { // store parent annotation record for later use parentRec = aRec; } if ((((String) annotRecordToTierMap.get(aRec)).equals(onTier)) && // on same tier (aRec.getReferredAnnotId().equals(annRec.getReferredAnnotId()))) { // with same parent annot
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -