📄 shoeboxparser.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 5 页
字号:
        // MK's code assumes that interlinear tiers are handled before additional 'symbolic        // association' tiers. Therefore, reorder by putting the first interlinear line under the         // interlinearRootMarker first in vdcs        // iterate over 'to' names. If 'from' value is interlinearRootMarker and 'to' is element         // of 'from' (has children), then make it topTierName        /*        Enumeration toEnum = typfile.tofromHash.keys();                while (toEnum.hasMoreElements()) {                    String toName = (String) toEnum.nextElement();                    if (    (((String) typfile.tofromHash.get(toName)).equals(rootMarker)) &&                            (typfile.tofromHash.containsValue(toName))) {                        topTierName = toName;                        break;                    }                } */        topTierName = rootMarker;        Vector vdcs = childs(rootMarker);        if (topTierName != "") {            int maxDepth = 0;            Vector reorderedChildren = new Vector();            Iterator vdcsIter = vdcs.iterator();            while (vdcsIter.hasNext()) {                String n = (String) vdcsIter.next();                // hb, 19-4-05                DefaultMutableTreeNode childNode = sbxfile.getLabelNode(n);                if (childNode.getDepth() > maxDepth) {                    reorderedChildren.add(0, n);                    maxDepth = childNode.getDepth();                } else {                    reorderedChildren.add(n);                }                /*    if (n.equals(topTierName)) {                        reorderedChildren.add(0, n);                    } else {                        reorderedChildren.add(n);                    } */            }            vdcs = reorderedChildren;        }        // Annotation has to be split up by words        int wordcounter = 0;        boolean hasMoreWords = true;        while (hasMoreWords) {            //			  logger.log(Level.INFO, "-- create Block for word " + wordcounter);            Enumeration dcs = vdcs.elements();            //           logger.log(Level.INFO, "== vdcs=" + vdcs);            //hasMoreWords = createChildsInBlock(annRec, dcs, row, null, wordcounter);             // HS may 06 new implementation after change in ShoeboxArray: Unicode tiers are converted there            hasMoreWords = createChildrenInBlock(annRec, dcs, row, null,                    wordcounter);            wordcounter += 1;        }    }    /**     * Recursively find word boundaries and create annotations.     *     * @param par the parent annotation     * @param brothers enumeration of sibling     * @param row the current row or block or record index     * @param wordboundaries the wordboundaries of the parent     * @param wordcount the (current) index in the list of boundaries     * @return true as long as there are more siblings to process     * @throws Exception ??     */    private boolean createChildrenInBlock(AnnotationRecord par,        Enumeration brothers, int row, ArrayList wordboundaries, int wordcount)        throws Exception {        boolean result = true;        if (!brothers.hasMoreElements()) {            //           logger.log(Level.INFO, "== ending recursion");            return false;        }        String name = (String) brothers.nextElement();        String spk = sbxfile.getSpeaker(row);        //		  logger.log(Level.INFO,        //			  "==  createChildsInBlock(" + par.getValue() + ", '" + name + "', " +        //			  row + ", " + wordboundaries + ", " + wordcount);        ArrayList mywordboundaries = null;        String val = sbxfile.getCell(name, row);        if ((val == null) || (val.length() == 0)) {            // skip this value, but there might be more brothers            return createChildrenInBlock(par, brothers, row, wordboundaries,                wordcount);        }        if ((simpleConverter != null) && typfile.isIPAtier(name)) {            val = simpleConverter.toUnicode(val);        }        Vector vdcs = childs(name);        Enumeration dcs = vdcs.elements();        boolean iHaveKids = dcs.hasMoreElements();        // HS March 2007: if the marker has no children but is the first element in a Parse procedure         // (subdivision) treat it as if it had children?        if (!iHaveKids && typfile.getInterlinearTierMarkers().contains(name)) {            iHaveKids = true;        }        if ((wordboundaries == null) && !iHaveKids) {            // append only once            if (wordcount > 0) {                return false;            }            //	logger.log(Level.INFO, "== ("+val.substring(0, 8)+") is hanging childless under \\ref ");            //			  logger.log(Level.INFO,            //               "== (" + val + ") is hanging childless under \\ref ");        } else {            // tier is not under ref, kids or not            // use the wordcounter            if (wordboundaries == null) {                // ref is my parent and I have kids, I set the wordboundaries and the wordcounter.                mywordboundaries = wordbounds(val);                result = wordcount < (mywordboundaries.size() - 2);            } else {                // my parent set some wordboundaries                // I have to get the right word                mywordboundaries = wordboundaries;                // If I have (inner )wordboundaries myself,                // I have to ignore parents bounds and set a new ones.                String xval = snapWord(val, mywordboundaries, wordcount, true);                int index = ((Integer) mywordboundaries.get(wordcount)).intValue();                // pad xval with spaces until endIndex (hb, 3 sept 04)                int endIndex = ((Integer) mywordboundaries.get(wordcount + 1)).intValue();                int xvalLength = xval.length();                for (int i = 0; i < (endIndex - index - xvalLength - 1); i++) {                    xval += " ";                }                ArrayList xmywordboundaries = wordbounds(xval, index);                //				  logger.log(Level.INFO,                //					  "             (" + xval + ") " + index + "/" +                //                   xmywordboundaries);                if (xmywordboundaries.size() > 2) {                    /////////////////////////////////                    //  recursion over trees within words (bern -e)                    int ww_wordcount = 0;                    boolean ww_hasMoreWords = true;                    while (ww_hasMoreWords) {                        String ww_val = snapWord(val, xmywordboundaries,                                ww_wordcount, true);                        //                       logger.log(Level.INFO,                        //                           " ...........            '" + ww_val + "', of " +                        //                           ww_wordcount);                        //                       logger.log(Level.INFO,                        //                           "  brothers....          '" + vdcs);                        AnnotationRecord annRec = new AnnotationRecord();                        annRec.setAnnotationId(ANN_ID_PREFIX + annotId++);                        annRec.setValue(ww_val);                        if ((typfile.procedureTypeHash.get(name) != null) &&                                (typfile.procedureTypeHash.get(name).equals("TimeSubdivision") ||                                typfile.procedureTypeHash.get(name).equals("IncludedIn"))) { // alignable annot                            annRec.setAnnotationType(AnnotationRecord.ALIGNABLE);                            annRec.setReferredAnnotId(par.getAnnotationId()); // used to create and connect timeslots                            // NOTE: order of these 3 statements important                            annotRecordToTierMap.put(annRec, name + "@" + spk);                            createAndConnectTimeSlots(annRec);                            annotationRecords.add(annRec);                        } else { // ref annotation                            annRec.setAnnotationType(AnnotationRecord.REFERENCE);                            annRec.setReferredAnnotId(par.getAnnotationId());                            // NOTE: order of these 3 statements important                            annotRecordToTierMap.put(annRec, name + "@" + spk);                            fillInPrevAnnotRef(annRec);                            annotationRecords.add(annRec);                        }                        if (!participantOrder.contains(spk)) {                            participantOrder.add(spk);                        }                        tierNameSet.add(name + "@" + spk);                        //                       logger.log(Level.INFO, "ww ");                        createChildrenInBlock(annRec, vdcs.elements(), row,                            xmywordboundaries, ww_wordcount);                        ww_hasMoreWords = ww_wordcount < (xmywordboundaries.size() -                            2);                        ww_wordcount += 1;                    }                    return createChildrenInBlock(par, brothers, row,                        wordboundaries, wordcount);                    //////////////////////////////////////////////                }            }            // snap sentence to word            val = snapWord(val, mywordboundaries, wordcount, true);        }        AnnotationRecord aRec = new AnnotationRecord();        aRec.setAnnotationId(ANN_ID_PREFIX + annotId++);        aRec.setValue(val);        if ((typfile.procedureTypeHash.get(name) != null) &&                (typfile.procedureTypeHash.get(name).equals("TimeSubdivision") ||                typfile.procedureTypeHash.get(name).equals("IncludedIn"))) { // alignable annot            aRec.setAnnotationType(AnnotationRecord.ALIGNABLE);            aRec.setReferredAnnotId(par.getAnnotationId()); // used to create and connect timeslots            // NOTE: order of these 3 statements important            annotRecordToTierMap.put(aRec, name + "@" + spk);            createAndConnectTimeSlots(aRec);            annotationRecords.add(aRec);        } else { // ref annot            aRec.setAnnotationType(AnnotationRecord.REFERENCE);            aRec.setReferredAnnotId(par.getAnnotationId());            // NOTE: order of next 3 statements important		            annotRecordToTierMap.put(aRec, name + "@" + spk);            fillInPrevAnnotRef(aRec);            annotationRecords.add(aRec);        }        if (!participantOrder.contains(spk)) {            participantOrder.add(spk);        }        tierNameSet.add(name + "@" + spk);        // System.out.println("added annot: " + me.getValue());        if (iHaveKids) {            createChildrenInBlock(aRec, dcs, row, mywordboundaries, wordcount);        }        createChildrenInBlock(par, brothers, row, wordboundaries, wordcount);        return result;    }    /**     * DOCUMENT ME!     *     * @param s DOCUMENT ME!     *     * @return DOCUMENT ME!     */    private final ArrayList wordbounds(String s) {        return wordbounds(s, 0);    }    /**     * Get the wordboundaries from given String. Wordboundaries are the     * positions of all white space. If white space is followed by white     * space, the last position is used.     *     * @param s given String     * @param offset to add to all wordboundaries     *     * @return ArrayList of wordboundaries     */    private final ArrayList wordbounds(String s, int offset) {        //System.out.println(""+val+ "   ---- entry");        ArrayList result = new ArrayList();        result.add(new Integer(offset));        ArrayList idx = indicesOf(s.trim(), ' ');        //ArrayList idx = indexesOf(s, utf8, ' ');        //System.out.println(""+v1+ "   ---- indexes of");        idx = lastIntInRow(idx);        idx = addToAllIntegers(idx, offset + 1);        //System.out.println(""+v1+ "   ---- last in row");        result.addAll(idx);        // hb, 2-9-04: added +1 because rest of code assumes space between        // word beginnings        // String.getBytes(charset).length can be different from String.length()        result.add(new Integer(s.length() + 1 + offset));        // ending on ws        result = lastIntInRow(result);        //System.out.println(""+wordboundaries+ "   ---- result");        return result;    }    /**     * Returna a list with all indices of a certain char.     * @param val the string     * @param lookingfor the character to find in the string     * @return a list of indices     */    private final ArrayList indicesOf(String val, char lookingfor) {        ArrayList result = new ArrayList();        try {            char[] chars = new char[val.length()];            val.getChars(0, val.length(), chars, 0);            for (int i = 0; i < chars.length; i++) {                if (chars[i] == lookingfor) {                    result.add(new Integer(i));                }            }        } catch (Exception e) {            e.printStackTrace();        }        return result;    }    /**     * Extract a word from the input string, based on word boundaries and index.     *     * May 2006: unproper alignment was fixed in previous version in non utf-8 markers.     * This distinction is no longer made (see ShoeboxArray):     * @param val the complete line     * @param wb the boundary indices     * @param wc the index into the boundary list     * @param trim whether or not to trim the result     * @return the extracted word     */    private final String snapWord(String val, ArrayList wb, int wc, boolean trim) {        //logger.log(Level.FINE, "-- snap (" + val+  ", " + wb+  ", " + wc);        String result = "";        int b = 0;        int e = 0;        if (wc < wb.size()) {            b = ((Integer) wb.get(wc)).intValue();        }        if (wc < (wb.size() - 1)) {            e = ((Integer) wb.get(wc + 1)).intValue();        }        if (val.length() < e) {            e = val.length();        }        // HB, 3 nov 04, hack to fix improper shoebox alignment pattern        // ... woi Bia teri...        // ... woi     teri...        // ... mother  across...        if (fixImproperAlign) {            if (val.charAt(e - 1) != ' ') { // take with previous word
💿 文件大小 23621 K
👤 上传用户 ccuading
📂 所属分类 Java编程
🏷️ 相关标签

#编辑 #视频
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -