📄 shoeboxarray.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 3 页
字号:
上一页 1 2 3
                    nlFound = false;
                }
                int numOfSpaces = maxLengthInInterlinearBlock - l;
                String spaces = "";
                for (int i = 0; i < numOfSpaces; i++) {
                    spaces += " ";
                }
                String newC = "";
                if (nlFound) {
                    newC = c.substring(0, l) + spaces + " " +
                        c.substring(l + 1);
                } else {
                    newC = c.substring(0, l) + spaces;
                }
                shoeboxArray[lblIndex][currentIndexBlock] = newC; // store in array
            }
        }
        // decrement all lineCounts
        Iterator markerIter4 = interlinearTierMarkers.iterator();
        while (markerIter4.hasNext()) {
            String lbl = (String) markerIter4.next();
            int newCount = ((Integer) (lineCounts.get(lbl))).intValue() - 1;
            if (newCount < 0) {
                newCount = 0;
            }
            Integer i = new Integer(newCount);
            lineCounts.put(lbl, i); // decr count
        }
    }

    /**
     * Reads the Shoebox file line by line. Used for both passes over the
     * file: preparation (counting) and storing.
     *
     * <p>The file is decoded twice in parallel, once as ISO-8859-1 and once
     * as UTF-8. The ISO-8859-1 line is used by default; for markers that
     * {@code typFile} reports as Unicode tiers the UTF-8 line is used
     * instead. Both readers are closed when the method exits, whether it
     * returns normally or throws.</p>
     *
     * <p>On successful completion {@code isShoeboxArrayPreparation} is set to
     * {@code false}.</p>
     *
     * @throws Exception if the first line is not a recognised
     *         {@code \_sh v3.0} / {@code \_sh v4.0} header, if a line without
     *         a leading marker is found in strict mode, if such a line cannot
     *         be appended to a preceding tier, or if reading the file fails
     */
    private final void readSbx() throws Exception {
        String line = null;
        String utf8Line = null;

        MediaDescriptor mediaDescriptor = null;
        mediaDescriptors = new ArrayList();

        BufferedReader br = null;
        BufferedReader utf8Br = null;

        try {
            /*
               A shoebox file may contain 8byte characters from custom fonts.
               Treating it as isolatin-1 may introduce character errors!
               The same file is therefore also read as UTF-8, for the markers
               that are flagged as Unicode tiers.
             */
            // explicit performance care: buffering the file readers
            br = new BufferedReader(new InputStreamReader(
                        new FileInputStream(file), "ISO-8859-1"));
            utf8Br = new BufferedReader(new InputStreamReader(
                        new FileInputStream(file), "UTF-8"));

            String label = null;
            prevLabel = null;

            int linenumber = 0;

            while ((line = br.readLine()) != null) {
                utf8Line = utf8Br.readLine();
                prevLabel = label;

                // if unicode tier, substitute line with utf8Line
                StringTokenizer t = new StringTokenizer(line);

                if (t.hasMoreTokens()) {
                    label = t.nextToken(); // the first word

                    // a first word that is not a marker means the line
                    // continues the previous marker
                    if ((label.length() > 1) &&
                            (!label.startsWith("\\") ||
                            ((label.charAt(0) == '\\') &&
                            ((label.charAt(1) == ' ') || (label.charAt(1) == '\t'))))) {
                        label = prevLabel;
                    }

                    if (typFile.isUnicodeTier(label)) {
                        line = utf8Line;

                        if (!isShoeboxArrayPreparation &&
                                interlinearTierMarkers.contains(label)) {
                            line = decodeToolboxUnicode(line);
                        }
                    }
                }

                linenumber++;
                line = line.trim();

                if (linenumber == 1) {
                    // HS 06-2006 extended the test with support for files
                    // with the Unicode Byte Order Mark (U+FEFF)
                    if ((line.startsWith("\\_sh v4.0")) ||
                            (line.startsWith("\\_sh v3.0")) ||
                            (utf8Line.startsWith("\ufeff\\_sh v3.0") ||
                            utf8Line.startsWith("\ufeff\\_sh v4.0"))) {
                        shoeboxheader = line;

                        // last token is database type, store in ShoeboxTypFile
                        String dbType = "";

                        while (t.hasMoreTokens()) {
                            dbType = t.nextToken();
                        }

                        if (!dbType.equals("")) {
                            typFile.setDatabaseType(dbType);
                        }

                        continue;
                    } else {
                        throw new Exception(
                            "A shoebox file must begin with '\\_sh v4.0' or '\\_sh v3.0', found " +
                            line + "!");
                    }
                }

                if (line.length() == 0) {
                    // skip white lines
                    continue;
                }

                if (line.startsWith("\\_") && (currentIndexBlock == -1)) {
                    // add to header
                    shoeboxheader = shoeboxheader + "\n" + line;

                    continue;
                }

                if (!line.startsWith("\\")) {
                    /* when a line does not start with a label, this is an error.
                       If we are not in strict mode,
                       we assume that the preceding line is continued.
                     */
                    if (strict1) {
                        throw new Exception("tier without leading label \"" + line +
                            "\"");
                    }

                    //else nevertested
                    if (currentIndexLabel < 0) {
                        throw new Exception(
                            "There is no tier where I can append \"" + line +
                            "\" to!");
                    }

                    if (isShoeboxArrayPreparation) {
                        continue;
                    }

                    // hacky append
                    String oldContent = shoeboxArray[currentIndexLabel][currentIndexBlock];

                    // HS 06-2006 only throw exception when there is no old contents
                    if (oldContent == null) {
                        throw new Exception(
                            "There is no tier where I can append \"" + line +
                            "\" to!");
                    }

                    // concatenate, do not mark the point of concatenation,
                    // fix error silently.
                    shoeboxArray[currentIndexLabel][currentIndexBlock] = oldContent +
                        " " + line;

                    continue;
                } else if (!isShoeboxArrayPreparation &&
                        ((line.length() == 1) || (line.charAt(1) == ' ') ||
                        (line.charAt(1) == '\t'))) {
                    // HS june 2006: allow a single backslash to be part of
                    // the content of a marker: append it to the current cell.
                    // The index guard avoids an ArrayIndexOutOfBoundsException
                    // when no cell has been selected yet; like a null cell,
                    // that case is skipped silently.
                    if ((currentIndexLabel >= 0) && (currentIndexBlock >= 0) &&
                            (shoeboxArray[currentIndexLabel][currentIndexBlock] != null)) {
                        shoeboxArray[currentIndexLabel][currentIndexBlock] = shoeboxArray[currentIndexLabel][currentIndexBlock] +
                            " " + line;
                    }

                    continue;
                }

                // tokenize the shoebox line into label and content;
                // label contains leading backslash!
                label = new StringTokenizer(line).nextToken(); // the first word
                storeLabelInOrder(label, prevLabel);

                String content = "";

                if (line.length() > label.length()) {
                    content = line.substring(label.length() + 1);
                }

                // strip trailing spaces, if any. The line has already been
                // trimmed, so this is defensive; it keeps the last non-space
                // character and tolerates an all-space content.
                int contentEnd = content.length();

                while ((contentEnd > 0) &&
                        (content.charAt(contentEnd - 1) == ' ')) {
                    contentEnd--;
                }

                content = content.substring(0, contentEnd);

                if (line.startsWith(ShoeboxEncoder.elanMediaURLLabel)) {
                    // a new media URL starts a new descriptor; flush the
                    // pending one first
                    if (mediaDescriptor != null) {
                        mediaDescriptors.add(mediaDescriptor);
                    }

                    mediaDescriptor = new MediaDescriptor(content, null);

                    continue;
                }

                if (line.startsWith(ShoeboxEncoder.elanMediaMIMELabel)) {
                    if (mediaDescriptor != null) {
                        mediaDescriptor.mimeType = content;
                    }

                    continue;
                }

                if (line.startsWith(ShoeboxEncoder.elanMediaExtractedLabel)) {
                    if (mediaDescriptor != null) {
                        mediaDescriptor.extractedFrom = content;
                    }

                    continue;
                }

                if (line.startsWith(ShoeboxEncoder.elanMediaOriginLabel)) {
                    if (mediaDescriptor != null) {
                        mediaDescriptor.timeOrigin = Long.parseLong(content);
                    }

                    continue;
                }

                store_label(label);

                if (label.equals(label_ref)) {
                    prepare_or_finish_block();

                    // HB, 31 jul 02, reset lineCounts
                    Iterator markerIter = interlinearTierMarkers.iterator();

                    while (markerIter.hasNext()) {
                        lineCounts.put(markerIter.next(), new Integer(0));
                    }
                }

                store_label_and_content(label, content);
            }

            // add last pending mediaDescriptor, if present
            if (mediaDescriptor != null) {
                mediaDescriptors.add(mediaDescriptor);
            }

            prepare_or_finish_block();

            //checkIfCompletelyAligned();
        } finally {
            // closing a BufferedReader also closes the reader it wraps
            try {
                if (br != null) {
                    br.close();
                }
            } finally {
                if (utf8Br != null) {
                    utf8Br.close();
                }
            }
        }

        isShoeboxArrayPreparation = false; // only once
    }

    /*
    private void checkIfCompletelyAligned() {
        // if completelyUnaligned is false there is at least one time set.
        // For the moment time alignment must be complete, otherwise imported file is
        // to be treated as completely unaligned.
        // TEMPORARY: method can be removed when proper dealing with partial time alignment
        // on top level tiers is implemented
        if (!completelyUnaligned && !isShoeboxArrayPreparation) {
            treatAsUnaligned = false;
            // check t0's for value -1
            int x = labelList.indexOf(ShoeboxEncoder.elanBeginLabel);
            String[] beginStrings = shoeboxArray[x];
            for (int i = 0; i < beginStrings.length; i++) {
                if (beginStrings[i].equals("-1")) {
                    treatAsUnaligned = true;
                    break;
                }
            }
            // check t1's for value -1, only if treatAsUnaligned isn't already true
            if (!treatAsUnaligned) {
                x = labelList.indexOf(ShoeboxEncoder.elanEndLabel);
                String[] endStrings = shoeboxArray[x];
                for (int j = 0; j < endStrings.length; j++) {
                    if (endStrings[j].equals("-1")) {
                        treatAsUnaligned = true;
                        break;
                    }
                }
            }
        }
    } */

    /**
     * Returns the first label, in the order delivered by {@code getLabels()},
     * that is not one of the ELAN bookkeeping labels (begin, end, participant,
     * ELAN, block start), is not a key of {@code typFile.tofromHash} and has
     * a non-null cell in the given block.
     *
     * @param row the index of the block
     *
     * @return the label found, or the empty string if there is none
     */
    public String getRootMarkerForBlock(int row) {
        Enumeration en = getLabels();

        while (en.hasMoreElements()) {
            String lbl = (String) en.nextElement();

            if (lbl.equals(ShoeboxEncoder.elanBeginLabel) ||
                    lbl.equals(ShoeboxEncoder.elanEndLabel) ||
                    lbl.equals(ShoeboxEncoder.elanParticipantLabel) ||
                    lbl.equals(ShoeboxEncoder.elanELANLabel) ||
                    lbl.equals(ShoeboxEncoder.elanBlockStart)) {
                continue;
            }

            if (!typFile.tofromHash.containsKey(lbl) &&
                    (getCell(lbl, row) != null)) {
                return lbl;
            }
        }

        return "";
    }

    /**
     * Reads the blocks and tiers of a WAC document. In the preparation pass
     * the blocks are counted and the tier names are registered; in the
     * storing pass the tier values are written with {@code overwriteContent}.
     *
     * @param doc wacfile
     *
     * @throws Exception propagated from the storing helpers
     */
    private final void readWac(Document doc) throws Exception {
        NodeList blockList = doc.getElementsByTagName("block");

        for (int i = 0; i < blockList.getLength(); i++) {
            Element blockElement = (Element) blockList.item(i);

            if (isShoeboxArrayPreparation) {
                maxIndexBlocks += 1;
            }

            NodeList tierList = blockElement.getElementsByTagName("tier");

            for (int j = 0; j < tierList.getLength(); j++) {
                Element tierElement = (Element) tierList.item(j);

                //MK:02/11/29 the sad tale of standard procedures: WAC tiernames must follow sbx \-convention...
                String tierName = "\\" + tierElement.getAttribute("name");

                if (isShoeboxArrayPreparation) {
                    store_label(tierName);
                } else {
                    // an empty <tier/> element has no child node; treat it as
                    // empty content instead of failing with a
                    // NullPointerException
                    // NOTE(review): confirm that overwriteContent accepts ""
                    org.w3c.dom.Node firstChild = tierElement.getFirstChild();
                    String tierValue = (firstChild == null) ? ""
                                                            : firstChild.getNodeValue();
                    overwriteContent(tierName, i, tierValue);
                }
            }

            prepare_or_finish_block();
        }
    }

    /**
     * Toolbox uses 2 bytes or 3 bytes for certain characters on Unicode markers.
     * The interlinear alignment based on whitespace characters is corrected here
     * by adding extra space characters in between words, depending on the
     * characters in the String. This way the alignment corresponds to the
     * alignment in ISO Latin markers.
     *
     * <p>Toolbox stores interlinearization on basis of byte position. This
     * causes a problem in case of UTF-8 encodings of more than 1 byte. For
     * every character that takes more than one byte, the missing columns are
     * inserted as spaces directly after the next space character. Columns
     * still pending after the last space are not added, so the result never
     * carries unused trailing characters.</p>
     *
     * @param value the original Toolbox unicode encoded String
     *
     * @return the modified string, or {@code null} if {@code value} is
     *         {@code null}
     */
    private String decodeToolboxUnicode(String value) {
        if (value == null) {
            return value;
        }

        final int length = value.length();

        // first count how many chars can be added at most
        int maxExtra = 0;

        for (int i = 0; i < length; i++) {
            maxExtra += extraUtf8Bytes(value.charAt(i));
        }

        char[] resChars = new char[length + maxExtra];
        int pending = 0; // columns owed since the last space
        int k = 0; // number of chars written to resChars

        for (int i = 0; i < length; i++) {
            char cc = value.charAt(i);
            resChars[k++] = cc;

            if (cc == ' ') {
                // pay out the owed columns right after the word separator
                for (; pending > 0; pending--) {
                    resChars[k++] = ' ';
                }
            } else {
                pending += extraUtf8Bytes(cc);
            }
        }

        return new String(resChars, 0, k);
    }

    /**
     * Returns how many more bytes than chars the given char contributes to
     * the byte position: 1 for two-byte characters, 2 for three-byte
     * characters, and 1 for each half of a surrogate pair (two chars that
     * together take four bytes in UTF-8).
     *
     * @param cc the character to measure
     *
     * @return the number of extra columns needed for this char
     */
    private static int extraUtf8Bytes(char cc) {
        // NOTE(review): U+0000 is counted as a 2 byte character, as before;
        // confirm that this matches what Toolbox writes
        if ((cc == '\u0000') || ((cc >= '\u0080') && (cc <= '\u07ff'))) { // 2 bytes
            return 1;
        }

        if ((cc >= 0xD800) && (cc <= 0xDFFF)) { // half of a 4 byte pair
            return 1;
        }

        if (cc >= '\u0800') { // 3 bytes
            return 2;
        }

        return 0;
    }
}
上一页 1 2 3
💿 文件大小 23621 K
👤 上传用户 ccuading
📂 所属分类 Java编程
🏷️ 相关标签

#编辑 #视频
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -