📄 dafparser.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
     * MK:02/06/10<br> Elements of Vector are no CLOM/ACM Annotations but yet     * another Vector of String . The inner Vector is interpreted as variant     * record in DAFTranscriptionStore.loadTranscription     * </p>     *     * @param tierName DOCUMENT ME!     * @param fileName DOCUMENT ME!     *     * @return Vector of Vector of String     */    public Vector getAnnotationsOf(String tierName, String fileName) {        // make sure that the correct file has been parsed        if (!lastParsed.equals(fileName)) {            parse(fileName);        }        long start = System.currentTimeMillis();        Vector annotationVector = new Vector();        // get the tags from the tiers Hashtable        Hashtable annotations = (Hashtable) tiers.get(tierName);        // get an iterator that iterates over the tags in the right order.        Iterator iter = annotations.keySet().iterator();        while (iter.hasNext()) {            Vector annotationRecord = new Vector();            Object key = iter.next();            annotationRecord.add(key);            annotationRecord.addAll(((Vector) annotations.get(key)));            annotationVector.add(annotationRecord);        }        long duration = System.currentTimeMillis() - start;        //	System.out.println("Extracting Annotations took " + duration + " milli seconds");        return annotationVector;    }    /**     * Parses a DOBES-minimal compliant xml file.     *     * @param fileName the DOBES-minimal compliant xml file that must be     *        parsed.     
*/    private void parse(String fileName) {        long start = System.currentTimeMillis();        try {            //		System.out.println("Parse : " + fileName);            //		System.out.println("Free memory : " + Runtime.getRuntime().freeMemory());            // only parse the same file once            if (lastParsed.equals(fileName)) {                return;            }            // (re)set everything to null for each parse            tiers = new Hashtable();            tierNames = new Vector(); // HB, 2-1-02, to store name IN ORDER            tierAttributes = new Hashtable();            mediaFile = "";            linguisticTypes = new Vector();            locales = new Vector();            timeSlots = new Hashtable();            timeOrder = new Vector();            // parse the file            xmlFile = new File(fileName);            lastParsed = fileName;            currentFileName = fileName;            saxParser.parse(xmlFile, this);        } catch (Exception e) {            printErrorLocationInfo("Fatal(?) Error! " + e.getMessage());        }        long duration = System.currentTimeMillis() - start;        //	System.out.println("Parsing took " + duration + " milli seconds");    }    /**     * HandlerBase method     */    public void startDocument() {        parseError = false;    }    /**     * HandlerBase method     */    public void endDocument() {    }    /**     * HandlerBase method     *     * @param name DOCUMENT ME!     * @param attributes DOCUMENT ME!     
*/    public void startElement(String name, AttributeList attributes) {        //	System.out.println("startElement called for name:" + name);        content = null;        if (name.equals("ANNOTATION_DOCUMENT")) {            author = attributes.getValue("AUTHOR");        } else if (name.equals("HEADER")) {            // implement when dealing with MediaObject            mediaFile = attributes.getValue("MEDIA_FILE");        } else if (name.equals("TIME_ORDER")) {            // nothing to be done, tierOrder Vector already created        } else if (name.equals("TIME_SLOT")) {            String timeValue = String.valueOf(TimeSlot.TIME_UNALIGNED);            if (attributes.getValue("TIME_VALUE") != null) {                timeValue = attributes.getValue("TIME_VALUE");            }            timeSlots.put(attributes.getValue("TIME_SLOT_ID"), timeValue);            timeOrder.add(attributes.getValue("TIME_SLOT_ID"));        } else if (name.equals("TIER")) {            currentTierId = attributes.getValue("TIER_ID");            // First check whether this tier already exists            if (!tiers.containsKey(currentTierId)) {                // create entries in the tiers and tierAttributes Hashtables for annotations and attributes resp.                
tiers.put(currentTierId, new Hashtable());                tierAttributes.put(currentTierId, new Hashtable());                // HB, 2-1-02                tierNames.add(currentTierId);            }            // store tier attributes            Hashtable attrHash = (Hashtable) tierAttributes.get(currentTierId);            if (attributes.getValue("PARTICIPANT") != null) {                attrHash.put("PARTICIPANT", attributes.getValue("PARTICIPANT"));            }            attrHash.put("LINGUISTIC_TYPE_REF",                attributes.getValue("LINGUISTIC_TYPE_REF"));            attrHash.put("DEFAULT_LOCALE", attributes.getValue("DEFAULT_LOCALE"));            if (attributes.getValue("PARENT_REF") != null) {                attrHash.put("PARENT_REF", attributes.getValue("PARENT_REF"));            }        } else if (name.equals("ALIGNABLE_ANNOTATION")) {            currentAnnotationId = attributes.getValue("ANNOTATION_ID");            // create new "AnnotationRecord" and add to annotations Hashtable for current tier            ((Hashtable) tiers.get(currentTierId)).put(currentAnnotationId,                new Vector());            // mark type of annotation, add start and end times to this AnnotationRecord            ((Vector) ((Hashtable) tiers.get(currentTierId)).get(currentAnnotationId)).add(                "alignable");            ((Vector) ((Hashtable) tiers.get(currentTierId)).get(currentAnnotationId)).add(attributes.getValue(                    "TIME_SLOT_REF1"));            ((Vector) ((Hashtable) tiers.get(currentTierId)).get(currentAnnotationId)).add(attributes.getValue(                    "TIME_SLOT_REF2"));        } else if (name.equals("REF_ANNOTATION")) {            currentAnnotationId = attributes.getValue("ANNOTATION_ID");            // create new "AnnotationRecord" and add to annotations Hashtable for current tier            ((Hashtable) tiers.get(currentTierId)).put(currentAnnotationId,                new Vector());            // mark type of 
annotation, add annotation reference to this AnnotationRecord            ((Vector) ((Hashtable) tiers.get(currentTierId)).get(currentAnnotationId)).add(                "reference");            ((Vector) ((Hashtable) tiers.get(currentTierId)).get(currentAnnotationId)).add(attributes.getValue(                    "ANNOTATION_REF"));            if (attributes.getValue("PREVIOUS_ANNOTATION") != null) {                ((Vector) ((Hashtable) tiers.get(currentTierId)).get(currentAnnotationId)).add(attributes.getValue(                        "PREVIOUS_ANNOTATION"));            } else {                ((Vector) ((Hashtable) tiers.get(currentTierId)).get(currentAnnotationId)).add(                    "");            }        } else if (name.equals("LINGUISTIC_TYPE")) {            linguisticTypes.add(new LinguisticType(attributes.getValue(                        "LINGUISTIC_TYPE_ID")));        } else if (name.equals("LOCALE")) {            String langCode = attributes.getValue("LANGUAGE_CODE");            String countryCode = attributes.getValue("COUNTRY_CODE");            if (countryCode == null) {                countryCode = "";            }            String variant = attributes.getValue("VARIANT");            if (variant == null) {                variant = "";            }            Locale l = new Locale(langCode, countryCode, variant);            locales.add(l);        }    }    //startElement    /**     * HandlerBase method     *     * @param name DOCUMENT ME!     */    public void endElement(String name) {        if (name.equals("ANNOTATION_VALUE")) {            ((Vector) ((Hashtable) tiers.get(currentTierId)).get(currentAnnotationId)).add(content);        }    }    /**     * HandlerBase method     *     * @param buf DOCUMENT ME!     * @param start DOCUMENT ME!     * @param length DOCUMENT ME!     
*/    public void characters(char[] buf, int start, int length) {        if (content == null) {            content = removeWhiteSpace(buf, start, length);        } else {            content += removeWhiteSpace(buf, start, length);        }    }    /**     * HandlerBase method     *     * @param publicId DOCUMENT ME!     * @param systemId DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public InputSource resolveEntity(String publicId, String systemId) {        InputSource inputSource = null;        /*        try {           // Open an InputSource to a DOBES-DAF DTD           // The location of the dtd defs is under the corpus directory in the path dobes/dtd.           if (systemId.endsWith(".dtd")) {               int to = systemId.indexOf(".dtd") + 4;               int from = systemId.lastIndexOf('/', to) + 1;               String fileName = ServerConfiguration.CORPUS_DIRECTORY + File.separator + "dobes" +                           File.separator + "dtd" + File.separator + systemId.substring(from, to);           //    inputSource = new InputSource(new FileInputStream(fileName));           //    inputSource = new InputSource(StringUtil.openEncodedFile("UTF-8", fileName));               inputSource = new InputSource(new InputStreamReader(new FileInputStream(fileName), "UTF8"));           }           }           catch (Exception e) {               e.printStackTrace();           }         */        return inputSource;    }    /**     * HandlerBase method     *     * @param e DOCUMENT ME!     */    public void error(SAXParseException e) {        printErrorLocationInfo("Parse error " + e.getMessage());        parseError = true;    }    /**     * DOCUMENT ME!     *     * @param e DOCUMENT ME!     
*/
    public void fatalError(SAXParseException e) {
        String report = "Fatal Parse Error " + e.getMessage();

        printErrorLocationInfo(report);
        parseError = true;
    }

    /**
     * Turns the given range of the buffer into a String. Despite its name
     * the method does not strip anything at the moment: the trimming logic
     * that used to live here is disabled, so the range is copied unchanged.
     *
     * @param buf the character buffer
     * @param start index of the first character to copy
     * @param length number of characters to copy
     *
     * @return the characters buf[start] .. buf[start + length - 1]
     */
    private String removeWhiteSpace(char[] buf, int start, int length) {
        return new String(buf, start, length);
    }

    /**
     * Prints a line to standard out, but only in verbose mode.
     *
     * @param s the text to print
     */
    private void println(String s) {
        if (!verbose) {
            return;
        }

        System.out.println(s);
    }

    /**
     * Prints a message followed by the file, tier id and annotation id that
     * were current when the problem was reported.
     *
     * @param message the message to print first
     */
    private void printErrorLocationInfo(String message) {
        System.out.println(message);
        System.out.println("Exception for " + currentFileName);
        System.out.println("Tier id " + currentTierId);
        System.out.println("Annotation id " + currentAnnotationId);
    }
}
上一页 12
💿 文件大小 23621 K
👤 上传用户 ccuading
📂 所属分类 Java编程
🏷️ 相关标签

#编辑 #视频
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -