📄 eaf22parser.java
字号:
/** * Parses a EAF v2.2 xml file. * * @param fileName the EAF v2.2 xml file that must be parsed. */ private void parse(String fileName) { //long start = System.currentTimeMillis(); // System.out.println("Parse : " + fileName); // System.out.println("Free memory : " + Runtime.getRuntime().freeMemory()); // only parse the same file once if (lastParsed.equals(fileName)) { return; } // (re)set everything to null for each parse tiers.clear(); tierNames.clear(); // HB, 2-1-02, to store name IN ORDER tierAttributes.clear(); mediaFile = ""; linguisticTypes.clear(); locales.clear(); timeSlots.clear(); timeOrder.clear(); mediaDescriptors.clear(); controlledVocabularies.clear(); // parse the file lastParsed = fileName; currentFileName = fileName; try { saxParser.parse(fileName); } catch (SAXException e) { System.out.println("Parsing error: " + e.getMessage()); // the SAX parser can have difficulties with certain characters in // the filepath: try to create an InputSource for the parser File f = new File(fileName); if (f.exists()) { try { FileInputStream fis = new FileInputStream(f); InputSource source = new InputSource(fis); saxParser.parse(source); // just catch any exception } catch (Exception ee) { System.out.println("Parsing retry error: " + ee.getMessage()); } } } catch (IOException e) { e.printStackTrace(); } catch (Exception e) { printErrorLocationInfo("Fatal(?) Error! " + e.getMessage()); } //long duration = System.currentTimeMillis() - start; // System.out.println("Parsing took " + duration + " milli seconds"); } private void println(String s) { if (verbose) { System.out.println(s); } } private void printErrorLocationInfo(String message) { System.out.println(message); System.out.println("Exception for " + currentFileName); System.out.println("Tier id " + currentTierId); System.out.println("Annotation id " + currentAnnotationId); } /** * DOCUMENT ME! 
* $Id: EAF22Parser.java,v 1.3 2005/09/15 15:38:09 hasloe Exp $
 * @author $Author: hasloe $
 * @version $Revision: 1.3 $
 */
class EAFContentHandler implements ContentHandler {
    private Locator locator;

    /**
     * Keeps a reference to the locator supplied by the parser.
     *
     * @param locator the document locator handed over by the parser
     */
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }

    /**
     * Ignored, nothing is done with prefix mappings.
     *
     * @param prefix the namespace prefix
     * @param uri the namespace uri the prefix is mapped to
     */
    public void startPrefixMapping(String prefix, String uri) {
    }

    /**
     * Ignored, nothing is done with prefix mappings.
     *
     * @param prefix the namespace prefix
     */
    public void endPrefixMapping(String prefix) {
    }

    /**
     * Ignored, ignorable whitespace is not stored.
     *
     * @param ch the character buffer
     * @param start the index of the first whitespace character
     * @param end passed on by the parser, unused here
     *
     * @throws SAXException never thrown by this implementation
     */
    public void ignorableWhitespace(char[] ch, int start, int end)
        throws SAXException {
    }

    /**
     * Ignored, skipped entities are not recorded.
     *
     * @param name the name of the skipped entity
     *
     * @throws SAXException never thrown by this implementation
     */
    public void skippedEntity(String name) throws SAXException {
    }

    /**
     * Ignored, processing instructions are not handled.
     *
     * @param target the processing instruction target
     * @param data the processing instruction data
     *
     * @throws SAXException never thrown by this implementation
     */
    public void processingInstruction(String target, String data)
        throws SAXException {
    }

    /**
     * ContentHandler method, clears the parse error flag at the start of
     * a document.
     *
     * @throws SAXException never thrown by this implementation
     */
    public void startDocument() throws SAXException {
        parseError = false;
    }

    /**
     * ContentHandler method, nothing is done at the end of a document.
     *
     * @throws SAXException never thrown by this implementation
     */
    public void endDocument() throws SAXException {
    }

    /**
     * ContentHandler method. Empties the text buffer and stores the
     * attributes of the elements this parser knows; all other elements
     * (TIME_ORDER among them) need no action here.
     *
     * @param nameSpaceURI the namespace uri, unused
     * @param name the element name that selects the action
     * @param rawName the qualified element name, unused
     * @param attributes the attributes of the element
     *
     * @throws SAXException never thrown explicitly by this implementation
     */
    public void startElement(String nameSpaceURI, String name,
        String rawName, Attributes attributes) throws SAXException {
        // each element starts with an empty text buffer
        content = "";

        if (name.equals("ANNOTATION_DOCUMENT")) {
            author = attributes.getValue("AUTHOR");
        } else if (name.equals("HEADER")) {
            // implement when dealing with MediaObject
            mediaFile = attributes.getValue("MEDIA_FILE");
            svgFile = attributes.getValue("SVG_FILE");
        } else if (name.equals("MEDIA_DESCRIPTOR")) {
            readMediaDescriptor(attributes);
        } else if (name.equals("TIME_SLOT")) {
            readTimeSlot(attributes);
        } else if (name.equals("TIER")) {
            readTier(attributes);
        } else if (name.equals("ALIGNABLE_ANNOTATION")) {
            readAlignableAnnotation(attributes);
        } else if (name.equals("REF_ANNOTATION")) {
            readRefAnnotation(attributes);
        } else if (name.equals("LINGUISTIC_TYPE")) {
            readLinguisticType(attributes);
        } else if (name.equals("LOCALE")) {
            readLocale(attributes);
        } else if (name.equals("CONTROLLED_VOCABULARY")) {
            readControlledVocabulary(attributes);
        } else if (name.equals("CV_ENTRY")) {
            readCVEntry(attributes);
        }
    }

    /**
     * Creates a media descriptor from a MEDIA_DESCRIPTOR element and adds
     * it to the list of media descriptors.
     *
     * @param attributes the attributes of the element
     */
    private void readMediaDescriptor(Attributes attributes) {
        MediaDescriptor descriptor = new MediaDescriptor(
                attributes.getValue("MEDIA_URL"),
                attributes.getValue("MIME_TYPE"));

        // both fields are only overwritten when the attribute is present
        String origin = attributes.getValue("TIME_ORIGIN");

        if (origin != null) {
            descriptor.timeOrigin = Long.parseLong(origin);
        }

        String source = attributes.getValue("EXTRACTED_FROM");

        if (source != null) {
            descriptor.extractedFrom = source;
        }

        mediaDescriptors.add(descriptor);
    }

    /**
     * Stores the time value of a TIME_SLOT element under its id and
     * appends the id to the time order.
     *
     * @param attributes the attributes of the element
     */
    private void readTimeSlot(Attributes attributes) {
        String unaligned = String.valueOf(TimeSlot.TIME_UNALIGNED);
        String explicitValue = attributes.getValue("TIME_VALUE");
        String slotId = attributes.getValue("TIME_SLOT_ID");

        // a slot without a time value is stored as unaligned
        timeSlots.put(slotId,
            (explicitValue != null) ? explicitValue : unaligned);
        timeOrder.add(slotId);
    }

    /**
     * Makes the tier of a TIER element the current tier, registers it
     * when it is not known yet and stores its attributes.
     *
     * @param attributes the attributes of the element
     */
    private void readTier(Attributes attributes) {
        currentTierId = attributes.getValue("TIER_ID");

        // a tier that is encountered for the first time gets its own maps
        // for annotations and attributes; HB, 2-1-02: its name is stored too
        if (!tiers.containsKey(currentTierId)) {
            tiers.put(currentTierId, new HashMap());
            tierAttributes.put(currentTierId, new HashMap());
            tierNames.add(currentTierId);
        }

        HashMap tierProps = (HashMap) tierAttributes.get(currentTierId);

        String participant = attributes.getValue("PARTICIPANT");

        if (participant != null) {
            tierProps.put("PARTICIPANT", participant);
        }

        tierProps.put("LINGUISTIC_TYPE_REF",
            attributes.getValue("LINGUISTIC_TYPE_REF"));

        // HB, 29/30 oct 02: a missing locale falls back on "en"
        String defaultLocale = attributes.getValue("DEFAULT_LOCALE");
        tierProps.put("DEFAULT_LOCALE",
            (defaultLocale != null) ? defaultLocale : "en");

        String parent = attributes.getValue("PARENT_REF");

        if (parent != null) {
            tierProps.put("PARENT_REF", parent);
        }
    }

    /**
     * Creates the record for an ALIGNABLE_ANNOTATION element and adds it
     * to the annotations of the current tier.
     *
     * @param attributes the attributes of the element
     */
    private void readAlignableAnnotation(Attributes attributes) {
        currentAnnotationId = attributes.getValue("ANNOTATION_ID");

        AnnotationRecord annotation = new AnnotationRecord();
        annotation.setAnnotationId(currentAnnotationId);

        // the presence of a svg reference determines the annotation type
        String svgReference = attributes.getValue("SVG_REF");

        if (svgReference == null) {
            annotation.setAnnotationType(AnnotationRecord.ALIGNABLE);
        } else {
            annotation.setAnnotationType(AnnotationRecord.ALIGNABLE_SVG);
            annotation.setSvgReference(svgReference);
        }

        annotation.setBeginTimeSlotId(attributes.getValue("TIME_SLOT_REF1"));
        annotation.setEndTimeSlotId(attributes.getValue("TIME_SLOT_REF2"));

        addToCurrentTier(annotation);
    }

    /**
     * Creates the record for a REF_ANNOTATION element and adds it to the
     * annotations of the current tier.
     *
     * @param attributes the attributes of the element
     */
    private void readRefAnnotation(Attributes attributes) {
        currentAnnotationId = attributes.getValue("ANNOTATION_ID");

        AnnotationRecord annotation = new AnnotationRecord();
        annotation.setAnnotationId(currentAnnotationId);
        annotation.setAnnotationType(AnnotationRecord.REFERENCE);
        annotation.setReferredAnnotId(attributes.getValue("ANNOTATION_REF"));

        // a missing previous annotation is stored as an empty string
        String previous = attributes.getValue("PREVIOUS_ANNOTATION");
        annotation.setPreviousAnnotId((previous != null) ? previous : "");

        addToCurrentTier(annotation);
    }

    /**
     * Stores a record in the annotation map of the current tier, using
     * the current annotation id as the key.
     *
     * @param annotation the record to store
     */
    private void addToCurrentTier(AnnotationRecord annotation) {
        HashMap annotationsOfTier = (HashMap) tiers.get(currentTierId);
        annotationsOfTier.put(currentAnnotationId, annotation);
    }

    /**
     * Creates the record for a LINGUISTIC_TYPE element and adds it to the
     * list of linguistic types.
     *
     * @param attributes the attributes of the element
     */
    private void readLinguisticType(Attributes attributes) {
        LingTypeRecord typeRecord = new LingTypeRecord();
        typeRecord.setLingTypeId(attributes.getValue("LINGUISTIC_TYPE_ID"));

        // time alignable unless the attribute explicitly says "false"
        String alignable = attributes.getValue("TIME_ALIGNABLE");
        boolean notAlignable = (alignable != null) &&
            alignable.equals("false");
        typeRecord.setTimeAlignable(notAlignable ? "false" : "true");

        // graphic references only when the attribute explicitly says "true"
        String graphics = attributes.getValue("GRAPHIC_REFERENCES");
        boolean withGraphics = (graphics != null) &&
            graphics.equals("true");
        typeRecord.setGraphicReferences(withGraphics ? "true" : "false");

        typeRecord.setStereoType(attributes.getValue("CONSTRAINTS"));
        typeRecord.setControlledVocabulary(attributes.getValue(
                "CONTROLLED_VOCABULARY_REF"));

        linguisticTypes.add(typeRecord);
    }

    /**
     * Creates a Locale from a LOCALE element and adds it to the list of
     * locales. A missing country code or variant becomes an empty string.
     *
     * @param attributes the attributes of the element
     */
    private void readLocale(Attributes attributes) {
        String language = attributes.getValue("LANGUAGE_CODE");
        String country = attributes.getValue("COUNTRY_CODE");
        String variant = attributes.getValue("VARIANT");

        locales.add(new Locale(language,
                (country == null) ? "" : country,
                (variant == null) ? "" : variant));
    }

    /**
     * Starts a new list for a CONTROLLED_VOCABULARY element and makes it
     * the current vocabulary. The description, when present, becomes the
     * first element of the list.
     *
     * @param attributes the attributes of the element
     */
    private void readControlledVocabulary(Attributes attributes) {
        currentCVId = attributes.getValue("CV_ID");

        ArrayList vocabulary = new ArrayList();
        String description = attributes.getValue("DESCRIPTION");

        if (description != null) {
            vocabulary.add(description);
        }

        controlledVocabularies.put(currentCVId, vocabulary);
    }

    /**
     * Creates the record for a CV_ENTRY element and appends it to the
     * list of the current controlled vocabulary.
     *
     * @param attributes the attributes of the element
     */
    private void readCVEntry(Attributes attributes) {
        currentEntryRecord = new CVEntryRecord();
        currentEntryRecord.setDescription(attributes.getValue("DESCRIPTION"));

        ArrayList vocabulary = (ArrayList) controlledVocabularies.get(currentCVId);
        vocabulary.add(currentEntryRecord);
    }

    /**
     * ContentHandler method. Hands the collected text over to the current
     * annotation (ANNOTATION_VALUE) or to the current vocabulary entry
     * (CV_ENTRY).
     *
     * @param nameSpaceURI the namespace uri, unused
     * @param name the element name that selects the action
     * @param rawName the qualified element name, unused
     *
     * @throws SAXException never thrown explicitly by this implementation
     */
    public void endElement(String nameSpaceURI, String name, String rawName)
        throws SAXException {
        if (name.equals("ANNOTATION_VALUE")) {
            HashMap annotationsOfTier = (HashMap) tiers.get(currentTierId);
            AnnotationRecord annotation = (AnnotationRecord) annotationsOfTier.get(currentAnnotationId);
            annotation.setValue(content);
        } else if (name.equals("CV_ENTRY")) {
            currentEntryRecord.setValue(content);
        }
    }

    /**
     * ContentHandler method. Appends the reported characters to the text
     * buffer; the text of one element can arrive in several calls.
     *
     * @param ch the character buffer
     * @param start the index of the first character to append
     * @param end despite its name used as the number of characters to
     *        append
     *
     * @throws SAXException never thrown by this implementation
     */
    public void characters(char[] ch, int start, int end)
        throws SAXException {
        content = content + String.valueOf(ch, start, end);
    }
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -