📄 eafmultiplefilesearchhandler.java
字号:
/* * File: EAFMultipleFileSearchHandler.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package mpi.eudico.client.annotator.search.model;import mpi.eudico.client.annotator.search.result.model.EAFMultipleFileMatch;import mpi.search.content.query.model.ContentQuery;import mpi.search.content.query.model.Utilities;import mpi.search.content.result.model.ContentResult;import org.xml.sax.Attributes;import org.xml.sax.SAXException;import org.xml.sax.helpers.DefaultHandler;import java.io.File;import java.util.ArrayList;import java.util.Enumeration;import java.util.Hashtable;import java.util.List;import java.util.regex.Matcher;import java.util.regex.Pattern;/** * */class EAFMultipleFileSearchHandler extends DefaultHandler { /** Holds value of property DOCUMENT ME! */ final private static String NULL = "iuhfiahfafb29384hc"; /** Holds value of property DOCUMENT ME! */ final private ContentResult result; /** Holds value of property DOCUMENT ME! */ final private Pattern pattern; /** Holds value of property DOCUMENT ME! */ final private Hashtable timeSlots; /** Holds value of property DOCUMENT ME! */ final private Hashtable timeUnsolvedMatches; /** Holds value of property DOCUMENT ME! */ final private Hashtable ref1; /** Holds value of property DOCUMENT ME! */ final private Hashtable ref2; /** Holds value of property DOCUMENT ME! */ final private List tierNames; private boolean doAppend; private boolean processAfter; private StringBuffer textBuffer; private EAFMultipleFileMatch lastMatch; private String annotationBefore; private String tierName; private String timeSlotRef1; private String timeSlotRef2; private String annotationRef; private int indexInTier; private File file; // test for resolving unaligned alignable annotations private List unalignedAlignablesIds; private List unalignedAlignableMatches; private String lastAlignedBTS; private String id; /** * Creates a new EAFMultipleFileSearchHandler instance * * @param query DOCUMENT ME! */ public EAFMultipleFileSearchHandler(ContentQuery query) { this.result = (ContentResult) query.getResult(); pattern = Utilities.getPattern(query.getAnchorConstraint(), new EAFType()); tierNames = new ArrayList(); timeSlots = new Hashtable(); ref1 = new Hashtable(); ref2 = new Hashtable(); timeUnsolvedMatches = new Hashtable(); unalignedAlignablesIds = new ArrayList(); unalignedAlignableMatches = new ArrayList(); } /** * DOCUMENT ME! * * @param file DOCUMENT ME! */ public void newFile(File file) { this.file = file; timeSlots.clear(); ref1.clear(); ref2.clear(); timeUnsolvedMatches.clear(); unalignedAlignablesIds.clear(); unalignedAlignableMatches.clear(); } /** * DOCUMENT ME! * * @return DOCUMENT ME! */ public ContentResult getResult() { return result; } /** * DOCUMENT ME! * * @return DOCUMENT ME! */ public List getTierNames() { return tierNames; } /** * DOCUMENT ME! * * @param namespaceURI DOCUMENT ME! * @param sName DOCUMENT ME! * @param qName DOCUMENT ME! * @param attrs DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void startElement(String namespaceURI, String sName, String qName, Attributes attrs) throws SAXException { doAppend = false; if (qName.equals("TIER")) { tierName = attrs.getValue("TIER_ID"); // remember all tier names since last reset if (!tierNames.contains(tierName)) { tierNames.add(tierName); } annotationBefore = null; processAfter = false; indexInTier = -1; // let the first index be 0 } else if (qName.equals("ANNOTATION_VALUE")) { doAppend = true; textBuffer = new StringBuffer(); indexInTier++; } else if (qName.equals("ALIGNABLE_ANNOTATION")) { id = attrs.getValue("ANNOTATION_ID"); timeSlotRef1 = attrs.getValue("TIME_SLOT_REF1"); timeSlotRef2 = attrs.getValue("TIME_SLOT_REF2"); annotationRef = null; ref1.put(id, timeSlotRef1); ref2.put(id, timeSlotRef2); if ((lastAlignedBTS == null) && timeSlots.containsKey(timeSlotRef1) && !timeSlots.containsKey(timeSlotRef2)) { lastAlignedBTS = timeSlotRef1; } } else if (qName.equals("REF_ANNOTATION")) { id = attrs.getValue("ANNOTATION_ID"); annotationRef = attrs.getValue("ANNOTATION_REF"); timeSlotRef1 = null; timeSlotRef2 = null; ref1.put(id, NULL); ref2.put(id, annotationRef); } else if (qName.equals("TIME_SLOT")) { id = attrs.getValue("TIME_SLOT_ID"); String value = attrs.getValue("TIME_VALUE"); // must be robust, at least one dobes files has error if ((id != null) && (value != null)) { timeSlots.put(id, value); } } } /** * DOCUMENT ME! * * @param namespaceURI DOCUMENT ME! * @param sName DOCUMENT ME! * @param qName DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void endElement(String namespaceURI, String sName, String qName) throws SAXException { if (qName.equals("ANNOTATION_VALUE")) { // update last hit if needed if (processAfter) { lastMatch.setRightContext(textBuffer.toString()); processAfter = false; } Matcher matcher = pattern.matcher(textBuffer); if (matcher.find()) { EAFMultipleFileMatch match = new EAFMultipleFileMatch(textBuffer.toString()); List substringIndices = new ArrayList(); do { substringIndices.add(new int[] { matcher.start(0), matcher.end(0) }); } while (matcher.find()); match.setMatchedSubstringIndices((int[][]) substringIndices.toArray( new int[0][0])); match.setIndex(indexInTier); match.setId(id); if (annotationBefore != null) { match.setLeftContext(annotationBefore); } match.setTierName(tierName); match.setFileName(file.getAbsolutePath()); match.setBeginTimeBoundary(-1L); match.setEndTimeBoundary(-1L); if (annotationRef == null) { if (!timeSlots.containsKey(timeSlotRef1) || !timeSlots.containsKey(timeSlotRef2)) { unalignedAlignableMatches.add(match); } setTimeForMatch(match, timeSlotRef1, timeSlotRef2); } // do something for ref annotations, else { String alignedAnnID = annotationRef; while (ref1.get(alignedAnnID) == NULL) { // are there more than 1 refs possible? alignedAnnID = (String) ref2.get(alignedAnnID); } Object timeSlotRef = ref1.get(alignedAnnID); if (timeSlotRef != null) { setTimeForMatch(match, ref1.get(alignedAnnID), ref2.get(alignedAnnID)); } // if the annotation refered to comes later then the // refering annotation in .eaf resolve references at the end of the document else { timeUnsolvedMatches.put(match, annotationRef); } } //result.addMatch(match); lastMatch = match; processAfter = true; } annotationBefore = textBuffer.toString(); } else if (qName.equals("ALIGNABLE_ANNOTATION")) { // only do something if an unaligned annotation was encountered if (lastAlignedBTS != null) { if (timeSlots.containsKey(timeSlotRef2)) { // time to calculate interpolated time values // the stored lastAlignedBegin TS and the current timeSlotRef2 are used // for the interpolation //unalignedAlignablesIds.add(new Integer(indexInTier)); unalignedAlignablesIds.add(id); calculateUnalignedAlignedMatches(); } else { //unalignedAlignablesIds.add(new Integer(indexInTier)); unalignedAlignablesIds.add(id); } } } } /** * DOCUMENT ME! */ public void endDocument() { try { Enumeration e = timeUnsolvedMatches.keys(); while (e.hasMoreElements()) { EAFMultipleFileMatch match = (EAFMultipleFileMatch) e.nextElement(); String alignedAnnID = (String) timeUnsolvedMatches.get(match); while (ref1.get(alignedAnnID) == NULL) { // are there more than 1 refs possible? alignedAnnID = (String) ref2.get(alignedAnnID); } Object timeSlotRef = ref1.get(alignedAnnID); if (timeSlotRef != null) { setTimeForMatch(match, ref1.get(alignedAnnID), ref2.get(alignedAnnID)); } } } catch (Exception e) { e.printStackTrace(); } } /** * Tries to resolve the slot references and sets begin and end time in match * * @param match */ private void setTimeForMatch(EAFMultipleFileMatch match, Object timeSlotRef1, Object timeSlotRef2) { if (timeSlotRef1 != null) { String value = (String) timeSlots.get(timeSlotRef1); if (value != null) { match.setBeginTimeBoundary(Long.valueOf(value).longValue()); } } if (timeSlotRef2 != null) { String value = (String) timeSlots.get(timeSlotRef2); if (value != null) { match.setEndTimeBoundary(Long.valueOf(value).longValue()); } //if end time is smaller than begin time (e.g. value missing), set it 1 ns after begin time if (match.getEndTimeBoundary() < match.getBeginTimeBoundary()) { //match.setEndTimeBoundary(match.getBeginTimeBoundary()+ 1l); } } result.addMatch(match); } /** * DOCUMENT ME! * * @param buf DOCUMENT ME! * @param offset DOCUMENT ME! * @param len DOCUMENT ME! * * @throws SAXException DOCUMENT ME! */ public void characters(char[] buf, int offset, int len) throws SAXException { if (doAppend) { textBuffer.append(buf, offset, len); } } /** * Calculates interpolated time values for unaligned timeslots used by alignable * annotations. If ther are matches using these slots their begin- and / or endtime * values will be updated. */ private void calculateUnalignedAlignedMatches() { if (unalignedAlignablesIds.size() != 0) { //System.out.println("num una: " + unalignedAlignablesIds.size()); //System.out.println("num una match: " + unalignedAlignableMatches.size()); long refBT = 0L; long refET = 0L; try { refBT = Long.valueOf((String) timeSlots.get(lastAlignedBTS)) .longValue(); } catch (NumberFormatException ex) { } try { refET = Long.valueOf((String) timeSlots.get(timeSlotRef2)) .longValue(); } catch (NumberFormatException ex) { } long span = refET - refBT; if (span < 0) { span = 0; } int step = (int) (span / unalignedAlignablesIds.size()); // add the calculated time for the 'end' time slot to the table for (int i = 0; i < unalignedAlignablesIds.size(); i++) { String id = (String) unalignedAlignablesIds.get(i); String tsRef2 = (String) ref2.get(id); if (!timeSlots.containsKey(tsRef2)) { timeSlots.put(tsRef2, String.valueOf(refBT + ((i + 1) * step))); } } // apply to the matches in the current set of unaligned annotations for (int i = 0; i < unalignedAlignableMatches.size(); i++) { EAFMultipleFileMatch match = (EAFMultipleFileMatch) unalignedAlignableMatches.get(i); String id = match.getId(); if (match.getBeginTimeBoundary() == -1) { String tsRef1 = (String) ref1.get(id); String value = (String) timeSlots.get(tsRef1); if (value != null) { match.setBeginTimeBoundary(Long.valueOf(value) .longValue()); } } if (match.getEndTimeBoundary() == -1) { String tsRef2 = (String) ref2.get(id); String value = (String) timeSlots.get(tsRef2); if (value != null) { match.setEndTimeBoundary(Long.valueOf(value).longValue()); } } } } unalignedAlignablesIds.clear(); unalignedAlignableMatches.clear(); lastAlignedBTS = null; }}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -