⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 transcription2tiger.java

📁 编辑视频文件
💻 JAVA
字号:
/* * File:     Transcription2Tiger.java * Project:  MPI Linguistic Application * Date:     02 May 2007 * * Copyright (C) 2001-2007  Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA *//* This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */package mpi.eudico.client.util;import mpi.eudico.server.corpora.clom.Annotation;import mpi.eudico.server.corpora.clom.AnnotationCore;import mpi.eudico.server.corpora.clom.Transcription;import mpi.eudico.server.corpora.clomimpl.abstr.TierImpl;import org.apache.xml.serialize.OutputFormat;import org.apache.xml.serialize.XMLSerializer;import org.w3c.dom.Document;import org.w3c.dom.Element;import java.io.File;import java.io.FileOutputStream;import java.io.IOException;import java.io.OutputStreamWriter;import java.text.SimpleDateFormat;import java.util.Calendar;import java.util.Comparator;import java.util.HashMap;import java.util.Iterator;import java.util.Set;import java.util.TreeSet;import java.util.Vector;import javax.xml.parsers.DocumentBuilder;import javax.xml.parsers.DocumentBuilderFactory;import javax.xml.parsers.ParserConfigurationException;/** * $Id: Transcription2Tiger.java,v 1.8 2007/02/06 14:46:34 klasal Exp $ exports * annotation on selected tiers into the Tiger Syntax Format (as leaf nodes) * In "Tiger-terminology": annotations will become feature values of terminal * nodes * * @author $Author: klasal $ * @version $Revision: 1.8 $ */public class Transcription2Tiger {    /** Holds value of property DOCUMENT ME! */    private static final String wordFeature = "word";    /** Holds value of property DOCUMENT ME! */    private static final String posFeature = "pos";    /** Holds value of property DOCUMENT ME! */    private static final String morphFeature = "morph";    /** Holds value of property DOCUMENT ME! */    private static final String lemmaFeature = "lemma";    /** Holds value of property DOCUMENT ME! */    public static final String[] defaultFeatureNames = new String[] {            wordFeature, posFeature, morphFeature, lemmaFeature        };    /**     * DOCUMENT ME!     *     * @param transcription DOCUMENT ME!     * @param tierRelationHash DOCUMENT ME!     * @param fileName DOCUMENT ME!     * @param encoding DOCUMENT ME!     * @param beginTime DOCUMENT ME!     * @param endTime DOCUMENT ME!     *     * @throws IOException DOCUMENT ME!     */    public static void exportTiers(Transcription transcription,        HashMap tierRelationHash, File exportFile, String encoding,        long beginTime, long endTime) throws IOException {        try {            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();            DocumentBuilder db = dbf.newDocumentBuilder();            Document doc = db.newDocument();            Element root = doc.createElement("corpus");            root.setAttribute("xmlns:xsi",                "http://www.w3.org/2001/XMLSchema-instance");            root.setAttribute("xsi:noNamespaceSchemaLocation",                "http://www.ims.uni-stuttgart.de/projekte/TIGER/TIGERSearch/public/TigerXML.xsd");            root.setAttribute("id", transcription.getName());            Element meta = doc.createElement("meta");            root.appendChild(meta);            Element name = doc.createElement("name");            name.appendChild(doc.createTextNode(transcription.getName()                                                             .replaceAll(".eaf",                        ".tig")));            meta.appendChild(name);            Element author = doc.createElement("author");            author.appendChild(doc.createTextNode(transcription.getAuthor()));            meta.appendChild(author);            Element date = doc.createElement("date");            SimpleDateFormat dateFmt = new SimpleDateFormat(                    "yyyy.MM.dd HH:mm:ss");            String dateString = dateFmt.format(Calendar.getInstance().getTime());            date.appendChild(doc.createTextNode(dateString));            meta.appendChild(date);            Element format = doc.createElement("format");            format.appendChild(doc.createTextNode("Negra format 3"));            meta.appendChild(format);            Element body = doc.createElement("body");            root.appendChild(body);            Set sentenceAnnotations = getOrderedAnnotations(transcription,                    tierRelationHash.keySet());            for (Iterator it = sentenceAnnotations.iterator(); it.hasNext();) {                Annotation sentenceAnnotation = (Annotation) it.next();                if ((beginTime <= sentenceAnnotation.getBeginTimeBoundary()) &&                        (sentenceAnnotation.getEndTimeBoundary() <= endTime)) {                    Element s = doc.createElement("s");                    body.appendChild(s);                    s.setAttribute("id", sentenceAnnotation.getId());                    Element graph = doc.createElement("graph");                    s.appendChild(graph);                    Element terminals = doc.createElement("terminals");                    graph.appendChild(terminals);                    HashMap featureHash = (HashMap) tierRelationHash.get(sentenceAnnotation.getTier());                    Vector childTiers = ((TierImpl) sentenceAnnotation.getTier()).getChildTiers();                    for (Iterator descendantTiers = featureHash.keySet()                                                               .iterator();                            descendantTiers.hasNext();) {                        TierImpl featureTier = (TierImpl) descendantTiers.next();                        if (childTiers.contains(featureTier)) {                            Vector childAnnotations = sentenceAnnotation.getChildrenOnTier(featureTier);                            for (int j = 0; j < childAnnotations.size(); j++) {                                Annotation childAnnotation = (Annotation) childAnnotations.get(j);                                Element t = doc.createElement("t");                                terminals.appendChild(t);                                t.setAttribute("id", childAnnotation.getId());                                String feature = (String) featureHash.get(featureTier);                                if ((feature != null) &&                                        (feature.length() > 0)) {                                    t.setAttribute(feature,                                        childAnnotation.getValue());                                }                                addFeatures(featureHash, t, childAnnotation);                            }                        }                    }                }            }            writeTigerFile(encoding, exportFile, root);        } catch (ParserConfigurationException e) {            e.printStackTrace();            throw new IOException("Parser configuration error: " +                e.getMessage());        }    }    private static void addFeatures(HashMap featureHash, Element t,        Annotation annotation) {        Vector childTiers = ((TierImpl) annotation.getTier()).getChildTiers();        if (childTiers.size() > 0) {            for (Iterator it = featureHash.keySet().iterator(); it.hasNext();) {                TierImpl featureTier = (TierImpl) it.next();                if (childTiers.contains(featureTier)) {                    String feature = (String) featureHash.get(featureTier);                    Vector featureChildrenAnnotations = annotation.getChildrenOnTier(featureTier);                    if (featureChildrenAnnotations.size() > 0) {                        Annotation childAnnotation = (Annotation) featureChildrenAnnotations.get(0);                        if ((feature != null) && (feature.length() > 0)) {                            t.setAttribute(feature, childAnnotation.getValue());                        }                        addFeatures(featureHash, t, childAnnotation);                    }                }            }        }    }    /**     * DOCUMENT ME!     *     * @param encoding DOCUMENT ME!     * @param filename DOCUMENT ME!     * @param content DOCUMENT ME!     *     * @throws IOException DOCUMENT ME!     */    private static final void writeTigerFile(String encoding, File exportFile,        Element content) throws IOException {        OutputStreamWriter out = new OutputStreamWriter(new FileOutputStream(                    exportFile), encoding);        OutputFormat format = new OutputFormat(content.getOwnerDocument(),                encoding, true);        XMLSerializer ser = new XMLSerializer(out, format);        ser.asDOMSerializer();        ser.serialize(content);        out.close();    }    private static TreeSet getOrderedAnnotations(Transcription transcription,        Set sentenceTierIds) {        TreeSet annotations = new TreeSet(new AnnotationComparator());        for (Iterator it = sentenceTierIds.iterator(); it.hasNext();) {            annotations.addAll(((TierImpl) it.next()).getAnnotations());        }        return annotations;    }    /**     * Compares two TimeInterval objects.<br>     * Note: this comparator imposes orderings that are inconsistent with     * equals.     *     * @author Han Sloetjes     */    static class AnnotationComparator implements Comparator {        /**         * Compares two TimeInterval objects. First the begin times are         * compared. If they are the same the end times  are compared.  Note:         * this comparator imposes orderings that are inconsistent with         * equals.         *         * @param o1 the first interval         * @param o2 the second interval         *         * @return DOCUMENT ME!         *         * @throws ClassCastException when either object is not a TimeInterval         *         * @see java.util.Comparator#compare(java.lang.Object,         *      java.lang.Object)         */        public int compare(Object o1, Object o2) throws ClassCastException {            if (!(o1 instanceof AnnotationCore) ||                    !(o2 instanceof AnnotationCore)) {                throw new ClassCastException(                    "Objects should be of type AnnotationCore");            }            if (((AnnotationCore) o1).getBeginTimeBoundary() < ((AnnotationCore) o2).getBeginTimeBoundary()) {                return -1;            }            if (((AnnotationCore) o1).getBeginTimeBoundary() == ((AnnotationCore) o2).getBeginTimeBoundary()) {                if (((AnnotationCore) o1).getEndTimeBoundary() < ((AnnotationCore) o2).getEndTimeBoundary()) {                    return -1;                } else if (((AnnotationCore) o1).getEndTimeBoundary() == ((AnnotationCore) o2).getEndTimeBoundary()) {                    return 0;                }            }            return 1;        }    }}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -