📄 shoeboxencoder.java
字号:
/* * File: ShoeboxEncoder.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA *//* * Created on Oct 15, 2004 */package mpi.eudico.server.corpora.clomimpl.shoebox;import mpi.eudico.server.corpora.clom.AnnotationDocEncoder;import mpi.eudico.server.corpora.clom.EncoderInfo;import mpi.eudico.server.corpora.clom.Tier;import mpi.eudico.server.corpora.clom.Transcription;import mpi.eudico.server.corpora.clomimpl.abstr.MediaDescriptor;import mpi.eudico.server.corpora.clomimpl.abstr.TierImpl;import mpi.eudico.server.corpora.clomimpl.abstr.TranscriptionImpl;import mpi.eudico.server.corpora.clomimpl.shoebox.interlinear.Interlinearizer;import mpi.eudico.server.corpora.clomimpl.shoebox.interlinear.TimeCodedTranscriptionImpl;import mpi.eudico.server.corpora.clomimpl.shoebox.utr22.SimpleConverter;import java.io.FileOutputStream;import java.io.OutputStreamWriter;import java.text.DecimalFormat;import java.util.ArrayList;import java.util.Hashtable;import java.util.Iterator;import java.util.List;import java.util.Vector;import javax.swing.JOptionPane;/** * Encodes information from a Transcription to Shoebox/Toolbox format and stores it. 
 *
 * @author hennie
 */
public class ShoeboxEncoder implements AnnotationDocEncoder {
    /**
     * Database type written in the first header line when the encoder info
     * supplies no database type (null or empty).
     */
    public static final String defaultDBType = "ElanExport";

    /**
     * Marker line written directly after the header when the marker source
     * of the encoder info is {@code ToolboxEncoderInfo.TIERNAMES}.
     */
    public static final String elanELANLabel = "\\ELANExport";

    /**
     * Marker text for the start of a block.
     * NOTE(review): presumably the ELAN "block" record marker mentioned in the
     * writeBlocks documentation; its use is not visible here - confirm.
     */
    public static final String elanBlockStart = "\\block";

    /**
     * Marker text for a begin value.
     * NOTE(review): presumably labels the begin time of a block - confirm against writeBlocks.
     */
    public static final String elanBeginLabel = "\\ELANBegin";

    /**
     * Marker text for an end value.
     * NOTE(review): presumably labels the end time of a block - confirm against writeBlocks.
     */
    public static final String elanEndLabel = "\\ELANEnd";

    /**
     * Marker text for a participant.
     * NOTE(review): presumably labels the participant of a root tier - confirm against writeBlocks.
     */
    public static final String elanParticipantLabel = "\\ELANParticipant";

    /** Marker that precedes the media URL of a media descriptor in the output. */
    public static final String elanMediaURLLabel = "\\ELANMediaURL";

    /** Marker that precedes the "extracted from" value of a media descriptor in the output. */
    public static final String elanMediaExtractedLabel = "\\ELANMediaExtracted";

    /** Marker that precedes the MIME type of a media descriptor in the output. */
    public static final String elanMediaMIMELabel = "\\ELANMediaMIME";

    /** Marker that precedes the time origin of a media descriptor in the output. */
    public static final String elanMediaOriginLabel = "\\ELANMediaOrigin";

    // Writer that encodes as ISO-8859-1; created in the constructor on the shared output stream.
    private OutputStreamWriter isoLatinWriter;

    // Writer that encodes as UTF-8; created in the constructor on the same output stream.
    private OutputStreamWriter utf8Writer;

    // NOTE(review): not assigned or used in the visible part of the file - confirm its role.
    private SimpleConverter simpleConverter;

    /**
     * Creates a new ShoeboxEncoder instance
     *
     * @param path path of the output file; it is opened with a FileOutputStream
*/ public ShoeboxEncoder(String path) { try { FileOutputStream out = new FileOutputStream(path); isoLatinWriter = new OutputStreamWriter(out, "ISO-8859-1"); utf8Writer = new OutputStreamWriter(out, "UTF-8"); } catch (Exception ex) { ex.printStackTrace(); } } /* * @see mpi.eudico.server.corpora.clom.AnnotationDocEncoder#encodeAndSave(mpi.eudico.server.corpora.clom.Transcription, java.util.Vector, java.lang.String) */ public void encodeAndSave(Transcription theTranscription, EncoderInfo encoderInfo, Vector tierOrder, String path) { try { writeHeader(theTranscription, encoderInfo); writeBlocks(theTranscription, tierOrder, encoderInfo); // media descriptors are written at the end of the Toolbox file. When written at begin/in header // Toolbox throws them away without any notification. writeMediaDescriptors(theTranscription); closeFile(); } catch (Exception e) { String txt = "Sorry: unable to export this file to Shoebox." + e.getMessage() + ")"; JOptionPane.showMessageDialog(null, txt, txt, JOptionPane.ERROR_MESSAGE); e.printStackTrace(); } } private void writeHeader(Transcription theTranscription, EncoderInfo encoderInfo) { String dbType = ((ToolboxEncoderInfo) encoderInfo).getDatabaseType(); if ((dbType == null) || dbType.equals("") /*|| ((ToolboxEncoderInfo) encoderInfo).getMarkerSource() == ToolboxEncoderInfo.TIERNAMES*/) { dbType = defaultDBType; } write("\\_sh v3.0 400 " + dbType + "\n"); write("\\_DateStampHasFourDigitYear\n"); if (((ToolboxEncoderInfo) encoderInfo).getMarkerSource() == ToolboxEncoderInfo.TIERNAMES) { write(("\n" + elanELANLabel + "\n")); } } private void writeMediaDescriptors(Transcription theTranscription) { // media descriptors Vector mediaDescriptors = null; try { mediaDescriptors = theTranscription.getMediaDescriptors(); } catch (Exception rex) { rex.printStackTrace(); } for (int i = 0; i < mediaDescriptors.size(); i++) { write("\n"); MediaDescriptor md = (MediaDescriptor) mediaDescriptors.elementAt(i); if ((md.mediaURL != null) && 
!md.mediaURL.equals("")) { write((elanMediaURLLabel + " " + md.mediaURL + "\n")); } if ((md.mimeType != null) && !md.mimeType.equals("")) { write((elanMediaMIMELabel + " " + md.mimeType + "\n")); } if (md.timeOrigin != 0) { write((elanMediaOriginLabel + " " + md.timeOrigin + "\n")); } if ((md.extractedFrom != null) && !md.extractedFrom.equals("")) { write((elanMediaExtractedLabel + " " + md.extractedFrom + "\n")); } } } /** * Jul 2005: added a test on the toplevel tiers:<br> * - if there is only one tier use it as the RecordMarker * - if all top level tiernames start with "xxxx@" use "xxxx" as the RecordMarker * * in both cases don't add the ELAN "block" marker as RecordMarker * * @param theTranscription the transcription to export * @param tierOrder the order of the tiers * @param encoderInfo info for the encoder */ private void writeBlocks(Transcription theTranscription, Vector tierOrder, EncoderInfo encoderInfo) { boolean lineForRootAnnot = false; boolean justOneRoot = false; int blockCounter = 1; Interlinearizer interlinearizer = new Interlinearizer(new TimeCodedTranscriptionImpl( (TranscriptionImpl) theTranscription)); setShoeboxArguments(theTranscription, interlinearizer, tierOrder, encoderInfo); String[] outputLines = interlinearizer.renderAsText(); // find set of root tier names, for each root tier store participant String participantString = ""; Hashtable rootTierNames = new Hashtable(); try { Vector topTiers = ((TranscriptionImpl) theTranscription).getTopTiers(); Iterator tierIter = topTiers.iterator(); while (tierIter.hasNext()) { TierImpl t = (TierImpl) tierIter.next(); rootTierNames.put(t.getName(), t.getParticipant()); } if (topTiers.size() == 1) { justOneRoot = true; } else { // loop over toptiers; if all have a '@' in their name and the prefix // is always the same, use this prefix as the record marker justOneRoot = true; String name; String prefix = null; int atIndex = -1; Iterator it = rootTierNames.keySet().iterator(); while (it.hasNext()) { name 
= (String) it.next(); atIndex = name.indexOf('@'); if (atIndex < 1) { justOneRoot = false; break; } else { String curPref = name.substring(0, atIndex); //System.out.println("Pref: " + curPref); if (prefix == null) {
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -