📄 shoeboxtypfile.java
字号:
/* * File: ShoeboxTypFile.java * Project: MPI Linguistic Application * Date: 02 May 2007 * * Copyright (C) 2001-2007 Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package mpi.eudico.server.corpora.clomimpl.shoebox;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileReader;import java.io.InputStream;import java.io.InputStreamReader;import java.io.Reader;import java.util.ArrayList;import java.util.HashSet;import java.util.Hashtable;import java.util.Iterator;import java.util.List;import java.util.StringTokenizer;import java.util.Vector;/** * Reads the database description file (*.typ) of a Shoeboxfile. */public class ShoeboxTypFile { private Vector markers = new Vector(); private String typFileName = ""; private String databaseType = ""; /** each tier that is parent in a from-to relation */ public Vector fromArray = new Vector(); /** each tier that is child in a from-to relation */ private Vector toArray = new Vector(); /** hash of the above */ public Hashtable tofromHash = new Hashtable(); // HB, 24 jul 02: store procedureType, to derive constraint stereotype from. // Key values are same as for tofromHash. /** Holds value of property DOCUMENT ME! 
* <p>Maps a child marker (with leading backslash) to the name of the
* procedure that links it to its parent, e.g. "Lookup", "Parse",
* "TimeSubdivision" or "IncludedIn".
*/
    public Hashtable procedureTypeHash = new Hashtable();

    /** Name of the record marker. Watch out: stored without the leading backslash. */
    public String recordMarker = null;

    /**
     * Name of the marker that is the root of the interlinear hierarchy.
     * Remains null as long as no root marker has been determined.
     */
    public String interlinearRootMarker = null;

    /**
     * Names of the tiers (markers, without leading backslash) whose character
     * set is SIL IPA. Shoebox stores the name of a tier.
     */
    private Vector tiersWithIPA = new Vector();

    /** Names of the tiers (markers, without leading backslash) whose character set is Unicode. */
    private Vector tiersWithUnicode = new Vector();

    /** Names of the tiers (markers, without leading backslash) that are excluded. */
    private Vector excludedTiers = new Vector();

    // NOTE(review): presumably true when every tier should be treated as
    // Unicode; no use of this flag is visible in this part of the file.
    private boolean allTiersUnicode = false;

    /** Detailed tracing; switched on with the system property ddebug=true. */
    private boolean ddebug = "true".equals(System.getProperty("ddebug"));

    /** Tracing; switched on with the system property debug=true. */
    private boolean debug = "true".equals(System.getProperty("debug"));

    /**
     * Creates a ShoeboxTypFile from a Shoebox database description (.typ) file.
     *
     * <p>When file is null nothing is read and the instance stays empty.
     * Otherwise the absolute path of the file is remembered and the file is
     * read.
     *
     * @param file the .typ file to read, may be null
     *
     * @throws IllegalArgumentException if file is not null and cannot be read
     * @throws Exception any exception thrown while reading the file
     */
    public ShoeboxTypFile(File file) throws IllegalArgumentException, Exception {
        if (file == null) {
            // ShoeboxMarkerDialog MarkerRecords to be used
            // initializeFromMarkerRecords();
        } else {
            if (!file.canRead()) {
                throw new IllegalArgumentException("cannot read \"" + file + "\"");
            }

            typFileName = file.getAbsolutePath();
            readFile(file);
        }
    }

    /**
     * Creates a new ShoeboxTypFile instance from marker records instead of
     * from a .typ file.
     *
     * @param markerRecords the list of MarkerRecord objects to initialize
     *        from; when null the instance stays empty
     */
    public ShoeboxTypFile(List markerRecords) {
        if (markerRecords != null) {
            initializeFromMarkerRecords(markerRecords);
        }
    }

    /**
     * Fake typ-file, needed for WAC. Creates an empty instance; nothing is read.
     */
    public ShoeboxTypFile() throws IllegalArgumentException, Exception {
    }

    /** Prints s to standard output, but only when detailed tracing (ddebug) is on. */
    private void ddebug(String s) {
        if (ddebug) {
            System.out.println("---- ShoeboxFile3: " + s);
        }
    }

    /** Prints s to standard output, but only when tracing (debug) is on. */
    private void debug(String s) {
        if (debug) {
            System.out.println("-- ShoeboxFile3: " + s);
        }
    }

    /**
     * Returns the marker list, but only for an instance without a typ file name.
     *
     * @return the markers Vector when the typ file name is the empty string,
     *         null in all other cases (a file name is set, or it is null)
     */
    public Vector getMarkers() {
        if ((typFileName != null) && typFileName.equals("")) {
            return markers;
        } else {
            return null;
        }
    }

    /*
       public void setTypFileName(String theName) {
           typFileName = theName;
       }
       public String getTypFileName() {
           return typFileName;
       }
     */

    /**
     * Allow the DatabaseType value to be overruled from a .txt file (?)
     *
     * @param theType the value that replaces the stored database type
*/ public void setDatabaseType(String theType) { databaseType = theType; } /** * DOCUMENT ME! * * @return DOCUMENT ME! */ public String getDatabaseType() { if (databaseType.equals("") && !typFileName.equals("")) { // get database type from typFileName if (typFileName.endsWith(".typ") || typFileName.endsWith(".TYP")) { int leafIndex = typFileName.lastIndexOf("/") + 1; if (leafIndex <= 0) { leafIndex = typFileName.lastIndexOf("\\") + 1; } int endIndex = typFileName.lastIndexOf("."); if ((leafIndex > 0) && (endIndex > 0) && (leafIndex < endIndex)) { databaseType = typFileName.substring(leafIndex, endIndex); } } } return databaseType; } private void initializeFromMarkerRecords(List markerRecords) { // Vector markerRecords = ShoeboxMarkerDialog.getMarkers(); MarkerRecord topMarker = null; // assume that there is only one top marker // take the first marker without a parent Iterator mrIter = markerRecords.iterator(); while (mrIter.hasNext()) { MarkerRecord mr = (MarkerRecord) mrIter.next(); if ((mr.getParentMarker() == null) && !mr.isExcluded()) { if (topMarker == null) { topMarker = mr; } else { // more than 1 root marker, assume that we deal with ELAN exported Toolbox files topMarker = null; // reset break; } } } if (topMarker != null) { interlinearRootMarker = topMarker.getMarker(); } //System.out.println("Root: " + interlinearRootMarker); // if interlinearRootMarker == null parsing seems to silently return // a transcription with zero tiers ?? 
// fill fromArray, toArray, toFromHash and procedureTypeHash from mr's mrIter = markerRecords.iterator(); while (mrIter.hasNext()) { MarkerRecord mr = (MarkerRecord) mrIter.next(); if (mr.getParentMarker() != null) { fromArray.add("\\" + mr.getParentMarker()); toArray.add("\\" + mr.getMarker()); tofromHash.put("\\" + mr.getMarker(), "\\" + mr.getParentMarker()); if ((mr.getStereoType() != null) && mr.getStereoType().equals("Symbolic Association")) { procedureTypeHash.put("\\" + mr.getMarker(), "Lookup"); } else if ((mr.getStereoType() != null) && mr.getStereoType().equals("Time Subdivision")) { procedureTypeHash.put("\\" + mr.getMarker(), "TimeSubdivision"); } else if ((mr.getStereoType() != null) && mr.getStereoType().equals("Included In")) { procedureTypeHash.put("\\" + mr.getMarker(), "IncludedIn"); } else { procedureTypeHash.put("\\" + mr.getMarker(), "Parse"); } } if (mr.getCharsetString().equals(MarkerRecord.SILIPASTRING)) { tiersWithIPA.add(mr.getMarker()); } if (mr.getCharsetString().equals(MarkerRecord.UNICODESTRING)) { tiersWithUnicode.add(mr.getMarker()); } if (mr.getParticipantMarker()) { ShoeboxArray.label_eudicoparticipant = "\\" + mr.getMarker(); } if (mr.isExcluded()) { excludedTiers.add(mr.getMarker()); } } // HS jul 2005: if there are any markers marked for exclusion, // add their descendants to the excluded tiers array as well if (excludedTiers.size() > 0) { for (int i = 0; i < excludedTiers.size(); i++) { String parent = "\\" + (String) excludedTiers.get(i); if (fromArray.contains(parent)) { addDescendantsToExcludedTiers(parent); } } } } /** * DOCUMENT ME! * * @param arg DOCUMENT ME! * * @throws Exception DOCUMENT ME! 
*/ public static void main(String[] arg) throws Exception { ShoeboxTypFile s = new ShoeboxTypFile(new File(arg[0])); System.out.println(s.interlinearRootMarker); System.out.println(s.fromArray); System.out.println(s.toArray); System.out.println(""); System.out.println(s.procedureTypeHash.keySet()); System.out.println(s.procedureTypeHash.values()); } /* Used for preparation (counting) and storing. */ private final void readFile(File file) throws Exception { String line = null; /* A shoebox file may contain 8byte characters from custom fonts. Treating it as isolatin-1 may introduce character errors! */ Reader filereader; // HB, 24 jul 02: FAKE IMPLEMENTATION IS TO BE SUBSTITUTED ! boolean useDedicatedCharacterset = false; if (useDedicatedCharacterset) { InputStream fis = new FileInputStream(file); filereader = new InputStreamReader(fis, "DedicatedCharacterset"); } else { // use the default encoding filereader = new FileReader(file); } // explicit performance care: buffering the filereader BufferedReader br = new BufferedReader(filereader); String label = null; String content = null; String tierToSetLanguageFor = null; int linenumber = 0; String lastFrom = ""; String lastTo = ""; String procType = "Lookup"; while ((line = br.readLine()) != null) { linenumber++; line = line.trim(); debug(" ..." 
+ line); if (line.length() == 0) { // skip white lines continue; } if (linenumber == 1) { // HB, 24 jul 02: accept only DatabaseType TEXT, only works for Shoebox text databases // MK/02/10/13 loosening check if (line.startsWith("\\+DatabaseType")) { StringTokenizer st = new StringTokenizer(line); st.nextToken(); String db = st.nextToken(); if (db != null) { databaseType = db.trim(); } continue; } else { throw new Exception( "Shoebox typ file must begin with '\\+DatabaseType', found '" + line + "'"); } } // tokenize the shoebox line into label and content { StringTokenizer xxx = new StringTokenizer(line); label = xxx.nextToken(); // the first word // label contains trailing backslash! } content = (line.substring(label.length())).trim(); if ((recordMarker == null) && label.equals("\\mkrRecord")) { interlinearRootMarker = content; recordMarker = content; } /* if (label.equals("\\mkrFrom")) { fromArray.add("\\" + content); lastFrom = content; } if (label.equals("\\mkrTo")) { toArray.add("\\" + content); tofromHash.put("\\" + content, "\\" + lastFrom); //System.out.println("tofromHash.put "+ content + "--" + lastFrom+ "'"); } */ // HB, 24 jul 02: new logic, also storing procedureType, and insensitive to // order of markers.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -