⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 shoeboxtypfile.java

📁 编辑视频文件
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* * File:     ShoeboxTypFile.java * Project:  MPI Linguistic Application * Date:     02 May 2007 * * Copyright (C) 2001-2007  Max Planck Institute for Psycholinguistics * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */package mpi.eudico.server.corpora.clomimpl.shoebox;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileReader;import java.io.InputStream;import java.io.InputStreamReader;import java.io.Reader;import java.util.ArrayList;import java.util.HashSet;import java.util.Hashtable;import java.util.Iterator;import java.util.List;import java.util.StringTokenizer;import java.util.Vector;/** * Reads the database description file (*.typ) of a Shoeboxfile. */public class ShoeboxTypFile {    private Vector markers = new Vector();    private String typFileName = "";    private String databaseType = "";    /**  each tier that is parent in a from-to relation */    public Vector fromArray = new Vector();    /**  each tier that is child in a from-to relation */    private Vector toArray = new Vector();    /**  hash of the above */    public Hashtable tofromHash = new Hashtable();    // HB, 24 jul 02: store procedureType, to derive constraint stereotype from.    // Key values are same as for tofromHash.    /** Holds value of property DOCUMENT ME! */    public Hashtable procedureTypeHash = new Hashtable();    /** watch out: no \\ backslash on recordMarker */    public String recordMarker = null;    /** Holds value of property DOCUMENT ME! */    public String interlinearRootMarker = null;    /** shoebox stores the name of a tier */    private Vector tiersWithIPA = new Vector();    private Vector tiersWithUnicode = new Vector();    private Vector excludedTiers = new Vector();    private boolean allTiersUnicode = false;    private boolean ddebug = "true".equals(System.getProperty("ddebug"));    private boolean debug = "true".equals(System.getProperty("debug"));    /**       @param strict1 line must start with label (e.g. \ref).       @param label00 name of block starting label, including leading \     */    public ShoeboxTypFile(File file) throws IllegalArgumentException, Exception {        if (file == null) { // ShoeboxMarkerDialog MarkerRecords to be use            //		initializeFromMarkerRecords();        } else {            if (!file.canRead()) {                throw new IllegalArgumentException("cannot read \"" + file +                    "\"");            }            typFileName = file.getAbsolutePath();            readFile(file);        }    }    /**     * Creates a new ShoeboxTypFile instance     *     * @param markerRecords DOCUMENT ME!     */    public ShoeboxTypFile(List markerRecords) {        if (markerRecords != null) {            initializeFromMarkerRecords(markerRecords);        }    }    /**     * Fake typ-file, needed for WAC     */    public ShoeboxTypFile() throws IllegalArgumentException, Exception {    }    private void ddebug(String s) {        if (ddebug) {            System.out.println("---- ShoeboxFile3: " + s);        }    }    private void debug(String s) {        if (debug) {            System.out.println("-- ShoeboxFile3: " + s);        }    }    /**     * DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public Vector getMarkers() {        if ((typFileName != null) && typFileName.equals("")) {            return markers;        } else {            return null;        }    }    /*    public void setTypFileName(String theName) {        typFileName = theName;    }    public String getTypFileName() {        return typFileName;    }    */    /**     * Allow the DatabaseTpe value to be overruled from a .txt file (?)     */    public void setDatabaseType(String theType) {        databaseType = theType;    }    /**     * DOCUMENT ME!     *     * @return DOCUMENT ME!     */    public String getDatabaseType() {        if (databaseType.equals("") && !typFileName.equals("")) {            // get database type from typFileName            if (typFileName.endsWith(".typ") || typFileName.endsWith(".TYP")) {                int leafIndex = typFileName.lastIndexOf("/") + 1;                if (leafIndex <= 0) {                    leafIndex = typFileName.lastIndexOf("\\") + 1;                }                int endIndex = typFileName.lastIndexOf(".");                if ((leafIndex > 0) && (endIndex > 0) &&                        (leafIndex < endIndex)) {                    databaseType = typFileName.substring(leafIndex, endIndex);                }            }        }        return databaseType;    }    private void initializeFromMarkerRecords(List markerRecords) {        //	Vector markerRecords = ShoeboxMarkerDialog.getMarkers();        MarkerRecord topMarker = null;        // assume that there is only one top marker        // take the first marker without a parent        Iterator mrIter = markerRecords.iterator();        while (mrIter.hasNext()) {            MarkerRecord mr = (MarkerRecord) mrIter.next();            if ((mr.getParentMarker() == null) && !mr.isExcluded()) {                if (topMarker == null) {                    topMarker = mr;                } else { // more than 1 root marker, assume that we deal with ELAN exported Toolbox files                    topMarker = null; // reset                    break;                }            }        }        if (topMarker != null) {            interlinearRootMarker = topMarker.getMarker();        }        //System.out.println("Root: " + interlinearRootMarker);        // if interlinearRootMarker == null parsing seems to silently return         // a transcription with zero tiers ??        // fill fromArray, toArray, toFromHash and procedureTypeHash from mr's        mrIter = markerRecords.iterator();        while (mrIter.hasNext()) {            MarkerRecord mr = (MarkerRecord) mrIter.next();            if (mr.getParentMarker() != null) {                fromArray.add("\\" + mr.getParentMarker());                toArray.add("\\" + mr.getMarker());                tofromHash.put("\\" + mr.getMarker(),                    "\\" + mr.getParentMarker());                if ((mr.getStereoType() != null) &&                        mr.getStereoType().equals("Symbolic Association")) {                    procedureTypeHash.put("\\" + mr.getMarker(), "Lookup");                } else if ((mr.getStereoType() != null) &&                        mr.getStereoType().equals("Time Subdivision")) {                    procedureTypeHash.put("\\" + mr.getMarker(),                        "TimeSubdivision");                } else if ((mr.getStereoType() != null) &&                        mr.getStereoType().equals("Included In")) {                    procedureTypeHash.put("\\" + mr.getMarker(), "IncludedIn");                } else {                    procedureTypeHash.put("\\" + mr.getMarker(), "Parse");                }            }            if (mr.getCharsetString().equals(MarkerRecord.SILIPASTRING)) {                tiersWithIPA.add(mr.getMarker());            }            if (mr.getCharsetString().equals(MarkerRecord.UNICODESTRING)) {                tiersWithUnicode.add(mr.getMarker());            }            if (mr.getParticipantMarker()) {                ShoeboxArray.label_eudicoparticipant = "\\" + mr.getMarker();            }            if (mr.isExcluded()) {                excludedTiers.add(mr.getMarker());            }        }        // HS jul 2005: if there are any markers marked for exclusion,        // add their descendants to the excluded tiers array as well        if (excludedTiers.size() > 0) {            for (int i = 0; i < excludedTiers.size(); i++) {                String parent = "\\" + (String) excludedTiers.get(i);                if (fromArray.contains(parent)) {                    addDescendantsToExcludedTiers(parent);                }            }        }    }    /**     * DOCUMENT ME!     *     * @param arg DOCUMENT ME!     *     * @throws Exception DOCUMENT ME!     */    public static void main(String[] arg) throws Exception {        ShoeboxTypFile s = new ShoeboxTypFile(new File(arg[0]));        System.out.println(s.interlinearRootMarker);        System.out.println(s.fromArray);        System.out.println(s.toArray);        System.out.println("");        System.out.println(s.procedureTypeHash.keySet());        System.out.println(s.procedureTypeHash.values());    }    /*      Used for preparation (counting) and storing.    */    private final void readFile(File file) throws Exception {        String line = null;        /*          A shoebox file may contain 8byte characters from custom fonts.          Treating it as isolatin-1 may introduce character errors!        */        Reader filereader;        // HB, 24 jul 02: FAKE IMPLEMENTATION IS TO BE SUBSTITUTED !        boolean useDedicatedCharacterset = false;        if (useDedicatedCharacterset) {            InputStream fis = new FileInputStream(file);            filereader = new InputStreamReader(fis, "DedicatedCharacterset");        } else {            // use the default encoding            filereader = new FileReader(file);        }        // explicit performance care: buffering the filereader        BufferedReader br = new BufferedReader(filereader);        String label = null;        String content = null;        String tierToSetLanguageFor = null;        int linenumber = 0;        String lastFrom = "";        String lastTo = "";        String procType = "Lookup";        while ((line = br.readLine()) != null) {            linenumber++;            line = line.trim();            debug("  ..." + line);            if (line.length() == 0) {                // skip white lines                continue;            }            if (linenumber == 1) {                // HB, 24 jul 02: accept only DatabaseType TEXT, only works for Shoebox text databases                // MK/02/10/13 loosening check                if (line.startsWith("\\+DatabaseType")) {                    StringTokenizer st = new StringTokenizer(line);                    st.nextToken();                    String db = st.nextToken();                    if (db != null) {                        databaseType = db.trim();                    }                    continue;                } else {                    throw new Exception(                        "Shoebox typ file must begin with '\\+DatabaseType', found '" +                        line + "'");                }            }            // tokenize the shoebox line into label and content            {                StringTokenizer xxx = new StringTokenizer(line);                label = xxx.nextToken(); // the first word                // label contains trailing backslash!            }            content = (line.substring(label.length())).trim();            if ((recordMarker == null) && label.equals("\\mkrRecord")) {                interlinearRootMarker = content;                recordMarker = content;            }            /*                if (label.equals("\\mkrFrom")) {                    fromArray.add("\\" + content);                    lastFrom = content;                }                if (label.equals("\\mkrTo")) {                    toArray.add("\\" + content);                    tofromHash.put("\\" + content, "\\" + lastFrom);                    //System.out.println("tofromHash.put "+ content + "--" + lastFrom+ "'");                }            */            // HB, 24 jul 02: new logic, also storing procedureType, and insensitive to            // order of markers.

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -