📄 jsense.java

📁 AutoSummary uses Natural Language Processing to generate a contextually-relevant synopsis of plain t
💻 JAVA
字号:
/*
 * This software is OSI Certified Open Source Software.
 * OSI Certified is a certification mark of the Open Source Initiative.
 *
 * This file is part of the JWords package.
 * JWords is licensed under the terms of the BSD License.
 *
 * Copyright (c) 2005, Charles F. Greenbacker III
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright notice,
 *       this list of conditions and the following disclaimer in the documentation
 *       and/or other materials provided with the distribution.
 *     * Neither the name of JWords nor the names of its contributors
 *       may be used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package net.artificialminds.JWords;

import java.util.ArrayList;

/**
 * Stores the lexical information of a single sense of the search word.  Accepts an output
 * line from the WordNet command line interface containing the lexicographical information
 * of a single sense of a particular word, and creates a JSense object representing the
 * aforementioned entry.
 * <p>
 * The expected shape of an input line is
 * <pre>
 *     NUMBER. [(TAGCOUNT) ]WORD[, WORD ...] -- (DEFINITION[; DEFINITION ...][; "EXAMPLE" ...])
 * </pre>
 * <p>
 * Note: When compiled with a JDK that supports generics (such as JDK 1.5), you will receive
 * notice that JSense.java uses unchecked or unsafe operations.  This complaint wants
 * the ArrayLists in the code to use generics, but doing so breaks compatibility with
 * older JDKs (J2SE 1.4.2 SDK, etc), so generics are not used.  For the same reason this
 * class restricts itself to language and library features available in J2SE 1.4.
 *
 * @author                  Charlie Greenbacker
 * @version                 0.2.0a 20050809
 * @since                   JWords 0.1.0a
 */
public class JSense
{
    /**
     * Distinguishes between noun, verb, adjective and adverb
     */
    private final String partOfSpeech;

    /**
     * Stores strings containing one or more definitions of this sense of the word
     */
    private final ArrayList definitions;

    /**
     * Stores strings containing usage examples (if any) of the word
     */
    private final ArrayList usage;

    /**
     * Stores strings representing a set of one or more synonyms of the word.  Will
     * always contain at least the word itself, in addition to possible exact synonyms
     */
    private final ArrayList synset;

    /**
     * Tracks the number of times this sense was tagged in texts used to create WordNet
     * database.  This value can be used as a (very) rough estimate for frequency of use.
     */
    private final int tagCount;

    /**
     * Identifies order of this sense in WordNet entry of the word.  Related to tagCount
     * in that senses are ordered from most to least frequently used.
     */
    private final int number;

    /**
     * Sole constructor.  Takes a line of output from the command line interface and creates
     * a JSense object representing WordNet entry of a specific sense of the search word.
     * <p>
     * Leading and trailing whitespace on the line is ignored.  A line without a
     * <code>" -- "</code> separated gloss yields empty definition and usage lists.
     *
     * @param POS           part-of-speech; either noun, verb, adjective or adverb
     * @param wnOutputLine  contains WordNet entry for this specific sense
     * @throws NullPointerException      if <code>wnOutputLine</code> is <code>null</code>
     * @throws NumberFormatException     if the sense number or tag count is not an integer
     * @throws IllegalArgumentException  if the line contains no synset after the sense number
     *                                   (and tag count, if any)
     * @see                 JWord
     * @since               JWords 0.1.0a
     */
    public JSense(String POS, String wnOutputLine)
    {
        if (wnOutputLine == null)
        {
            throw new NullPointerException("wnOutputLine must not be null");
        }

        // initialize part-of-speech; either noun, verb, adjective or adverb
        partOfSpeech = POS;

        // cut the WordNet entry into two parts: number/tag/synset & definitions/usage;
        // limit 2 so a " -- " occurring inside the gloss does not cut it short
        String[] wnSense = wnOutputLine.trim().split("\\s--\\s", 2);

        // cut the first part right after the period (.) trailing the sense number;
        // limit 2 so synonyms that themselves contain ". " (e.g. "U. S. Army") survive
        String[] wnEntry = wnSense[0].trim().split("\\.\\s", 2);

        // initialize sense number from the text before the first period
        number = Integer.parseInt(wnEntry[0]);

        if (wnEntry.length < 2 || wnEntry[1].length() == 0)
        {
            throw new IllegalArgumentException(
                "No synset found in WordNet sense entry: " + wnOutputLine);
        }
        String words = wnEntry[1];

        // determine whether this sense is tagged, ie. (36); untagged senses count as zero
        int tagged = 0;
        if (words.charAt(0) == '(')
        {
            // this is a tagged sense, extract the count and keep the remainder as the synset
            String[] wnTag = words.substring(1).split("\\)\\s", 2);
            tagged = Integer.parseInt(wnTag[0]);
            if (wnTag.length < 2 || wnTag[1].length() == 0)
            {
                throw new IllegalArgumentException(
                    "No synset found in WordNet sense entry: " + wnOutputLine);
            }
            words = wnTag[1];
        }
        tagCount = tagged;

        // cut the remainder into direct synonyms separated by commas; the list is created
        // at its final size so no spare capacity is allocated
        String[] wnSynset = words.split(",\\s");
        synset = new ArrayList(wnSynset.length);
        for (int i = 0; i < wnSynset.length; i++)
        {
            synset.add(wnSynset[i]);
        }

        // initialize usage and definitions from the gloss, when there is one
        usage = new ArrayList();
        definitions = new ArrayList();
        if (wnSense.length > 1)
        {
            parseGloss(wnSense[1], definitions, usage);
        }
        // minimize memory usage, no need to allocate for empty space
        usage.trimToSize();
        definitions.trimToSize();
    }

    /**
     * Splits the gloss (second half of a WordNet sense entry) into definitions and usage
     * examples.  Fragments are separated by a semicolon followed by whitespace; a
     * separator inside a usage example (a fragment that opens with a double quote) does
     * not split the example.
     *
     * @param gloss         text after the " -- " separator, normally wrapped in parentheses
     * @param definitions   receives every fragment that does not open with a double quote
     * @param usage         receives every quoted fragment, without its enclosing quotes
     */
    private static void parseGloss(String gloss, ArrayList definitions, ArrayList usage)
    {
        String body = gloss.trim();
        int length = body.length();

        // drop the enclosing parentheses, but only when both are really there
        if (length >= 2 && body.charAt(0) == '(' && body.charAt(length - 1) == ')')
        {
            body = body.substring(1, length - 1);
            length = body.length();
        }

        int start = 0;
        boolean inExample = false;
        for (int i = 0; i < length; i++)
        {
            char c = body.charAt(i);
            if (c == '"')
            {
                if (inExample)
                {
                    inExample = false;
                }
                else if (i == start)
                {
                    // only a quote that opens a fragment starts a usage example; quotes
                    // in the middle of a definition keep the plain splitting behaviour
                    inExample = true;
                }
            }
            else if (c == ';' && !inExample && i + 1 < length
                     && Character.isWhitespace(body.charAt(i + 1)))
            {
                addMeaning(body.substring(start, i), definitions, usage);
                // skip the separator: the semicolon and the one whitespace character
                start = i + 2;
                i++;
            }
        }
        addMeaning(body.substring(start), definitions, usage);
    }

    /**
     * Files a single gloss fragment as either a usage example or a definition.  Empty
     * fragments are ignored.
     *
     * @param fragment      one piece of the gloss
     * @param definitions   receives the fragment when it is a definition
     * @param usage         receives the fragment, minus quotes, when it is a usage example
     */
    private static void addMeaning(String fragment, ArrayList definitions, ArrayList usage)
    {
        if (fragment.length() == 0)
        {
            return;
        }
        if (fragment.charAt(0) == '"')
        {
            // this is a usage example: remove the opening quote, and the closing quote
            // only when it is actually the last character
            int end = fragment.length();
            if (end > 1 && fragment.charAt(end - 1) == '"')
            {
                end--;
            }
            usage.add(fragment.substring(1, end));
        }
        else
        {
            // this is a definition, add to definitions
            definitions.add(fragment);
        }
    }

    /**
     * Prints important details about the sense in dictionary format in concert with
     * JWord.print().  Displays sense number, definitions, usage examples (if any) and
     * a list of direct synonyms (synset).  Empty lists are printed as empty rather than
     * causing an error.
     *
     * @see             JWord#print()
     * @since           JWords 0.1.0a
     */
    public void print()
    {
        System.out.print("     " + number + ". ");
        for (int i = 0; i < definitions.size(); i++)
        {
            if (i > 0)
            {
                System.out.print(", ");
            }
            System.out.print(definitions.get(i));
        }
        System.out.println();
        for (int i = 0; i < usage.size(); i++)
        {
            System.out.println("          \"" + usage.get(i) + "\"");
        }
        System.out.print("          [");
        for (int i = 0; i < synset.size(); i++)
        {
            if (i > 0)
            {
                System.out.print(", ");
            }
            System.out.print(synset.get(i));
        }
        System.out.println("]");
        System.out.println();
    }

    /**
     * Retrieves part-of-speech of sense.
     *
     * @return          the part-of-speech of the sense
     * @since           JWords 0.1.0a
     */
    public String getPOS()
    {
        return partOfSpeech;
    }

    /**
     * Retrieves tag count (times tagged in texts used to create WordNet database, used as
     * rough estimate for frequency of use) of sense.
     *
     * @return          the tag count of the sense; zero for an untagged sense
     * @since           JWords 0.1.0a
     */
    public int getTagCount()
    {
        return tagCount;
    }

    /**
     * Retrieves sense number of sense.
     *
     * @return          the sense number of the sense
     * @since           JWords 0.1.0a
     */
    public int getSenseNumber()
    {
        return number;
    }

    /**
     * Retrieves list of definitions of sense.  The returned list is the one held by
     * this object, not a copy.
     *
     * @return          the list of definitions (Strings) of the sense
     * @since           JWords 0.1.0a
     */
    public ArrayList getDefinitions()
    {
        return definitions;
    }

    /**
     * Retrieves list of usage examples (if any) of sense.  The returned list is the one
     * held by this object, not a copy.
     *
     * @return          the list of usage examples (Strings) of the sense; may be empty
     * @since           JWords 0.1.0a
     */
    public ArrayList getUsage()
    {
        return usage;
    }

    /**
     * Retrieves list of direct synonyms (synset) of sense.  The returned list is the one
     * held by this object, not a copy.
     *
     * @return          the synset (Strings) of the sense
     * @since           JWords 0.1.0a
     */
    public ArrayList getSynset()
    {
        return synset;
    }

    /* TODO: add variables to store hypernym/hyponym tree, antonyms, domains, holonyms,
     * compound words, and other advanced information available in the WordNet database
     */

}
💿 文件大小 49 K
👤 上传用户 liuhai
📂 所属分类多国语言处理
🏷️ 相关标签

#contextually-relevant #AutoSummary #Processing #Language
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -