📄 jword.java
字号:
/*
* This software is OSI Certified Open Source Software.
* OSI Certified is a certification mark of the Open Source Initiative.
*
* This file is part of the JWords package.
* JWords is licensed under the terms of the BSD License.
*
* Copyright (c) 2005, Charles F. Greenbacker III
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* * Neither the name of JWords nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
* OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package net.artificialminds.JWords;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.util.ArrayList;
/**
* A collection of all of the JSenses of a search word. It creates a JWord object composed of
* lexicographical information contained in the WordNet entry of a search word provided as a
* parameter. A JWord object is composed of a String that stores the original search word, and
* four ArrayLists that contain JSense objects representing the WordNet entries for each sense
* of the given search word.
* <p>
* Note: When compiled with a JDK that supports generics (such as JDK 1.5), you will receive
* notice that JWord.java uses unchecked or unsafe operations. This complaint wants
* the ArrayLists in the code to use generics, but doing so breaks compatibility with
* older JDKs (J2SE 1.4.2 SDK, etc), so generics are not used.
*
* @author Charlie Greenbacker
* @version 0.2.0a 20050809
* @since JWords 0.1.0a
*/
public class JWord
{
/**
* Search string which references the WordNet entry for this word. The constructor stores
* its argument here unchanged and uses it both to build the WordNet command line and to
* recognize the "Overview of ..." section headers in the command's output.
*/
private String word;
/**
* Stores JSense objects representing WordNet entries for each sense for the noun form
* of this word. Left null by the constructor when the WordNet output has no overview
* section for the noun form. Declared as a raw ArrayList because generics are
* deliberately not used, to stay compatible with older JDKs.
*/
private ArrayList nouns = null;
/**
* Stores JSense objects representing WordNet entries for each sense for the verb form
* of this word. Left null by the constructor when the WordNet output has no overview
* section for the verb form.
*/
private ArrayList verbs = null;
/**
* Stores JSense objects representing WordNet entries for each sense for the adjective form
* of this word. Left null by the constructor when the WordNet output has no overview
* section for the adjective form.
*/
private ArrayList adjectives = null;
/**
* Stores JSense objects representing WordNet entries for each sense for the adverb form
* of this word. Left null by the constructor when the WordNet output has no overview
* section for the adverb form.
*/
private ArrayList adverbs = null;
/**
* Stores POS score of noun forms of the word calculated using weighted probability formula.
* Initialized to 0. NOTE(review): the code that computes this score is not visible in this
* part of the file -- confirm where it is assigned.
*/
private float nounScore = 0;
/**
* Stores POS score of verb forms of the word calculated using weighted probability formula.
* Initialized to 0. NOTE(review): the code that computes this score is not visible in this
* part of the file -- confirm where it is assigned.
*/
private float verbScore = 0;
/**
* Stores POS score of adjective forms of the word calculated using weighted probability formula.
* Initialized to 0. NOTE(review): the code that computes this score is not visible in this
* part of the file -- confirm where it is assigned.
*/
private float adjScore = 0;
/**
* Stores POS score of adverb forms of the word calculated using weighted probability formula.
* Initialized to 0. NOTE(review): the code that computes this score is not visible in this
* part of the file -- confirm where it is assigned.
*/
private float advScore = 0;
/**
* Sole constructor. Creates a JWord object representing WordNet entry of search word
* provided as the parameter.
*
* @param searchWord the input word used to search the WordNet database
* @see JSense
* @since JWords 0.1.0a
*/
public JWord(String searchWord)
{
/* initialize word
*
* TODO: sanitize to eliminate problem input strings (multiple dashes, WordNet
* command line interface arguments, replace spaces with single dashes, etc)
*/
word = searchWord;
// used to access WordNet command line interface to obtain overview information of word
String command = JWords.pathToWordNet + "wn " + word + " -over";
// stores lines of output from WordNet command line interface
String wnOutputLine;
try
{
// create a process to execute the WordNet binary
Process wn = Runtime.getRuntime().exec(command);
// buffer to store output from wn process
BufferedReader wnOutput = new BufferedReader(new InputStreamReader(wn.getInputStream()));
try
{
/* If WordNet output is null, report word not found. Output checking is used
* instead of wn exit status due to java.lang.Process problems. By avoiding
* java.lang.Process.exitValue() and java.lang.Process.waitFor() we eliminate
* a source of program crashes and slowdown.
*
* TODO: create error handler and return null JWord if no WordNet entry
*/
if ((wnOutputLine = wnOutput.readLine()) == null)
{
System.err.println(word + " could not be found in WordNet database.");
System.err.println();
}
else
{
// loop until end of WordNet entry is reached
while ((wnOutputLine = wnOutput.readLine()) != null)
{
/* Look for the beginning of each part-of-speech section, once found
* step through wn output capturing information and creating a JSense
* for each sense, and storing it in the appropriate ArrayList.
* After each sense of a given POS is complete, trim the relevant
* ArrayList to minimize memory usage.
*
* TODO: possibly rework this to eliminate "quadruplication" of code
*/
if (wnOutputLine.equals("Overview of noun " + word))
{
wnOutput.readLine();
wnOutput.readLine();
wnOutput.readLine();
nouns = new ArrayList();
while (!((wnOutputLine = wnOutput.readLine()).equals("")))
{
nouns.add(new JSense("noun", wnOutputLine));
}
nouns.trimToSize();
}
if (wnOutputLine.equals("Overview of verb " + word))
{
wnOutput.readLine();
wnOutput.readLine();
wnOutput.readLine();
verbs = new ArrayList();
while (!((wnOutputLine = wnOutput.readLine()).equals("")))
{
verbs.add(new JSense("verb", wnOutputLine));
}
verbs.trimToSize();
}
if (wnOutputLine.equals("Overview of adj " + word))
{
wnOutput.readLine();
wnOutput.readLine();
wnOutput.readLine();
adjectives = new ArrayList();
while (!((wnOutputLine = wnOutput.readLine()).equals("")))
{
adjectives.add(new JSense("adj", wnOutputLine));
}
adjectives.trimToSize();
}
if (wnOutputLine.equals("Overview of adv " + word))
{
wnOutput.readLine();
wnOutput.readLine();
wnOutput.readLine();
adverbs = new ArrayList();
while (!((wnOutputLine = wnOutput.readLine()).equals("")))
{
adverbs.add(new JSense("adverb", wnOutputLine));
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -