📄 wordnetwords.java
字号:
package ijp.assignment1.utils;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import net.didion.jwnl.JWNL;
import net.didion.jwnl.JWNLException;
import net.didion.jwnl.data.IndexWord;
import net.didion.jwnl.data.POS;
import net.didion.jwnl.dictionary.Dictionary;
/**
 * A utility for generating data files representing WordNet entries for nouns,
 * verbs and adjectives. It generates ArrayLists consisting of
 * {@link net.didion.jwnl.data.IndexWord}s.
 * The purpose of this class is to pre-generate word lists which can then
 * be processed more quickly in the {@link aipj.assignment1.utils.WordNetWrapper}
 * utility used in Assignment 1.
 * <p>This code does not need to be altered for AIPJ assignment 1. It
 * may be useful for MSc projects.</p>
 * <p>All files are resolved relative to the <code>user.dir</code> system
 * property: the JWNL configuration is read from
 * <code>jwnl/file_properties.xml</code>, the BNC data from
 * <code>data/bncobjects.dat</code>, and the generated lists are written to
 * <code>data/&lt;pos&gt;.dat</code>.</p>
 *
 * @author Judy Robertson
 */
public class WordNetWords implements Serializable {

    /**
     * A WordNet dictionary from which the parts of speech words are extracted.
     * Remains <code>null</code> if JWNL initialisation fails.
     */
    // NOTE(review): this class is declared Serializable but it is not clear
    // from this file whether Dictionary is; consider making this field
    // transient if instances are ever serialised - confirm.
    private Dictionary d;

    /**
     * A list of all the verbs in WordNet
     */
    private List verbList = new ArrayList();

    /**
     * A list of all the nouns in WordNet
     */
    private List nounList = new ArrayList();

    /**
     * A list of all the adjectives in WordNet
     */
    private List adjectiveList = new ArrayList();

    /**
     * British National Corpus data mapping words to frequencies.
     * Remains <code>null</code> if the serialised BNC file cannot be read.
     */
    private Map BNC;

    /**
     * Opens the WordNet dictionary, and reads in BNC frequency data. Iterates
     * over verbs, nouns and adjectives to build lists for each part of speech
     * containing only words which can be found in a shortened version of the BNC.
     *
     * @throws IllegalStateException if the WordNet dictionary or the BNC data
     *         could not be loaded (the underlying cause is reported on
     *         <code>System.err</code> before this is thrown)
     */
    public WordNetWords() {
        initialiseDictionary();
        readinBNC();

        // Fail fast with a clear diagnostic instead of an anonymous
        // NullPointerException from the list builders below.
        if (d == null) {
            throw new IllegalStateException(
                    "WordNet dictionary could not be initialised; cannot build word lists");
        }
        if (BNC == null) {
            throw new IllegalStateException(
                    "BNC frequency data could not be loaded; cannot build word lists");
        }

        buildVerbList();
        buildNounList();
        buildAdjectiveList();
    }

    /**
     * Initialises JWNL from the properties file in the <code>jwnl</code>
     * directory and obtains the dictionary instance. Failures are reported on
     * <code>System.err</code> and leave the dictionary field <code>null</code>.
     */
    private void initialiseDictionary() {
        File properties = new File(System.getProperty("user.dir") + File.separator
                + "jwnl" + File.separator + "file_properties.xml");
        FileInputStream fi = null;
        try {
            fi = new FileInputStream(properties);
            JWNL.initialize(fi);
            d = Dictionary.getInstance();
        } catch (IOException e) {
            System.err.println("An IO error when loading wordnet");
            e.printStackTrace();
        } catch (JWNLException err) {
            System.err.println("A JWNL exception occured during intialisation");
            err.printStackTrace();
        } finally {
            closeQuietly(fi);
        }
    }

    /**
     * Iterates over the WordNet verbs and builds a list containing verb IndexWords
     * which can also be found in the shortened version of the BNC.
     */
    private void buildVerbList() {
        collectFamiliarWords(POS.VERB, verbList);
    }

    /**
     * Iterates over the WordNet adjectives and builds a list containing
     * adjective IndexWords which can also be found in the shortened
     * version of the BNC.
     */
    private void buildAdjectiveList() {
        collectFamiliarWords(POS.ADJECTIVE, adjectiveList);
    }

    /**
     * Iterates over the WordNet nouns and builds a list containing noun IndexWords
     * which can also be found in the shortened version of the BNC.
     */
    private void buildNounList() {
        collectFamiliarWords(POS.NOUN, nounList);
    }

    /**
     * Appends to <code>target</code> every WordNet IndexWord of the given part
     * of speech whose lemma has a non-null entry in the BNC map.
     *
     * @param pos    the part of speech to iterate over
     * @param target the list that receives the matching IndexWords
     */
    private void collectFamiliarWords(POS pos, List target) {
        try {
            Iterator i = d.getIndexWordIterator(pos);
            while (i.hasNext()) {
                IndexWord word = (IndexWord) i.next();
                // weed out words with low familiarity, according to
                // information from the BNC
                BNCWord bncData = (BNCWord) BNC.get(word.getLemma());
                if (bncData != null) {
                    target.add(word);
                }
            }
        } catch (JWNLException e) {
            System.err.println(("An error occurred while attempting to get a " +
                    "word index iterator"));
            e.printStackTrace();
        }
    }

    /**
     * Write out the data for the specified part of speech to disc as a
     * serialised list
     *
     * @param pos The part of speech for which the data should be written;
     *            one of <code>"noun"</code>, <code>"verb"</code> or
     *            <code>"adjective"</code>
     * @throws IllegalArgumentException if <code>pos</code> is not one of the
     *         supported values
     */
    private void writeData(String pos) {
        // Resolve the list before touching the file system so that an
        // unsupported value never creates or truncates a data file.
        List words;
        if ("noun".equals(pos)) {
            words = nounList;
        } else if ("adjective".equals(pos)) {
            words = adjectiveList;
        } else if ("verb".equals(pos)) {
            words = verbList;
        } else {
            throw new IllegalArgumentException("Unknown part of speech: " + pos);
        }

        File f = new File(System.getProperty("user.dir") + File.separator +
                "data" + File.separator + pos + ".dat");
        FileOutputStream fstrm = null;
        ObjectOutputStream ostrm = null;
        try {
            fstrm = new FileOutputStream(f);
            ostrm = new ObjectOutputStream(fstrm);
            ostrm.writeObject(words);
            ostrm.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Data has already been flushed above on the success path, so a
            // failure while closing is not reported. The file stream is closed
            // separately in case the object stream was never created.
            closeQuietly(ostrm);
            closeQuietly(fstrm);
        }
    }

    /**
     * Read in the file containing the serialised BNC data structure from disc.
     * Failures are reported on <code>System.err</code> and leave the BNC map
     * unchanged.
     */
    public void readinBNC() {
        File f = new File(System.getProperty("user.dir") + File.separator
                + "data" + File.separator + "bncobjects.dat");
        FileInputStream fin = null;
        ObjectInputStream istrm = null;
        try {
            fin = new FileInputStream(f);
            istrm = new ObjectInputStream(fin);
            BNC = (Map) istrm.readObject();
        } catch (IOException e) {
            System.err.println("Trouble processing BNC file");
            e.printStackTrace();
        } catch (ClassNotFoundException c) {
            System.err.println("Can't find class when trying to read " +
                    "serialised BNC words");
            c.printStackTrace();
        } finally {
            // The file stream is closed separately in case constructing the
            // object stream failed.
            closeQuietly(istrm);
            closeQuietly(fin);
        }
    }

    /**
     * Closes the given resource, ignoring a <code>null</code> argument and any
     * IOException raised by the close itself.
     *
     * @param resource the resource to close, may be <code>null</code>
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }

    /**
     * Build data structures for nouns, verbs and adjectives from the WordNet data
     * and write them to disc for future use.
     *
     * @param args None required
     */
    public static void main(String[] args) {
        WordNetWords w = new WordNetWords();
        w.writeData("noun");
        w.writeData("verb");
        w.writeData("adjective");
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -