⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 wordnet.java

📁 自己写的search engine, 有 boolean search, fuzzy search
💻 JAVA
字号:
package searchingEngine.queryExpansion;

// Related files: file_properties.xml, commons-logging.jar, jwnl.jar, utilities.jar, and the WordNet dictionary (from WordNet-2.1.exe)

/**
 * Built on the Java WordNet Library (JWNL), version 1.3.
 * See the JWNL documentation for copyright information.
 *
 * @version 1.1
 */

import net.didion.jwnl.*;
import net.didion.jwnl.data.*;
import net.didion.jwnl.dictionary.Dictionary;

import java.io.*;
import java.util.*;
import java.util.regex.Pattern;

/**
 * Synonym lookup helper built on the JWNL package.
 *
 * <p>JWNL is initialized once, when this class is loaded, from the
 * {@code file_properties.xml} resource resolved relative to this class.
 * A failed initialization is reported on {@code System.err} and does not
 * abort class loading; any lookup failure that follows from it is caught in
 * {@link #getSynonyms(String)}, which then returns an empty array.
 */
public class WordNet
{
	private static final String USAGE = "java WordNet Term";

	/** Name of the JWNL configuration resource, resolved relative to this class. */
	private static final String PROPERTIES_RESOURCE = "file_properties.xml";

	/**
	 * A single letter wrapped in parentheses, e.g. "(a)". Every occurrence is
	 * stripped from a lemma before it is compared or stored.
	 * NOTE(review): presumably these are WordNet adjective markers - confirm.
	 */
	private static final Pattern LEMMA_MARKER = Pattern.compile("\\(\\p{Alpha}\\)");

	/** Any single punctuation character or digit; a synonym containing one is discarded. */
	private static final Pattern PUNCT_OR_DIGIT = Pattern.compile("\\p{Punct}|\\p{Digit}");

	// Static initializer: JWNL must be initialized before it can be used.
	static
	{
		InputStream config = WordNet.class.getResourceAsStream(PROPERTIES_RESOURCE);
		if (config == null)
		{
			System.err.println("WordNet: resource " + PROPERTIES_RESOURCE
					+ " not found; JWNL was not initialized");
		}
		else
		{
			try
			{
				JWNL.initialize(config);
			}
			catch (Exception ex)
			{
				// Best effort: keep the class loadable, but make the failure visible.
				ex.printStackTrace();
			}
			finally
			{
				// The stream is ours, so release it whether or not initialization worked.
				try
				{
					config.close();
				}
				catch (IOException ignored)
				{
					// Nothing useful can be done if closing a configuration stream fails.
				}
			}
		}
	}

	/**
	 * Command-line entry point, for testing only; normally not called directly.
	 * Prints the boolean expression built from the filtered synonyms of the
	 * single argument, followed by the number of synonyms it contains.
	 *
	 * @param args exactly one element: the term to look up
	 */
	public static void main(String[] args)
	{
		if (args.length != 1)
		{
			System.out.println("Error!\nUsage: "+USAGE);
			System.exit(-1);
		}
		String[] synonyms = getSynonymsTrimmed(args[0]);
		String[] expression = toBooleanExpression(synonyms);
		for (String token : expression)
			System.out.print(token);
		// Count the synonyms themselves, not the "(", "|" and ")" tokens around them.
		System.out.println("\n.:: Total "+synonyms.length+" words found! ::.");
	}

	/**
	 * Collects the synonyms of a term.
	 *
	 * <p>Every lemma of every sense of every index word found for {@code term}
	 * is considered. Lemmas equal to {@code term} (ignoring case) are skipped
	 * and duplicates (case-sensitive) are dropped; the order of first
	 * appearance is kept.
	 *
	 * @param term the term to look up
	 * @return the synonyms found; an empty array if {@code term} is
	 *         {@code null} or empty, or if the lookup fails for any reason
	 *         (the failure is reported on {@code System.err})
	 */
	public static String[] getSynonyms(String term)
	{
		if (term == null || term.length() == 0)
			return new String[0];

		try
		{
			// LinkedHashSet: constant-time duplicate check while preserving insertion order.
			Set<String> found = new LinkedHashSet<String>();
			IndexWord[] indexWords = Dictionary.getInstance().lookupAllIndexWords(term).getIndexWordArray();
			for (IndexWord indexWord : indexWords)
			{
				for (Synset sense : indexWord.getSenses())
				{
					for (Word word : sense.getWords())
					{
						String lemma = LEMMA_MARKER.matcher(word.getLemma()).replaceAll("");
						if (!lemma.equalsIgnoreCase(term))
							found.add(lemma);
					}
				}
			}
			return found.toArray(new String[0]);
		}
		catch (Exception e)
		{
			// Deliberately broad: a lookup problem must never break the caller,
			// but it should not disappear without a trace either.
			System.err.println("WordNet: synonym lookup failed for \"" + term + "\": " + e);
			return new String[0];
		}
	}

	/**
	 * Returns the synonyms of a term that consist of neither punctuation nor
	 * digits.
	 *
	 * <p>A synonym is dropped if it is empty or contains a punctuation
	 * character or a digit anywhere, including at its very end.
	 *
	 * @param term the term to look up
	 * @return the remaining synonyms, in the order {@link #getSynonyms(String)}
	 *         produced them; possibly empty, never {@code null}
	 */
	public static String[] getSynonymsTrimmed(String term)
	{
		List<String> kept = new ArrayList<String>();
		for (String candidate : getSynonyms(term))
		{
			if (candidate.length() > 0 && !PUNCT_OR_DIGIT.matcher(candidate).find())
				kept.add(candidate);
		}
		return kept.toArray(new String[0]);
	}

	/**
	 * Returns the filtered synonyms of a term as the tokens of a parenthesized
	 * alternation: {@code "("}, first synonym, {@code "|"}, second synonym,
	 * ..., {@code ")"}.
	 *
	 * @param term the term to look up
	 * @return the token array, or an empty array when there are no synonyms
	 */
	public static String[] getSynonymsTrimmedForBM(String term)
	{
		return toBooleanExpression(getSynonymsTrimmed(term));
	}

	/** Wraps the given words in "(" ... ")" with "|" between neighbours; empty in, empty out. */
	private static String[] toBooleanExpression(String[] words)
	{
		if (words.length == 0)
			return new String[0];

		List<String> tokens = new ArrayList<String>(2 * words.length + 1);
		tokens.add("(");
		for (int i = 0; i < words.length; i++)
		{
			if (i > 0)
				tokens.add("|");
			tokens.add(words[i]);
		}
		tokens.add(")");
		return tokens.toArray(new String[0]);
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -