📄 indexword.java

📁 WordNet is an extensive database developed by Princeton University faculty and students over the las

💻 JAVA

字号:

/*
 * WordNet-Java
 *
 * Copyright 1998 by Oliver Steele.  You can use this software freely so long as you preserve
 * the copyright notice and this restriction, and label your changes.
 */

package edu.gwu.wordnet;
import java.util.*;
import  edu.gwu.wordnet.util.*;

/**
 * An <code>IndexWord</code> represents a line of the <var>pos</var><code>.index</code> file.
 * An <code>IndexWord</code> is created or retrieved via {@link DictionaryDatabase#lookupIndexWord},
 * and has a <i>lemma</i>, a <i>pos</i>, and a set of <i>senses</i>, which are of type {@link Synset}.
 *
 * <p>Change note (not part of the original 1998 release): equality and hashing now take the
 * lemma into account, and the sense cache is only published once it has been completely loaded.
 *
 * @author Oliver Steele, steele@cs.brandeis.edu
 * @version 1.0
 */
public class IndexWord {
	/** Dictionary used by {@link #getSenses} to resolve synset offsets into synsets. */
	protected FileBackedDictionary dictionary;
	protected POS pos;
	/**
	 * Not assigned by {@link #initializeFrom}; it keeps its default of 0 unless code outside
	 * this class sets it.
	 */
	protected long offset;
	protected String lemma;
	protected int taggedSenseCount;
	// senses are initially stored as offsets, and paged in on demand.
	protected long[] synsetOffsets;
	/** This is null until getSenses has been called. */
	protected Synset[] synsets;

	/** This is null until initializeFrom has been called. */
	protected PointerType[] ptrTypes = null;

	//
	// Initialization
	//
	IndexWord(FileBackedDictionary dictionary) {
		this.dictionary = dictionary;
	}

	/**
	 * Fills in this object from one space-separated index line.  Tokens are consumed in this
	 * order: lemma, pos, poly_cnt (skipped), p_cnt, <var>p_cnt</var> pointer symbols,
	 * sense count, tagged sense count, and one synset offset per sense.
	 *
	 * @param line the index line to parse
	 * @return this object, to allow call chaining
	 */
	IndexWord initializeFrom(String line) {
		TokenizerParser tokenizer = new TokenizerParser(line, " ");
		// Multi-word lemmas use '_' in the file; expose them with spaces.
		this.lemma = tokenizer.nextToken().replace('_', ' ');
		this.pos = POS.lookup(tokenizer.nextToken());

		tokenizer.nextToken();	// poly_cnt
		int p_cnt = tokenizer.nextInt();
		ptrTypes = new PointerType[p_cnt];
		for (int i = 0; i < p_cnt; i++) {
			try {
				ptrTypes[i] = PointerType.parseKey(tokenizer.nextToken());
			} catch (java.util.NoSuchElementException exc) {
				// Best effort: report the problem and leave ptrTypes[i] null rather than
				// abandoning the whole entry.
				exc.printStackTrace();
			}
		}

		int senseCount = tokenizer.nextInt();
		this.taggedSenseCount = tokenizer.nextInt();
		this.synsetOffsets = new long[senseCount];
		for (int i = 0; i < senseCount; i++) {
			synsetOffsets[i] = tokenizer.nextLong();
		}

		return this;
	}

	/**
	 * Creates an <code>IndexWord</code> for <var>dictionary</var> and initializes it from
	 * <var>line</var>.  If parsing raises a <code>RuntimeException</code>, the offending line is
	 * written to <code>System.err</code> and the exception is rethrown.
	 */
	static IndexWord parseIndexWord(FileBackedDictionary dictionary, String line) {
		try {
			return new IndexWord(dictionary).initializeFrom(line);
		} catch (RuntimeException e) {
			System.err.println("while parsing " + line);
			throw e;
		}
	}


	//
	// Object methods
	//

	/**
	 * Two index words are equal when they have the same pos, offset and lemma.  The lemma is
	 * part of the comparison because <code>offset</code> is not assigned by
	 * {@link #initializeFrom}, so pos and offset alone would make every parsed word of one
	 * part of speech equal to every other.
	 */
	public boolean equals(Object object) {
		if (this == object) {
			return true;
		}
		if (!(object instanceof IndexWord)) {
			return false;
		}
		IndexWord that = (IndexWord) object;
		return offset == that.offset
			&& sameOrBothNull(pos, that.pos)
			&& sameOrBothNull(lemma, that.lemma);
	}

	/** Consistent with {@link #equals}: combines pos, offset and lemma. */
	public int hashCode() {
		int hash = (pos == null) ? 0 : pos.hashCode();
		// Fold both halves of the long so the high 32 bits are not discarded.
		hash = 31 * hash + (int) (offset ^ (offset >>> 32));
		hash = 31 * hash + ((lemma == null) ? 0 : lemma.hashCode());
		return hash;
	}

	public String toString() {
		return "[IndexWord " + offset + "@" + pos.getLabel() + ": \"" + lemma + "\"]";
	}

	/** Null-safe equality: true when both are null, or when <var>a</var> equals <var>b</var>. */
	private static boolean sameOrBothNull(Object a, Object b) {
		return (a == null) ? (b == null) : a.equals(b);
	}

	//
	// Accessors
	//
	public POS getPOS() {
		return pos;
	}

	/**
	 * The pointer types available for this indexed word.  May not apply to all senses of the word.
	 * The returned array is the internal one, not a copy.  An element is null if reading or
	 * parsing its symbol raised a <code>NoSuchElementException</code> during initialization.
	 */
	public PointerType[] getPointerTypes() {
		return ptrTypes;
	}

	/** Return the word's <i>lemma</i>.  Its lemma is its orthographic representation, for
	 * example <code>"dog"</code> or <code>"get up"</code>.
	 */
	public String getLemma() {
		return lemma;
	}

	/** Return the tagged sense count read from the index line. */
	public int getTaggedSenseCount() {
		return taggedSenseCount;
	}

	/**
	 * Return the senses of this word, loading them from the dictionary on the first call and
	 * caching the array for later calls.  The returned array is the cached one, not a copy.
	 */
	public Synset[] getSenses() {
		if (synsets == null) {
			// Fill a local array first: if a lookup throws part-way through, the cache stays
			// null and the next call retries instead of returning a half-filled array.
			Synset[] loaded = new Synset[synsetOffsets.length];
			for (int i = 0; i < loaded.length; ++i) {
				loaded[i] = dictionary.getSynsetAt(pos, synsetOffsets[i]);
			}
			synsets = loaded;
		}
		return synsets;
	}
}

💿 文件大小 10705 K

👤 上传用户 sinoarts

📂 所属分类书籍源码

🏷️ 相关标签

#University #Princeton #extensive #developed

⌨️ 快捷键说明

复制代码 Ctrl + C

搜索代码 Ctrl + F

全屏模式 F11

切换主题 Ctrl + Shift + D

显示快捷键 ?

增大字号 Ctrl + =

减小字号 Ctrl + -