mmruletest.java

来自「基于词典和最大匹配算法的中文分词组件」· Java 代码 · 共 98 行

JAVA
98
字号
/**
 * 
 */
package org.solol.mmseg.test;

import org.solol.mmseg.core.IChunk;
import org.solol.mmseg.core.IRule;
import org.solol.mmseg.core.IWord;
import org.solol.mmseg.internal.Chunk;
import org.solol.mmseg.internal.MMRule;
import org.solol.mmseg.internal.Word;

import junit.framework.TestCase;

/**
 * @author solo L
 * 
 */
public class MMRuleTest extends TestCase {

	private IRule rule;

	/*
	 * (non-Javadoc)
	 * 
	 * @see junit.framework.TestCase#setUp()
	 */
	protected void setUp() throws Exception {
		rule = new MMRule();
	}

	/*
	 * (non-Javadoc)
	 * 
	 * @see junit.framework.TestCase#tearDown()
	 */
	protected void tearDown() throws Exception {
	}

	public final void testInvoke1() {
		IWord word1 = new Word("国际化",Word.CJK_WORD);
		IWord word2 = new Word("国际",Word.CJK_WORD);
		IWord word3 = new Word("国",Word.CJK_WORD);
		IWord word4 = new Word("际",Word.CJK_WORD);
		IWord word5 = new Word("化",Word.CJK_WORD);

		IChunk chunk1 = new Chunk(new IWord[] { word1 });
		IChunk chunk2 = new Chunk(new IWord[] { word2 });
		IChunk chunk3 = new Chunk(new IWord[] { word3 });
		IChunk chunk4 = new Chunk(new IWord[] { word4 });
		IChunk chunk5 = new Chunk(new IWord[] { word5 });

		IChunk[] chunk = rule.invoke(new IChunk[] { chunk1, chunk2, chunk3, chunk4, chunk5 });
		assertTrue(1 == chunk.length);
		assertEquals(chunk1, chunk[0]);
	}
	
	public final void testInvoke2() {
		IWord word1 = new Word("眼看",Word.CJK_WORD);
		IWord word2 = new Word("就要",Word.CJK_WORD);
		IWord word3 = new Word("来了",Word.CJK_WORD);
		IChunk chunk1 = new Chunk(new IWord[] { word1, word2, word3 });

		IWord word4 = new Word("来",Word.CJK_WORD);
		IChunk chunk2 = new Chunk(new IWord[] { word1, word2, word4 });

		IWord word5 = new Word("就",Word.CJK_WORD);
		IWord word6 = new Word("要",Word.CJK_WORD);
		IChunk chunk3 = new Chunk(new IWord[] { word1, word5, word6 });

		IWord word7 = new Word("眼",Word.CJK_WORD);
		IWord word8 = new Word("看",Word.CJK_WORD);
		IChunk chunk4 = new Chunk(new IWord[] { word7, word8, word2 });

		IChunk chunk5 = new Chunk(new IWord[] { word7, word8, word5 });

		IChunk[] chunk = rule.invoke(new IChunk[] { chunk1, chunk2, chunk3, chunk4, chunk5 });
		assertTrue(1 == chunk.length);
		assertEquals(chunk1, chunk[0]);
	}
	
	public final void testInvoke3() {
		IWord word1 = new Word("研究",Word.CJK_WORD);
		IWord word2 = new Word("生命",Word.CJK_WORD);
		IWord word3 = new Word("起源",Word.CJK_WORD);
		IWord word4 = new Word("研究生",Word.CJK_WORD);
		IWord word5 = new Word("命",Word.CJK_WORD);

		IChunk chunk1 = new Chunk(new IWord[] { word1, word2, word3 });
		IChunk chunk2 = new Chunk(new IWord[] { word4, word5, word3 });

		IChunk[] chunk = rule.invoke(new IChunk[] { chunk1, chunk2 });
		assertTrue(2 == chunk.length);
		assertEquals(chunk1, chunk[0]);
		assertEquals(chunk2, chunk[1]);
	}
}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?