⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ccontextstat.java

📁 基于中科院的ICTCLAS实现中文分词系统 开发工具是JAVA.经测试,效果很好
💻 JAVA
字号:
package com.gftech.ictclas4j.utility;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;

/**
 * Tag-transition (context) statistics keyed by an integer {@code nKey}.
 *
 * <p>
 * For every key the class keeps one {@link TagContext} holding a square
 * matrix {@code aContextArray[prev][cur]} of transition frequencies, a
 * per-row total {@code aTagFreq[prev]} and a grand total {@code nTotalFreq}.
 * Rows and columns are indexed by the position of a symbol in the symbol
 * table. The items are chained through {@code TagContext.next} into a singly
 * linked list whose head is {@code m_pContext}.
 */
public class CContextStat {
	/** Floor returned by {@link #GetContextPossibility} instead of 0. */
	private static final double MIN_POSSIBILITY = 0.000001;

	/** Number of symbols; also the side length of every context matrix. */
	private int m_nTableLen;

	/** Symbol table that is searched with {@code Utility.BinarySearch}. */
	private int[] m_pSymbolTable;

	/** Head of the linked list of context items; {@code null} when empty. */
	private TagContext m_pContext;

	/** Not referenced anywhere in this class. */
	private int m_nCategory;

	public CContextStat() {
	}

	/**
	 * Copies the first {@code m_nTableLen} entries of {@code nSymbol} into a
	 * fresh symbol table.
	 *
	 * @param nSymbol
	 *            source symbols; must hold at least {@code m_nTableLen}
	 *            entries
	 * @return {@code true} on success, {@code false} if {@code nSymbol} is
	 *         {@code null} or too short (the table is left untouched)
	 */
	public boolean SetSymbol(int[] nSymbol) {
		if (nSymbol == null || nSymbol.length < m_nTableLen) {
			return false;
		}
		m_pSymbolTable = new int[m_nTableLen];
		System.arraycopy(nSymbol, 0, m_pSymbolTable, 0, m_nTableLen);
		return true;
	}

	/**
	 * Adds {@code nFrequency} to the transition {@code nPrevSymbol ->
	 * nCurSymbol} of the item with key {@code nKey}, creating and linking that
	 * item first when it does not exist yet.
	 *
	 * @param nKey
	 *            key of the context item
	 * @param nPrevSymbol
	 *            symbol of the preceding tag
	 * @param nCurSymbol
	 *            symbol of the current tag
	 * @param nFrequency
	 *            amount to add
	 * @return {@code false} if either symbol cannot be found in the symbol
	 *         table, {@code true} otherwise
	 */
	public boolean Add(int nKey, int nPrevSymbol, int nCurSymbol, int nFrequency) {
		TagContext[] found = new TagContext[1];
		TagContext item;
		if (GetItem(nKey, found)) {
			item = found[0];
		} else {
			item = createItem(nKey, m_nTableLen);
			// On a miss GetItem reports the last item whose key is smaller
			// than nKey, i.e. the insertion predecessor.
			TagContext prev = found[0];
			if (prev == null) {
				// No smaller key: the new item becomes the head. Keep the
				// existing list behind it instead of dropping it.
				item.next = m_pContext;
				m_pContext = item;
			} else {
				item.next = prev.next;
				prev.next = item;
			}
		}

		int nPrevIndex = findSymbolIndexWithFallback(nPrevSymbol);
		int nCurIndex = findSymbolIndexWithFallback(nCurSymbol);
		if (nPrevIndex == -1 || nCurIndex == -1) {
			return false; // error finding the symbol
		}

		item.aContextArray[nPrevIndex][nCurIndex] += nFrequency;
		item.aTagFreq[nPrevIndex] += nFrequency;
		item.nTotalFreq += nFrequency;
		return true;
	}

	/**
	 * Writes the statistics to {@code sFilename} (binary, via
	 * {@link DataOutputStream}) and a human-readable dump to
	 * {@code sFilename + ".shw"}. Both files are created if missing and
	 * overwritten otherwise.
	 *
	 * <p>
	 * Binary layout: table length, the symbol table, then for every item its
	 * key, total frequency, the {@code aTagFreq} row totals and the
	 * {@code aContextArray} matrix row by row - all as 4-byte ints.
	 *
	 * @param sFilename
	 *            path of the binary file
	 * @return {@code true} if both files were written completely,
	 *         {@code false} if {@code sFilename} is {@code null} or an I/O
	 *         error occurred
	 */
	public boolean Save(String sFilename) {
		if (sFilename == null) {
			return false;
		}
		File dataFile = new File(sFilename);
		File showFile = new File(sFilename + ".shw");

		DataOutputStream out = null;
		PrintWriter out2 = null;
		try {
			// Opening the stream creates the file; an unwritable path raises
			// FileNotFoundException, which is handled below.
			out = new DataOutputStream(new BufferedOutputStream(
					new FileOutputStream(dataFile)));
			out2 = new PrintWriter(new FileOutputStream(showFile));

			out.writeInt(m_nTableLen); // write the table length
			for (int i = 0; i < m_nTableLen; i++) {
				out.writeInt(m_pSymbolTable[i]); // write the symbol table
			}

			out2.println("Table Len=" + m_nTableLen);
			out2.println("Symbol:");
			for (int i = 0; i < m_nTableLen; i++) {
				out2.print(m_pSymbolTable[i]);
			}
			out2.println();

			for (TagContext tc = m_pContext; tc != null; tc = tc.next) {
				out.writeInt(tc.nKey);
				out.writeInt(tc.nTotalFreq);
				out2.println("nKey=" + tc.nKey + ",Total frequency="
						+ tc.nTotalFreq + ":");
				for (int i = 0; i < m_nTableLen; i++) {
					out.writeInt(tc.aTagFreq[i]); // frequency of every tag
				}
				for (int i = 0; i < m_nTableLen; i++) {
					for (int j = 0; j < m_nTableLen; j++) {
						out.writeInt(tc.aContextArray[i][j]);
					}

					out2.print("No." + i + "=" + m_pSymbolTable[i] + ":");
					for (int j = 0; j < m_nTableLen; j++) {
						out2.println("" + tc.aContextArray[i][j]);
					}
					out2.println("total=" + tc.aTagFreq[i]);
				}
			}

			// Flush explicitly so that a write failure is reported here and
			// not hidden by the quiet close in the finally block.
			out.flush();
			// PrintWriter never throws; checkError() flushes and reports.
			return !out2.checkError();
		} catch (IOException e) {
			e.printStackTrace();
			return false;
		} finally {
			closeQuietly(out);
			closeQuietly(out2);
		}
	}

	/**
	 * Replaces the current statistics with the content of a binary file in
	 * the layout written by {@link #Save} (read with
	 * {@link DataInputStream}).
	 *
	 * <p>
	 * The file is parsed completely before any field is changed, so a failed
	 * load leaves the previous state intact.
	 *
	 * @param sFilename
	 *            path of the binary file
	 * @return {@code true} on success; {@code false} if the file cannot be
	 *         read, is truncated in the middle of a record, or declares a
	 *         negative table length
	 */
	public boolean Load(String sFilename) {
		if (sFilename == null) {
			return false;
		}
		File file = new File(sFilename);
		if (!file.canRead()) {
			return false;
		}

		DataInputStream in = null;
		try {
			in = new DataInputStream(new BufferedInputStream(
					new FileInputStream(file)));

			int nTableLen = in.readInt();
			if (nTableLen < 0) {
				return false;
			}
			int[] symbols = new int[nTableLen];
			for (int i = 0; i < nTableLen; i++) {
				symbols[i] = in.readInt();
			}

			TagContext head = null;
			TagContext tail = null;
			while (true) {
				int nKey;
				try {
					nKey = in.readInt();
				} catch (EOFException endOfFile) {
					break; // no further record
				}
				// Any EOF past this point is a truncated record and is
				// reported through the outer IOException handler.
				TagContext item = createItem(nKey, nTableLen);
				item.nTotalFreq = in.readInt();
				for (int i = 0; i < nTableLen; i++) {
					item.aTagFreq[i] = in.readInt(); // frequency of every tag
				}
				for (int i = 0; i < nTableLen; i++) {
					for (int j = 0; j < nTableLen; j++) {
						item.aContextArray[i][j] = in.readInt();
					}
				}
				if (tail == null) {
					head = item;
				} else {
					tail.next = item;
				}
				tail = item;
			}

			m_nTableLen = nTableLen;
			m_pSymbolTable = symbols;
			m_pContext = head;
			return true;
		} catch (IOException e) {
			e.printStackTrace();
			return false;
		} finally {
			closeQuietly(in);
		}
	}

	/**
	 * Estimates how likely tag {@code nCur} follows tag {@code nPrev} in the
	 * context {@code nKey}.
	 *
	 * @param nKey
	 *            key of the context item
	 * @param nPrev
	 *            symbol of the preceding tag
	 * @param nCur
	 *            symbol of the current tag
	 * @return {@code 0.9 * freq(prev,cur) / freq(prev) + 0.1 * freq(prev) /
	 *         total}, or {@code 0.000001} when the item or either symbol is
	 *         unknown or one of the two frequencies is 0 (a small non-zero
	 *         value is used to cope with sparse data)
	 */
	public double GetContextPossibility(int nKey, int nPrev, int nCur) {
		TagContext[] found = new TagContext[1];
		int nCurIndex = Utility.BinarySearch(nCur, m_pSymbolTable, m_nTableLen);
		int nPrevIndex = Utility.BinarySearch(nPrev, m_pSymbolTable,
				m_nTableLen);
		if (!GetItem(nKey, found) || nCurIndex == -1 || nPrevIndex == -1) {
			return MIN_POSSIBILITY;
		}

		TagContext item = found[0];
		int nPrevFreq = item.aTagFreq[nPrevIndex];
		int nPrevCurConFreq = item.aContextArray[nPrevIndex][nCurIndex];
		if (nPrevFreq == 0 || nPrevCurConFreq == 0) {
			return MIN_POSSIBILITY;
		}
		// 0.9 and 0.1 are weights chosen from experience
		return 0.9 * (double) nPrevCurConFreq / (double) nPrevFreq + 0.1
				* (double) nPrevFreq / (double) item.nTotalFreq;
	}

	/**
	 * Returns how often {@code nSymbol} was recorded as the preceding tag in
	 * the context {@code nKey}.
	 *
	 * @param nKey
	 *            key of the context item
	 * @param nSymbol
	 *            symbol to look up
	 * @return the {@code aTagFreq} entry of the symbol, or 0 when the item or
	 *         the symbol is unknown
	 */
	public int GetFrequency(int nKey, int nSymbol) {
		TagContext[] found = new TagContext[1];
		if (!GetItem(nKey, found)) {
			return 0; // no such item
		}
		int nIndex = Utility.BinarySearch(nSymbol, m_pSymbolTable, m_nTableLen);
		if (nIndex == -1) {
			return 0; // error finding the symbol
		}
		return found[0].aTagFreq[nIndex];
	}

	/**
	 * Looks up the item with key {@code nKey}.
	 *
	 * <p>
	 * Java passes references by value, so the result is delivered through
	 * element 0 of {@code pItemRet}:
	 * <ul>
	 * <li>on a hit, the matching item;</li>
	 * <li>on a miss, the last item passed while its key was smaller than
	 * {@code nKey} (the insertion predecessor), or {@code null} if there is
	 * none.</li>
	 * </ul>
	 * When {@code nKey} is 0 and the list is not empty, the head item is
	 * reported as the hit regardless of its key.
	 *
	 * @param nKey
	 *            key to search for
	 * @param pItemRet
	 *            optional one-element holder for the result; may be
	 *            {@code null} or empty if only the return value is needed
	 * @return {@code true} if an item was found
	 */
	public boolean GetItem(int nKey, TagContext[] pItemRet) {
		TagContext result;
		boolean bFound;
		if (nKey == 0 && m_pContext != null) {
			result = m_pContext;
			bFound = true;
		} else {
			TagContext pPrev = null;
			TagContext pCur = m_pContext;
			while (pCur != null && pCur.nKey < nKey) {
				pPrev = pCur;
				pCur = pCur.next;
			}
			bFound = pCur != null && pCur.nKey == nKey;
			result = bFound ? pCur : pPrev;
		}
		if (pItemRet != null && pItemRet.length > 0) {
			pItemRet[0] = result;
		}
		return bFound;
	}

	/**
	 * Sets the table length and allocates an empty symbol table of that size.
	 *
	 * @param nTableLen
	 *            number of symbols; must not be negative
	 * @return {@code true} on success, {@code false} if {@code nTableLen} is
	 *         negative (nothing is changed)
	 */
	public boolean SetTableLen(int nTableLen) {
		if (nTableLen < 0) {
			return false;
		}
		m_nTableLen = nTableLen; // Set the table len
		m_pSymbolTable = new int[nTableLen]; // new buffer for symbol
		return true;
	}

	/** Builds an unlinked, zero-filled item for nKey with nTableLen rows and columns. */
	private static TagContext createItem(int nKey, int nTableLen) {
		TagContext item = new TagContext();
		item.nKey = nKey;
		item.nTotalFreq = 0;
		item.next = null;
		item.aTagFreq = new int[nTableLen];
		item.aContextArray = new int[nTableLen][nTableLen];
		return item;
	}

	/**
	 * Finds nSymbol in the symbol table; a symbol above 256 that is missing is
	 * retried as nSymbol - nSymbol % 256 (per the original note: for 'nx' and
	 * other uncommon POS). Returns -1 if neither is found.
	 */
	private int findSymbolIndexWithFallback(int nSymbol) {
		int nIndex = Utility.BinarySearch(nSymbol, m_pSymbolTable, m_nTableLen);
		if (nSymbol > 256 && nIndex == -1) {
			nIndex = Utility.BinarySearch(nSymbol - nSymbol % 256,
					m_pSymbolTable, m_nTableLen);
		}
		return nIndex;
	}

	/** Closes c if it is not null; a failure to close is ignored on purpose. */
	private static void closeQuietly(Closeable c) {
		if (c == null) {
			return;
		}
		try {
			c.close();
		} catch (IOException ignored) {
			// Output was flushed and checked before; nothing left to recover.
		}
	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -