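// File: ccontextstat.java
// (Exported from a web code viewer; the viewer's "font size" control label that followed the file name is not part of the source.)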
package com.gftech.ictclas4j.utility;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.Closeable;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
/**
 * Frequency statistics of symbol (tag) contexts.
 *
 * <p>For every context key the class keeps one {@link TagContext} holding, per
 * symbol of the symbol table, how often that symbol occurred
 * ({@code aTagFreq}) and how often each symbol followed it
 * ({@code aContextArray[prev][cur]}). The contexts form a singly linked list
 * (via {@code TagContext.next}) that {@link #Add} keeps ordered by ascending
 * key.
 *
 * <p>Symbols are located in the symbol table with
 * {@code Utility.BinarySearch}; a result of {@code -1} is treated as "not
 * found". NOTE(review): the table presumably has to be sorted for that search
 * to work - confirm against {@code Utility}.
 *
 * <p>This class is not synchronized.
 */
public class CContextStat {
    /** Value returned instead of 0 when no usable statistic exists, to avoid data sparseness. */
    private static final double MIN_POSSIBILITY = 0.000001;

    /** Weight of the transition term; 0.9 and 0.1 are values based on experience. */
    private static final double TRANSITION_WEIGHT = 0.9;

    /** Weight of the previous-symbol prior term. */
    private static final double PRIOR_WEIGHT = 0.1;

    /** Result of {@code Utility.BinarySearch} that this class treats as "symbol not found". */
    private static final int NOT_FOUND = -1;

    /** Symbols above this value are retried with their remainder modulo this value removed. */
    private static final int SYMBOL_BASE = 256;

    /** Number of symbols in the symbol table; also the dimension of every context matrix. */
    private int m_nTableLen;

    /** The symbol table; {@code null} until {@link #SetTableLen} or {@link #Load} succeeded. */
    private int[] m_pSymbolTable;

    /** Head of the linked list of contexts, or {@code null} when there are none. */
    private TagContext m_pContext;

    /** Not read or written by any method of this class. */
    private int m_nCategory;

    public CContextStat() {
    }

    /**
     * Copies the first {@code m_nTableLen} entries of {@code nSymbol} into the
     * symbol table.
     *
     * @param nSymbol source symbols; must hold at least as many entries as
     *                the current table length
     * @return {@code true} on success, {@code false} if {@code nSymbol} is
     *         {@code null} or too short (the table is left untouched then)
     */
    public boolean SetSymbol(int[] nSymbol) {
        if (nSymbol == null || nSymbol.length < m_nTableLen) {
            return false;
        }
        int[] copy = new int[m_nTableLen];
        System.arraycopy(nSymbol, 0, copy, 0, m_nTableLen);
        m_pSymbolTable = copy;
        return true;
    }

    /**
     * Adds {@code nFrequency} to the statistics of the transition
     * {@code nPrevSymbol -> nCurSymbol} in the context {@code nKey}, creating
     * the context when it does not exist yet.
     *
     * <p>A symbol greater than 256 that is not in the table is looked up again
     * with {@code symbol % 256} subtracted (used for 'nx' and other uncommon
     * POS).
     *
     * @return {@code true} if the frequency was added; {@code false} if no
     *         symbol table is set or one of the symbols cannot be found. As
     *         in the original flow, a context created for a new key is kept
     *         even when the symbol lookup fails afterwards.
     */
    public boolean Add(int nKey, int nPrevSymbol, int nCurSymbol, int nFrequency) {
        if (m_pSymbolTable == null) {
            return false;
        }

        TagContext[] slot = new TagContext[1];
        TagContext item;
        if (GetItem(nKey, slot)) {
            item = slot[0];
        } else {
            item = newContext(nKey, m_nTableLen);
            TagContext predecessor = slot[0];
            if (predecessor == null) {
                // No smaller key exists: the new item becomes the head and
                // must keep pointing at the former head so nothing is lost.
                item.next = m_pContext;
                m_pContext = item;
            } else {
                // Link the new item between the predecessor and its next item.
                item.next = predecessor.next;
                predecessor.next = item;
            }
        }

        int nPrevIndex = findSymbolOrBase(nPrevSymbol);
        int nCurIndex = findSymbolOrBase(nCurSymbol);
        if (nPrevIndex == NOT_FOUND || nCurIndex == NOT_FOUND) {
            return false;
        }

        item.aContextArray[nPrevIndex][nCurIndex] += nFrequency;
        item.aTagFreq[nPrevIndex] += nFrequency;
        item.nTotalFreq += nFrequency;
        return true;
    }

    /**
     * Writes the statistics to {@code sFilename} (binary, via
     * {@link DataOutputStream}) and a human-readable dump to
     * {@code sFilename + ".shw"}.
     *
     * <p>Binary layout: table length, the symbols, then for every context its
     * key, total frequency, the per-symbol frequencies and the context matrix
     * row by row. {@link #Load} reads the same layout.
     *
     * @return {@code true} if both files were written completely;
     *         {@code false} if a target is not writable or an I/O error
     *         occurred
     */
    public boolean Save(String sFilename) {
        if (sFilename == null) {
            return false;
        }
        File dataFile = new File(sFilename);
        File showFile = new File(sFilename + ".shw");
        if (!isWritableTarget(dataFile) || !isWritableTarget(showFile)) {
            return false;
        }

        DataOutputStream out = null;
        PrintWriter show = null;
        try {
            out = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(dataFile)));
            show = new PrintWriter(new BufferedOutputStream(new FileOutputStream(showFile)));

            out.writeInt(m_nTableLen);
            for (int i = 0; i < m_nTableLen; i++) {
                out.writeInt(m_pSymbolTable[i]);
            }

            show.println("Table Len=" + m_nTableLen);
            show.println("Symbol:");
            for (int i = 0; i < m_nTableLen; i++) {
                // Separate the symbols; printed back to back they cannot be told apart.
                show.print(m_pSymbolTable[i] + " ");
            }
            show.println();

            for (TagContext tc = m_pContext; tc != null; tc = tc.next) {
                out.writeInt(tc.nKey);
                out.writeInt(tc.nTotalFreq);
                show.println("nKey=" + tc.nKey + ",Total frequency="
                        + tc.nTotalFreq + ":");
                for (int i = 0; i < m_nTableLen; i++) {
                    out.writeInt(tc.aTagFreq[i]);
                }
                for (int i = 0; i < m_nTableLen; i++) {
                    show.print("No." + i + "=" + m_pSymbolTable[i] + ":");
                    for (int j = 0; j < m_nTableLen; j++) {
                        out.writeInt(tc.aContextArray[i][j]);
                        show.println("" + tc.aContextArray[i][j]);
                    }
                    show.println("total=" + tc.aTagFreq[i]);
                }
            }

            // Flush here so that a write failure is reported instead of being
            // hidden inside the quiet close below. PrintWriter never throws;
            // checkError() flushes it and reports any failure.
            out.flush();
            return !show.checkError();
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            closeQuietly(out);
            closeQuietly(show);
        }
    }

    /**
     * Replaces the symbol table and all contexts with the content of
     * {@code sFilename}, which must have the binary layout written by
     * {@link #Save}.
     *
     * <p>The current state is only replaced after the whole file was read
     * successfully; on failure it stays unchanged.
     *
     * @return {@code true} if the file was loaded; {@code false} if it cannot
     *         be read, is truncated or declares an impossible table length
     */
    public boolean Load(String sFilename) {
        if (sFilename == null) {
            return false;
        }
        File file = new File(sFilename);
        if (!file.canRead()) {
            return false;
        }

        DataInputStream in = null;
        try {
            in = new DataInputStream(new BufferedInputStream(new FileInputStream(file)));

            int tableLen = in.readInt();
            // A table cannot be negative nor larger than the file holding it;
            // rejecting it here avoids a huge allocation on corrupt input.
            if (tableLen < 0 || (long) tableLen * 4L > file.length()) {
                return false;
            }
            int[] symbols = new int[tableLen];
            for (int i = 0; i < tableLen; i++) {
                symbols[i] = in.readInt();
            }

            TagContext head = null;
            TagContext tail = null;
            while (true) {
                int key;
                try {
                    key = in.readInt();
                } catch (EOFException endOfData) {
                    break; // no further context record: regular end of file
                }
                // An EOF inside a record propagates as IOException: truncated file.
                TagContext item = newContext(key, tableLen);
                item.nTotalFreq = in.readInt();
                for (int i = 0; i < tableLen; i++) {
                    item.aTagFreq[i] = in.readInt();
                }
                for (int i = 0; i < tableLen; i++) {
                    for (int j = 0; j < tableLen; j++) {
                        item.aContextArray[i][j] = in.readInt();
                    }
                }
                if (tail == null) {
                    head = item;
                } else {
                    tail.next = item;
                }
                tail = item;
            }

            m_nTableLen = tableLen;
            m_pSymbolTable = symbols;
            m_pContext = head;
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Estimates how likely {@code nCur} follows {@code nPrev} in the context
     * {@code nKey}:
     * {@code 0.9 * freq(prev,cur) / freq(prev) + 0.1 * freq(prev) / total}.
     *
     * @return the estimate, or {@code 0.000001} when the context or one of
     *         the symbols is unknown or the needed frequencies are zero
     */
    public double GetContextPossibility(int nKey, int nPrev, int nCur) {
        if (m_pSymbolTable == null) {
            return MIN_POSSIBILITY;
        }
        int nCurIndex = Utility.BinarySearch(nCur, m_pSymbolTable, m_nTableLen);
        int nPrevIndex = Utility.BinarySearch(nPrev, m_pSymbolTable, m_nTableLen);
        if (nCurIndex == NOT_FOUND || nPrevIndex == NOT_FOUND) {
            return MIN_POSSIBILITY;
        }

        TagContext[] slot = new TagContext[1];
        if (!GetItem(nKey, slot)) {
            return MIN_POSSIBILITY;
        }
        TagContext item = slot[0];
        int nPrevFreq = item.aTagFreq[nPrevIndex];
        int nPrevCurConFreq = item.aContextArray[nPrevIndex][nCurIndex];
        if (nPrevFreq == 0 || nPrevCurConFreq == 0 || item.nTotalFreq == 0) {
            return MIN_POSSIBILITY;
        }

        return TRANSITION_WEIGHT * (double) nPrevCurConFreq / (double) nPrevFreq
                + PRIOR_WEIGHT * (double) nPrevFreq / (double) item.nTotalFreq;
    }

    /**
     * Returns how often {@code nSymbol} was recorded as previous symbol in
     * the context {@code nKey}.
     *
     * @return the frequency, or 0 if the context or the symbol is unknown
     */
    public int GetFrequency(int nKey, int nSymbol) {
        if (m_pSymbolTable == null) {
            return 0;
        }
        TagContext[] slot = new TagContext[1];
        if (!GetItem(nKey, slot)) {
            return 0;
        }
        int nIndex = Utility.BinarySearch(nSymbol, m_pSymbolTable, m_nTableLen);
        if (nIndex == NOT_FOUND) {
            return 0;
        }
        return slot[0].aTagFreq[nIndex];
    }

    /**
     * Looks up the context with the key {@code nKey}.
     *
     * <p>Key 0 on a non-empty list always yields the head item, whatever its
     * key is. Otherwise the list is walked while the keys are smaller than
     * {@code nKey}.
     *
     * @param nKey     key to look for
     * @param pItemRet out-parameter: when non-{@code null} and non-empty,
     *                 {@code pItemRet[0]} receives the found item, or - if
     *                 there is no such item - the last item with a smaller
     *                 key ({@code null} if there is none). Java passes the
     *                 array reference by value, so the result has to be
     *                 stored in an element, not assigned to the parameter.
     * @return {@code true} if an item for {@code nKey} exists
     */
    public boolean GetItem(int nKey, TagContext[] pItemRet) {
        TagContext result;
        boolean found;
        if (nKey == 0 && m_pContext != null) {
            result = m_pContext;
            found = true;
        } else {
            TagContext previous = null;
            TagContext current = m_pContext;
            while (current != null && current.nKey < nKey) {
                previous = current;
                current = current.next;
            }
            found = current != null && current.nKey == nKey;
            result = found ? current : previous;
        }
        if (pItemRet != null && pItemRet.length > 0) {
            pItemRet[0] = result;
        }
        return found;
    }

    /**
     * Sets the table length and allocates a fresh, zero-filled symbol table.
     *
     * <p>All contexts are discarded because their arrays are sized for the
     * previous table length.
     *
     * @return {@code true} on success, {@code false} if {@code nTableLen} is
     *         negative (nothing is changed then)
     */
    public boolean SetTableLen(int nTableLen) {
        if (nTableLen < 0) {
            return false;
        }
        m_nTableLen = nTableLen;
        m_pSymbolTable = new int[nTableLen];
        m_pContext = null;
        return true;
    }

    /** Creates an unlinked context for {@code nKey} with zeroed {@code tableLen}-sized statistics. */
    private static TagContext newContext(int nKey, int tableLen) {
        TagContext item = new TagContext();
        item.nKey = nKey;
        item.nTotalFreq = 0;
        item.next = null;
        item.aTagFreq = new int[tableLen];
        item.aContextArray = new int[tableLen][tableLen];
        return item;
    }

    /** Finds {@code symbol} in the table; symbols above 256 fall back to {@code symbol - symbol % 256}. */
    private int findSymbolOrBase(int symbol) {
        int index = Utility.BinarySearch(symbol, m_pSymbolTable, m_nTableLen);
        if (index == NOT_FOUND && symbol > SYMBOL_BASE) {
            index = Utility.BinarySearch(symbol - symbol % SYMBOL_BASE, m_pSymbolTable, m_nTableLen);
        }
        return index;
    }

    /** Tells whether {@code file} can be written: it is either missing (to be created) or writable. */
    private static boolean isWritableTarget(File file) {
        return !file.exists() || file.canWrite();
    }

    /** Closes {@code resource} if it is non-null, ignoring a failure of the close itself. */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Callers flush and check for errors before closing, so a failing
            // close cannot hide lost data.
        }
    }
}
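/*
 * Keyboard shortcuts of the web code viewer this file was exported from
 * (page residue, not part of the source):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */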