📄 cdictionary.java
字号:
package com.gftech.ictclas4j.utility;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import com.gftech.common.GFCommon;
import com.gftech.common.GFString;
public class CDictionary {
/** One index table per block; the array has {@code Final.CC_NUM} slots. */
public TagIndexTable[] m_IndexTable;
/** One modification table per block, parallel to {@link #m_IndexTable}. */
public TagModifyTable[] m_pModifyTable;

/**
 * Creates a dictionary whose index and modification arrays each hold
 * {@code Final.CC_NUM} freshly constructed, non-null tables.
 */
public CDictionary() {
    final int blockCount = Final.CC_NUM;
    TagIndexTable[] indexTables = new TagIndexTable[blockCount];
    TagModifyTable[] modifyTables = new TagModifyTable[blockCount];
    int slot = 0;
    while (slot < blockCount) {
        indexTables[slot] = new TagIndexTable();
        modifyTables[slot] = new TagModifyTable();
        slot++;
    }
    m_IndexTable = indexTables;
    m_pModifyTable = modifyTables;
}
/**
 * Not implemented: performs no work.
 *
 * @return always {@code false}
 */
public boolean Optimum() {
return false;
}
/**
 * Not implemented: both arguments are ignored and nothing is merged.
 *
 * @param dict2  unused
 * @param nRatio unused
 * @return always {@code false}
 */
public boolean Merge(CDictionary dict2, int nRatio) {
return false;
}
/**
 * Not implemented: no file is written.
 *
 * @param sFilename unused
 * @return always {@code false}
 */
public boolean OutputChars(String sFilename) {
return false;
}
/**
 * Not implemented: no file is written.
 *
 * @param sFilename unused
 * @return always {@code false}
 */
public boolean Output(String sFilename) {
return false;
}
/**
 * Not implemented: no lookup is performed.
 *
 * @param sWord   unused
 * @param nHandle unused
 * @return always {@code 0}
 */
public int GetFrequency(byte[] sWord, int nHandle) {
return 0;
}
/**
 * Not implemented: both arguments are ignored.
 *
 * NOTE(review): the name {@code sPOSRet} suggests an output parameter, but a
 * Java {@code String} argument cannot be changed for the caller; a real
 * implementation would need a different way to return the text.
 *
 * @param nPOS    unused
 * @param sPOSRet unused
 * @return always {@code false}
 */
public boolean GetPOSString(int nPOS, String sPOSRet) {
return false;
}
/**
 * Not implemented: the argument is ignored.
 *
 * @param sPOS unused
 * @return always {@code 0}
 */
public int GetPOSValue(byte[] sPOS) {
return 0;
}
/**
 * Not implemented: no matching is performed and no argument is touched.
 *
 * NOTE(review): {@code npHandleRet} is an {@code int} passed by value, so it
 * cannot carry a result back to the caller even once this is implemented.
 *
 * @param sWord       unused
 * @param sWordRet    unused
 * @param npHandleRet unused
 * @return always {@code false}
 */
public boolean GetMaxMatch(byte[] sWord, byte[] sWordRet, int npHandleRet) {
return false;
}
/**
 * Not implemented: the argument is ignored.
 *
 * @param nHandle unused
 * @return always {@code false}
 */
public boolean MergePOS(int nHandle) {
return false;
}
/**
 * Not implemented: no lookup is performed and the arrays are left untouched.
 *
 * NOTE(review): {@code pnCount} is an {@code int} passed by value, so it
 * cannot report a count back to the caller even once this is implemented.
 *
 * @param sWord       unused
 * @param pnCount     unused
 * @param pnHandle    unused
 * @param pnFrequency unused
 * @return always {@code false}
 */
public boolean GetHandle(byte[] sWord, int pnCount, int[] pnHandle,
int[] pnFrequency) {
return false;
}
/**
 * Not implemented: no lookup is performed, so every word is reported as
 * absent.
 *
 * @param sWord   unused
 * @param nHandle unused
 * @return always {@code false}
 */
public boolean IsExist(byte[] sWord, int nHandle) {
return false;
}
/**
 * Not implemented: nothing is added to any table.
 *
 * @param sWord      unused
 * @param nHandle    unused
 * @param nFrequency unused
 * @return always {@code false}
 */
public boolean AddItem(byte[] sWord, int nHandle, int nFrequency) {
return false;
}
/**
 * Not implemented: nothing is removed from any table.
 *
 * @param sWord   unused
 * @param nHandle unused
 * @return always {@code false}
 */
public boolean DelItem(byte[] sWord, int nHandle) {
return false;
}
/**
 * Writes all {@code Final.CC_NUM} blocks of the dictionary to a file.
 *
 * For every block an entry count is written, followed by the entries. Each
 * entry is three ints (frequency, word length, handle) followed by exactly
 * "word length" word bytes when that length is positive. When
 * {@code m_pModifyTable} is present, its entries are merged with the index
 * table entries and index entries whose frequency is -1 are skipped as
 * deleted; otherwise the index table is written as is.
 *
 * NOTE(review): the merge assumes both tables are ordered by ascending
 * byte-wise word value and then by handle - confirm once AddItem is
 * implemented.
 * NOTE(review): ints are written with DataOutputStream.writeInt (high byte
 * first) while Load decodes them with GFCommon.bytes2int(..., false) -
 * confirm that both sides agree on the byte order.
 *
 * @param sFilename path of the file to create or overwrite
 * @return {@code true} only when the whole file was written and closed
 *         without an I/O error; {@code false} otherwise
 */
public boolean Save(String sFilename) {
    File file = new File(sFilename);
    // canWrite() is false for a path that does not exist yet, so only an
    // existing, read-only file is rejected up front; a missing file is
    // created by the FileOutputStream below.
    if (file.exists() && !file.canWrite())
        return false;// fail while opening the file
    DataOutputStream out = null;
    boolean ok = false;
    try {
        out = new DataOutputStream(new FileOutputStream(file));
        for (int i = 0; i < Final.CC_NUM; i++) {
            int indexCount = m_IndexTable[i].getCount();
            TagWordItem[] twi = m_IndexTable[i].getWordItemHead();
            int j = 0;// cursor in the original (index) table
            if (m_pModifyTable != null) {// Modification made
                out.writeInt(indexCount + m_pModifyTable[i].getNCount()
                        - m_pModifyTable[i].getNDelete());
                TagWordChain[] pCur = m_pModifyTable[i].getWordItemHead();
                int modCount = countChain(pCur);
                int k = 0;// cursor in the modify table, independent of j
                // Output to the file after comparison
                while (k < modCount && j < indexCount) {
                    int cmp = compareWords(pCur[k].getData().getWord(),
                            twi[j].getWord());
                    if (cmp < 0 || (cmp == 0 && pCur[k].getData().getHandle()
                            < twi[j].getHandle())) {
                        // The modified entry sorts first: output it
                        writeEntry(out, pCur[k].getData().getFrequency(),
                                pCur[k].getData().getWordLen(),
                                pCur[k].getData().getHandle(),
                                pCur[k].getData().getWord());
                        k++;
                    } else if (twi[j].getFrequency() == -1) {
                        // The item has been removed, so skip it
                        j++;
                    } else {
                        // Output the index table data to the file. Using a
                        // plain else guarantees that one cursor always moves.
                        writeEntry(out, twi[j].getFrequency(),
                                twi[j].getWordLen(), twi[j].getHandle(),
                                twi[j].getWord());
                        j++;
                    }
                }
                // At most one of the two loops below has work left.
                while (j < indexCount) {
                    if (twi[j].getFrequency() != -1)// not deleted
                        writeEntry(out, twi[j].getFrequency(),
                                twi[j].getWordLen(), twi[j].getHandle(),
                                twi[j].getWord());
                    j++;
                }
                while (k < modCount) {// Add the rest data to the file.
                    writeEntry(out, pCur[k].getData().getFrequency(),
                            pCur[k].getData().getWordLen(),
                            pCur[k].getData().getHandle(),
                            pCur[k].getData().getWord());
                    k++;
                }
            } else {// No modification table
                out.writeInt(indexCount);
                while (j < indexCount) {
                    writeEntry(out, twi[j].getFrequency(),
                            twi[j].getWordLen(), twi[j].getHandle(),
                            twi[j].getWord());
                    j++;
                }
            }
        }
        out.flush();
        ok = true;
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (out != null) {
            try {
                out.close();
            } catch (IOException e) {
                e.printStackTrace();
                ok = false;
            }
        }
    }
    return ok;
}

/** Returns the number of leading non-null entries of {@code chain} (0 for null). */
private static int countChain(TagWordChain[] chain) {
    int n = 0;
    while (chain != null && n < chain.length && chain[n] != null)
        n++;
    return n;
}

/**
 * Compares two words byte by byte as unsigned values, treating a zero byte
 * or the end of the array (or a null array) as the end of the word.
 */
private static int compareWords(byte[] a, byte[] b) {
    int i = 0;
    while (true) {
        int x = (a != null && i < a.length) ? (a[i] & 0xFF) : 0;
        int y = (b != null && i < b.length) ? (b[i] & 0xFF) : 0;
        if (x != y)
            return x - y;
        if (x == 0)
            return 0;
        i++;
    }
}

/**
 * Writes one entry: frequency, word length, handle, then exactly
 * {@code wordLen} bytes of {@code word} when {@code wordLen} is positive.
 */
private static void writeEntry(DataOutputStream out, int frequency,
        int wordLen, int handle, byte[] word) throws IOException {
    out.writeInt(frequency);
    out.writeInt(wordLen);
    out.writeInt(handle);
    if (wordLen > 0) {// String length is more than 0
        if (word == null || word.length < wordLen)
            throw new IOException(
                    "word buffer is shorter than its declared length "
                            + wordLen);
        // Load keeps one extra trailing byte after each word; only the
        // declared number of bytes belongs in the file.
        out.write(word, 0, wordLen);
    }
}
/**
 * Reads a dictionary file into {@code m_IndexTable}.
 *
 * For each of the {@code Final.CC_NUM} blocks the file holds an entry count
 * followed by that many entries; each entry is three 4-byte integers
 * (frequency, word length, handle) followed by "word length" word bytes when
 * that length is positive. Every word is stored in its item with one extra
 * trailing zero byte. A block whose count is not positive only has its count
 * recorded; its item array is left as it was.
 *
 * NOTE(review): the println calls emit diagnostic output for every block and
 * every entry; they are kept unchanged here but look like leftover debugging.
 *
 * @param sFilename path of the dictionary file
 * @param bReset    when {@code true}, every loaded frequency is stored as 0
 *                  instead of the value read from the file
 * @return {@code true} only when the file was readable and all blocks were
 *         read without an I/O error; {@code false} otherwise
 */
public boolean Load(String sFilename, boolean bReset) {
    File file = new File(sFilename);
    if (!file.canRead())
        return false;// fail while opening the file
    DataInputStream in = null;
    boolean ok = false;
    try {
        DelModified();
        in = new DataInputStream(new FileInputStream(file));
        for (int i = 0; i < Final.CC_NUM; i++) {
            System.out.println("块"+i);
            int count = GFCommon.bytes2int(Utility.readBytes(in,4),false);
            System.out.println(" count:"+count);
            m_IndexTable[i].setCount(count);
            if (count <= 0)
                continue;// nothing stored for this block
            TagWordItem[] twis = new TagWordItem[count];
            for (int m = 0; m < count; m++)
                twis[m] = new TagWordItem();
            m_IndexTable[i].setWordItemHead(twis);
            // Fetched once per block instead of once per entry.
            TagWordItem[] tis = m_IndexTable[i].getWordItemHead();
            for (int j = 0; j < count; j++) {
                int frequency = GFCommon.bytes2int(Utility.readBytes(in,4),false);
                int wordLen = GFCommon.bytes2int(Utility.readBytes(in,4),false);
                int handle = GFCommon.bytes2int(Utility.readBytes(in,4),false);
                System.out.println("\n wordLen:"+wordLen+"\n frequency:"+frequency+
                        "\n handle:"+handle);
                if (wordLen > 0)// String length is more than 0
                {
                    byte[] word = Utility.readBytes(in, wordLen);
                    // One spare byte is kept after the word; it stays 0.
                    byte[] word2 = new byte[word.length + 1];
                    GFCommon.bytesCopy(word2, word, 0, word.length);
                    tis[j].setWord(word2);
                    System.out.println(" word:"+GFString.bytes2hexstr(word));
                    System.out.println(" word:"+GFString.getChineseString(word,"gb2312"));
                }
                tis[j].setFrequency(bReset ? 0 : frequency);// Reset the frequency on request
                tis[j].setWordLen(wordLen);
                tis[j].setHandle(handle);
            }
        }
        ok = true;
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Close the stream on every path, including after a read failure.
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    return ok;
}
/**
 * Not implemented: the argument is ignored.
 *
 * @param sWord unused
 * @return always {@code 0}
 */
public int GetWordType(String sWord) {
return 0;
}
/**
 * Normalises a word before a dictionary operation.
 *
 * The word's character type is taken from its first two bytes as passed in
 * (before trimming). Leading and trailing spaces are then removed in place:
 * the trimmed word is moved to the start of {@code sWord} and followed by a
 * zero byte. For a Chinese word, the bytes after the first two-byte character
 * are copied into {@code sWordRet} (zero-terminated when there is room); for
 * a delimiter the whole {@code sWord} array is copied into {@code sWordRet}.
 *
 * NOTE(review): {@code nId} is an {@code int} passed by value, so the block
 * id assigned below never reaches the caller; returning it would need an
 * interface change.
 * The commented-out code this port carried also mapped numbers (id 3756),
 * letters (3757), single-byte text (3758) and index characters (3759); those
 * categories are not handled and yield {@code false}.
 *
 * @param sWord    word bytes; modified in place when spaces are trimmed
 * @param nId      assigned locally only (see note above)
 * @param sWordRet receives the remainder of the word (Chinese) or the word
 *                 itself (delimiter)
 * @param bAdd     unused
 * @return {@code true} for a Chinese word or a delimiter; {@code false} for
 *         an empty, all-space or otherwise unsupported word
 */
public boolean PreProcessing(byte[] sWord, int nId, byte[] sWordRet,boolean bAdd)
{
    // The length must be checked before any byte is read.
    if (sWord == null || sWord.length == 0)
        return false;
    int nLen = sWord.length;
    // A one-byte word has no second byte; 0 stands in for it.
    int nType = Utility.charType(sWord[0], nLen > 1 ? sWord[1] : (byte) 0);
    int nEnd = nLen - 1, nBegin = 0;
    while (nEnd >= 0 && (char) sWord[nEnd] == ' ')
        nEnd -= 1;
    while (nBegin <= nEnd && (char) sWord[nBegin] == ' ')
        nBegin += 1;
    if (nBegin > nEnd)
        return false;// nothing but spaces
    int nWordLen = nEnd - nBegin + 1;// length after trimming
    if (nEnd != nLen - 1 || nBegin != 0)
    {
        // Move the trimmed word to the front and terminate it.
        byte[] b = GFCommon.bytesCopy(sWord, nBegin, nWordLen);
        GFCommon.bytesCopy(sWord, b, 0, nWordLen);
        sWord[nWordLen] = 0;
    }
    if (nType == Final.CT_CHINESE)
    {// Chinese word
        if (nWordLen < 2)
            return false;// no complete two-byte character left
        // Get the inner code of the first Chinese Char
        nId = Utility.CC_ID(sWord[0], sWord[1]);
        // Store the word without its first Chinese Char. A word that is a
        // single character has no remainder, so only the terminator is set.
        if (sWordRet != null && sWordRet.length > 0)
        {
            int nCopy = Math.min(nWordLen - 2, sWordRet.length);
            System.arraycopy(sWord, 2, sWordRet, 0, nCopy);
            if (nCopy < sWordRet.length)
                sWordRet[nCopy] = 0;
        }
        return true;
    }
    if (nType == Final.CT_DELIMITER)
    {// Delimiter
        nId = 3755;
        // Store the whole word
        GFCommon.bytesCopy(sWordRet, sWord, 0, sWord.length);
        return true;
    }
    return false;// other invalid
}
// The data for modify
/**
 * Not implemented: no modification data is cleared.
 *
 * @return always {@code false}
 */
protected boolean DelModified() {
return false;
}
/**
 * Not implemented: no search is performed and {@code nPosRet} is left
 * untouched.
 *
 * @param nInnerCode unused
 * @param sWord      unused
 * @param nHandle    unused
 * @param nPosRet    unused
 * @return always {@code false}
 */
protected boolean FindInOriginalTable(int nInnerCode, String sWord,
int nHandle, int[] nPosRet) {
return false;
}
/**
 * Not implemented: no search is performed and {@code pFindRet} is left
 * untouched.
 *
 * @param nInnerCode unused
 * @param sWord      unused
 * @param nHandle    unused
 * @param pFindRet   unused
 * @return always {@code false}
 */
protected boolean FindInModifyTable(int nInnerCode, String sWord,
int nHandle, TagWordChain[] pFindRet) {
return false;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -