📄 cresult.java
字号:
package com.gftech.ictclas4j.result;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import com.gftech.common.GFCommon;
import com.gftech.common.GFString;
import com.gftech.ictclas4j.segment.CSegment;
import com.gftech.ictclas4j.tag.CSpan;
import com.gftech.ictclas4j.unknown.CUnknowWord;
import com.gftech.ictclas4j.utility.CDictionary;
import com.gftech.ictclas4j.utility.Final;
import com.gftech.ictclas4j.utility.TagWordResult;
import com.gftech.ictclas4j.utility.Utility;
public class CResult {
// Segmenter used by Processing (BiSegment / BiOptimumSegment).
private CSegment m_Seg;
// Core dictionary and bigram dictionary; both are loaded in the constructor.
private CDictionary m_dictCore;
private CDictionary m_dictBigram;
// POS tagger; its context file is loaded in the constructor.
private CSpan m_POSTagger;
// Unknown-word recognisers, configured in the constructor for
// person names, transliterated person names and place names respectively.
private CUnknowWord m_uPerson;
private CUnknowWord m_uTransPerson;
private CUnknowWord m_uPlace;
// Operate type. 0: Only Segment; 1: First Tag; 2: Second Type
public int m_nOperateType;
// Output format. 0: PKU criterion; 1: 973 criterion; 2: XML criterion
public int m_nOutputFormat;
// Smoothing parameter handed to the bigram segmentation calls.
public double m_dSmoothingPara;
// Number of segmentation results recorded by Processing.
public int m_nResultCount;
// The buffer which stores the segmentation and POS results;
// they are stored ordered by their possibility.
public TagWordResult[][] m_pResult;
// Possibility value reported next to each entry of m_pResult
// (Processing prints it as "P=Exp(...)").
public double[] m_dResultPossibility = new double[Final.MAX_SEGMENT_NUM];
/**
 * Creates an analyser with its result buffers allocated, its collaborators
 * instantiated, and the dictionaries / tagging contexts loaded from the
 * {@code data} directory (paths are relative to the working directory).
 *
 * Defaults set here: operate type 2, output format 0 (PKU) and a smoothing
 * parameter of 0.1.
 */
public CResult() {
    // Allocate the result table: one row of word slots per candidate result.
    m_pResult = new TagWordResult[Final.MAX_SEGMENT_NUM][];
    for (int i = 0; i < Final.MAX_SEGMENT_NUM; i++) {
        m_pResult[i] = new TagWordResult[Final.MAX_WORDS];
    }

    // The collaborator fields are declared without initializers, so they
    // must be created before first use; otherwise the Load/Configure calls
    // below fail with a NullPointerException.
    // NOTE(review): assumes each of these types has a no-arg constructor —
    // confirm against their declarations.
    m_Seg = new CSegment();
    m_dictCore = new CDictionary();
    m_dictBigram = new CDictionary();
    m_POSTagger = new CSpan();
    m_uPerson = new CUnknowWord();
    m_uTransPerson = new CUnknowWord();
    m_uPlace = new CUnknowWord();

    // NOTE(review): the paths use '\\' as separator, which only resolves to
    // a sub-directory on Windows — confirm whether other platforms matter.
    m_dictCore.Load("data\\coreDict.dct", false);
    m_POSTagger.LoadContext("data\\lexical.ctx");
    // An alternative data set (data\\Dict.dct with data\\trainTest.ctx) and a
    // one-off manual re-weighting of core dictionary entries that was saved
    // to data\\coreDictOptimum.dct used to be available here as
    // commented-out code.

    m_POSTagger.SetTagType(Final.TAG_TYPE.TT_NORMAL);
    // Set the person recognition configure
    m_uPerson.Configure("data\\nr", Final.TAG_TYPE.TT_PERSON);
    // Set the place recognition configure
    m_uPlace.Configure("data\\ns", Final.TAG_TYPE.TT_PLACE);
    // Set the transliteration person recognition configure
    m_uTransPerson.Configure("data\\tr", Final.TAG_TYPE.TT_TRANS_PERSON);

    m_nOperateType = 2;// 0:Only Segment;1: First Tag; 2:Second Type
    m_nOutputFormat = 0;// 0:PKU criterion;1:973 criterion; 2: XML criterion
    m_dSmoothingPara = 0.1;// Smoothing parameter

    m_dictBigram.Load("data\\BigramDict.dct", false);
}
/**
 * Segments and POS-tags one sentence and leaves the ranked candidates in
 * {@code m_pResult} / {@code m_dResultPossibility}.
 *
 * Steps, in order: bigram segmentation; POS tagging plus person,
 * transliterated-person and place recognition on every candidate; a second
 * segmentation pass via {@code BiOptimumSegment}; POS tagging of the new
 * candidates; {@code Sort()}. Each stage prints its candidates to standard
 * output.
 *
 * @param sSentence the sentence bytes, handed unchanged to the segmenter
 * @param nCount    forwarded to {@code BiSegment} and {@code BiOptimumSegment}
 *                  (presumably the number of candidates wanted — confirm)
 * @return always {@code true}
 */
public boolean Processing(byte[] sSentence, int nCount) {
    int nIndex;
    // Scratch buffer that Output() fills with the printable form of a result.
    byte[] sSegment = new byte[Final.MAX_SENTENCE_LEN * 2];

    // Bigram segment (the unigram alternative was m_Seg.Segment).
    m_Seg.BiSegment(sSentence, m_dSmoothingPara, m_dictCore, m_dictBigram,
            nCount);
    // Record the number of result
    m_nResultCount = m_Seg.m_nSegmentCount;

    for (nIndex = 0; nIndex < m_Seg.m_nSegmentCount; nIndex++) {
        m_POSTagger.POSTagging(m_Seg.m_pWordSeg[nIndex], m_dictCore,
                m_dictCore);
        Output(m_Seg.m_pWordSeg[nIndex], sSegment, false);
        // Parenthesise the addition so the 1-based number is printed (not
        // the digits "0" and "1" glued together), and decode the buffer
        // instead of printing the array reference.
        System.out.println("POS Tag " + (nIndex + 1) + " "
                + new String(sSegment));

        // Unknown-word recognition on this candidate.
        m_uPerson.Recognition(m_Seg.m_pWordSeg[nIndex],
                m_Seg.m_graphOptimum, m_Seg.m_graphSeg, m_dictCore);
        m_uTransPerson.Recognition(m_Seg.m_pWordSeg[nIndex],
                m_Seg.m_graphOptimum, m_Seg.m_graphSeg, m_dictCore);
        m_uPlace.Recognition(m_Seg.m_pWordSeg[nIndex],
                m_Seg.m_graphOptimum, m_Seg.m_graphSeg, m_dictCore);
    }
    System.out.println("After person recognition.");

    // Bigram optimum segmentation (the unigram alternative was
    // m_Seg.OptimumSegmet).
    m_Seg.BiOptimumSegment(nCount, m_dSmoothingPara, m_dictBigram,
            m_dictCore);
    for (nIndex = 0; nIndex < m_Seg.m_nSegmentCount; nIndex++) {
        m_POSTagger.POSTagging(m_Seg.m_pWordSeg[nIndex], m_dictCore,
                m_dictCore);
        Output(m_Seg.m_pWordSeg[nIndex], sSegment, false);
        System.out.println("POS Tag " + (nIndex + 1) + " "
                + new String(sSegment));
    }

    System.out.println("After Sorting.");
    Sort();// Sort the ending
    for (nIndex = 0; nIndex < m_Seg.m_nSegmentCount; nIndex++) {
        Output(m_pResult[nIndex], sSegment, false);
        System.out.println("POS Tag " + (nIndex + 1) + "(P=Exp("
                + m_dResultPossibility[nIndex] + ")):"
                + new String(sSegment));
    }
    return true;
}
/**
 * Walks a paragraph byte by byte, cuts it into sentences at separator
 * characters, runs Processing on each sentence and copies the first-ranked
 * result (m_pResult[0], rendered by Output) into sResult.
 *
 * A byte with a negative value is treated as the first byte of a
 * double-byte character and is consumed together with the byte after it.
 *
 * @param sParagraph the paragraph bytes to analyse
 * @param sResult    caller-supplied buffer that receives the rendered result
 * @return always true
 */
public boolean ParagraphProcessing(byte[] sParagraph, byte[] sResult) {
byte[] sSentence;
// Holds the current character: one or two bytes followed by a 0 byte.
byte[] sChar = new byte[3];
byte[] sSentenceResult;
// NOTE(review): the extra 13 bytes are presumably room for the sentence
// begin/end markers — confirm against Final.SENTENCE_BEGIN / SENTENCE_END.
int nLen = sParagraph.length + 13;
sSentence = new byte[nLen];// buffer for the sentence being collected
sSentenceResult = new byte[nLen * 3];// buffer for one rendered sentence
sSentence[0] = 0;
// NOTE(review): nSentenceIndex is never read in this method.
int nPosIndex = 0, nParagraphLen = sParagraph.length, nSentenceIndex = 0;
sChar[2] = 0;
sResult[0] = 0;// Init the result
boolean bFirstIgnore = true;
// Offset passed to bytesCopy as its third argument.
// NOTE(review): presumably bytesCopy is (dest, src, destOffset, length) — confirm.
int index = 0;
// Add a sentence begin flag
GFCommon.bytesCopy(sSentence, Final.SENTENCE_BEGIN.getBytes(), index,
Final.SENTENCE_BEGIN.getBytes().length);
index += Final.SENTENCE_BEGIN.getBytes().length;
// Find a whole sentence, which is separated by ! . \n \r
while (nPosIndex < nParagraphLen) {
sChar[0] = sParagraph[nPosIndex];// Get a char
sChar[1] = 0;
if (sParagraph[nPosIndex] < 0) {// double byte char
nPosIndex += 1;
// NOTE(review): if the paragraph ends on a lead byte this index is
// one past the end of sParagraph.
sChar[1] = sParagraph[nPosIndex];
}
nPosIndex += 1;
/*
 * Separator sets from the original C++ headers:
 * #define SEPERATOR_C_SENTENCE "。!?:;…" #define
 * SEPERATOR_C_SUB_SENTENCE "、,()“”‘’" #define SEPERATOR_E_SENTENCE
 * "!?:;" #define SEPERATOR_E_SUB_SENTENCE ",()\042'" #define
 * SEPERATOR_LINK "\n\r "
 */
if (Utility.CC_Find(Final.SEPERATOR_C_SENTENCE.getBytes(), sChar)
|| Utility.CC_Find(Final.SEPERATOR_C_SUB_SENTENCE
.getBytes(), sChar)
|| Utility.strstr(Final.SEPERATOR_E_SENTENCE.getBytes(),
sChar) != -1
|| Utility.strstr(
Final.SEPERATOR_E_SUB_SENTENCE.getBytes(), sChar) != -1
|| Utility.strstr(Final.SEPERATOR_LINK.getBytes(), sChar) != -1) {
// Reached the end of a sentence: a whole sentence has been collected.
if (Utility.strstr(Final.SEPERATOR_LINK.getBytes(), sChar) == -1)
// Not a link separator: the separator itself is appended to the sentence.
{
// sChar.length is always 3, so the trailing 0 byte(s) are copied too.
GFCommon.bytesCopy(sSentence, sChar, index, sChar.length);
index += sChar.length;
}
// NOTE(review): Utility.strcmp returns a boolean here; presumably
// "== false" means the buffer holds more than the begin marker — confirm.
if (sSentence[0] != 0
&& Utility.strcmp(sSentence, Final.SENTENCE_BEGIN
.getBytes()) == false) {
// Only a full-sentence separator (not a sub-sentence one) gets the
// ending flag.
if (Utility.strstr(Final.SEPERATOR_C_SUB_SENTENCE
.getBytes(), sChar) == -1
&& Utility.strstr(Final.SEPERATOR_E_SUB_SENTENCE
.getBytes(), sChar) == -1) {
// Add sentence ending flag
// NOTE(review): this uses String.length() (characters) where the
// final flush below uses getBytes().length (bytes) — confirm
// the two agree for SENTENCE_END.
GFCommon
.bytesCopy(sSentence, Final.SENTENCE_END
.getBytes(), index, Final.SENTENCE_END
.length());
index += Final.SENTENCE_END.length();
}
// Process the current sentence; Processing also prints its results.
Processing(sSentence, 1);
// Render the first-ranked result into the intermediate buffer.
Output(m_pResult[0], sSentenceResult, bFirstIgnore);
// bFirstIgnore=true;
// Store in the result buffer.
// NOTE(review): the offset used for sResult is the sentence-buffer
// offset "index", and the length is the full capacity of
// sSentenceResult — confirm this is intended.
GFCommon.bytesCopy(sResult, sSentenceResult, index,
sSentenceResult.length);
}
// Link the result with the SEPERATOR_LINK
if (Utility.strstr(Final.SEPERATOR_LINK.getBytes(), sChar) != -1) {
GFCommon.bytesCopy(sResult, sChar, index, sChar.length);
// Add a sentence begin flag at the start of the sentence buffer.
// NOTE(review): index is advanced here rather than reset, and the
// other two branches below leave it unchanged — confirm.
GFCommon.bytesCopy(sSentence, Final.SENTENCE_BEGIN
.getBytes(), 0,
Final.SENTENCE_BEGIN.getBytes().length);
index += Final.SENTENCE_BEGIN.getBytes().length;
// sSentence[0]=0;//New sentence, and begin new segmentation
// bFirstIgnore=false;
} else if (Utility.strstr(
Final.SEPERATOR_C_SENTENCE.getBytes(), sChar) != -1
|| Utility.strstr(
Final.SEPERATOR_E_SENTENCE.getBytes(), sChar) != -1) {
// Add a sentence begin flag at the start of the sentence buffer.
GFCommon.bytesCopy(sSentence, Final.SENTENCE_BEGIN
.getBytes(), 0,
Final.SENTENCE_BEGIN.getBytes().length);
// sSentence[0]=0;//New sentence, and begin new segmentation
// bFirstIgnore=false;
} else {
// Reset the current sentence and put the previous ending
// character at its begin position.
GFCommon.bytesCopy(sSentence, sChar, 0, sChar.length);
}
} else
// Other chars: store in the sentence buffer.
// NOTE(review): index is not advanced after this copy, so consecutive
// ordinary characters are written at the same offset — confirm.
GFCommon.bytesCopy(sSentence, sChar, index, sChar.length);
}
// Flush whatever is left in the sentence buffer after the last separator.
if (sSentence[0] != 0
&& Utility.strcmp(sSentence, Final.SENTENCE_BEGIN.getBytes()) == false) {
// Add sentence ending flag
GFCommon.bytesCopy(sSentence, Final.SENTENCE_END.getBytes(), index,
Final.SENTENCE_END.getBytes().length);
index += Final.SENTENCE_END.getBytes().length;
// Process the current sentence; Processing also prints its results.
Processing(sSentence, 1);
// Debug output. Passing a byte[] to println prints the array's default
// toString(), not the sentence text.
System.out.println(sSentence);
// Render the first-ranked result into the intermediate buffer.
Output(m_pResult[0], sSentenceResult, bFirstIgnore);
// Debug output. This likewise prints the array's default toString().
System.out.println(m_pResult);
// Store in the result buffer
GFCommon.bytesCopy(sResult, sSentenceResult, index,
sSentenceResult.length);
}
return true;
}
/**
 * Analyses a whole file and writes the rendered result to another file.
 *
 * The source is consumed in chunks of up to 4 KiB; each chunk is passed to
 * {@link #ParagraphProcessing(byte[], byte[])} and the result buffer is
 * written as one line. The running chunk number is printed to standard
 * output. When the output format is 2 (XML) the output is wrapped in an XML
 * declaration and a {@code <result>} element.
 *
 * @param sSourceFile path of the file to read
 * @param sResultFile path of the file to write; created if it does not exist
 * @return {@code false} if the source cannot be read, the result file exists
 *         but is not writable, or an I/O error occurs; {@code true} otherwise
 */
public boolean FileProcessing(String sSourceFile, String sResultFile) {
    final int chunkSize = 4 * 1024;

    File fpSource = new File(sSourceFile);
    // Cannot open the source file to read
    if (!fpSource.canRead()) {
        return false;
    }
    File fpResult = new File(sResultFile);
    // File.canWrite() is false for a file that does not exist yet, so only
    // reject a result file that exists and is not writable; a missing file
    // is created by FileOutputStream below.
    if (fpResult.exists() && !fpResult.canWrite()) {
        return false;
    }

    byte[] sParagraph = new byte[chunkSize];
    byte[] sParagraphResult = new byte[8 * 1024];
    int nLineIndex = 1;

    DataInputStream in = null;
    PrintWriter out = null;
    try {
        in = new DataInputStream(new FileInputStream(fpSource));
        // NOTE(review): the writer uses the platform default charset while
        // the XML declaration announces gb2312 — confirm they agree.
        out = new PrintWriter(new FileOutputStream(fpResult));
        if (m_nOutputFormat == 2) {// XML format
            // The version number must be exactly "1.0" (no leading space).
            out.println("<?xml version=\"1.0\" encoding=\"gb2312\"?><result>");
        }
        int first;
        // One byte is read up front to detect end of file; the rest of the
        // chunk is fetched through Utility.readBytes.
        while ((first = in.read()) != -1) {
            sParagraph[0] = (byte) first;
            byte[] sParagraph2 = Utility.readBytes(in, chunkSize - 1);
            // NOTE(review): sParagraph is reused between chunks; if readBytes
            // can return fewer bytes than requested, stale bytes from the
            // previous chunk may remain — confirm readBytes/bytesCopy.
            GFCommon.bytesCopy(sParagraph, sParagraph2, 1, chunkSize - 1);
            System.out.println(nLineIndex++);
            ParagraphProcessing(sParagraph, sParagraphResult);
            out.println(new String(sParagraphResult));
        }
        if (m_nOutputFormat == 2) {// XML format
            out.println("</result>");
        }
        return true;
    } catch (IOException e) {
        // Also covers FileNotFoundException. Report the failure to the
        // caller instead of claiming success.
        e.printStackTrace();
        return false;
    } finally {
        // Release both streams even when processing failed part-way.
        if (out != null) {
            out.close();
        }
        if (in != null) {
            try {
                in.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
public boolean Output(TagWordResult[] pItem, byte[] sResult,
boolean bFirstWordIgnore) {
int i = 0;
byte[] sTempBuffer = new byte[Final.WORD_MAXLENGTH];
byte[] sPOS = new byte[3];
sPOS[2] = 0;
sResult[0] = 0;
int index = 0;
String result = "";
if (bFirstWordIgnore)// Ignore first valid
i = 1;
// Not sentence ending flag
while (pItem[i].sWord[0] != 0
&& pItem[i].nHandle != Final.CT_SENTENCE_END) {
// Get the POS string
if (m_nOutputFormat != 0)// Not PKU format
PKU2973POS(pItem[i].nHandle, sPOS);
else// PKU format
{
sPOS[0] = (byte) (pItem[i].nHandle / 256);
sPOS[1] = (byte) (pItem[i].nHandle % 256);
}
sPOS[m_nOperateType] = 0;// Set the sPOS with operate type
if (m_nOutputFormat == 0)// PKU format
{
result += GFString.getChineseString(pItem[i].sWord, "gb2312");
if (sPOS[0] != 0)// need POS
{
result += GFString.getChineseString(sPOS, "gb2312");
}
result += " ";
} else if (m_nOutputFormat == 1)// 973 format
{
result += GFString.getChineseString(pItem[i].sWord, "gb2312")
+ "\\";
if (sPOS[0] != 0)// need POS
{
result += "[" + GFString.getChineseString(sPOS, "gb2312")
+ "]";
}
} else if (m_nOutputFormat == 2)// XML format
{
if (sPOS[0] != 0)// POS
{
result += "<any type=\""
+ GFString.getChineseString(sPOS, "gb2312") + "\">";
}
result += "<src>"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -