// fc.cpp - JNI entry points (Java class "fc") that forward to the HL word-segmentation API.
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "jni.h"
#include "fc.h"
#include "HLSegFunc.h"
/**
 * JNI entry point for the static native method fc.HLSplitInit().
 *
 * Forwards to HLSplitInit() and hands its result back to Java as a jboolean.
 * Neither the JNI environment nor the class reference is used.
 */
JNIEXPORT jboolean JNICALL Java_fc_HLSplitInit(JNIEnv * /*env*/, jclass /*cls*/)
{
    const jboolean initialised = HLSplitInit();
    return initialised;
}
/**
 * JNI entry point for the static native method fc.HLFreeSplit().
 *
 * Forwards to HLFreeSplit(); nothing is returned to Java.
 * Neither the JNI environment nor the class reference is used.
 */
JNIEXPORT void JNICALL Java_fc_HLFreeSplit(JNIEnv * /*env*/, jclass /*cls*/)
{
    HLFreeSplit();
}
/**
 * JNI entry point for the static native method fc.HLOpenUsrDict(byte[]).
 *
 * Copies the Java byte array into a private, NUL-terminated buffer and passes
 * it to HLOpenUsrDict(). The copy replaces the previous
 * GetByteArrayElements() call, which was never paired with
 * ReleaseByteArrayElements() (leaking on every call) and which handed the
 * library a buffer with no guaranteed terminator.
 *
 * @param env JNI environment.
 * @param b   Calling class (unused).
 * @param c   Dictionary name as raw bytes; a trailing NUL is optional.
 * @return    The result of HLOpenUsrDict(), or JNI_FALSE when c is null or
 *            the temporary buffer cannot be allocated.
 */
JNIEXPORT jboolean JNICALL Java_fc_HLOpenUsrDict
(JNIEnv *env, jclass b, jbyteArray c)
{
    if (c == NULL)
        return JNI_FALSE;

    const jsize nLen = env->GetArrayLength(c);
    char *pszDict = (char *)malloc((size_t)nLen + 1);
    if (pszDict == NULL)
        return JNI_FALSE;

    // Region copy needs no matching Release call, unlike GetByteArrayElements.
    env->GetByteArrayRegion(c, 0, nLen, (jbyte *)pszDict);
    pszDict[nLen] = '\0';

    // NOTE(review): assumes HLOpenUsrDict does not keep the pointer after it
    // returns - confirm against the HL library documentation.
    const jboolean opened = HLOpenUsrDict(pszDict);

    free(pszDict);
    return opened;
}
JNIEXPORT void JNICALL Java_fc_seg
(JNIEnv *env, jobject b, jbyteArray c, jint nWordPos, jint nKeyWords, jint nFinger, jint nForSearcher)
{
//初始化附加计算标识
int nExtra=0;
//获得附加计算标识
if(nWordPos==1)
nExtra |= HL_CAL_OPT_POS ;
if(nKeyWords==1)
nExtra |= HL_CAL_OPT_KEYWORD ;
if(nForSearcher==1)
nExtra |= HL_CAL_OPT_SEARCH;
if(nFinger==1)
nExtra |= HL_CAL_OPT_FINGER ;
//待分词字符串的指定处理
jbyte * arrayBody = (env)->GetByteArrayElements(c,0);
char * ptr = (char *)arrayBody;
//打开分词句柄
HANDLE hHandle=HLOpenSplit();
if(hHandle == INVALID_HANDLE_VALUE)
{
printf("分词句柄打开失败!");
return ;
}
//记录开始时间
DWORD dwStart = GetTickCount();
//按附加计算进行分词
HLSplitWord(hHandle ,ptr,nExtra);
//记录结束时间
DWORD dwEnd = GetTickCount();
//显示花费时间
printf("字节:%d 耗时:%dms \n",strlen(ptr),(dwEnd-dwStart));
//没有附加计算的分词结果:
int nCnt = HLGetWordCnt(hHandle) ;
//定义结构体指针
SHLSegWord* pWord = NULL ;
printf("\n======>没有附加计算的分词结果:\n\n");
for(int i = 0 ; i < nCnt ; i++)
{
//获得指定的分词结果
pWord = HLGetWordAt(hHandle,i);
printf("%s ",pWord->s_szWord);
}
//显示附加计算的分词结果:
//(1)结果加注词性
if(nWordPos==1)
{
//获得分词结果个数
int nCnt = HLGetWordCnt(hHandle) ;
//定义结构体指针
SHLSegWord* pWord = NULL ;
printf("\n\n======>分词结果加注词性:\n\n");
for(int i = 0 ; i < nCnt ; i++)
{
//获得指定的分词结果
pWord = HLGetWordAt(hHandle,i);
//在词后面加注词性,可考虑将词性加注在结构体的s_szWord成员变量后面
if((pWord->s_dwPOS & NATURE_D_A) == NATURE_D_A)
{
printf("%s/a ",pWord->s_szWord);//形容词
}
else if((pWord->s_dwPOS & NATURE_D_B) == NATURE_D_B)
{
printf("%s/b ",pWord->s_szWord);//区别词
}
else if((pWord->s_dwPOS & NATURE_D_C) == NATURE_D_C)
{
printf("%s/c ",pWord->s_szWord);//连词
}
else if((pWord->s_dwPOS & NATURE_D_D) == NATURE_D_D)
{
printf("%s/d ",pWord->s_szWord);//副词
}
else if((pWord->s_dwPOS & NATURE_D_E) == NATURE_D_E)
{
printf("%s/e ",pWord->s_szWord);//叹词
}
else if((pWord->s_dwPOS & NATURE_D_F) == NATURE_D_F)
{
printf("%s/f ",pWord->s_szWord);//方位词
}
else if((pWord->s_dwPOS & NATURE_D_I) == NATURE_D_I)
{
printf("%s/i ",pWord->s_szWord);//成语
}
else if((pWord->s_dwPOS & NATURE_D_L) == NATURE_D_L)
{
printf("%s/l ",pWord->s_szWord);//习语
}
else if((pWord->s_dwPOS & NATURE_A_M) == NATURE_A_M)
{
printf("%s/m ",pWord->s_szWord);//数词
}
else if((pWord->s_dwPOS & NATURE_D_MQ) == NATURE_D_MQ)
{
printf("%s/mq ",pWord->s_szWord);//数量词
}
else if((pWord->s_dwPOS & NATURE_D_N) == NATURE_D_N)
{
printf("%s/n ",pWord->s_szWord);//名词
}
else if((pWord->s_dwPOS & NATURE_D_O) == NATURE_D_O)
{
printf("%s/o ",pWord->s_szWord);//拟声词
}
else if((pWord->s_dwPOS & NATURE_D_P) == NATURE_D_P)
{
printf("%s/p ",pWord->s_szWord);//介词
}
else if((pWord->s_dwPOS & NATURE_A_Q) == NATURE_A_Q)
{
printf("%s/q ",pWord->s_szWord);//量词
}
else if((pWord->s_dwPOS & NATURE_D_R) == NATURE_D_R)
{
printf("%s/r ",pWord->s_szWord);//代词
}
else if((pWord->s_dwPOS & NATURE_D_S) == NATURE_D_S)
{
printf("%s/s ",pWord->s_szWord);//处所词
}
else if((pWord->s_dwPOS & NATURE_D_T) == NATURE_D_T)
{
printf("%s/t ",pWord->s_szWord);//时间词
}
else if((pWord->s_dwPOS & NATURE_D_U) == NATURE_D_U)
{
printf("%s/u ",pWord->s_szWord);//助词
}
else if((pWord->s_dwPOS & NATURE_D_V) == NATURE_D_V)
{
printf("%s/v ",pWord->s_szWord);//动词
}
else if((pWord->s_dwPOS & NATURE_D_W) == NATURE_D_W)
{
printf("%s/w ",pWord->s_szWord);//标点符号
}
else if((pWord->s_dwPOS & NATURE_D_X) == NATURE_D_X)
{
printf("%s/x ",pWord->s_szWord);//非语素字
}
else if((pWord->s_dwPOS & NATURE_D_Y) == NATURE_D_Y)
{
printf("%s/y ",pWord->s_szWord);//语气词
}
else if((pWord->s_dwPOS & NATURE_D_Z) == NATURE_D_Z)
{
printf("%s/z ",pWord->s_szWord);//状态词
}
else if((pWord->s_dwPOS & NATURE_A_NR) == NATURE_A_NR)
{
printf("%s/nr ",pWord->s_szWord);//人名
}
else if((pWord->s_dwPOS & NATURE_A_NS) == NATURE_A_NS)
{
printf("%s/ns ",pWord->s_szWord);//地名
}
else if((pWord->s_dwPOS & NATURE_A_NT) == NATURE_A_NT)
{
printf("%s/nt ",pWord->s_szWord);//机构团体
}
else if((pWord->s_dwPOS & NATURE_A_NX) == NATURE_A_NX)
{
printf("%s/nx ",pWord->s_szWord);//外文字符
}
else if((pWord->s_dwPOS & NATURE_A_NZ) == NATURE_A_NZ)
{
printf("%s/nz ",pWord->s_szWord);//其他专名
}
else if((pWord->s_dwPOS & NATURE_D_H) == NATURE_D_H)
{
printf("%s/h ",pWord->s_szWord);//前接成分
}
else if((pWord->s_dwPOS & NATURE_D_K) == NATURE_D_K)
{
printf("%s/k ",pWord->s_szWord);//后接成分
}
else
{
printf("%s/? ",pWord->s_szWord);//未知词性
}
}
}
//(2)显示关键词的相关信息
if(nKeyWords==1)
{
//获得关键词个数
int nKeyCnt = HLGetFileKeyCnt(hHandle) ;
//char* strKeywords;
printf("\n\n======>显示关键词的相关信息:\n\n");
for(int j = 0 ; j < nKeyCnt ; j++)
{
//获得指定的关键词
SHLSegWord* pKey = HLGetFileKeyAt(hHandle,j);
if(!pKey)//认为标点符号不是关键词
continue ;
printf("%d.\t%s\t%f\n",j+1,pKey->s_szWord ,pKey->s_fWeight) ;
}//for j
}
//(3)显示语义指纹
if(nFinger==1)
{
LPBYTE pData ;
unsigned long nDataLen ;
//获得语义指纹
printf("\n======>语义指纹: ");
if(HLGetFingerM(hHandle,pData,nDataLen))
{
for(unsigned int j=0; j < nDataLen; j++)
{
printf("%2.2x ",pData[j]);
}
}
}
//(4)面向检索的分词结果:
if(nForSearcher==1)
{
printf("\n======>Sorry! 为检索优化的接口研究版没有提供。");
}
HLCloseSplit(hHandle);//关闭分词句柄
}
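// NOTE: code-viewer page residue (keyboard-shortcut help), not part of fc.cpp:
//   copy code: Ctrl + C; search code: Ctrl + F; full screen: F11;
//   toggle theme: Ctrl + Shift + D; show shortcuts: ?;
//   increase font size: Ctrl + =; decrease font size: Ctrl + -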