⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fc.cpp

📁 海量中文分词java接口
💻 CPP
字号:
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "jni.h"
#include "fc.h"
#include "HLSegFunc.h"

// JNI entry point exported as Java_fc_HLSplitInit.
// Neither JNI argument is used; the call is forwarded to HLSplitInit() and
// its result is handed back to the Java caller as a jboolean.
JNIEXPORT jboolean JNICALL Java_fc_HLSplitInit
  (JNIEnv * /* env */, jclass /* cls */)
{
	const jboolean initResult = HLSplitInit();
	return initResult;
}

// JNI entry point exported as Java_fc_HLFreeSplit.
// Ignores both JNI arguments and simply forwards to HLFreeSplit();
// nothing is returned to the Java side.
JNIEXPORT void JNICALL Java_fc_HLFreeSplit
  (JNIEnv * /* env */, jclass /* cls */)
{
	HLFreeSplit();
	return;
}

// JNI entry point exported as Java_fc_HLOpenUsrDict.
//
// Parameters:
//   env - JNI environment used to read the byte array.
//   b   - calling class (unused).
//   c   - bytes of the string handed to HLOpenUsrDict(); a terminating NUL
//         in the array is not required.
//
// Returns the result of HLOpenUsrDict() converted to jboolean, or JNI_FALSE
// when `c` is null or the temporary buffer cannot be allocated.
//
// The bytes are copied into a private, NUL-terminated buffer instead of
// passing the pointer from GetByteArrayElements(): a Java byte[] carries no
// terminator of its own, and the element pointer was never released.
// NOTE(review): this assumes HLOpenUsrDict() does not keep the pointer after
// it returns - confirm against the segmentation library documentation.
JNIEXPORT jboolean JNICALL Java_fc_HLOpenUsrDict
  (JNIEnv *env, jclass b, jbyteArray c)
{
	if (c == NULL)
		return JNI_FALSE;

	const jsize nBytes = env->GetArrayLength(c);
	char *szName = (char *)malloc((size_t)nBytes + 1);
	if (szName == NULL)
		return JNI_FALSE;

	// Copy the whole array and terminate it ourselves.
	env->GetByteArrayRegion(c, 0, nBytes, (jbyte *)szName);
	szName[nBytes] = '\0';

	const jboolean bOpened = HLOpenUsrDict(szName);

	free(szName);
	return bOpened;
}

JNIEXPORT void JNICALL Java_fc_seg
  (JNIEnv *env, jobject b, jbyteArray c, jint nWordPos, jint nKeyWords, jint nFinger, jint nForSearcher)
{
	//初始化附加计算标识
	int nExtra=0;
	
	//获得附加计算标识
	if(nWordPos==1)
		nExtra |= HL_CAL_OPT_POS ;
	if(nKeyWords==1)
		nExtra |= HL_CAL_OPT_KEYWORD ;
	if(nForSearcher==1)
		nExtra |= HL_CAL_OPT_SEARCH;
	if(nFinger==1)
		nExtra |= HL_CAL_OPT_FINGER ;
	
	//待分词字符串的指定处理
	jbyte * arrayBody = (env)->GetByteArrayElements(c,0);
    char * ptr = (char *)arrayBody; 
	
	//打开分词句柄
	HANDLE hHandle=HLOpenSplit();
	if(hHandle == INVALID_HANDLE_VALUE)
	{
		printf("分词句柄打开失败!");
		return ;
	}
	
	//记录开始时间
	DWORD dwStart = GetTickCount();
    //按附加计算进行分词
	HLSplitWord(hHandle ,ptr,nExtra);
	//记录结束时间
	DWORD dwEnd = GetTickCount();
	//显示花费时间
	printf("字节:%d  耗时:%dms \n",strlen(ptr),(dwEnd-dwStart));  
	
	//没有附加计算的分词结果:
	int nCnt = HLGetWordCnt(hHandle) ;	
	//定义结构体指针
	SHLSegWord* pWord = NULL ;
	printf("\n======>没有附加计算的分词结果:\n\n");
	for(int i = 0 ; i < nCnt ; i++)
	{			
		//获得指定的分词结果
		pWord = HLGetWordAt(hHandle,i);
		printf("%s ",pWord->s_szWord);
	}
	
	//显示附加计算的分词结果:

	//(1)结果加注词性
	if(nWordPos==1)
	{
		//获得分词结果个数
		int nCnt = HLGetWordCnt(hHandle) ;	
		//定义结构体指针
		SHLSegWord* pWord = NULL ;
		printf("\n\n======>分词结果加注词性:\n\n");

		for(int i = 0 ; i < nCnt ; i++)
		{			
			//获得指定的分词结果
			pWord = HLGetWordAt(hHandle,i);
			//在词后面加注词性,可考虑将词性加注在结构体的s_szWord成员变量后面
			if((pWord->s_dwPOS & NATURE_D_A) == NATURE_D_A)
			{
				printf("%s/a ",pWord->s_szWord);//形容词
			}
			else if((pWord->s_dwPOS & NATURE_D_B) == NATURE_D_B)
			{
				printf("%s/b ",pWord->s_szWord);//区别词
			}
			else if((pWord->s_dwPOS & NATURE_D_C) == NATURE_D_C)
			{
				printf("%s/c ",pWord->s_szWord);//连词
			}
			else if((pWord->s_dwPOS & NATURE_D_D) == NATURE_D_D)
			{
				printf("%s/d ",pWord->s_szWord);//副词
			}
			else if((pWord->s_dwPOS & NATURE_D_E) == NATURE_D_E)
			{
				printf("%s/e ",pWord->s_szWord);//叹词
			}
			else if((pWord->s_dwPOS & NATURE_D_F) == NATURE_D_F)
			{
				printf("%s/f ",pWord->s_szWord);//方位词
			}
			else if((pWord->s_dwPOS & NATURE_D_I) == NATURE_D_I)
			{
				printf("%s/i ",pWord->s_szWord);//成语
			}
			else if((pWord->s_dwPOS & NATURE_D_L) == NATURE_D_L)
			{
				printf("%s/l ",pWord->s_szWord);//习语
			}
			else if((pWord->s_dwPOS & NATURE_A_M) == NATURE_A_M)
			{
				printf("%s/m ",pWord->s_szWord);//数词
			}
			else if((pWord->s_dwPOS & NATURE_D_MQ) == NATURE_D_MQ)
			{
				printf("%s/mq ",pWord->s_szWord);//数量词
			}
			else if((pWord->s_dwPOS & NATURE_D_N) == NATURE_D_N)
			{
				printf("%s/n ",pWord->s_szWord);//名词
			}
			else if((pWord->s_dwPOS & NATURE_D_O) == NATURE_D_O)
			{
				printf("%s/o ",pWord->s_szWord);//拟声词		
			}
			else if((pWord->s_dwPOS & NATURE_D_P) == NATURE_D_P)
			{
				printf("%s/p ",pWord->s_szWord);//介词
			}
			else if((pWord->s_dwPOS & NATURE_A_Q) == NATURE_A_Q)
			{
				printf("%s/q ",pWord->s_szWord);//量词
			}
			else if((pWord->s_dwPOS & NATURE_D_R) == NATURE_D_R)
			{
				printf("%s/r ",pWord->s_szWord);//代词
			}
			else if((pWord->s_dwPOS & NATURE_D_S) == NATURE_D_S)
			{
				printf("%s/s ",pWord->s_szWord);//处所词
			}
			else if((pWord->s_dwPOS & NATURE_D_T) == NATURE_D_T)
			{
				printf("%s/t ",pWord->s_szWord);//时间词
			}
			else if((pWord->s_dwPOS & NATURE_D_U) == NATURE_D_U)
			{
				printf("%s/u ",pWord->s_szWord);//助词
			}
			else if((pWord->s_dwPOS & NATURE_D_V) == NATURE_D_V)
			{
				printf("%s/v ",pWord->s_szWord);//动词
			}
			else if((pWord->s_dwPOS & NATURE_D_W) == NATURE_D_W)
			{
				printf("%s/w ",pWord->s_szWord);//标点符号
			}
			else if((pWord->s_dwPOS & NATURE_D_X) == NATURE_D_X)
			{
				printf("%s/x ",pWord->s_szWord);//非语素字
			}
			else if((pWord->s_dwPOS & NATURE_D_Y) == NATURE_D_Y)
			{
				printf("%s/y ",pWord->s_szWord);//语气词
			}
			else if((pWord->s_dwPOS & NATURE_D_Z) == NATURE_D_Z)
			{
				printf("%s/z ",pWord->s_szWord);//状态词
			}
			else if((pWord->s_dwPOS & NATURE_A_NR) == NATURE_A_NR)
			{
				printf("%s/nr ",pWord->s_szWord);//人名			
			}
			else if((pWord->s_dwPOS & NATURE_A_NS) == NATURE_A_NS)
			{
				printf("%s/ns ",pWord->s_szWord);//地名
			}
			else if((pWord->s_dwPOS & NATURE_A_NT) == NATURE_A_NT)
			{
				printf("%s/nt ",pWord->s_szWord);//机构团体
			}
			else if((pWord->s_dwPOS & NATURE_A_NX) == NATURE_A_NX)
			{
				printf("%s/nx ",pWord->s_szWord);//外文字符
			}
			else if((pWord->s_dwPOS & NATURE_A_NZ) == NATURE_A_NZ)
			{
				printf("%s/nz ",pWord->s_szWord);//其他专名
			}
			else if((pWord->s_dwPOS & NATURE_D_H) == NATURE_D_H)
			{
				printf("%s/h ",pWord->s_szWord);//前接成分				
			}
			else if((pWord->s_dwPOS & NATURE_D_K) == NATURE_D_K)
			{
				printf("%s/k ",pWord->s_szWord);//后接成分
			}
			else
			{
				printf("%s/? ",pWord->s_szWord);//未知词性
			}
		} 
	}

	//(2)显示关键词的相关信息
	if(nKeyWords==1)
	{
		//获得关键词个数
		int nKeyCnt = HLGetFileKeyCnt(hHandle) ;
		//char* strKeywords;
		printf("\n\n======>显示关键词的相关信息:\n\n");
		for(int j = 0 ; j < nKeyCnt ; j++)
		{
			//获得指定的关键词
			SHLSegWord* pKey = HLGetFileKeyAt(hHandle,j);
			if(!pKey)//认为标点符号不是关键词
				continue ;
			printf("%d.\t%s\t%f\n",j+1,pKey->s_szWord ,pKey->s_fWeight) ;
		}//for j
	}

	//(3)显示语义指纹
	if(nFinger==1)
	{
		LPBYTE pData ;
		unsigned long nDataLen ;
		//获得语义指纹
		printf("\n======>语义指纹: ");
		if(HLGetFingerM(hHandle,pData,nDataLen))
		{
			for(unsigned int j=0; j < nDataLen; j++)
			{
				printf("%2.2x ",pData[j]);
			}
		}
	}

	//(4)面向检索的分词结果:
	if(nForSearcher==1)
	{
		printf("\n======>Sorry! 为检索优化的接口研究版没有提供。");
	}

	HLCloseSplit(hHandle);//关闭分词句柄
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -