⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 fenci_main.cpp

📁 用来文本分类的
💻 CPP
字号:
#include "fenci_main.h"
/* Number of leading entries of the pDoctf counter array that tfcout() resets
   to zero before counting; presumably the dictionary size -- confirm against
   the word list actually loaded. */
#define DICLENGTH 30083
/*
 * Extract a substring: copy j characters of input, starting at offset i,
 * into output and terminate output with a NUL byte.
 *
 *   i      - index in input of the first character to copy
 *   j      - number of characters to copy; when j <= 0 output becomes ""
 *   input  - source buffer; no bounds checking is performed, so positions
 *            i .. i+j-1 must be readable
 *   output - destination buffer; must have room for j+1 bytes
 */
void strsel(int i, int j, char *input,char *output) {
	int copied = 0;
	while (copied < j) {
		output[copied] = input[i + copied];
		++copied;
	}
	output[copied] = '\0';
}

/*
 * Segment the text file pword into dictionary words and count how often each
 * dictionary entry occurs.
 *
 *   pword          - path of the text file to read (processed line by line)
 *   pDoctf         - output array of at least DICLENGTH counters; it is zeroed
 *                    first, then pDoctf[k] is incremented for every matched
 *                    word whose wlist.LMNameIdx() result is k
 *   wlist          - dictionary; LMNameIdx(word) is treated as returning the
 *                    word's index, or a negative value when it is absent
 *   listStopWord   - array of nNumOfStopWord NUL-terminated stop words
 *   nNumOfStopWord - number of entries in listStopWord
 *
 * Matching walks each line in 2-byte units (presumably a double-byte encoding
 * such as GBK -- confirm): starting with one unit, the candidate is extended
 * by one more unit for as long as the longer candidate is in the dictionary;
 * the last candidate is counted if it is itself a dictionary word.  Finally
 * the counters of all stop words are reset to zero.
 *
 * If the file cannot be opened an error is printed to cout and the function
 * returns with pDoctf zeroed.
 */
void tfcout(char *pword, int * pDoctf, WListName &wlist,char (* listStopWord)[10],int nNumOfStopWord) {
	int i;	// function scope: shared by all loops below (no reliance on for-scope leakage)

	// Reset every counter before counting.
	for(i=0;i<DICLENGTH;i++)
		pDoctf[i]=0;

	char Sentence[200];
	ifstream fin(pword,ios::nocreate);
	if(!fin.is_open()) {
		cout<<"Error Opening "<<pword<<" for read. "<<endl;
		return;
	}

	bool moreInput=true;
	while(moreInput) {
		Sentence[0]=0;	// guarantees a defined (empty) line if getline stores nothing
		fin.getline(Sentence,sizeof(Sentence));

		if(fin.bad()) break;	// unrecoverable read error: stop instead of spinning
		if(fin.eof()) {
			// Last line of the file: process whatever was read, then stop.
			moreInput=false;
		}
		else if(fin.fail()) {
			// Standard iostreams set failbit when a line does not fit into
			// Sentence.  The buffer then holds the first part of the line;
			// clear the error so the remainder is read on the next pass.
			if(Sentence[0]==0) break;	// no progress possible
			fin.clear();
		}

		int SenLength=static_cast<int>(strlen(Sentence));
		if(SenLength==0) continue;	// blank line

		int j;
		for (i=0;i<SenLength;i+=(2*j)) {
			char p[256];
			for (j=1;(i+2*j)<=SenLength;j++) {
				if((i+2*j)<SenLength) {
					// Room to grow: keep extending while the longer
					// candidate is still a dictionary word.
					strsel(i,2*(j+1),Sentence,p);
					if(wlist.LMNameIdx(p)>=0) continue;
				}
				// Either the line ends here or the longer candidate is
				// unknown: settle on the current candidate.
				strsel(i,2*j,Sentence,p);
				// Looked up once and reused (assumes LMNameIdx is a pure
				// lookup -- confirm).
				int offset=wlist.LMNameIdx(p);
				if(offset>=0 && offset<DICLENGTH)
					pDoctf[offset]+=1;
				break;
			}
		}
	}

	// Remove stop words from the counts.  Stop words that are not in the
	// dictionary (negative index) are skipped rather than used as an index.
	for(i=0;i<nNumOfStopWord;i++)
	{
		int idx=wlist.LMNameIdx(listStopWord[i]);
		if(idx>=0 && idx<DICLENGTH)
			pDoctf[idx]=0;
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -