⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 update10_31.c

📁 对索引表中的TXT文本进行处理
💻 C
字号:
#include "update.h"

#include <stdint.h>


/* Directory path (with a trailing "\\") of each file recorded by BrowseDir;
   the array and its entries are allocated in update_main. */
char **upFileNameList;
/* Bare file name of each recorded file; parallel to upFileNameList. */
char **upFileName;
/* Number of entries BrowseDir has recorded so far in the parallel arrays. */
int upcount = 0;
/* Size of each recorded file as reported by _finddata_t.size; parallel to
   upFileNameList. */
int *upsize;

/*
 * Insert one line of an inverted file into the in-memory list.
 *
 * The list is two-dimensional: word nodes are chained through pNextWord and
 * every further occurrence of a word is appended to that word's pNextAppear
 * chain. A private copy of the line is stored in the new node.
 *
 * Parameters:
 *   sword   - the key word of the line
 *   temp    - the complete line text (must be shorter than MAXLEN_OF_LINE)
 *   pItHead - head node of the list; a head whose sWord is empty and which has
 *             no occurrences yet is treated as an unused placeholder and is
 *             filled in by the first inserted word
 *
 * Returns 0 on success, EPAR on invalid arguments (including a line that does
 * not fit the MAXLEN_OF_LINE buffer) and -1 when memory cannot be allocated.
 * On failure the list is left unchanged.
 */
int InstLine(char *sword, char * temp, InvtNode *pItHead)
{
	InvtNode *pword;
	InvtNode *ptail;
	InvtNode *pnew;
	char *linecopy;

	if (!sword || !temp || !pItHead) {
		return EPAR;
	}
	/* The stored copy lives in a MAXLEN_OF_LINE buffer; refuse to overflow it. */
	if (strlen(temp) >= (size_t) MAXLEN_OF_LINE) {
		return EPAR;
	}

	/* Allocate before touching the list so a failure cannot leave it half-linked. */
	linecopy = (char *) malloc(sizeof(char) * MAXLEN_OF_LINE);
	if (linecopy == NULL) {
		return -1;
	}
	strcpy(linecopy, temp);

	/* Stop on the matching word node, or on the last word node if none matches. */
	pword = pItHead;
	while (pword->pNextWord != NULL && strcmp(pword->sWord, sword) != 0) {
		pword = pword->pNextWord;
	}

	if (strcmp(pword->sWord, sword) == 0) {
		/* Word already known: append a new occurrence at the end of its chain. */
		pnew = (InvtNode *) malloc(sizeof(InvtNode));
		if (pnew == NULL) {
			free(linecopy);
			return -1;
		}
		ptail = pword;
		while (ptail->pNextAppear != NULL) {
			ptail = ptail->pNextAppear;
		}
		ptail->pNextAppear = pnew;
	}
	else if (strcmp(pword->sWord, "") == 0 && pword->pNextAppear == NULL) {
		/* Unused placeholder head: fill it in instead of allocating a node.
		   A placeholder that already carries occurrences (of the empty word)
		   is not reused, otherwise those occurrences would be dropped. */
		pnew = pword;
	}
	else {
		/* New word: append a fresh word node after the last one. */
		pnew = (InvtNode *) malloc(sizeof(InvtNode));
		if (pnew == NULL) {
			free(linecopy);
			return -1;
		}
		pword->pNextWord = pnew;
	}

	pnew->pNextAppear = NULL;
	pnew->pNextWord = NULL;
	pnew->line = linecopy;
	/* NOTE(review): sWord's capacity is declared in update.h and is not
	   visible here; callers must keep sword within it. */
	strcpy(pnew->sWord, sword);
	return 0;
}

/*
 * Read an inverted file into the in-memory list.
 *
 * Every line is scanned for its key word: the characters from the second
 * character of the line up to (not including) the first space. Lines that
 * contain no space after the first character are skipped. Each remaining
 * line is handed to InstLine together with its key word.
 *
 * Parameters:
 *   file    - name of the inverted file to read
 *   pItHead - head node of the list that receives the lines
 *
 * Returns 0 on success, EPAR on NULL arguments, EFILEOPEN when the file
 * cannot be opened, and EINVTFILERD when reading fails, memory runs out,
 * InstLine fails, or a key word does not fit the MAXWORDONE buffer. The
 * caller rewrites the file from the list, so an unusable line is reported
 * as an error rather than silently dropped.
 */
int invtfileread(char *file, InvtNode *pItHead)
{
	FILE *stream;
	char *sword, *temp;
	char *space;
	size_t wordlen;
	int rc = 0;

	if (!file || !pItHead) {
		return EPAR;
	}
	if((stream = fopen(file, "r")) == NULL){
		printf("cann't open the invert file!\n");
		return EFILEOPEN;
	}

	sword = (char *) malloc(sizeof(char) * MAXWORDONE);
	temp = (char *) malloc(sizeof(char) * MAXLEN_OF_LINE);
	if (sword == NULL || temp == NULL) {
		rc = EINVTFILERD;
		goto done;
	}

	/* Loop on fgets itself; testing feof() first runs one extra iteration. */
	while (fgets(temp, MAXLEN_OF_LINE, stream) != NULL) {
		if (temp[0] == '\0') {
			/* Nothing follows the first character; temp + 1 would be past the terminator. */
			continue;
		}
		space = strchr(temp + 1, ' ');
		if (space == NULL) {
			/* No space after the key word: not an index line, skip it. */
			continue;
		}
		wordlen = (size_t) (space - (temp + 1));
		if (wordlen >= (size_t) MAXWORDONE) {
			/* Key word would overflow sword. */
			rc = EINVTFILERD;
			break;
		}
		memcpy(sword, temp + 1, wordlen);
		sword[wordlen] = '\0';
		if (InstLine(sword, temp, pItHead) != 0) {
			rc = EINVTFILERD;
			break;
		}
	}
	if (rc == 0 && ferror(stream)) {
		rc = EINVTFILERD;
	}

done:
	fclose(stream);
	free(temp);
	free(sword);
	return rc;
}

/*
 * Write the in-memory list back to its inverted file and record its words in
 * the index file.
 *
 * The file path+filename is rewritten so that all lines of one word are
 * contiguous: for every word node, its own line and then the lines of its
 * pNextAppear chain are written. For every word node one entry
 * "<word> ,<filename>" is appended to path+idxfile.
 *
 * Parameters:
 *   filename - name of the inverted file (without directory)
 *   pItHead  - head node of the list to write
 *   idxfile  - name of the index file (without directory)
 *   path     - directory prefix used for both files, including its trailing
 *              separator
 *
 * Returns 0 on success, EPAR on NULL arguments or when a combined path does
 * not fit in MAXPATHLEN, EFILEOPEN when either file cannot be opened, and -1
 * when memory cannot be allocated.
 */
int invtfilewrite(char *filename, InvtNode *pItHead, char *idxfile, char *path)
{
	FILE *stream = NULL;
	FILE *idx = NULL;
	InvtNode *pword, *pline;
	char *file;
	size_t pathlen;
	int rc = 0;

	if (!filename || !pItHead || !idxfile || !path) {
		return EPAR;
	}
	/* Both combined names are built in one MAXPATHLEN buffer. */
	pathlen = strlen(path);
	if (pathlen + strlen(filename) >= (size_t) MAXPATHLEN
		|| pathlen + strlen(idxfile) >= (size_t) MAXPATHLEN) {
		return EPAR;
	}
	file = (char *) malloc(sizeof(char) * MAXPATHLEN);
	if (file == NULL) {
		return -1;
	}

	/* Open the index first: opening the data file with "w+" truncates it, so
	   that must not happen until the index is known to be writable. */
	strcpy(file, path);
	strcat(file, idxfile);
	if((idx = fopen(file, "a+")) == NULL){
		printf("cann't open the invert file!\n");
		rc = EFILEOPEN;
		goto done;
	}
	strcpy(file, path);
	strcat(file, filename);
	if((stream = fopen(file, "w+")) == NULL){
		printf("cann't open the invert file!\n");
		rc = EFILEOPEN;
		goto done;
	}

	for (pword = pItHead; pword != NULL; pword = pword->pNextWord) {
		/* The word node itself holds the first occurrence, the chain the rest. */
		for (pline = pword; pline != NULL; pline = pline->pNextAppear) {
			if (pline->line) {
				fprintf(stream,"%s",pline->line);
			}
		}
		fprintf(idx,"%s ,",pword->sWord);
		fprintf(idx,"%s\n",filename);
	}

done:
	if (stream != NULL) {
		fclose(stream);
	}
	if (idx != NULL) {
		fclose(idx);
	}
	free(file);
	return rc;
}
/*
 * Release all memory of an inverted-file list.
 *
 * Frees every word node reachable through pNextWord (including the head),
 * every occurrence node on each word's pNextAppear chain, and the line buffer
 * of each of those nodes. The head is included because InstLine stores the
 * first word in the head node itself.
 *
 * pItHead and every non-NULL line pointer must have been obtained from
 * malloc; an unused line pointer must be NULL.
 *
 * Returns 0 on success and -1 when pItHead is NULL.
 */
int freeinvtfile(InvtNode *pItHead)
{
	InvtNode *pword, *pnextword;
	InvtNode *pline, *pnextline;

	if (!pItHead) {
		return -1;
	}
	pword = pItHead;
	while (pword != NULL) {
		/* Remember the successor before the node is released. */
		pnextword = pword->pNextWord;

		pline = pword->pNextAppear;
		while (pline != NULL) {
			pnextline = pline->pNextAppear;
			free(pline->line);
			free(pline);
			pline = pnextline;
		}

		free(pword->line);
		free(pword);
		pword = pnextword;
	}
	return 0;
}

/*
 * Tidy up the contents of a single inverted file.
 *
 * The file path+filename is read into an in-memory list, written back from
 * that list, and one entry per word is appended to path+idxfile.
 *
 * Parameters:
 *   filename - name of the file to tidy (without directory)
 *   idxfile  - name of the index file (without directory)
 *   path     - directory prefix of both files, including its trailing
 *              separator
 *
 * Returns 0 on success, EPAR on NULL arguments, EINVTFILERD when reading or
 * writing fails, and -1 when memory cannot be allocated. All memory allocated
 * here is released on every path.
 */
int fileupdate(char *filename, char * idxfile, char *path)
{
	char *file;
	InvtNode *pItHead;
	int rc = 0;

	if(!filename || !idxfile || !path){
		printf("the EPAR of the idxupdate error!\n");
		return EPAR;
	}
	file = (char *) malloc((strlen(filename) + strlen(path) + 2) * sizeof(char));
	pItHead = (InvtNode *) malloc(sizeof(InvtNode));
	if (file == NULL || pItHead == NULL) {
		free(file);
		free(pItHead);
		return -1;
	}
	strcpy(file, path);
	strcat(file, filename);

	/* Empty placeholder head; InstLine fills it with the first word. */
	pItHead->pNextAppear = NULL;
	pItHead->pNextWord = NULL;
	pItHead->line = NULL;
	strcpy(pItHead->sWord, "");

	if (invtfileread(file, pItHead) != 0) {
		rc = EINVTFILERD;
	}
	else if (invtfilewrite(filename, pItHead, idxfile, path) != 0) {
		rc = EINVTFILERD;
	}

	/* Release the list and the path buffer on success and failure alike. */
	freeinvtfile(pItHead);
	free(file);
	return rc;
}

void BrowseDir(char *curpath)
{	
	struct _finddata_t file;/*定义结构体变量*/	
	char path[256];/*路径*/	
	long handle;

	_chdir(curpath);
	handle=_findfirst("*",&file);/*查找所有txt文件*/
	if(handle==-1)/*如果handle为-1,表示当前目录为空,则结束查找而返回  */
		return;
	if(file.attrib &_A_SUBDIR)/*是目录*/
	{
		if(file.name[0]!='.')
		{
			_chdir(file.name);/*进入该目录*/
			_getcwd(path,256);/*获得目录路径*/			
			BrowseDir(path);/*继续遍历*/
			_chdir("..");/*查找完毕之后,返回上一级目录*/  
		}
	}
	else
	{/*如果第一个实体不是目录,处理该文件*/
		_getcwd(path,256);/*获得文件的完整的路径名包含文件的名称*/
		/*modified by sy 11-16*/
		/*if(((strcmp(strchr(file.name,'.'),".txt")==0)||(strcmp(strchr(file.name,'.'),".TXT")==0)) && (strcmp(file.name,"$curfile.txt") != 0) && (strcmp(file.name,"idx.txt") != 0))*/
		if(((strcmp(strchr(file.name,'.'),".txt")==0)||(strcmp(strchr(file.name,'.'),".TXT")==0)) && (strcmp(file.name,"$curfile.txt") != 0))
		{
			strcat(path,"\\");			
			strcpy(upFileNameList[upcount],path);
			strcpy(upFileName[upcount], file.name);
			upsize[upcount]=file.size;
			upcount++;
		}		
	}
	while(!(_findnext(handle,&file)))
	{/*继续对当前目录中的下一个子目录或文件进行与上面同样的查找*/
		if(file.attrib & _A_SUBDIR)
		{
			if(file.name[0]!='.')
			{
				_chdir(file.name);/*进入该目录*/
				_getcwd(path,256);/*获得目录路径*/
				BrowseDir(path);/*继续遍历*/
				_chdir("..");/*查找完毕之后,返回上一级目录*/
			}
		}
		else
		{/*如果第一个实体不是目录,显示该文件*/
			_getcwd(path,256);/* 获得文件的完整的路径名包含文件的名称*/ 
			/*modified by sy 11-16*/
			/*if(((strcmp(strchr(file.name,'.'),".txt")==0)||(strcmp(strchr(file.name,'.'),".TXT")==0)) && (strcmp(file.name,"$curfile.txt") != 0) && (strcmp(file.name,"idx.txt") != 0))*/
			if(((strcmp(strchr(file.name,'.'),".txt")==0)||(strcmp(strchr(file.name,'.'),".TXT")==0)) && (strcmp(file.name,"$curfile.txt") != 0))
			{
				strcat(path,"\\");
				strcpy(upFileNameList[upcount],path);
				strcpy(upFileName[upcount], file.name);
				upsize[upcount]=file.size;
				upcount++;
			}
		}
	}
	_findclose(handle);//关闭控制台
} 

int update_main()
{
	char *filename, *idxfile;
	char *path;
	int i;
	char *removename;

	upsize=(int *)malloc(sizeof(int)*40000);
	upFileNameList=(char**)malloc(sizeof(char*)*40000);
	upFileName=(char**)malloc(sizeof(char*)*40000);
	removename = (char *) malloc(sizeof(char*)*60);
    for(i=0;i<40000;i++)
	{
		upFileNameList[i]=(char *)malloc(sizeof(char)*256);
		upFileName[i]=(char *)malloc(sizeof(char)*256);
		strcpy(upFileName[i],"");		
		strcpy(upFileNameList[i],"");
	}

	path = (char *) malloc(sizeof(char) * MAXPATHLEN);
	printf("\n输入要更新的文件夹路径名称:\n");
	gets(path);
	while (strcmp(path,"\n") == 0) {
		printf("\n输入要更新的文件夹路径名称:\n");
		gets(path);
	}
	BrowseDir(path);
	printf("%d\n",upcount);
	/*先把所有的idx.txt都删掉   ?????????   */
	for(i = 0; i < upcount; i++)
	{
		if(strcmp(upFileName[i], "idx.txt") == 0)
		{
			strcpy(removename,upFileNameList[i]);
			strcat(removename,upFileName[i] );
			remove(removename);
		}
		if(upsize[i] == 0)
		{
			strcpy(removename,upFileNameList[i]);
			strcat(removename,upFileName[i] );
			remove(removename);
		}
	}

	filename = (char *) malloc(sizeof(char) * 50);
	idxfile = (char *) malloc(sizeof(char) * 10);
	path = (char *) malloc(sizeof(char) * 50);
	strcpy(idxfile, "idx.txt");	
	for(i = 0; i < upcount; i++)
	{
		printf("the number of txt is: %d\n",i);
		if(strcmp(upFileName[i], "idx.txt") == 0)
		{
			continue;
		}
		fileupdate(upFileName[i], idxfile, upFileNameList[i]);
	}
	free(filename);
	free(path);
	for(i = 0; i < 40000; i++)
	{
		free(upFileNameList[i]);
	}
	free(upFileNameList);
	for(i = 0; i < 40000; i++)
	{
		free(upFileName[i]);
	}
	free(upFileName);
	free(upsize);
	free(removename);
	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -