⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 invert10_31.c

📁 中文信息处理
💻 C
📖 第 1 页 / 共 4 页
字号:
		/* 若文件不存在, 以创建方式打开文件 */
		stream = fopen(sDir, "w+");
		if(stream == NULL)
		{
			printf("error is: %d\n", EFILEOPEN);
			return EFILEOPEN;
		}
		/*第一次创建时写入1.txt*/
		fprintf(stream, "%s", "1.txt");
		fclose(stream);
	}
	/*将文件名字记录在$curfile.txt文件中*/
	else
	{
		fprintf(stream, "%s", sFileName);
		fclose(stream);
	}
	return 0;
}

/*
 * Write every posting of an inverted list to one index file under sCurDir.
 *
 * The target starts as <sCurDir> + "\" + <sFileName>. If that file can be
 * opened in append mode and filesize() reports nKbyte or more,
 * create_next_file_name() is asked for successor names until a file whose
 * size is below nKbyte is found. The name finally chosen is passed to
 * WrtFlName() before any posting is written.
 *
 * Every word that owns at least one document node is written as
 *     @<sWords> #<lDocNum> 
 * followed, for each document node, by
 *     <sDocID> ,<fWeight>,<iFreq>,<pos>[+<pos>[+<pos>]],<sFileURL> ;
 * and a newline. At most three positions are written per document to keep
 * the index file small.
 *
 * Parameters:
 *   pCurIvtIdx  head of the inverted list; must not be NULL
 *   sFileName   name of the first candidate file; must not be NULL
 *   sCurDir     directory the file lives in; must not be NULL
 *   nKbyte      size limit compared against filesize(); must be > 0
 *               (NOTE(review): the unit is whatever filesize() returns --
 *               confirm it really is kilobytes)
 *
 * Returns 0 on success, EWT2FLP for invalid arguments and EINVTWRT when a
 * buffer cannot be allocated, a file cannot be opened, a generated file name
 * does not fit, or WrtFlName() fails. All buffers and the stream are released
 * on every path.
 */
int wrtlst(InvertNode *pCurIvtIdx, char * sFileName, char * sCurDir, long nKbyte)
{
	FILE *stream = NULL;
	DocNode *pCurDocNode;
	int i;
	int rc = EINVTWRT;
	size_t nameCap;
	char *newFileName = NULL, *curFileName = NULL, *curPFN = NULL;

	if((pCurIvtIdx == NULL) || (sFileName == NULL) || (sCurDir == NULL) || (nKbyte <= 0))
	{
		printf("error is: %d\n", EWT2FLP);
		return EWT2FLP;
	}

	/* Size the buffers from both inputs so the name and the full path always fit. */
	nameCap = strlen(sCurDir) + strlen(sFileName) + 20;
	curFileName = (char *)malloc(nameCap);
	newFileName = (char *)malloc(nameCap);
	curPFN = (char *)malloc(strlen(sCurDir) + nameCap + 2);
	if((curFileName == NULL) || (newFileName == NULL) || (curPFN == NULL))
	{
		printf("error is: %d\n", EINVTWRT);
		goto cleanup;
	}
	newFileName[0] = '\0';

	strcpy(curFileName, sFileName);
	strcpy(curPFN, sCurDir);
	strcat(curPFN, "\\");
	strcat(curPFN, curFileName);

	if ((stream = fopen(curPFN, "a+")) == NULL)
	{
		/* Append mode failed: fall back to creating the file. */
		stream = fopen(curPFN, "wt");
	}
	else
	{
		/* The file exists: move on to successor files while the size limit is reached. */
		while (filesize(stream) >= nKbyte)
		{
			fclose(stream);
			stream = NULL;
			create_next_file_name(curFileName, &newFileName);
			if((newFileName == NULL) || (strlen(newFileName) >= nameCap))
			{
				printf("error is: %d\n", EINVTWRT);
				goto cleanup;
			}
			strcpy(curFileName, newFileName);

			strcpy(curPFN, sCurDir);
			strcat(curPFN, "\\");
			strcat(curPFN, curFileName);

			if ((stream = fopen(curPFN, "a+")) == NULL)
			{
				stream = fopen(curPFN, "w+");
			}
			if (stream == NULL)
			{
				break;
			}
		}
		if (stream != NULL)
		{
			fseek(stream, 0L, SEEK_END);
		}
	}
	if (stream == NULL)
	{
		printf("error is: %d\n", EINVTWRT);
		goto cleanup;
	}

	/* Record the name of the file that is about to receive the postings. */
	if(WrtFlName(curFileName, sCurDir) != 0)
	{
		printf("wrtfilname error!\n");
		goto cleanup;
	}

	/* Stream the postings straight to the file; no intermediate buffer can overflow. */
	while (pCurIvtIdx != NULL)
	{
		pCurDocNode = pCurIvtIdx->pDocNode;
		if (pCurDocNode != NULL)
		{
			fprintf(stream, "@%s #%d ", pCurIvtIdx->sWords, pCurIvtIdx->lDocNum);
			while (pCurDocNode != NULL)
			{
				fprintf(stream, "%s ,%f,%d,", pCurDocNode->sDocID, pCurDocNode->fWeight, pCurDocNode->iFreq);
				/* To keep the index small, write at most three positions. */
				for(i = 0; (i < pCurDocNode->iFreq - 1) && (i < 2); i++)
				{
					fprintf(stream, "%d+", pCurDocNode->iPos[i]);
				}
				fprintf(stream, "%d,", pCurDocNode->iPos[i]);
				fprintf(stream, "%s ", pCurDocNode->sFileURL);
				fprintf(stream, ";\n");
				pCurDocNode = pCurDocNode->pNext;
			}
		}
		pCurIvtIdx = pCurIvtIdx->pNextNode;
	}
	rc = 0;

cleanup:
	if (stream != NULL)
	{
		fclose(stream);
	}
	free(curFileName);
	free(newFileName);
	free(curPFN);
	return rc;
}

/*
 * Write an inverted list to index files under sCurDir, switching to a new
 * file whenever the current one reaches the size limit.
 *
 * The target starts as <sCurDir> + "\" + <sFileName>. On entry, and again
 * before each word is written, filesize() is compared with nKbyte; when the
 * limit is reached create_next_file_name() supplies the next file name.
 * WrtFlName() is called with the current file name before each word.
 *
 * Every word that owns at least one document node is written as
 *     @<sWords> #<lDocNum> 
 * followed, for each document node, by
 *     <sDocID> ,<fWeight>,<iFreq>,<pos>[+<pos>...],<sFileURL> ;
 * and a newline. All iFreq positions are written.
 *
 * Side effect (kept from the original): while a word's documents are
 * written, each one is unlinked from the list, so afterwards the first
 * document node of every visited word has pNext == NULL. The unlinked nodes
 * are not freed here.
 * NOTE(review): confirm whether callers rely on this or whether it is a
 * leftover from a removed free().
 *
 * Parameters:
 *   pCurIvtIdx  head of the inverted list; must not be NULL
 *   sFileName   name of the first candidate file; must not be NULL
 *   sCurDir     directory the files live in; must not be NULL
 *   nKbyte      size limit compared against filesize(); must be > 0
 *
 * Returns 0 on success, EWT2FLP for invalid arguments and EINVTWRT when a
 * buffer cannot be allocated, a file cannot be opened, a generated file name
 * does not fit, or WrtFlName() fails. All buffers and the stream are released
 * on every path.
 */
int wrtlst2file(InvertNode *pCurIvtIdx, char * sFileName, char * sCurDir, long nKbyte)
{
	FILE *stream = NULL;
	DocNode *pCurDocNode, *pPreDoc;
	int i;
	int rc = EINVTWRT;
	size_t nameCap;
	char *newFileName = NULL, *curFileName = NULL, *curPFN = NULL;

	if((pCurIvtIdx == NULL) || (sFileName == NULL) || (sCurDir == NULL) || (nKbyte <= 0))
	{
		printf("error is: %d\n", EWT2FLP);
		return EWT2FLP;
	}

	/* Size the buffers from both inputs so the name and the full path always fit. */
	nameCap = strlen(sCurDir) + strlen(sFileName) + 20;
	curFileName = (char *)malloc(nameCap);
	newFileName = (char *)malloc(nameCap);
	curPFN = (char *)malloc(strlen(sCurDir) + nameCap + 2);
	if((curFileName == NULL) || (newFileName == NULL) || (curPFN == NULL))
	{
		printf("error is: %d\n", EINVTWRT);
		goto cleanup;
	}
	newFileName[0] = '\0';

	strcpy(curFileName, sFileName);
	strcpy(curPFN, sCurDir);
	strcat(curPFN, "\\");
	strcat(curPFN, curFileName);

	if ((stream = fopen(curPFN, "a+")) == NULL)
	{
		/* Append mode failed: fall back to creating the file. */
		stream = fopen(curPFN, "wt");
	}
	else
	{
		/* The file exists: move on to successor files while the size limit is reached. */
		while (filesize(stream) >= nKbyte)
		{
			fclose(stream);
			stream = NULL;
			create_next_file_name(curFileName, &newFileName);
			if((newFileName == NULL) || (strlen(newFileName) >= nameCap))
			{
				printf("error is: %d\n", EINVTWRT);
				goto cleanup;
			}
			strcpy(curFileName, newFileName);

			strcpy(curPFN, sCurDir);
			strcat(curPFN, "\\");
			strcat(curPFN, curFileName);

			if ((stream = fopen(curPFN, "a+")) == NULL)
			{
				stream = fopen(curPFN, "w+");
			}
			if (stream == NULL)
			{
				break;
			}
		}
		if (stream != NULL)
		{
			fseek(stream, 0L, SEEK_END);
		}
	}
	if (stream == NULL)
	{
		printf("error is: %d\n", EINVTWRT);
		goto cleanup;
	}

	while (pCurIvtIdx != NULL)
	{
		/* Start a new file once the current one has reached the limit. */
		if (filesize(stream) >= nKbyte)
		{
			fclose(stream);
			stream = NULL;
			create_next_file_name(curFileName, &newFileName);
			if((newFileName == NULL) || (strlen(newFileName) >= nameCap))
			{
				printf("error is: %d\n", EINVTWRT);
				goto cleanup;
			}
			strcpy(curFileName, newFileName);

			strcpy(curPFN, sCurDir);
			strcat(curPFN, "\\");
			strcat(curPFN, curFileName);

			stream = fopen(curPFN, "wt");
			if (stream == NULL)
			{
				printf("error is: %d\n", EINVTWRT);
				goto cleanup;
			}
		}
		/* Record the name of the file currently being written. */
		if(WrtFlName(curFileName, sCurDir) != 0)
		{
			printf("wrtfilname error!\n");
			goto cleanup;
		}

		pCurDocNode = pCurIvtIdx->pDocNode;
		if (pCurDocNode != NULL)
		{
			fprintf(stream, "@%s #%d ", pCurIvtIdx->sWords, pCurIvtIdx->lDocNum);
			pPreDoc = pCurDocNode;
			while (pCurDocNode != NULL)
			{
				fprintf(stream, "%s ,%f,%d,", pCurDocNode->sDocID, pCurDocNode->fWeight, pCurDocNode->iFreq);
				for(i = 0; i < pCurDocNode->iFreq - 1; i++)
				{
					fprintf(stream, "%d+", pCurDocNode->iPos[i]);
				}
				fprintf(stream, "%d,", pCurDocNode->iPos[i]);
				fprintf(stream, "%s ", pCurDocNode->sFileURL);
				fprintf(stream, ";\n");
				/* Unlink the node just written from the word's document list. */
				pPreDoc->pNext = pCurDocNode->pNext;
				pCurDocNode = pPreDoc->pNext;
			}
		}
		/*
		 * The stream stays open for the next word (closing it here left the
		 * following iteration working on a closed FILE). Flush so that the
		 * next size check can see what was just written.
		 */
		fflush(stream);
		pCurIvtIdx = pCurIvtIdx->pNextNode;
	}
	rc = 0;

cleanup:
	if (stream != NULL)
	{
		fclose(stream);
	}
	free(curFileName);
	free(newFileName);
	free(curPFN);
	return rc;
}

/*
 * Open sPrefix + curFileName for appending, moving on to successor file
 * names while the file has already reached the size limit.
 *
 * sPrefix must end with the path separator. curFileName (capacity nameCap)
 * is updated in place when a successor name is chosen, and curPFN receives
 * the full path of the file that was opened. *ppNewFileName is the scratch
 * pointer handed to create_next_file_name().
 *
 * Returns the open stream positioned at end of file, or NULL when no file
 * could be opened or a generated name does not fit in curFileName. No stream
 * is left open when NULL is returned.
 */
static FILE *eng_open_index_file(const char *sPrefix, char *curFileName, size_t nameCap,
	char **ppNewFileName, char *curPFN, long nKbyte)
{
	FILE *stream;

	strcpy(curPFN, sPrefix);
	strcat(curPFN, curFileName);
	if ((stream = fopen(curPFN, "a+")) == NULL)
	{
		/* Append mode failed: fall back to creating the file. */
		return fopen(curPFN, "wt");
	}

	while (filesize(stream) >= nKbyte)
	{
		fclose(stream);
		create_next_file_name(curFileName, ppNewFileName);
		if((*ppNewFileName == NULL) || (strlen(*ppNewFileName) >= nameCap))
		{
			return NULL;
		}
		strcpy(curFileName, *ppNewFileName);

		strcpy(curPFN, sPrefix);
		strcat(curPFN, curFileName);
		if ((stream = fopen(curPFN, "a+")) == NULL)
		{
			stream = fopen(curPFN, "w+");
		}
		if (stream == NULL)
		{
			return NULL;
		}
	}
	fseek(stream, 0L, SEEK_END);
	return stream;
}

/*
 * Write English words to inverted-index files. Unlike the Chinese index, the
 * English index uses two directory levels: the first level is named after the
 * word's first letter and the second after its second letter. sCurDir is the
 * directory the words are written under (the original comment gives
 * "e:\\invt\\a" as an example); this function adds the second-letter level.
 *
 * For each word in the list:
 *   - an empty word is skipped;
 *   - a one-letter word is appended to <sCurDir> + "\" + <sFileName>;
 *   - a longer word is appended to a file in <sCurDir> + "\" + <second
 *     letter>, whose name is obtained from GetWrtFlName() for that directory.
 * The directory is created with mkdir() first (its result is ignored, since
 * the directory may already exist). When the chosen file has reached nKbyte
 * according to filesize(), create_next_file_name() supplies successor names.
 * WrtFlName() is then called with the file name and the directory path
 * (ending in a separator) before the word is written.
 *
 * A word that owns at least one document node is written as
 *     @<sWords> #<lDocNum> 
 * followed, for each document node, by
 *     <sDocID> ,<fWeight>,<iFreq>,<pos>[+<pos>[+<pos>]],<sFileURL> ;
 * and a newline. At most three positions are written per document.
 *
 * Side effect (kept from the original): every word and every document node
 * is unlinked from its list as it is processed, so after a complete run the
 * head word has pNextNode == NULL and each visited word's first document
 * node has pNext == NULL. The unlinked nodes are not freed here.
 * NOTE(review): confirm whether callers rely on this or whether it is a
 * leftover from a removed free().
 *
 * Parameters:
 *   pCurIvtIdx  head of the inverted list; must not be NULL
 *   sFileName   file name used for one-letter words, e.g. "1.txt"
 *   sCurDir     directory to write under; must not be NULL
 *   nKbyte      maximum index file size, compared against filesize(); > 0
 *
 * Returns 0 on success, EWT2FLP for invalid arguments and EINVTWRT when a
 * buffer cannot be allocated, GetWrtFlName() yields no usable name, a file
 * cannot be opened, or WrtFlName() fails. All buffers and the stream are
 * released on every path.
 */
int wrt2engfile(InvertNode *pCurIvtIdx, char * sFileName, const char * sCurDir, long nKbyte)
{
	FILE *stream = NULL;
	DocNode *pCurDocNode, *pPreDoc;
	InvertNode *pPreIdx;
	int i;
	int rc = EINVTWRT;
	size_t sStrlenth, dirLen, dirCap, nameCap;
	char sSub[2];
	char *sName;
	char *newFileName = NULL, *curFileName = NULL, *curPFN = NULL, *sPrefix = NULL, *Dir = NULL;

	if((pCurIvtIdx == NULL) || (sFileName == NULL) || (sCurDir == NULL) || (nKbyte <= 0))
	{
		printf("error is: %d\n", EWT2FLP);
		return EWT2FLP;
	}

	/* Size every buffer from the actual inputs; Dir keeps at least MAXPATHLEN bytes. */
	dirLen = strlen(sCurDir);
	nameCap = dirLen + strlen(sFileName) + 20;
	dirCap = dirLen + 3;
	if (dirCap < (size_t)MAXPATHLEN)
	{
		dirCap = (size_t)MAXPATHLEN;
	}
	curFileName = (char *)malloc(nameCap);
	newFileName = (char *)malloc(nameCap);
	sPrefix = (char *)malloc(dirLen + 4);
	curPFN = (char *)malloc(dirLen + 4 + nameCap);
	Dir = (char *)malloc(dirCap);
	if((curFileName == NULL) || (newFileName == NULL) || (sPrefix == NULL)
		|| (curPFN == NULL) || (Dir == NULL))
	{
		printf("error is: %d\n", EINVTWRT);
		goto cleanup;
	}
	newFileName[0] = '\0';

	pPreIdx = pCurIvtIdx;
	while (pCurIvtIdx != NULL)
	{
		sStrlenth = strlen(pCurIvtIdx->sWords);
		if(sStrlenth == 0)
		{
			/* Nothing to write for an empty word: unlink it and move on. */
			pPreIdx->pNextNode = pCurIvtIdx->pNextNode;
			pCurIvtIdx = pPreIdx->pNextNode;
			continue;
		}

		/* Work out the directory and the file name this word belongs to. */
		strcpy(Dir, sCurDir);
		strcpy(sPrefix, sCurDir);
		strcat(sPrefix, "\\");
		if(sStrlenth == 1)
		{
			/* One-letter words are recorded directly in sCurDir. */
			mkdir(Dir);
			strcpy(curFileName, sFileName);
		}
		else
		{
			/* Longer words go into the sub-directory named after the second letter. */
			sSub[0] = pCurIvtIdx->sWords[1];
			sSub[1] = '\0';
			strcat(Dir, "\\");
			strcat(Dir, sSub);
			mkdir(Dir);
			strcat(sPrefix, sSub);
			strcat(sPrefix, "\\");

			/*
			 * The returned name is copied into our own buffer and released,
			 * so later rollover names are never written into a buffer of
			 * unknown capacity.
			 */
			sName = GetWrtFlName(Dir);
			if (sName == NULL)
			{
				printf("error is: %d\n", EINVTWRT);
				goto cleanup;
			}
			if (strlen(sName) >= nameCap)
			{
				free(sName);
				printf("error is: %d\n", EINVTWRT);
				goto cleanup;
			}
			strcpy(curFileName, sName);
			free(sName);
		}

		stream = eng_open_index_file(sPrefix, curFileName, nameCap, &newFileName, curPFN, nKbyte);
		if (stream == NULL)
		{
			printf("error is: %d\n", EINVTWRT);
			goto cleanup;
		}

		/* Record the current file name for this directory. */
		strcpy(curPFN, sPrefix);
		if(WrtFlName(curFileName, curPFN) != 0)
		{
			printf("wrtfilname error!\n");
			goto cleanup;
		}

		pCurDocNode = pCurIvtIdx->pDocNode;
		if (pCurDocNode != NULL)
		{
			fprintf(stream, "@%s #%d ", pCurIvtIdx->sWords, pCurIvtIdx->lDocNum);
			pPreDoc = pCurDocNode;
			while (pCurDocNode != NULL)
			{
				fprintf(stream, "%s ,%f,%d,", pCurDocNode->sDocID, pCurDocNode->fWeight, pCurDocNode->iFreq);
				/* To keep the index small, write at most three positions. */
				for(i = 0; (i < pCurDocNode->iFreq - 1) && (i < 2); i++)
				{
					fprintf(stream, "%d+", pCurDocNode->iPos[i]);
				}
				fprintf(stream, "%d,", pCurDocNode->iPos[i]);
				fprintf(stream, "%s ", pCurDocNode->sFileURL);
				fprintf(stream, ";\n");
				/* Unlink the node just written from the word's document list. */
				pPreDoc->pNext = pCurDocNode->pNext;
				pCurDocNode = pPreDoc->pNext;
			}
		}
		fclose(stream);
		stream = NULL;

		/* Unlink the word just written from the inverted list. */
		pPreIdx->pNextNode = pCurIvtIdx->pNextNode;
		pCurIvtIdx = pPreIdx->pNextNode;
	}
	rc = 0;

cleanup:
	if (stream != NULL)
	{
		fclose(stream);
	}
	free(curFileName);
	free(newFileName);
	free(sPrefix);
	free(curPFN);
	free(Dir);
	return rc;
}


int IdxFlWrt(char * sFileName,char * StringBuffer)
{

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -