/* invert10_31.c */
/* 若文件不存在, 以创建方式打开文件 */
stream = fopen(sDir, "w+");
if(stream == NULL)
{
printf("error is: %d\n", EFILEOPEN);
return EFILEOPEN;
}
/*第一次创建时写入1.txt*/
fprintf(stream, "%s", "1.txt");
fclose(stream);
}
/*将文件名字记录在$curfile.txt文件中*/
else
{
fprintf(stream, "%s", sFileName);
fclose(stream);
}
return 0;
}
/*
 * wrtlst - append a whole inverted list to the current inverted file.
 *
 * Every index node that owns at least one document node is written as
 *     @<word> #<doc count> <docID> ,<weight>,<freq>,<pos>[+<pos>[+<pos>]],<url> ;\n
 * with one "<docID> ... ;\n" record per document.  To keep the inverted
 * file small, at most three positions are written per document.
 *
 * Parameters:
 *   pCurIvtIdx  head of the inverted list to write; must not be NULL
 *   sFileName   name of the file to start with; must not be NULL
 *   sCurDir     directory that holds the inverted files; must not be NULL
 *   nKbyte      roll-over threshold, compared directly with the value
 *               returned by filesize(); must be > 0
 *
 * The first candidate is sCurDir\sFileName.  If that file already exists
 * and filesize() reports nKbyte or more, create_next_file_name() supplies
 * successive names until a file below the threshold is found.  The name
 * finally chosen is recorded through WrtFlName() before any entry is
 * written.
 *
 * Returns 0 on success, EWT2FLP for invalid arguments, and EINVTWRT when a
 * buffer cannot be allocated, a file cannot be opened, a generated name
 * does not fit, or WrtFlName() fails.  The stream and all work buffers are
 * released on every path.
 */
int wrtlst(InvertNode *pCurIvtIdx, char * sFileName, char * sCurDir, long nKbyte)
{
    FILE *stream = NULL;
    long curFileSize = 0;
    DocNode *pCurDocNode;
    int i;
    int rc = EINVTWRT;
    size_t nBufLen;
    char *newFileName = NULL, *curFileName = NULL, *curPFN = NULL;

    if ((pCurIvtIdx == NULL) || (sFileName == NULL) || (sCurDir == NULL) || (nKbyte <= 0))
    {
        printf("error is: %d\n", EWT2FLP);
        return EWT2FLP;
    }

    /* Room for "<dir>\<name>" plus slack for the generated roll-over names. */
    nBufLen = strlen(sCurDir) + strlen(sFileName) + 32;
    curFileName = (char *)malloc(nBufLen);
    newFileName = (char *)malloc(nBufLen);
    curPFN = (char *)malloc(nBufLen);
    if ((curFileName == NULL) || (newFileName == NULL) || (curPFN == NULL))
    {
        printf("error is: %d\n", EINVTWRT);
        goto cleanup;
    }

    strcpy(curFileName, sFileName);
    strcpy(curPFN, sCurDir);
    strcat(curPFN, "\\");
    strcat(curPFN, curFileName);

    stream = fopen(curPFN, "a+");
    if (stream == NULL)
    {
        /* The file could not be opened for append: try to create it. */
        stream = fopen(curPFN, "wt");
    }
    else
    {
        /* The file exists: skip to the first file that is below the limit. */
        curFileSize = filesize(stream);
        while (curFileSize >= nKbyte)
        {
            fclose(stream);
            stream = NULL;
            create_next_file_name(curFileName, &newFileName);
            if ((newFileName == NULL)
                || (strlen(sCurDir) + strlen(newFileName) + 2 > nBufLen))
            {
                printf("error is: %d\n", EINVTWRT);
                goto cleanup;
            }
            strcpy(curFileName, newFileName);
            strcpy(curPFN, sCurDir);
            strcat(curPFN, "\\");
            strcat(curPFN, curFileName);
            stream = fopen(curPFN, "a+");
            if (stream == NULL)
            {
                stream = fopen(curPFN, "w+");
            }
            if (stream == NULL)
            {
                break;
            }
            curFileSize = filesize(stream);
        }
        if (stream != NULL)
        {
            fseek(stream, 0L, SEEK_END);
        }
    }
    if (stream == NULL)
    {
        printf("error is: %d\n", EINVTWRT);
        goto cleanup;
    }

    /* Record the name of the file that is about to receive the entries. */
    if (WrtFlName(curFileName, sCurDir) != 0)
    {
        printf("wrtfilname error!\n");
        goto cleanup;
    }

    /*
     * Write the entries straight to the stream.  Formatting into a
     * fixed-size intermediate string first would overflow on a large list.
     */
    for (; pCurIvtIdx != NULL; pCurIvtIdx = pCurIvtIdx->pNextNode)
    {
        pCurDocNode = pCurIvtIdx->pDocNode;
        if (pCurDocNode == NULL)
        {
            continue;
        }
        fprintf(stream, "@%s #%d ", pCurIvtIdx->sWords, pCurIvtIdx->lDocNum);
        for (; pCurDocNode != NULL; pCurDocNode = pCurDocNode->pNext)
        {
            fprintf(stream, "%s ,%f,%d,", pCurDocNode->sDocID, pCurDocNode->fWeight, pCurDocNode->iFreq);
            /* At most two "+"-separated positions, then one final position. */
            for (i = 0; (i < pCurDocNode->iFreq - 1) && (i < 2); i++)
            {
                fprintf(stream, "%d+", pCurDocNode->iPos[i]);
            }
            fprintf(stream, "%d,", pCurDocNode->iPos[i]);
            fprintf(stream, "%s ", pCurDocNode->sFileURL);
            fprintf(stream, ";\n");
        }
    }
    rc = 0;

cleanup:
    if (stream != NULL)
    {
        fclose(stream);
    }
    free(curFileName);
    free(newFileName);
    free(curPFN);
    return rc;
}
/*
 * wrtlst2file - write an inverted list node by node, rolling over to a new
 * file whenever the current one reaches the size threshold.
 *
 * Every index node that owns at least one document node is written as
 *     @<word> #<doc count> <docID> ,<weight>,<freq>,<pos>[+<pos>...],<url> ;\n
 * with one "<docID> ... ;\n" record per document.  Unlike wrtlst(), all
 * iFreq positions of a document are written.
 *
 * Parameters:
 *   pCurIvtIdx  head of the inverted list to write; must not be NULL
 *   sFileName   name of the file to start with; must not be NULL
 *   sCurDir     directory that holds the inverted files; must not be NULL
 *   nKbyte      roll-over threshold, compared directly with the value
 *               returned by filesize(); must be > 0
 *
 * Before each index node the current file is measured with filesize(); at
 * nKbyte or more, create_next_file_name() supplies the next name and that
 * file is created.  WrtFlName() is called for every index node with the
 * name of the file currently in use.
 *
 * Side effect kept from the original implementation: while a document list
 * is written, each visited document node is unlinked from the head node of
 * that list (head->pNext is advanced) without being freed.
 * NOTE(review): confirm who owns the detached nodes.
 *
 * Returns 0 on success, EWT2FLP for invalid arguments, and EINVTWRT when a
 * buffer cannot be allocated, a file cannot be opened, a generated name
 * does not fit, or WrtFlName() fails.  The stream and all work buffers are
 * released on every path.
 */
int wrtlst2file(InvertNode *pCurIvtIdx, char * sFileName, char * sCurDir, long nKbyte)
{
    FILE *stream = NULL;
    long curFileSize = 0;
    DocNode *pCurDocNode, *pPreDoc;
    int i;
    int rc = EINVTWRT;
    size_t nBufLen;
    char *newFileName = NULL, *curFileName = NULL, *curPFN = NULL;

    if ((pCurIvtIdx == NULL) || (sFileName == NULL) || (sCurDir == NULL) || (nKbyte <= 0))
    {
        printf("error is: %d\n", EWT2FLP);
        return EWT2FLP;
    }

    /* Room for "<dir>\<name>" plus slack for the generated roll-over names. */
    nBufLen = strlen(sCurDir) + strlen(sFileName) + 32;
    curFileName = (char *)malloc(nBufLen);
    newFileName = (char *)malloc(nBufLen);
    curPFN = (char *)malloc(nBufLen);
    if ((curFileName == NULL) || (newFileName == NULL) || (curPFN == NULL))
    {
        printf("error is: %d\n", EINVTWRT);
        goto cleanup;
    }

    strcpy(curFileName, sFileName);
    strcpy(curPFN, sCurDir);
    strcat(curPFN, "\\");
    strcat(curPFN, curFileName);

    stream = fopen(curPFN, "a+");
    if (stream == NULL)
    {
        /* The file could not be opened for append: try to create it. */
        stream = fopen(curPFN, "wt");
    }
    else
    {
        /* The file exists: skip to the first file that is below the limit. */
        curFileSize = filesize(stream);
        while (curFileSize >= nKbyte)
        {
            fclose(stream);
            stream = NULL;
            create_next_file_name(curFileName, &newFileName);
            if ((newFileName == NULL)
                || (strlen(sCurDir) + strlen(newFileName) + 2 > nBufLen))
            {
                printf("error is: %d\n", EINVTWRT);
                goto cleanup;
            }
            strcpy(curFileName, newFileName);
            strcpy(curPFN, sCurDir);
            strcat(curPFN, "\\");
            strcat(curPFN, curFileName);
            stream = fopen(curPFN, "a+");
            if (stream == NULL)
            {
                stream = fopen(curPFN, "w+");
            }
            if (stream == NULL)
            {
                break;
            }
            curFileSize = filesize(stream);
        }
        if (stream != NULL)
        {
            fseek(stream, 0L, SEEK_END);
        }
    }
    if (stream == NULL)
    {
        printf("error is: %d\n", EINVTWRT);
        goto cleanup;
    }

    while (pCurIvtIdx != NULL)
    {
        /* Roll over to the next file once the current one is full. */
        if (filesize(stream) >= nKbyte)
        {
            fclose(stream);
            stream = NULL;
            create_next_file_name(curFileName, &newFileName);
            if ((newFileName == NULL)
                || (strlen(sCurDir) + strlen(newFileName) + 2 > nBufLen))
            {
                printf("error is: %d\n", EINVTWRT);
                goto cleanup;
            }
            strcpy(curFileName, newFileName);
            strcpy(curPFN, sCurDir);
            strcat(curPFN, "\\");
            strcat(curPFN, curFileName);
            stream = fopen(curPFN, "wt");
            if (stream == NULL)
            {
                printf("error is: %d\n", EINVTWRT);
                goto cleanup;
            }
        }

        /* Record the name of the file currently in use. */
        if (WrtFlName(curFileName, sCurDir) != 0)
        {
            printf("wrtfilname error!\n");
            goto cleanup;
        }

        pCurDocNode = pCurIvtIdx->pDocNode;
        if (pCurDocNode != NULL)
        {
            fprintf(stream, "@");
            fprintf(stream, "%s #%d ", pCurIvtIdx->sWords, pCurIvtIdx->lDocNum);
            pPreDoc = pCurDocNode;
            while (pCurDocNode != NULL)
            {
                fprintf(stream, "%s ,%f,%d,", pCurDocNode->sDocID, pCurDocNode->fWeight, pCurDocNode->iFreq);
                for (i = 0; i < pCurDocNode->iFreq - 1; i++)
                {
                    fprintf(stream, "%d+", pCurDocNode->iPos[i]);
                }
                fprintf(stream, "%d,", pCurDocNode->iPos[i]);
                fprintf(stream, "%s ", pCurDocNode->sFileURL);
                fprintf(stream, ";\n");
                /* Unlink the written node from the head (it is not freed). */
                pPreDoc->pNext = pCurDocNode->pNext;
                pCurDocNode = pPreDoc->pNext;
            }
        }

        /*
         * The stream stays open for the next index node; closing it here
         * would make the following filesize()/fprintf() calls operate on a
         * closed stream.  Flush so that the size check sees what has been
         * written, however filesize() measures the file.
         */
        fflush(stream);
        pCurIvtIdx = pCurIvtIdx->pNextNode;
    }
    rc = 0;

cleanup:
    if (stream != NULL)
    {
        fclose(stream);
    }
    free(curFileName);
    free(newFileName);
    free(curPFN);
    return rc;
}
/*
 * wrt2engfile - write English words to the inverted files.
 *
 * Unlike the Chinese inverted files, the English ones use two directory
 * levels: the first-level directory is named after the first letter of the
 * word and the second-level directory after its second letter.  A
 * one-letter word is stored directly in the first-level directory.
 * sCurDir is used as the first-level directory; this function only derives
 * the second level, from the second character of each word.
 *
 * Parameters:
 *   pCurIvtIdx  head of the inverted list; must not be NULL
 *   sFileName   name of the file to write, e.g. "1.txt"; must not be NULL.
 *               Used for one-letter words only; for longer words the name
 *               is obtained from GetWrtFlName() for the second-level
 *               directory.
 *   sCurDir     current directory, e.g. "e:\\invt\\a"; must not be NULL
 *   nKbyte      maximum size of an inverted file, compared directly with
 *               the value returned by filesize(); must be > 0
 *
 * For every index node the target directory is created with mkdir() and
 * the target file is opened for append.  If it already exists and
 * filesize() reports nKbyte or more, create_next_file_name() supplies
 * successive names until a file below the limit is found.  The chosen name
 * is recorded with WrtFlName() and the node's documents are written as
 *     @<word> #<doc count> <docID> ,<weight>,<freq>,<pos>[+<pos>[+<pos>]],<url> ;\n
 * with at most three positions per document.  The file is closed again
 * after each index node.
 *
 * Side effect kept from the original implementation: every visited index
 * node is unlinked from the head of the list, and every visited document
 * node from the head of its document list; none of them is freed.  Index
 * nodes with an empty word are unlinked without being written.
 * NOTE(review): confirm who owns the detached nodes.
 *
 * Returns 0 on success, EWT2FLP for invalid arguments, and EINVTWRT when a
 * buffer cannot be allocated, a file cannot be opened, a file name does
 * not fit, or GetWrtFlName()/WrtFlName() fails.  The stream and all work
 * buffers are released on every path.
 */
int wrt2engfile(InvertNode *pCurIvtIdx, char * sFileName, const char * sCurDir, long nKbyte)
{
    FILE *stream = NULL;
    long curFileSize = 0;
    DocNode *pCurDocNode, *pPreDoc;
    InvertNode *pPreIdx;
    int i;
    int rc = EINVTWRT;
    size_t nWordLen, nBufLen, nDirLen;
    char sLetter[2];
    char *newFileName = NULL, *curFileName = NULL, *curPFN = NULL;
    char *sPrefix = NULL, *Dir = NULL, *sRecorded = NULL;

    if ((pCurIvtIdx == NULL) || (sFileName == NULL) || (sCurDir == NULL) || (nKbyte <= 0))
    {
        printf("error is: %d\n", EWT2FLP);
        return EWT2FLP;
    }

    /* Room for "<dir>\<letter>\<name>" plus slack for generated names. */
    nBufLen = strlen(sCurDir) + strlen(sFileName) + 32;
    /* Never smaller than the original MAXPATHLEN-sized directory buffer. */
    nDirLen = strlen(sCurDir) + 3;
    if (nDirLen < (size_t)MAXPATHLEN)
    {
        nDirLen = (size_t)MAXPATHLEN;
    }
    curFileName = (char *)malloc(nBufLen);
    newFileName = (char *)malloc(nBufLen);
    curPFN = (char *)malloc(nBufLen);
    sPrefix = (char *)malloc(nBufLen);
    Dir = (char *)malloc(nDirLen);
    if ((curFileName == NULL) || (newFileName == NULL) || (curPFN == NULL)
        || (sPrefix == NULL) || (Dir == NULL))
    {
        printf("error is: %d\n", EINVTWRT);
        goto cleanup;
    }

    pPreIdx = pCurIvtIdx;
    while (pCurIvtIdx != NULL)
    {
        nWordLen = strlen(pCurIvtIdx->sWords);
        if (nWordLen == 0)
        {
            /* Nothing to write for an empty word: unlink it and move on. */
            pPreIdx->pNextNode = pCurIvtIdx->pNextNode;
            pCurIvtIdx = pPreIdx->pNextNode;
            continue;
        }

        /* Work out the target directory and the path prefix "<dir>\". */
        strcpy(Dir, sCurDir);
        strcpy(sPrefix, sCurDir);
        strcat(sPrefix, "\\");
        if (nWordLen == 1)
        {
            /* One-letter word: stored directly in the current directory. */
            mkdir(Dir);
            strcpy(curFileName, sFileName);
        }
        else
        {
            /* Longer word: sub-directory named after its second letter. */
            sLetter[0] = pCurIvtIdx->sWords[1];
            sLetter[1] = '\0';
            strcat(Dir, "\\");
            strcat(Dir, sLetter);
            strcat(sPrefix, sLetter);
            strcat(sPrefix, "\\");
            mkdir(Dir);
            /* The returned string is owned by this function and freed here. */
            sRecorded = GetWrtFlName(Dir);
            if ((sRecorded == NULL)
                || (strlen(sCurDir) + strlen(sRecorded) + 4 > nBufLen))
            {
                printf("error is: %d\n", EINVTWRT);
                goto cleanup;
            }
            strcpy(curFileName, sRecorded);
            free(sRecorded);
            sRecorded = NULL;
        }

        strcpy(curPFN, sPrefix);
        strcat(curPFN, curFileName);
        stream = fopen(curPFN, "a+");
        if (stream == NULL)
        {
            /* The file could not be opened for append: try to create it. */
            stream = fopen(curPFN, "wt");
        }
        else
        {
            /* The file exists: skip to the first file below the limit. */
            curFileSize = filesize(stream);
            while (curFileSize >= nKbyte)
            {
                fclose(stream);
                stream = NULL;
                create_next_file_name(curFileName, &newFileName);
                if ((newFileName == NULL)
                    || (strlen(sCurDir) + strlen(newFileName) + 4 > nBufLen))
                {
                    printf("error is: %d\n", EINVTWRT);
                    goto cleanup;
                }
                strcpy(curFileName, newFileName);
                strcpy(curPFN, sPrefix);
                strcat(curPFN, curFileName);
                stream = fopen(curPFN, "a+");
                if (stream == NULL)
                {
                    stream = fopen(curPFN, "w+");
                }
                if (stream == NULL)
                {
                    break;
                }
                curFileSize = filesize(stream);
            }
            if (stream != NULL)
            {
                fseek(stream, 0L, SEEK_END);
            }
        }
        if (stream == NULL)
        {
            printf("error is: %d\n", EINVTWRT);
            goto cleanup;
        }

        /* Record the chosen file name for this directory. */
        if (WrtFlName(curFileName, sPrefix) != 0)
        {
            printf("wrtfilname error!\n");
            goto cleanup;
        }

        pCurDocNode = pCurIvtIdx->pDocNode;
        if (pCurDocNode != NULL)
        {
            fprintf(stream, "@");
            fprintf(stream, "%s #%d ", pCurIvtIdx->sWords, pCurIvtIdx->lDocNum);
            pPreDoc = pCurDocNode;
            while (pCurDocNode != NULL)
            {
                fprintf(stream, "%s ,%f,%d,", pCurDocNode->sDocID, pCurDocNode->fWeight, pCurDocNode->iFreq);
                /* At most two "+"-separated positions, then a final one. */
                for (i = 0; (i < pCurDocNode->iFreq - 1) && (i < 2); i++)
                {
                    fprintf(stream, "%d+", pCurDocNode->iPos[i]);
                }
                fprintf(stream, "%d,", pCurDocNode->iPos[i]);
                fprintf(stream, "%s ", pCurDocNode->sFileURL);
                fprintf(stream, ";\n");
                /* Unlink the written node from the head (it is not freed). */
                pPreDoc->pNext = pCurDocNode->pNext;
                pCurDocNode = pPreDoc->pNext;
            }
        }
        fclose(stream);
        stream = NULL;

        /* Unlink the written index node from the head (it is not freed). */
        pPreIdx->pNextNode = pCurIvtIdx->pNextNode;
        pCurIvtIdx = pPreIdx->pNextNode;
    }
    rc = 0;

cleanup:
    if (stream != NULL)
    {
        fclose(stream);
    }
    free(sRecorded);
    free(curFileName);
    free(newFileName);
    free(curPFN);
    free(sPrefix);
    free(Dir);
    return rc;
}
int IdxFlWrt(char * sFileName,char * StringBuffer)
{
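/*
 * Code-viewer residue (not part of the program) - keyboard shortcuts:
 *   copy code: Ctrl + C
 *   search code: Ctrl + F
 *   full-screen mode: F11
 *   switch theme: Ctrl + Shift + D
 *   show shortcuts: ?
 *   increase font size: Ctrl + =
 *   decrease font size: Ctrl + -
 */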