/* directory.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Windows CRT headers. They are only needed by the directory-walking code;
 * the guard lets the portable definitions below be compiled elsewhere.
 */
#ifdef _WIN32
#include <direct.h>
#include <io.h>
#include "malloc.h"
#endif

/* Sizes of the text buffers handed out by the node constructors below. */
enum {
    PATH_TEXT_CAPACITY = 260, /* pfilename / pclassname buffers */
    WORD_TEXT_CAPACITY = 64   /* sWord buffer */
};

/* Node of a word linked list. */
struct ForwardNode
{
    int frenqucy;             /* presumably the word's occurrence count (name kept as spelled) */
    int filenum;
    char *sWord;              /* heap-allocated text buffer owned by the node */
    struct ForwardNode *next; /* was declared as the non-existent `struct WordName *` */
};
typedef struct ForwardNode ForwardNode;

/* Node of a file linked list. */
struct FileNode
{
    int FileID;
    char *pfilename;               /* heap-allocated text buffer owned by the node */
    struct FileNode *next;
    struct ForwardNode *pWordName; /* head of the word list attached to this file */
    int wordnum;
};
struct FileNode *ptem1 = NULL;

/* Node of a class (category) linked list. */
struct ClassNode
{
    int ClassID;
    char *pclassname;           /* heap-allocated text buffer owned by the node */
    int filenum;
    struct FileNode *pfileNode; /* head of the file list attached to this class */
    struct ClassNode *next;
};
struct ClassNode *pctem1 = NULL, *head = NULL;

/* Returns a zero-filled (hence empty-string) buffer of `capacity` bytes, or NULL. */
static char *alloc_text(size_t capacity)
{
    return calloc(capacity, sizeof(char));
}

/*
 * Creates a word node.
 *
 * All fields are zeroed, `next` is NULL and `sWord` is an empty string with
 * room for WORD_TEXT_CAPACITY bytes. Returns NULL (after printing a message)
 * when memory cannot be obtained; the original dereferenced the NULL pointer.
 * The caller owns the node and its `sWord` buffer.
 */
struct ForwardNode *CWordNode(void)
{
    struct ForwardNode *p = calloc(1, sizeof *p);

    if (p != NULL)
        p->sWord = alloc_text(WORD_TEXT_CAPACITY);
    if (p == NULL || p->sWord == NULL) {
        printf("No enough memory to alloc");
        free(p);
        return NULL;
    }
    p->next = NULL;
    return p;
}

/*
 * Creates a file node.
 *
 * All fields are zeroed, `next`/`pWordName` are NULL and `pfilename` is an
 * empty string with room for PATH_TEXT_CAPACITY bytes. Returns NULL (after
 * printing a message) on allocation failure. The caller owns the node.
 */
struct FileNode *CFileNode(void)
{
    struct FileNode *p = calloc(1, sizeof *p);

    if (p != NULL)
        p->pfilename = alloc_text(PATH_TEXT_CAPACITY);
    if (p == NULL || p->pfilename == NULL) {
        printf("No enough memory to alloc");
        free(p);
        return NULL;
    }
    p->next = NULL;
    p->pWordName = NULL;
    return p;
}

/*
 * Creates a class node.
 *
 * All fields are zeroed, `next`/`pfileNode` are NULL and `pclassname` is an
 * empty string with room for PATH_TEXT_CAPACITY bytes. Returns NULL on
 * allocation failure (the original had no check at all). The caller owns
 * the node.
 */
struct ClassNode *CreateNode(void)
{
    struct ClassNode *p = calloc(1, sizeof *p);

    if (p == NULL)
        return NULL;
    p->pclassname = alloc_text(PATH_TEXT_CAPACITY);
    if (p->pclassname == NULL) {
        free(p);
        return NULL;
    }
    p->next = NULL;
    p->pfileNode = NULL;
    return p;
}

/*
 * Copies `path`, replacing its first two characters with "e:".
 *
 * The original copied into a NULL pointer; the result is now a freshly
 * allocated string that the caller must free(). A NULL or shorter-than-two-
 * character `path` yields just "e:". Returns NULL if allocation fails.
 */
char *ChangeDir(char path[])
{
    size_t len = (path != NULL) ? strlen(path) : 0;
    size_t out_len = (len < 2) ? 2 : len;
    char *Newpath = malloc(out_len + 1);

    if (Newpath == NULL)
        return NULL;
    Newpath[0] = 'e';
    Newpath[1] = ':';
    if (len > 2)
        memcpy(Newpath + 2, path + 2, len - 2);
    Newpath[out_len] = '\0';
    return Newpath;
}

/* Per-file word-frequency counting */
/*从文件中读出正向表*/
int ForwardRead(char *cfilename, ForwardNode **fNode, int *DocCount)
{
int malloccount = 0;
char cTemp;
unsigned char sTemp[SHORTSIZE];
unsigned char sShortTemp[SHORTSIZE];
int iStringPos = 0;
int iComaCount = 0;
int flagWord = 0;
long DCount = 0;
int poscount = 0;
int i,j;
ForwardNode *ptempFNode;
WordNode *pcurWordNode;
FILE *pTxtFile, *pFileTemp;
*fNode = (ForwardNode *) malloc((iMallocSize + (malloccount++) * iReallocSize) * sizeof(ForwardNode));
/*打开文档*/
if( (pTxtFile = fopen( cfilename, "r" )) == NULL )
{
return EFILEOPEN;
}
pFileTemp = pTxtFile;
ptempFNode = *fNode;
/*开始读字节*/
cTemp = fgetc(pTxtFile);
while(cTemp != EOF)
{
/*读文档编号*/
if(cTemp == '@')
{
iStringPos = 0;
cTemp = fgetc(pTxtFile);
while(cTemp != '#')
{
sTemp[iStringPos++] = cTemp;
cTemp = fgetc(pTxtFile);
}
/*处理文档编号的内容*/
sTemp[iStringPos] = '\0';
/*分配的内存够用*/
strcpy(ptempFNode->sDocID, sTemp);
/*分配的内存不够用???????????*/
}
/*读词内容*/
else if((cTemp == '#')||(flagWord == 1))
{
/*如果是新文档的第一个词,分配第一个词节点*/
if(cTemp == '#')
{
cTemp = fgetc(pTxtFile);
ptempFNode->wFWordNode = (WordNode *)malloc(sizeof(WordNode));
ptempFNode->wFWordNode->pnext = NULL;
pcurWordNode = ptempFNode->wFWordNode;
}
/*如果是同一个文档中的下一个词,分配下一个词节点*/
else if(flagWord == 1)
{
flagWord = 0;
pcurWordNode->pnext = (WordNode *) malloc(1 * sizeof(WordNode));
pcurWordNode = pcurWordNode->pnext;
pcurWordNode->pnext = NULL;
}
iStringPos = 0;
while(cTemp != ',')
{
sTemp[iStringPos++] = cTemp;
cTemp = fgetc(pTxtFile);
}
sTemp[iStringPos] = '\0';
/* 处理词的内容*/
pcurWordNode->sWords = (unsigned char *) malloc (iStringPos * sizeof(unsigned char));
strcpy(pcurWordNode->sWords,sTemp);
}
/*读权重*/
else if((cTemp == ',') && (iComaCount == 0))
{
iComaCount++;
iStringPos = 0;
cTemp = fgetc(pTxtFile);
while(cTemp != ',')
{
sTemp[iStringPos++] = cTemp;
cTemp = fgetc(pTxtFile);
}
/* 处理权重的内容*/
sTemp[iStringPos] = '\0';
pcurWordNode->fWeight = atof(sTemp);
}
/*读词频*/
else if ((cTemp == ',') && (iComaCount == 1))
{
iComaCount++;
iStringPos = 0;
cTemp = fgetc(pTxtFile);
while(cTemp != ',')
{
sTemp[iStringPos++] = cTemp;
cTemp = fgetc(pTxtFile);
}
/* 处理词频的内容*/
sTemp[iStringPos] = '\0';
pcurWordNode->iFreq = atol(sTemp);
}
/*读位置*/
else if ((cTemp == ',') && (iComaCount == 2))
{
iComaCount++;
iStringPos = 0;
cTemp = fgetc(pTxtFile);
while(cTemp != ';')
{
sTemp[iStringPos++] = cTemp;
cTemp = fgetc(pTxtFile);
}
sTemp[iStringPos] = '\0';
/* 处理位置的内容*/
pcurWordNode->iPos = (int *) malloc((int)(pcurWordNode->iFreq) * sizeof(int));
poscount = 0;
i = 0;
while( sTemp[i] != '\0')
{
j = 0;
while((sTemp[i] != '+') && (sTemp[i] != '\0'))
{
sShortTemp[j++] = sTemp[i++];
}
if((sTemp[i] == '+') )
{
sShortTemp[j] = '\0';
pcurWordNode->iPos[poscount++] = atoi(sShortTemp);
i++;
}
else if(sTemp[i] == '\0')
{
sShortTemp[j] = '\0';
pcurWordNode->iPos[poscount++] = atoi(sShortTemp);
}
}
}
/*下一个词或文档开始*/
else if (cTemp == ';')
{
cTemp = fgetc(pTxtFile);
/*读下一篇文档*/
if(cTemp == '\n')
{
iComaCount = 0;
DCount++;
ptempFNode = *fNode + DCount;
cTemp = fgetc(pTxtFile);
continue;
}
/*文档读完,返回*/
else if(cTemp == EOF)
{
return 0;
}
/*一个词结点读完,读下一个词*/
else
{
iComaCount = 0;
flagWord = 1;
continue;
}
}
else
{
printf("file format is error!");
return EFILEFORMAT;
}/* */
}
*DocCount = DCount;
printf("The ForwardRead Function is over!");
return 0;
}
struct FileName *FiletoWord(char *path,long wordnum){ struct FileName pQuery=NULL,pQueryT=NULL; if((fp=fopen(path,"r"))==NULL) printf("open error"); while(!feof(fp)) { cht=fgetc(fp1); while(*cht==' ') cht=fgetc(fp1); p2=strchr(p1,' '); if(cht!='\n') { if(cht==44) /*如果是逗号*/ printf("g"); else if(cht==34) /*如果是引号*/ { cht='\n'; fputc(cht,fp2); } else fputc(cht,fp2); /* cht=fgetc(fp1); fputc(cht,fp2);*/ /* fputc(fgetc(fp1),fp2);*/ fclose(fp); }/*遍历文件夹*/int i=0;void ProcessFileFromDir(char *spath){ long numWordNum=0; struct FileName *ptem2=NULL; struct _finddata_t file;/*定义结构体变量*/ struct ClassName *pctem2=NULL; char path[128],*Newpath;/*路径*/ long handle; FILE *fp; handle=(long)_findfirst("*",&file);/*查找所有文件*/ if(handle==-1)/*如果handle为-1,表示当前目录为空,则结束查找而返回*/ return; while(!(_findnext(handle,&file))) {/*继续对当前目录中的下一个子目录或文件进行与上面同样的查找*/ if(file.attrib&_A_SUBDIR) { if(file.name[0]!='.') { _chdir(file.name);/*进入该目录*/ _getcwd(path,256);/*获得目录路径*/ /* 建立类别节点*/ pctem2=CreateNode(); /* 定义类别节点的文件链表头指针*/ ptem1=pctem2->pfileNode; /*拷贝文件名*/ strcpy(pctem2->pclassname,path); i++; printf("%d",i); /*类别编码 */ pctem2->ClassID=i; /*初始化类别的文件数目为0*/ pctem2->filenum=0; /*类别节点连接到链表*/ if(pctem1==NULL) pctem1=pctem2; else { pctem1->next=pctem2; pctem1=pctem1->next; pctem1->next=NULL; } /*定义类别节点头指针*/ if(head==NULL) head=pctem1; Newpath=ChangeDir(path); mkdir(Newpath); ProcessFileFromDir(path); _chdir(".."); /*查找完毕之后,返回上一级目录*/ } } else {/*如果第一个实体不是目录,显示该文件*/ _getcwd(path,256);/* 获得文件的完整的路径名包含文件的名称*/ if(strcmp(strchr(file.name,'.'),".txt")==0) { /*建立文件节点并赋值*/ ptem2=CFileNode(); /*文件名拷贝到节点*/ strcat(path,"\\"); strcat(path,file.name); strcpy(ptem2->pfilename,path); /*文件计数*/ pctem1->filenum++; ptem2->FileID=pctem1->filenum; /*打开文件,统计词*/ ptem2->pWordName=FiletoWord(path,wordnum); /*文件节点连接到链表*/ if(ptem1==NULL) ptem1=ptem2; else { ptem1->next=ptem2; ptem1=ptem1->next; ptem1->next=NULL; } if(pctem1->pfileNode==NULL) pctem1->pfileNode=ptem2; /* printf(" %ld %s\n",ptem2->FileID,ptem2->pfilename); */ /* 
Newpath=ChangeDir(path); */ } } } /*printf("j%d",head->filenum);*/ _findclose(handle);//关闭控制台 }main(){ struct ClassName *pr; char filename[128]; printf("input path\n"); gets(filename); _chdir(filename); ProcessFileFromDir(filename); pr=head; while(pr!=NULL) { printf("class%d %s %d\n",pr->ClassID,pr->pclassname,pr->filenum); while(pr->pfileNode!=NULL) { printf("file%d,%s \n",pr->pfileNode->FileID,pr->pfileNode->pfilename); pr->pfileNode=pr->pfileNode->next; } pr=pr->next; } getchar();}
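/*
 * Code-viewer keyboard shortcuts (page text, not part of the program):
 *   Copy code        Ctrl + C
 *   Search code      Ctrl + F
 *   Full screen      F11
 *   Toggle theme     Ctrl + Shift + D
 *   Show shortcuts   ?
 *   Larger font      Ctrl + =
 *   Smaller font     Ctrl + -
 */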