📄 merge.c
字号:
/*
** Copyright (C) 1995, Enterprise Integration Technologies Corp.
** All Rights Reserved.
** Kevin Hughes, kevinh@eit.com
** 3/11/94
**
** Released under the GPL by EIT
**
** Heavily hacked for Harvest
*/

#include "swish.h"
#include "merge.h"
#include "error.h"
#include "file.h"
#include "search.h"
#include "string.h"
#include "index.h"
#include "hash.h"
#include "mem.h"

/* Opens an index file for reading and checks its header.
** Reports the problem through progerr() if the file cannot be
** opened or isokindexheader() rejects it.
** NOTE(review): progerr() is assumed not to return - confirm in error.c.
*/
static FILE *openindexfile(char *name)
{
	FILE *fp;

	if ((fp = fopen(name, "r")) == NULL) {
		sprintf(errorstr, "Couldn't read the index file \"%s\".", name);
		progerr(errorstr);
	}
	if (!isokindexheader(fp)) {
		sprintf(errorstr, "\"%s\" has an unknown format.", name);
		progerr(errorstr);
	}
	return fp;
}

/* Copies "count" file-info lines from "in" (already positioned at its
** file list) to "out". Before each line is copied, its offset in "out"
** is recorded with addtofilehashlist() under consecutive indexes
** starting at "firstindex".
** A failed read is reported instead of writing a stale buffer.
*/
static void copyfileinfo(FILE *in, FILE *out, int count, int firstindex)
{
	int k;
	char line[MAXSTRLEN];

	for (k = 0; k < count; k++) {
		addtofilehashlist(firstindex + k, ftell(out));
		if (fgets(line, MAXSTRLEN, in) == NULL) {
			progerr("Unexpected end of index file while merging file info.");
			return;
		}
		fputs(line, out);
	}
}

/* The main merge function - it accepts three file names.
** It acts as a zipper: entries are read from both index files and
** written to "outfile", the entry with the smaller word (by strcmp)
** first. An entry whose word appears in both indexes is written once,
** with both result lists concatenated.
**
** file1, file2: index files to merge (read only).
** outfile:      merged index to create.
**
** The global "offsets" table is reused as scratch space: it is loaded
** from each input in turn, then cleared and rebuilt for the output.
*/
void readmerge(char *file1, char *file2, char *outfile)
{
	int i, indexfilenum1, indexfilenum2, result, totalfiles, skipwords, skipfiles;
	long limit1, limit2, fileinfo1, fileinfo2, offsetstart;
	struct indexentry *ip1, *ip2, *ip3;
	FILE *fp1, *fp2, *fp3;
	struct metaMergeEntry *metaFile1, *metaFile2;

	/* remapVar is used into addindexfilelist and need to be
	** initialized each time two indexes are merged.
	*/
	remapVar = 0;
	metaFile1 = metaFile2 = NULL;

	fp1 = openindexfile(file1);
	fp2 = openindexfile(file2);

	ip1 = ip2 = ip3 = NULL;

	if (verbose)
		printf("Counting files... ");

	indexfilenum1 = getindexfilenum(fp1);
	indexfilenum2 = getindexfilenum(fp2);
	totalfiles = indexfilenum1 + indexfilenum2;

	if (verbose) {
		printf("%d files.\n", indexfilenum1 + indexfilenum2);
		printf("Reading stopwords...");
	}

	/* Remember where the word section ends and the file list starts
	** in each input; "offsets" is overwritten by every readoffsets().
	*/
	readoffsets(fp1);
	readstopwords(fp1);
	limit1 = offsets[STOPWORDPOS];
	fileinfo1 = offsets[FILELISTPOS];
	metaFile1 = readMergeMeta(metaFile1, fp1);

	readoffsets(fp2);
	readstopwords(fp2);
	limit2 = offsets[STOPWORDPOS];
	fileinfo2 = offsets[FILELISTPOS];
	metaFile2 = readMergeMeta(metaFile2, fp2);

	/* Create the merged list and modify the individual ones
	** with the new meta index
	*/
	metaEntryList = createMetaMerge(metaFile1, metaFile2);

	if (verbose)
		printf("\nReading file info...");

	if ((fp3 = fopen(outfile, "w")) == NULL) {
		sprintf(errorstr, "Couldn't write the merged index file \"%s\".", outfile);
		progerr(errorstr);
	}

	if (verbose)
		printf("\nMerging words... ");

	printheader(fp3, outfile, 0, totalfiles);

	/* Write a placeholder offset table; the real values are written
	** over it once the whole output has been produced.
	*/
	offsetstart = ftell(fp3);
	for (i = 0; i < MAXCHARS; i++)
		fprintf(fp3, "%016li", offsets[i]);
	fputc('\n', fp3);

	readoffsets(fp1);
	readoffsets(fp2);

	for (i = 0; i < MAXCHARS; i++)
		offsets[i] = 0;

	skipwords = 0;
	while (1) {
		/* Refill whichever side has no pending entry. Both sides are
		** refilled BEFORE the termination test: testing after reading
		** only the first index would drop the rest of the second index
		** whenever the first one is empty or ends on a shared word.
		*/
		if (ip1 == NULL)
			ip1 = readindexline(fp1, limit1, metaFile1);
		if (ip2 == NULL) {
			ip2 = readindexline(fp2, limit2, metaFile2);
			if (ip2 != NULL)
				addfilenums(ip2, indexfilenum1);
		}
		if (ip1 == NULL && ip2 == NULL)
			break;

		if (ip1 == NULL)
			result = 1;
		else if (ip2 == NULL)
			result = -1;
		else
			result = strcmp(ip1->word, ip2->word);

		if (!result) {
			/* Same word in both indexes: emit one combined entry. */
			ip3 = mergeindexentries(ip1, ip2);
			printindexentry(ip3, fp3);
			freeindexentry(ip1);
			freeindexentry(ip2);
			freeindexentry(ip3);
			ip1 = ip2 = ip3 = NULL;
			skipwords++;
		}
		else if (result < 0) {
			printindexentry(ip1, fp3);
			freeindexentry(ip1);
			ip1 = NULL;
		}
		else {
			printindexentry(ip2, fp3);
			freeindexentry(ip2);
			ip2 = NULL;
		}
	}

	if (verbose) {
		if (skipwords)
			printf("%d redundant word%s.", skipwords,
			       (skipwords == 1) ? "" : "s");
		else
			printf("no redundant words.");
	}

	printstopwords(fp3);

	if (verbose)
		printf("\nMerging file info... ");

	/* File info of the first index keeps its numbering; the second
	** index follows, matching the shift applied by addfilenums().
	*/
	offsets[FILELISTPOS] = ftell(fp3);
	fseek(fp1, fileinfo1, SEEK_SET);
	copyfileinfo(fp1, fp3, indexfilenum1, 0);
	fseek(fp2, fileinfo2, SEEK_SET);
	copyfileinfo(fp2, fp3, indexfilenum2, indexfilenum1);

	/* totalfiles is never reduced in this function, so this is 0. */
	skipfiles = (indexfilenum1 + indexfilenum2) - totalfiles;
	if (verbose) {
		if (skipfiles)
			printf("%d redundant file%s.", skipfiles,
			       (skipfiles == 1) ? "" : "s");
		else
			printf("no redundant files.");
	}

	printfileoffsets(fp3);
	printMetaNames(fp3);

	/* Go back and overwrite the placeholder with the final offsets. */
	fseek(fp3, offsetstart, SEEK_SET);
	for (i = 0; i < MAXCHARS; i++)
		fprintf(fp3, "%016li", offsets[i]);

	fclose(fp3);
	fclose(fp1);
	fclose(fp2);

	if (verbose)
		printf("\nDone.\n");
}

/* Gets the number of files in an index file.
** Counts the fgets() reads needed to get from offsets[FILELISTPOS]
** to offsets[FILEOFFSETPOS]. Reloads the global "offsets" table and
** moves the file position of "fp".
** Stops at end of file or on a read error, so a truncated index
** cannot make it loop forever.
*/
int getindexfilenum(FILE *fp)
{
	int i;
	char line[MAXSTRLEN];

	readoffsets(fp);
	fseek(fp, offsets[FILELISTPOS], SEEK_SET);
	i = 0;
	while (ftell(fp) != offsets[FILEOFFSETPOS]) {
		if (fgets(line, MAXSTRLEN, fp) == NULL)
			break;
		i++;
	}
	return i;
}

/* This adds an offset to the file numbers in a particular
** result list. For instance, file 1 has file numbers going from
** 1 to 10, but so does file 2, so I have to add 10 to all the
** file numbers in file 2 before merging.
*/
void addfilenums(struct indexentry *ip, int num)
{
	struct resultMerge *rp;

	for (rp = ip->result; rp != NULL; rp = rp->next)
		rp->filenum = encodefilenum(decodefilenum(rp->filenum) + num);
}

/* This reads the next line in the index file and puts the results
** in a result structure.
**
** fp:       index file, positioned at the start of an entry.
** limit:    file offset at which the word section ends.
** metaFile: list used to translate each attribute from its oldIndex
**           to its newIndex; unlisted attributes are kept unchanged.
**
** Returns NULL when the position of "fp" equals "limit", otherwise a
** newly allocated entry (the caller releases it with
** freeindexentry()).
**
** An entry is the word, a ':', a run of variable-length numbers
** (7 data bits per byte, high bit set on continuation bytes) taken
** as (filenum, rank, attribute) triples, and a terminating 0 byte.
** End of file is treated like the terminating 0 byte.
*/
struct indexentry *readindexline(FILE *fp, long limit, struct metaMergeEntry *metaFile)
{
	int i, c, x, countnum, rank = 0, filenum = 0, attribute;
	char fileword[MAXWORDLEN];
	struct resultMerge *rp;
	struct indexentry *ip;
	struct metaMergeEntry *tmp;

	rp = NULL;

	if (limit == ftell(fp))
		return NULL;

	/* Collect the word up to ':' (or a 0 byte / end of file). */
	i = 0;
	while ((c = fgetc(fp)) != 0 && c != EOF && c != ':') {
		if (i < MAXWORDLEN - 1)
			fileword[i++] = (char) c;
		else
			progerr("Word too long in index file.");
	}
	fileword[i] = '\0';

	countnum = 1;

	/* The byte that ended the word is pushed back and read again by
	** the loop below, which reads one byte ahead of every number.
	** This read order is kept exactly as it was.
	*/
	ungetc(c, fp);
	while ((c = fgetc(fp)) != 0 && c != EOF) {
		x = 0;
		do {
			c = fgetc(fp);
			if (c == 0 || c == EOF)
				break;
			x *= 128;
			x += c & 127;
		} while (c & 128);
		if (c == 0 || c == EOF)
			break;

		if (x) {
			if (countnum == 1) {
				filenum = x;
				countnum++;
			}
			else if (countnum == 2) {
				rank = x;
				countnum++;
			}
			else if (countnum == 3) {
				attribute = x;
				/* Need to modify metaName with new list */
				for (tmp = metaFile; tmp; tmp = tmp->next) {
					if (tmp->oldIndex == attribute) {
						attribute = tmp->newIndex;
						break;
					}
				}
				rp = addtoresultlistMerge(rp, filenum, rank, attribute);
				countnum = 1;
			}
		}
	}

	ip = (struct indexentry *) emalloc(sizeof(struct indexentry));
	ip->word = (char *) mystrdup(fileword);
	ip->result = rp;

	return ip;
}

/* This simply concatenates two information lists that correspond
** to a word found in both index files.
** Returns a newly allocated entry holding a copy of ip1's word and the
** results of ip1 followed by those of ip2; ip1 and ip2 are not changed.
*/
struct indexentry *mergeindexentries(struct indexentry *ip1, struct indexentry *ip2)
{
	struct resultMerge *newrp, *rp;
	struct indexentry *ep;

	newrp = NULL;
	for (rp = ip1->result; rp != NULL; rp = rp->next)
		newrp = addtoresultlistMerge(newrp, rp->filenum, rp->rank, rp->attribute);
	for (rp = ip2->result; rp != NULL; rp = rp->next)
		newrp = addtoresultlistMerge(newrp, rp->filenum, rp->rank, rp->attribute);

	ep = (struct indexentry *) emalloc(sizeof(struct indexentry));
	ep->word = (char *) mystrdup(ip1->word);
	ep->result = newrp;

	return ep;
}

/* This prints a new word entry into the merged index file: the word,
** a ':', then the file number, rank and attribute of every result
** written with compress(), then a 0 byte.
** The first time a word starting with a given index character is
** written, its position is recorded in the global "offsets" table.
*/
void printindexentry(struct indexentry *ip, FILE *fp)
{
	int i, num;
	struct resultMerge *rp;

	for (i = 0; indexchars[i] != '\0'; i++)
		if ((ip->word)[0] == indexchars[i] && !offsets[i])
			offsets[i] = ftell(fp);

	fprintf(fp, "%s:", ip->word);

	for (rp = ip->result; rp != NULL; rp = rp->next) {
		num = rp->filenum;
		compress(num, fp);
		compress(rp->rank, fp);
		compress(rp->attribute, fp);
	}

	fputc(0, fp);
}

#ifdef NDEF
/* This associates a number with a new number.
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -