📄 search.cpp
字号:
#include <sys/time.h>#include <regex.h>#include "conf.h"char* FileDB[MAXHOSTS];int OnList[MAXHOSTS];char* token[MAXHOSTS];regex_t rc[MAXHOSTS];char itemtype[12];FILE *idx, *ref;int Counter;int tokencnt;bool onlineonly;timeval starttime;void Chop(char *s){ char *ptr; if((ptr=strchr(s,CR))!=NULL) *ptr='\0'; if((ptr=strchr(s,LF))!=NULL) *ptr='\0';}int timeval_subtract (timeval *result, timeval *x, timeval *y){ /* Perform the carry for the later subtraction by updating y. */ if (x->tv_usec < y->tv_usec) { int nsec = (y->tv_usec - x->tv_usec) / 1000000 + 1; y->tv_usec -= 1000000 * nsec; y->tv_sec += nsec; } if (x->tv_usec - y->tv_usec > 1000000) { int nsec = (x->tv_usec - y->tv_usec) / 1000000; y->tv_usec += 1000000 * nsec; y->tv_sec -= nsec; } /* Compute the time remaining to wait. tv_usec is certainly positive. */ result->tv_sec = x->tv_sec - y->tv_sec; result->tv_usec = x->tv_usec - y->tv_usec; /* Return 1 if result is negative. */ return x->tv_sec < y->tv_sec;}void InitTiming(){ gettimeofday(&starttime, NULL);}void PrintTiming(const char* event){ timeval now, rs; gettimeofday(&now, NULL); timeval_subtract(&rs, &now, &starttime); fprintf(stderr, "%03d%03d.%03d -- %s\n", rs.tv_sec, rs.tv_usec/1000, rs.tv_usec%1000, event);}void LoadFDB(void){ FILE *filedb; char buffer[MAX], fn[MAX], host[MAX]; int c, on; fprintf(stderr,"Loading filelist..."); if(!(filedb=fopen(DBHOSTS,"r"))) { perror("550 Cannot open filelist"); exit(101); } while(1) { fgets(buffer, MAX, filedb); if (feof(filedb)) break; sscanf(buffer,"%d %d %s", &c, &on, host); Chop(host); sprintf(fn, "ftpdata/%s.dat", host); FileDB[c] = new char[strlen(fn)+2]; strcpy(FileDB[c], fn); OnList[c] = on; } fclose(filedb); fprintf(stderr,"end\n");}int FindBItem(FILE *idx, char *key, DiskBTreeHead *head){ int i, r; char tmp[20]; while(1) { fread(head, sizeof(DiskBTreeHead), 1, idx); if(!strncasecmp(key, head->key, KEYSIZE)) return S_OK; for(i=0;i<B;i++) { if((*head->treerefs[i].top) && (strncasecmp(key,head->treerefs[i].top,KEYSIZE)<=0)) { r = ftell(idx); fseek(idx, head->treerefs[i].offset, SEEK_SET); if (ftell(idx) == r) return S_ERROR; break; } } if(i==B) return S_ERROR; }}void DumpRefs(FILE *idx, FILE *ref, long numhosts){ FILE *db; HostRef hostref; char buffer[MAX]; char *fn, host[MAX]; long itemcnt, offset; int i, j, t; bool dironly = false; for (t = 0; t < tokencnt; t++) { regcomp(&rc[t], token[t], REG_ICASE); } if (strcmp(itemtype, "directory") == 0) dironly = true; itemcnt = 0L; for (i = 0; i < numhosts; i++) {// PrintTiming("Start Dump a host"); fread(&hostref, sizeof(HostRef), 1, idx); if (onlineonly && OnList[hostref.host] == 0) continue; fn = FileDB[hostref.host]; strcpy(host, fn+8); host[strlen(host)-4] = 0; db = fopen(fn, "r"); fseek(ref, hostref.offset * sizeof(long), SEEK_SET);// PrintTiming("Finish Read a hostref"); for (j = 0; j < hostref.len; j++) {// PrintTiming("Start Read a itemref"); fread(&offset, sizeof(long), 1, ref); fseek(db, offset, SEEK_SET); fgets(buffer, MAX, db); char *p1 = buffer+strlen(buffer), *p2; while (p1 != buffer && *p1 != '/') p1--; p1++; p2 = p1; while (*(p2+1) != 0 && (*p2 != '!' || *(p2+1) != '!')) p2++; *p2 = 0; if (dironly && *(p2+2) != 'd') continue; bool matched = true; for (t = 0; t < tokencnt; t++) { if (regexec(&rc[t], p1, 0, NULL, 0) != 0) { matched = false; break; } } if (matched && itemtype[0] != 0 && !dironly && strcasecmp(p2-strlen(itemtype), itemtype) != 0) matched = false; *p2 = '!'; // PrintTiming("Finish Read a itemref"); if (matched) { printf("%s!!%s", host, buffer); itemcnt++; } } fclose(db); }}void OpenIdx(){ if (!(idx = fopen(DBINDEX, "r"))) { printf("550 Cannot open SEEDBTREE"); exit(102); } if (!(ref = fopen(DBITEMOFFSET, "r"))) { printf("550 Cannot open SEEDBTREE"); exit(102); }}void DumpBTree(char* key){ DiskBTreeHead head; if (!idx) { fprintf(stderr,"ERROR ! Idx not open !\n"); return; } fread(&head, sizeof(DiskBTreeHead), 1, idx); fprintf(stderr,"numhosts : %ld\n", head.numhosts); PrintTiming("Finish Read Last BTreeHead"); DumpRefs(idx, ref, head.numhosts); fclose(idx); fclose(ref); idx=NULL;}void Substring(){ char *c; char z[2]; char key[KEYSIZE+1]; DiskBTreeHead head; long minrefs; long bestoffset; int i, t, len; minrefs=-1; bestoffset=0L; for (t = 0; t < tokencnt; t++) { c = token[t]; len = strlen(c) - KEYSIZE; for(i = 0; i <= len; i++) { rewind(idx); if(FindBItem(idx, &c[i], &head)!=S_OK) { bestoffset=-1; continue; } if((minrefs==-1) || (minrefs>head.numrefs)) { bzero(key,sizeof(key)); strncpy(key,&c[i],KEYSIZE); minrefs=head.numrefs; bestoffset=ftell(idx)-sizeof(DiskBTreeHead); } } } if(bestoffset==-1) { return; } key[KEYSIZE]='\0'; fseek(idx, bestoffset, SEEK_SET); fprintf(stderr, "Searching (key=`%s') (offset=%ld)\n",key,bestoffset); fflush(stdout); Counter=-1; PrintTiming("Finish Find Key"); DumpBTree(key);}void PreProcess(char* query){ const char delimiters[] = " "; char *t, *cp; tokencnt = 0; cp = strdup(query); t = strtok(cp, delimiters); while (t) { tokencnt++; token[tokencnt-1] = strdup(t); t = strtok(NULL, delimiters); }}int main(int argc, char** argv){ if (argc <= 1) return -1; onlineonly = false; if (argc >= 3 && strncmp(argv[2], "on", 2) == 0) onlineonly = true; itemtype[0] = 0; if (argc >= 4) strncpy(itemtype, argv[3], 10); InitTiming(); LoadFDB(); PrintTiming("Finish Load Host List"); OpenIdx(); PrintTiming("Finish Open Index"); PreProcess(argv[1]); Substring(); PrintTiming("Finish All"); return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -