⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 search.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
	rulenum = OR_RULE;	while (searchwordlist != NULL) {		strcpy(word, searchwordlist->line);		if (rulenum == NO_RULE)			rulenum = DEFAULT_RULE;		if (isunaryrule(word)) {			searchwordlist = searchwordlist->next;			rp = (struct result *) parseterm(fp, 1, metaName);			rp = (struct result *) notresultlist(rp, fp);			/* Wild goose chase */			rulenum = NO_RULE;			continue;		}		else if (isbooleanrule(word)) {			rulenum = getrulenum(word);			searchwordlist = searchwordlist->next;			continue;		}		if (word[0] == '(') {			searchwordlist = searchwordlist->next;			newrp = (struct result *) parseterm(fp, 0, metaName);			if (rulenum == AND_RULE)				rp = (struct result *)				andresultlists(rp, newrp);			else if (rulenum == OR_RULE)				rp = (struct result *)				orresultlists(rp, newrp);			if (searchwordlist == NULL)				break;			rulenum = NO_RULE;			continue;		}		else if (word[0] == ')') {			searchwordlist = searchwordlist->next;			break;		}		/* Check if the next word is '=' */		if ( isMetaName(searchwordlist->next) ) {		  metaName = getMetaName(word);		  if (metaName == 1){		    printf ("err: The metaName %s doesn't exist in  user configfile\n",			    word);		    exit(0);		  }		  /* Skip both the metaName end the '=' */		  searchwordlist = searchwordlist->next->next;		  newrp = (struct result *) parseterm(fp, 1, metaName);		  if (rulenum == AND_RULE)		    rp = (struct result *) andresultlists(rp, newrp);		  else if (rulenum == OR_RULE)		    rp = (struct result *) orresultlists(rp, newrp);		  if (searchwordlist == NULL)		    break;		  rulenum = NO_RULE;		  metaName = 1;		  continue;		}		rp = (struct result *) operate(rp, rulenum, word, 					       fp, metaName);		if (parseone) {			searchwordlist = searchwordlist->next;			break;		}		rulenum = NO_RULE;		searchwordlist = searchwordlist->next;        }	return rp;}/* Looks up a word in the index file -** it calls getfileinfo(), which does the real searching.*/struct result *operate(struct result *rp, int rulenum, char *word, FILE *fp, 		    
   int attribute){        int i, found;	struct result *newrp, *returnrp=NULL;	if (isstopword(word) && !isrule(word)) {		if (rulenum == OR_RULE && rp != NULL)			return rp;		else			commonerror = 1;	}	for (i = found = 0; indexchars[i] != '\0'; i++)		if (word[0] == indexchars[i]) {			fseek(fp, offsets[i], 0);			found = 1;		}	if (!found) {		if (rulenum == AND_RULE)			return NULL;		else if (rulenum == OR_RULE)			return rp;	}	newrp = getfileinfo(word, fp, attribute);	if (rulenum == AND_RULE)		returnrp = andresultlists(rp, newrp);	else if (rulenum == OR_RULE)		returnrp = orresultlists(rp, newrp);	else if (rulenum == NOT_RULE)		returnrp = notresultlist(newrp, fp);	return returnrp;}/* Looks up a file name in the index file.*/char *lookupfile(int filenum, FILE *fp){        static char line[MAXSTRLEN];        fseek(fp, getfilenum(decodefilenum(filenum) - 1), 0);        fgets(line, MAXSTRLEN, fp);	return line;}/* Finds a word and returns its corresponding file and rank information list.** If not found, NULL is returned.*/struct result *getfileinfo(char *word, FILE *fp, int attribute){	int i, c, x, s, countnum, rank = 0, filenum = 0;        char fileword[MAXWORDLEN];        struct result *rp;	rp = NULL;        for (i = 0; (c = fgetc(fp)) != 0; ) {                if (c == ':') {                        fileword[i] = '\0';			i = 0;			s = strcmp(word, fileword);                        if (s==0)                                break;			else if (s<0) 				return NULL;                        else {				while ((c = fgetc(fp)) != 0)					;				if (offsets[STOPWORDPOS] == ftell(fp))					return NULL;				continue;			}                }		else			fileword[i++] = c;	}	if (c == 0) return NULL;        countnum = 1;	ungetc(c, fp);	while ((c = fgetc(fp)) != 0) {		x = 0;		do {			c = fgetc(fp);			if (c == 0)				return rp;			x *= 128;			x += c & 127;		} while (c & 128);		if (x) {			if (countnum == 1) {				filenum = x;				countnum++;			}			else if (countnum == 2) {				rank = x;				countnum++;			}			else if 
(countnum == 3) {			  if ( x == attribute )			    rp = addtoresultlist(rp,filenum, rank);			  countnum = 1;			}		      }	      }		return rp;}/* Is a word a rule?*/int isrule(word)     char *word;{	if (!strcmp(word, "and") || !strcmp(word, "or") || !strcmp(word, "not"))		return 1;	else		return 0;}/* Is a word a boolean rule?*/int isbooleanrule(word)     char *word;{	if (!strcmp(word, "and") || !strcmp(word, "or"))		return 1;	else		return 0;}/* Is a word a unary rule?*/int isunaryrule(word)     char *word;{	if (!strcmp(word, "not"))		return 1;	else		return 0;}/* Return the number for a rule.*/int getrulenum(word)     char *word;{	if (!strcmp(word, "and"))		return AND_RULE;	else if (!strcmp(word, "or"))		return OR_RULE;	else if (!strcmp(word, "not"))		return NOT_RULE;	return NO_RULE;}/* Takes two lists of results from searches and ANDs them together.*/struct result *andresultlists(r1, r2)     struct result *r1;     struct result *r2;{        static struct result *tmpnode, *newnode;        if (r1 == NULL || r2 == NULL)                return NULL;        newnode = NULL;        while (r1 != NULL) {                tmpnode = r2;                while (tmpnode != NULL) {                        if (r1->filenum == tmpnode->filenum)                                newnode = (struct result *)                                addtoresultlist(newnode, r1->filenum,                                (r1->rank + tmpnode->rank) / 2);                        tmpnode = tmpnode->next;                }                r1 = r1->next;        }        return newnode;}/* Takes two lists of results from searches and ORs them together.*/struct result *orresultlists(r1, r2)     struct result *r1;     struct result *r2;{        int i;        struct result *rp;        static struct result *newnode;	newnode = NULL;        if (r1 == NULL)                return r2;        else if (r2 == NULL)                return r1;	initresulthashlist();	while (r1 != NULL) {		mergeresulthashlist(r1->filenum, r1->rank);		r1 
= r1->next;	}	while (r2 != NULL) {		mergeresulthashlist(r2->filenum, r2->rank);		r2 = r2->next;	}	for (i = 0; i < HASHSIZE; i++) {		rp = resulthashlist[i];		while (rp != NULL) {			newnode = (struct result *) addtoresultlist(newnode,			rp->filenum, rp->rank);			rp = rp->next;		}	}        return newnode;}/* This performs the NOT unary operation on a result list.** NOTed files are marked with a default rank of 1000.**** Basically it returns all the files that have not been** marked (GH)*/struct result *notresultlist(rp, fp)     struct result *rp;     FILE *fp;{	int i, filenums;        struct result *newp;	newp = NULL;	initmarkentrylist();	while (rp != NULL) {		marknum(rp->filenum);		rp = rp->next;	}	filenums = getindexfilenum(fp);	for (i = 1; i <= filenums; i++) {	  if (!ismarked(i))	    newp = (struct result *) addtoresultlist(newp,						     i, 1000, IN_ALL);	}	return newp;}/* Adds a file number and rank to a list of results.*/struct result *addtoresultlist(rp, filenum, rank)     struct result *rp;     int filenum;     int rank;{        struct result *newnode;        static struct result *head;        newnode = (struct result *) emalloc(sizeof(struct result));        newnode->filenum = filenum;        newnode->rank = rank;        newnode->next = NULL;        if (rp == NULL)                rp = newnode;        else                head->next = newnode;        head = newnode;        return rp;}/* Adds the results of a search, sorts them by rank.*/struct sortresult *addsortresult(sp, rank, fileinfo)     struct sortresult *sp;     int rank;     char *fileinfo;{        if (rank > bigrank)                bigrank = rank;        if (sp == NULL) {                sp = (struct sortresult *) emalloc(sizeof(struct sortresult));                sp->rank = rank;                sp->fileinfo = (char *) mystrdup(fileinfo);                sp->left = sp->right = NULL;        }        else {                if (sp->rank < rank)                        sp->left = (struct sortresult *)        
                addsortresult(sp->left, rank, fileinfo);                else                        sp->right = (struct sortresult *)                        addsortresult(sp->right, rank, fileinfo);        }        return sp;}/* Prints the final results of a search.*/void printsortedresults(sp, num)     struct sortresult *sp;     float num;{        int rank;        if (sp != NULL) {                printsortedresults(sp->left, num);                rank = (int) ((float) sp->rank * num);                if (rank >= 999)                        rank = 1000;		if (maxhits) {                	if (maxhits == -1)				printf("%d %s", (rank <= 0) ? 1 :				rank, sp->fileinfo);			else if (maxhits > 0) {				printf("%d %s", (rank <= 0) ? 1 :				rank, sp->fileinfo);				maxhits--;			}		}                printsortedresults(sp->right, num);        }}/* Reads a compressed line. This is just here for testing, etc.*/void getrawindexline(fp)     FILE *fp;{        int c, inword;        inword = 1;        while ((c = fgetc(fp)) != EOF) {                if (c == ':' && inword)                        inword = 0;                if (!inword) {                        do {                                c = fgetc(fp);                                if (c == 0)                                        return;                        } while (c & 128);                }        }}/* Does an index file have a readable format?*/int isokindexheader(fp)     FILE *fp;{	char line[MAXSTRLEN];	fseek(fp, 0, 0);	fgets(line, MAXSTRLEN, fp);	if (line[strlen(line) - 1] == '\n')		line[strlen(line) - 1] = '\0';	if (strcmp(line, INDEXHEADER)) {		fseek(fp, 0, 0);		return 0;	}	fseek(fp, 0, 0);	return 1;}/* Returns the value associated with the metaName if it exists*/int getMetaName(word)     char * word;{  struct metaEntry* temp;    for (temp = metaEntryList; temp != NULL; temp = temp->next)     if (!strcmp(temp->metaName, word) )      return temp->index;  return 1;}/* Checks if the next word is "="*/int isMetaName 
(searchWord)     struct swline* searchWord;{  if (searchWord == NULL)    return 0;  if (!strcmp(searchWord->line, "=") )    return 1;  return 0;}      

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -