📄 search.c
字号:
/*
** Copyright (C) 1995, Enterprise Integration Technologies Corp.
** All Rights Resereved.
** Kevin Hughes, kevinh@eit.com
** 3/11/94
**
** Released under the GPL by EIT
**
** Heavily hacked for Harvest
*/

#include "swish.h"
#include "search.h"
#include "file.h"
#include "list.h"
#include "string.h"
#include "hash.h"
#include "merge.h"
#include "mem.h"

/* Copies src into dest, truncating so that at most MAXWORDLEN - 1
** characters plus the terminating NUL are written. dest must have
** room for MAXWORDLEN characters.
*/

static void search_copyword(dest, src)
     char *dest;
     char *src;
{
    int n;

    for (n = 0; n < MAXWORDLEN - 1 && src[n] != '\0'; n++)
        dest[n] = src[n];
    dest[n] = '\0';
}

/* The main search function.
** Parentheses are stripped out, things made lowercase,
** extra blanks removed, etc.
**
** words     - the raw query; scanning stops at the first NUL or newline.
** indexlist - list of index file names; each one is searched in turn.
**
** The query is split into tokens stored in the global searchwordlist:
** whitespace separates words, "(" ")" and "=" become tokens of their
** own, and "/=" puts a literal '=' into the current word. Results are
** written to stdout; any error prints an "err:" line and exits.
** Words longer than MAXWORDLEN - 1 characters are truncated instead of
** overrunning the word buffer.
*/

void search(words, indexlist)
     char *words;
     struct swline *indexlist;
{
    int i, j, c, metaName;
    float num;
    char word[MAXWORDLEN];
    struct result *resultlist;
    struct sortresult *sortresultlist;
    struct swline *tmplist;
    FILE *fp;
#ifdef DEBUG
    struct swline *newp2;
#endif

    searchwordlist = NULL;
    metaName = 1;

    for (i = j = 0; words[i] != '\0' && words[i] != '\n'; i++) {
        /* <ctype.h> functions need an unsigned char value */
        c = (unsigned char) words[i];

        if (c == '=') {
            if (j == 0) {
                searchwordlist = (struct swline *)
                    addswline(searchwordlist, "=");
            }
            else if (words[i - 1] != '/') {
                word[j] = '\0';
                searchwordlist = (struct swline *)
                    addswline(searchwordlist,
                              (char *) convertentities(word));
                j = 0;
                searchwordlist = (struct swline *)
                    addswline(searchwordlist, "=");
            }
            else {
                /* "/=": overwrite the '/' with a literal '=' */
                word[j - 1] = '=';
            }
        }
        else if (isspace(c) || c == '(' || c == ')') {
            if (j) {
                word[j] = '\0';
                searchwordlist = (struct swline *)
                    addswline(searchwordlist,
                              (char *) convertentities(word));
                j = 0;
            }
            if (c == '(')
                searchwordlist = (struct swline *)
                    addswline(searchwordlist, "(");
            if (c == ')')
                searchwordlist = (struct swline *)
                    addswline(searchwordlist, ")");
        }
        else if (j < MAXWORDLEN - 1) {
            /* keep one slot free for the terminating NUL */
            word[j++] = tolower(c);
        }
    }
    if (j) {
        word[j] = '\0';
        searchwordlist = (struct swline *)
            addswline(searchwordlist, (char *) convertentities(word));
    }

    printf("%s\n", INDEXHEADER);
    if (words[0] == '\0') {
        printf("err: no search words specified\n.\n");
        exit(0);
    }

    printf("# Search words:");
    for (tmplist = searchwordlist; tmplist != NULL; tmplist = tmplist->next)
        printf(" %s", tmplist->line);
    putchar('\n');

    while (indexlist != NULL) {
        commonerror = bigrank = 0;

        if ((fp = fopen(indexlist->line, "r")) == NULL) {
            printf("# Name: unknown index\n");
            printf("err: could not open index file\n.\n");
            exit(0);
        }
        if (!isokindexheader(fp)) {
            printf("err: the index file format is unknown\n.\n");
            exit(0);
        }
        getheader(fp);
        if (!getindexfilenum(fp)) {
            printf("err: the index file is empty\n.\n");
            exit(0);
        }

        readoffsets(fp);
        readstopwords(fp);
        readfileoffsets(fp);
        readMetaNames(fp);

        resultlist = NULL;
        tmplist = searchwordlist;
        /* fixnot() is defined with a single parameter */
        tmplist = (struct swline *) fixnot(tmplist);
        searchwordlist = (struct swline *) expandstar(tmplist, fp);

#ifdef DEBUG
        newp2 = searchwordlist;
        while (newp2 != NULL) {
            printf("%s ", newp2->line);
            newp2 = newp2->next;
        }
        putchar('\n');
#endif

        resultlist = (struct result *) parseterm(fp, 0, metaName);

        sortresultlist = NULL;
        while (resultlist != NULL) {
            sortresultlist = (struct sortresult *)
                addsortresult(sortresultlist, resultlist->rank,
                              lookupfile(resultlist->filenum, fp));
            resultlist = resultlist->next;
        }

        fclose(fp);

        if (sortresultlist == NULL) {
            if (commonerror)
                printf("err: a word is too common\n");
            else
                printf("err: no results\n");
        }
        else {
            if (bigrank)
                num = 1000.0 / (float) bigrank;
            else
                num = 1000;
            printsortedresults(sortresultlist, num);
        }

        /* The next index starts from the fixnot() output, not from
        ** the wildcard-expanded list.
        */
        searchwordlist = tmplist;
        indexlist = indexlist->next;
    }

    printf(".\n");
}

/* This puts parentheses in the right places around not structures
** so the parser can do its thing correctly.
** It does it both for 'not' and '='; the '=' is used for the METADATA (GH)
**
** sp is the token list to rewrite. A new list is built with addswline()
** and returned; sp itself is only read.
*/

struct swline *fixnot(sp)
     struct swline *sp;
{
    int openparen, hasnot;
    int openMeta, hasMeta;
    struct swline *tmpp, *newp;
#ifdef DEBUG
    struct swline *newp2;
#endif

    tmpp = sp;
    newp = NULL;

    openparen = 0;
    openMeta = 0;
    hasMeta = 0;
    hasnot = 0;

    while (tmpp != NULL) {
        /* Track nesting depth while inside a "not" or "=" operand */
        if (((tmpp->line)[0] == '(') && hasnot)
            openparen++;
        else if (((tmpp->line)[0] == '(') && hasMeta)
            openMeta++;
        else if (((tmpp->line)[0] == ')') && hasnot)
            openparen--;
        else if (((tmpp->line)[0] == ')') && hasMeta)
            openMeta--;

        /* Two nodes are skipped below, so a successor must exist */
        if (tmpp->next != NULL && isMetaName(tmpp->next)) {
            /* If it is a metaName add the name and = and skip to next */
            hasMeta = 1;
            newp = (struct swline *) addswline(newp, "(");
            newp = (struct swline *) addswline(newp, tmpp->line);
            newp = (struct swline *) addswline(newp, "=");
            tmpp = tmpp->next;
            tmpp = tmpp->next;
            continue;
        }

        if (!strcmp(tmpp->line, "not")) {
            hasnot = 1;
            newp = (struct swline *) addswline(newp, "(");
        }
        else if (hasnot && !openparen) {
            /* The operand of "not" is complete: close the group */
            hasnot = 0;
            newp = (struct swline *) addswline(newp, tmpp->line);
            newp = (struct swline *) addswline(newp, ")");
            tmpp = tmpp->next;
            continue;
        }
        else if (hasMeta && !openMeta) {
            /* The operand of "=" is complete: close the group */
            hasMeta = 0;
            newp = (struct swline *) addswline(newp, tmpp->line);
            newp = (struct swline *) addswline(newp, ")");
            tmpp = tmpp->next;
            continue;
        }

        newp = (struct swline *) addswline(newp, tmpp->line);
        if (!strcmp(tmpp->line, "=")) {
            hasMeta = 1;
            newp = (struct swline *) addswline(newp, "(");
        }
        tmpp = tmpp->next;
    }

#ifdef DEBUG
    newp2 = newp;
    while (newp2 != NULL) {
        printf("%s ", newp2->line);
        newp2 = newp2->next;
    }
    putchar('\n');
#endif

    return newp;
}

/* Expands words with asterisks as wildcards into a series of
** "or" searches. Terms like "quick*" are expanded into
** "( quicktime or quickly )", etc.
**
** A word that starts with '*' or has no '*' is copied unchanged.
** Otherwise everything from the first '*' on is dropped and the
** remaining prefix is looked up with getmatchword(). If nothing
** matches, NOWORD is added in place of the term.
** Returns the newly built list; sp is only read.
*/

struct swline *expandstar(sp, fp)
     struct swline *sp;
     FILE *fp;
{
    int firsttime;
    char foundword[MAXWORDLEN], searchword[MAXWORDLEN];
    char *star;
    struct swline *newp;

    newp = NULL;
    for (; sp != NULL; sp = sp->next) {
        /* Bounded copy: sp->line is not guaranteed to fit */
        search_copyword(searchword, sp->line);

        star = (char *) strchr(searchword, '*');
        if (star == NULL || star == searchword) {
            newp = (struct swline *) addswline(newp, searchword);
            continue;
        }

        /* Keep only the prefix in front of the first '*' */
        *star = '\0';

        for (firsttime = 0; ; firsttime++) {
            search_copyword(foundword,
                            getmatchword(searchword, fp, firsttime));

            if (!strcmp(foundword, NOWORD)) {
                if (!firsttime)
                    newp = (struct swline *) addswline(newp, NOWORD);
                else
                    /* Add ")" if last of many */
                    newp = (struct swline *) addswline(newp, ")");
                break;
            }

            /* Add "(" if it is the first time, "or" otherwise */
            if (firsttime == 0)
                newp = (struct swline *) addswline(newp, "(");
            else
                newp = (struct swline *) addswline(newp, "or");
            newp = (struct swline *) addswline(newp, foundword);
        }
    }

    return newp;
}

/* Returns the next indexed word that begins with word, or NOWORD.
**
** If firsttime is 0 the file is first positioned at the offset recorded
** for word's first character (NOWORD if that character is not in
** indexchars). For any other value the scan continues from the current
** file position, so successive calls step through the matches.
**
** An index entry is read as the characters up to ':' (the word)
** followed by data that is skipped up to a NUL byte. The result points
** to a static buffer that the next call overwrites. Reaching end of
** file ends the scan, and over-long words are truncated to fit.
*/

char *getmatchword(word, fp, firsttime)
     char *word;
     FILE *fp;
     int firsttime;
{
    int i, c, found;
    char *d;
    static char fileword[MAXWORDLEN];

    if (!firsttime) {
        for (i = found = 0; indexchars[i] != '\0'; i++)
            if (word[0] == indexchars[i]) {
                fseek(fp, offsets[i], SEEK_SET);
                found = 1;
            }
        if (!found)
            return NOWORD;
    }

    /* The word entries stop where the stopword section starts */
    if (offsets[STOPWORDPOS] == ftell(fp))
        return NOWORD;

    i = 0;
    while ((c = fgetc(fp)) != 0 && c != EOF) {
        if (c != ':') {
            if (i < MAXWORDLEN - 1)
                fileword[i++] = c;
            continue;
        }

        fileword[i] = '\0';
        i = 0;

        /* Skip the rest of this entry */
        while ((c = fgetc(fp)) != 0 && c != EOF)
            ;

        /* Once the first letter differs there is nothing more to find */
        if (fileword[0] != word[0])
            return NOWORD;

        d = (char *) strstr(fileword, word);
        if (d != NULL && d == &fileword[0])
            return fileword;

        if (c == EOF || offsets[STOPWORDPOS] == ftell(fp))
            return NOWORD;
    }

    return NOWORD;
}

/* Reads and prints the header of an index file.
** The first line is read and discarded, every following line that
** starts with '#' is printed, and the file is rewound afterwards.
*/

void getheader(fp)
     FILE *fp;
{
    int c;
    char line[MAXSTRLEN];

    fgets(line, MAXSTRLEN, fp);
    for (;;) {
        /* Peek at the first character of the next line */
        c = fgetc(fp);
        ungetc(c, fp);
        if (c != '#')
            break;
        if (fgets(line, MAXSTRLEN, fp) == NULL)
            break;
        printf("%s", line);
    }
    fseek(fp, 0, SEEK_SET);
}

/* Reads the offsets in the index file so word lookup is faster.
**
** Leading '#' lines are skipped. The next line is read as a run of
** decimal numbers of at most MAXLONGLEN digits each, stored in order
** in the global offsets[]. Reading stops at newline, at end of file,
** at the first character that is not a digit, or after MAXCHARS
** entries (offsets[] is cleared for MAXCHARS entries here, so that is
** taken as its capacity).
*/

void readoffsets(fp)
     FILE *fp;
{
    int c, i, ndigits;
    long j, num;

    for (i = 0; i < MAXCHARS; i++)
        offsets[i] = 0;

    fseek(fp, 0, SEEK_SET);
    while ((c = fgetc(fp)) == '#') {
        /* Skip the comment line; stop at end of file as well */
        do {
            c = fgetc(fp);
        } while (c != 0 && c != EOF && c != '\n');
    }

    j = 0;
    while (c != EOF && c != '\n' && j < MAXCHARS) {
        num = 0;
        for (ndigits = 0; ndigits < MAXLONGLEN && isdigit(c); ndigits++) {
            num = (num * 10) + (c - '0');
            c = fgetc(fp);
        }
        /* No digit consumed: stop instead of looping forever */
        if (ndigits == 0)
            break;
        offsets[j++] = num;
    }
}

/* Reads the stopwords in the index file.
** Starting at offsets[STOPWORDPOS], every whitespace-terminated word on
** the line is passed to addstophash(). A final word that is not
** followed by whitespace before the newline is not added. Words are
** truncated to MAXWORDLEN - 1 characters.
*/

void readstopwords(fp)
     FILE *fp;
{
    int i, c;
    char word[MAXWORDLEN];

    fseek(fp, offsets[STOPWORDPOS], SEEK_SET);
    i = 0;
    while ((c = fgetc(fp)) != '\n' && c != EOF) {
        if (!isspace(c)) {
            if (i < MAXWORDLEN - 1)
                word[i++] = c;
        }
        else {
            word[i] = '\0';
            addstophash(word);
            i = 0;
        }
    }
}

/* Reads the metaNames from the index.
** Starting at offsets[METANAMEPOS], every whitespace-terminated word on
** the line is appended to the global metaEntryList. A final word that
** is not followed by whitespace before the newline is not added. Words
** are truncated to MAXWORDLEN - 1 characters.
*/

void readMetaNames(fp)
     FILE *fp;
{
    int i, c;
    char word[MAXWORDLEN];

    fseek(fp, offsets[METANAMEPOS], SEEK_SET);
    i = 0;
    while ((c = fgetc(fp)) != '\n' && c != EOF) {
        if (!isspace(c)) {
            if (i < MAXWORDLEN - 1)
                word[i++] = c;
        }
        else {
            word[i] = '\0';
            metaEntryList = addMetaEntry(metaEntryList, word);
            i = 0;
        }
    }
}

/* Reads the file offset table in the index file.
** Starting at offsets[FILEOFFSETPOS], the line is read as a run of
** decimal numbers of at most MAXLONGLEN digits each. The n-th number
** (counting from 0) is passed to addtofilehashlist() together with n.
** Reading stops at newline, at end of file, or at the first character
** that is not a digit.
*/

void readfileoffsets(fp)
     FILE *fp;
{
    int j, c, ndigits;
    long num;

    j = 0;
    fseek(fp, offsets[FILEOFFSETPOS], SEEK_SET);
    c = fgetc(fp);
    while (c != EOF && c != '\n') {
        num = 0;
        for (ndigits = 0; ndigits < MAXLONGLEN && isdigit(c); ndigits++) {
            num = (num * 10) + (c - '0');
            c = fgetc(fp);
        }
        /* No digit consumed: stop instead of looping forever */
        if (ndigits == 0)
            break;
        addtofilehashlist(j++, num);
    }
}

/* The recursive parsing function.
** This was a headache to make but ended up being surprisingly easy. :)
** parseone tells the function to only operate on one word or term.
*/

struct result *parseterm(fp, parseone, metaName)
     FILE *fp;
     int parseone;
     int metaName;
{
    int rulenum;
    char word[MAXWORDLEN];
    struct result *rp, *newrp;

    rp = NULL;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -