📄 hash.c
字号:
fclose(hashfp); return 1;}/* * VERY particular to the format of the hash-table file: * -- does an fscanf+2atoi's+strlen all in one scan. * Returns 0 if you are in padded are, -1 on EOF, else ~. */intmyhashread(fp, pint1, pint2, str, plen) FILE *fp; int *pint1; int *pint2; char *str; int *plen;{ int numread; int int1, int2; int c; if((int1 = getc(fp)) == '\n') return 0; /* padded area */ if(int1 != 0) return -1; /* formatting error! */ if ((int1 = getc(fp)) == EOF) return -1; if ((int2 = getc(fp)) == EOF) return -1; *pint1 = (int1 << 8) | int2; /* hashindex */ if ((int1 = getc(fp)) == EOF) return -1; if ((int2 = getc(fp)) == EOF) return -1; *pint2 = (int1 << 8) | int2; /* wordindex */ numread = 5; *plen = 0; /* wordname */ while((c = getc(fp)) != EOF) { if ( (c == '\0') || (c == '\n') ){ ungetc(c, fp); str[*plen] = '\0'; return numread; } str[(*plen)++] = c; numread ++; if (numread >= MAX_NAME_LEN) { str[*plen - 1] = '\0'; return numread; } } return -1;}inttbuild_hash(hash_table, hashfp, bytestoread) hash_entry *hash_table[HASH_TABLE_SIZE]; FILE *hashfp; int bytestoread;{ int hashindex; int wordindex; int numread = 0; int ret; int len; char *word; char dummybuf[MAX_WORD_BUF]; hash_entry *e; if (bytestoread == -1) { /* read until end of file */ while (1) { if (usemalloc) word = dummybuf; else { if (free_str == NULL) free_str = (char *)malloc(AVG_WORD_LEN * DEF_MAX_WORDS); if (free_str == NULL) break; word = &free_str[next_free_str]; } if ((ret = myhashread(hashfp, &hashindex, &wordindex, word, &len)) == 0) continue; if (ret == -1) break; if ((hashindex >= HASH_TABLE_SIZE) || (hashindex < 0)) continue; /* ignore */ hashalloc(e); if (usemalloc) { if ((word = (char *)malloc(len + 2)) == NULL) break; strcpy(word, dummybuf); } else next_free_str += len + 2; e->word = word; e->val.attribute.freq = 0; /* just exists in compress's dict: not found in text-file yet! */ e->val.attribute.index = wordindex; e->next = hash_table[hashindex]; hash_table[hashindex] = e;#if 0 printf("word=%s index=%d\n", word, wordindex);#endif /*0*/ } } else { /* read only a specified number of bytes */ while (bytestoread > numread) { if (usemalloc) word = dummybuf; else { if (free_str == NULL) free_str = (char *)malloc(AVG_WORD_LEN * DEF_MAX_WORDS); if (free_str == NULL) break; word = &free_str[next_free_str]; } if ((ret = myhashread(hashfp, &hashindex, &wordindex, word, &len)) <= 0) break; if ((hashindex >= HASH_TABLE_SIZE) || (hashindex < 0)) continue; /* ignore */ hashalloc(e); if (usemalloc) { if ((word = (char *)malloc(len + 2)) == NULL) break; strcpy(word, dummybuf); } else next_free_str += len + 2; e->word = word; e->val.attribute.freq = 0; /* just exists in compress's dict: not found in text-file yet! */ e->val.attribute.index = wordindex; e->next = hash_table[hashindex]; hash_table[hashindex] = e; wordindex ++; numread += ret;#if 0 printf("%d %d %s\n", hashindex, wordindex, word);#endif /*0*/ } } return (wordindex + 1); /* the highest indexed word + 1 */}/* * Interprets srcbuf as a series of words separated by newlines and looks * for a complete occurrence of words in patbuf in it. If there IS an occurrence, * it builds the hash-table for THAT page. The hashfp must start at the * beginning on each call. */intbuild_partial_hash(hash_table, hashfp, srcbuf, srclen, patbuf, patlen, blocksize, loaded_hash_table) hash_entry *hash_table[HASH_TABLE_SIZE]; FILE *hashfp; unsigned char *srcbuf; int srclen; unsigned char *patbuf; int patlen; int blocksize; char loaded_hash_table[HASH_FILE_BLOCKS];{ unsigned char *srcpos; unsigned char *srcinit, *srcend, dest[MAX_NAME_LEN]; int blockindex = 0; int i, initlen, endlen; unsigned char *strings[MAX_NAME_LEN]; /* maximum pattern length */ int numstrings = 0; int inword = 0; /* * Find all the relevant strings in the pattern. */ i = 0; while(i<patlen) { if (isalnum(patbuf[i])) { if (!inword) { strings[numstrings++] = &dest[i]; inword = 1; } if (isupper(patbuf[i])) dest[i] = tolower(patbuf[i]); else dest[i] = patbuf[i]; } else { dest[i] = '\0'; /* ignore that character */ inword = 0; } i++; }#if 0 for (i=0; i<numstrings; i++) printf("word%d=%s\n", i, strings[i]); getchar();#endif /*0*/ srcpos = srcbuf; while (srcpos < (srcbuf + srclen)) { srcinit = srcpos; initlen = strlen((char *)srcinit); srcend = srcinit + initlen + 1; endlen = strlen((char *)srcend);#if 0 printf("%s -- %s\n", srcinit, srcend);#endif /*0*/ for (i=0; i<numstrings; i++) if ((strcmp((char *)strings[i], (char *)srcinit) >= 0) && (strcmp((char *)strings[i], (char *)srcend) <= 0)) goto include_page; blockindex++; srcpos += (initlen + endlen + 2); continue; include_page: /* Include it if any of the patterns fit within this range */ if (loaded_hash_table[blockindex++]) continue;#if 0 printf("build_partial_hash: hashing words in page# %d\n", blockindex);#endif /*0*/ loaded_hash_table[blockindex - 1] = 1; fseek(hashfp, (blockindex-1)*blocksize, 0); tbuild_hash(hash_table, hashfp, blocksize); srcpos += (initlen + endlen + 2); } return 0;}pad_hash_file(filename, FILEBLOCKSIZE) unsigned char *filename; int FILEBLOCKSIZE;{ FILE *outfp, *infp, *indexfp; int offset = 0, len; unsigned char buf[MAX_NAME_LEN]; int pid = getpid(); int i; unsigned char word[MAX_NAME_LEN]; unsigned char prev_word[MAX_NAME_LEN]; unsigned int hashindex, wordindex; char es1[MAX_LINE_LEN], es2[MAX_LINE_LEN]; if ((infp = fopen((char *)filename, "r")) == NULL) { fprintf(stderr, "cannot open for reading: %s\n", filename); exit(2); } sprintf(buf, "%s.index", filename); if ((indexfp = fopen(buf, "w")) == NULL) { fprintf(stderr, "cannot open for writing: %s\n", buf); fclose(infp); exit(2); } sprintf(buf, "%s.%d", filename, pid); if ((outfp = fopen(buf, "w")) == NULL) { fprintf(stderr, "cannot open for writing: %s\n", buf); fclose(infp); fclose(indexfp); exit(2); } if ((FILEBLOCKSIZE % MIN_BLOCKSIZE) != 0) { fprintf(stderr, "invalid block size %d: changing to %d\n", FILEBLOCKSIZE, MIN_BLOCKSIZE); FILEBLOCKSIZE = MIN_BLOCKSIZE; } fprintf(indexfp, "%d\n", FILEBLOCKSIZE); if ((char*)buf != fgets(buf, MAX_NAME_LEN, infp)) goto end_of_input; len = strlen((char *)buf); sscanf(buf, "%d %d %s\n", &hashindex, &wordindex, word); putc(0, outfp); putc((hashindex & 0xff00)>>8, outfp); putc((hashindex & 0x00ff), outfp); putc((wordindex & 0xff00)>>8, outfp); putc((wordindex & 0x00ff), outfp); fprintf(outfp, "%s", word); buf[len-1] = '\0'; /* fgets gives you the newline too */ for (i=0; i< len; i++) if (isupper(buf[i])) buf[i] = tolower(buf[i]); for (i=len-2; i>=0; i--) if (buf[i] == ' ') { i++; break; } if (i < 0) i = 0; strcpy((char *)prev_word, (char *)&buf[i]); fprintf(indexfp, "%s", &buf[i]); /* the first word */ putc(0, indexfp); /* null terminated */ offset += strlen((char *)word)+5; while(fgets(buf, MAX_NAME_LEN, infp) == (char *)buf) { len = strlen((char *)buf); if (offset + len > FILEBLOCKSIZE) { /* Put the last char of the prev. page */ fprintf(indexfp, "%s", prev_word); putc(0, indexfp); /* null terminated */ for (i=0; i<FILEBLOCKSIZE-offset; i++) /* fill up with so many newlines until the next block size */ putc('\n', outfp); sscanf(buf, "%d %d %s\n", &hashindex, &wordindex, word); putc(0, outfp); putc((hashindex & 0xff00)>>8, outfp); putc((hashindex & 0x00ff), outfp); putc((wordindex & 0xff00)>>8, outfp); putc((wordindex & 0x00ff), outfp); fprintf(outfp, "%s", word); buf[len-1] = '\0'; /* fgets gives you the newline too */ for (i=0; i< len; i++) if (isupper(buf[i])) buf[i] = tolower(buf[i]); for (i=len-2; i>=0; i--) if (buf[i] == ' ') { i++; break; } if (i < 0) i = 0; strcpy((char *)prev_word, (char *)&buf[i]); fprintf(indexfp, "%s", &buf[i]); /* store the first word at each page */ putc(0, indexfp); /* null terminated */ offset = 0; } else { sscanf(buf, "%d %d %s\n", &hashindex, &wordindex, word); putc(0, outfp); putc((hashindex & 0xff00)>>8, outfp); putc((hashindex & 0x00ff), outfp); putc((wordindex & 0xff00)>>8, outfp); putc((wordindex & 0x00ff), outfp); fprintf(outfp, "%s", word); buf[len-1] = '\0'; /* fgets gives you the newline too */ for (i=0; i<len; i++) if (isupper(buf[i])) buf[i] = tolower(buf[i]); for (i=len-2; i>=0; i--) if (buf[i] == ' ') { i++; break; } if (i < 0) i = 0; strcpy((char *)prev_word, (char *)&buf[i]); } offset += strlen((char *)word)+5; } fprintf(indexfp, "%s", prev_word); putc(0, indexfp); /* null terminated */end_of_input: fclose(infp); fflush(outfp); fclose(outfp); fflush(indexfp); fclose(indexfp); sprintf(buf, "exec %s '%s.%d' '%s'\n", SYSTEM_MV, tescapesinglequote(filename, es1), pid, tescapesinglequote(filename, es2)); system(buf);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -