📄 string.c
字号:
/* Copyright (c) 1994 Burra Gopal, Udi Manber.  All Rights Reserved. */

/*
 * string.c:	String table manipulation routines. Can be used to compute
 *		the dictionary as well as uncompress files.
 */

#include "defs.h"

extern int MAX_WORDS;
extern int RESERVED_CHARS;
int next_free_strtable = 0;	/* offset of the first unused byte in free_strtable */
char *free_strtable = NULL;	/*[DEF_MAX_WORDS * AVG_WORD_LEN]; */
extern int usemalloc;

/* Size in bytes of the shared word pool that backs free_strtable. */
#define STRTABLE_POOL_SIZE	(AVG_WORD_LEN * DEF_MAX_WORDS)

/*
 * debugging only
 *
 * Writes string_table[0 .. MAX_WORDS-1] to string_file, one entry per line.
 * Every slot is printed with "%s", so the caller must have filled all of them.
 * index_file is only opened and closed again: the dump is refused when it
 * cannot be opened, but nothing is read from it.
 *
 * Returns 1 on success, 0 if either file cannot be opened.
 */
int
dump_string(string_table, string_file, index_file)
	char	**string_table;
	unsigned char	*string_file, *index_file;
{
	FILE	*stringfp;
	FILE	*indexfp;
	int	i;

	if ((stringfp = fopen((char *)string_file, "w")) == NULL) {
		fprintf(stderr, "cannot open for writing: %s\n", string_file);
		return 0;
	}
	if ((indexfp = fopen((char *)index_file, "r")) == NULL) {
		fprintf(stderr, "cannot open for reading: %s\n", index_file);
		fclose(stringfp);
		return 0;
	}

	for (i = 0; i < MAX_WORDS; i++)
		fprintf(stringfp, "%s\n", string_table[i]);

	fflush(stringfp);
	fclose(stringfp);
	fclose(indexfp);
	return 1;
}

/*
 * VERY particular to the format of the string-table file: which is a series
 * of words separated by newlines -- this does an fscanf+strlen in one scan.
 *
 * Reads one word from fp into str, which must have room for at least
 * MAX_WORD_LEN + 2 bytes. str is always NUL terminated on return.
 *
 * Returns:
 *	n > 0	a word was stored. When a newline ended the word, n counts the
 *		stored characters plus the terminating NUL. When the word was
 *		cut off after MAX_WORD_LEN + 1 characters, n is that character
 *		count and the rest of the line is left unread.
 *	0	the very first character was a newline (padded area).
 *	-1	end of file was reached before a newline; whatever was read so
 *		far is left in str.
 */
int
mystringread(fp, str)
	FILE	*fp;
	char	*str;
{
	int	numread = 0;
	int	c = EOF;

	while ((numread <= MAX_WORD_LEN) && ((c = getc(fp)) != EOF)) {
		if (c == '\n') {
			if (numread == 0) break;	/* first char '\n' => in padded area */
			str[numread++] = '\0';
			return numread;
		}
		str[numread++] = c;
	}
	str[numread] = '\0';
	if (c == EOF) return -1;
	return numread;
}

/*
 * Gives the word held in scratch a permanent home and returns its address.
 * ret is the value mystringread() returned for that word.
 *
 * With usemalloc set, the word gets its own malloc'ed buffer of ret + 2 bytes.
 * Otherwise it is appended to the free_strtable pool and the pool offset is
 * advanced by ret + 2, exactly as the table has always been laid out.
 *
 * Returns NULL when memory cannot be obtained or when the word does not fit
 * in what is left of the pool; nothing is written in that case.
 */
static char *
strtable_store(scratch, ret)
	char	*scratch;
	int	ret;
{
	char	*word;
	int	need;

	if (usemalloc) {
		if ((word = (char *)malloc(ret + 2)) == NULL) return NULL;
		strcpy(word, scratch);
		return word;
	}

	if (free_strtable == NULL) return NULL;
	need = (int)strlen(scratch) + 1;	/* bytes strcpy() is about to write */
	if ((next_free_strtable < 0) || (need > STRTABLE_POOL_SIZE - next_free_strtable))
		return NULL;			/* pool exhausted: refuse rather than overrun it */
	word = &free_strtable[next_free_strtable];
	strcpy(word, scratch);
	next_free_strtable += ret + 2;
	return word;
}

/*
 * Reads words from stringfp (starting at its current position) and stores
 * them in string_table[initialwordindex], string_table[initialwordindex+1], ...
 *
 * bytestoread == -1:	read until end of file, skipping empty lines.
 * otherwise:		stop once the mystringread() results add up to at least
 *			bytestoread, or at the first empty line (padded area)
 *			or end of file, whichever comes first.
 *
 * Reading also stops when MAX_WORDS entries are in use or when no storage
 * is left for another word.
 *
 * Each word is first read into a local buffer of MAX_WORD_BUF bytes, which is
 * assumed to be at least MAX_WORD_LEN + 2 (see defs.h).
 *
 * Returns the index one past the last entry that was filled in.
 */
int
build_string(string_table, stringfp, bytestoread, initialwordindex)
	char	*string_table[DEF_MAX_WORDS];	/*[MAX_WORD_LEN+2]; */
	FILE	*stringfp;
	int	bytestoread;
	int	initialwordindex;
{
	int	wordindex = initialwordindex;
	int	numread = 0;
	int	bounded = (bytestoread != -1);
	int	ret;
	char	dummybuf[MAX_WORD_BUF];
	char	*word;

	while (wordindex < MAX_WORDS) {
		if (bounded && (bytestoread <= numread)) break;

		/* The pool is created lazily, before anything is consumed from the file. */
		if (!usemalloc && (free_strtable == NULL)) {
			free_strtable = (char *)malloc(STRTABLE_POOL_SIZE);
			if (free_strtable == NULL) break;
		}

		ret = mystringread(stringfp, dummybuf);
		if (ret == 0) {
			if (bounded) break;	/* padded area ends the block */
			continue;		/* whole-file mode just skips the padding */
		}
		if (ret < 0) break;		/* end of file */

		if ((word = strtable_store(dummybuf, ret)) == NULL) break;
		string_table[wordindex] = word;
		wordindex ++;
		if (bounded) numread += ret;
	}
	return wordindex;
}

/*
 * Walks the block index in srcbuf and loads every block of the string file
 * whose word range contains at least one of wordnums[0 .. numwordnums-1].
 *
 * srcbuf is read as consecutive unsigned shorts in host byte order: entry k is
 * the first word number of block k, so block k covers entry k .. entry k+1
 * (both ends are treated as inclusive). Blocks already flagged in
 * loaded_string_table are skipped; newly loaded ones are flagged.
 */
static void
load_matching_blocks(string_table, stringfp, srcbuf, srclen, wordnums, numwordnums, blocksize, loaded_string_table)
	char	*string_table[DEF_MAX_WORDS];
	FILE	*stringfp;
	unsigned char	*srcbuf;
	int	srclen;
	unsigned short	*wordnums;
	int	numwordnums;
	int	blocksize;
	char	loaded_string_table[STRING_FILE_BLOCKS];
{
	int	step = (int)sizeof(short);
	int	remaining;
	int	blockindex;
	int	wanted;
	int	i;
	unsigned char	*srcpos = srcbuf;
	unsigned short	srcinit, srcend;

	if (srclen < step) return;		/* not even one index entry to read */
	srcend = *((unsigned short *)srcpos);
	srcpos += step;
	remaining = srclen - step;

	/* Only whole entries are read, and never more blocks than the flag array holds. */
	for (blockindex = 0; (remaining >= step) && (blockindex < STRING_FILE_BLOCKS); blockindex++) {
		srcinit = srcend;
		srcend = *((unsigned short *)srcpos);
		srcpos += step;
		remaining -= step;

		/* Include it if any of the word-indices fit within this range */
		wanted = 0;
		for (i = 0; i < numwordnums; i++) {
			if ((wordnums[i] >= srcinit) && (wordnums[i] <= srcend)) {
				wanted = 1;
				break;
			}
		}
		if (!wanted || loaded_string_table[blockindex]) continue;

		/* fseek() takes a long offset; whence 0 is the start of the file. */
		if (fseek(stringfp, (long)blockindex * (long)blocksize, 0) != 0)
			continue;		/* leave the block unflagged so it can be retried */
		loaded_string_table[blockindex] = 1;
		build_string(string_table, stringfp, blocksize, srcinit);
	}
}

/*
 * Interprets srcbuf as a set of srclen/2 short integers. It looks for all the
 * short-integers encoding words in the matched line and loads only those blocks
 * of the string table. Note: srcbuf must be aligned on a short-int boundary.
 *
 * linebuf/linelen is the compressed line. Bytes below RESERVED_CHARS are
 * single-byte codes or verbatim text and are skipped; every other byte starts
 * a two-byte, big-endian word code that is passed through decode_index().
 * Word numbers are collected MAX_NAME_LEN at a time, so a line with more codes
 * than that is handled in several passes instead of overflowing the array.
 *
 * Always returns 0.
 */
int
build_partial_string(string_table, stringfp, srcbuf, srclen, linebuf, linelen, blocksize, loaded_string_table)
	char	*string_table[DEF_MAX_WORDS];	/* [MAX_WORD_LEN+2]; */
	FILE	*stringfp;
	unsigned char	*srcbuf;
	int	srclen;
	unsigned char	*linebuf;
	int	linelen;
	int	blocksize;
	char	loaded_string_table[STRING_FILE_BLOCKS];
{
	unsigned short	wordnums[MAX_NAME_LEN];	/* maximum pattern length */
	unsigned short	code;
	int	numwordnums = 0;
	int	i = 0;

	/*
	 * Find all the relevant wordnums in the line.
	 */
	while (i < linelen) {
		if (linebuf[i] < RESERVED_CHARS) {
			if (linebuf[i] == BEGIN_VERBATIM) {
				if ((i + 1 < linelen) && ISASCII(linebuf[i+1])) {
					/* the bound is tested first so linebuf[linelen] is never read */
					while ((i < linelen) && (linebuf[i] != END_VERBATIM)) i ++;
				}
				else i ++;	/* skip over the BEGIN_VERBATIM of non-ascii character */
				i ++;	/* skip over the non-ascii character OR END_VERBATIM: let it overshoot linelen...its ok */
			}
			else i ++;	/* skip over the character encoding a special word OR a special character */
			continue;
		}

		if (i + 1 >= linelen) break;	/* second byte of the code is missing */

		if (numwordnums >= MAX_NAME_LEN) {
			/* batch is full: load its blocks now and start a new batch */
			load_matching_blocks(string_table, stringfp, srcbuf, srclen, wordnums, numwordnums, blocksize, loaded_string_table);
			numwordnums = 0;
		}

		code = (unsigned char)linebuf[i];	/* always big-endian compression */
		code <<= 8;
		code |= (unsigned char)linebuf[i+1];
		wordnums[numwordnums] = decode_index(code);	/* roundabout to avoid buserr */
		numwordnums ++;
		i += sizeof(short);
	}

	load_matching_blocks(string_table, stringfp, srcbuf, srclen, wordnums, numwordnums, blocksize, loaded_string_table);
	return 0;
}

/*
 * Rewrites the string file `filename' so that no line straddles a multiple of
 * FILEBLOCKSIZE bytes: whenever the next line would not fit in the current
 * block, the block is filled up with newlines and the line starts a new one.
 *
 * Also writes "<filename>.index": the block size on the first line, then the
 * number of the first word of every block, then the total number of words
 * (the last line is omitted when the input is empty). Word numbers are kept
 * in an unsigned short.
 *
 * A FILEBLOCKSIZE that is not a positive multiple of MIN_BLOCKSIZE is replaced
 * by MIN_BLOCKSIZE. The padded copy is written to "<filename>.<pid>" and then
 * moved over the original by running SYSTEM_MV through system().
 *
 * Exits with status 2 when a file cannot be opened or written or a name is
 * too long; otherwise returns 0.
 */
int
pad_string_file(filename, FILEBLOCKSIZE)
	unsigned char	*filename;
	int	FILEBLOCKSIZE;
{
	FILE	*outfp, *infp, *indexfp;
	int	offset = 0, len;
	int	pid = getpid();
	int	i;
	int	first = 1;
	int	failed = 0;
	unsigned short	wordindex = 0;
	char	pidtext[32];
	char	name[MAX_NAME_LEN];	/* names of the two files created here */
	char	line[MAX_NAME_LEN];	/* one line of the string file */
	char	es1[MAX_LINE_LEN], es2[MAX_LINE_LEN];
	/* "exec " + SYSTEM_MV + two escaped names + ".<pid>" + separators */
	char	command[MAX_LINE_LEN + MAX_LINE_LEN + MAX_NAME_LEN + 64];

	/* Both derived names must fit in name[] before anything is formatted into it. */
	sprintf(pidtext, "%d", pid);
	if ((strlen((char *)filename) + sizeof(".index") > MAX_NAME_LEN) ||
	    (strlen((char *)filename) + strlen(pidtext) + 2 > MAX_NAME_LEN)) {
		fprintf(stderr, "file name too long: %s\n", filename);
		exit(2);
	}

	if ((infp = fopen((char *)filename, "r")) == NULL) {
		fprintf(stderr, "cannot open for reading: %s\n", filename);
		exit(2);
	}
	sprintf(name, "%s.index", filename);
	if ((indexfp = fopen(name, "w")) == NULL) {
		fprintf(stderr, "cannot open for writing: %s\n", name);
		fclose(infp);
		exit(2);
	}
	sprintf(name, "%s.%d", filename, pid);
	if ((outfp = fopen(name, "w")) == NULL) {
		fprintf(stderr, "cannot open for writing: %s\n", name);
		fclose(infp);
		fclose(indexfp);
		exit(2);
	}

	if ((FILEBLOCKSIZE <= 0) || ((FILEBLOCKSIZE % MIN_BLOCKSIZE) != 0)) {
		fprintf(stderr, "invalid block size %d: changing to %d\n", FILEBLOCKSIZE, MIN_BLOCKSIZE);
		FILEBLOCKSIZE = MIN_BLOCKSIZE;
	}
	fprintf(indexfp, "%d\n", FILEBLOCKSIZE);

	while (fgets(line, MAX_NAME_LEN, infp) == line) {
		len = strlen(line);
		if (first || (offset + len > FILEBLOCKSIZE)) {
			if (!first) {
				/* fill up with so many newlines until the next block size */
				for (i = 0; i < FILEBLOCKSIZE - offset; i++)
					putc('\n', outfp);
			}
			fprintf(indexfp, "%d\n", wordindex);	/* first word of the new block */
			offset = 0;
		}
		fputs(line, outfp);
		offset += len;
		wordindex ++;
		first = 0;
	}
	if (!first)
		fprintf(indexfp, "%d\n", wordindex);	/* total number of words */

	fclose(infp);
	/* The original must not be replaced by a copy that was only partly written. */
	if (ferror(outfp) || ferror(indexfp)) failed = 1;
	if (fclose(outfp) != 0) failed = 1;
	if (fclose(indexfp) != 0) failed = 1;
	if (failed) {
		fprintf(stderr, "cannot write padded copy of: %s\n", filename);
		exit(2);
	}

	/* es1 and es2 bound the escaped names; only SYSTEM_MV is left to check. */
	if (strlen(SYSTEM_MV) >= MAX_NAME_LEN) {
		fprintf(stderr, "command name too long: %s\n", SYSTEM_MV);
		exit(2);
	}
	sprintf(command, "exec %s %s.%d %s\n", SYSTEM_MV, tescapesinglequote(filename, es1), pid, tescapesinglequote(filename, es2));
	if (system(command) != 0)
		fprintf(stderr, "cannot rename %s.%d to %s\n", filename, pid, filename);
	return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -