📄 indxbib.cc
字号:
common_words_table[h] = new word_list(key_buffer, key_len, common_words_table[h]);
}
if (++count >= n_ignore_words)
  break;
key_len = 0;
if (c == EOF)
  break;
}
n_ignore_words = count;
fclose(fp);
}

// Index the whole of `filename' as a single item.
//
// Each maximal run of characters accepted by csalnum() is collected
// into key_buffer (at most truncate_len bytes are kept; the rest of
// the run is read and dropped) and offered to store_key().  Scanning
// stops at end of file or once max_keys_per_item keys have been
// accepted by store_key().  Afterwards a reference with start 0 and
// length 0 is stored for the file, and the file name is appended to
// `filenames'.
//
// Returns 1 on success; returns 0, after reporting the problem with
// error(), if the file cannot be opened.
static int do_whole_file(const char *filename)
{
  errno = 0;
  FILE *fp = fopen(filename, "r");
  if (!fp) {
    error("can't open `%1': %2", filename, strerror(errno));
    return 0;
  }
  int count = 0;                // keys accepted by store_key() so far
  int key_len = 0;
  int c;
  while ((c = getc(fp)) != EOF) {
    if (csalnum(c)) {
      key_len = 1;
      key_buffer[0] = c;
      // Consume the rest of the run, keeping only the first
      // truncate_len bytes.
      while ((c = getc(fp)) != EOF) {
        if (!csalnum(c))
          break;
        if (key_len < truncate_len)
          key_buffer[key_len++] = c;
      }
      if (store_key(key_buffer, key_len)) {
        if (++count >= max_keys_per_item)
          break;
      }
      // The inner loop may have hit end of file; do not call getc() again.
      if (c == EOF)
        break;
    }
  }
  // The keys above were recorded under the current value of ntags,
  // which is the number this reference receives when it is stored here.
  // filenames.length() is taken before the name is appended.
  store_reference(filenames.length(), 0, 0);
  store_filename(filename);
  fclose(fp);
  return 1;
}

// Index `filename' reference by reference.
//
// The file is read one byte at a time through a small state machine
// (see the enum below).  A reference begins at the first byte that is
// not a space, tab or newline while in state START, and ends at a line
// that contains only spaces and tabs, or at end of file.  For every
// reference one tag is written with store_reference(), and every
// alphanumeric run (per csalnum()) outside an ignored field is passed
// to possibly_store_key().  A line that starts with `%' in column 0
// followed by a character found in ignore_fields starts an ignored
// field; the field continues over following lines until a line begins
// with `%' in column 0 or the reference ends.
//
// The file name is appended to `filenames' before scanning starts.
//
// Returns 1 on success; returns 0, after reporting the problem with
// error(), if the file cannot be opened.
static int do_file(const char *filename)
{
  errno = 0;
  FILE *fp = fopen(filename, "r");
  if (fp == 0) {
    error("can't open `%1': %2", filename, strerror(errno));
    return 0;
  }
  // Value of filenames.length() before this name is appended; it is
  // stored in every tag written for this file.
  int filename_index = filenames.length();
  store_filename(filename);

  enum {
    START,      // at the start of the file; also in between references
    BOL,        // in the middle of a reference, at the beginning of the line
    PERCENT,    // seen a percent at the beginning of the line
    IGNORE,     // ignoring a field
    IGNORE_BOL, // at the beginning of a line ignoring a field
    KEY,        // in the middle of a key
    DISCARD,    // after truncate_len bytes of a key
    MIDDLE      // in between keys
  } state = START;

  // In states START, BOL, IGNORE_BOL, space_count holds how many spaces
  // and tabs have been seen at the beginning of the line.  In states
  // PERCENT, IGNORE, KEY, MIDDLE space_count must be 0.
  int space_count = 0;
  int byte_count = 0;           // bytes read
  int key_len = 0;
  int ref_start = -1;           // position of start of current reference
  for (;;) {
    int c = getc(fp);
    if (c == EOF)
      break;
    byte_count++;
    switch (state) {
    case START:
      // Skip leading blanks and empty lines between references.
      if (c == ' ' || c == '\t') {
        space_count++;
        break;
      }
      if (c == '\n') {
        space_count = 0;
        break;
      }
      // First significant byte: the reference starts at the beginning
      // of the leading whitespace on this line.
      ref_start = byte_count - space_count - 1;
      space_count = 0;
      if (c == '%')
        state = PERCENT;
      else if (csalnum(c)) {
        state = KEY;
        key_buffer[0] = c;
        key_len = 1;
      }
      else
        state = MIDDLE;
      break;
    case BOL:
      switch (c) {
      case '%':
        // Only a `%' in the first column introduces a field.
        if (space_count > 0) {
          space_count = 0;
          state = MIDDLE;
        }
        else
          state = PERCENT;
        break;
      case ' ':
      case '\t':
        space_count++;
        break;
      case '\n':
        // A line with nothing but blanks ends the reference.  The
        // length leaves out this newline and the blanks before it.
        store_reference(filename_index, ref_start,
                        byte_count - 1 - space_count - ref_start);
        state = START;
        space_count = 0;
        break;
      default:
        space_count = 0;
        if (csalnum(c)) {
          state = KEY;
          key_buffer[0] = c;
          key_len = 1;
        }
        else
          state = MIDDLE;
      }
      break;
    case PERCENT:
      // c is the character right after a first-column `%'.
      if (strchr(ignore_fields, c) != 0)
        state = IGNORE;
      else if (c == '\n')
        state = BOL;
      else
        state = MIDDLE;
      break;
    case IGNORE:
      if (c == '\n')
        state = IGNORE_BOL;
      break;
    case IGNORE_BOL:
      switch (c) {
      case '%':
        // An indented `%' does not end the ignored field.
        if (space_count > 0) {
          state = IGNORE;
          space_count = 0;
        }
        else
          state = PERCENT;
        break;
      case ' ':
      case '\t':
        space_count++;
        break;
      case '\n':
        // Blank line: end of reference, same length rule as in BOL.
        store_reference(filename_index, ref_start,
                        byte_count - 1 - space_count - ref_start);
        state = START;
        space_count = 0;
        break;
      default:
        space_count = 0;
        state = IGNORE;
      }
      break;
    case KEY:
      if (csalnum(c)) {
        if (key_len < truncate_len)
          key_buffer[key_len++] = c;
        else
          state = DISCARD;      // buffer holds truncate_len bytes; drop the rest
      }
      else {
        possibly_store_key(key_buffer, key_len);
        key_len = 0;
        if (c == '\n')
          state = BOL;
        else
          state = MIDDLE;
      }
      break;
    case DISCARD:
      // Skip the remainder of an over-long key, then store the
      // truncated prefix collected in state KEY.
      if (!csalnum(c)) {
        possibly_store_key(key_buffer, key_len);
        key_len = 0;
        if (c == '\n')
          state = BOL;
        else
          state = MIDDLE;
      }
      break;
    case MIDDLE:
      if (csalnum(c)) {
        state = KEY;
        key_buffer[0] = c;
        key_len = 1;
      }
      else if (c == '\n')
        state = BOL;
      break;
    default:
      assert(0);
    }
  }
  // End of file: finish whatever reference is still open.
  switch (state) {
  case START:
    // No reference in progress.
    break;
  case DISCARD:
  case KEY:
    possibly_store_key(key_buffer, key_len);
    // fall through
  case BOL:
  case PERCENT:
  case IGNORE_BOL:
  case IGNORE:
  case MIDDLE:
    // space_count is non-zero only in BOL/IGNORE_BOL, where it removes
    // trailing blanks on an unterminated last line from the length.
    store_reference(filename_index, ref_start,
                    byte_count - ref_start - space_count);
    break;
  default:
    assert(0);
  }
  fclose(fp);
  return 1;
}

// Write one tag describing a reference (`len' bytes starting at `pos',
// with the given filename_index) to indxfp and increment ntags.
// Does not return if the write fails (see fwrite_or_die()).
static void store_reference(int filename_index, int pos, int len)
{
  tag t;
  t.filename_index = filename_index;
  t.start = pos;
  t.length = len;
  fwrite_or_die(&t, sizeof(t), 1, indxfp);
  ntags++;
}

// Append `fn' followed by a '\0' byte to `filenames'.
static void store_filename(const char *fn)
{
  filenames += fn;
  filenames += '\0';
}

// Allocate hash_table with hash_table_size entries and clear the ptr
// member of each one.
static void init_hash_table()
{
  hash_table = new table_entry[hash_table_size];
  for (int i = 0; i < hash_table_size; i++)
    hash_table[i].ptr = 0;
}

// Pass the key s[0..len-1] to store_key() unless max_keys_per_item
// keys have already been accepted for the current reference.
//
// The "current reference" is identified by ntags: the counter kept in
// the function's static variables is reset whenever ntags differs from
// the value seen on the previous call.  Only keys for which
// store_key() returns non-zero are counted.
static void possibly_store_key(char *s, int len)
{
  static int last_tagno = -1;
  static int key_count;
  if (last_tagno != ntags) {
    last_tagno = ntags;
    key_count = 0;
  }
  if (key_count < max_keys_per_item) {
    if (store_key(s, len))
      key_count++;
  }
}

// Record that the key s[0..len-1] occurs in reference number ntags.
//
// The key is modified in place: every character that is not a digit
// (per csdigit()) is replaced by cmlower() of itself.  The key is
// rejected, and 0 returned, when
//   - it is shorter than shortest_len;
//   - it consists only of digits, unless it is exactly four digits
//     long and begins with "19";
//   - common_words_table is non-null and the key is found in the list
//     for its hash bucket.
// Otherwise ntags is appended to the list of the key's hash bucket
// (unless the most recent entry there already equals ntags) and 1 is
// returned.  Only the tag number is stored in the bucket, not the key
// text.
static int store_key(char *s, int len)
{
  if (len < shortest_len)
    return 0;
  int is_number = 1;
  for (int i = 0; i < len; i++)
    if (!csdigit(s[i])) {
      is_number = 0;
      s[i] = cmlower(s[i]);
    }
  if (is_number && !(len == 4 && s[0] == '1' && s[1] == '9'))
    return 0;
  int h = hash(s, len) % hash_table_size;
  if (common_words_table) {
    for (word_list *ptr = common_words_table[h]; ptr; ptr = ptr->next)
      if (len == ptr->len && memcmp(s, ptr->str, len) == 0)
        return 0;
  }
  table_entry *pp = hash_table + h;
  if (!pp->ptr)
    pp->ptr = new block;
  else if (pp->ptr->v[pp->ptr->used - 1] == ntags)
    return 1;                   // this reference is already listed in the bucket
  else if (pp->ptr->used >= BLOCK_SIZE)
    // Current block is full.  NOTE(review): block(pp->ptr) presumably
    // links the new block in front of the old one via `next' -- confirm
    // against the block constructor.
    pp->ptr = new block(pp->ptr);
  pp->ptr->v[(pp->ptr->used)++] = ntags;
  return 1;
}

// Write the tag lists, the hash table, the file names and finally the
// index header to indxfp.
//
// For every bucket the chain of blocks is reversed, so that blocks are
// written in the opposite order to the chain, and each block's used
// entries are written followed by a terminating -1.  The blocks are
// deleted as they are written.  The bucket's `count' member is set to
// the offset (counted in ints) of its list within the lists written
// here, or to -1 if the bucket is empty.
// NOTE(review): the sizeof test below suggests table_entry is a union
// of `ptr' and `count', so setting `count' replaces `ptr' -- confirm
// against the declaration.
//
// After the file names have been written the function seeks to offset
// 0 and writes the header there.
// NOTE(review): presumably room for the header was left at the start
// of the file before any tags were written -- confirm in the caller.
static void write_hash_table()
{
  const int minus_one = -1;
  int li = 0;                   // number of ints written to the lists so far
  for (int i = 0; i < hash_table_size; i++) {
    block *ptr = hash_table[i].ptr;
    if (!ptr)
      hash_table[i].count = -1;
    else {
      hash_table[i].count = li;
      // Reverse the chain in place.
      block *rev = 0;
      while (ptr) {
        block *tem = ptr;
        ptr = ptr->next;
        tem->next = rev;
        rev = tem;
      }
      // Write out and free each block.
      while (rev) {
        fwrite_or_die(rev->v, sizeof(int), rev->used, indxfp);
        li += rev->used;
        block *tem = rev;
        rev = rev->next;
        delete tem;
      }
      fwrite_or_die(&minus_one, sizeof(int), 1, indxfp);
      li += 1;
    }
  }
  // The table can be written in one call only when each entry is
  // exactly one int wide.
  if (sizeof(table_entry) == sizeof(int))
    fwrite_or_die(hash_table, sizeof(int), hash_table_size, indxfp);
  else {
    assert(0);
    // write it out word by word
  }
  fwrite_or_die(filenames.contents(), 1, filenames.length(), indxfp);
  if (fseek(indxfp, 0, 0) < 0)
    fatal("error seeking on index file: %1", strerror(errno));
  index_header h;
  h.magic = INDEX_MAGIC;
  h.version = INDEX_VERSION;
  h.tags_size = ntags;
  h.lists_size = li;
  h.table_size = hash_table_size;
  h.strings_size = filenames.length();
  h.truncate = truncate_len;
  h.shortest = shortest_len;
  h.common = n_ignore_words;
  fwrite_or_die(&h, sizeof(h), 1, indxfp);
}

// Call fwrite() with the given arguments and call fatal() with the
// strerror(errno) text if fewer than `nitems' items were written.
static void fwrite_or_die(const void *ptr, int size, int nitems, FILE *fp)
{
  if (fwrite(ptr, size, nitems, fp) != nitems)
    fatal("fwrite failed: %1", strerror(errno));
}

// Run cleanup() and terminate the program with exit status 3.
void fatal_error_exit()
{
  cleanup();
  exit(3);
}

extern "C" {

// Remove the temporary index file, if temp_index_file is non-null.
// Declared with C linkage.
void cleanup()
{
  if (temp_index_file)
    unlink(temp_index_file);
}

}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -