📄 indxbib.cc
字号:
// -*- C++ -*-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc. Written by James Clark (jjc@jclark.com)This file is part of groff.groff is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.groff is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public License alongwith groff; see the file COPYING. If not, write to the Free SoftwareFoundation, 675 Mass Ave, Cambridge, MA 02139, USA. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <errno.h>#include "posix.h"#include "lib.h"#include "errarg.h"#include "error.h"#include "stringclass.h"#include "cset.h"#include "cmap.h"#include "defs.h"#include "index.h"extern "C" { // Sun's stdlib.h fails to declare this. char *mktemp(char *);}#define DEFAULT_HASH_TABLE_SIZE 997#define TEMP_INDEX_TEMPLATE "indxbibXXXXXX"// (2^n - MALLOC_OVERHEAD) should be a good argument for malloc().#define MALLOC_OVERHEAD 16#ifdef BLOCK_SIZE#undef BLOCK_SIZE#endifconst int BLOCK_SIZE = ((1024 - MALLOC_OVERHEAD - sizeof(struct block *) - sizeof(int)) / sizeof(int));struct block { block *next; int used; int v[BLOCK_SIZE]; block(block *p = 0) : next(p), used(0) { }};struct block;union table_entry { block *ptr; int count;};struct word_list { word_list *next; char *str; int len; word_list(const char *, int, word_list *);};table_entry *hash_table;int hash_table_size = DEFAULT_HASH_TABLE_SIZE;// We make this the same size as hash_table so we only have to do one// mod per key.static word_list **common_words_table = 0;char *key_buffer;FILE *indxfp;int ntags = 0;string filenames;char *temp_index_file = 0;const char *ignore_fields = "XYZ";const char *common_words_file = COMMON_WORDS_FILE;int n_ignore_words = 100;int truncate_len = 6;int shortest_len = 3;int max_keys_per_item = 100;static void usage();static void write_hash_table();static void init_hash_table();static void read_common_words_file();static int store_key(char *s, int len);static void possibly_store_key(char *s, int len);static int do_whole_file(const char *filename);static int do_file(const char *filename);static void store_reference(int filename_index, int pos, int len);static void check_integer_arg(char opt, const char *arg, int min, int *res);static void store_filename(const char *);static void fwrite_or_die(const void *ptr, int size, int nitems, FILE *fp);static char *get_cwd();extern "C" { void cleanup(); long dir_name_max(const char *); void catch_fatal_signals(); void ignore_fatal_signals();}int main(int argc, char **argv){ program_name = argv[0]; static char stderr_buf[BUFSIZ]; setbuf(stderr, stderr_buf); const char *basename = 0; typedef int (*parser_t)(const char *); parser_t parser = do_file; const char *directory = 0; const char *foption = 0; int opt; while ((opt = getopt(argc, argv, "c:o:h:i:k:l:t:n:c:d:f:vw")) != EOF) switch (opt) { case 'c': common_words_file = optarg; break; case 'd': directory = optarg; break; case 'f': foption = optarg; break; case 'h': check_integer_arg('h', optarg, 1, &hash_table_size); if (!is_prime(hash_table_size)) { while (!is_prime(++hash_table_size)) ; warning("%1 not prime: using %2 instead", optarg, hash_table_size); } break; case 'i': ignore_fields = optarg; break; case 'k': check_integer_arg('k', optarg, 1, &max_keys_per_item); break; case 'l': check_integer_arg('l', optarg, 0, &shortest_len); break; case 'n': check_integer_arg('n', optarg, 0, &n_ignore_words); break; case 'o': basename = optarg; break; case 't': check_integer_arg('t', optarg, 1, &truncate_len); break; case 'w': parser = do_whole_file; break; case 'v': { extern const char *version_string; fprintf(stderr, "GNU indxbib version %s\n", version_string); fflush(stderr); break; } case '?': usage(); break; default: assert(0); break; } if (optind >= argc && foption == 0) fatal("no files and no -f option"); if (!directory) { char *path = get_cwd(); store_filename(path); a_delete path; } else store_filename(directory); init_hash_table(); store_filename(common_words_file); store_filename(ignore_fields); key_buffer = new char[truncate_len]; read_common_words_file(); if (!basename) basename = optind < argc ? argv[optind] : DEFAULT_INDEX_NAME; const char *p = strrchr(basename, '/'); long name_max; if (p) { char *dir = strsave(basename); dir[p - basename] = '\0'; name_max = dir_name_max(dir); a_delete dir; } else name_max = dir_name_max("."); const char *filename = p ? p + 1 : basename; if (name_max >= 0 && strlen(filename) + sizeof(INDEX_SUFFIX) - 1 > name_max) fatal("`%1.%2' is too long for a filename", filename, INDEX_SUFFIX); if (p) { p++; temp_index_file = new char[p - basename + sizeof(TEMP_INDEX_TEMPLATE)]; memcpy(temp_index_file, basename, p - basename); strcpy(temp_index_file + (p - basename), TEMP_INDEX_TEMPLATE); } else { temp_index_file = strsave(TEMP_INDEX_TEMPLATE); } if (!mktemp(temp_index_file) || !temp_index_file[0]) fatal("cannot create file name for temporary file"); catch_fatal_signals(); int fd = creat(temp_index_file, S_IRUSR|S_IRGRP|S_IROTH); if (fd < 0) fatal("can't create temporary index file: %1", strerror(errno)); indxfp = fdopen(fd, "w"); if (indxfp == 0) fatal("fdopen failed"); if (fseek(indxfp, sizeof(index_header), 0) < 0) fatal("can't seek past index header: %1", strerror(errno)); int failed = 0; if (foption) { FILE *fp = stdin; if (strcmp(foption, "-") != 0) { errno = 0; fp = fopen(foption, "r"); if (!fp) fatal("can't open `%1': %2", foption, strerror(errno)); } string path; int lineno = 1; for (;;) { for (int c = getc(fp); c != '\n' && c != EOF; c = getc(fp)) { if (c == '\0') error_with_file_and_line(foption, lineno, "nul character in pathname ignored"); else path += c; } if (path.length() > 0) { path += '\0'; if (!(*parser)(path.contents())) failed = 1; path.clear(); } if (c == EOF) break; lineno++; } if (fp != stdin) fclose(fp); } for (int i = optind; i < argc; i++) if (!(*parser)(argv[i])) failed = 1; write_hash_table(); if (fclose(indxfp) < 0) fatal("error closing temporary index file: %1", strerror(errno)); char *index_file = new char[strlen(basename) + sizeof(INDEX_SUFFIX)]; strcpy(index_file, basename); strcat(index_file, INDEX_SUFFIX);#ifdef HAVE_RENAME if (rename(temp_index_file, index_file) < 0) fatal("can't rename temporary index file: %1", strerror(errno));#else /* not HAVE_RENAME */ ignore_fatal_signals(); if (unlink(index_file) < 0) { if (errno != ENOENT) fatal("can't unlink `%1': %2", index_file, strerror(errno)); } if (link(temp_index_file, index_file) < 0) fatal("can't link temporary index file: %1", strerror(errno)); if (unlink(temp_index_file) < 0) fatal("can't unlink temporary index file: %1", strerror(errno));#endif /* not HAVE_RENAME */ temp_index_file = 0; exit(failed);}static void usage(){ fprintf(stderr,"usage: %s [-vw] [-c file] [-d dir] [-f file] [-h n] [-i XYZ] [-k n]\n"" [-l n] [-n n] [-o base] [-t n] [files...]\n", program_name); exit(1);}static void check_integer_arg(char opt, const char *arg, int min, int *res){ char *ptr; long n = strtol(arg, &ptr, 10); if (n == 0 && ptr == arg) error("argument to -%1 not an integer", opt); else if (n < min) error("argument to -%1 must not be less than %2", opt, min); else { if (n > INT_MAX) error("argument to -%1 greater than maximum integer", opt); else if (*ptr != '\0') error("junk after integer argument to -%1", opt); *res = int(n); }}static char *get_cwd(){ char *buf; int size = 12; for (;;) { buf = new char[size]; if (getcwd(buf, size)) break; if (errno != ERANGE) fatal("cannot get current working directory: %1", strerror(errno)); a_delete buf; if (size == INT_MAX) fatal("current working directory longer than INT_MAX"); if (size > INT_MAX/2) size = INT_MAX; else size *= 2; } return buf;}word_list::word_list(const char *s, int n, word_list *p): next(p), len(n){ str = new char[n]; memcpy(str, s, n);}static void read_common_words_file(){ if (n_ignore_words <= 0) return; errno = 0; FILE *fp = fopen(common_words_file, "r"); if (!fp) fatal("can't open `%1': %2", common_words_file, strerror(errno)); common_words_table = new word_list * [hash_table_size]; for (int i = 0; i < hash_table_size; i++) common_words_table[i] = 0; int count = 0; int key_len = 0; for (;;) { int c = getc(fp); while (c != EOF && !csalnum(c)) c = getc(fp); if (c == EOF) break; do { if (key_len < truncate_len) key_buffer[key_len++] = cmlower(c); c = getc(fp); } while (c != EOF && csalnum(c)); if (key_len >= shortest_len) { int h = hash(key_buffer, key_len) % hash_table_size;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -