⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 indxbib.cc

📁 早期freebsd实现
💻 CC
📖 第 1 页 / 共 2 页
字号:
// -*- C++ -*-/* Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.     Written by James Clark (jjc@jclark.com)This file is part of groff.groff is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.groff is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public License alongwith groff; see the file COPYING.  If not, write to the Free SoftwareFoundation, 675 Mass Ave, Cambridge, MA 02139, USA. */#include <stdio.h>#include <stdlib.h>#include <string.h>#include <assert.h>#include <errno.h>#include "posix.h"#include "lib.h"#include "errarg.h"#include "error.h"#include "stringclass.h"#include "cset.h"#include "cmap.h"#include "defs.h"#include "index.h"extern "C" {  // Sun's stdlib.h fails to declare this.  char *mktemp(char *);}#define DEFAULT_HASH_TABLE_SIZE 997#define TEMP_INDEX_TEMPLATE "indxbibXXXXXX"// (2^n - MALLOC_OVERHEAD) should be a good argument for malloc().#define MALLOC_OVERHEAD 16#ifdef BLOCK_SIZE#undef BLOCK_SIZE#endifconst int BLOCK_SIZE = ((1024 - MALLOC_OVERHEAD - sizeof(struct block *)			 - sizeof(int)) / sizeof(int));struct block {  block *next;  int used;  int v[BLOCK_SIZE];    block(block *p = 0) : next(p), used(0) { }};struct block;union table_entry {  block *ptr;  int count;};struct word_list {  word_list *next;  char *str;  int len;  word_list(const char *, int, word_list *);};table_entry *hash_table;int hash_table_size = DEFAULT_HASH_TABLE_SIZE;// We make this the same size as hash_table so we only have to do one// mod per key.static word_list **common_words_table = 0;char *key_buffer;FILE *indxfp;int ntags = 0;string filenames;char *temp_index_file = 0;const char *ignore_fields = "XYZ";const char *common_words_file = COMMON_WORDS_FILE;int n_ignore_words = 100;int truncate_len = 6;int shortest_len = 3;int max_keys_per_item = 100;static void usage();static void write_hash_table();static void init_hash_table();static void read_common_words_file();static int store_key(char *s, int len);static void possibly_store_key(char *s, int len);static int do_whole_file(const char *filename);static int do_file(const char *filename);static void store_reference(int filename_index, int pos, int len);static void check_integer_arg(char opt, const char *arg, int min, int *res);static void store_filename(const char *);static void fwrite_or_die(const void *ptr, int size, int nitems, FILE *fp);static char *get_cwd();extern "C" {  void cleanup();  long dir_name_max(const char *);  void catch_fatal_signals();  void ignore_fatal_signals();}int main(int argc, char **argv){  program_name = argv[0];  static char stderr_buf[BUFSIZ];  setbuf(stderr, stderr_buf);    const char *basename = 0;  typedef int (*parser_t)(const char *);  parser_t parser = do_file;  const char *directory = 0;  const char *foption = 0;  int opt;  while ((opt = getopt(argc, argv, "c:o:h:i:k:l:t:n:c:d:f:vw")) != EOF)    switch (opt) {    case 'c':      common_words_file = optarg;      break;    case 'd':      directory = optarg;      break;    case 'f':      foption = optarg;      break;    case 'h':      check_integer_arg('h', optarg, 1, &hash_table_size);      if (!is_prime(hash_table_size)) {	while (!is_prime(++hash_table_size))	  ;	warning("%1 not prime: using %2 instead", optarg, hash_table_size);      }      break;    case 'i':      ignore_fields = optarg;      break;    case 'k':      check_integer_arg('k', optarg, 1, &max_keys_per_item);      break;    case 'l':      check_integer_arg('l', optarg, 0, &shortest_len);      break;    case 'n':      check_integer_arg('n', optarg, 0, &n_ignore_words);      break;    case 'o':      basename = optarg;      break;    case 't':      check_integer_arg('t', optarg, 1, &truncate_len);      break;    case 'w':      parser = do_whole_file;      break;    case 'v':      {	extern const char *version_string;	fprintf(stderr, "GNU indxbib version %s\n", version_string);	fflush(stderr);	break;      }    case '?':      usage();      break;    default:      assert(0);      break;    }  if (optind >= argc && foption == 0)    fatal("no files and no -f option");  if (!directory) {    char *path = get_cwd();    store_filename(path);    a_delete path;  }  else    store_filename(directory);  init_hash_table();  store_filename(common_words_file);  store_filename(ignore_fields);  key_buffer = new char[truncate_len];  read_common_words_file();  if (!basename)    basename = optind < argc ? argv[optind] : DEFAULT_INDEX_NAME;  const char *p = strrchr(basename, '/');  long name_max;  if (p) {    char *dir = strsave(basename);    dir[p - basename] = '\0';    name_max = dir_name_max(dir);    a_delete dir;  }  else    name_max = dir_name_max(".");  const char *filename = p ? p + 1 : basename;  if (name_max >= 0 && strlen(filename) + sizeof(INDEX_SUFFIX) - 1 > name_max)    fatal("`%1.%2' is too long for a filename", filename, INDEX_SUFFIX);  if (p) {    p++;    temp_index_file = new char[p - basename + sizeof(TEMP_INDEX_TEMPLATE)];    memcpy(temp_index_file, basename, p - basename);    strcpy(temp_index_file + (p - basename), TEMP_INDEX_TEMPLATE);  }  else {    temp_index_file = strsave(TEMP_INDEX_TEMPLATE);  }  if (!mktemp(temp_index_file) || !temp_index_file[0])    fatal("cannot create file name for temporary file");  catch_fatal_signals();  int fd = creat(temp_index_file, S_IRUSR|S_IRGRP|S_IROTH);  if (fd < 0)    fatal("can't create temporary index file: %1", strerror(errno));  indxfp = fdopen(fd, "w");  if (indxfp == 0)    fatal("fdopen failed");  if (fseek(indxfp, sizeof(index_header), 0) < 0)    fatal("can't seek past index header: %1", strerror(errno));  int failed = 0;  if (foption) {    FILE *fp = stdin;    if (strcmp(foption, "-") != 0) {      errno = 0;      fp = fopen(foption, "r");      if (!fp)	fatal("can't open `%1': %2", foption, strerror(errno));    }    string path;    int lineno = 1;    for (;;) {      for (int c = getc(fp); c != '\n' && c != EOF; c = getc(fp)) {	if (c == '\0')	  error_with_file_and_line(foption, lineno,				   "nul character in pathname ignored");	else	  path += c;      }      if (path.length() > 0) {	path += '\0';	if (!(*parser)(path.contents()))	  failed = 1;	path.clear();      }      if (c == EOF)	break;      lineno++;    }    if (fp != stdin)      fclose(fp);  }  for (int i = optind; i < argc; i++)    if (!(*parser)(argv[i]))      failed = 1;  write_hash_table();  if (fclose(indxfp) < 0)    fatal("error closing temporary index file: %1", strerror(errno));  char *index_file = new char[strlen(basename) + sizeof(INDEX_SUFFIX)];      strcpy(index_file, basename);  strcat(index_file, INDEX_SUFFIX);#ifdef HAVE_RENAME  if (rename(temp_index_file, index_file) < 0)    fatal("can't rename temporary index file: %1", strerror(errno));#else /* not HAVE_RENAME */  ignore_fatal_signals();  if (unlink(index_file) < 0) {    if (errno != ENOENT)      fatal("can't unlink `%1': %2", index_file, strerror(errno));  }  if (link(temp_index_file, index_file) < 0)    fatal("can't link temporary index file: %1", strerror(errno));  if (unlink(temp_index_file) < 0)    fatal("can't unlink temporary index file: %1", strerror(errno));#endif /* not HAVE_RENAME */  temp_index_file = 0;  exit(failed);}static void usage(){  fprintf(stderr,"usage: %s [-vw] [-c file] [-d dir] [-f file] [-h n] [-i XYZ] [-k n]\n""       [-l n] [-n n] [-o base] [-t n] [files...]\n",	  program_name);  exit(1);}static void check_integer_arg(char opt, const char *arg, int min, int *res){  char *ptr;  long n = strtol(arg, &ptr, 10);  if (n == 0 && ptr == arg)    error("argument to -%1 not an integer", opt);  else if (n < min)    error("argument to -%1 must not be less than %2", opt, min);  else {    if (n > INT_MAX)      error("argument to -%1 greater than maximum integer", opt);    else if (*ptr != '\0')      error("junk after integer argument to -%1", opt);    *res = int(n);  }}static char *get_cwd(){  char *buf;  int size = 12;  for (;;) {    buf = new char[size];    if (getcwd(buf, size))      break;    if (errno != ERANGE)      fatal("cannot get current working directory: %1", strerror(errno));    a_delete buf;    if (size == INT_MAX)      fatal("current working directory longer than INT_MAX");    if (size > INT_MAX/2)      size = INT_MAX;    else      size *= 2;  }  return buf;}word_list::word_list(const char *s, int n, word_list *p): next(p), len(n){  str = new char[n];  memcpy(str, s, n);}static void read_common_words_file(){  if (n_ignore_words <= 0)    return;  errno = 0;  FILE *fp = fopen(common_words_file, "r");  if (!fp)    fatal("can't open `%1': %2", common_words_file, strerror(errno));  common_words_table = new word_list * [hash_table_size];  for (int i = 0; i < hash_table_size; i++)    common_words_table[i] = 0;  int count = 0;  int key_len = 0;  for (;;) {    int c = getc(fp);    while (c != EOF && !csalnum(c))      c = getc(fp);    if (c == EOF)      break;    do {      if (key_len < truncate_len)	key_buffer[key_len++] = cmlower(c);      c = getc(fp);    } while (c != EOF && csalnum(c));    if (key_len >= shortest_len) {      int h = hash(key_buffer, key_len) % hash_table_size;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -