⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 indxbib.cc

📁 早期freebsd实现
💻 CC
📖 第 1 页 / 共 2 页
字号:
      common_words_table[h] = new word_list(key_buffer, key_len,					    common_words_table[h]);    }    if (++count >= n_ignore_words)      break;    key_len = 0;    if (c == EOF)      break;  }  n_ignore_words = count;  fclose(fp);}static int do_whole_file(const char *filename){  errno = 0;  FILE *fp = fopen(filename, "r");  if (!fp) {    error("can't open `%1': %2", filename, strerror(errno));    return 0;  }  int count = 0;  int key_len = 0;  int c;  while ((c = getc(fp)) != EOF) {    if (csalnum(c)) {      key_len = 1;      key_buffer[0] = c;      while ((c = getc(fp)) != EOF) {	if (!csalnum(c))	  break;	if (key_len < truncate_len)	  key_buffer[key_len++] = c;      }      if (store_key(key_buffer, key_len)) {	if (++count >= max_keys_per_item)	  break;      }      if (c == EOF)	break;    }  }  store_reference(filenames.length(), 0, 0);  store_filename(filename);  fclose(fp);  return 1;}static int do_file(const char *filename){  errno = 0;  FILE *fp = fopen(filename, "r");  if (fp == 0) {    error("can't open `%1': %2", filename, strerror(errno));    return 0;  }  int filename_index = filenames.length();  store_filename(filename);  enum {    START,	// at the start of the file; also in between references    BOL,	// in the middle of a reference, at the beginning of the line    PERCENT,	// seen a percent at the beginning of the line    IGNORE,	// ignoring a field    IGNORE_BOL,	// at the beginning of a line ignoring a field    KEY,	// in the middle of a key    DISCARD,	// after truncate_len bytes of a key    MIDDLE	// in between keys  } state = START;    // In states START, BOL, IGNORE_BOL, space_count how many spaces at  // the beginning have been seen.  In states PERCENT, IGNORE, KEY,  // MIDDLE space_count must be 0.  int space_count = 0;  int byte_count = 0;		// bytes read  int key_len = 0;  int ref_start = -1;		// position of start of current reference  for (;;) {    int c = getc(fp);    if (c == EOF)      break;    byte_count++;    switch (state) {    case START:      if (c == ' ' || c == '\t') {	space_count++;	break;      }      if (c == '\n') {	space_count = 0;	break;      }      ref_start = byte_count - space_count - 1;      space_count = 0;      if (c == '%')	state = PERCENT;      else if (csalnum(c)) {	state = KEY;	key_buffer[0] = c;	key_len = 1;      }      else	state = MIDDLE;      break;    case BOL:      switch (c) {      case '%':	if (space_count > 0) {	  space_count = 0;	  state = MIDDLE;	}	else	  state = PERCENT;	break;      case ' ':      case '\t':	space_count++;	break;      case '\n':	store_reference(filename_index, ref_start,			byte_count - 1 - space_count - ref_start);	state = START;	space_count = 0;	break;      default:	space_count = 0;	if (csalnum(c)) {	  state = KEY;	  key_buffer[0] = c;	  key_len = 1;	}	else	  state = MIDDLE;      }      break;    case PERCENT:      if (strchr(ignore_fields, c) != 0)	state = IGNORE;      else if (c == '\n')	state = BOL;      else	state = MIDDLE;      break;    case IGNORE:      if (c == '\n')	state = IGNORE_BOL;      break;    case IGNORE_BOL:      switch (c) {      case '%':	if (space_count > 0) {	  state = IGNORE;	  space_count = 0;	}	else	  state = PERCENT;	break;      case ' ':      case '\t':	space_count++;	break;      case '\n':	store_reference(filename_index, ref_start,			byte_count - 1 - space_count - ref_start);	state = START;	space_count = 0;	break;      default:	space_count = 0;	state = IGNORE;      }      break;    case KEY:      if (csalnum(c)) {	if (key_len < truncate_len)	  key_buffer[key_len++] = c;	else	  state = DISCARD;      }      else {	possibly_store_key(key_buffer, key_len);	key_len = 0;	if (c == '\n')	  state = BOL;	else	  state = MIDDLE;      }      break;    case DISCARD:      if (!csalnum(c)) {	possibly_store_key(key_buffer, key_len);	key_len = 0;	if (c == '\n')	  state = BOL;	else	  state = MIDDLE;      }      break;    case MIDDLE:      if (csalnum(c)) {	state = KEY;	key_buffer[0] = c;	key_len = 1;      }      else if (c == '\n')	state = BOL;      break;    default:      assert(0);    }  }  switch (state) {  case START:    break;  case DISCARD:  case KEY:    possibly_store_key(key_buffer, key_len);    // fall through  case BOL:  case PERCENT:  case IGNORE_BOL:  case IGNORE:  case MIDDLE:    store_reference(filename_index, ref_start,		    byte_count - ref_start - space_count);    break;  default:    assert(0);  }  fclose(fp);  return 1;}static void store_reference(int filename_index, int pos, int len){  tag t;  t.filename_index = filename_index;  t.start = pos;  t.length = len;  fwrite_or_die(&t, sizeof(t), 1, indxfp);  ntags++;}static void store_filename(const char *fn){  filenames += fn;  filenames += '\0';}static void init_hash_table(){  hash_table = new table_entry[hash_table_size];  for (int i = 0; i < hash_table_size; i++)    hash_table[i].ptr = 0;}static void possibly_store_key(char *s, int len){  static int last_tagno = -1;  static int key_count;  if (last_tagno != ntags) {    last_tagno = ntags;    key_count = 0;  }  if (key_count < max_keys_per_item) {    if (store_key(s, len))      key_count++;  }}static int store_key(char *s, int len){  if (len < shortest_len)    return 0;  int is_number = 1;  for (int i = 0; i < len; i++)    if (!csdigit(s[i])) {      is_number = 0;      s[i] = cmlower(s[i]);    }  if (is_number && !(len == 4 && s[0] == '1' && s[1] == '9'))    return 0;  int h = hash(s, len) % hash_table_size;  if (common_words_table) {    for (word_list *ptr = common_words_table[h]; ptr; ptr = ptr->next)      if (len == ptr->len && memcmp(s, ptr->str, len) == 0)	return 0;  }  table_entry *pp =  hash_table + h;  if (!pp->ptr)    pp->ptr = new block;  else if (pp->ptr->v[pp->ptr->used - 1] == ntags)    return 1;  else if (pp->ptr->used >= BLOCK_SIZE)    pp->ptr = new block(pp->ptr);  pp->ptr->v[(pp->ptr->used)++] = ntags;  return 1;}static void write_hash_table(){  const int minus_one = -1;  int li = 0;  for (int i = 0; i < hash_table_size; i++) {    block *ptr = hash_table[i].ptr;    if (!ptr)      hash_table[i].count = -1;    else {      hash_table[i].count = li;      block *rev = 0;      while (ptr) {	block *tem = ptr;	ptr = ptr->next;	tem->next = rev;	rev = tem;      }      while (rev) {	fwrite_or_die(rev->v, sizeof(int), rev->used, indxfp);	li += rev->used;	block *tem = rev;	rev = rev->next;	delete tem;      }      fwrite_or_die(&minus_one, sizeof(int), 1, indxfp);      li += 1;    }  }  if (sizeof(table_entry) == sizeof(int))    fwrite_or_die(hash_table, sizeof(int), hash_table_size, indxfp);  else {    assert(0);    // write it out word by word  }  fwrite_or_die(filenames.contents(), 1, filenames.length(), indxfp);  if (fseek(indxfp, 0, 0) < 0)    fatal("error seeking on index file: %1", strerror(errno));  index_header h;  h.magic = INDEX_MAGIC;  h.version = INDEX_VERSION;  h.tags_size = ntags;  h.lists_size = li;  h.table_size = hash_table_size;  h.strings_size = filenames.length();  h.truncate = truncate_len;  h.shortest = shortest_len;  h.common = n_ignore_words;  fwrite_or_die(&h, sizeof(h), 1, indxfp);}static void fwrite_or_die(const void *ptr, int size, int nitems, FILE *fp){  if (fwrite(ptr, size, nitems, fp) != nitems)    fatal("fwrite failed: %1", strerror(errno));}void fatal_error_exit(){  cleanup();  exit(3);}extern "C" {void cleanup(){  if (temp_index_file)    unlink(temp_index_file);}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -