fastfind.c
来自「一个很有名的浏览器」· C语言 代码 · 共 795 行 · 第 1/2 页
C
795 行
/* Very fast search_keyword_in_list. *//* $Id: fastfind.c,v 1.72.4.1 2005/04/05 21:08:43 jonas Exp $ */#ifdef HAVE_CONFIG_H#include "config.h"#endif#include <stdio.h>#include <stdlib.h>#include "elinks.h"#include "util/conv.h"#include "util/error.h"#include "util/fastfind.h"#include "util/memdebug.h"#include "util/memory.h"#ifdef USE_FASTFIND/* It replaces bsearch() + strcasecmp() + callback + ... * * Following conditions should be met: * * - list keys are C strings. * - keys should not be greater than 255 characters, and optimally < 20 * characters. It can work with greater keys but then memory usage will * grow a lot. * - each key must be unique and non empty. * - list do not have to be ordered. * - total number of unique characters used in all keys should be <= 128 * - idealy total number of keys should be <= 512 (but see below) * * (c) 2003 Laurent MONIN (aka Zas) * Feel free to do whatever you want with that code. *//* These routines use a tree search. First, a big tree is composed from the * keys on input. Then, when searching we just go through the tree. If we will * end up on an 'ending' node, we've got it. * * Hm, okay. For keys { 'head', 'h1', 'body', 'bodyrock', 'bodyground' }, it * would look like: * * [root] * b h * o e 1 * d a * Y D * g r * r o * o c * u K * D * * (the ending nodes are upcased just for this drawing, not in real) * * To optimize this for speed, leafs of nodes are organized in per-node arrays * (so-called 'leafsets'), indexed by symbol value of the key's next character. * But to optimize that for memory, we first compose own alphabet consisting * only from the chars we ever use in the key strings. @uniq_chars holds that * alphabet and @idxtab is used to translate between it and ASCII. * * Tree building: O((L+M)*N) * (L: mean key length, M: alphabet size, * N: number of items). * String lookup: O(N) (N: string length). *//* Define it to generate performance and memory usage statistics to stderr. */#if 0#define DEBUG_FASTFIND#endif/* Define whether to use 32 or 64 bits per compressed element. */#if 1#define USE_32_BITS#endif#define END_LEAF_BITS 1#define COMPRESSED_BITS 1#ifdef USE_32_BITS/* Use only 32 bits per element, but has very low limits. *//* Adequate for ELinks tags search. */#define POINTER_INDEX_BITS 10 /* 1024 */#define LEAFSET_INDEX_BITS 13 /* 8192 */#define COMP_CHAR_INDEX_BITS 7 /* 128 */#define ff_node ff_node_c /* Both are 32 bits long. */#if (POINTER_INDEX_BITS + LEAFSET_INDEX_BITS + \ COMP_CHAR_INDEX_BITS + END_LEAF_BITS + \ COMPRESSED_BITS) > 32#error Over 32 bits in struct ff_node !!#endif#else /* !USE_32_BITS *//* Keep this one if there is more than 512 keywords in a list * it eats a bit more memory. * ELinks may need this one if fastfind is used in other * things than tags searching. *//* This will make struct ff_node_c use 64 bits. */#define POINTER_INDEX_BITS 12#define LEAFSET_INDEX_BITS 18#define COMP_CHAR_INDEX_BITS 8#if (POINTER_INDEX_BITS + LEAFSET_INDEX_BITS + \ + END_LEAF_BITS + COMPRESSED_BITS) > 32#error Over 32 bits in struct ff_node !!#endifstruct ff_node { /* End leaf -> p is significant */ unsigned int e:END_LEAF_BITS; /* Compressed */ unsigned int c:COMPRESSED_BITS; /* Index in pointers */ unsigned int p:POINTER_INDEX_BITS; /* Index in leafsets */ unsigned int l:LEAFSET_INDEX_BITS;};#endif /* USE_32_BITS */#define FF_MAX_KEYS (1 << POINTER_INDEX_BITS)#define FF_MAX_LEAFSETS ((1 << LEAFSET_INDEX_BITS) - 1)#define FF_MAX_CHARS (1 << COMP_CHAR_INDEX_BITS)#define FF_MAX_KEYLEN 255struct ff_node_c { unsigned int e:END_LEAF_BITS; unsigned int c:COMPRESSED_BITS; unsigned int p:POINTER_INDEX_BITS; unsigned int l:LEAFSET_INDEX_BITS; /* Index of char when compressed. */ unsigned int ch:COMP_CHAR_INDEX_BITS;};struct ff_data { void *pointer; int keylen;};struct fastfind_info { struct ff_data *data; struct ff_node **leafsets; struct ff_node *root_leafset; int min_key_len; int max_key_len; int uniq_chars_count; int count; int pointers_count; int leafsets_count; unsigned int case_aware:1; unsigned int compress:1; int idxtab[FF_MAX_CHARS]; unsigned char uniq_chars[FF_MAX_CHARS];#ifdef DEBUG_FASTFIND struct { unsigned long searches; unsigned long found; unsigned long itertmp; unsigned long iterdelta; unsigned long itermax; unsigned long iterations; unsigned long tests; unsigned long teststmp; unsigned long testsdelta; unsigned long testsmax; unsigned long memory_usage; unsigned long total_key_len; unsigned int compressed_nodes; unsigned char *comment; } debug;#endif};#ifdef DEBUG_FASTFIND/* These are for performance testing. */#define FF_DBG_mem(x, size) (x)->debug.memory_usage += (size)#define FF_DBG_test(x) (x)->debug.tests++#define FF_DBG_iter(x) (x)->debug.iterations++#define FF_DBG_cnode(x) (x)->debug.compressed_nodes++#define FF_DBG_found(x) \ do { \ unsigned long iter = (x)->debug.iterations - (x)->debug.itertmp; \ unsigned long tests = (x)->debug.tests - (x)->debug.teststmp; \ \ (x)->debug.iterdelta += iter; \ (x)->debug.testsdelta += tests; \ if (iter > (x)->debug.itermax) \ (x)->debug.itermax = iter; \ if (tests > (x)->debug.testsmax) \ (x)->debug.testsmax = tests; \ (x)->debug.found++; \ } while (0)#define FF_DBG_comment(x, str) do { (x)->debug.comment = empty_string_or_(str); } while (0)/* Update search stats. */static voidFF_DBG_search_stats(struct fastfind_info *info, int key_len){ info->debug.searches++; info->debug.total_key_len += key_len; info->debug.teststmp = info->debug.tests; info->debug.itertmp = info->debug.iterations;}/* Dump all stats. */static voidFF_DBG_dump_stats(struct fastfind_info *info){ fprintf(stderr, "------ FastFind Statistics ------\n"); fprintf(stderr, "Comment : %s\n", info->debug.comment); fprintf(stderr, "Case-aware : %s\n", info->case_aware ? "yes" : "no"); fprintf(stderr, "Compress : %s\n", info->compress ? "yes" : "no"); fprintf(stderr, "Uniq_chars : %s\n", info->uniq_chars); fprintf(stderr, "Uniq_chars #: %d/%d max.\n", info->uniq_chars_count, FF_MAX_CHARS); fprintf(stderr, "Min_key_len : %d\n", info->min_key_len); fprintf(stderr, "Max_key_len : %d\n", info->max_key_len); fprintf(stderr, "Entries : %d/%d max.\n", info->pointers_count, FF_MAX_KEYS); fprintf(stderr, "Leafsets : %d/%d max.\n", info->leafsets_count, FF_MAX_LEAFSETS); if (info->compress) fprintf(stderr, "C. leafsets : %d/%d (%0.2f%%)\n", info->debug.compressed_nodes, info->leafsets_count, 100 * (double) info->debug.compressed_nodes / info->leafsets_count); fprintf(stderr, "Memory usage: %lu bytes (cost per entry = %0.2f bytes)\n", info->debug.memory_usage, (double) info->debug.memory_usage / info->pointers_count); fprintf(stderr, "Struct info : %d bytes\n", sizeof(*info) - sizeof(info->debug)); fprintf(stderr, "Struct node : %d bytes\n", sizeof(struct ff_node)); fprintf(stderr, "Struct cnode: %d bytes\n", sizeof(struct ff_node_c)); fprintf(stderr, "Searches : %lu\n", info->debug.searches); fprintf(stderr, "Found : %lu (%0.2f%%)\n", info->debug.found, 100 * (double) info->debug.found / info->debug.searches); fprintf(stderr, "Iterations : %lu (%0.2f per search, %0.2f before found, %lu max)\n", info->debug.iterations, (double) info->debug.iterations / info->debug.searches, (double) info->debug.iterdelta / info->debug.found, info->debug.itermax); fprintf(stderr, "Tests : %lu (%0.2f per search, %0.2f per iter., %0.2f before found, %lu max)\n", info->debug.tests, (double) info->debug.tests / info->debug.searches, (double) info->debug.tests / info->debug.iterations, (double) info->debug.testsdelta / info->debug.found, info->debug.testsmax); fprintf(stderr, "Total keylen: %lu bytes (%0.2f per search, %0.2f per iter.)\n", info->debug.total_key_len, (double) info->debug.total_key_len / info->debug.searches, (double) info->debug.total_key_len / info->debug.iterations); fprintf(stderr, "\n");}#else /* !DEBUG_FASTFIND */#define FF_DBG_mem(x, size)#define FF_DBG_test(x)#define FF_DBG_iter(x)#define FF_DBG_cnode(x)#define FF_DBG_found(x)#define FF_DBG_comment(x, comment)#define FF_DBG_search_stats(info, key_len)#define FF_DBG_dump_stats(info)#endif /* DEBUG_FASTFIND */static struct fastfind_info *init_fastfind(struct fastfind_index *index, enum fastfind_flags flags){ struct fastfind_info *info = mem_calloc(1, sizeof(*info)); index->handle = info; if (!info) return NULL; info->min_key_len = FF_MAX_KEYLEN; info->case_aware = !!(flags & FF_CASE_AWARE); info->compress = !!(flags & FF_COMPRESS); FF_DBG_mem(info, sizeof(*info) - sizeof(info->debug)); FF_DBG_comment(info, index->comment); return info;}/* Return 1 on success, 0 on allocation failure */static intalloc_ff_data(struct fastfind_info *info){ struct ff_data *data; assert(info->count < FF_MAX_KEYS); if_assert_failed return 0; /* On error, cleanup is done by fastfind_done(). */ data = mem_calloc(info->count, sizeof(*data)); if (!data) return 0; info->data = data; FF_DBG_mem(info, info->count * sizeof(*data)); return 1;}/* Add pointer and its key length to correspondant arrays, incrementing * internal counter. */static voidadd_to_ff_data(void *p, int key_len, struct fastfind_info *info){ struct ff_data *data = &info->data[info->pointers_count++]; /* Record new pointer and key len, used in search */ data->pointer = p; data->keylen = key_len;}/* Return 1 on success, 0 on allocation failure */static intalloc_leafset(struct fastfind_info *info){ struct ff_node **leafsets; struct ff_node *leafset; assert(info->leafsets_count < FF_MAX_LEAFSETS); if_assert_failed return 0; /* info->leafsets[0] is never used since l=0 marks no leaf * in struct ff_node. That's the reason of that + 2. */ leafsets = mem_realloc(info->leafsets, sizeof(*leafsets) * (info->leafsets_count + 2)); if (!leafsets) return 0; info->leafsets = leafsets; leafset = mem_calloc(info->uniq_chars_count, sizeof(*leafset)); if (!leafset) return 0; FF_DBG_mem(info, sizeof(*leafsets)); FF_DBG_mem(info, sizeof(*leafset) * info->uniq_chars_count); info->leafsets_count++; info->leafsets[info->leafsets_count] = leafset; return 1;}static inline intchar2idx(unsigned char c, struct fastfind_info *info){ unsigned char *idx = memchr(info->uniq_chars, c, info->uniq_chars_count); if (idx) return (idx - info->uniq_chars); return -1;}static inline voidinit_idxtab(struct fastfind_info *info){ int i; for (i = 0; i < FF_MAX_CHARS; i++) info->idxtab[i] = char2idx((unsigned char) i, info);}static inline voidcompress_node(struct ff_node *leafset, struct fastfind_info *info, int i, int pos){ struct ff_node_c *new = mem_alloc(sizeof(*new)); if (!new) return; new->c = 1; new->e = leafset[pos].e; new->p = leafset[pos].p; new->l = leafset[pos].l; new->ch = pos; mem_free_set(&info->leafsets[i], (struct ff_node *) new); FF_DBG_cnode(info); FF_DBG_mem(info, sizeof(*new)); FF_DBG_mem(info, sizeof(*leafset) * -info->uniq_chars_count);}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?