⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 catalog.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* Normalize public identifiers to handle ISO 8879[-:]1986 problem.
What should happen if there's a duplicate in a single catalog entry file? */

#include "config.h"
#include "std.h"
#include "catalog.h"

/* P() wraps prototype parameter lists so that they vanish when the
   compiler does not support prototypes. */
#ifdef USE_PROTOTYPES
#define P(parms) parms
#else
#define P(parms) ()
#endif

#include "alloc.h"

/* Characters permitted in a minimum literal.  load() turns this string
   into the parser's minimum_data lookup table.  The string is continued
   with backslash-newline, so the continuation lines must start in
   column 0. */
#define MINIMUM_DATA_CHARS \
"abcdefghijklmnopqrstuvwxyz\
ABCDEFGHIJKLMNOPQRSTUVWXYZ\
0123456789-.'()+,/:=?"

/* Tables 0 .. N_DECL_TYPE-1 are indexed by declaration type; the table
   at index PUBLIC_ID_MAP holds the public identifier entries. */
#define N_DECL_TYPE 3
#define PUBLIC_ID_MAP N_DECL_TYPE
#define N_TABLES (N_DECL_TYPE + 1)

/* Kind of literal that parse_param() is asked to accept. */
enum literal_type {
  NORMAL_LITERAL,
  MINIMUM_LITERAL
};

/* Kind of parameter returned by the tokenizer. */
typedef enum {
  EOF_PARAM,
  NAME_PARAM,
  LITERAL_PARAM
} PARAM_TYPE;

/* Error numbers.  The order must match the message table in
   catalog_error_text(). */
enum catalog_error {
  E_NAME_EXPECTED,
  E_LITERAL_EXPECTED,
  E_ARG_EXPECTED,
  E_MINIMUM_DATA,
  E_EOF_COMMENT,
  E_EOF_LITERAL,
  E_NUL_CHAR,
  E_CANNOT_OPEN,
  E_GETC,
  E_FCLOSE
};

#define FIRST_SYSTEM_ERROR E_CANNOT_OPEN

#define HASH_TABLE_INITIAL_SIZE 8
#define HASH_TABLE_MAX_SIZE (((SIZE_T)-1)/sizeof(struct hash_table_entry *))

struct hash_table_entry {
  int file_index;               /* index into the catalog's files array */
  const char *key;
  const char *system_id;
};

/* Number of bytes per string block. */
#define BLOCK_SIZE 1000

/* Bytes follow the struct. */
struct string_block {
  struct string_block *next;
};

struct hash_table {
  struct hash_table_entry **v;
  SIZE_T size;                  /* must be power of 2 */
  SIZE_T used;
  SIZE_T used_limit;
};

struct catalog {
  struct hash_table tables[N_TABLES];
  char **files;                 /* names of the catalog entry files */
  int n_files;                  /* number of elements in files */
  struct string_block *blocks;  /* list of blocks freed by catalog_delete */
  char *block_ptr;
  SIZE_T block_spare;
  CATALOG_ERROR_HANDLER error_handler;
  int loaded;                   /* non-zero once load() has been run */
};

struct parser {
  FILE *fp;
  struct catalog *cat;
  char *param;                  /* buffer holding the current parameter */
  SIZE_T param_length;
  SIZE_T param_alloc;
  int file_index;               /* index in cat->files of the current file */
  const char *filename;
  unsigned long newline_count;  /* newlines seen so far in this file */
  char minimum_data[256];       /* 1 for each char in MINIMUM_DATA_CHARS */
};

static
VOID add_catalog_file P((struct catalog *cat, const char *filename,
                         SIZE_T length));
static
VOID load P((struct catalog *cat));
static
VOID parse_file P((struct parser *parser));
static
VOID parse_public P((struct parser *parser));
static
VOID parse_name_map P((struct parser *parser,
                       int decl_type));
static
int parse_arg P((struct parser *parser));
static
PARAM_TYPE parse_param P((struct parser *parser, enum literal_type));
static
VOID skip_comment P((struct parser *parser));
static
PARAM_TYPE parse_literal P((struct parser *parser, int lit,
                            enum literal_type));
static
PARAM_TYPE parse_name P((struct parser *parser, int first_char));
static
VOID param_grow P((struct parser *parser));
static
const char *param_save P((struct parser *parser));
static
char *alloc_bytes P((struct catalog *catalog, SIZE_T n));
static
int param_equal P((struct parser *parser, const char *key));
static
int hash_table_add P((struct hash_table *table, const char *s,
                      const char *system_id, int file_index));
static
struct hash_table_entry *hash_table_lookup P((struct hash_table *table,
                                              const char *s));
static
struct hash_table_entry *hash_table_lookup_subst P((struct hash_table *table,
                                                    const char *subst_table,
                                                    const char *s));
static
VOID hash_table_init P((struct hash_table *p));
static
VOID hash_table_delete P((struct hash_table *p));
static
SIZE_T hash_table_start_index P((struct hash_table *p, const char *s));
static
int subst_equal P((const char *subst_table, const char *s1, const char *s2));
static
VOID error P((struct parser *parser, enum catalog_error err));

/* Append c to the parameter buffer, calling param_grow() first when the
   buffer is full.  Note that the parser argument is evaluated more than
   once. */
#define param_char(parser, c) \
  ((((parser)->param_length < (parser)->param_alloc) \
     || (param_grow(parser), 1)), \
  ((parser)->param[(parser)->param_length] = (c)), \
  ((parser)->param_length += 1))

/* Make the parameter buffer empty. */
#define param_init(parser) ((parser)->param_length = 0)

/* Drop the last character of the parameter buffer. */
#define param_chop(parser) \
  ((parser)->param_length = (parser)->param_length - 1)

/* Return the message text for error_number, or a fixed placeholder
   string if error_number is out of range.  The texts for the last three
   errors contain two %s conversions each. */

const char *catalog_error_text(error_number)
     int error_number;
{
  static const char *text[] = {
    "Name expected",
    "Literal expected",
    "Missing argument",
    "Only minimum data characters allowed in a public identifier",
    "End of file in comment",
    "End of file in literal",
    "Nul character is not allowed",
    "Cannot open `%s': %s",
    "Error reading `%s': %s",
    "Error closing `%s': %s"
  };
  /* The >= 0 test comes first, so the conversion to an unsigned type in
     the second comparison cannot let a negative number through. */
  if (error_number >= 0 && error_number < sizeof(text)/sizeof(text[0]))
    return text[error_number];
  else
    return "(invalid error number)";
}

/* Create an empty catalog that will use error_handler.  No file is read
   here; reading is deferred until the first lookup. */

CATALOG catalog_create(error_handler)
     CATALOG_ERROR_HANDLER error_handler;
{
  int i;
  struct catalog *p = (struct catalog *)xmalloc(sizeof(struct catalog));
  p->loaded = 0;
  p->n_files = 0;
  p->files = 0;
  p->error_handler = error_handler;
  p->blocks = 0;
  p->block_spare = 0;
  p->block_ptr = 0;
  for (i = 0; i < N_TABLES; i++)
    hash_table_init(p->tables + i);
  return (CATALOG)p;
}

/* Free a catalog: its hash tables, the files array, every string block
   and the catalog structure itself.  The individual file name strings
   are obtained from alloc_bytes() and are not freed one by one here. */

VOID catalog_delete(cat)
     CATALOG cat;
{
  int i;
  struct string_block *block;
  struct catalog *catalog = (struct catalog *)cat;
  /* Use N_TABLES rather than a literal count so that this loop always
     agrees with the one in catalog_create(). */
  for (i = 0; i < N_TABLES; i++)
    hash_table_delete(catalog->tables + i);
  if (catalog->files)
    free((UNIV)catalog->files);
  block = catalog->blocks;
  while (block) {
    struct string_block *tem = block;
    block = block->next;    /* fetch the link before the node is freed */
    free((UNIV)tem);
  }
  catalog->blocks = 0;
  free((UNIV)catalog);
}

/* Add filename to the list of catalog entry files.  The file is not
   opened here: load() parses the listed files, and it is run by the
   first call to catalog_lookup_entity(). */

VOID catalog_load_file(p, filename)
     CATALOG p;
     const char *filename;
{
  add_catalog_file((struct catalog *)p, filename, strlen(filename));
}

/* Look up an entity in the catalog, loading the catalog first if that
   has not yet been done.

   public_id, if non-null, is looked up in the public identifier table.
   name, if non-null and decl_type is a valid declaration type, is
   looked up in the table for decl_type (using subst_table for the
   comparison when subst_table is non-null); this lookup is skipped when
   the public identifier was found in the file with index 0.  The name
   entry is used when there is no public identifier entry, or when the
   name entry comes from a file with a strictly smaller index.

   Returns 1 and stores the system identifier and the catalog file name
   through system_id and catalog_file if an entry was found, otherwise
   returns 0 and stores nothing. */

int catalog_lookup_entity(cat, public_id, name, decl_type, subst_table,
                          system_id, catalog_file)
     CATALOG cat;
     const char *public_id;
     const char *name;
     enum catalog_decl_type decl_type;
     const char *subst_table;
     const char **system_id;
     const char **catalog_file;
{
  struct catalog *catalog = (struct catalog *)cat;
  const struct hash_table_entry *entry = 0;
  if (!catalog->loaded)
    load(catalog);
  if (public_id)
    entry = hash_table_lookup(catalog->tables + PUBLIC_ID_MAP, public_id);
  if (name
      && decl_type >= 0
      && decl_type < N_DECL_TYPE
      && (!entry || entry->file_index > 0)) {
    const struct hash_table_entry *entity_entry = 0;
    if (!subst_table)
      entity_entry = hash_table_lookup(catalog->tables + decl_type, name);
    else
      entity_entry = hash_table_lookup_subst(catalog->tables + decl_type,
                                             subst_table, name);
    if (!entry
        || (entity_entry
            && entity_entry->file_index < entry->file_index))
      entry = entity_entry;
  }
  if (!entry)
    return 0;
  *system_id = entry->system_id;
  *catalog_file = catalog->files[entry->file_index];
  return 1;
}

/* Append a copy of the first length bytes of filename, with a
   terminating nul added, to cat->files.  filename itself need not be
   nul-terminated at that point. */

static
VOID add_catalog_file(cat, filename, length)
     struct catalog *cat;
     const char *filename;
     SIZE_T length;
{
  char *s;
  if (!cat->files)
    cat->files = (char **)xmalloc(sizeof(char *));
  else
    cat->files
      = (char **)xrealloc(cat->files, (cat->n_files + 1)*sizeof(char *));
  s = alloc_bytes(cat, length + 1);
  memcpy(s, filename, length);
  s[length] = '\0';
  cat->files[cat->n_files] = s;
  cat->n_files += 1;
}

/* Read all the catalog entry files into cat.

   The files named by the environment variable CATALOG_FILES_ENV_VAR
   (or by DEFAULT_CATALOG_FILES when the variable is unset or empty),
   separated by PATH_FILE_SEP, are appended to the list of files; empty
   components are ignored.  Every file in the list is then parsed in
   order.  A file that cannot be opened is reported only if it was in
   the list before this function was called; the files taken from the
   environment variable or the default are optional. */

static
VOID load(cat)
     struct catalog *cat;
{
  int i;
  const char *p;
  struct parser parser;
  const char *env_var;
  int optional_file_index = cat->n_files;
  cat->loaded = 1;
  parser.param = 0;
  parser.param_alloc = 0;
  /* Start with a defined length rather than relying on the first
     parameter to set it. */
  parser.param_length = 0;
  parser.cat = cat;
  for (i = 0; i < 256; i++)
    parser.minimum_data[i] = 0;
  for (p = MINIMUM_DATA_CHARS; *p; p++)
    parser.minimum_data[(unsigned char)*p] = 1;
  env_var = getenv(CATALOG_FILES_ENV_VAR);
  if (!env_var || *env_var == '\0')
    env_var = DEFAULT_CATALOG_FILES;
  for (;;) {
    /* Find the end of the current component. */
    for (p = env_var; *p && *p != PATH_FILE_SEP; p++)
      ;
    if (p > env_var)
      add_catalog_file(cat, env_var, p - env_var);
    if (!*p)
      break;
    env_var = p + 1;
  }
  for (i = 0; i < cat->n_files; i++) {
    parser.filename = cat->files[i];
    parser.newline_count = 0;
    parser.fp = fopen(cat->files[i], "r");
    if (!parser.fp) {
      if (i < optional_file_index)
        error(&parser, E_CANNOT_OPEN);
    }
    else {
      parser.file_index = i;
      parse_file(&parser);
      errno = 0;
      if (fclose(parser.fp) < 0)
        error(&parser, E_FCLOSE);
    }
  }
  if (parser.param)
    free(parser.param);
}

/* Parse one catalog entry file.  The names PUBLIC, ENTITY, DOCTYPE and
   LINKTYPE introduce entries.  Any other name sets the skipping flag,
   which is never cleared again for this file; a literal seen while the
   flag is clear gives E_NAME_EXPECTED and also sets it, so that error
   is reported at most once per file. */

static
VOID parse_file(parser)
     struct parser *parser;
{
  int skipping = 0;
  for (;;) {
    PARAM_TYPE type = parse_param(parser, NORMAL_LITERAL);
    if (type == NAME_PARAM) {
      if (param_equal(parser, "PUBLIC"))
        parse_public(parser);
      else if (param_equal(parser, "ENTITY"))
        parse_name_map(parser, CATALOG_ENTITY_DECL);
      else if (param_equal(parser, "DOCTYPE"))
        parse_name_map(parser, CATALOG_DOCTYPE_DECL);
      else if (param_equal(parser, "LINKTYPE"))
        parse_name_map(parser, CATALOG_LINKTYPE_DECL);
      else
        skipping = 1;
    }
    else if (type == EOF_PARAM)
      break;
    else if (!skipping) {
      skipping = 1;
      error(parser, E_NAME_EXPECTED);
    }
  }
}

/* Parse the rest of a PUBLIC entry: a minimum literal giving the public
   identifier, followed by an argument giving the system identifier, and
   add the pair to the public identifier table.

   NOTE(review): when the first parameter is not a literal the error is
   reported but parsing carries on with whatever is in the parameter
   buffer, so an entry may still be added.  Confirm that this leniency
   is intended before changing it. */

static
VOID parse_public(parser)
     struct parser *parser;
{
  const char *public_id;
  if (parse_param(parser, MINIMUM_LITERAL) != LITERAL_PARAM)
    error(parser, E_LITERAL_EXPECTED);
  public_id = param_save(parser);
  if (!parse_arg(parser))
    return;
  hash_table_add(parser->cat->tables + PUBLIC_ID_MAP,
                 public_id, param_save(parser), parser->file_index);
}

/* Parse the rest of an ENTITY, DOCTYPE or LINKTYPE entry: an argument
   giving the name followed by an argument giving the system identifier,
   and add the pair to the table for decl_type.  Nothing is added if
   either argument is missing. */

static
VOID parse_name_map(parser, decl_type)
     struct parser *parser;
     int decl_type;
{
  const char *name;
  if (!parse_arg(parser))
    return;
  name = param_save(parser);
  if (!parse_arg(parser))
    return;
  hash_table_add(parser->cat->tables + decl_type,
                 name, param_save(parser), parser->file_index);
}

/* Parse an argument, which may be either a name or a literal.  Returns
   1 on success; otherwise reports E_ARG_EXPECTED and returns 0. */

static
int parse_arg(parser)
     struct parser *parser;
{
  PARAM_TYPE parm = parse_param(parser, NORMAL_LITERAL);
  if (parm != NAME_PARAM && parm != LITERAL_PARAM) {
    error(parser, E_ARG_EXPECTED);
    return 0;
  }
  return 1;
}

/* Read the next parameter.  Spaces, tabs, newlines and comments that
   start with -- are skipped; a nul character is reported and skipped.
   A quote or apostrophe starts a literal of type lit_type; any other
   character, including a - that is not followed by another -, starts a
   name.  Returns EOF_PARAM at end of file or on a read error. */

static
PARAM_TYPE parse_param(parser, lit_type)
     struct parser *parser;
     enum literal_type lit_type;
{
  for (;;) {
    int c = getc(parser->fp);
    switch (c) {
    case EOF:
      if (ferror(parser->fp))
        error(parser, E_GETC);
      return EOF_PARAM;
    case '"':
    case '\'':
      return parse_literal(parser, c, lit_type);
    case '\n':
      parser->newline_count += 1;
      break;
    case '\t':
    case ' ':
      break;
    case '\0':
      error(parser, E_NUL_CHAR);
      break;
    case '-':
      c = getc(parser->fp);
      if (c == '-') {
        skip_comment(parser);
        break;
      }
      /* Not a comment: put the lookahead character back and treat the
         - as the first character of a name. */
      ungetc(c, parser->fp);
      c = '-';
      /* fall through */
    default:
      return parse_name(parser, c);
    }
  }
}

/* Skip the rest of a comment, the opening -- of which has already been
   read.  The comment ends at the next --.  End of file inside the
   comment gives E_EOF_COMMENT, preceded by E_GETC if there was a read
   error.  Newlines inside the comment are counted.
   NOTE(review): this excerpt stops part way through the body of this
   function; the text below is reproduced only as far as it is
   visible. */

static
VOID skip_comment(parser)
     struct parser *parser;
{
  FILE *fp = parser->fp;
  for (;;) {
    int c = getc(fp);
    if (c == '-') {
      c = getc(fp);
      if (c == '-')
        return;
    }
    if (c == EOF) {
      if (ferror(fp))
        error(parser, E_GETC);
      error(parser, E_EOF_COMMENT);
      return;
    }
    if (c == '\n')
      parser->newline_count += 1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -