⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zebramap.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: zebramap.c,v 1.30 2003/03/26 16:41:48 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <assert.h>#include <ctype.h>#include <yaz/yaz-util.h>#include <charmap.h>#include <zebramap.h>#define ZEBRA_MAP_TYPE_SORT  1#define ZEBRA_MAP_TYPE_INDEX 2#define ZEBRA_REPLACE_ANY  300struct zm_token {    int *token_from;    char *token_to;    int token_min;    struct zm_token *next;};struct zebra_map {    unsigned reg_id;    int completeness;    int positioned;    int type;    union {        struct {            int dummy;        } index;        struct {            int entry_size;        } sort;    } u;    chrmaptab maptab;    const char *maptab_name;    struct zebra_map *next;    struct zm_token *replace_tokens;};struct zebra_maps {    char *tabpath;    char *tabroot;    NMEM nmem;    struct zebra_map *map_list;    char temp_map_str[2];    const char *temp_map_ptr[2];    struct zebra_map **lookup_array;    WRBUF wrbuf_1, wrbuf_2;};void zebra_maps_close (ZebraMaps zms){    struct zebra_map *zm = zms->map_list;    while (zm)    {	if (zm->maptab)	    chrmaptab_destroy (zm->maptab);	zm = zm->next;    }    wrbuf_free (zms->wrbuf_1, 1);    wrbuf_free (zms->wrbuf_2, 1);    nmem_destroy (zms->nmem);    xfree (zms);}static void zebra_map_read (ZebraMaps 
zms, const char *name){    FILE *f;    char line[512];    char *argv[10];    int argc;    int lineno = 0;    struct zebra_map **zm = 0, *zp;    if (!(f = yaz_fopen(zms->tabpath, name, "r", zms->tabroot)))    {	logf(LOG_WARN|LOG_ERRNO, "%s", name);	return ;    }    while ((argc = readconf_line(f, &lineno, line, 512, argv, 10)))    {	if (!yaz_matchstr (argv[0], "index") && argc == 2)	{	    if (!zm)		zm = &zms->map_list;	    else		zm = &(*zm)->next;	    *zm = (struct zebra_map *) nmem_malloc (zms->nmem, sizeof(**zm));	    (*zm)->reg_id = argv[1][0];	    (*zm)->maptab_name = NULL;	    (*zm)->maptab = NULL;	    (*zm)->type = ZEBRA_MAP_TYPE_INDEX;	    (*zm)->completeness = 0;	    (*zm)->positioned = 1;	    (*zm)->replace_tokens = 0;	}	else if (!yaz_matchstr (argv[0], "sort") && argc == 2)	{	    if (!zm)		zm = &zms->map_list;	    else		zm = &(*zm)->next;	    *zm = (struct zebra_map *) nmem_malloc (zms->nmem, sizeof(**zm));	    (*zm)->reg_id = argv[1][0];	    (*zm)->maptab_name = NULL;	    (*zm)->type = ZEBRA_MAP_TYPE_SORT;            (*zm)->u.sort.entry_size = 80;	    (*zm)->maptab = NULL;	    (*zm)->completeness = 0;	    (*zm)->positioned = 0;	    (*zm)->replace_tokens = 0;	}	else if (zm && !yaz_matchstr (argv[0], "charmap") && argc == 2)	{	    (*zm)->maptab_name = nmem_strdup (zms->nmem, argv[1]);	}	else if (zm && !yaz_matchstr (argv[0], "completeness") && argc == 2)	{	    (*zm)->completeness = atoi (argv[1]);	}	else if (zm && !yaz_matchstr (argv[0], "position") && argc == 2)	{	    (*zm)->positioned = atoi (argv[1]);	}        else if (zm && !yaz_matchstr (argv[0], "entrysize") && argc == 2)        {            if ((*zm)->type == ZEBRA_MAP_TYPE_SORT)		(*zm)->u.sort.entry_size = atoi (argv[1]);        }        else if (zm && !yaz_matchstr (argv[0], "replace") && argc >= 2)        {	    struct zm_token *token = nmem_malloc (zms->nmem, sizeof(*token));	    token->next = (*zm)->replace_tokens;	    (*zm)->replace_tokens = token;#if 0	    logf (LOG_LOG, "replace %s", 
argv[1]);#endif	    token->token_from = 0;            if (argc >= 2)            {	        char *cp = argv[1];	        int *dp = token->token_from = (int *)                    nmem_malloc (zms->nmem, (1+strlen(cp))*sizeof(int));	        while (*cp)		    if (*cp == '$')		    {		        *dp++ = ' ';		        cp++;		    }		    else if (*cp == '.')                    {                        *dp++ = ZEBRA_REPLACE_ANY;                        cp++;                    }                    else		    {		        *dp++ = zebra_prim(&cp);#if 0			logf (LOG_LOG, "  char %2X %c", dp[-1], dp[-1]);#endif		    }	        *dp = '\0';	    }	    if (argc >= 3)	    {                char *cp = argv[2];		char *dp = token->token_to =                    nmem_malloc (zms->nmem, strlen(cp)+1);		while (*cp)		    if (*cp == '$')		    {			*dp++ = ' ';			cp++;		    }		    else			*dp++ = zebra_prim(&cp);		*dp = '\0';	    }	    else		token->token_to = 0;        }    }    if (zm)	(*zm)->next = NULL;    yaz_fclose (f);    for (zp = zms->map_list; zp; zp = zp->next)	zms->lookup_array[zp->reg_id] = zp;}static void zms_map_handle (void *p, const char *name, const char *value){    ZebraMaps zms = (ZebraMaps) p;        zebra_map_read (zms, value);}ZebraMaps zebra_maps_open (Res res, const char *base){    ZebraMaps zms = (ZebraMaps) xmalloc (sizeof(*zms));    int i;    zms->nmem = nmem_create ();    zms->tabpath = nmem_strdup (zms->nmem,				res_get_def (res, "profilePath",                                             DEFAULT_PROFILE_PATH));    zms->tabroot = 0;    if (base)        zms->tabroot = nmem_strdup (zms->nmem, base);    zms->map_list = NULL;    zms->temp_map_str[0] = '\0';    zms->temp_map_str[1] = '\0';    zms->temp_map_ptr[0] = zms->temp_map_str;    zms->temp_map_ptr[1] = NULL;    zms->lookup_array = (struct zebra_map**)	nmem_malloc (zms->nmem, sizeof(*zms->lookup_array)*256);    for (i = 0; i<256; i++)	zms->lookup_array[i] = 0;    if (!res || !res_trav (res, "index", zms, zms_map_handle))	
zebra_map_read (zms, "default.idx");    zms->wrbuf_1 = wrbuf_alloc();    zms->wrbuf_2 = wrbuf_alloc();    return zms;}struct zebra_map *zebra_map_get (ZebraMaps zms, unsigned reg_id){    return zms->lookup_array[reg_id];}chrmaptab zebra_charmap_get (ZebraMaps zms, unsigned reg_id){    struct zebra_map *zm = zebra_map_get (zms, reg_id);    if (!zm)    {	zm = (struct zebra_map *) nmem_malloc (zms->nmem, sizeof(*zm));	logf (LOG_WARN, "Unknown register type: %c", reg_id);	zm->reg_id = reg_id;	zm->maptab_name = nmem_strdup (zms->nmem, "@");	zm->maptab = NULL;	zm->type = ZEBRA_MAP_TYPE_INDEX;	zm->completeness = 0;	zm->next = zms->map_list;	zms->map_list = zm->next;	zms->lookup_array[zm->reg_id & 255] = zm;    }    if (!zm->maptab)    {	if (!zm->maptab_name || !yaz_matchstr (zm->maptab_name, "@"))	    return NULL;	if (!(zm->maptab = chrmaptab_create (zms->tabpath,					     zm->maptab_name, 0,                                             zms->tabroot)))	    logf(LOG_WARN, "Failed to read character table %s",		 zm->maptab_name);	else	    logf(LOG_DEBUG, "Read character table %s", zm->maptab_name);    }    return zm->maptab;}const char **zebra_maps_input (ZebraMaps zms, unsigned reg_id,			       const char **from, int len){    chrmaptab maptab;    maptab = zebra_charmap_get (zms, reg_id);    if (maptab)	return chr_map_input(maptab, from, len);        zms->temp_map_str[0] = **from;    (*from)++;    return zms->temp_map_ptr;}const char *zebra_maps_output(ZebraMaps zms, unsigned reg_id,			      const char **from){    chrmaptab maptab = zebra_charmap_get (zms, reg_id);    if (!maptab)	return 0;    return chr_map_output (maptab, from, 1);}/* ------------------------------------ */typedef struct {    int type;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -