⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 d1_absyn.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/* $Id: d1_absyn.c,v 1.9 2003/06/12 18:20:24 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <stdio.h>#include <assert.h>#include <stdlib.h>#include <string.h>#include <yaz/oid.h>#include <yaz/log.h>#include <data1.h>#include <zebra_xpath.h>#define D1_MAX_NESTING  128struct data1_systag {    char *name;    char *value;    struct data1_systag *next;};struct data1_absyn_cache_info {    char *name;    data1_absyn *absyn;    data1_absyn_cache next;};struct data1_attset_cache_info {    char *name;    data1_attset *attset;    data1_attset_cache next;};data1_absyn *data1_absyn_search (data1_handle dh, const char *name){    data1_absyn_cache p = *data1_absyn_cache_get (dh);    while (p)    {	if (!strcmp (name, p->name))	    return p->absyn;	p = p->next;    }    return NULL;}/* *ostrich*   We need to destroy DFAs, in xp_element (xelm) definitions    pop, 2002-12-13*/void data1_absyn_destroy (data1_handle dh){    data1_absyn_cache p = *data1_absyn_cache_get (dh);        while (p)    {        data1_absyn *abs = p->absyn;	if (abs)	{	    data1_xpelement *xpe = abs->xp_elements;	    while (xpe) {		logf (LOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr);		if (xpe->dfa) {  dfa_delete (&xpe->dfa); }		xpe = xpe->next;	    } 	}        p = p->next;    }}void 
data1_absyn_trav (data1_handle dh, void *handle,		       void (*fh)(data1_handle dh, void *h, data1_absyn *a)){    data1_absyn_cache p = *data1_absyn_cache_get (dh);    while (p)    {	(*fh)(dh, handle, p->absyn);	p = p->next;    }}data1_absyn *data1_absyn_add (data1_handle dh, const char *name){    char fname[512];    NMEM mem = data1_nmem_get (dh);    data1_absyn_cache p = (data1_absyn_cache)nmem_malloc (mem, sizeof(*p));    data1_absyn_cache *pp = data1_absyn_cache_get (dh);    sprintf(fname, "%s.abs", name);    p->absyn = data1_read_absyn (dh, fname, 0);    p->name = nmem_strdup (mem, name);    p->next = *pp;    *pp = p;    return p->absyn;}data1_absyn *data1_get_absyn (data1_handle dh, const char *name){    data1_absyn *absyn;    if (!(absyn = data1_absyn_search (dh, name)))	absyn = data1_absyn_add (dh, name);    return absyn;}data1_attset *data1_attset_search_name (data1_handle dh, const char *name){    data1_attset_cache p = *data1_attset_cache_get (dh);    while (p)    {	if (!strcmp (name, p->name))	    return p->attset;	p = p->next;    }    return NULL;}data1_attset *data1_attset_search_id (data1_handle dh, int id){    data1_attset_cache p = *data1_attset_cache_get (dh);    while (p)    {	if (id == p->attset->reference)	    return p->attset;	p = p->next;    }    return NULL;}data1_attset *data1_attset_add (data1_handle dh, const char *name){    char fname[512], aname[512];    NMEM mem = data1_nmem_get (dh);    data1_attset *attset;    strcpy (aname, name);    sprintf(fname, "%s.att", name);    attset = data1_read_attset (dh, fname);    if (!attset)    {	char *cp;	attset = data1_read_attset (dh, name);	if (attset && (cp = strrchr (aname, '.')))	    *cp = '\0';    }    if (!attset)	yaz_log (LOG_WARN|LOG_ERRNO, "Couldn't load attribute set %s", name);    else    {	data1_attset_cache p = (data1_attset_cache)	    nmem_malloc (mem, sizeof(*p));	data1_attset_cache *pp = data1_attset_cache_get (dh);		attset->name = p->name = nmem_strdup (mem, aname);	p->attset = 
attset;	p->next = *pp;	*pp = p;    }    return attset;}data1_attset *data1_get_attset (data1_handle dh, const char *name){    data1_attset *attset;    if (!(attset = data1_attset_search_name (dh, name)))	attset = data1_attset_add (dh, name);    return attset;}data1_esetname *data1_getesetbyname(data1_handle dh, data1_absyn *a,				    const char *name){    data1_esetname *r;    for (r = a->esetnames; r; r = r->next)	if (!data1_matchstr(r->name, name))	    return r;    return 0;}data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,					  data1_element *parent,					  const char *tagname){    data1_element *r;    /* It's now possible to have a data1 tree with no abstract syntax */    if ( !abs )        return 0;    if (!parent)        r = abs->main_elements;    else	r = parent->children;    for (; r; r = r->next)    {	data1_name *n;	for (n = r->tag->names; n; n = n->next)	    if (!data1_matchstr(tagname, n->name))		return r;    }    return 0;}data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn,				       const char *name){    data1_element *r;    /* It's now possible to have a data1 tree with no abstract syntax */    if ( !absyn )        return 0;    for (r = absyn->main_elements; r; r = r->next)	if (!data1_matchstr(r->name, name))	    return r;    return 0;}void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e){    /* It's now possible to have a data1 tree with no abstract syntax */    if ( !absyn )        return;    for (; e; e = e->next)    {	if (!e->sub_name)	{	    if (e->children)		fix_element_ref (dh, absyn, e->children);	}	else	{	    data1_sub_elements *sub_e = absyn->sub_elements;	    while (sub_e && strcmp (e->sub_name, sub_e->name))		sub_e = sub_e->next;	    if (sub_e)		e->children = sub_e->elements;	    else		yaz_log (LOG_WARN, "Unresolved reference to sub-elements %s",		      e->sub_name);	}    }}/* *ostrich*   New function, a bit dummy now... I've seen it in zrpn.c... 
We should build   more clever regexps...      //a    ->    ^a/.*$      //a/b  ->    ^b/a/.*$      /a     ->    ^a/$      /a/b   ->    ^b/a/$      /      ->    none   pop, 2002-12-13   Now [] predicates are supported   pop, 2003-01-17 */const char * mk_xpath_regexp (data1_handle dh, char *expr) {    char *p = expr;    char *pp;    char *s;    int abs = 1;    int i;    int j;    int e=0;    int is_predicate = 0;        static char *stack[32];    static char res[1024];    char *r = "";        if (*p != '/') { return (""); }    p++;    if (*p == '/') { abs=0; p++; }        while (*p) {        i=0;        while (*p && !strchr("/",*p)) { 	  i++; p++; 	}        stack[e] = (char *) nmem_malloc (data1_nmem_get (dh), i+1);	s = stack[e];	for (j=0; j< i; j++) {	  pp = p-i+j;	  if (*pp == '[') {	    is_predicate=1;	  }	  else if (*pp == ']') {	    is_predicate=0;	  }	  else {	    if (!is_predicate) {	      if (*pp == '*') 		 *s++ = '.';	      *s++ = *pp;	    }	  }	}	*s = 0;        e++;        if (*p) {p++;}    }    e--;  p = &res[0]; i=0;    sprintf (p, "^"); p++;    while (e >= 0) {        /* !!! res size is not checked !!! */        sprintf (p, "%s/",stack[e]);        p += strlen(stack[e]) + 1;        e--;    }    if (!abs) { sprintf (p, ".*"); p+=2; }    sprintf (p, "$"); p++;    r = nmem_strdup (data1_nmem_get (dh), res);    yaz_log(LOG_DEBUG,"Got regexp: %s",r);    return (r);}/* *ostrich*   added arg xpelement... when called from xelm context, it's 1, saying   that ! means xpath, not element name as attribute name...   
pop, 2002-12-13 */static int parse_termlists (data1_handle dh, data1_termlist ***tpp,			    char *p, const char *file, int lineno,			    const char *element_name, data1_absyn *res,			    int xpelement){    data1_termlist **tp = *tpp;    do    {	char attname[512], structure[512];	char *source;	int r;		if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname,			 structure)))	{	    yaz_log(LOG_WARN,		    "%s:%d: Syntax error in termlistspec '%s'",		    file, lineno, p);	    return -1;	}	*tp = (data1_termlist *)	  nmem_malloc(data1_nmem_get(dh), sizeof(**tp));	(*tp)->next = 0;        	if (!xpelement) {            if (*attname == '!')                strcpy(attname, element_name);	}	if (!((*tp)->att = data1_getattbyname(dh, res->attset,                                              attname))) {            if ((!xpelement) || (*attname != '!')) {                yaz_log(LOG_WARN,                        "%s:%d: Couldn't find att '%s' in attset",                        file, lineno, attname);                return -1;            } else {                (*tp)->att = 0;            }	}        	if (r == 2 && (source = strchr(structure, ':')))	    *source++ = '\0';   /* cut off structure .. */	else	    source = "data";    /* ok: default is leaf data */	(*tp)->source = (char *)	    nmem_strdup (data1_nmem_get (dh), source);		if (r < 2) /* is the structure qualified? 
*/	    (*tp)->structure = "w";	else 	    (*tp)->structure = (char *)		nmem_strdup (data1_nmem_get (dh), structure);	tp = &(*tp)->next;    }    while ((p = strchr(p, ',')) && *(++p));    *tpp = tp;    return 0;}const char *data1_systag_lookup(data1_absyn *absyn, const char *tag,                                const char *default_value){    struct data1_systag *p = absyn->systags;    for (; p; p = p->next)        if (!strcmp(p->name, tag))            return p->value;    return default_value;}#define l_isspace(c) ((c) == '\t' || (c) == ' ' || (c) == '\n' || (c) == '\r')int read_absyn_line(FILE *f, int *lineno, char *line, int len,		    char *argv[], int num){    char *p;    int argc;    int quoted = 0;        while ((p = fgets(line, len, f)))    {	(*lineno)++;	while (*p && l_isspace(*p))	    p++;	if (*p && *p != '#')	    break;    }    if (!p)	return 0;        for (argc = 0; *p ; argc++)    {	if (*p == '#')  /* trailing comment */	    break;	argv[argc] = p;	while (*p && !(l_isspace(*p) && !quoted)) {  	  if (*p =='"') quoted = 1 - quoted;  	  if (*p =='[') quoted = 1;  	  if (*p ==']') quoted = 0;	  p++;	}	if (*p)	{	    *(p++) = '\0';	    while (*p && l_isspace(*p))		p++;	}    }    return argc;}data1_absyn *data1_read_absyn (data1_handle dh, const char *file,                               int file_must_exist){    data1_sub_elements *cur_elements = NULL;    data1_xpelement *cur_xpelement = NULL;    data1_absyn *res = 0;    FILE *f;    data1_element **ppl[D1_MAX_NESTING];    data1_esetname **esetpp;    data1_maptab **maptabp;    data1_marctab **marcp;    data1_termlist *all = 0;    data1_attset_child **attset_childp;    data1_tagset **tagset_childp;    struct data1_systag **systagsp;    int level = 0;    int lineno = 0;    int argc;    char *argv[50], line[512];    if (!(f = data1_path_fopen(dh, file, "r")))    {	yaz_log(LOG_WARN|LOG_ERRNO, "Couldn't open %s", file);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -