⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zinfo.c

📁 harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
/* $Id: zinfo.c,v 1.37 2003/06/30 19:37:12 adam Exp $   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003   Index Data ApsThis file is part of the Zebra server.Zebra is free software; you can redistribute it and/or modify it underthe terms of the GNU General Public License as published by the FreeSoftware Foundation; either version 2, or (at your option) any laterversion.Zebra is distributed in the hope that it will be useful, but WITHOUT ANYWARRANTY; without even the implied warranty of MERCHANTABILITY orFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public Licensefor more details.You should have received a copy of the GNU General Public Licensealong with Zebra; see the file LICENSE.zebra.  If not, write to theFree Software Foundation, 59 Temple Place - Suite 330, Boston, MA02111-1307, USA.*/#include <stdlib.h>#include <assert.h>#include <string.h>#include <time.h>#include <zebraver.h>#include "zinfo.h"#define ZINFO_DEBUG 0struct zebSUInfo {    int set;    int use;    int ordinal;};struct zebSUInfoB {    struct zebSUInfo info;    struct zebSUInfoB *next;};typedef struct zebAccessObjectB *zebAccessObject;struct zebAccessObjectB {    void *handle;    int sysno;    Odr_oid *oid;    zebAccessObject next;};typedef struct zebAccessInfoB *zebAccessInfo;struct zebAccessInfoB {    zebAccessObject attributeSetIds;    zebAccessObject schemas;};typedef struct {    struct zebSUInfoB *SUInfo;    int sysno;    int dirty;    int readFlag;    data1_node *data1_tree;} *zebAttributeDetails;struct zebDatabaseInfoB {    zebAttributeDetails attributeDetails;    char *databaseName;    data1_node *data1_database;    int recordCount;     /* records in db */    int recordBytes;     /* size of records */    int sysno;           /* sysno of database info */    int readFlag;        /* 1: read is needed when referenced; 0 if not */    int dirty;           /* 1: database is dirty: write is needed */    struct zebDatabaseInfoB *next;    zebAccessInfo accessInfo;};struct 
zebraExplainAttset {    char *name;    int ordinal;    struct zebraExplainAttset *next;};struct zebraCategoryListInfo {    int dirty;    int sysno;    data1_node *data1_categoryList;};struct zebraExplainInfo {    int  ordinalSU;    int  runNumber;    int  dirty;    int write_flag;    Records records;    data1_handle dh;    Res res;    struct zebraExplainAttset *attsets;    NMEM nmem;    data1_node *data1_target;    struct zebraCategoryListInfo *categoryList;    struct zebDatabaseInfoB *databaseInfo;    struct zebDatabaseInfoB *curDatabaseInfo;    zebAccessInfo accessInfo;    char date[15]; /* YYYY MMDD HH MM SS */    int (*updateFunc)(void *handle, Record drec, data1_node *n);    void *updateHandle;};static void zebraExplain_initCommonInfo (ZebraExplainInfo zei, data1_node *n);static void zebraExplain_initAccessInfo (ZebraExplainInfo zei, data1_node *n);static data1_node *read_sgml_rec (data1_handle dh, NMEM nmem, Record rec){    return data1_read_sgml (dh, nmem, rec->info[recInfo_storeData]);}static void zebraExplain_writeDatabase (ZebraExplainInfo zei,                                        struct zebDatabaseInfoB *zdi,					int key_flush);static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei,						zebAttributeDetails zad,						const char *databaseName,						int key_flush);static void zebraExplain_writeTarget (ZebraExplainInfo zei, int key_flush);static void zebraExplain_writeAttributeSet (ZebraExplainInfo zei,					    zebAccessObject o,					    int key_flush);static void zebraExplain_writeCategoryList (ZebraExplainInfo zei,					    struct zebraCategoryListInfo *zcl,					    int key_flush);static Record createRecord (Records records, int *sysno){    Record rec;    if (*sysno)    {	rec = rec_get (records, *sysno);	xfree (rec->info[recInfo_storeData]);    }    else    {	rec = rec_new (records);	*sysno = rec->sysno;		rec->info[recInfo_fileType] =	    rec_strdup ("grs.sgml", &rec->size[recInfo_fileType]);	rec->info[recInfo_databaseName] =	    rec_strdup 
("IR-Explain-1",			&rec->size[recInfo_databaseName]);     }    return rec;}void zebraExplain_flush (ZebraExplainInfo zei, void *handle){    if (!zei)        return;    zei->updateHandle = handle;    if (zei->write_flag)    {	struct zebDatabaseInfoB *zdi;	zebAccessObject o;	/* write each database info record */	for (zdi = zei->databaseInfo; zdi; zdi = zdi->next)	{	    zebraExplain_writeDatabase (zei, zdi, 1);	    zebraExplain_writeAttributeDetails (zei, zdi->attributeDetails,						zdi->databaseName, 1);	}	zebraExplain_writeTarget (zei, 1);	zebraExplain_writeCategoryList (zei,					zei->categoryList,					1);	assert (zei->accessInfo);	for (o = zei->accessInfo->attributeSetIds; o; o = o->next)	    if (!o->sysno)		zebraExplain_writeAttributeSet (zei, o, 1);	for (o = zei->accessInfo->schemas; o; o = o->next)	    if (!o->sysno)	    {/* 		zebraExplain_writeSchema (zei, o, 1); */	    }	for (zdi = zei->databaseInfo; zdi; zdi = zdi->next)	{	    zebraExplain_writeDatabase (zei, zdi, 0);	    zebraExplain_writeAttributeDetails (zei, zdi->attributeDetails,						zdi->databaseName, 0);	}	zebraExplain_writeTarget (zei, 0);    }}void zebraExplain_close (ZebraExplainInfo zei){#if ZINFO_DEBUG    yaz_log (LOG_LOG, "zebraExplain_close");#endif    if (!zei)	return;    zebraExplain_flush (zei, zei->updateHandle);    nmem_destroy (zei->nmem);}void zebraExplain_mergeOids (ZebraExplainInfo zei, data1_node *n,			     zebAccessObject *op){    data1_node *np;    for (np = n->child; np; np = np->next)    {	char str[64];	int len;	Odr_oid *oid;	zebAccessObject ao;	if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "oid"))	    continue;	len = np->child->u.data.len;	if (len > 63)	    len = 63;	memcpy (str, np->child->u.data.data, len);	str[len] = '\0';		oid = odr_getoidbystr_nmem (zei->nmem, str);	for (ao = *op; ao; ao = ao->next)	    if (!oid_oidcmp (oid, ao->oid))	    {		ao->sysno = 1;		break;	    }	if (!ao)	{	    ao = (zebAccessObject) nmem_malloc (zei->nmem, sizeof(*ao));	    ao->handle = NULL;	
    ao->sysno = 1;	    ao->oid = oid;	    ao->next = *op;	    *op = ao;	}    }}void zebraExplain_mergeAccessInfo (ZebraExplainInfo zei, data1_node *n,				   zebAccessInfo *accessInfo){    data1_node *np;        if (!n)    {	*accessInfo = (zebAccessInfo)	    nmem_malloc (zei->nmem, sizeof(**accessInfo));	(*accessInfo)->attributeSetIds = NULL;	(*accessInfo)->schemas = NULL;    }    else    {	if (!(n = data1_search_tag (zei->dh, n->child, "accessInfo")))	    return;	if ((np = data1_search_tag (zei->dh, n->child, "attributeSetIds")))	    zebraExplain_mergeOids (zei, np,				    &(*accessInfo)->attributeSetIds);	if ((np = data1_search_tag (zei->dh, n->child, "schemas")))	    zebraExplain_mergeOids (zei, np,				    &(*accessInfo)->schemas);    }}/* Explain structure    root record      of type targetInfo      and has sysno = 1    databaseList (list of databases)*//*Example root:explain:  targetInfo: TargetInfo    name: Zebra    namedResultSets: 1    multipleDbSearch: 1    nicknames:      name: Zebra    commonInfo:      dateAdded: 20030630190601      dateChanged: 20030630190601      languageCode: EN    accessinfo:      unitSystems:        string: ISO      attributeSetIds:        oid: 1.2.840.10003.3.2        oid: 1.2.840.10003.3.5        oid: 1.2.840.10003.3.1      schemas:        oid: 1.2.840.10003.13.1000.81.2        oid: 1.2.840.10003.13.2    zebraInfo:      version: 1.3.12      databaseList:        database:          name: Default          id: 50          attributeDetailsId: 51        database:          name: IR-Explain-1          id: 52          attributeDetailsId: 53      ordinalSU: 38      runNumber: 1nextResultSetPosition = 2*/ZebraExplainInfo zebraExplain_open (    Records records, data1_handle dh,    Res res,    int writeFlag,    void *updateHandle,    int (*updateFunc)(void *handle, Record drec, data1_node *n)){    Record trec;    ZebraExplainInfo zei;    struct zebDatabaseInfoB **zdip;    time_t our_time;    struct tm *tm;    NMEM nmem = nmem_create ();#if 
ZINFO_DEBUG    logf (LOG_LOG, "zebraExplain_open wr=%d", writeFlag);#endif    zei = (ZebraExplainInfo) nmem_malloc (nmem, sizeof(*zei));    zei->write_flag = writeFlag;    zei->updateHandle = updateHandle;    zei->updateFunc = updateFunc;    zei->dirty = 0;    zei->curDatabaseInfo = NULL;    zei->records = records;    zei->nmem = nmem;    zei->dh = dh;    zei->attsets = NULL;    zei->res = res;    zei->categoryList = (struct zebraCategoryListInfo *)	nmem_malloc (zei->nmem, sizeof(*zei->categoryList));    zei->categoryList->sysno = 0;    zei->categoryList->dirty = 0;    zei->categoryList->data1_categoryList = NULL;    if ( atoi (res_get_def (res, "notimestamps", "0") )== 0)    {        time (&our_time);        tm = localtime (&our_time);        sprintf (zei->date, "%04d%02d%02d%02d%02d%02d",	         tm->tm_year+1900, tm->tm_mon+1,  tm->tm_mday,	         tm->tm_hour, tm->tm_min, tm->tm_sec);    } else {        sprintf (zei->date, "%04d%02d%02d%02d%02d%02d",	         0, 0, 0,  0, 0, 0);    }    zdip = &zei->databaseInfo;    trec = rec_get (records, 1);      /* get "root" record */    zei->ordinalSU = 1;    zei->runNumber = 0;    zebraExplain_mergeAccessInfo (zei, 0, &zei->accessInfo);    if (trec)    /* targetInfo already exists ... */    {	data1_node *node_tgtinfo, *node_zebra, *node_list, *np;	zei->data1_target = read_sgml_rec (zei->dh, zei->nmem, trec);#if 0	if (!zei->data1_target || !zei->data1_target->u.root.absyn)#else	if (!zei->data1_target)#endif	{	    logf (LOG_FATAL, "Explain schema missing. 
Check profilePath");	    nmem_destroy (zei->nmem);	    return 0;	}#if ZINFO_DEBUG	data1_pr_tree (zei->dh, zei->data1_target, stderr);#endif	node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target,					 "/targetInfo");	zebraExplain_mergeAccessInfo (zei, node_tgtinfo,				      &zei->accessInfo);	node_zebra = data1_search_tag (zei->dh, node_tgtinfo->child,				       "zebraInfo");	np = 0;	if (node_zebra)	{	    node_list = data1_search_tag (zei->dh, node_zebra->child,					  "databaseList");	    if (node_list)		np = node_list->child;	}	for (; np; np = np->next)	{	    data1_node *node_name = NULL;	    data1_node *node_id = NULL;	    data1_node *node_aid = NULL;	    data1_node *np2;	    if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "database"))		continue;	    for (np2 = np->child; np2; np2 = np2->next)	    {		if (np2->which != DATA1N_tag)		    continue;		if (!strcmp (np2->u.tag.tag, "name"))		    node_name = np2->child;		else if (!strcmp (np2->u.tag.tag, "id"))		    node_id = np2->child;		else if (!strcmp (np2->u.tag.tag, "attributeDetailsId"))		    node_aid = np2->child;	    }	    assert (node_id && node_name && node_aid);	    	    *zdip = (struct zebDatabaseInfoB *) 		nmem_malloc (zei->nmem, sizeof(**zdip));            (*zdip)->readFlag = 1;            (*zdip)->dirty = 0;	    (*zdip)->data1_database = NULL;	    (*zdip)->recordCount = 0;	    (*zdip)->recordBytes = 0;	    zebraExplain_mergeAccessInfo (zei, 0, &(*zdip)->accessInfo);	    (*zdip)->databaseName = (char *)		nmem_malloc (zei->nmem, 1+node_name->u.data.len);	    memcpy ((*zdip)->databaseName, node_name->u.data.data,		    node_name->u.data.len);	    (*zdip)->databaseName[node_name->u.data.len] = '\0';	    (*zdip)->sysno = atoi_n (node_id->u.data.data,				     node_id->u.data.len);	    (*zdip)->attributeDetails = (zebAttributeDetails)		nmem_malloc (zei->nmem, sizeof(*(*zdip)->attributeDetails));	    (*zdip)->attributeDetails->sysno = atoi_n (node_aid->u.data.data,						       node_aid->u.data.len);	    
(*zdip)->attributeDetails->readFlag = 1;	    (*zdip)->attributeDetails->dirty = 0;	    (*zdip)->attributeDetails->SUInfo = NULL;	    zdip = &(*zdip)->next;	}	if (node_zebra)	{	    np = data1_search_tag (zei->dh, node_zebra->child,				   "ordinalSU");	    np = np->child;	    assert (np && np->which == DATA1N_data);	    zei->ordinalSU = atoi_n (np->u.data.data, np->u.data.len);	    	    np = data1_search_tag (zei->dh, node_zebra->child,				   "runNumber");	    np = np->child;	    assert (np && np->which == DATA1N_data);	    zei->runNumber = atoi_n (np->u.data.data, np->u.data.len);            yaz_log (LOG_DEBUG, "read runnumber = %d", zei->runNumber);	    *zdip = NULL;	}	rec_rm (&trec);    }    else  /* create initial targetInfo */    {	data1_node *node_tgtinfo;	*zdip = NULL;	if (writeFlag)	{	    char *sgml_buf;	    int sgml_len;	    zei->data1_target =		data1_read_sgml (zei->dh, zei->nmem,				 "<explain><targetInfo>TargetInfo\n"				 "<name>Zebra</>\n"				 "<namedResultSets>1</>\n"				 "<multipleDBSearch>1</>\n"				 "<nicknames><name>Zebra</></>\n"				 "</></>\n" );	    if (!zei->data1_target)	    {		logf (LOG_FATAL, "Explain schema missing. Check profilePath");		nmem_destroy (zei->nmem);		return 0;	    }

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -