⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 zinfo.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
	    node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target,                                             "/targetInfo");	    assert (node_tgtinfo);	    zebraExplain_initCommonInfo (zei, node_tgtinfo);	    zebraExplain_initAccessInfo (zei, node_tgtinfo);	    /* write now because we want to be sure about the sysno */	    trec = rec_new (records);	    trec->info[recInfo_fileType] =		rec_strdup ("grs.sgml", &trec->size[recInfo_fileType]);	    trec->info[recInfo_databaseName] =		rec_strdup ("IR-Explain-1", &trec->size[recInfo_databaseName]);	    	    sgml_buf = data1_nodetoidsgml(dh, zei->data1_target, 0, &sgml_len);	    trec->info[recInfo_storeData] = (char *) xmalloc (sgml_len);	    memcpy (trec->info[recInfo_storeData], sgml_buf, sgml_len);	    trec->size[recInfo_storeData] = sgml_len;	    	    rec_put (records, &trec);	    rec_rm (&trec);	}	zebraExplain_newDatabase (zei, "IR-Explain-1", 0);	    	if (!zei->categoryList->dirty)	{	    struct zebraCategoryListInfo *zcl = zei->categoryList;	    data1_node *node_cl;	    	    zcl->dirty = 1;	    zcl->data1_categoryList =		data1_read_sgml (zei->dh, zei->nmem,				 "<explain><categoryList>CategoryList\n"				 "</></>\n");		    if (zcl->data1_categoryList)	    {		node_cl = data1_search_tag (zei->dh, zcl->data1_categoryList,					    "/categoryList");		assert (node_cl);		zebraExplain_initCommonInfo (zei, node_cl);	    }	}    }    return zei;}static void zebraExplain_readAttributeDetails (ZebraExplainInfo zei,					       zebAttributeDetails zad){    Record rec;    struct zebSUInfoB **zsuip = &zad->SUInfo;    data1_node *node_adinfo, *node_zebra, *node_list, *np;    assert (zad->sysno);    rec = rec_get (zei->records, zad->sysno);    zad->data1_tree = read_sgml_rec (zei->dh, zei->nmem, rec);    node_adinfo = data1_search_tag (zei->dh, zad->data1_tree,				    "/attributeDetails");    node_zebra = data1_search_tag (zei->dh, node_adinfo->child,				 "zebraInfo");    node_list = data1_search_tag (zei->dh, node_zebra->child,				  
"attrlist");    for (np = node_list->child; np; np = np->next)    {	data1_node *node_set = NULL;	data1_node *node_use = NULL;	data1_node *node_ordinal = NULL;	data1_node *np2;	char oid_str[128];	int oid_str_len;	if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "attr"))	    continue;	for (np2 = np->child; np2; np2 = np2->next)	{	    if (np2->which != DATA1N_tag || !np2->child ||		np2->child->which != DATA1N_data)		continue;	    if (!strcmp (np2->u.tag.tag, "set"))		node_set = np2->child;	    else if (!strcmp (np2->u.tag.tag, "use"))		node_use = np2->child;	    else if (!strcmp (np2->u.tag.tag, "ordinal"))		node_ordinal = np2->child;	}	assert (node_set && node_use && node_ordinal);	oid_str_len = node_set->u.data.len;	if (oid_str_len >= (int) sizeof(oid_str))	    oid_str_len = sizeof(oid_str)-1;	memcpy (oid_str, node_set->u.data.data, oid_str_len);	oid_str[oid_str_len] = '\0';        *zsuip = (struct zebSUInfoB *)	    nmem_malloc (zei->nmem, sizeof(**zsuip));	(*zsuip)->info.set = oid_getvalbyname (oid_str);	(*zsuip)->info.use = atoi_n (node_use->u.data.data,				     node_use->u.data.len);	(*zsuip)->info.ordinal = atoi_n (node_ordinal->u.data.data,					 node_ordinal->u.data.len);	logf (LOG_DEBUG, "set=%d use=%d ordinal=%d",	      (*zsuip)->info.set, (*zsuip)->info.use, (*zsuip)->info.ordinal);        zsuip = &(*zsuip)->next;    }    *zsuip = NULL;    zad->readFlag = 0;    rec_rm (&rec);}static void zebraExplain_readDatabase (ZebraExplainInfo zei,				       struct zebDatabaseInfoB *zdi){    Record rec;    data1_node *node_dbinfo, *node_zebra, *np;    assert (zdi->sysno);    rec = rec_get (zei->records, zdi->sysno);    zdi->data1_database = read_sgml_rec (zei->dh, zei->nmem, rec);        node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database,                                    "/databaseInfo");    assert (node_dbinfo);    zebraExplain_mergeAccessInfo (zei, node_dbinfo, &zdi->accessInfo);    node_zebra = data1_search_tag (zei->dh, node_dbinfo->child,				 
"zebraInfo");    if (node_zebra	&& (np = data1_search_tag (zei->dh, node_zebra->child,				   "recordBytes")) 	&& np->child && np->child->which == DATA1N_data)	zdi->recordBytes = atoi_n (np->child->u.data.data,				   np->child->u.data.len);    if ((np = data1_search_tag (zei->dh, node_dbinfo->child,				"recordCount")) &&	(np = data1_search_tag (zei->dh, np->child,				"recordCountActual")) &&	np->child->which == DATA1N_data)    {	zdi->recordCount = atoi_n (np->child->u.data.data,				   np->child->u.data.len);    }    zdi->readFlag = 0;    rec_rm (&rec);}int zebraExplain_removeDatabase(ZebraExplainInfo zei, void *update_handle){    struct zebDatabaseInfoB **zdip = &zei->databaseInfo;    while (*zdip)    {	if (*zdip == zei->curDatabaseInfo)	{	    struct zebDatabaseInfoB *zdi = *zdip;	    Record rec;	    zei->dirty = 1;	    zei->updateHandle = update_handle;	    if (zdi->attributeDetails)	    {		/* remove attribute details keys and delete it */		zebAttributeDetails zad = zdi->attributeDetails;				rec = rec_get(zei->records, zad->sysno);		(*zei->updateFunc)(zei->updateHandle, rec, 0);		rec_rm(&rec);	    }	    /* remove database record keys and delete it */	    rec = rec_get (zei->records, zdi->sysno);	    (*zei->updateFunc)(zei->updateHandle, rec, 0);	    rec_rm(&rec);	    /* remove from list */	    *zdip = zdi->next;	    /* current database is IR-Explain-1 */	    return 0;	}	zdip = &(*zdip)->next;    }    return -1;}int zebraExplain_curDatabase (ZebraExplainInfo zei, const char *database){    struct zebDatabaseInfoB *zdi;    const char *database_n = strrchr (database, '/');    if (database_n)        database_n++;    else        database_n = database;        assert (zei);    if (zei->curDatabaseInfo &&        !STRCASECMP (zei->curDatabaseInfo->databaseName, database))        return 0;    for (zdi = zei->databaseInfo; zdi; zdi=zdi->next)    {        if (!STRCASECMP (zdi->databaseName, database_n))            break;    }    if (!zdi)        return -1;#if ZINFO_DEBUG    
logf (LOG_LOG, "zebraExplain_curDatabase: %s", database);#endif    if (zdi->readFlag)    {#if ZINFO_DEBUG	logf (LOG_LOG, "zebraExplain_readDatabase: %s", database);#endif        zebraExplain_readDatabase (zei, zdi);    }    if (zdi->attributeDetails->readFlag)    {#if ZINFO_DEBUG	logf (LOG_LOG, "zebraExplain_readAttributeDetails: %s", database);#endif        zebraExplain_readAttributeDetails (zei, zdi->attributeDetails);    }    zei->curDatabaseInfo = zdi;    return 0;}static void zebraExplain_initCommonInfo (ZebraExplainInfo zei, data1_node *n){    data1_node *c = data1_mk_tag (zei->dh, zei->nmem, "commonInfo", 0, n);    data1_mk_tag_data_text (zei->dh, c, "dateAdded", zei->date, zei->nmem);    data1_mk_tag_data_text (zei->dh, c, "dateChanged", zei->date, zei->nmem);    data1_mk_tag_data_text (zei->dh, c, "languageCode", "EN", zei->nmem);}static void zebraExplain_updateCommonInfo (ZebraExplainInfo zei, data1_node *n){    data1_node *c = data1_search_tag (zei->dh, n->child, "commonInfo");    assert (c);    data1_mk_tag_data_text_uni (zei->dh, c, "dateChanged", zei->date,                                zei->nmem);}static void zebraExplain_initAccessInfo (ZebraExplainInfo zei, data1_node *n){    data1_node *c = data1_mk_tag (zei->dh, zei->nmem, "accessInfo", 0, n);    data1_node *d = data1_mk_tag (zei->dh, zei->nmem, "unitSystems", 0, c);    data1_mk_tag_data_text (zei->dh, d, "string", "ISO", zei->nmem);}static void zebraExplain_updateAccessInfo (ZebraExplainInfo zei, data1_node *n,					   zebAccessInfo accessInfo){    data1_node *c = data1_search_tag (zei->dh, n->child, "accessInfo");    data1_node *d;    zebAccessObject p;        if (!c)    {        data1_pr_tree (zei->dh, n, stdout);        exit (0);        assert (c);    }    if ((p = accessInfo->attributeSetIds))    {	d = data1_mk_tag_uni (zei->dh, zei->nmem, "attributeSetIds", c);	for (; p; p = p->next)	    data1_mk_tag_data_oid (zei->dh, d, "oid", p->oid, zei->nmem);    }    if ((p = accessInfo->schemas))    
{	d = data1_mk_tag_uni (zei->dh, zei->nmem, "schemas", c);	for (; p; p = p->next)	    data1_mk_tag_data_oid (zei->dh, d, "oid", p->oid, zei->nmem);    }}int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database,			      int explain_database){    struct zebDatabaseInfoB *zdi;    data1_node *node_dbinfo, *node_adinfo;    const char *database_n = strrchr (database, '/');    if (database_n)        database_n++;    else        database_n = database;#if ZINFO_DEBUG    logf (LOG_LOG, "zebraExplain_newDatabase: %s", database);#endif    assert (zei);    for (zdi = zei->databaseInfo; zdi; zdi=zdi->next)    {        if (!STRCASECMP (zdi->databaseName, database_n))            break;    }    if (zdi)        return -1;    /* it's new really. make it */    zdi = (struct zebDatabaseInfoB *) nmem_malloc (zei->nmem, sizeof(*zdi));    zdi->next = zei->databaseInfo;    zei->databaseInfo = zdi;    zdi->sysno = 0;    zdi->recordCount = 0;    zdi->recordBytes = 0;    zdi->readFlag = 0;    zdi->databaseName = nmem_strdup (zei->nmem, database_n);    zebraExplain_mergeAccessInfo (zei, 0, &zdi->accessInfo);        assert (zei->dh);    assert (zei->nmem);    zdi->data1_database =	data1_read_sgml (zei->dh, zei->nmem, 			 "<explain><databaseInfo>DatabaseInfo\n"			 "</></>\n");    if (!zdi->data1_database)	return -2;    node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database,                                    "/databaseInfo");    assert (node_dbinfo);    zebraExplain_initCommonInfo (zei, node_dbinfo);    zebraExplain_initAccessInfo (zei, node_dbinfo);    data1_mk_tag_data_text (zei->dh, node_dbinfo, "name",			       database, zei->nmem);        if (explain_database)	data1_mk_tag_data_text (zei->dh, node_dbinfo, "explainDatabase",				"", zei->nmem);        data1_mk_tag_data_text (zei->dh, node_dbinfo, "userFee",			    "0", zei->nmem);        data1_mk_tag_data_text (zei->dh, node_dbinfo, "available",			    "1", zei->nmem);    #if ZINFO_DEBUG    data1_pr_tree (zei->dh, 
zdi->data1_database, stderr);#endif    zdi->dirty = 1;    zei->dirty = 1;    zei->curDatabaseInfo = zdi;    zdi->attributeDetails = (zebAttributeDetails)	nmem_malloc (zei->nmem, sizeof(*zdi->attributeDetails));    zdi->attributeDetails->readFlag = 0;    zdi->attributeDetails->sysno = 0;    zdi->attributeDetails->dirty = 1;    zdi->attributeDetails->SUInfo = NULL;    zdi->attributeDetails->data1_tree =	data1_read_sgml (zei->dh, zei->nmem,			 "<explain><attributeDetails>AttributeDetails\n"			 "</></>\n");    node_adinfo = data1_search_tag (zei->dh, zdi->attributeDetails->data1_tree,                                    "/attributeDetails");    assert (node_adinfo);    zebraExplain_initCommonInfo (zei, node_adinfo);    return 0;}static void writeAttributeValueDetails (ZebraExplainInfo zei,				  zebAttributeDetails zad,				  data1_node *node_atvs, data1_attset *attset){    struct zebSUInfoB *zsui;    int set_ordinal = attset->reference;    data1_attset_child *c;    for (c = attset->children; c; c = c->next)	writeAttributeValueDetails (zei, zad, node_atvs, c->child);    for (zsui = zad->SUInfo; zsui; zsui = zsui->next)    {	data1_node *node_attvalue, *node_value;	if (set_ordinal != zsui->info.set)	    continue;	node_attvalue = data1_mk_tag (zei->dh, zei->nmem, "attributeValue",                                      0 /* attr */, node_atvs);	node_value = data1_mk_tag (zei->dh, zei->nmem, "value",                                   0 /* attr */, node_attvalue);	data1_mk_tag_data_int (zei->dh, node_value, "numeric",			       zsui->info.use, zei->nmem);    }}static void zebraExplain_writeCategoryList (ZebraExplainInfo zei,					    struct zebraCategoryListInfo *zcl,					    int key_flush){    char *sgml_buf;    int sgml_len;    int i;    Record drec;    data1_node *node_ci, *node_categoryList;    int sysno = 0;    static char *category[] = {	"CategoryList",	"TargetInfo",	"DatabaseInfo",	"AttributeDetails",	NULL    };    assert (zcl);    if (!zcl->dirty)	return ;    zcl->dirty = 
1;    node_categoryList = zcl->data1_categoryList;#if ZINFO_DEBUG    logf (LOG_LOG, "zebraExplain_writeCategoryList");#endif    drec = createRecord (zei->records, &sysno);        node_ci = data1_search_tag (zei->dh, node_categoryList,				"/categoryList");    assert (node_ci);    node_ci = data1_mk_tag (zei->dh, zei->nmem, "categories", 0 /* attr */,                            node_ci);    assert (node_ci);        for (i = 0; category[i]; i++)    {	data1_node *node_cat = data1_mk_tag (zei->dh, zei->nmem,  "category",                                             0 /* attr */, node_ci);	data1_mk_tag_data_text (zei->dh, node_cat, "name",				category[i], zei->nmem);    }    /* extract *searchable* keys from it. We do this here, because       record count, etc. is affected */    if (key_flush)	(*zei->updateFunc)(zei->updateHandle, drec, node_categoryList);    /* convert to "SGML" and write it */#if ZINFO_DEBUG    data1_pr_tree (zei->dh, node_categoryList, stderr);#endif    sgml_buf = data1_nodetoidsgml(zei->dh, node_categoryList, 0, &sgml_len);    drec->info[recInfo_storeData] = (char *) xmalloc (sgml_len);    memcpy (drec->info[recInfo_storeData], sgml_buf, sgml_len);    drec->size[recInfo_storeData] = sgml_len;        rec_put (zei->records, &drec);}static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei,						zebAttributeDetails zad,						const char *databaseName,						int key_flush){    char *sgml_buf;    int sgml_len;    Record drec;    data1_node *node_adinfo, *node_list, *node_zebra, *node_attributesBySet;    struct zebSUInfoB *zsui;    int set_min;        if (!zad->dirty)	return;        zad->dirty = 0;#if ZINFO_DEBUG    logf (LOG_LOG, "zebraExplain_writeAttributeDetails");    #endif    drec = createRecord (zei->records, &zad->sysno);    assert (zad->data1_tree);    node_adinfo = data1_search_tag (zei->dh, zad->data1_tree,				   "/attributeDetails");    zebraExplain_updateCommonInfo (zei, node_adinfo);    data1_mk_tag_data_text (zei->dh, node_adinfo, 
"name",			    databaseName, zei->nmem);    /* extract *searchable* keys from it. We do this here, because

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -