📄 collector.c
字号:
static char rcsid[] = "$Id: collector.c,v 2.2 2000/01/21 17:37:33 sxw Exp $";/* * collector.c -- Utility procs for add/delete/refresh objects in the Broker. * * DEBUG: section 71, level 1 Broker collection routines * AUTHOR: Harvest Derived (William G. Camargo, Darren Hardy) * * Harvest Indexer http://harvest.sourceforge.net/ * ----------------------------------------------- * * The Harvest Indexer is a continued development of code developed by * the Harvest Project. Development is carried out by numerous individuals * in the Internet community, and is not officially connected with the * original Harvest Project or its funding sources. * * Please mail lee@arco.de if you are interested in participating * in the development effort. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include "broker.h"#include "log.h"#define LUPDATE_SIZE MAXHOSTNAMELEN+30/* Global variables */extern char *Gather;extern char *HName;extern char *DIRpath;extern char *ColConfig;extern char *obj_desc;extern int obj_desc_s;int new_nobjs, up_nobjs, del_nobjs, ref_nobjs, recv_nobjs, ign_nobjs;time_t max_update_time = 0;GathererID *COL_gid = NULL;/* Local functions */static int COL_put_last_update();/* ----------------------------------------------------------------- * COL_UPD_Obj_begin() -- initialize a new summary object. * ----------------------------------------------------------------- */FILE *COL_UPD_Obj_begin(entry) reg_t *entry;{ fd_t fd; if ((fd = SM_Create_Obj()) == ERROR) { errorlog("Collector: Cannot create a new object.\n"); return NULL; } entry->FD = fd; return (SM_Write_Obj(fd));}/* ----------------------------------------------------------------- * * COL_Fill_Entry() -- fill in refresh rate and expiration times based * on given/default info. * ----------------------------------------------------------------- */int COL_Fill_Entry(entry) reg_t *entry;{ /* * All Registry entries MUST have: * URL, Gatherer-Name, Gatherer-Host, Gatherer-Version, * and Update-Time * MD5's are optional, but may used in elimination searches. */ if (entry->url == NULL) { errorlog("%s%s: %s attribute is missing from object: %s\n", COLLECT, ENTRY_ERR, "URL", entry->url); return ERROR; } if (entry->update_time == 0) { errorlog("%s%s: %s attribute is missing from object: %s\n", COLLECT, ENTRY_ERR, "Update-Time", entry->url); return ERROR; } if (COL_gid->gn == NULL) { errorlog("%s%s: %s attribute is missing from object: %s\n", COLLECT, ENTRY_ERR, "Gatherer-Name", entry->url); return ERROR; } if (COL_gid->gh == NULL) { errorlog("%s%s: %s attribute is missing from object: %s\n", COLLECT, ENTRY_ERR, "Gatherer-Host", entry->url); return ERROR; } if (COL_gid->gv == NULL) { errorlog("%s%s: %s attribute is missing from object: %s\n", COLLECT, ENTRY_ERR, "Gatherer-Version", entry->url); return ERROR; } COL_gid->GID = -1; entry->GID = RG_gid_register(COL_gid); if (entry->GID == -1) { errorlog("%s%s: illegal Gatherer ID for object.\n", COLLECT, ENTRY_ERR); return ERROR; } /* Set default values */ if (entry->lmt < 1) entry->lmt = 0; if (entry->refresh_rate < 1) entry->refresh_rate = (time_t) WEEK; return SUCCESS;}/* ----------------------------------------------------------------- * * COL_UPD_Obj_end() -- Add initialized summary object to the Broker. * ----------------------------------------------------------------- */int COL_UPD_Obj_end(entry) reg_t *entry;{ reg_t *tmp; int updating = 0; /* * We want to see if the new object already matches any objects * in the current Registry. If it does, then if the new object's * Update-Time is older than or the same as the Registry object's * Update-Time, then we ignore the new object. Otherwise, we need * to replace the Registry objects with the new object. We do this * by deleting the Registry objects, then adding the new object * to the Registry. The RG_Cleaner() will run periodically * to compress the Registry. * * If the new object is not in the Registry, then it's new * so we add it to the Registry. */ while ((tmp = RG_Object_Search_Entry(entry)) != NULL) { if (tmp->update_time >= entry->update_time) { (void) SM_Destroy_Obj(entry->FD); RG_Free_Entry(entry); ign_nobjs++; if (updating) del_nobjs++; /* There will be no update */ return SUCCESS; } else { (void) RG_Clean_Entry(tmp); if (updating) del_nobjs++; /* One might be updated, */ updating = 1; /* others are deleted. */ } } if (RG_Register(entry) == ERROR) { RG_Free_Entry(entry); return ERROR; } do_IND_New_Object(entry); LOGUPDATE(entry); if (updating) { up_nobjs++; } else new_nobjs++; return SUCCESS;}/* ----------------------------------------------------------------- * COL_DEL_Obj() -- remove an object from the Broker. * ----------------------------------------------------------------- */int COL_DEL_Obj(entry) reg_t *entry;{ reg_t *tmp; int err = SUCCESS; if ((tmp = RG_Object_Search_Entry(entry)) != NULL) { LOGDELETE(tmp); if (RG_Clean_Entry(tmp) == ERROR) err = ERROR; del_nobjs++; } else { ign_nobjs++; } RG_Free_Entry(entry); return (err);}/* ----------------------------------------------------------------- * COL_REF_Obj -- update expiration time of an object. * ----------------------------------------------------------------- */int COL_REF_Obj(entry) reg_t *entry;{ reg_t *tmp; /* * When refreshing the object, all we need to do is save * the new update_time, then write it to the Registry file. */ if ((tmp = RG_Object_Search_Entry(entry)) != NULL) { /* save new expiration time on disk */ tmp->update_time = entry->update_time; replace_record(tmp); LOGREFRESH(entry); ref_nobjs++; RG_Free_Entry(entry); return SUCCESS; } ign_nobjs++; RG_Free_Entry(entry); return ERROR;}/* ----------------------------------------------------------------- * * COL_Save_Att() -- decide which attributes are needed in the registry * and save/free it * ----------------------------------------------------------------- */int COL_Save_Att(wlk, entry) AVPair *wlk; reg_t *entry;{ num32 len; char *field_name; char *value; field_name = wlk->attribute; value = wlk->value; len = (num32) wlk->vsize; /* We can assume that these strcmp will only match once per object */ if (strcmp(field_name, GATH_HOST) == 0) { COL_gid->gh = (char *) xmalloc(len + 1); memcpy(COL_gid->gh, value, len); COL_gid->gh[len] = '\0'; COL_gid->ghs = len; return SUCCESS; } else if (strcmp(field_name, GATH_NAME) == 0) { COL_gid->gn = (char *) xmalloc(len + 1); memcpy(COL_gid->gn, value, len); COL_gid->gn[len] = '\0'; COL_gid->gns = len; return SUCCESS; } else if (strcmp(field_name, GATH_VER) == 0) { COL_gid->gv = (char *) xmalloc(len + 1); memcpy(COL_gid->gv, value, len); COL_gid->gv[len] = '\0'; COL_gid->gvs = len; return SUCCESS; } else if (strcmp(field_name, MD5) == 0) { entry->md5 = (char *) xmalloc(len + 1); memcpy(entry->md5, value, len); entry->md5[len] = '\0'; entry->md5s = len; return SUCCESS; } else if (strcmp(field_name, LMT_A) == 0) { entry->lmt = (time_t) atol(value); if (entry->lmt < 1) entry->lmt = 0; return SUCCESS; } else if (strcmp(field_name, UPDATE_A) == 0) { entry->update_time = (time_t) atol(value); if (entry->update_time < 1) entry->update_time = 0; return SUCCESS; } else if (strcmp(field_name, TTL) == 0) { entry->ttl = (time_t) atol(value); if (entry->ttl < 1) entry->ttl = 0; return SUCCESS; } else if (strcmp(field_name, REFRESH_A) == 0) { entry->refresh_rate = (time_t) atol(value); if (entry->refresh_rate < 1) entry->refresh_rate = 0; return SUCCESS; } else if (strcasecmp(field_name, obj_desc) == 0) { if (entry->desc != NULL) { /* Ignore duplicate description field */ return SUCCESS; }#ifdef TRUNCATE_DESCRIPTIONS { /* Makes all descriptions one-line only */ int maxdesc = 70, x; char *s; /* don't malloc too much; we truncate at maxdesc */ x = ((maxdesc + 10) < len) ? (maxdesc + 10) : len; entry->desc = (char *) xmalloc(x + 1); memcpy(entry->desc, value, x); entry->desc[x] = '\0'; /* See if chopping at the first newline will do it */ if ((s = strchr(entry->desc, '\n')) != NULL) *s = '\0'; if (strlen(entry->desc) > maxdesc) { /* we'd better just chop off the end */ entry->desc[maxdesc - 1] = '\0'; entry->desc[maxdesc - 2] = '.'; entry->desc[maxdesc - 3] = '.'; entry->desc[maxdesc - 4] = '.'; } /* reassign buffer */ s = xstrdup(entry->desc); xfree(entry->desc); entry->desc = s; }#else entry->desc = (char *) xmalloc(len + 1); memcpy(entry->desc, value, len); entry->desc[len] = '\0';#endif entry->descs = strlen(entry->desc); return SUCCESS; } return ERROR;}/* ----------------------------------------------------------------- * * COL_Normalize() do some thesaurus, normalization-- changes all * field names to lower case * ----------------------------------------------------------------- */char *COL_Normalize_Name(name) char *name;{ UTIL_Make_Lower(name); return (name);}static int To_Gatherer[2];/* * COL_Create_Read_Pipe - creates a read pipe from the gather process. */FILE *COL_Create_Read_Pipe(cmd) char *cmd;{ int pid; static FILE *fp; if (pipe(To_Gatherer) < 0) { log_errno("pipe"); return (NULL); } /* need to use fork() rather than vfork() because of a memory leak */ if ((pid = fork()) < 0) { log_errno("fork"); return (NULL); } if (pid == 0) { /* child */ char *argv[64]; /* simple parsing of the command string */ memset(argv, '\0', sizeof(char *) * 64); parse_argv(argv, cmd); /* make 'gather' talk with the Broker */ close(To_Gatherer[0]); dup2(To_Gatherer[1], 1); /* stdout -> write pipe */ /* close to prevent gather from getting the broker sockets */ close_all_fds(3); /* stdin is /dev/null, stdout is pipe, stderr is broker.out */ execvp(argv[0], argv); perror(argv[0]); _exit(1); } /* parent */ close(To_Gatherer[1]); if ((fp = fdopen(To_Gatherer[0], "r")) == NULL) { errorlog("COL_Create_Read_Pipe: fdopen(%d, \"r\") failed.\n", To_Gatherer[0]); close(To_Gatherer[0]); return (NULL);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -