📄 parser.c
字号:
static char rcsid[] = "$Id: parser.c,v 2.1 1997/03/21 17:20:05 sxw Exp $";/* * parser.c -- Broker * * parse input from Gatherer and perform operations using collector utils. * parser for the Collector<->Gatherer protocol * * DEBUG: section 72, level 1 Broker SOIF parsing routines * AUTHOR: Harvest derived (William G. Camargo, Darren R. Hardy) * * Harvest Indexer http://www.tardis.ed.ac.uk/harvest/ * --------------------------------------------------- * * The Harvest Indexer is a continued development of code developed by * the Harvest Project. Development is carried out by numerous individuals * in the Internet community, and is not officially connected with the * original Harvest Project or its funding sources. * * Please mail harvest@tardis.ed.ac.uk if you are interested in participating * in the development effort. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include "broker.h"#include "log.h"#define SP_SIZE BUFSIZchar space[SP_SIZE];extern int recv_nobjs;extern GathererID *COL_gid;/* parse an input file. */int P_parse_input(tfile, type) char *tfile; int type;{ int err = SUCCESS; int Mode = NO_MODE; FILE *InFile = NULL; Mode = NO_MODE; InFile = NULL; Debug(72, 1, ("P_parse_input: starting with type %d\n", type)); /* The tfile is really a FILE * to 'gather' if a Gatherer */ if (type < BAFULL_U) { InFile = (FILE *) tfile; } else if ((InFile = fopen(tfile, "r")) == NULL) { errorlog("Parser: Cannot read %s\n", tfile); return ERROR; } while (err == SUCCESS) { err = P_parse_command(InFile, Mode); } if (type < BAFULL_U) { COL_Close_Read_Pipe(InFile); } else { (void) fclose(InFile); if (unlink(tfile) < 0) { errorlog("Parser: Cannot remove %s\n", tfile); log_errno(tfile); xfree(tfile); return ERROR; } xfree(tfile); } return (err);}/* Do the commands: update, delete or refresh */int P_parse_command(InFile, Mode) FILE *InFile; int Mode;{ char *command; int nextc, n = 0; Debug(72, 1, ("P_parse_command: starting with Mode %d\n", Mode)); nextc = P_get_next_char(InFile); if ((nextc == EOF) || (nextc != '@')) return ERROR; command = space; command[0] = '\0'; if (fgets(command, SP_SIZE, InFile) == NULL) { errorlog("P_parse_command: Cannot read command.\n"); return ERROR; } if (strncmp(command, "DELETE", 6) == 0) { if (strchr(command, '}') != NULL) return SUCCESS; /* nop */ Mode = DEL_MODE; } else if (strncmp(command, "UPDATE", 6) == 0) { if (strchr(command, '}') != NULL) return SUCCESS; /* nop */ Mode = UPD_MODE; } else if (strncmp(command, "REFRESH", 7) == 0) { if (strchr(command, '}') != NULL) return SUCCESS; /* nop */ Mode = REF_MODE; } else { errorlog("Parser: P_parse_command: Cannot determine next command: %s\n", command); return ERROR; } init_parse_template_file(InFile); while (P_parse_object(Mode) == SUCCESS) { /* every 100 objects, give status */ if (recv_nobjs > 0 && recv_nobjs % 250 == 0) { Log("Received %d objects so far...\n", recv_nobjs); } /* check for pending connections */ if ((n++ & 0x1F) == 0) { (void) select_loop(0, 0, 0); } } finish_parse_template(); return SUCCESS;}/* update/delete/refresh an object */int P_parse_object(Mode) int Mode;{ reg_t *new_r; Template *template; AVList *walker; FILE *OutFile = NULL; extern time_t max_update_time; /* * Read the next template from the input. If the parser returns NULL, * then we check to see if we've reached the end of the file. If * So we stop the parsing by returning ERROR; otherwise we continue * trying to parse by running SUCCESS. */ if ((template = parse_template()) == NULL) return (is_parse_end_of_input()? ERROR : SUCCESS); Debug(72, 1, ("P_parse_object: received object: %s\n", template->url)); recv_nobjs++; /* Set up the new reg_t record; and add the URL to it */ new_r = (reg_t *) xmalloc(sizeof(reg_t)); memset(new_r, '\0', sizeof(reg_t)); /* null entire record */ new_r->url = xstrdup(template->url); /* Save URL in reg ent */ new_r->urls = strlen(new_r->url); new_r->GID = -1; /* Find a file to which to write the template */ if (Mode == UPD_MODE) { if ((OutFile = COL_UPD_Obj_begin(new_r)) == NULL) { Log("WARNING: Cannot initialize update: %s (FD %d).\n", new_r->url, new_r->FD); free_template(template); RG_Free_Entry(new_r); return ERROR; } } else { OutFile = NULL; } /* * Walk the attribute-value list of the template, and save * away the needed reg_t values into new_r. Also, normalize * all attribute names. */ COL_gid = (GathererID *) xmalloc(sizeof(GathererID)); memset(COL_gid, '\0', sizeof(GathererID)); COL_gid->GID = -1; for (walker = template->list; walker; walker = walker->next) { (void) COL_Normalize_Name(walker->data->attribute); (void) COL_Save_Att(walker->data, new_r); } /* pick off max update time for logging in LASTUPDATE */ max_update_time = new_r->update_time > max_update_time ? new_r->update_time : max_update_time; /* Now write the template to the file in the database, if needed */ if (OutFile != NULL) { (void) init_print_template(OutFile); print_template(template); finish_print_template(); (void) fclose(OutFile); OutFile = NULL; } free_template(template); /* Don't need anymore */ /* Verify/correct the reg_t record */ if (COL_Fill_Entry(new_r) == ERROR) { /* backout of the changes */ if (Mode == UPD_MODE) (void) SM_Destroy_Obj(new_r->FD); RG_Free_Entry(new_r); RG_gid_free(COL_gid); return ERROR; } RG_gid_free(COL_gid); /* Finish the job */ switch (Mode) { case UPD_MODE: return (COL_UPD_Obj_end(new_r)); case DEL_MODE: return (COL_DEL_Obj(new_r)); case REF_MODE: return (COL_REF_Obj(new_r)); default: break; } errorlog("P_parse_error: Internal error: Illegal Mode: %d\n", Mode); return (ERROR);}/* get next non-whitespace character on input stream */int P_get_next_char(InFile) FILE *InFile;{ int tmp; tmp = getc(InFile); while (isspace((unsigned char) tmp)) { tmp = getc(InFile); } return (tmp);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -