📄 post_process.c
字号:
static char rcsid[] = "post_process.c,v 1.20 1996/03/26 04:35:14 wessels Exp";/* * post_process.c - Post Processing routines for the Essence system * * Duane Wessels, wessels@cs.colorado.edu, May 1995 * * DEBUG: section 66, level 1 Gatherer essence post-summarizing * * ---------------------------------------------------------------------- * Copyright (c) 1994, 1995. All rights reserved. * * The Harvest software was developed by the Internet Research Task * Force Research Group on Resource Discovery (IRTF-RD): * * Mic Bowman of Transarc Corporation. * Peter Danzig of the University of Southern California. * Darren R. Hardy of the University of Colorado at Boulder. * Udi Manber of the University of Arizona. * Michael F. Schwartz of the University of Colorado at Boulder. * Duane Wessels of the University of Colorado at Boulder. * * This copyright notice applies to software in the Harvest * ``src/'' directory only. Users should consult the individual * copyright notices in the ``components/'' subdirectories for * copyright information about other software bundled with the * Harvest source code distribution. * * TERMS OF USE * * The Harvest software may be used and re-distributed without * charge, provided that the software origin and research team are * cited in any use of the system. Most commonly this is * accomplished by including a link to the Harvest Home Page * (http://harvest.cs.colorado.edu/) from the query page of any * Broker you deploy, as well as in the query result pages. These * links are generated automatically by the standard Broker * software distribution. * * The Harvest software is provided ``as is'', without express or * implied warranty, and with no support nor obligation to assist * in its use, correction, modification or enhancement. We assume * no liability with respect to the infringement of copyrights, * trade secrets, or any patents, and are not responsible for * consequential damages. Proper use of the Harvest software is * entirely the responsibility of the user. * * DERIVATIVE WORKS * * Users may make derivative works from the Harvest software, subject * to the following constraints: * * - You must include the above copyright notice and these * accompanying paragraphs in all forms of derivative works, * and any documentation and other materials related to such * distribution and use acknowledge that the software was * developed at the above institutions. * * - You must notify IRTF-RD regarding your distribution of * the derivative work. * * - You must clearly notify users that your are distributing * a modified version and not the original Harvest software. * * - Any derivative product is also subject to these copyright * and use restrictions. * * Note that the Harvest software is NOT in the public domain. We * retain copyright, as specified above. * * HISTORY OF FREE SOFTWARE STATUS * * Originally we required sites to license the software in cases * where they were going to build commercial products/services * around Harvest. In June 1995 we changed this policy. We now * allow people to use the core Harvest software (the code found in * the Harvest ``src/'' directory) for free. We made this change * in the interest of encouraging the widest possible deployment of * the technology. The Harvest software is really a reference * implementation of a set of protocols and formats, some of which * we intend to standardize. We encourage commercial * re-implementations of code complying to this set of standards. * */#include <stdlib.h>#include <unistd.h>#include <fcntl.h>#include <sys/stat.h>#include "util.h"#include "template.h"#include <sys/types.h>#include "post_process.h"Rule *PPRules = NULL;/* * my_write() * * a persistent write() for sockets and pipes. Don't return until * all bytes have been written, or an error condition. */static int my_write(fd, ptr, nbytes) register int fd, nbytes; register char *ptr;{ static int nleft, nwritten; nleft = nbytes; while (nleft > 0) { nwritten = write(fd, ptr, nleft > 8192 ? 8192 : nleft); if (nwritten <= 0) { return (nwritten); } nleft -= nwritten; ptr += nwritten; } return (nbytes - nleft);}/* * my_read() * * a persistent read() for sockets and pipes. Don't return until * all bytes have been read, or an error condition. */static int my_read(fd, ptr, nbytes) register int fd, nbytes; register char *ptr;{ static int nleft, nread; nleft = nbytes; while (nleft > 0) { nread = read(fd, ptr, nleft); if (nread < 0) return (nread); else if (nread == 0) break; nleft -= nread; ptr += nread; } return (nbytes - nleft);}/* * do_command_io (argv, writebuf, bytesout, bytesin) * * Writes 'bytesout' of 'writebuf' to a forked processes which * executes the command in 'argv'. The command will read from * stdin and write to stdout, which will be a tmpfile. * The function returns a malloc'd buffer that contains the * command output, and sets *bytesin accordingly. */static char *do_command_io(argv, writebuf, bytesout, bytesin) char **argv; char *writebuf; int bytesout; int *bytesin;{ int n; char *tfile = NULL; int p[2]; int fd; int pid = -1; int status; struct stat sb; char *inbuf = NULL; Debug(66, 5, ("do_command_io: Running '%s'\n", *argv)); if ((tfile = tempnam(0, 0)) == (char *) 0) goto do_cmd_done; if (pipe(p) < 0) { log_errno2(__FILE__, __LINE__, "pipe"); goto do_cmd_done; } if ((pid = fork()) == 0) { /* child */ fd = open(tfile, O_WRONLY | O_TRUNC | O_CREAT, 0660); if (fd < 0) { log_errno2(__FILE__, __LINE__, tfile); _exit(1); } dup2(fd, 1); close(fd); dup2(p[0], 0); close(p[0]); close(p[1]); execvp(*argv, argv); log_errno2(__FILE__, __LINE__, *argv); _exit(1); } close(p[0]); my_write(p[1], writebuf, bytesout); close(p[1]); waitpid(pid, &status, 0); Debug(66, 5, ("do_command_io: '%s' returned %d\n", *argv, status >> 8)); if (stat(tfile, &sb) < 0) { log_errno2(__FILE__, __LINE__, tfile); goto do_cmd_done; } if (sb.st_size <= 0) { Debug(66, 1, ("do_command_io: '%s' wrote no data\n", *argv)); goto do_cmd_done; } fd = open(tfile, O_RDONLY); if (fd < 0) { log_errno2(__FILE__, __LINE__, tfile); goto do_cmd_done; } inbuf = (char *) xmalloc(sb.st_size); n = my_read(fd, inbuf, sb.st_size); if (n < 0) { log_errno2(__FILE__, __LINE__, "read"); xfree(inbuf); inbuf = 0; goto do_cmd_done; } *bytesin = n; do_cmd_done: close(fd); if (tfile) unlink(tfile); xfree(tfile); return inbuf;}/* * check_condition() * * Check a single condition from the rules. Return 1 if the condition * holds, or 0 if it fails. Supported conditions are string equals * and regular expression matching. */static int check_condition(c, T) Cond *c; Template *T;{ char *attr = NULL; char *c_val = NULL; char *t_val = NULL; AVPair *pair = NULL; int ret = 0; regex_t compiled_pattern; if (!c) goto finish_check_cond; if (!c->attr) goto finish_check_cond; if (!c->value) goto finish_check_cond; if (!c->attr->word) goto finish_check_cond; if (!c->value->word) goto finish_check_cond; attr = xstrdup(c->attr->word); c_val = xstrdup(c->value->word); if (!strcasecmp(attr, "url")) t_val = xstrdup(T->url); else { pair = extract_AVPair(T->list, attr); if (pair == NULL) goto finish_check_cond; t_val = xstrdup(pair->value); } Debug(66, 5, ("check_condition: attr=%s\n", attr)); Debug(66, 5, ("check_condition: c_val=%s\n", c_val)); Debug(66, 5, ("check_condition: t_val=%s\n", t_val)); switch (c->op) { case EQUALS: ret = (strcasecmp(t_val, c_val) == 0); break; case NOTEQ: ret = (strcasecmp(t_val, c_val) != 0); break; case REGEX: regcomp(&compiled_pattern, c_val, REG_EXTENDED); ret = (regexec(&compiled_pattern, t_val, 0, 0, 0) == 0); regfree(&compiled_pattern); break; case NOTRE: regcomp(&compiled_pattern, c_val, REG_EXTENDED); ret = (regexec(&compiled_pattern, t_val, 0, 0, 0) != 0); regfree(&compiled_pattern); break; default: ret = 0; break; } finish_check_cond: Debug(66, 1, ("check_condition: returning %d\n", ret)); xfree(t_val); xfree(c_val); xfree(attr); return ret;}/* * check_conditions() * * Check a group of conditions from the rules. Return 1 if the conditions * hold, or 0 if they do not. Conditions can be joined with AND, OR. * Individual conditions are evaluated in left->right order. Complex * AND/OR groupings are not possible. */static int check_conditions(C, T) Cond *C; Template *T;{ Cond *c = NULL; int this_val; int running_val; int lastop = -1; for (c = C; c; c = c->next) { this_val = check_condition(c, T); if (lastop != -1) switch (lastop) { case AND: running_val = running_val && this_val; break; case OR: running_val = running_val || this_val; break; default: fprintf(stderr, "Unknown condition op: %d\n", lastop); break; } else { running_val = this_val; } lastop = c->nextop; } return running_val;}/* * do_assign_inst (T, attrs, args) * * attrs->word is an attribute name * args->word is the attribute value * * Simply add or replace this A/V pair in the Template */static int do_assign_inst(T, attrs, args) Template *T; Word *attrs; Word *args;{ if (!attrs) return 0; if (!attrs->word) return 0; if (!args) return 0; if (!args->word) return 0; Debug(66, 5, ("do_assign_inst: %s = %s\n", attrs->word, args->word)); add_AVList(T->list, attrs->word, args->word, strlen(args->word)); return 1;}/* * do_pipe_inst (T, attrs, args) * * attrs->word is an attribute name * args is a list of words that make up a command. * * Open a pipe to the command and write the attribute value. The command * output replaces the attribute value in the Template. */static int do_pipe_inst(T, attrs, args) Template *T; Word *attrs; Word *args;{ AVPair *pr = NULL; int argc; int i, n; char **argv = NULL; Word *w = NULL; int ret = 0; char *inbuf = NULL; char *t = NULL; if (!attrs) return 0; if (!attrs->word) return 0; if (!args) return 0; if (!args->word) return 0; Debug(66, 5, ("do_pipe_inst: %s | %s ...\n", attrs->word, args->word)); if (strcasecmp(attrs->word, "url") == 0) { pr = (AVPair *) xmalloc(sizeof(AVPair)); pr->vsize = strlen(T->url) + 2; pr->value = xmalloc(pr->vsize); sprintf(pr->value, "%s\n", T->url); } else { pr = extract_AVPair(T->list, attrs->word); } if (!pr) { Debug(66, 5, ("Attribute '%s' not found.\n", attrs->word)); return 0; } for (argc = 0, w = args; w; w = w->next) argc++; argv = (char **) xmalloc((argc + 1) * sizeof(char *)); for (i = 0, w = args; w; i++, w = w->next) *(argv + i) = xstrdup(w->word); *(argv + argc) = NULL; inbuf = do_command_io(argv, pr->value, pr->vsize, &n); if (inbuf == (char *) NULL) goto do_pipe_done; ret = 1; if (strcasecmp(attrs->word, "url") == 0) { if ((t = strchr(inbuf, '\n'))) *t = '\0'; xfree(T->url); T->url = xstrdup(inbuf); } else { add_AVList(T->list, attrs->word, inbuf, n); } do_pipe_done: for (i = 0; i < argc; i++) xfree(*(argv + i)); xfree(argv); xfree(inbuf); if (strcasecmp(attrs->word, "url") == 0) free_AVPair(pr); return ret;}/* * do_bang_inst (T, attrs, args) * * attrs is a list of attribute names. * args is a list of words that make up a command. * * Open a pipe to the command and write the SOIF A/V pairs for the * given attributes. The output of the command is also SOIF A/V pairs * which is incoprorated into the template. Existing attributes will * be overwritten. */static int do_bang_inst(T, attrs, args) Template *T; Word *attrs; Word *args;{ AVPair *pr = NULL; int argc; int i, n; char **argv = NULL; Word *w = NULL; int ret = 0; char *inbuf = NULL; Buffer *outb = NULL; Template *N = NULL; if (!attrs) return 0; if (!attrs->word) return 0; if (!args) return 0; if (!args->word) return 0; Debug(66, 5, ("do_bang_inst: %s ... ! %s ...\n", attrs->word, args->word)); N = create_template(0, T->url); for (w = attrs; w; w = w->next) { pr = extract_AVPair(T->list, w->word); if (!pr) continue; if (!N->list) { N->list = create_AVList(pr->attribute, pr->value, pr->vsize); } else { add_AVList(N->list, pr->attribute, pr->value, pr->vsize); } } outb = init_print_template(0); print_template(N); free_template(N); for (argc = 0, w = args; w; w = w->next) argc++; argv = (char **) xmalloc((argc + 1) * sizeof(char *)); for (i = 0, w = args; w; i++, w = w->next) *(argv + i) = xstrdup(w->word); *(argv + argc) = NULL; Debug(66, 1, ("Writing this data (%d bytes) to %s:%s\n", outb->length, *argv, outb->data)); inbuf = do_command_io(argv, outb->data, outb->length, &n); finish_print_template(); if (!inbuf) goto do_bang_done; init_parse_template_string(inbuf, n); N = parse_template(); finish_parse_template(); if (!N) goto do_bang_done; merge_AVList(T->list, N->list); free_template(N); ret = 1; do_bang_done: for (i = 0; i < argc; i++) xfree(*(argv + i)); xfree(argv); xfree(inbuf); return ret;}/* * do_instructions() * * Run the instructions from a rule */static int do_instructions(I, T) Inst *I; Template *T;{ Inst *i = NULL; int ret = 0; Debug(66, 1, ("do_instructions: %s\n", T->url)); for (i = I; i; i = i->next) { Debug(66, 1, ("Doing instruction type %d\n", i->op)); switch (i->op) { case ASSIGN: ret = do_assign_inst(T, i->attrs, i->args); break; case PIPE: ret = do_pipe_inst(T, i->attrs, i->args); break; case BANG: ret = do_bang_inst(T, i->attrs, i->args); break; case DELETE: ret = SUMMARIZE_DONT_ADD_OBJECT; break; default: ret = 0; break; } } return ret;}/* * post_process() - Post Process a SOIF template. */int post_process(T) Template *T;{ Rule *r = NULL; int ret = 0; Debug(66, 1, ("post_process: Starting: %s\n", T->url)); for (r = PPRules; r; r = r->next) { if (!check_conditions(r->cond, T)) continue; Debug(66, 2, ("post_process: Munging: %s\n", T->url)); ret = do_instructions(r->inst, T); } Debug(66, 2, ("post_process: Returning: %d\n", ret)); return ret;}extern int yyparse();extern FILE *yyin;/* * pp_parse_rules () * * opens and parses a file of post-processing rules. * * return 1 on failure, 0 on success */int pp_parse_rules(filename) char *filename;{ FILE *fp = NULL; int ret; Log("reading post-processing rules from %s\n", filename); fp = fopen(filename, "r"); if (!fp) { log_errno2(__FILE__, __LINE__, filename); return 1; } yyin = fp; ret = yyparse(); fclose(fp); yyin = (FILE *) NULL; Debug(66, 1, ("returning %d from yyparse\n", ret)); return ret;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -