⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 template.c

📁 Harvest是一个下载HTML网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
static char rcsid[] = "template.c,v 1.32 1996/01/05 20:28:22 duane Exp";/* *  template.c - SOIF Object ("template") processing code for Harvest * *  DEBUG: section  69, level 1, 5, 9   Common SOIF template processing * *  Darren Hardy, hardy@cs.colorado.edu, February 1994 * *  ---------------------------------------------------------------------- *  Copyright (c) 1994, 1995.  All rights reserved. * *    The Harvest software was developed by the Internet Research Task *    Force Research Group on Resource Discovery (IRTF-RD): * *          Mic Bowman of Transarc Corporation. *          Peter Danzig of the University of Southern California. *          Darren R. Hardy of the University of Colorado at Boulder. *          Udi Manber of the University of Arizona. *          Michael F. Schwartz of the University of Colorado at Boulder. *          Duane Wessels of the University of Colorado at Boulder. * *    This copyright notice applies to software in the Harvest *    ``src/'' directory only.  Users should consult the individual *    copyright notices in the ``components/'' subdirectories for *    copyright information about other software bundled with the *    Harvest source code distribution. * *  TERMS OF USE * *    The Harvest software may be used and re-distributed without *    charge, provided that the software origin and research team are *    cited in any use of the system.  Most commonly this is *    accomplished by including a link to the Harvest Home Page *    (http://harvest.cs.colorado.edu/) from the query page of any *    Broker you deploy, as well as in the query result pages.  These *    links are generated automatically by the standard Broker *    software distribution. * *    The Harvest software is provided ``as is'', without express or *    implied warranty, and with no support nor obligation to assist *    in its use, correction, modification or enhancement.  
We assume *    no liability with respect to the infringement of copyrights, *    trade secrets, or any patents, and are not responsible for *    consequential damages.  Proper use of the Harvest software is *    entirely the responsibility of the user. * *  DERIVATIVE WORKS * *    Users may make derivative works from the Harvest software, subject *    to the following constraints: * *      - You must include the above copyright notice and these *        accompanying paragraphs in all forms of derivative works, *        and any documentation and other materials related to such *        distribution and use acknowledge that the software was *        developed at the above institutions. * *      - You must notify IRTF-RD regarding your distribution of *        the derivative work. * *      - You must clearly notify users that your are distributing *        a modified version and not the original Harvest software. * *      - Any derivative product is also subject to these copyright *        and use restrictions. * *    Note that the Harvest software is NOT in the public domain.  We *    retain copyright, as specified above. * *  HISTORY OF FREE SOFTWARE STATUS * *    Originally we required sites to license the software in cases *    where they were going to build commercial products/services *    around Harvest.  In June 1995 we changed this policy.  We now *    allow people to use the core Harvest software (the code found in *    the Harvest ``src/'' directory) for free.  We made this change *    in the interest of encouraging the widest possible deployment of *    the technology.  The Harvest software is really a reference *    implementation of a set of protocols and formats, some of which *    we intend to standardize.  We encourage commercial *    re-implementations of code complying to this set of standards. 
* */#include <stdio.h>#include <string.h>#include <stdlib.h>#include <ctype.h>#include "util.h"#include "template.h"/* Local functions */static void output_char();static void output_buffer();static int attribute_cmp();/* *  create_AVList() - Creates an Attribute-Value node to include in an AVList */AVList *create_AVList(attr, value, vsize)     char *attr;     char *value;     int vsize;{	static AVList *l;	l = (AVList *) xmalloc(sizeof(AVList));	l->data = (AVPair *) xmalloc(sizeof(AVPair));	l->data->value = (char *) xmalloc(vsize + 1);	l->data->attribute = xstrdup(attr);	l->data->vsize = vsize;	memcpy(l->data->value, value, l->data->vsize);	l->data->value[l->data->vsize] = '\0';	l->data->offset = -1;	l->next = NULL;	return (l);}/* *  free_AVList() - Cleans up an AVList */void free_AVList(list)     AVList *list;{	AVList *walker = list, *t;	while (walker) {		if (walker->data)			free_AVPair(walker->data);		t = walker;		walker = walker->next;		xfree(t);	}}/* *  free_AVPair() - Cleans up an AVPair */void free_AVPair(avp)     AVPair *avp;{	if (!avp)		return;	if (avp->attribute)		xfree(avp->attribute);	if (avp->value)		xfree(avp->value);	xfree(avp);}/* *  add_offset() - Adds the offset value to the AVPair matching attribute. */void add_offset(l, attr, off)     AVList *l;     char *attr;     size_t off;{	AVPair *avp = extract_AVPair(l, attr);	if (avp != NULL)		avp->offset = off;}/* *  extract_AVPair() - Searches for the given attribute in the AVList. *  Does a case insensitive match on the attributes.  Returns NULL *  on error; otherwise returns the matching AVPair. */AVPair *extract_AVPair(list, attribute)     AVList *list;     char *attribute;{	AVList *walker;	for (walker = list; walker; walker = walker->next) {		if (!strcasecmp(walker->data->attribute, attribute))			return (walker->data);	}	return (NULL);}/* *  exists_AVList() - Checks to see if an AVPair exists for the given *  attribute.  Returns non-zero if it does; 0 if it doesn't. 
*/int exists_AVList(list, attr)     AVList *list;     char *attr;{	return (extract_AVPair(list, attr) != NULL ? 1 : 0);}/* *  add_AVList() - Adds the Attribute-Value pair to the given AVList */void add_AVList(list, attr, value, vsize)     AVList *list;     char *attr;     char *value;     int vsize;{	AVList *walker = list;	if (list == NULL)		return;	/* move to the end of the list, and add a node */	while (walker->next) {		/* Don't add a duplicate Attribute, just replace it */		if (!strcasecmp(attr, walker->data->attribute)) {			xfree(walker->data->value);			walker->data->vsize = vsize;			walker->data->value = xmalloc(vsize + 1);			memcpy(walker->data->value, value, vsize);			walker->data->value[vsize] = '\0';			return;		}		walker = walker->next;	}	walker->next = create_AVList(attr, value, vsize);}/* *  FAST_add_AVList() - Quick version of add_AVList().  Doesn't check *  for duplicates.  attr MUST be unique to the list. */void FAST_add_AVList(list, attr, value, vsize)     AVList *list;     char *attr;     char *value;     int vsize;{	AVList *t;	if (list == NULL)		return;	t = create_AVList(attr, value, vsize);	t->next = list->next;	list->next = t;}/* *  merge_AVList() - Merges the b list into the a list.  If the AVPair *  in b exists in the a list, then the data is replaced.  Otherwise, *  the data is appended to the list. 
*/AVList *merge_AVList(a, b)     AVList *a, *b;{	AVList *walker = b;	AVPair *avp;	if (a == NULL)		return (NULL);	while (walker) {		avp = extract_AVPair(a, walker->data->attribute);		if (avp != NULL) {			/* replace the data */			xfree(avp->value);			avp->value = (char *) xmalloc(walker->data->vsize);			memcpy(avp->value, walker->data->value,			    walker->data->vsize);			avp->vsize = walker->data->vsize;			avp->offset = walker->data->offset;		} else {			/* append it to 'a' */			add_AVList(a, walker->data->attribute,			    walker->data->value, walker->data->vsize);			add_offset(a, walker->data->attribute,			    walker->data->offset);		}		walker = walker->next;	}	return (a);}/* *  append_AVList() - Adds the Attribute-Value pair to the given AVList. *  If the attr is present in the list, then it appends the value to *  the previous value. */void append_AVList(list, attr, value, vsize)     AVList *list;     char *attr;     char *value;     int vsize;{	AVPair *avp;	char *buf;	if ((avp = extract_AVPair(list, attr)) == NULL) {		add_AVList(list, attr, value, vsize);	} else {		/* replace the data */		buf = (char *) xmalloc(avp->vsize + vsize + 2);		memcpy(buf, avp->value, avp->vsize);		buf[avp->vsize] = '\n';		memcpy(buf + avp->vsize + 1, value, vsize);		xfree(avp->value);		avp->value = buf;		avp->vsize += vsize + 1;		avp->offset = -1;	}}/* *  create_template() - Creats a new Template structure. */Template *create_template(type, url)     char *type;     char *url;{	static Template *t = NULL;	t = (Template *) xmalloc(sizeof(Template));	if (type == NULL)		t->template_type = xstrdup("FILE");	else		t->template_type = xstrdup(type);	t->url = xstrdup(url);	t->list = NULL;	t->offset = -1;	t->length = -1;	return (t);}/* *  free_template() - Cleans up the template. 
*/void free_template(t)     Template *t;{	if (!t)		return;	if (t->list)		free_AVList(t->list);	if (t->template_type)		xfree(t->template_type);	if (t->url)		xfree(t->url);	xfree(t);}/* *  Template Parsing and Printing code * *  Template Parsing can read from memory or from a file. *  Template Printing can write to memory or to a file. */static FILE *outputfile = NULL;	/* user's file */Buffer *bout = NULL;/* *  init_print_template() - Print template to memory buffer or to *  a file if fp is not NULL.  Returns NULL if printing to a file; *  otherwise returns a pointer to the Buffer where the data is stored. */Buffer *init_print_template(fp)     FILE *fp;{	if (fp) {		outputfile = fp;		return (NULL);	} else {		bout = create_buffer(BUFSIZ);		return (bout);	}}/* *  output_char() - writes a single character to memory or a file. */static void output_char(c)     char c;{	output_buffer(&c, 1);}/* *  output_buffer() - writes a buffer to memory or a file. */static void output_buffer(s, sz)     char *s;     int sz;{	if (outputfile)		fwrite(s, sizeof(char), sz, outputfile);	else		add_buffer(bout, s, sz);}/* *  print_template() - Prints a SOIF Template structure into a file *  or into memory.   MUST call init_print_template_file() or *  init_print_template_string() before, and finish_print_template() after. 
*/void print_template(template)     Template *template;{	/* Estimate the buffer size to prevent too many realloc() calls */	if (outputfile == NULL) {		AVList *walker;		int n = 0;		for (walker = template->list; walker; walker = walker->next)			n += walker->data->vsize;		if (bout->length + n > bout->size)			increase_buffer(bout, n);	/* need more */	}	print_template_header(template);	print_template_body(template);	print_template_trailer(template);	Debug(69, 1, ("print_template: Printed %s template for %s\n",		template->template_type, template->url));}void print_template_header(template)     Template *template;{	char buf[BUFSIZ];	sprintf(buf, "@%s { %s\n", template->template_type, template->url);	output_buffer(buf, strlen(buf));}void print_template_body(template)     Template *template;{	char buf[BUFSIZ];	AVList *walker;	for (walker = template->list; walker; walker = walker->next) {		if (walker->data->vsize == 0)			continue;		/* Write out an Attribute value pair */		sprintf(buf, "%s{%u}:\t", walker->data->attribute,		    (unsigned int) walker->data->vsize);		output_buffer(buf, strlen(buf));		output_buffer(walker->data->value, walker->data->vsize);		output_char('\n');	}}void print_template_trailer(template)     Template *template;{	output_char('}');	output_char('\n');	if (outputfile != NULL)		fflush(outputfile);}/* *  finish_print_template() - Cleanup after printing template. *  Buffer is no longer valid. */void finish_print_template(){	outputfile = NULL;	if (bout)		free_buffer(bout);	bout = NULL;}/* Parsing templates */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -