⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 template.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
static char *inputbuf = NULL;static FILE *inputfile = NULL;static int inputbufsz = 0, curp = 0;static size_t inputoffset = 0;static size_t inputlength = 0;void init_parse_template_file(fp)     FILE *fp;{	int i;	inputfile = fp;	i = (int) ftell(fp);	if (i < 0)		/* ftell fails for popen()'d FILE*'s? */		i = 0;	inputoffset = (size_t) i;	inputlength = 0;}void init_parse_template_string(s, sz)     char *s;     int sz;{	inputbuf = s;	inputbufsz = sz;	curp = 0;	inputfile = NULL;	inputoffset = 0;	inputlength = 0;}void finish_parse_template(){	inputfile = NULL;	curp = 0;	inputbufsz = 0;}int is_parse_end_of_input(){	if (inputfile != NULL)		return (feof(inputfile));	return (curp >= inputbufsz /**|| inputbuf[curp] == '\0'**/ );}static void backup_char(x)     char x;{	inputoffset--;	inputlength--;	if (inputfile != NULL)		ungetc(x, inputfile);	else		curp--;	return;}/* *  Note that these macros are all optimized by hand to maximize *  the parsing performance.  SOIF is parsed *A LOT* in Harvest. *  So, this code can become a bottleneck in the system. 
*/#define input_char_file() \{ \		inputoffset++; \		inputlength++; \		c = fgetc(inputfile); \}#define input_char_mem() \{ \	if (curp >= inputbufsz) /**|| inputbuf[curp] == '\0')**/ { \		c = (char) EOF; \	} else { \		inputoffset++; \		inputlength++; \		c = inputbuf[curp];  \		curp++; \	} \}#define input_char() \{ \	if (inputfile != NULL) { \		input_char_file(); \	} else { \		input_char_mem(); \	} \}#define skip_whitespace()	\{ \	while (1) { \		input_char(); \		if (c == EOF) return(NULL); \		if (!isspace((unsigned char) c)) { backup_char(c); break; } \	} \}#define skip_tab()	\{ \	while (1) { \		input_char(); \		if (c == EOF) return(NULL); \		if (c != '\t') { backup_char(c); break; } \	} \}#define skip_whitespace_and(a)	\{ \	while (1) { \		input_char(); \		if (c == EOF) return(NULL); \		if (c == (a)) continue; \		if (!isspace((unsigned char) c)) { backup_char(c); break; } \	} \}#define skip_whitespace_break()	\{ \	while (1) { \		input_char(); \		if (c == EOF) { done = 1; break; }\		if (c == '}') { done = 1; break; }\		if (!isspace((unsigned char) c)) { backup_char(c); break; } \	} \}#define grab_token() \{ \	p = buf; \	buflen = 0; \	while (1) { \		input_char(); \		if (c == (char) EOF) return(NULL); \		if (isspace((unsigned char) c)) { backup_char(c); break; } \		*p++ = c; \		buflen++; \		if (buflen == BUFSIZ) return(NULL); \	} \	*p = '\0'; \}/* *  grab_attribute is the most heavily called macro of the bunch. *  I explicitly listed it based on input from file vs mem, to *  save a compare instruction in the innermost loop. 
*/#define grab_attribute() \{ \	p = buf; \	buflen = 0; \	if (inputfile != NULL) { \		while (1) { \			input_char_file(); \			if (c == EOF) return(NULL); \			if (c == '{') break; \			*p++ = c; \			buflen++; \			/* don't overload the buffer */ \			if (buflen == BUFSIZ) return(NULL); \		} \	} else { \		while (1) { \			input_char_mem(); \			if (c == EOF) return(NULL); \			if (c == '{') break; \			*p++ = c; \			buflen++; \			/* don't overload the buffer */ \			if (buflen == BUFSIZ) return(NULL); \		} \	} \	*p = '\0'; \}#define grab_vsize() \{ \	p = buf; \	buflen = 0; \	while (1) { \		input_char(); \		if (c == EOF) return(NULL); \		if (c == '}') break; \		*p++ = c; \		buflen++; \		if (buflen == BUFSIZ) return(NULL); \	} \	*p = '\0'; \}/* *  parse_template() - Returns a Template structure for the template *  stored in memory or in a file.  MUST call init_parse_template_file() *  or init_parse_template_string() before, and finish_parse_template() *  after.  Returns NULL on error. */Template *parse_template(){	static Template *template = NULL;	char buf[BUFSIZ + 1], *p, *attribute, *value;	int vsize, i, done = 0, c, buflen;	size_t voffset;	template = (Template *) xmalloc(sizeof(Template));	while (1) {		/* Find starting point: @ */		input_char();		if (c == EOF) {			xfree(template);			return (NULL);		}		if (c == '@')			break;	}	template->offset = inputoffset;	/* Get Template-Type */	grab_token();	template->template_type = xstrdup(buf);	/* Get URL */	skip_whitespace_and('{');	grab_token();	template->url = xstrdup(buf);	template->list = NULL;	Debug(69, 9, ("parse_template: Grabbing Template Object: %s %s\n",		template->template_type, template->url));	/* Grab the body of the SOIF object */	while (1) {		/* Get Attribute name and value */		skip_whitespace_break();		if (done == 1)			break;		grab_attribute();		attribute = (char *) xmalloc(buflen + 1);		memcpy(attribute, buf, buflen);		attribute[buflen] = '\0';		Debug(69, 9, ("parse_template: Grabbed Attribute: %s\n",			attribute));		
grab_vsize();		vsize = atoi(buf);		/* Get Value */		input_char();		if (c != ':') {			free_template(template);			xfree(attribute);			return (NULL);		}		input_char();		if (c != '\t') {			free_template(template);			xfree(attribute);			return (NULL);		}		/*		 *  Most data passes through this VERY TIGHT LOOP, so optimize.		 */		Debug(69, 9, ("parse_template: Grabbing %d bytes value\n", vsize));		value = (char *) xmalloc(vsize + 1);		voffset = inputoffset;		if (inputfile == NULL) {			if (curp + vsize < inputbufsz) {	/* enough for cpy */				memcpy(value, &inputbuf[curp], vsize);				curp += vsize;				inputoffset += vsize;				inputlength += vsize;			} else {	/* normal slow one-by-one */				for (i = 0; i < vsize; i++) {					input_char();					value[i] = c;				}			}			value[vsize] = '\0';		} else {	/* do the fast file copy */			if (fread(value, 1, vsize, inputfile) != vsize) {				free_template(template);				xfree(attribute);				xfree(value);				return (NULL);			}			inputoffset += vsize;			inputlength += vsize;		}		if (template->list == NULL) {			template->list = create_AVList(attribute, value, vsize);		} else {			FAST_add_AVList(template->list, attribute, value, vsize);		}		add_offset(template->list, attribute, voffset);		xfree(attribute);		xfree(value);	}	template->length = inputlength;	Debug(69, 1, ("parse_template: Parsed %s template for %s\n",		template->template_type, template->url));	return (template);}/* Sorting Attribute-Value Lists *//* *  attribute_cmp() - strcmp(3) for attributes.  Works with "Embed<n>" *  attributes so that they are first sorted by number, then by attribute. *  Does case insenstive compares. 
*/static int attribute_cmp(a, b)     char *a, *b;{	if ((tolower((unsigned char) a[0]) == 'e')	    && (tolower((unsigned char) b[0]) == 'e') &&	/* quickie */	    !strncasecmp(a, "embed", 5) && !strncasecmp(b, "embed", 5)) {		char *p, *q;		int an, bn;		p = strchr(a, '<');	/* Find embedded number */		q = strchr(a, '>');		if (!p || !q)			return (strcasecmp(a, b));	/* bail */		*q = '\0';		an = atoi(p + 1);		*q = '>';		p = strchr(b, '<');	/* Find embedded number */		q = strchr(b, '>');		if (!p || !q)			return (strcasecmp(a, b));	/* bail */		*q = '\0';		bn = atoi(p + 1);		*q = '>';		if (an != bn)	/* If numbers are different */			return (an < bn ? -1 : 1);		/* otherwise, do strcmp on attr */		return (strcasecmp(strchr(a, '>') + 1, strchr(b, '>') + 1));	}	return (strcasecmp(a, b));}/* *  sort_AVList() - Uses an insertion sort to sort the AVList by attribute. *  Returns the new head of the list. */AVList *sort_AVList(avl)     AVList *avl;{	AVList *walker, *n, *a, *t;	static AVList *head;	int (*acmp) ();	Debug(69, 5, ("sort_AVList: Sorting linked list: %p\n", avl));	/* The compare function */	acmp = attribute_cmp;	/* Set the first node to be the head of the new list */	head = avl;	walker = avl->next;	head->next = NULL;	while (walker) {		/* Pick off this node */		n = walker;		walker = walker->next;		n->next = NULL;		/* Find insertion point */		for (a = head; a->next &&		    acmp(a->next->data->attribute, n->data->attribute) < 0;		    a = a->next);		if (a == head) {	/* prepend to list */			if (acmp(a->data->attribute, n->data->attribute) < 0) {				/* As the second node */				t = a->next;				a->next = n;				n->next = t;			} else {				/* As the first node */				head = n;				n->next = a;			}		} else {	/* insert into list */			t = a->next;			a->next = n;			n->next = t;		}	}	return (head);}/* *  embed_template() - Embeds the given Template t into the Template template. *  Returns NULL on error; otherwise returns template. 
*/Template *embed_template(t, template)     Template *t, *template;{	int nembed = 0;		/* number of embedded documents in t */	AVList *walker;	char *p, *q, buf[BUFSIZ];	Debug(69, 5, ("embed_template: Embedding %s into %s\n",		t->url, template->url));	/* Find out what the last embedded document in template is */	for (walker = template->list; walker; walker = walker->next) {		if (strncasecmp(walker->data->attribute, "embed<", 6))			continue;		p = strchr(walker->data->attribute, '<') + 1;		if ((q = strchr(walker->data->attribute, '>')) != NULL) {			*q = '\0';		} else {			continue;		}		nembed = (nembed < atoi(p)) ? atoi(p) : nembed;		*q = '>';	/* replace */	}	Debug(69, 9, ("embed_template: %s has %d embedded documents\n",		template->url, nembed));	/* Now add all of the fields from t into template */	nembed++;	for (walker = t->list; walker; walker = walker->next) {		sprintf(buf, "Embed<%d>-%s", nembed, walker->data->attribute);		FAST_add_AVList(template->list, buf, walker->data->value,		    walker->data->vsize);		if (walker->data->offset != -1)			add_offset(template->list, buf, walker->data->offset);	}#ifdef USE_T_URI	add_AVList(template->list, T_URI, template->url, strlen(template->url));#endif	return (template);}/* *  sink_embedded() - Places all of the embedded attributes at the bottom *  of the list.  *Must* be sorted first. */AVList *sink_embedded(list)     AVList *list;{	AVList *start, *end, *walker, *last, *t;	static AVList *head;	Debug(69, 9, ("sink_embedded: Sinking all Embed attrs: %p\n", list));	for (walker = last = head = list, start = end = NULL;	    walker != NULL;	    last = walker, walker = walker->next) {		if (!strncasecmp(walker->data->attribute, "embed", 5)) {			start = start ? start : last;		} else if (start != NULL) {			end = end ? 
end : last;		}	}	if (start == NULL || end == NULL) {		/* No embedded section, or at bottom of list */		return (head);	} else if (start == head) {		last->next = start;	/* Embed section at top of list */		head = end->next;		end->next = NULL;	} else {		/* Embed section at middle of list */		t = start->next;		last->next = t;		start->next = end->next;		end->next = NULL;	}	return (head);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -