⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ftpget.bin.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 3 页
字号:
		    tokens[i], tokens[i + 1], tokens[i + 2]);		if (!strstr(buf, sbuf))			sprintf(sbuf, "%s %2s %-5s",			    tokens[i], tokens[i + 1], tokens[i + 2]);		if ((t = strstr(buf, sbuf))) {			p->date = xstrdup(sbuf);			t += strlen(sbuf);			while (strchr(WS, *t))				t++;			p->name = xstrdup(t);			if ((t = strstr(p->name, " -> "))) {				*t = '\0';				p->link = xstrdup(t + 4);			}		}		break;	}	/* try it as a DOS listing */	if (p->name == NULL &&	    !sscanf(tokens[0], "%[-0-9]", sbuf) &&	/* 04-05-70 */	    !sscanf(tokens[1], "%[0-9:apm]", sbuf)) {	/* 09:33pm */		if (!strcasecmp(tokens[2], "<dir>")) {			p->type = 'd';			sprintf(sbuf, "%s %s", tokens[0], tokens[1]);			p->date = xstrdup(sbuf);			p->name = xstrdup(tokens[3]);		}		p->type = '-';		sprintf(sbuf, "%s %s", tokens[0], tokens[1]);		p->date = xstrdup(sbuf);		p->size = atoi(tokens[2]);		p->name = xstrdup(tokens[3]);	}	for (i = 0; i < n; i++)		xfree(tokens[i]);	if (p->name == NULL) {		xfree(p->date);		xfree(p);		p = NULL;	}	return p;}char *dots_fill(len)     size_t len;{	static char buf[256];	int i = 0;	for (i = len; i < FIELDSIZE; i++)		buf[i - len] = (i % 2) ? '.' : ' ';	buf[i - len] = '\0';	return buf;}char *htmlize_list_entry(line, r)     char *line;     request_t *r;{	char *pd = NULL;	char *t = NULL;	char *link = NULL;	char *icon = NULL;	char *html = NULL;	char *eurl = NULL;	char *ename = NULL;	parts_t *parts = NULL;	link = (char *) xmalloc(2048);	icon = (char *) xmalloc(2048);	html = (char *) xmalloc(8192);	/* check .. 
as special case */	if (!strcmp(line, "..")) {		pd = xstrdup(r->path_escaped);		if ((t = strrchr(pd, '/')))			*(t + 1) = '\0';		else			*(pd) = '\0';		sprintf(icon, "<IMG BORDER=0 SRC=\"%s\" ALT=\"%-6s\">",		    "internal-gopher-menu", "[DIR]");		sprintf(link, "<A HREF=\"ftp://%s%s/%s\">%s</A>",		    r->userinfo,		    r->host,		    pd,		    "Parent Directory");		sprintf(html, "%s %s\n", icon, link);		xfree(pd);		xfree(icon);		xfree(link);		return (html);	}	if ((parts = parse_entry(line)) == NULL) {		sprintf(html, "%s\n", line);		return html;	}	if (!strcmp(parts->name, ".") || !strcmp(parts->name, "..")) {		/* sprintf(html, "<!-- %s -->\n", line); */		*html = '\0';		return html;	}	parts->size += 1023;	parts->size >>= 10;	parts->showname = xstrdup(parts->name);	if ((int) strlen(parts->showname) > FIELDSIZE - 1) {		*(parts->showname + FIELDSIZE - 1) = '>';		*(parts->showname + FIELDSIZE - 0) = '\0';	}	eurl = xstrdup(rfc1738_escape(r->url));	ename = xstrdup(rfc1738_escape(parts->name));	switch (parts->type) {	case '-':		sprintf(icon, "<IMG SRC=\"internal-gopher-%s\" ALT=\"%-6s\">",		    mime_get_icon(parts->name), "[FILE]");		sprintf(link, "<A HREF=\"%s%s\">%s</A>%s",		    eurl,		    ename,		    parts->showname,		    dots_fill(strlen(parts->showname)));		sprintf(html, "%s %s  [%s] %6dk\n",		    icon,		    link,		    parts->date,		    parts->size);		break;	case 'd':		sprintf(icon, "<IMG SRC=\"internal-gopher-%s\" ALT=\"%-6s\">",		    "menu", "[DIR]");		sprintf(link, "<A HREF=\"%s%s/\">%s</A>%s",		    eurl,		    ename,		    parts->showname,		    dots_fill(strlen(parts->showname)));		sprintf(html, "%s %s  [%s]\n",		    icon,		    link,		    parts->date);		break;	case 'l':		sprintf(icon, "<IMG SRC=\"internal-gopher-%s\" ALT=\"%-6s\">",		    mime_get_icon(parts->link), "[LINK]");		sprintf(link, "<A HREF=\"%s%s\">%s</A>%s",		    eurl,		    ename,		    parts->showname,		    dots_fill(strlen(parts->showname)));		sprintf(html, "%s %s  [%s]\n",		    icon,		    link,		    
parts->date);		break;	default:		sprintf(html, "%s\n", line);		break;	}	xfree(parts->name);	xfree(parts->showname);	xfree(parts->date);	xfree(parts->link);	xfree(parts);	xfree(eurl);	xfree(ename);	xfree(icon);	xfree(link);	return html;		/* html should be freed by caller */}void try_readme(r)     request_t *r;{	char *t = NULL;	char *tfname = NULL;	request_t *readme = NULL;	int fd = -1;	struct stat sb;	FILE *fp = NULL;	if ((t = tempnam(NULL, progname)) == NULL)		return;	tfname = xstrdup(t);	if ((fd = open(tfname, O_WRONLY | O_CREAT, 0600)) < 0) {		xfree(tfname);		return;	}	readme = (request_t *) xmalloc(sizeof(request_t));	memset(readme, '\0', sizeof(request_t));	readme->path = xstrdup("README");	readme->cfd = fd;	readme->sfd = r->sfd;	readme->state = CWD_FAIL;	readme->flags = F_NOERRS;	process_request(readme);	close(readme->cfd);	fp = fopen(tfname, "r");	unlink(tfname);	if (fp) {		if (fstat(fileno(fp), &sb) < 0 || sb.st_size == 0) {			fclose(fp);			fp = NULL;		}	}	r->readme_fp = fp;	xfree(tfname);	xfree(readme->path);	xfree(readme);}state_t htmlify_listing(r)     request_t *r;{	int code;	char buf[8192];	char *t = NULL;	FILE *rfp = NULL;	FILE *wfp = NULL;	time_t stamp;	rfp = fdopen(dup(r->dfd), "r");	wfp = fdopen(dup(r->cfd), "w");	setbuf(rfp, NULL);	setbuf(wfp, NULL);	r->userinfo = xstrdup("");	if (strcmp(r->user, "anonymous")) {		xfree(r->userinfo);		r->userinfo = (char *)		    xmalloc(strlen(r->user) + strlen(r->pass) + 2);		sprintf(r->userinfo, "%s:%s@", r->user, r->pass);	}	r->path_escaped = xstrdup(rfc1738_escape(r->path));	stamp = time(NULL);	fprintf(wfp, "<!-- HTML listing generated by Harvest %s -->\n",	    HARVEST_VERSION);	fprintf(wfp, "<!-- %s -->\n", http_time(stamp));	fprintf(wfp, "<TITLE>\n");	fprintf(wfp, "FTP Directory: %s\n", r->url);	fprintf(wfp, "</TITLE>\n");	fprintf(wfp, "<BASE HREF=\"%s\">\n", r->url);	if (r->cmd_msg) {	/* There was a message sent with the CWD cmd */		list_t *l;		fprintf(wfp, "<PRE>\n");		for (l = r->cmd_msg; l; l = l->next)			
write(r->cfd, l->ptr, strlen(l->ptr));		fprintf(wfp, "</PRE>\n");		fprintf(wfp, "<HR>\n");	} else if (r->readme_fp) {		fprintf(wfp, "<H4>README file from %s</H4>\n", r->url);		fprintf(wfp, "<PRE>\n");		while (fgets(buf, 1024, r->readme_fp))			fputs(buf, wfp);		fclose(r->readme_fp);		fprintf(wfp, "</PRE>\n");		fprintf(wfp, "<HR>\n");	}	fprintf(wfp, "<H2>\n");	fprintf(wfp, "FTP Directory: %s\n", r->url);	fprintf(wfp, "</H2>\n");	fprintf(wfp, "<PRE>\n");	if (strcmp(r->path, ".")) {		if ((t = htmlize_list_entry("..", r))) {			fputs(t, wfp);			xfree(t);		}	}	while (fgets(buf, 8192, rfp)) {		alarm(timeout);	/* reset timeout timer */		if ((t = strchr(buf, '\r')))			*t = '\0';		if ((t = strchr(buf, '\n')))			*t = '\0';		if (!strncmp(buf, "total", 5))			continue;		if ((t = htmlize_list_entry(buf, r))) {			fputs(t, wfp);			xfree(t);		}	}	fprintf(wfp, "</PRE>\n");	fprintf(wfp, "<HR>\n");	fprintf(wfp, "<ADDRESS>\n");	fprintf(wfp, "Generated %s, by %s/%s@%s\n",	    http_time(stamp), progname, HARVEST_VERSION, getfullhostname());	fprintf(wfp, "</ADDRESS>\n");	fclose(wfp);	fclose(rfp);	if ((code = read_reply(r->sfd)) > 0) {		if (code == 226)			return TRANSFER_DONE;	}	r->errmsg = xstrdup(server_reply_msg);	r->rc = code < 0 ? 
4 : 5;	return FAIL_SOFT;}static int process_request(r)     request_t *r;{	if (r == (request_t *) NULL)		return 1;	while (1) {		Debug(26, 1, ("process_request: in state %s\n",			state_str[r->state]));		switch (r->state) {		case BEGIN:			r->state = parse_request(r);			break;		case PARSE_OK:			r->state = do_connect(r);			break;		case CONNECTED:			r->state = read_welcome(r);			break;		case SERVICE_READY:			r->state = do_user(r);			break;		case NEED_PASSWD:			r->state = do_passwd(r);			break;		case LOGGED_IN:			r->state = do_type(r);			break;		case TYPE_OK:			r->state = do_mdtm(r);			break;		case MDTM_OK:			r->state = do_size(r);			break;		case SIZE_OK:			r->state = do_cwd(r);			break;		case CWD_OK:			r->flags |= F_ISDIR;			if (r->flags & F_HTTPIFY) {				if (cmd_msg) {					r->cmd_msg = cmd_msg;					cmd_msg = NULL;				} else {					try_readme(r);				}			}			r->state = do_port(r);			break;		case CWD_FAIL:			r->flags &= ~F_ISDIR;			r->state = do_port(r);			break;		case PORT_OK:			r->state = r->flags & F_ISDIR ? 
do_list(r) : do_retr(r);			break;		case TRANSFER_BEGIN:			if (r->flags & F_HTTPIFY)				send_success_hdr(r);			r->state = do_accept(r);			break;		case DATA_TRANSFER:			if ((r->flags & F_HTTPIFY) && (r->flags & F_ISDIR))				r->state = htmlify_listing(r);			else				r->state = read_data(r);			break;		case TRANSFER_DONE:			r->state = DONE;			break;		case DONE:			return 0;		case FAIL_HARD:		case FAIL_SOFT:			fail(r);			return (r->rc);			/*NOTREACHED */		default:			errorlog("Nothing to do with state %s\n",			    state_str[r->state]);			return (1);			/*NOTREACHED */		}	}}void cleanup_path(r)     request_t *r;{	int again;	int l;	char *t = NULL;	char *s = NULL;	do {		again = 0;		l = strlen(r->path);		/* check for null path */		if (*r->path == '\0') {			t = r->path;			r->path = xstrdup(".");			xfree(t);			again = 1;		} else			/* remove any leading slashes from path */		if (*r->path == '/') {			t = r->path;			r->path = xstrdup(t + 1);			xfree(t);			again = 1;		} else			/* remove leading ./ */		if (!strncmp(r->path, "./", 2)) {			t = r->path;			r->path = xstrdup(t + 2);			xfree(t);			again = 1;		} else			/* remove any trailing slashes from path */		if (*(r->path + l - 1) == '/') {			*(r->path + l - 1) = '\0';			again = 1;		} else			/* remove trailing /. 
*/		if (!strcmp(r->path + l - 2, "/.")) {			*(r->path + l - 2) = '\0';			again = 1;		} else			/* remove /./ */		if ((t = strstr(r->path, "/./"))) {			s = xstrdup(t + 2);			strcpy(t, s);			xfree(s);			again = 1;		} else			/* remove // */		if ((t = strstr(r->path, "//"))) {			s = xstrdup(t + 1);			strcpy(t, s);			xfree(s);			again = 1;		}	} while (again);}void usage(){	fprintf(stderr, "usage: %s [-l num] [-c num] [-t timeout] [-htmlify] filename host path A,I user pass\n", progname);	exit(1);}int main(argc, argv)     int argc;     char *argv[];{	request_t *r = NULL;	FILE *logfp = NULL;	char *t = NULL;	int httpify_flag = 0;	int rc;	if ((t = getenv ("HARVEST_XFER_TIMEOUT")) !=NULL)		timeout = atol (t);	if ((t = strrchr(argv[0], '/'))) {		progname = xstrdup(t + 1);	} else		progname = xstrdup(argv[0]);	if (getenv("HARVEST_GATHERER_LOGFILE") != (char *) NULL)		logfp = fopen(getenv("HARVEST_GATHERER_LOGFILE"), "a+");	if (logfp == (FILE *) NULL)		logfp = stderr;	init_log3(progname, logfp, stderr);	debug_init();	for (argc--, argv++; argc > 0 && **argv == '-'; argc--, argv++) {		if (!strcmp(*argv, "-"))			break;		if (!strncmp(*argv, "-D", 2)) {			debug_flag(*argv);			continue;		}		if (!strcmp(*argv, "-htmlify") || !strcmp(*argv, "-httpify")) {			httpify_flag = 1;			continue;		}		if (!strcmp(*argv, "-t")) {			if (--argc < 1)				usage();			argv++;			timeout = atoi(*argv);			if (timeout < 1)				timeout = XFER_TIMEOUT;			continue;		}		if (!strcmp(*argv, "-c")) {			if (--argc < 1)				usage();			argv++;			connect_retries = atoi(*argv);			continue;		}		if (!strcmp(*argv, "-l")) {			if (--argc < 1)				usage();			argv++;			login_retries = atoi(*argv);			continue;		}		if (!strcmp(*argv, "-v")) {			printf("%s version %s\n", progname, HARVEST_VERSION);			exit(0);		}	}	if (argc != 6)		usage();	r = (request_t *) xmalloc(sizeof(request_t));	memset(r, '\0', sizeof(request_t));	if (strcmp(argv[0], "-") == 0) {		r->cfd = 1;	} else if ((r->cfd = open(argv[0], O_WRONLY | O_CREAT, 0666)) < 0) {		
perror(argv[0]);		exit(1);	}	r->host = xstrdup(argv[1]);	r->path = xstrdup(argv[2]);	r->type = xstrdup(argv[3]);	r->user = xstrdup(argv[4]);	r->pass = xstrdup(argv[5]);	r->sfd = -1;	r->dfd = -1;	r->size = -1;	r->state = BEGIN;	r->flags |= httpify_flag ? F_HTTPIFY : 0;	if (*(r->type) != 'A' && *(r->type) != 'I') {		errorlog("Invalid transfer type: %s\n", r->type);		usage();	}	cleanup_path(r);	r->url = (char *) xmalloc(strlen(r->host) + strlen(r->path) + 9);	if (strcmp(r->path, "."))		sprintf(r->url, "ftp://%s/%s/", r->host, r->path);	else		sprintf(r->url, "ftp://%s/", r->host);	signal(SIGALRM, timeout_handler);	alarm(timeout);	rc = process_request(r);	if (r->sfd > 0)		send_cmd(r->sfd, "QUIT");	if (r->sfd > 0)		close(r->sfd);	if (r->cfd > 0)		close(r->cfd);	if (r->dfd > 0)		close(r->dfd);	close(0);	close(1);	exit(rc);	/*NOTREACHED */}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -