⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 hiperfifo.c

📁 专门介绍网络爬虫设计的论文,写得非常好啊.希望大家好好学习
💻 C
字号:
/***************************************************************************** *                                  _   _ ____  _ *  Project                     ___| | | |  _ \| | *                             / __| | | | |_) | | *                            | (__| |_| |  _ <| |___ *                             \___|\___/|_| \_\_____| * * $Id: hiperfifo.c,v 1.3 2007-05-02 13:52:38 bagder Exp $ * * Example application source code using the multi socket interface to * download many files at once. * * Written by Jeff PohlmeyerRequires libevent and a (POSIX?) system that has mkfifo().This is an adaptation of libcurl's "hipev.c" and libevent's "event-test.c"sample programs.When running, the program creates the named pipe "hiper.fifo"Whenever there is input into the fifo, the program reads the input as a listof URL's and creates some new easy handles to fetch each URL via thecurl_multi "hiper" API.Thus, you can try a single URL:  % echo http://www.yahoo.com > hiper.fifoOr a whole bunch of them:  % cat my-url-list > hiper.fifoThe fifo buffer is handled almost instantly, so you can even add more URL'swhile the previous requests are still being downloaded.Note:  For the sake of simplicity, URL length is limited to 1023 char's !This is purely a demo app, all retrieved data is simply discarded by the writecallback.*/#include <stdio.h>#include <string.h>#include <stdlib.h>#include <sys/time.h>#include <time.h>#include <unistd.h>#include <sys/poll.h>#include <curl/curl.h>#include <event.h>#include <fcntl.h>#include <sys/stat.h>#include <errno.h>#define MSG_OUT stdout /* Send info to stdout, change to stderr if you want *//* Global information, common to all connections */typedef struct _GlobalInfo {  struct event fifo_event;  struct event timer_event;  CURLM *multi;  int prev_running;  int still_running;  FILE* input;} GlobalInfo;/* Information associated with a specific easy handle */typedef struct _ConnInfo {  CURL *easy;  char *url;  GlobalInfo *global;  char error[CURL_ERROR_SIZE];} ConnInfo;/* Information associated with a specific socket */typedef struct _SockInfo {  curl_socket_t sockfd;  CURL *easy;  int action;  long timeout;  struct event ev;  int evset;  GlobalInfo *global;} SockInfo;/* Update the event timer after curl_multi library calls */static int multi_timer_cb(CURLM *multi, long timeout_ms, GlobalInfo *g){  struct timeval timeout;  timeout.tv_sec = timeout_ms/1000;  timeout.tv_usec = (timeout_ms%1000)*1000;  fprintf(MSG_OUT, "multi_timer_cb: Setting timeout to %ld ms\n", timeout_ms);  evtimer_add(&g->timer_event, &timeout);  return 0;}/* Die if we get a bad CURLMcode somewhere */void mcode_or_die(char *where, CURLMcode code) {  if ( CURLM_OK != code ) {    char *s;    switch (code) {      case     CURLM_CALL_MULTI_PERFORM: s="CURLM_CALL_MULTI_PERFORM"; break;      case     CURLM_OK:                 s="CURLM_OK";                 break;      case     CURLM_BAD_HANDLE:         s="CURLM_BAD_HANDLE";         break;      case     CURLM_BAD_EASY_HANDLE:    s="CURLM_BAD_EASY_HANDLE";    break;      case     CURLM_OUT_OF_MEMORY:      s="CURLM_OUT_OF_MEMORY";      break;      case     CURLM_INTERNAL_ERROR:     s="CURLM_INTERNAL_ERROR";     break;      case     CURLM_BAD_SOCKET:         s="CURLM_BAD_SOCKET";         break;      case     CURLM_UNKNOWN_OPTION:     s="CURLM_UNKNOWN_OPTION";     break;      case     CURLM_LAST:               s="CURLM_LAST";               break;      default: s="CURLM_unknown";    }    fprintf(MSG_OUT, "ERROR: %s returns %s\n", where, s);    exit(code);  }}/* Check for completed transfers, and remove their easy handles */static void check_run_count(GlobalInfo *g){  if (g->prev_running > g->still_running) {    char *eff_url=NULL;    CURLMsg *msg;    int msgs_left;    ConnInfo *conn=NULL;    CURL*easy;    CURLcode res;    fprintf(MSG_OUT, "REMAINING: %d\n", g->still_running);    /*      I am still uncertain whether it is safe to remove an easy handle      from inside the curl_multi_info_read loop, so here I will search      for completed transfers in the inner "while" loop, and then remove      them in the outer "do-while" loop...   */    do {      easy=NULL;      while ((msg = curl_multi_info_read(g->multi, &msgs_left))) {        if (msg->msg == CURLMSG_DONE) {          easy=msg->easy_handle;          res=msg->data.result;          break;        }      }      if (easy) {          curl_easy_getinfo(easy, CURLINFO_PRIVATE, &conn);          curl_easy_getinfo(easy, CURLINFO_EFFECTIVE_URL, &eff_url);          fprintf(MSG_OUT, "DONE: %s => (%d) %s\n", eff_url, res, conn->error);          curl_multi_remove_handle(g->multi, easy);          free(conn->url);          curl_easy_cleanup(easy);          free(conn);      }    } while ( easy );  }  g->prev_running = g->still_running;}/* Called by libevent when we get action on a multi socket */static void event_cb(int fd, short kind, void *userp){  GlobalInfo *g = (GlobalInfo*) userp;  CURLMcode rc;  do {    rc = curl_multi_socket(g->multi, fd, &g->still_running);  } while (rc == CURLM_CALL_MULTI_PERFORM);  mcode_or_die("event_cb: curl_multi_socket", rc);  check_run_count(g);  if ( g->still_running <= 0 ) {    fprintf(MSG_OUT, "last transfer done, kill timeout\n");    if (evtimer_pending(&g->timer_event, NULL)) {      evtimer_del(&g->timer_event);    }  }}/* Called by libevent when our timeout expires */static void timer_cb(int fd, short kind, void *userp){  (void)fd;  (void)kind;  GlobalInfo *g = (GlobalInfo *)userp;  CURLMcode rc;  do {    rc = curl_multi_socket(g->multi, CURL_SOCKET_TIMEOUT, &g->still_running);  } while (rc == CURLM_CALL_MULTI_PERFORM);  mcode_or_die("timer_cb: curl_multi_socket", rc);  check_run_count(g);}/* Clean up the SockInfo structure */static void remsock(SockInfo *f){  if (!f) { return; }  if (f->evset) { event_del(&f->ev); }  free(f);}/* Assign information to a SockInfo structure */static void setsock(SockInfo*f, curl_socket_t s, CURL*e, int act, GlobalInfo*g){  int kind =     (act&CURL_POLL_IN?EV_READ:0)|(act&CURL_POLL_OUT?EV_WRITE:0)|EV_PERSIST;  f->sockfd = s;  f->action = act;  f->easy = e;  if (f->evset) { event_del(&f->ev); }  event_set( &f->ev, f->sockfd, kind, event_cb, g);  f->evset=1;  event_add(&f->ev, NULL);}/* Initialize a new SockInfo structure */static void addsock(curl_socket_t s, CURL *easy, int action, GlobalInfo *g) {  SockInfo *fdp = calloc(sizeof(SockInfo), 1);  fdp->global = g;  setsock(fdp, s, easy, action, g);  curl_multi_assign(g->multi, s, fdp);}/* CURLMOPT_SOCKETFUNCTION */static int sock_cb(CURL *e, curl_socket_t s, int what, void *cbp, void *sockp){  GlobalInfo *g = (GlobalInfo*) cbp;  SockInfo *fdp = (SockInfo*) sockp;  char *whatstr[]={ "none", "IN", "OUT", "INOUT", "REMOVE" };  fprintf(MSG_OUT,          "socket callback: s=%d e=%p what=%s ", s, e, whatstr[what]);  if (what == CURL_POLL_REMOVE) {    fprintf(MSG_OUT, "\n");    remsock(fdp);  } else {    if (!fdp) {      fprintf(MSG_OUT, "Adding data: %s%s\n",             what&CURL_POLL_IN?"READ":"",             what&CURL_POLL_OUT?"WRITE":"" );      addsock(s, e, what, g);    }    else {      fprintf(MSG_OUT,        "Changing action from %d to %d\n", fdp->action, what);      setsock(fdp, s, e, what, g);    }  }  return 0;}/* CURLOPT_WRITEFUNCTION */static size_t write_cb(void *ptr, size_t size, size_t nmemb, void *data){  size_t realsize = size * nmemb;  ConnInfo *conn = (ConnInfo*) data;  (void)ptr;  (void)conn;  return realsize;}/* CURLOPT_PROGRESSFUNCTION */int prog_cb (void *p, double dltotal, double dlnow, double ult, double uln){  ConnInfo *conn = (ConnInfo *)p;  fprintf(MSG_OUT, "Progress: %s (%g/%g)\n", conn->url, dlnow, dltotal);  return 0;}/* Create a new easy handle, and add it to the global curl_multi */void new_conn(char *url, GlobalInfo *g ) {  ConnInfo *conn;  CURLMcode rc;  conn = calloc(1, sizeof(ConnInfo));  memset(conn, 0, sizeof(ConnInfo));  conn->error[0]='\0';  conn->easy = curl_easy_init();  if (!conn->easy) {    fprintf(MSG_OUT, "curl_easy_init() failed, exiting!\n");    exit(2);  }  conn->global = g;  conn->url = strdup(url);  curl_easy_setopt(conn->easy, CURLOPT_URL, conn->url);  curl_easy_setopt(conn->easy, CURLOPT_WRITEFUNCTION, write_cb);  curl_easy_setopt(conn->easy, CURLOPT_WRITEDATA, &conn);  curl_easy_setopt(conn->easy, CURLOPT_VERBOSE, 0);  curl_easy_setopt(conn->easy, CURLOPT_ERRORBUFFER, conn->error);  curl_easy_setopt(conn->easy, CURLOPT_PRIVATE, conn);  curl_easy_setopt(conn->easy, CURLOPT_NOPROGRESS, 0);  curl_easy_setopt(conn->easy, CURLOPT_PROGRESSFUNCTION, prog_cb);  curl_easy_setopt(conn->easy, CURLOPT_PROGRESSDATA, conn);  fprintf(MSG_OUT,          "Adding easy %p to multi %p (%s)\n", conn->easy, g->multi, url);  rc =curl_multi_add_handle(g->multi, conn->easy);  mcode_or_die("new_conn: curl_multi_add_handle", rc);  do {    rc = curl_multi_socket_all(g->multi, &g->still_running);  } while (CURLM_CALL_MULTI_PERFORM == rc);  mcode_or_die("new_conn: curl_multi_socket_all", rc);  check_run_count(g);}/* This gets called whenever data is received from the fifo */void fifo_cb(int fd, short event, void *arg) {  char s[1024];  long int rv=0;  int n=0;  GlobalInfo *g = (GlobalInfo *)arg;  do {    s[0]='\0';    rv=fscanf(g->input, "%1023s%n", s, &n);    s[n]='\0';    if ( n && s[0] ) {      new_conn(s,arg);  /* if we read a URL, go get it! */    } else break;  } while ( rv != EOF);}/* Create a named pipe and tell libevent to monitor it */int init_fifo (GlobalInfo *g) {  struct stat st;  char *fifo = "hiper.fifo";  int socket;  fprintf(MSG_OUT, "Creating named pipe \"%s\"\n", fifo);  if (lstat (fifo, &st) == 0) {    if ((st.st_mode & S_IFMT) == S_IFREG) {      errno = EEXIST;      perror("lstat");      exit (1);    }  }  unlink(fifo);  if (mkfifo (fifo, 0600) == -1) {    perror("mkfifo");    exit (1);  }  socket = open(fifo, O_RDWR | O_NONBLOCK, 0);  if (socket == -1) {     perror("open");     exit (1);  }  g->input = fdopen(socket, "r");  fprintf(MSG_OUT, "Now, pipe some URL's into > %s\n", fifo);  event_set(&g->fifo_event, socket, EV_READ | EV_PERSIST, fifo_cb, g);  event_add(&g->fifo_event, NULL);  return (0);}int main(int argc, char **argv){  GlobalInfo g;  CURLMcode rc;  memset(&g, 0, sizeof(GlobalInfo));  event_init();  init_fifo(&g);  g.multi = curl_multi_init();  evtimer_set(&g.timer_event, timer_cb, &g);  curl_multi_setopt(g.multi, CURLMOPT_SOCKETFUNCTION, sock_cb);  curl_multi_setopt(g.multi, CURLMOPT_SOCKETDATA, &g);  curl_multi_setopt(g.multi, CURLMOPT_TIMERFUNCTION, multi_timer_cb);  curl_multi_setopt(g.multi, CURLMOPT_TIMERDATA, &g);  do {    rc = curl_multi_socket_all(g.multi, &g.still_running);  } while (CURLM_CALL_MULTI_PERFORM == rc);  event_dispatch();  curl_multi_cleanup(g.multi);  return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -