⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 sws.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 2 页
字号:
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2003, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 * $Id: sws.c,v 1.36 2003/10/30 07:32:04 bagder Exp $
 ***************************************************************************/

/* sws.c: simple (silly?) web server

   This code was originally graciously donated to the project by Juergen
   Wilke. Thanks a bunch!

 */
#include "setup.h" /* portability help from the lib directory */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <netinet/in.h>
#ifdef _XOPEN_SOURCE_EXTENDED
/* This define is "almost" required to build on HPUX 11 */
#include <arpa/inet.h>
#endif
#include <netdb.h>

#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif

const char *spitout(FILE *stream,
                    const char *main,
                    const char *sub, int *size);

#define DEFAULT_PORT 8999

#ifndef DEFAULT_LOGFILE
#define DEFAULT_LOGFILE "log/sws.log"
#endif

#define SWSVERSION "cURL test suite HTTP server/0.1"

#define REQUEST_DUMP  "log/server.input"
#define RESPONSE_DUMP "log/server.response"

#define TEST_DATA_PATH "data/test%d"

/* Negative document numbers returned by get_request() for requests that are
   not plain numbered test documents. */
enum {
  DOCNUMBER_QUIT    = -6,
  DOCNUMBER_BADCONNECT = -5,
  DOCNUMBER_INTERNAL= -4,
  DOCNUMBER_CONNECT = -3,
  DOCNUMBER_WERULEZ = -2,
  DOCNUMBER_404     = -1
};

/* sent as reply to a QUIT */
static const char *docquit =
"HTTP/1.1 200 Goodbye\r\n"
"\r\n";

/* sent as reply to a CONNECT */
static const char *docconnect =
"HTTP/1.1 200 Mighty fine indeed\r\n"
"\r\n";

/* sent as reply to a "bad" CONNECT */
static const char *docbadconnect =
"HTTP/1.1 501 Forbidden you fool\r\n"
"\r\n";

/* send back this on 404 file not found */
static const char *doc404 = "HTTP/1.1 404 Not Found\n"
    "Server: " SWSVERSION "\n"
    "Connection: close\n"
    "Content-Type: text/html\n"
    "\n"
    "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n"
    "<HTML><HEAD>\n"
    "<TITLE>404 Not Found</TITLE>\n"
    "</HEAD><BODY>\n"
    "<H1>Not Found</H1>\n"
    "The requested URL was not found on this server.\n"
    "<P><HR><ADDRESS>" SWSVERSION "</ADDRESS>\n" "</BODY></HTML>\n";

#ifdef HAVE_SIGNAL
static volatile int sigpipe;
#endif
static FILE *logfp;

/*
 * logmsg() writes one line to logfp of the form
 * "<local time>: <pid>: <message>" and flushes the stream.
 *
 * 'msg' is a printf-style format string followed by its arguments. The
 * formatted message is limited to sizeof(buffer)-1 characters; anything
 * longer is truncated instead of overflowing the stack buffer.
 */
static void logmsg(const char *msg, ...)
{
  time_t t = time(NULL);
  va_list ap;
  struct tm *curr_time = localtime(&t);
  char loctime[80];
  char buffer[256];

  va_start(ap, msg);
  /* bounded formatting: a huge message is cut off, never written past
     the end of 'buffer' */
  vsnprintf(buffer, sizeof(buffer), msg, ap);
  va_end(ap);

  strcpy(loctime, asctime(curr_time));
  /* asctime() output ends with a newline, cut it off */
  loctime[strlen(loctime) - 1] = '\0';
  fprintf(logfp, "%s: %d: %s\n", loctime, (int)getpid(), buffer);
  fflush(logfp);
}

#ifdef HAVE_SIGNAL
/* Signal handler: only records in 'sigpipe' that it was invoked. */
static void sigpipe_handler(int sig)
{
  (void)sig; /* prevent warning */
  sigpipe = 1;
}
#endif

/*
 * ProcessRequest() inspects the zero terminated data received so far and
 * tells whether it holds a complete request.
 *
 * Returns 1 when the request is complete and 0 when more data is needed:
 *  - no "\r\n\r\n" end-of-headers marker yet                 => 0
 *  - "Transfer-Encoding: chunked" header: complete only when the
 *    terminating "\r\n0\r\n" chunk is present in the buffer
 *  - "Content-Length:" header with a value > 0: complete only when at
 *    least that many bytes follow the end-of-headers marker
 *  - otherwise (no body announced)                           => 1
 */
int ProcessRequest(char *request)
{
  char *line=request;
  unsigned long contentlength=0;
  char chunked=FALSE;

#define END_OF_HEADERS "\r\n\r\n"

  char *end;
  end = strstr(request, END_OF_HEADERS);

  if(!end)
    /* we don't have a complete request yet! */
    return 0;

  /* **** Persistence ****
   *
   * If the request is a HTTP/1.0 one, we close the connection unconditionally
   * when we're done.
   *
   * If the request is a HTTP/1.1 one, we MUST check for a "Connection:"
   * header that might say "close". If it does, we close a connection when
   * this request is processed. Otherwise, we keep the connection alive for X
   * seconds.
   */

  do {
    if(!strncasecmp("Content-Length:", line, 15)) {
      contentlength = strtol(line+15, &line, 10);
      break;
    }
    else if(!strncasecmp("Transfer-Encoding: chunked", line,
                         strlen("Transfer-Encoding: chunked"))) {
      /* chunked data coming in */
      chunked = TRUE;
    }

    if(chunked) {
      if(strstr(request, "\r\n0\r\n"))
        /* end of chunks reached */
        return 1; /* done */
      else
        return 0; /* not done */
    }

    line = strchr(line, '\n');
    if(line)
      line++;
    /* only scan lines that start inside the header section; lines at or
       beyond 'end' belong to the body and must not be taken for headers */
  } while(line && (line < end));

  if(contentlength > 0 ) {
    if(contentlength <= strlen(end+strlen(END_OF_HEADERS)))
      return 1; /* done */
    else
      return 0; /* not complete yet */
  }
  return 1; /* done */
}

/*
 * storerequest() appends the zero terminated request in 'reqbuf' to the
 * REQUEST_DUMP file and logs whether that worked. A short write or a
 * failing fclose() is reported as a failure.
 */
void storerequest(char *reqbuf)
{
  FILE *dump;

  dump = fopen(REQUEST_DUMP, "ab"); /* b is for windows-preparing */
  if(dump) {
    size_t len = strlen(reqbuf);
    size_t written = fwrite(reqbuf, 1, len, dump);
    int closefailed = fclose(dump); /* fclose flushes, so it can fail too */

    if((written == len) && !closefailed)
      logmsg("Wrote request input to " REQUEST_DUMP);
    else
      logmsg("Failed to write request input to " REQUEST_DUMP);
  }
  else {
    logmsg("Failed to write request input to " REQUEST_DUMP);
  }
}

#define REQBUFSIZ 150000
#define REQBUFSIZ_TXT "149999"

/* very-big-path support */
#define MAXDOCNAMELEN 140000
#define MAXDOCNAMELEN_TXT "139999"

#define REQUEST_KEYWORD_SIZE 256

/*
 * get_request() reads from 'sock' into a static buffer until
 * ProcessRequest() reports a complete request, dumps the request with
 * storerequest() and then parses the request line.
 *
 * NOTE(review): only the first part of this function is visible here, so
 * the code below is left untouched. Things to confirm when the whole
 * function is at hand:
 *  - if sread() can return exactly REQBUFSIZ - offset bytes, the
 *    'reqbuf[offset] = 0' inside the loop writes one byte past reqbuf
 *  - the first sscanf() conversion allows REQBUFSIZ_TXT characters but
 *    'request' only holds REQUEST_KEYWORD_SIZE bytes
 *  - 'logbuf' contains client supplied text and is passed to logmsg() as
 *    the format string
 *  - the "/quit" comparison uses length 15, copied from the
 *    "/verifiedserver" check above it
 */
static int get_request(int sock, int *part, int *open)
{
  static char reqbuf[REQBUFSIZ], doc[MAXDOCNAMELEN];
  static char request[REQUEST_KEYWORD_SIZE];
  unsigned int offset = 0;
  int prot_major, prot_minor;
  char logbuf[256];

  *part = 0; /* part zero equals none */

  *open = TRUE; /* connection should remain open and wait for more commands */

  while (offset < REQBUFSIZ) {
    int got = sread(sock, reqbuf + offset, REQBUFSIZ - offset);
    if (got <= 0) {
      if (got < 0) {
        perror("recv");
        logmsg("recv() returned error");
        return DOCNUMBER_INTERNAL;
      }
      logmsg("Connection closed by client");
      return DOCNUMBER_INTERNAL;
    }
    offset += got;

    reqbuf[offset] = 0;

    if(ProcessRequest(reqbuf))
      break;
  }

  if (offset >= REQBUFSIZ) {
    logmsg("Request buffer overflow, closing connection");
    return DOCNUMBER_INTERNAL;
  }
  reqbuf[offset]=0;

  /* dump the request to an external file */
  storerequest(reqbuf);

  if(sscanf(reqbuf, "%" REQBUFSIZ_TXT"s %" MAXDOCNAMELEN_TXT "s HTTP/%d.%d",
            request,
            doc,
            &prot_major,
            &prot_minor) == 4) {
    char *ptr;
    int test_no=0;

    /* find the last slash */
    ptr = strrchr(doc, '/');

    /* get the number after it */
    if(ptr) {
      if((strlen(doc) + strlen(request)) < 200)
        sprintf(logbuf, "Got request: %s %s HTTP/%d.%d",
                request, doc, prot_major, prot_minor);
      else
        sprintf(logbuf, "Got a *HUGE* request HTTP/%d.%d",
                prot_major, prot_minor);
      logmsg(logbuf);

      if(!strncmp("/verifiedserver", ptr, 15)) {
        logmsg("Are-we-friendly question received");
        return DOCNUMBER_WERULEZ;
      }
      if(!strncmp("/quit", ptr, 15)) {
        logmsg("Request-to-quit received");
        return DOCNUMBER_QUIT;
      }

      ptr++; /* skip the slash */

      test_no = strtol(ptr, &ptr, 10);

      if(test_no > 10000) {
        *part = test_no % 10000;
        test_no /= 10000;
      }
      else
        *part = 0;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -