📄 transfer.c

📁 harvest是一个下载html网页的机器人
💻 C
📖 第 1 页 / 共 5 页
字号:
            else if((checkprefix("WWW-Authenticate:", k->p) &&                     (401 == k->httpcode)) ||                    (checkprefix("Proxy-authenticate:", k->p) &&                     (407 == k->httpcode))) {              result = Curl_http_auth(conn, k->httpcode, k->p);              if(result)                return result;            }            else if ((k->httpcode >= 300 && k->httpcode < 400) &&                     checkprefix("Location:", k->p)) {              if(data->set.http_follow_location) {                /* this is the URL that the server advices us to get instead */                char *ptr;                char *start=k->p;                char backup;                start += 9; /* pass "Location:" */                /* Skip spaces and tabs. We do this to support multiple                   white spaces after the "Location:" keyword. */                while(*start && isspace((int)*start ))                  start++;                                /* Scan through the string from the end to find the last                   non-space. k->end_ptr points to the actual terminating zero                   letter, move pointer one letter back and start from                   there. This logic strips off trailing whitespace, but keeps                   any embedded whitespace. 
*/                ptr = k->end_ptr-1;                while((ptr>=start) && isspace((int)*ptr))                  ptr--;                ptr++;                backup = *ptr; /* store the ending letter */                if(ptr != start) {                  *ptr = '\0';   /* zero terminate */                  conn->newurl = strdup(start); /* clone string */                  *ptr = backup; /* restore ending letter */                }              }#if 0 /* for consideration */              else {                /* This is a Location: but we have not been instructed to                   follow it */                infof(data, "We ignore this location header as instructed\n");              }#endif            }            /*             * End of header-checks. Write them to the client.             */            writetype = CLIENTWRITE_HEADER;            if (data->set.http_include_header)              writetype |= CLIENTWRITE_BODY;            if(data->set.verbose)              Curl_debug(data, CURLINFO_HEADER_IN,                         k->p, k->hbuflen);            result = Curl_client_write(data, writetype, k->p, k->hbuflen);            if(result)              return result;            data->info.header_size += k->hbuflen;            conn->headerbytecount += k->hbuflen;                          /* reset hbufp pointer && hbuflen */            k->hbufp = data->state.headerbuff;            k->hbuflen = 0;          }          while (!stop_reading && *k->str); /* header line within buffer */          if(stop_reading)            /* We've stopped dealing with input, get out of the do-while loop */            break;          /* We might have reached the end of the header part here, but             there might be a non-header part left in the end of the read             buffer. 
*/        }                       /* end if header mode */        /* This is not an 'else if' since it may be a rest from the header           parsing, where the beginning of the buffer is headers and the end           is non-headers. */        if (k->str && !k->header && (nread > 0)) {                      if(0 == k->bodywrites) {            /* These checks are only made the first time we are about to               write a piece of the body */            if(conn->protocol&PROT_HTTP) {              /* HTTP-only checks */              if (conn->newurl) {                if(conn->bits.close) {                  /* Abort after the headers if "follow Location" is set                     and we're set to close anyway. */                  k->keepon &= ~KEEP_READ;                  FD_ZERO(&k->rkeepfd);                  *done = TRUE;                  return CURLE_OK;                }                /* We have a new url to load, but since we want to be able                   to re-use this connection properly, we read the full                   response in "ignore more" */                k->ignorebody = TRUE;                infof(data, "Ignoring the response-body\n");              }              if (conn->resume_from &&                       !k->content_range &&                       (data->set.httpreq==HTTPREQ_GET)) {                /* we wanted to resume a download, although the server                   doesn't seem to support this and we did this with a GET                   (if it wasn't a GET we did a POST or PUT resume) */                failf (data, "HTTP server doesn't seem to support "                       "byte ranges. Cannot resume.");                return CURLE_HTTP_RANGE_ERROR;              }              else if(data->set.timecondition && !conn->range) {                /* A time condition has been set AND no ranges have been                   requested. 
This seems to be what chapter 13.3.4 of                   RFC 2616 defines to be the correct action for a                   HTTP/1.1 client */                if((k->timeofdoc > 0) && (data->set.timevalue > 0)) {                  switch(data->set.timecondition) {                  case TIMECOND_IFMODSINCE:                  default:                    if(k->timeofdoc < data->set.timevalue) {                      infof(data,                            "The requested document is not new enough\n");                      *done = TRUE;                      return CURLE_OK;                    }                    break;                  case TIMECOND_IFUNMODSINCE:                    if(k->timeofdoc > data->set.timevalue) {                      infof(data,                            "The requested document is not old enough\n");                      *done = TRUE;                      return CURLE_OK;                    }                    break;                  } /* switch */                } /* two valid time strings */              } /* we have a time condition */            } /* this is HTTP */          } /* this is the first time we write a body part */          k->bodywrites++;          /* pass data to the debug function before it gets "dechunked" */          if(data->set.verbose) {            if(k->badheader) {              Curl_debug(data, CURLINFO_DATA_IN, data->state.headerbuff,                         k->hbuflen);              if(k->badheader == HEADER_PARTHEADER)                Curl_debug(data, CURLINFO_DATA_IN, k->str, nread);            }            else              Curl_debug(data, CURLINFO_DATA_IN, k->str, nread);          }          if(conn->bits.chunk) {            /*             * Bless me father for I have sinned. Here comes a chunked             * transfer flying and we need to decode this properly.  While             * the name says read, this function both reads and writes away             * the data. 
The returned 'nread' holds the number of actual             * data it wrote to the client.  */            CHUNKcode res =              Curl_httpchunk_read(conn, k->str, nread, &nread);            if(CHUNKE_OK < res) {              if(CHUNKE_WRITE_ERROR == res) {                failf(data, "Failed writing data");                return CURLE_WRITE_ERROR;              }              failf(data, "Received problem %d in the chunky parser", res);              return CURLE_RECV_ERROR;            }            else if(CHUNKE_STOP == res) {              /* we're done reading chunks! */              k->keepon &= ~KEEP_READ; /* read no more */              FD_ZERO(&k->rkeepfd);              /* There are now possibly N number of bytes at the end of the                 str buffer that weren't written to the client, but we don't                 care about them right now. */            }            /* If it returned OK, we just keep going */          }          if((-1 != conn->maxdownload) &&             (k->bytecount + nread >= conn->maxdownload)) {            nread = conn->maxdownload - k->bytecount;            if(nread < 0 ) /* this should be unusual */              nread = 0;            k->keepon &= ~KEEP_READ; /* we're done reading */            FD_ZERO(&k->rkeepfd);          }          k->bytecount += nread;          Curl_pgrsSetDownloadCounter(data, (double)k->bytecount);                      if(!conn->bits.chunk && (nread || k->badheader)) {            /* If this is chunky transfer, it was already written */            if(k->badheader && !k->ignorebody) {              /* we parsed a piece of data wrongly assuming it was a header                 and now we output it as body instead */              result = Curl_client_write(data, CLIENTWRITE_BODY,                                         data->state.headerbuff,                                         k->hbuflen);            }            if(k->badheader < HEADER_ALLBAD) {              /* This switch handles various content 
encodings. If there's an                 error here, be sure to check over the almost identical code                 in http_chunks.c. 08/29/02 jhrg                 Make sure that ALL_CONTENT_ENCODINGS contains all the                 encodings handled here. */#ifdef HAVE_LIBZ              switch (k->content_encoding) {              case IDENTITY:#endif                /* This is the default when the server sends no                   Content-Encoding header. See Curl_readwrite_init; the                   memset() call initializes k->content_encoding to zero.                   08/28/02 jhrg */                if(!k->ignorebody)                  result = Curl_client_write(data, CLIENTWRITE_BODY, k->str,                                              nread);#ifdef HAVE_LIBZ                break;              case DEFLATE:                 /* Assume CLIENTWRITE_BODY; headers are not encoded. */                result = Curl_unencode_deflate_write(data, k, nread);                break;              case GZIP:                /* Assume CLIENTWRITE_BODY; headers are not encoded. */                result = Curl_unencode_gzip_write(data, k, nread);                break;              case COMPRESS:          /* FIXME 08/27/02 jhrg */              default:                failf (data, "Unrecognized content encoding type. "                       "libcurl understands `identity', `deflate' and `gzip' "                       "content encodings.");                result = CURLE_BAD_CONTENT_ENCODING;                break;              }#endif            }            k->badheader = HEADER_NORMAL; /* taken care of now */            if(result)              return result;          }        } /* if (! header and data to read ) */      } while(!readdone);    } /* if( read from socket ) */    /* If we still have writing to do, we check if we have a writable       socket. 
Sometimes the writefdp is NULL, if no fd_set was done using       the multi interface and then we can do nothing but to attempt a       write to be sure. */    if((k->keepon & KEEP_WRITE) &&       (!writefdp || FD_ISSET(conn->writesockfd, writefdp)) ) {      /* write */      int i, si;      ssize_t bytes_written;      bool writedone=TRUE;      if ((k->bytecount == 0) && (k->writebytecount == 0))        Curl_pgrsTime(data, TIMER_STARTTRANSFER);      didwhat |= KEEP_WRITE;      /*       * We loop here to do the READ and SEND loop until we run out of       * data to send or until we get EWOULDBLOCK back       */      do {                /* only read more data if there's no upload data already           present in the upload buffer */        if(0 == conn->upload_present) {          /* init the "upload from here" pointer */          conn->upload_fromhere = k->uploadbuf;          if(!k->upload_done) {            /* HTTP pollution, this should be written nicer to become more               protocol agnostic. 
*/            if(k->wait100_after_headers &&               (conn->proto.http->sending == HTTPSEND_BODY)) {              /* If this call is to send body data, we must take some action:                 We have sent off the full HTTP 1.1 request, and we shall now                 go into the Expect: 100 state and await such a header */              k->wait100_after_headers = FALSE; /* headers sent */              k->write_after_100_header = TRUE; /* wait for the header */              FD_ZERO (&k->writefd);            /* clear it */              k->wkeepfd = k->writefd;          /* set the keeper variable */              k->keepon &= ~KEEP_WRITE;         /* disable writing */              k->start100 = Curl_tvnow();       /* timeout count starts now */              didwhat &= ~KEEP_WRITE;  /* we didn't write anything actually */              break;            }            nread = fillbuffer(conn, BUFSIZE);          }          else            nread = 0; /* we're done uploading/reading */          /* the signed int typecase of nread of for systems that has             unsigned size_t */          if (nread<=0) {            /* done */            k->keepon &= ~KEEP_WRITE; /* we're done writing */            FD_ZERO(&k->wkeepfd);            writedone = TRUE;            break;          }          /* store number of bytes available for upload */          conn->upload_present = nread;          /* convert LF to CRLF if so asked */          if (data->set.crlf) {              if(data->state.scratch == NULL)                data->state.scratch = malloc(2*BUFSIZE);              if(data->state.scratch == NULL) {                failf (data, "Failed to alloc scratch buffer!");                return CURLE_OUT_OF_MEMORY;              }            for(i = 0, si = 0; i < nread; i++, si++) {              if (conn->upload_fromhere[i] == 0x0a) {                data->state.scratch[si++] = 0x0d;                data->state.scratch[si] = 0x0a;              }              else                
data->state.scratch[si] = conn->upload_fromhere[i];            }            if(si != nread) {              /* only perform the special operation if we really did replace                 anything */              nread = si;              /* upload from the new (replaced) buffer instead */              conn->upload_fromhere = data->state.scratch;              /* set the new amount too */              conn->upload_present = nread;            }          }        }        else {          /* We have a partial buffer left from a previous "round". Use             that instead of reading more data */        }        /* write to socket (send away data) */        result = Curl_write(conn,
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -