⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 ib.c

📁 linux下的网页抓取与分析源码
💻 C
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <fcntl.h>
#include <unistd.h>

/* Name of the HTTP server to contact; main() reassigns it from the command line. */
char *host = "www.baidu.com";

/* TCP port the request is sent to. */
int port = 80;

/* Suffixes appended to the host name to form the two output file names. */
char *s3 = ".html";
char *s4 = "_header.txt";

/* Output file names built by main(): page body and response header. */
char pgname[512];
char hdname[512];

int main(int argc, char *argv[])
{char buffer[512];
 int isock,hd,pg;
 struct sockaddr_in pin;
 struct hostent * remoteHost;
 char message[512];
 int done = 0;
 int chars = 0;
 int l = 0;
 char* s;
 char* s1 ="Host:";
 char* s2 ="\r\n";
 host =argv[1];

 if( (remoteHost = gethostbyname(host)) == 0 )
 {
  printf("Error resolving host\n");
  exit(1);
 }

 bzero(message,sizeof(message));
 bzero(&pin,sizeof(pin));
 pin.sin_family = AF_INET;
 pin.sin_port = htons(port);
 pin.sin_addr.s_addr = ( (struct in_addr *)(remoteHost->h_addr))->s_addr;
 
 if( (isock = socket(AF_INET, SOCK_STREAM, 0)) == -1)
 {
  printf("Error opening socket!\n");
  exit(1);
 }
 sprintf(pgname,host);
 strcat(pgname,s3);
 sprintf(hdname,host);
 strcat(hdname,s4);
 sprintf(s,s1);
 strcat(s,host);
 strcat(s,s2); 
 sprintf(message, "GET / HTTP/1.1\r\n");
 strcat(message, s);
 strcat(message, "Accept: */*\r\n");
 strcat(message, "User-Agent: Mozilla/4.0(compatible)\r\n");
 strcat(message, "connection:Keep-Alive\r\n");
 strcat(message, "\r\n\r\n");

 printf("%s",message);
 hd=open(hdname,O_WRONLY|O_CREAT|O_APPEND);
 pg=open(pgname,O_WRONLY|O_CREAT|O_APPEND);

 if( connect(isock, (void *)&pin, sizeof(pin)) == -1 )
 {
  printf("Error connecting to socket\n");
  exit(1);
 }

 if( send(isock, message, strlen(message), 0) == -1)
 {
  printf("Error in send\n");
  exit(1);
 }
 
 while(done == 0)
 {
  l = recv(isock, buffer, 1, 0);
  if( l < 0 )
   done = 1;
  switch(*buffer)
  {
   case '\r':
    break;
   case '\n':
    if(chars == 0)
     done = 1;
    chars = 0;
    break;
   default:
    chars++;
    break;
  }
   printf("%c",*buffer);
   write(hd,buffer,l);
 }
  close(hd);
 do
 {
  l = recv(isock, buffer, sizeof(buffer) - 1,0);
  if( l < 0 )
   break;
  *(buffer + l) = 0;
  fputs(buffer, stdout);
  write(pg,buffer,l);
 }while( l > 0 );
 close(pg);
 close(isock);
 return 0;
}


⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -