⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmltitle.cc

📁 THIS IS HTTP CURL Example
💻 CC
字号:
/***************************************************************************** *                                  _   _ ____  _ *  Project                     ___| | | |  _ \| | *                             / __| | | | |_) | | *                            | (__| |_| |  _ <| |___ *                             \___|\___/|_| \_\_____| * * $Id: htmltitle.cc,v 1.3 2005/02/09 15:15:01 giva Exp $ */// Get a web page, parse it with libxml.//// Written by Lars Nilsson//// GNU C++ compile command line suggestion (edit paths accordingly)://// g++ -Wall -I/opt/curl/include -I/opt/libxml/include/libxml2 htmltitle.cc \// -o htmltitle -L/opt/curl/lib -L/opt/libxml/lib -lcurl -lxml2#include <stdio.h>#include <string.h>#include <stdlib.h>#include <string>#include <curl/curl.h>#include <libxml/HTMLparser.h>////  Case-insensitive string comparison//#ifdef _MSC_VER#define COMPARE(a, b) (!stricmp((a), (b)))#else#define COMPARE(a, b) (!strcasecmp((a), (b)))#endif////  libxml callback context structure//struct Context{  Context(): addTitle(false) { }  bool addTitle;  std::string title;};////  libcurl variables for error strings and returned datastatic char errorBuffer[CURL_ERROR_SIZE];static std::string buffer;////  libcurl write callback function//static int writer(char *data, size_t size, size_t nmemb,                  std::string *writerData){  if (writerData == NULL)    return 0;  writerData->append(data, size*nmemb);  return size * nmemb;}////  libcurl connection initialization//static bool init(CURL *&conn, char *url){  CURLcode code;  conn = curl_easy_init();  if (conn == NULL)  {    fprintf(stderr, "Failed to create CURL connection\n");    exit(EXIT_FAILURE);  }  code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer);  if (code != CURLE_OK)  {    fprintf(stderr, "Failed to set error buffer [%d]\n", code);    return false;  }  code = curl_easy_setopt(conn, CURLOPT_URL, url);  if (code != CURLE_OK)  {    fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer);    return false;  }  code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1);  if (code != CURLE_OK)  {    fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer);    return false;  }  code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer);  if (code != CURLE_OK)  {    fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer);    return false;  }  code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer);  if (code != CURLE_OK)  {    fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer);    return false;  }  return true;}////  libxml start element callback function//static void StartElement(void *voidContext,                         const xmlChar *name,                         const xmlChar **attributes){  Context *context = (Context *)voidContext;  if (COMPARE((char *)name, "TITLE"))  {    context->title = "";    context->addTitle = true;  }}////  libxml end element callback function//static void EndElement(void *voidContext,                       const xmlChar *name){  Context *context = (Context *)voidContext;  if (COMPARE((char *)name, "TITLE"))    context->addTitle = false;}////  Text handling helper function//static void handleCharacters(Context *context,                             const xmlChar *chars,                             int length){  if (context->addTitle)    context->title.append((char *)chars, length);}////  libxml PCDATA callback function//static void Characters(void *voidContext,                       const xmlChar *chars,                       int length){  Context *context = (Context *)voidContext;  handleCharacters(context, chars, length);}////  libxml CDATA callback function//static void cdata(void *voidContext,                  const xmlChar *chars,                  int length){  Context *context = (Context *)voidContext;  handleCharacters(context, chars, length);}////  libxml SAX callback structure//static htmlSAXHandler saxHandler ={  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  StartElement,  EndElement,  NULL,  Characters,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  cdata,  NULL};////  Parse given (assumed to be) HTML text and return the title//static void parseHtml(const std::string &html,                      std::string &title){  htmlParserCtxtPtr ctxt;  Context context;  ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "",                                  XML_CHAR_ENCODING_NONE);  htmlParseChunk(ctxt, html.c_str(), html.size(), 0);  htmlParseChunk(ctxt, "", 0, 1);  htmlFreeParserCtxt(ctxt);  title = context.title;}int main(int argc, char *argv[]){  CURL *conn = NULL;  CURLcode code;  std::string title;  // Ensure one argument is given  if (argc != 2)  {    fprintf(stderr, "Usage: %s <url>\n", argv[0]);    exit(EXIT_FAILURE);  }  curl_global_init(CURL_GLOBAL_DEFAULT);  // Initialize CURL connection  if (!init(conn, argv[1]))  {    fprintf(stderr, "Connection initializion failed\n");    exit(EXIT_FAILURE);  }  // Retrieve content for the URL  code = curl_easy_perform(conn);  curl_easy_cleanup(conn);  if (code != CURLE_OK)  {    fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer);    exit(EXIT_FAILURE);  }  // Parse the (assumed) HTML code  parseHtml(buffer, title);  // Display the extracted title  printf("Title: %s\n", title.c_str());  return EXIT_SUCCESS;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -