⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 file.h

📁 Larbin互联网蜘蛛索引系统
💻 H
字号:
// Larbin// Sebastien Ailleret// 13-12-99 -> 06-02-00#ifndef FILE_H#define FILE_H#include "types.h"#include "xutils/url.h"#include "xutils/string.h"#include "xutils/Vector.h"struct Connexion;class Site;class file { protected:  // Content of the page  String *answer; public:  // Constructor  file ();  // Destructor  virtual ~file ();  // a char arrive from the server  virtual int input (char *c, int size) = 0;  // Is it a robots.txt  virtual bool isRobots ();};class html : public file { private:  // Where are we  url *here;  // Should we get the page or just follow links  bool needAnswer;  // State of our read : answer, headers, tag, html...  int state;  // What is the current tag  int tagId;  int tagPos;  // What is the content of this tag  String *content;  // Headers of the page  String *headers;  // base de l'URL  char *base;  /** parse the answer code line */  inline int parseCmdline ();  /** parse a line of header   * @return 0 if OK, 1 if content type not understandable for us   */  int parseHeader ();  /** Try to understand this tag   */  void matchTag (char c);  /** read the content of an interesting tag   */  void tagContent ();  /** links extracted from this page */  Vector<char> links; public:  // Constructor  html (url *here);  // Destructor  virtual ~html ();  /** a string is arriving   * return 0 usually, 1 if don't want any more input   * in the latter case, errno is set to FetchError reason   */  virtual int input (char *c, int size);  // Is it a robots.txt  inline virtual bool isRobots () { return false; }  /** return the url of this file */  inline url *getUrl () { return here; }  /** Is this page interesting ? */  inline bool isInteresting () { return needAnswer; }  /** return the content of content */  inline String *getPage () { return answer; }  /** return the headers */  inline String *getHeaders () { return headers; }  /** return the links */  inline Vector<char> *html::getLinks () { return &links; }};class robots : public file { private:  // url to get after this robots.txt  url *next;  // test http headers  bool parseHeaders ();  // suppress commentary  void uncomment ();  // read the file  void parseRobots ();  uint pos;  /** Get a token   * try to understand it as a path, so correct errors of lazzy webmasters   */  char *getGoodToken(); public:  // Constructor  robots (url *next);  // Destructor  virtual ~robots ();  // a char arrive from the server  virtual int input (char *c, int size);  // parse the file (once everything has been read)  virtual void parse (bool isError);  // if we fetch a robots, answer the url to get after  inline url *toGet() { return next; }};#endif // FILE_H

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -