📄 global.h
字号:
// Larbin// Sebastien Ailleret// 18-11-99 -> 15-04-00/* This class contains all global variables */#ifndef GLOBAL_H#define GLOBAL_H#include <adns.h>#include <time.h>#include "xfetcher/file.h"#include "xfetcher/hashTable.h"#include "xutils/url.h"#include "xutils/Vector.h"#include "xutils/GenericFifo.h"#include "xutils/string.h"#include "xinterf/output.h"#include "xutils/ConstantSizedFifo.h"#include "xutils/ConstantSizedFifoPriority.h"// define for the state of a connection#define EMPTY 0#define CONNECTING 1#define WRITE 2#define OPEN 3class Site;class Interval;/** This represent a connection : we have a fixed number of them * fetchOpen links them with servers * fetchPipe reads those which are linked */struct Connexion { char state; // what about this socket : EMPTY, CONNECTING, WRITE, OPEN int pos; // What part of the request has been sent // and how much have we received // pos is also used for reporting fetchError (awful hack) int socket; // number of the fds time_t timeout; // timeout for this connexion String request; // what is the http request file *parser; // parser for the connexion (is it a robots.txt or an html file) /** Constructor */ Connexion (); /** Dectructor : it is never used since we reuse connections */ ~Connexion (); /** Recycle a connexion */ void recycle ();};class global { public: /** Constructor : see global.cc for details */ global (int argc, char * argv[]); /** Destructor : never used */ ~global (); /** List of pages allready seen (one bit per page) */ static hashTable *seen; /** URLs for the sequencer : each one has a different priority */ static GenericFifo<url> *URLsInput; /** This one has the lowest priority */ static GenericFifo<url> *URLsInternal; /** hashtable of the site we accessed (cache) */ static Site *siteList; /** Sites which have at least one url to fetch */ static GenericFifo<Site> *okSites; /** Sites which have at least one url to fetch * but need a dns call */ static GenericFifo<Site> *dnsSites; /** Informations for the fetch * This array contain all the connections (empty or not) */ static Connexion **connexions; /** Internal state of adns */ static adns_state ads; /** free connection for fetchOpen : connections with state==EMPTY */ static ConstantSizedFifoPriority<Connexion> *freeConns; /** free connection for fetchOpen : connections waiting for end user */ static ConstantSizedFifo<Connexion> *userConns; /** Sum of the sizes of a fifo in Sites */ static Interval *inter; /** How deep should we go inside a site */ static uint depthInSite; /** how many seconds should we wait beetween 2 calls at the same server * 0 if you are only on a personnal server, >=30 otherwise */ static time_t waitDuration; /** Name of the bot */ static char *userAgent; /** Name of the man who lauch the bot */ static char *sender; /** http headers to send with requests * sends name of the robots, from field... */ static char *headers; /* internet address of the proxy (if any) */ static sockaddr_in *proxyAddr; /** connect to this server through a proxy using connection conn * return >0 in case of success (connecting or connected), 0 otherwise */ static char getProxyFds (Connexion *conn); /** Are we doing a specific search */ static bool isSpecific; /** What is the content-type we look for */ static char *contentType; /** Is there a privileged extension */ static char *privilegedExt; /** Limit to domain */ static Vector<char> *domains; /** forbidden extensions * extensions which are allways to avoid : .ps, .pdf... */ static Vector<char> forbExt; /** First URL to fetch : initiate the search */ static char *firstUrl; /** number of parallel connexions * your kernel must support a little more than nb_conn file descriptor * select should be able to handle as many fds */ static uint nb_conn; /** number of parallel dns calls */ static uint dnsConn; /** port on which is launched the http statistic webserver */ static unsigned short int httpPort; /** port on which input wait for queries */ static unsigned short int inputPort; /** parse configuration file */ static void parseFile (char *file); /** read the domain limit */ static void manageDomain (String &content, uint *pos); /** read the forbidden extensions */ static void manageExt (String &content, uint *pos);};#endif // GLOBAL_H
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -