📄 htrobman.h
字号:
/* Webbot - the W3C Mini Robot WEBBOT - THE W3C MINI ROBOT *//*** (c) COPRIGHT MIT 1995.** Please first read the full copyright statement in the file COPYRIGH.*//* This program illustrates how to travers links using the Anchor object */#ifndef HTROBMAN_H#define HTROBMAN_H#include "WWWLib.h" /* Global Library Include file */#include "WWWApp.h" /* Application stuff */#include "WWWTrans.h"#include "WWWInit.h"#include "WWWSQL.h"#include "HText.h"#include "HTRobot.h" /* Implemented here */#ifndef W3C_VERSION#define W3C_VERSION "Unspecified"#endif#define APP_NAME "W3CRobot"#define APP_VERSION W3C_VERSION#define COMMAND_LINE "http://www.w3.org/Robot/User/CommandLine"#define ROBOTS_TXT "/robots.txt"#define DEFAULT_OUTPUT_FILE "robot.out"#define DEFAULT_RULE_FILE "robot.conf"#define DEFAULT_LOG_FILE "log-clf.txt"#define DEFAULT_HIT_FILE "log-hit.txt"#define DEFAULT_REL_FILE "log-rel.txt"#define DEFAULT_LM_FILE "log-lastmodified.txt"#define DEFAULT_TITLE_FILE "log-title.txt"#define DEFAULT_REFERER_FILE "log-referer.txt"#define DEFAULT_REJECT_FILE "log-reject.txt"#define DEFAULT_NOTFOUND_FILE "log-notfound.txt"#define DEFAULT_CONNEG_FILE "log-conneg.txt"#define DEFAULT_NOALTTAG_FILE "log-alt.txt"#define DEFAULT_FORMAT_FILE "log-format.txt"#define DEFAULT_CHARSET_FILE "log-charset.txt"#define DEFAULT_MEMLOG "robot.mem"#define DEFAULT_PREFIX ""#define DEFAULT_IMG_PREFIX ""#define DEFAULT_DEPTH 0#define DEFAULT_DELAY 50 /* Write delay in ms */#define DEFAULT_SQL_SERVER "localhost"#define DEFAULT_SQL_DB "webbot"#define DEFAULT_SQL_USER "webbot"#define DEFAULT_SQL_PW ""#if 0#define HT_MEMLOG /* Is expensive in performance! */#endif#define MILLIES 1000#define DEFAULT_TIMEOUT 20 /* timeout in secs */typedef enum _MRFlags { MR_IMG = 0x1, MR_LINK = 0x2, MR_PREEMPTIVE = 0x4, MR_TIME = 0x8, MR_SAVE = 0x10, MR_QUIET = 0x20, MR_REAL_QUIET = 0x40, MR_VALIDATE = 0x80, MR_END_VALIDATE = 0x100, MR_KEEP_META = 0x200, MR_LOGGING = 0x400, MR_DISTRIBUTIONS = 0x800, MR_NOROBOTSTXT = 0x1000, MR_NOMETATAGS = 0x2000, MR_BFS = 0x4000} MRFlags;typedef struct _Robot { int depth; /* How deep is our tree */ int ndoc; int *cdepth; /* Number of nodes per level */ int cnt; /* Count of requests */ int cindex; /* Number assigned to each document */ HTList * hyperdoc; /* List of our HyperDoc Objects */ HTList * htext; /* List of our HText Objects */ HTList * fingers; HTList * queue; /* Queue */ int cq; int timer; int waits; char * cwd; /* Current dir URL */ char * rules; char * prefix; char * img_prefix; char * logfile; /* clf log */ HTLog * log; char * reffile; /* referer log */ HTLog * ref; char * rejectfile; /* unchecked links */ HTLog * reject; char * notfoundfile; /* links that returned 404 */ HTLog * notfound; char * connegfile; /* links that were conneg'ed */ HTLog * conneg; char * noalttagfile; /* images without alt tags*/ HTLog * noalttag; char * hitfile; /* links sorted after hit counts */ char * relfile; /* link sorted after relationships */ HTLinkType relation; /* Specific relation to look for */ char * titlefile; /* links with titles */ char * mtfile; /* media types encountered */ char * charsetfile; /* charsets encountered */ char * lmfile; /* sortef after last modified dates */ char * outputfile; FILE * output; char * furl; /* First url */ MRFlags flags; long get_bytes; /* Total number of bytes processed using GET*/ long get_docs; /* Total number of documents using GET */ long head_bytes; /* bytes processed bytes processed using HEAD */ long head_docs; /* Total number of documents using HEAD*/ long other_docs; ms_t time; /* Time of run */#ifdef HT_POSIX_REGEX regex_t * include; regex_t * exclude; regex_t * check; regex_t * exc_robot; /* Robots.txt exclusion */#endif#ifdef HT_MYSQL HTSQLLog * sqllog; char * sqlserver; char * sqldb; char * sqluser; char * sqlpw; char * sqlrelative; BOOL sqlexternals; int sqlflags;#endif} Robot;typedef struct _Finger { Robot * robot; HTRequest * request; HTParentAnchor * dest;} Finger;/*** The HyperDoc object is bound to the anchor and contains information about** where we are in the search for recursive searches*/typedef struct _HyperDoc { HTParentAnchor * anchor; int depth; int hits; int code; int index; char * title; HTMethod method;} HyperDoc;/*** This is the HText object that is created every time we start parsing an** HTML object*/struct _HText { HTRequest * request; BOOL follow;};/*** A structure for calculating metadata distributions*/typedef struct _MetaDist { HTAtom * name; int hits;} MetaDist;#ifdef HT_POSIX_REGEXPUBLIC regex_t * get_regtype (Robot * mr, const char * regex_str, int cflags);#endifPUBLIC int OutputData(const char * fmt, ...);PUBLIC HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth);PUBLIC BOOL HyperDoc_delete (HyperDoc * hd);PUBLIC Robot * Robot_new (void);PUBLIC Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method);PUBLIC BOOL Robot_registerHTMLParser (void);PUBLIC void Cleanup (Robot * me, int status);PUBLIC void SetSignal (void);PUBLIC void VersionInfo (void);PUBLIC int terminate_handler (HTRequest * request, HTResponse * response, void * param, int status) ;PUBLIC int my_terminate_handler (HTRequest * request, HTResponse * response, void * param, int status) ;PUBLIC void Serving_queue(Robot *mr);PUBLIC char *get_robots_txt(char *uri);#endif/* ___________________________________ @(#) $Id: HTRobMan.html,v 1.6 1999/02/24 18:47:31 frystyk Exp $ */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -