⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htrobman.h

📁 www工具包
💻 H
字号:
/*                                                                Webbot - the W3C Mini Robot                               WEBBOT - THE W3C MINI ROBOT                                              *//***      (c) COPRIGHT MIT 1995.**      Please first read the full copyright statement in the file COPYRIGH.*//*   This program illustrates how to travers links using the Anchor object    */#ifndef HTROBMAN_H#define HTROBMAN_H#include "WWWLib.h"                           /* Global Library Include file */#include "WWWApp.h"                                     /* Application stuff */#include "WWWTrans.h"#include "WWWInit.h"#include "WWWSQL.h"#include "HText.h"#include "HTRobot.h"                                     /* Implemented here */#ifndef W3C_VERSION#define W3C_VERSION             "Unspecified"#endif#define APP_NAME                "W3CRobot"#define APP_VERSION             W3C_VERSION#define COMMAND_LINE            "http://www.w3.org/Robot/User/CommandLine"#define ROBOTS_TXT              "/robots.txt"#define DEFAULT_OUTPUT_FILE     "robot.out"#define DEFAULT_RULE_FILE       "robot.conf"#define DEFAULT_LOG_FILE        "log-clf.txt"#define DEFAULT_HIT_FILE        "log-hit.txt"#define DEFAULT_REL_FILE        "log-rel.txt"#define DEFAULT_LM_FILE         "log-lastmodified.txt"#define DEFAULT_TITLE_FILE      "log-title.txt"#define DEFAULT_REFERER_FILE    "log-referer.txt"#define DEFAULT_REJECT_FILE     "log-reject.txt"#define DEFAULT_NOTFOUND_FILE   "log-notfound.txt"#define DEFAULT_CONNEG_FILE     "log-conneg.txt"#define DEFAULT_NOALTTAG_FILE   "log-alt.txt"#define DEFAULT_FORMAT_FILE     "log-format.txt"#define DEFAULT_CHARSET_FILE    "log-charset.txt"#define DEFAULT_MEMLOG          "robot.mem"#define DEFAULT_PREFIX          ""#define DEFAULT_IMG_PREFIX      ""#define DEFAULT_DEPTH           0#define DEFAULT_DELAY           50                      /* Write delay in ms */#define DEFAULT_SQL_SERVER      "localhost"#define DEFAULT_SQL_DB          "webbot"#define DEFAULT_SQL_USER        "webbot"#define DEFAULT_SQL_PW          ""#if 0#define HT_MEMLOG               /* Is expensive in performance! */#endif#define MILLIES                 1000#define DEFAULT_TIMEOUT         20                        /* timeout in secs */typedef enum _MRFlags {    MR_IMG              = 0x1,    MR_LINK             = 0x2,    MR_PREEMPTIVE       = 0x4,    MR_TIME             = 0x8,    MR_SAVE             = 0x10,    MR_QUIET            = 0x20,    MR_REAL_QUIET       = 0x40,    MR_VALIDATE         = 0x80,    MR_END_VALIDATE     = 0x100,    MR_KEEP_META        = 0x200,    MR_LOGGING          = 0x400,    MR_DISTRIBUTIONS    = 0x800,    MR_NOROBOTSTXT      = 0x1000,    MR_NOMETATAGS       = 0x2000,    MR_BFS              = 0x4000} MRFlags;typedef struct _Robot {    int                 depth;                       /* How deep is our tree */    int                 ndoc;    int                *cdepth;                /* Number of nodes per level */    int                 cnt;                            /* Count of requests */    int                 cindex;         /* Number assigned to each document */    HTList *            hyperdoc;            /* List of our HyperDoc Objects */    HTList *            htext;                  /* List of our HText Objects */    HTList *            fingers;    HTList *            queue;                  /* Queue */    int                 cq;    int                 timer;    int                 waits;    char *              cwd;                    /* Current dir URL */    char *              rules;    char *              prefix;    char *              img_prefix;    char *              logfile;                /* clf log */    HTLog *             log;    char *              reffile;                /* referer log */    HTLog *             ref;    char *              rejectfile;             /* unchecked links */    HTLog *             reject;    char *              notfoundfile;           /* links that returned 404 */    HTLog *             notfound;    char *              connegfile;             /* links that were conneg'ed */    HTLog *             conneg;    char *              noalttagfile;           /* images without alt tags*/    HTLog *             noalttag;    char *              hitfile;                /* links sorted after hit counts */    char *              relfile;                /* link sorted after relationships */    HTLinkType          relation;               /* Specific relation to look for */    char *              titlefile;              /* links with titles */    char *              mtfile;                 /* media types encountered */    char *              charsetfile;            /* charsets encountered */    char *              lmfile;                 /* sortef after last modified dates */    char *              outputfile;    FILE *              output;    char *              furl;                              /* First url */    MRFlags             flags;    long                get_bytes;      /* Total number of bytes processed using GET*/    long                get_docs;       /* Total number of documents using GET */    long                head_bytes;     /* bytes processed bytes processed using HEAD */    long                head_docs;      /* Total number of documents using HEAD*/    long                other_docs;    ms_t                time;           /* Time of run */#ifdef HT_POSIX_REGEX    regex_t *           include;    regex_t *           exclude;    regex_t *           check;    regex_t *           exc_robot;     /* Robots.txt exclusion */#endif#ifdef HT_MYSQL    HTSQLLog *          sqllog;    char *              sqlserver;    char *              sqldb;    char *              sqluser;    char *              sqlpw;    char *              sqlrelative;    BOOL                sqlexternals;    int                 sqlflags;#endif} Robot;typedef struct _Finger {    Robot * robot;    HTRequest * request;    HTParentAnchor * dest;} Finger;/***  The HyperDoc object is bound to the anchor and contains information about**  where we are in the search for recursive searches*/typedef struct _HyperDoc {    HTParentAnchor *    anchor;    int                 depth;    int                 hits;    int                 code;    int                 index;    char *              title;    HTMethod            method;} HyperDoc;/*** This is the HText object that is created every time we start parsing an** HTML object*/struct _HText {    HTRequest *         request;    BOOL                follow;};/***  A structure for calculating metadata distributions*/typedef struct _MetaDist {    HTAtom *            name;    int                 hits;} MetaDist;#ifdef HT_POSIX_REGEXPUBLIC regex_t * get_regtype (Robot * mr, const char * regex_str, int cflags);#endifPUBLIC int OutputData(const char  * fmt, ...);PUBLIC HyperDoc * HyperDoc_new (Robot * mr,HTParentAnchor * anchor, int depth);PUBLIC BOOL HyperDoc_delete (HyperDoc * hd);PUBLIC Robot * Robot_new (void);PUBLIC Finger * Finger_new (Robot * robot, HTParentAnchor * dest, HTMethod method);PUBLIC BOOL Robot_registerHTMLParser (void);PUBLIC void Cleanup (Robot * me, int status);PUBLIC void SetSignal (void);PUBLIC void VersionInfo (void);PUBLIC int terminate_handler (HTRequest * request, HTResponse * response,                               void * param, int status) ;PUBLIC int my_terminate_handler (HTRequest * request, HTResponse * response,                               void * param, int status) ;PUBLIC void Serving_queue(Robot *mr);PUBLIC char *get_robots_txt(char *uri);#endif/*      ___________________________________                              @(#) $Id: HTRobMan.html,v 1.6 1999/02/24 18:47:31 frystyk Exp $                                                                                             */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -