📄 ckspider.h
字号:
// CkSpider.h: interface for the CkSpider class.
//
//////////////////////////////////////////////////////////////////////
#ifndef _CKSPIDER_H
#define _CKSPIDER_H
#pragma once
#include "CkString.h"
class CkSpiderProgress;
/*
IMPORTANT: Objects returned by methods as non-const pointers must be deleted
by the calling application.
*/
#include "CkObject.h"
// CLASS: CkSpider
class CkSpider : public CkObject
{
private:
CkSpiderProgress *m_callback;
void *m_impl;
bool m_utf8; // If true, all input "const char *" parameters are utf-8, otherwise they are ANSI strings.
// Don't allow assignment or copying these objects.
CkSpider(const CkSpider &) { }
CkSpider &operator=(const CkSpider &) { return *this; }
public:
//void *getImpl(void) const { return m_impl; }
//CkSpider(void *impl) : m_impl(impl) { }
CkSpider();
virtual ~CkSpider();
CkSpiderProgress *get_EventCallbackObject(void) const;
void put_EventCallbackObject(CkSpiderProgress *progress);
// BEGIN PUBLIC INTERFACE
bool SaveLastError(const char *filename);
void LastErrorXml(CkString &str);
void LastErrorHtml(CkString &str);
void LastErrorText(CkString &str);
bool get_Utf8(void) const;
void put_Utf8(bool b);
void SleepMs(long millisec);
void get_LastHtmlDescription(CkString &strOut);
void get_LastHtmlKeywords(CkString &strOut);
void get_LastHtmlTitle(CkString &strOut);
void get_LastHtml(CkString &strOut);
bool get_LastFromCache(void);
void get_LastModDate(SYSTEMTIME &sysTime);
void get_LastUrl(CkString &strOut);
void get_LastModDateStr(CkString &strOut);
void SkipUnspidered(long index);
bool FetchRobotsText(CkString &strOut);
void get_Domain(CkString &strOut);
void AddMustMatchPattern(const char *pattern);
void AddAvoidOutboundLinkPattern(const char *pattern);
bool GetAvoidPattern(long index, CkString &strOut);
void AddAvoidPattern(const char *pattern);
bool GetOutboundLink(long index, CkString &strOut);
bool GetFailedUrl(long index, CkString &strOut);
bool GetSpideredUrl(long index, CkString &strOut);
bool GetUnspideredUrl(long index, CkString &strOut);
bool RecrawlLast(void);
void ClearOutboundLinks();
void ClearFailedUrls();
void ClearSpideredUrls();
int get_WindDownCount(void);
void put_WindDownCount(long newVal);
// These times are in milliseconds.
long get_HeartbeatMs(void);
void put_HeartbeatMs(long newVal);
int get_NumAvoidPatterns(void);
int get_NumOutboundLinks(void);
int get_NumFailed(void);
int get_NumSpidered(void);
int get_NumUnspidered(void);
bool CrawlNext(void);
bool get_ChopAtQuery(void);
void put_ChopAtQuery(bool newVal);
bool get_AvoidHttps(void);
void put_AvoidHttps(bool newVal);
int get_MaxResponseSize(void);
void put_MaxResponseSize(long newVal);
int get_MaxUrlLen(void);
void put_MaxUrlLen(long newVal);
void get_CacheDir(CkString &strOut);
void put_CacheDir(const char *dir);
bool get_UpdateCache(void);
void put_UpdateCache(bool newVal);
bool get_FetchFromCache(void);
void put_FetchFromCache(bool newVal);
int get_ConnectTimeout(void);
void put_ConnectTimeout(long numSeconds);
void put_UserAgent(const char *ua);
const char *userAgent(void);
void get_UserAgent(CkString &strOut);
int get_ReadTimeout(void);
void put_ReadTimeout(long numSeconds);
void AddUnspidered(const char *url);
void Initialize(const char *domain);
void GetDomain(const char *url, CkString &domainOut);
void GetBaseDomain(const char *domain, CkString &domainOut);
void CanonicalizeUrl(const char *url, CkString &urlOut);
CkString m_resultString;
const char *getDomain(const char *url);
const char *getBaseDomain(const char *domain);
const char *canonicalizeUrl(const char *url);
const char *getAvoidPattern(long index);
const char *getOutboundLink(long index);
const char *getFailedUrl(long index);
const char *getSpideredUrl(long index);
const char *getUnspideredUrl(long index);
const char *cacheDir(void);
const char *avoidPattern(long index);
const char *outboundLink(long index);
const char *failedUrl(long index);
const char *spideredUrl(long index);
const char *unspideredUrl(long index);
const char *domain(void);
const char *lastHtmlDescription(void);
const char *lastHtmlKeywords(void);
const char *lastHtmlTitle(void);
const char *lastHtml(void);
const char *lastUrl(void);
const char *lastModDateStr(void);
const char *fetchRobotsText(void);
const char *lastErrorText(void);
const char *lastErrorXml(void);
const char *lastErrorHtml(void);
// END PUBLIC INTERFACE
};
#endif
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -