📄 options.h
字号:
/* OpenWebSpider * * Authors: Stefano Alimonti AND Stefano Fantin * Version: 0.7 * E-Mails: shen139 [at] openwebspider (dot) org AND stefanofantinguz@yahoo.it * * * This file is part of OpenWebSpider * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */#ifndef __OPTIONS#define __OPTIONS#ifdef WIN32 #pragma comment(lib,"WS2_32.lib")#else #define SOCKET int #define SOCKADDR_IN struct sockaddr_in #define LPSOCKADDR struct sockaddr* #define SOCKET_ERROR -1 #define DWORD long long int #define LPVOID void* #define HANDLE pthread_t#endif/*Host*/#define PORT 80#define MAXHOSTSIZE 100#define MAXPAGESIZE 255#define MAXURLSIZE MAXHOSTSIZE + MAXPAGESIZE + 30/*Html*/#define MAXPACKETSIZE 200000#define MAXTAGSIZE 20#define MAXDESCRIPTIONSIZE 255#define MAXTAGLENGTH 10000#define MAXHTTPSTATUSSIZE 50/*robots.txt*/#define MAXDISALLOW 1000#define MAXCRAWLDELAY 999/*RANK*/#define MAXPRLEV 10/*SQL*/#define MAXQUERYSIZE MAXPACKETSIZE + 50000#define MAXUSERQUERYSIZE 200/* minimum delay between 2 pings */#define MYSQL_MIN_PING_DELAY 60000/*Socket*/#define FIRSTTIMEOUT 50000 /*MSeconds*/#define TIMEOUTs 10000 /*MSeconds*//*Thread && Mutex*/#define MAXMUTEX 10#define MAXTHREAD 1100#define NO_BLOCK -1#define BLOCKTHRDHST 0#define BLOCKDB1 1#define BLOCKINDEX 2#define BLOCKEXH 3#define BLOCKEXCRAWL 4#define AVGTHREADDELAY 100000 /*100 seconds*//* External modules */#define MAXMSGERRORSIZE 1000/*OWS Server*/#define OWSSERVERMAXLOGINS 10#define MAXCOMMANDSIZE 1000#define MAXKEYWORDSIZE 10#define MAXARGUMENTSIZE MAXCOMMANDSIZE - MAXKEYWORDSIZE - 1#define __SERVR_COMMANDERR SEND(sock, "\r\n<div align='center'>Command not understood<div align='center'>\r\n")/*Misc*/#define MAXKEYSIZE 20#define MAXEXTERNALNODE 1000#define MAXOUTPUTLINE 500#define MAXREGULAREXPRESSIONSIZE 100#define MAXCUSTOMEXTENSIONS 10#define MAXCUSTOMEXTENSIONSIZE 50#define MAXEXTENSIONSIZE 10/*Parse Config File*/#define MAXCONFKEYSIZE 100#define MAXCONFARGSIZE 100/*Encoding*/#define UTF8_ENCODING 0#define ASCII_ENCODING 1/*OWS index*/#define OWSINDEXMINWORDSIZE 1#define OWSINDEXMAXWORDSIZE 30#define LEXICONWORDSIZE 2000/* store the index to the DB every OWSINDEXMAXSWAPDELAY pages */#define OWSINDEXMAXSWAPDELAY 60#define INDEXERTOKENS " ,.;:-_@#!\"\'\\/<>^[]{}()\r\n\t*%$&=+-|!?"unsigned int lexicon_number_of_elements;unsigned int lexicon_actual_size;int nThread = 20;char DB1[MAXCONFARGSIZE];char DB2[MAXCONFARGSIZE];char MYSQLSERVER1[MAXCONFARGSIZE];char MYSQLSERVER2[MAXCONFARGSIZE];char USERDB1[MAXCONFARGSIZE];char USERDB2[MAXCONFARGSIZE];char PASSDB1[MAXCONFARGSIZE];char PASSDB2[MAXCONFARGSIZE];unsigned int MYSQLSERVER_PORT1;unsigned int MYSQLSERVER_PORT2;char OWS_SERVER_PASSWORD[MAXCONFARGSIZE];typedef struct sHost{ char Host[MAXHOSTSIZE]; char Page[MAXPAGESIZE]; char Description[MAXDESCRIPTIONSIZE]; unsigned short int port; unsigned short int type; unsigned short int viewed; unsigned short int level; unsigned int host_id;}SHOST;/*Current Host*/struct sHost IndexingHost;/*Global MySQL*/MYSQL gMysqlDB1;MYSQL gMysqlDB2;char gTable[20];/*global Mutex*/volatile unsigned long hMutex[MAXMUTEX];/*global Status of Threads*/DWORD thrdStatus[MAXTHREAD];/*thrdStatus[]==0 -> Thread is alivethrdStatus[]==1 -> Thread is dead....*/typedef struct sHandleConnection{ SOCKET sock; SOCKADDR_IN client;}SHC;unsigned int bytesDownloaded = 0;unsigned int nErrorPages = 0;char startTime[10];DWORD startTimeMS;/*SWITCHes*/unsigned int xCacheHtml = 0;unsigned int xCacheHtmlCompressed = 0;unsigned int nPagesViewed = 0;unsigned int nRelationships = 1;unsigned int bDontIndexPages = 0;unsigned int bTesting = 0;unsigned int starthostonly = 0;unsigned int bFreeIndexingMode = 0;unsigned int bUpdate = 0;unsigned int actAsAServerPort = 0;unsigned int bBuildOwsOwnIndex = 0;struct __crawler_limits{ unsigned int nMaxPagesPerSite; unsigned int nMaxDepthLevel; unsigned int nMaxSecondsPerSite; unsigned int nMaxBytesPerSite; unsigned int nMaxErrorPerSite;} CRAWLER_LIMITS;struct __extra_limits{ unsigned int nMaxPagesPerSite; unsigned int nMaxDepthLevel; unsigned int nMaxSecondsPerSite; unsigned int nMaxBytesPerSite;} EXTRA_LIMITS;unsigned int scan_mode=0;/* scan_mode==0 => Real time search //Deprecated * scan_mode==1 => Index * scan_mode==2 => Indexed search * scan_mode==0xFF => uninitialized *//*SIGNALs*/unsigned int iQuit = 0;unsigned int bKillThread = 0;unsigned int bKillThreadReserved = 0;unsigned int iStop = 0;unsigned int iDoNextHost = 0;/***/struct sHost* nextHost = NULL;/***/unsigned int bSwapping = 0;unsigned int bAddExternalHost = 0;unsigned int bUseRegularExpressionA = 0;unsigned int bUseRegularExpressionB = 0;/*STRUCTUREs*//* * bTag = Tag begin for? * eTag = Attribute * flag = 0 : <tag1 attr=123> xyz </tag1> eg.: <a href="/index.php">Home</a> * = 1 : <tag2 attr2="test"> eg.: <base href="http://www.openwebspider.org/"> */struct{ char* bTag; char* eTag; int flag;} taglist[] = { { "base" ,"href" ,0 }, { "a" ,"href" ,1 }, { "ref" ,"href" ,0 }, { "area" ,"href" ,0 }, { "frame" ,"src" ,0 }, { "iframe" ,"src" ,0 },/* ****EXAMPLE****** { "img" ,"src" ,0 }, { "body" ,"background" ,0 }, ****EXAMPLE****** */ { "" ,"" ,-1 } };const char *PlainTextExtension[]= { ".txt",".c", ".cpp",".bas", ".pas",".h",".xml", "\0" };const char *HtmlExtensions[]= { ".htm",".html", ".php", ".asp", ".cgi", ".mspx", ".aspx",".shtml", ".pl",".phtml", ".cfm",".ch2", ".jsp",".msnw", ".php3",".xml","\0" };char CustomExtensions[MAXCUSTOMEXTENSIONS][MAXCUSTOMEXTENSIONSIZE];#ifdef USE_REGEXregex_t regexPageFilter;regex_t regexContentFilter;#endifint iLastPing[MAXMUTEX];char lstRobotsExclusions[MAXDISALLOW][MAXPAGESIZE];int iRobCrawlDelay=0; //Crawl delay get by robots.txtint iCrawlDelay=0; //crawl delay get by program argumetsint bRobotsOK;/*module handler*/void* modHandler;struct{ char* functName; char* functInit; void* handler; void* initHandler; unsigned short int isInitialized;} loadableModules[] = { { "modFilter", "modInitFilter" , NULL, NULL }, { "\0", "\0", NULL, NULL }, };/* http://www1.tip.nl/~t876506/utf8tbl.html */struct{ char* htmlChar; char* rep; int type; /* type: 0 UTF8 1 ASCII */} ahList[] = { { "nbsp", " " , ASCII_ENCODING }, { "amp", "&" , ASCII_ENCODING }, { "euro", "0xE282AC" , UTF8_ENCODING }, { "cent", "0xC2A2" , UTF8_ENCODING }, { "copy", "0xC2A9" , UTF8_ENCODING }, { "trade", "0xE284A2" , UTF8_ENCODING }, /* if you have problems with these lines please contact me */ { "Aacute","0xC381" , UTF8_ENCODING }, { "aacute","0xC3A1" , UTF8_ENCODING }, { "Eacute","0xC389" , UTF8_ENCODING }, { "eacute","0xC3A9" , UTF8_ENCODING }, { "Iacute","0xC38D" , UTF8_ENCODING }, { "iacute","0xC3AD" , UTF8_ENCODING }, { "Oacute","0xC393" , UTF8_ENCODING }, { "oacute","0xC3B3" , UTF8_ENCODING }, { "Uacute","0xC39A" , UTF8_ENCODING }, { "uacute","0xC3BA" , UTF8_ENCODING }, { "Agrave","0xC380" , UTF8_ENCODING }, { "agrave","0xC3A0" , UTF8_ENCODING }, { "Egrave","0xC388" , UTF8_ENCODING }, { "egrave","0xC3A8" , UTF8_ENCODING }, { "Igrave","0xC38C" , UTF8_ENCODING }, { "igrave","0xC3AC" , UTF8_ENCODING }, { "Ograve","0xC392" , UTF8_ENCODING }, { "ograve","0xC3B2" , UTF8_ENCODING }, { "Ugrave","0xC399" , UTF8_ENCODING }, { "ugrave","0xC3B9" , UTF8_ENCODING }, { "Acirc", "0xC382" , UTF8_ENCODING }, { "acirc", "0xC3A2" , UTF8_ENCODING }, { "Ecirc", "0xC38A" , UTF8_ENCODING }, { "ecirc", "0xC3AA" , UTF8_ENCODING }, { "Icirc", "0xC38E" , UTF8_ENCODING }, { "icirc", "0xC3AE" , UTF8_ENCODING }, { "Ocirc", "0xC394" , UTF8_ENCODING }, { "ocirc", "0xC3B4" , UTF8_ENCODING }, { "Ucirc", "0xC39B" , UTF8_ENCODING }, { "ucirc", "0xC3BB" , UTF8_ENCODING }, { "Auml", "0xC384" , UTF8_ENCODING }, { "auml", "0xC5A0" , UTF8_ENCODING }, { "Euml", "0xC38B" , UTF8_ENCODING }, { "euml", "0xC3AB" , UTF8_ENCODING }, { "Iuml", "0xC38F" , UTF8_ENCODING }, { "iuml", "0xC3AF" , UTF8_ENCODING }, { "Ouml", "0xC396" , UTF8_ENCODING }, { "ouml", "0xC3B6" , UTF8_ENCODING }, { "Uuml", "0xC39C" , UTF8_ENCODING }, { "uuml", "0xC3BC" , UTF8_ENCODING }, { "Aring", "0xC385" , UTF8_ENCODING }, { "aring", "0xC3A5" , UTF8_ENCODING }, { "AElig", "0xC386" , UTF8_ENCODING }, { "aelig", "0xC3A6" , UTF8_ENCODING }, { "Ccedil", "0xC387" , UTF8_ENCODING }, { "ccedil", "0xC3A7" , UTF8_ENCODING },/* { "",'' }, */ { NULL, 0 } };typedef struct functArg{ struct sHost* hostInfo; char* html; unsigned int htmlLength; char* text; unsigned int textLength; int PagesViewed; long int bytesDownloaded; void* mysqlDB1; void* mysqlDB2;}FUNCTION_ARGUMENT;#endif/*EOF*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -