📄 misc.h
字号:
/* OpenWebSpider* * Authors: Stefano Alimonti AND Stefano Fantin * Version: 0.7 * E-Mails: shen139 [at] openwebspider (dot) org AND stefanofantinguz@yahoo.it*** This file is part of OpenWebSpider** This program is free software; you can redistribute it and/or modify* it under the terms of the GNU General Public License as published by* the Free Software Foundation; either version 2 of the License, or* (at your option) any later version.** This program is distributed in the hope that it will be useful,* but WITHOUT ANY WARRANTY; without even the implied warranty of* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the* GNU General Public License for more details.** You should have received a copy of the GNU General Public License* along with this program; if not, write to the Free Software* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA**/#ifndef __MISC#define __MISC#ifndef WIN32int GetTickCount(){ struct timeval tv; gettimeofday(&tv, NULL); return tv.tv_sec*1000L+tv.tv_usec/1000L;}void SetConsoleTitle(char* msg){ return;}int stricmp(char*a,char*b){ return strcasecmp(a,b);}int strnicmp(char*a,char*b,int c){ return strncasecmp(a,b,c);}void Sleep(int n){ usleep((unsigned)n*1000); return;}char* _strupr(char*a){ int m,i; m=strlen(a); for(i=0;i<m;i++) a[i]=(char)toupper(a[i]); return a;}char* _strlwr(char*a){ int m,i; m=strlen(a); for(i=0;i<m;i++) a[i]=(char)tolower(a[i]); return a;}int closesocket(int s){ return close(s);}int ExitThread(int a){ pthread_exit(&a);}int TerminateThread(pthread_t thread,int nothing){ return pthread_cancel(thread);}void CloseHandle(HANDLE a){ return;}#endifint InitMysql(){ SetConsoleTitle("Connecting to mysql..."); printf("Connecting to Mysql server n.1 (%s)...",MYSQLSERVER1); //Hosts if(sqlConnect(MYSQLSERVER1, USERDB1, PASSDB1, DB1,&gMysqlDB1, MYSQLSERVER_PORT1)==0) { fprintf(stderr, "ERROR\r\nFailed to connect to database(%s): Error: %s\r\n",DB1,mysql_error(&gMysqlDB1)); ERROR_LOG(mysql_error(&gMysqlDB1)) return -1; } printf("OK\r\nConnecting to Mysql server n.2 (%s)...",MYSQLSERVER2); //Pages if(sqlConnect(MYSQLSERVER2, USERDB2, PASSDB2, DB2,&gMysqlDB2, MYSQLSERVER_PORT2)==0) { fprintf(stderr, "ERROR\r\nFailed to connect to database(%s): Error: %s\r\n",DB2,mysql_error(&gMysqlDB2)); ERROR_LOG(mysql_error(&gMysqlDB2)) mysql_close(&gMysqlDB2); return -1; } printf("OK\r\n"); SetConsoleTitle("Connecting to mysql...OK");return 1;}int InitCrawler(struct sHost currentHst){ memset(iLastPing,0,sizeof(iLastPing)); printf("\r\n"); printf("Start Host : %s\r\n",currentHst.Host); printf("Start Page : %s\r\n", currentHst.Page); printf("Scan Mode : Index\r\n"); printf("Mode : %s\r\n",(starthostonly==1)?"Single Host":"Recursive"); printf("Mysql server n.1 : %s\r\n",MYSQLSERVER1); printf("Mysql server n.2 : %s\r\n",MYSQLSERVER2); printf(" --- Global Limits ---\r\n"); printf("Max pages : %i\r\n",CRAWLER_LIMITS.nMaxPagesPerSite); printf("Max depth level : %i\r\n",CRAWLER_LIMITS.nMaxDepthLevel); printf("Max seconds : %i\r\n",CRAWLER_LIMITS.nMaxSecondsPerSite); printf("Max bytes : %i\r\n",CRAWLER_LIMITS.nMaxBytesPerSite); printf(" -----------------------\r\n"); printf("Surfing the net... (press CTRL+C to exit)\r\n"); if(actAsAServerPort) { CreateServerThread(actAsAServerPort); Sleep(200); } if(!StartUpWinsock()) { fprintf(stderr,"WSAStartup() error\r\n"); ERROR_LOG("WSAStartup() error") return -1; } /* connect to mysql servers */ if(InitMysql()==-1) return -1; SetConsoleTitle("Creating temp table..."); do { RandomTable(gTable); } while(!CreateTmpTable(gTable)); //Loop until creates a new tmp table!!! signal(SIGINT, sigdie); signal(SIGTERM, sigdie);return 1;}int setHostExtras(int host_id){char sqlQuery[MAXQUERYSIZE];MYSQL_ROW row;MYSQL_RES gRes;MYSQL_RES** tmpRes=NULL; tmpRes=(MYSQL_RES**)malloc(sizeof(MYSQL_RES)); if(tmpRes==NULL) MemoryCorruptedHandler("setHostExtras"); sprintf(sqlQuery,"select hostlist_extras.max_pages,hostlist_extras.max_level,hostlist_extras.max_seconds, hostlist_extras.max_bytes from hostlist left outer join hostlist_extras on hostlist.id = hostlist_extras.host_id WHERE hostlist.id = %d ", host_id); my_mysql_query_and_store_results(&gMysqlDB1, sqlQuery, tmpRes, &gRes, BLOCKDB1); if((row = mysql_fetch_row(&gRes))) { if(row[0]) /* max_pages */ { if( atoi(row[0]) > 0 ) { EXTRA_LIMITS.nMaxPagesPerSite = atoi(row[0]); } } if(row[1]) /* max_level */ { if( atoi(row[1]) > 0 ) { EXTRA_LIMITS.nMaxDepthLevel = atoi(row[1]); } } if(row[2]) /* max_seconds */ { if( atoi(row[2]) > 0 ) { EXTRA_LIMITS.nMaxSecondsPerSite = atoi(row[2]); } } if(row[3]) /* max_bytes */ { if( atoi(row[3]) > 0 ) { EXTRA_LIMITS.nMaxBytesPerSite = atoi(row[3]); } } } if(*tmpRes) { mysql_free_result(*tmpRes); } FREE(tmpRes);return 1;}int CrawlerMainLoop(struct sHost currentHst){ /* this is the first URL */ InitIndexing(currentHst); while(1) { /* set the current host as indexed and try to get another host to be indexed */ if((ReturnFirstUrl(¤tHst))==-1) { fprintf(stderr,"\nBuffer empty\n"); break; } /* check whether iQuit as been set by ReturnFirstUrl */ if(iQuit==1) { break; } InitIndexing(currentHst); } DoQuit();return 1;}int InitIndexing(struct sHost currentHst){ int condition = 1; char* sqlQuery; DWORD avgSec; time_t long_time; struct tm *newtime; struct sHost *robots_txt; #ifdef WIN32 char strTitle[3000];#endif iRobCrawlDelay = 0; bRobotsOK = 0; nPagesViewed = 0; bytesDownloaded = 0; nErrorPages = 0; startTimeMS = 0; bKillThread = 0; avgSec = 0; EXTRA_LIMITS.nMaxBytesPerSite = 0; EXTRA_LIMITS.nMaxDepthLevel = 0; EXTRA_LIMITS.nMaxPagesPerSite = 0; EXTRA_LIMITS.nMaxSecondsPerSite = 0; memset(lstRobotsExclusions,0,sizeof(lstRobotsExclusions)); sqlQuery = malloc(MAXQUERYSIZE); if(sqlQuery==NULL) MemoryCorruptedHandler("InitIndexing"); /* try to free the memory used */ lstFreeAll(lstFirst); /* does this host exist? */ if( currentHst.host_id == 0 ) currentHst.host_id = GetHostId( currentHst ); if( currentHst.host_id == 0) //no
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -