📄 misc.h
字号:
//puts current hostname in the db as "Scanning host in progress.." (viewed==2) sprintf(sqlQuery,"INSERT INTO hostlist (hostname, port, status, lastvisit) VALUES('%s', %i, 2, curdate());", currentHst.Host, currentHst.port); else //yes sprintf(sqlQuery,"UPDATE hostlist SET port=%i, status = 2, lastvisit=curdate() WHERE hostname =\'%s\' limit 1", currentHst.port, currentHst.Host); my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK); /* 8legs mod */ if( currentHst.host_id == 0 ) currentHst.host_id = GetHostId( currentHst ); robots_txt=(struct sHost*)malloc(sizeof(struct sHost)); if(robots_txt==NULL) MemoryCorruptedHandler("InitIndexing"); currentHst.viewed = 0; memcpy(robots_txt,¤tHst,sizeof(struct sHost)); strcpy(robots_txt->Page, "/robots.txt"); robots_txt->level = 1; robots_txt->type = 1; robots_txt->host_id = currentHst.host_id; lstFirst = lstInit(*robots_txt); FREE(robots_txt); currentHst.level = 1; lstAddHost(&lstFirst,currentHst); memcpy(&IndexingHost,¤tHst,sizeof(struct sHost)); { int (*modInitFilter)(char*, char*); char sError[MAXDESCRIPTIONSIZE]; int ret; if( ( modInitFilter = GetInitModFunctionHandlerByName("modFilter")) ) { ret=modInitFilter(currentHst.Host,sError); if(ret==0) { FREE(sqlQuery); printf("\nmodInitFilter(): %s\n\n",sError); ERROR_LOG(sError); return 0; } } } SetConsoleTitle("..."); setHostExtras( currentHst.host_id ); printf(" --- This site Limits ---\r\n"); printf("Max pages : %i\r\n", (EXTRA_LIMITS.nMaxPagesPerSite==0) ? CRAWLER_LIMITS.nMaxPagesPerSite : EXTRA_LIMITS.nMaxPagesPerSite); printf("Max depth level : %i\r\n", (EXTRA_LIMITS.nMaxDepthLevel==0) ? CRAWLER_LIMITS.nMaxDepthLevel : EXTRA_LIMITS.nMaxDepthLevel); printf("Max seconds : %i\r\n", (EXTRA_LIMITS.nMaxSecondsPerSite==0) ? CRAWLER_LIMITS.nMaxSecondsPerSite : EXTRA_LIMITS.nMaxSecondsPerSite); printf("Max bytes : %i\r\n", (EXTRA_LIMITS.nMaxBytesPerSite==0) ? CRAWLER_LIMITS.nMaxBytesPerSite : EXTRA_LIMITS.nMaxBytesPerSite); printf(" -----------------------\r\n"); if(bUpdate==0) { printf("Deleting old index for %s...",currentHst.Host); fflush(stdout); sprintf(sqlQuery,"DELETE ii FROM pagelist, ii WHERE pagelist.hostname =\'%s\' AND ii.pageid = pagelist.id ",currentHst.Host); my_mysql_query(&gMysqlDB2, sqlQuery,NO_BLOCK); sprintf(sqlQuery,"DELETE FROM pagelist WHERE hostname =\'%s\' ",currentHst.Host); my_mysql_query(&gMysqlDB2, sqlQuery,NO_BLOCK); printf("OK\r\n"); printf("Deleting old rels for %s...",currentHst.Host); sprintf(sqlQuery,"DELETE FROM rels WHERE host_id = %d",currentHst.host_id); my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK); printf("OK\r\n"); } /* set startTimeMS before creating threads */ startTimeMS=GetTickCount(); SetConsoleTitle("Creating threads..."); CreateThreads(); /**************************MT********************************/ printf("\r\n"); time( &long_time ); newtime=localtime(&long_time); sprintf(startTime,"%i:%i:%i",newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec ); while(condition) {#ifdef WIN32 sprintf(strTitle,"OpenWebSpiderV%s | Pages: %i | Time: %i sec | host: %s",VERSION,nPagesViewed,(int)((GetTickCount()-startTimeMS)/1000),currentHst.Host); SetConsoleTitle(strTitle);#endif CheckThreads(); Sleep(300); if(iQuit==1) { printf("\r\n\r\nQuitting: Killing threads...\n\n"); KillThreads(); iQuit=0; bKillThread=0; sprintf(sqlQuery,"UPDATE hostlist SET status = 1,indexed_pages=%d,time_sec=%d,bytes_downloaded=%d, error_pages=%d WHERE hostname = \'%s\' limit 1" ,nPagesViewed, (int)((GetTickCount()-startTimeMS)/1000), bytesDownloaded, nErrorPages ,currentHst.Host); printStats(¤tHst,0); my_mysql_ping(&gMysqlDB1,NO_BLOCK); my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK); FREE(sqlQuery); FlushTempTable(gTable); if( bBuildOwsOwnIndex == 1 ) { /* all pages are swapped to the table pagelist */ /* are we using ows own index? */ /* if so: build the index for the current hostname */ BuildOwsOwnIndex(¤tHst, 1); } CalcPageRank( currentHst ); DoQuit(); }/*if(iQuit==1)*/ if(bKillThread==1) { SetConsoleTitle("Killing threads"); KillThreads(); CreateThreads(); }/*if(bKillThread==1)*/ thrdBlock(BLOCKTHRDHST); if(/*iDoNextHost==1 ||*/ /*Switching to the next host*/ (lstGetNodeByVal(lstFirst,0)==NULL && lstGetNodeByVal(lstFirst,2)==NULL)) { /* set the status of the pages to be indexed and of that in indexing as indexed */ /*lstSetNodeStatus(lstFirst,0,1); lstSetNodeStatus(lstFirst,2,1);*/ thrdUnBlock(BLOCKTHRDHST); SetConsoleTitle("Killing threads"); bKillThread=1; KillThreads(); if(iDoNextHost==1) { //sprintf(sqlQuery,"UPDATE hostlist SET status = 1,indexed_pages=%i WHERE hostname =\'%s\' limit 1",nPagesViewed,currentHst.Host); sprintf(sqlQuery,"UPDATE hostlist SET status = 1,indexed_pages=%d,time_sec=%d,bytes_downloaded=%d, error_pages=%d WHERE hostname = \'%s\' limit 1" ,nPagesViewed, (int)((GetTickCount()-startTimeMS)/1000), bytesDownloaded, nErrorPages ,currentHst.Host); my_mysql_ping(&gMysqlDB1,NO_BLOCK); my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK); iDoNextHost=0; } FlushTempTable(gTable); if( bBuildOwsOwnIndex == 1 ) { /* all pages are swapped to the table pagelist */ /* are we using ows own index? */ /* if so: build the index for the current hostname */ BuildOwsOwnIndex(¤tHst, 1); } CalcPageRank(currentHst); /* this host has been indexed! Proceed to the next? */ break; }//if(iDoNextHost==1 || (lstGetNodeByVal(lstFirst,0)==NULL && lstGetNodeByVal(lstFirst,2)==NULL)) thrdUnBlock(BLOCKTHRDHST); }/*while(condition)*/ FREE(sqlQuery); return 1;}int checkLimits(){ if( (EXTRA_LIMITS.nMaxPagesPerSite == 0 && CRAWLER_LIMITS.nMaxPagesPerSite>0 && nPagesViewed >= CRAWLER_LIMITS.nMaxPagesPerSite) /* Check the number of pages indexed */ || (EXTRA_LIMITS.nMaxSecondsPerSite == 0 && CRAWLER_LIMITS.nMaxSecondsPerSite>0 && (int)((GetTickCount()-startTimeMS)/1000) >= CRAWLER_LIMITS.nMaxSecondsPerSite) /* Check the number of seconds */ || (EXTRA_LIMITS.nMaxBytesPerSite == 0 && CRAWLER_LIMITS.nMaxBytesPerSite>0 && bytesDownloaded >= CRAWLER_LIMITS.nMaxBytesPerSite) /* Check the number of bytes downloaded */ || (EXTRA_LIMITS.nMaxPagesPerSite>0 && nPagesViewed >= EXTRA_LIMITS.nMaxPagesPerSite) /* Check the number of pages indexed */ || (EXTRA_LIMITS.nMaxSecondsPerSite>0 && (int)((GetTickCount()-startTimeMS)/1000) >= EXTRA_LIMITS.nMaxSecondsPerSite) /* Check the number of seconds */ || (EXTRA_LIMITS.nMaxBytesPerSite>0 && bytesDownloaded >= EXTRA_LIMITS.nMaxBytesPerSite) || (CRAWLER_LIMITS.nMaxErrorPerSite>0 && nErrorPages >= CRAWLER_LIMITS.nMaxErrorPerSite) /* Check the number of error pages */ ) return 1;return 0;}/** flag=0 -> complete stats* flag=1 -> in-complete stats* flag=2 -> switched to the next host*/void printStats(struct sHost* Host,int flag){ time_t long_time; struct tm *newtime; FILE* file; time( &long_time ); newtime=localtime(&long_time); if(flag==1) printf("\r\n + STATS(*)\r\n"); else if(flag==2) printf("\r\n + STATS(2)\r\n"); else printf("\r\n + STATS\r\n"); printf(" - Host:\t\t%s\r\n",Host->Host ); printf(" - Pages:\t\t%i\r\n",nPagesViewed); printf(" - Downloaded:\t\t%i Kb\r\n",(int)bytesDownloaded/1024); printf(" - Scan time: %is (%s - %i:%i:%i)\r\n\r\n",(int)((GetTickCount()-startTimeMS)/1000),startTime,newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec ); if((file = fopen("stats.log","a"))!=NULL) { if(flag==1) fprintf(file," + STATS(*)\r\n"); else if(flag==2) fprintf(file," + STATS(S)\r\n"); else fprintf(file," + STATS\r\n"); fprintf(file," - %i\\%i\\%i %i:%i:%i -- OpenWebSpider version: %s --\r\n",newtime->tm_mday ,newtime->tm_mon +1, newtime->tm_year +1900,newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec,VERSION); fprintf(file," - Host:\t\t\t%s\r\n",Host->Host ); fprintf(file," - Pages:\t\t%i\r\n",nPagesViewed); fprintf(file," - Downloaded:\t\t%i Kb\r\n",(int)bytesDownloaded/1024); fprintf(file," - Scan time: %is (%s - %i:%i:%i) \r\n",(int)((GetTickCount()-startTimeMS)/1000),startTime,newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec); fprintf(file,"============================================================\r\n\r\n"); fclose(file); }}void MemoryCorruptedHandler(char* funct){ printf("\r\n\r\nMemory corrupted\r\n"); if(funct) printf("Function: %s\r\n",funct); printf("Exiting...\r\n\r\n"); exit(0);}void DoQuit(){ if(actAsAServerPort) { printf("\n\nFreeing Sockets..."); closesocket(OWS_Server_fd); printf("OK\n\n"); } iQuit=0; bKillThread=0; DropTempTable(gTable); mysql_close(&gMysqlDB1); mysql_close(&gMysqlDB2); Sleep(200); printf("Bye\n\n"); //getchar();getchar();getchar();getchar();getchar(); SetConsoleTitle("Bye byE"); exit(0); return;}#endif/*EOF*/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -