⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 functions.h

📁 网页抓取程序
💻 H
字号:
/* OpenWebSpider * *  Authors:     Stefano Alimonti AND Stefano Fantin *  Version:     0.7 *  E-Mails:     shen139 [at] openwebspider (dot) org AND stefanofantinguz@yahoo.it * * * This file is part of OpenWebSpider * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA * */#ifndef __FNCT#define __FNCT#include "macro.h"#include "options.h"/*openwebspider-0.5.c*/int usage(char *txt);void sigdie(int a);/* Ows Own Index */typedef struct _INVERTED_INDEX{	unsigned int doc_id;	unsigned int position;	struct _INVERTED_INDEX* next;	struct _INVERTED_INDEX* last;}INVERTED_INDEX;typedef struct _OOI_NODE{	unsigned int id;	char* field;	INVERTED_INDEX* ii;}OOI_NODE;/*hstlist.h*/typedef struct _NODE{	void* field;	struct _NODE* next;}NODE;NODE* lstFirst;NODE* lstInit(struct sHost host);int lstAddHost(NODE** first,struct sHost host);NODE* lstGetNodeByHost(NODE* first, struct sHost host);NODE* lstGetNodeByVal(NODE* first,int val);int lstSetNodeStatus(NODE* first,int sub, int bus);int lstDebugNodes(NODE* first,int viewed);/*htmlfnct.h*/int ParseHTTPRequest(char* recvdpkt,char* htmlOut,int maxout,char* httpHeader, char* stuff,int level);int ForgeHTTPPacket(struct sHost hst,char * packet);char* RemoveTag(char* html, char* startTag, char* endTag);int BetweenTag(char* html, char* tag,char* out,int endwithstarttag,int maxout);int UnHtml(char* html, char* text,int maxout);int LookForUrls(char *html,struct sHost hst);/*list.h*/int lstFreeAll(NODE* first);NODE* lstGetNodeX(NODE* first, int x);NODE* lstGetLastNode(NODE* first, int* NumOfNodes);/*Ows Own Index*/INVERTED_INDEX* InitII();OOI_NODE* InitLexicon();void insertion_sort(OOI_NODE* lexicon, int n);int lstAddWord(OOI_NODE** lexicon,char* word);int ndzLookForWord(OOI_NODE* lexicon, char* word);void FreeOwsIndex(OOI_NODE* lexicon);/*misc.h*/#ifndef WIN32  int GetTickCount();  void SetconsoleTilte(char*);  int stricmp(char*,char*);  int strnicmp(char*,char*,int);  void Sleep(int);  char* _strupr(char*);  char* _strlwr(char*a);  int closesocket(int);  int ExitThread(int);  int TerminateThread(pthread_t,int);  void CloseHandle(HANDLE);#endifint InitMysql();int InitCrawler(struct sHost currentHst);int setHostExtras(int host_id);int CrawlerMainLoop(struct sHost currentHst);int InitIndexing(struct sHost currentHst);void MemoryCorruptedHandler(char* funct);void printStats(struct sHost* Host,int flag);void DoQuit();int checkLimits();/*mymutex.h*/void init_mutex();void thrdBlock(int iHndl);void thrdUnBlock(int iHndl);void UnBlockAll();/*rank.h*/int GetHostRank(int host_id);int CalcPageRank(struct sHost host);/*robots.h*/int ParseRobotsTxt(char* html,struct sHost host);int CheckRobotExclusion(char* page);/*socket.h*/int setnonblock(SOCKET sock,int timeout);int StartUpWinsock();int LoadSocket(SOCKET *sock,struct sHost* shost,SOCKADDR_IN *saddr);int RecvPackets(SOCKET *sock,char* packet, int maxlen);int ListenToPort(int port, SOCKET* fd);/*sqlfnct.h*/int sqlConnect(char* hostname, char* username, char* password, char* table,MYSQL* rMysql, unsigned int port);void my_mysql_ping(MYSQL* mysql,int iMutex);int my_mysql_query(MYSQL*mysql, char* sqlQuery,int iMutex);int my_mysql_query_and_store_results(MYSQL*mysql, char* sqlQuery,MYSQL_RES** tRes,MYSQL_RES* srRes,int iMutex);/*strfnct.h*/int atoupper(char* in, char* out, int maxout);int UnToken(char* str,char* Tokens,char* out,int len);int bTokenIn(char* str,char* Tokens,int len);int OnlyOneSpace(char* str,char* out,int len);int ReplaceChr(char* string, char bus, char sub);int ReplaceStr(char* string,char* strOut, char* bus, char* sub);void RemoveShit(char* text);char* strtrim(char* in,char* out);char* strWord(char* where,char* word);char * my_stristr(char* s,char* find);typedef struct __myCStr{char* myString;int myStrLen;}MYCSTR;int myCStrLen(struct __myCStr* myCStr);int myCStrLenUpdate(struct __myCStr* myCStr);char* myCStrCpy(struct __myCStr* myCStr, char* text);char* myCStrnCpy(struct __myCStr* myCStr, char* text, int n);char* myCStrCat(struct __myCStr* myCStr, char* text);char* myCStrnCat(struct __myCStr* myCStr, char* text, int n);char* myCStrMemCpy(struct __myCStr* myCStr, char* blob, int blobLen);char* myCStrMemCat(struct __myCStr* myCStr, char* blob, int blobLen);/*temptamble.h*/void RandomTable(char* table);int CreateTmpTable(char* table);int FlushTempTable(char* table);int DropTempTable(char* table);/*thread.h*/#ifdef WIN32  unsigned __stdcall #else  void* #endifmainThread(LPVOID pthrdNum);void KillThreads();void CreateThreads();void CheckThreads();void CreateServerThread(int port);void CreateHandleConnectionThread(struct sHandleConnection* struct_connection);/*urlfunct.h*/int ReturnFirstUrl(struct sHost* Host);int AddUrl(struct sHost hst, unsigned int level,struct sHost* from);int AddExternalHost(struct sHost Host,struct sHost* from);int ParseUrl(char* url,struct sHost* sh,struct sHost* currentHost);int GenerateURL(struct sHost Host,char* URL);unsigned int PortNumFromHostname(char* hostname);int GetDir(char* Page,char* dir);int CheckPage(char* page);int PageType(struct sHost* Host);void unencode(char *src, char *last, char *dest);int GetHostId(struct sHost host);int pRelationships(struct sHost* links,struct sHost* linked,int level);/*modules.h*/void* GetModFunctionHandlerByName(char* functName);void* GetInitModFunctionHandlerByName(char* functName);void* myGetProcAddress(void* handler,char* funct);int myLoadModules(char* filename, void* handler);int myLoadModules(char* filename,void* handler);/*server.h*/void owsServer_HTML_Header(SOCKET sock, unsigned int login_status);void owsServer_HTML_Footer(SOCKET sock);void CheckSession(unsigned long IpAddr);typedef struct structLastAccess{    unsigned long LoginOKIP;    unsigned long LastAccessMS;}SLA;struct structLastAccess listAccess[OWSSERVERMAXLOGINS];#ifdef WIN32  unsigned __stdcall #else  void* #endifStartOWSServer(LPVOID port);#ifdef WIN32  unsigned __stdcall #else  void* #endifHandleConnection(void* npSocket);int HandleRequest(SOCKET sock, char* command, unsigned int login_status,unsigned long IpAddr);/*search.h*/int IndexedSearch(MYSQL* mysql,char* Query);int IndexedSearchXML2Sock(MYSQL* mysql,char* Query,SOCKET sock);/*parse_conf.h*/int ReadConfFile();/*indexer.h*/int IndexPage(char* html, struct sHost host, unsigned int htmlLength);int IsPageIndexed(struct sHost* host);int sqlTextToUTF8(char* text, char* out, int maxout);int BuildOwsOwnIndex(struct sHost* host, unsigned int flag);int IndexPage2(char* text, unsigned int page_id, OOI_NODE** lexicon);int GetWordId(char* word);void UpdateInvertedIndex(OOI_NODE* lexicon, char* word, unsigned int doc_id, unsigned int position);void StoreOwsIndex(OOI_NODE* lexicon);#endif/*EOF*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -