⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 functions.h

📁 网页抓取程序
💻 H
字号:


/* ================================================================ */
/* definitions */
/* NOTE(review): the OWSINDEX* limits look like min/max word lengths for the
 * index; they are not referenced in this header - confirm against users. */
#define OWSINDEXMINWORDSIZE  1
#define OWSINDEXMAXWORDSIZE  30

/* initial size of the lexicon (also the initial value of actual_size below) */
#define LEXICONWORDSIZE		 15000
/* growth increment of the lexicon */
#define LEXICON0INCREMENT    50

/* initial size of the inverted index */
#define IISIZE               1
/* growth increment of the inverted index */
#define IIINCREMENT          1

/*
 * Query size limits.
 *
 * MAXQUERYSIZE is derived from the other two limits:
 *   MAXWORDSxQUERY words of MAXWORDSIZE characters each, plus MAXWORDSxQUERY
 *   extra characters (presumably one separator per word - confirm) = 620.
 *
 * The expansion is parenthesised so that the macro behaves as a single value
 * inside larger expressions (e.g. 2 * MAXQUERYSIZE or MAXQUERYSIZE + 1);
 * without the parentheses operator precedence silently changes the result.
 */
#define MINWORDSIZE         2
#define MAXWORDSIZE         30
#define MAXWORDSxQUERY      20
#define MAXQUERYSIZE        (MAXWORDSxQUERY * MAXWORDSIZE + MAXWORDSxQUERY)

/* NOTE(review): upper bound on a result set; not referenced in this header -
 * confirm where it is enforced. */
#define MAXRESULTSSIZE      10000

/* 1000 = 100 pages of 10 results, or 10 pages of 100 results */
/* facility (function): every single node sorts its results by rank and passes
                        at most 1000 results up to its parent node
                        (working with at most 1000 results should be
                         fast enough :-) )
*/
#define MAXRESULTSxNODE     1000

/* MAX number of concurrent searches (size of the queryStt[] slot table) */
#define MAXPARRSEARCH       20

/* Socket timeouts */
#define FIRSTTIMEOUT         50000      /* milliseconds */
#define TIMEOUTs             3000       /* milliseconds */

#define DEFAULTTCPPORT       9090

/* maximum number of sub-nodes */
#define MAXSUBNODES         10

/* NOTE(review): unit not stated here; presumably passed to Sleep(), which
 * takes milliseconds - confirm. */
#define SLEEP_DELAY1        1

/* interval between two checks of the sub-nodes that are not connected.
 * NOTE(review): the original comment says "in seconds", but the value 10000
 * looks like milliseconds - confirm against the code that uses it. */
#define CHECK_INTERVAL      10000   


/* ================================================================ */


/* ================================================================ */
/* structures */
/*
 * One entry of an inverted index: a (doc_id, position) pair.
 * NOTE(review): presumably "word occurs in document doc_id at offset
 * position" - confirm against the code that fills the index.
 */
typedef struct _INVERTED_INDEX {
	unsigned doc_id;	/* document identifier */
	unsigned position;	/* position associated with doc_id */
} INVERTED_INDEX;

/*
 * One lexicon node together with the inverted index that hangs off it.
 * NOTE(review): the exact meaning of `id` and `value` is not visible in this
 * header (presumably the word id and a per-word value) - confirm.
 */
typedef struct _OOI_NODE {
	unsigned id;
	unsigned value;
	INVERTED_INDEX *ii;	/* inverted-index entries of this node */
	unsigned nii;		/* number of inverted index nodes */
	unsigned ii_size;	/* NOTE(review): presumably the allocated capacity of ii - confirm */
} OOI_NODE;

/* A (page, rank) pair: one scored search result. */
typedef struct _RANKS {
	unsigned page;	/* page identifier */
	unsigned rank;	/* rank associated with the page */
} RANKS;


/*
 * Socket plus peer address, bundled into one object
 * (CreateHandleConnectionThread() takes a pointer to this structure).
 */
typedef struct sHandleConnection {
	SOCKET sock;		/* connection socket */
	SOCKADDR_IN client;	/* address stored alongside the socket */
} SHC;


/* A character buffer paired with an explicit length field. */
typedef struct __myCStr {
	char *myString;	/* the character data */
	int myStrLen;	/* length that goes with myString */
} MYCSTR;


/*
 * State of one search; queryStt[] holds MAXPARRSEARCH such slots.
 * NOTE(review): field meanings beyond the original comments are inferred
 * from names - confirm against search.h / subnodes.h.
 */
struct __query {
	unsigned available;			/* slot flag (presumably "free to use" - confirm) */

	unsigned queryArray[20];		/* the query as an array of unsigned values */
	unsigned nWords;			/* count that goes with queryArray */

	char text_query[255];			/* textual query for the MySQL node */

	unsigned NodeHasSent[MAXSUBNODES];	/* one entry per possible sub-node */
	RANKS *rkRes;				/* [MAXRESULTSxNODE] */
	unsigned nRkRes;			/* count that goes with rkRes */
};

/* Table of query slots, one per possible concurrent search. */
struct __query queryStt[MAXPARRSEARCH];

/* ================================================================ */



/* ================================================================ */
/* macro */
/*
 * FREE(a): release a heap block.
 * free(NULL) is defined to do nothing, so no NULL guard is needed. Expanding
 * to a plain call (instead of a bare `if`) keeps the macro safe inside
 * if/else chains, where the old form captured the caller's `else`.
 */
#define FREE(a)                         free(a)

/*
 * SEND(sock, buf): send the NUL-terminated string buf (terminator excluded)
 * with flags 0. Evaluates buf twice - pass an expression without side effects.
 * Yields whatever send() returns.
 */
#define SEND(sock, buf)                 send((sock), (buf), (int)strlen(buf), 0)

/*
 * SEND_BIN(sock, buf, size): send size bytes starting at buf with flags 0.
 * buf is parenthesised before the cast so that pointer expressions such as
 * `array + 1` keep their own element-sized arithmetic.
 */
#define SEND_BIN(sock, buf, size)       send((sock), (char *)(buf), (size), 0)

/*
 * swap(n1, n2): exchange two OOI_NODE objects (used by insertion_sort()).
 *
 * Both arguments must be modifiable lvalues of type OOI_NODE and should have
 * no side effects (each is evaluated twice). They are parenthesised, so
 * expressions such as *a or arr[i] are accepted. The temporary has a
 * macro-private name so that a caller variable called `tmp` is not shadowed.
 *
 * The expansion is a plain { } block, as before, so existing call sites keep
 * compiling with or without a trailing ';'. Do not use it as the unbraced
 * body of an `if` that has an `else`.
 */
#define swap(n1, n2)					\
{							\
	OOI_NODE ooi_swap_tmp_ = (n1);			\
							\
	(n1) = (n2);					\
	(n2) = ooi_swap_tmp_;				\
}

/*
 * swap2(n1, n2): exchange two INVERTED_INDEX objects.
 *
 * Both arguments must be modifiable lvalues of type INVERTED_INDEX and should
 * have no side effects (each is evaluated twice). They are parenthesised, so
 * expressions such as *a or arr[i] are accepted, and the temporary has a
 * macro-private name so that a caller variable called `tmp` is not shadowed.
 *
 * The expansion is a plain { } block, as before; do not use it as the
 * unbraced body of an `if` that has an `else`.
 */
#define swap2(n1, n2)					\
{							\
	INVERTED_INDEX ii_swap_tmp_ = (n1);		\
							\
	(n1) = (n2);					\
	(n2) = ii_swap_tmp_;				\
}

/*
 * swapRankList(n1, n2): exchange two RANKS objects.
 *
 * Both arguments must be modifiable lvalues of type RANKS and should have no
 * side effects (each is evaluated twice). They are parenthesised, so
 * expressions such as *a or list[i] are accepted, and the temporary has a
 * macro-private name so that a caller variable called `tmp` is not shadowed.
 *
 * The expansion is a plain { } block, as before; do not use it as the
 * unbraced body of an `if` that has an `else`.
 */
#define swapRankList(n1, n2)				\
{							\
	RANKS rank_swap_tmp_ = (n1);			\
							\
	(n1) = (n2);					\
	(n2) = rank_swap_tmp_;				\
}
/* ================================================================ */


/* ================================================================ */
/* portability: POSIX implementations of Win32-named helpers (compiled only when WIN32 is not defined) */
#ifndef WIN32

/*
 * Millisecond tick counter for non-Windows builds.
 *
 * Derived from gettimeofday(), so the value is wall-clock time in
 * milliseconds, reduced modulo 2^32 and returned as int. Only differences
 * between two readings are meaningful, and callers should compute them in
 * unsigned arithmetic because the counter wraps.
 *
 * The arithmetic is done in unsigned 64-bit so that tv_sec * 1000 cannot
 * overflow a 32-bit long. Returns 0 if gettimeofday() fails.
 */
int GetTickCount(void)
{
	struct timeval tv;
	unsigned long long ms;

	if (gettimeofday(&tv, NULL) != 0)
		return 0;

	ms = (unsigned long long)tv.tv_sec * 1000ULL
	   + (unsigned long long)tv.tv_usec / 1000ULL;

	/* keep the low 32 bits, as truncating the old long result to int did */
	return (int)(unsigned int)(ms & 0xFFFFFFFFULL);
}

/*
 * Win32-named wrapper around close() for non-Windows builds.
 * Returns whatever close() returns for descriptor s.
 */
int closesocket(int s)
{
	int rc = close(s);

	return rc;
}

/*
 * Terminate the calling thread (Win32-named wrapper around pthread_exit()).
 *
 * The exit code `a` is handed to pthread_exit() by value, encoded in the
 * pointer itself; a joiner can recover it with (int)(long)value_ptr.
 * Passing &a, as the code did before, published the address of a parameter
 * that no longer exists once the thread has ended.
 *
 * Does not return; the trailing return only satisfies the int return type.
 */
int ExitThread(int a)
{
	pthread_exit((void *)(long)a);

	return a;	/* not reached */
}

/*
 * Suspend the calling thread for about n milliseconds (Win32-named wrapper
 * around usleep()).
 *
 * n <= 0 returns immediately; previously a negative n was converted to
 * unsigned and turned into a very long delay.
 * The delay is issued in chunks shorter than one second because POSIX only
 * requires usleep() to accept arguments below 1,000,000 microseconds; this
 * also keeps the millisecond-to-microsecond multiplication from overflowing.
 * As before, a usleep() call cut short by a signal is not retried.
 */
void Sleep(int n)
{
	while (n > 0) {
		int chunk = (n < 999) ? n : 999;

		usleep((unsigned)chunk * 1000u);
		n -= chunk;
	}
}

#endif


/* ================================================================ */


/* ================================================================ */
/* prototypes */

/* sql.h - MySQL helpers.
 * NOTE(review): the implementations are not visible from this header; the
 * remarks below are limited to what the signatures show.
 *  - sqlConnect: takes connection parameters and a caller-provided MYSQL
 *    handle (the parameter named `table` presumably carries the database
 *    name - confirm).
 *  - my_mysql_query_and_store_results: takes a query string plus two result
 *    handles (tRes by address, srRes by value).
 *  - snprintf_mysql_escaped_sql_statement: printf-style (fmt, ...) formatter
 *    into buf of the given size; declared static, so its definition must be
 *    in the same translation unit as this header.
 */
int sqlConnect(char* hostname, char* username, char* password, char* table ,int port, MYSQL* rMysql);
int my_mysql_query_and_store_results(MYSQL*mysql, char* sqlQuery,MYSQL_RES** tRes,MYSQL_RES* srRes);
static int snprintf_mysql_escaped_sql_statement(MYSQL * mysql, char * buf, size_t size, const char * fmt, ...);

/* list.h - lexicon (OOI_NODE array) and inverted-index (INVERTED_INDEX array)
 * management.
 * NOTE(review): implementations are not visible here. Functions that take
 * OOI_NODE** presumably may reallocate the lexicon and update the caller's
 * pointer - confirm. insertion_sort() is the user of the swap() macro above.
 */
INVERTED_INDEX* InitII();
OOI_NODE* InitLexicon();
void insertion_sort(OOI_NODE* lexicon, int n);
void insertion_sort2(INVERTED_INDEX* ii, int n) ;
int lstAddWord(OOI_NODE** lexicon,unsigned int wordID);
int ndzLookForWord(OOI_NODE* lexicon, unsigned int wordID);
int iiLookForDocId(INVERTED_INDEX* ii, unsigned int doc_id, unsigned int elements);
void FreeOwsIndex(OOI_NODE* lexicon);
void UpdateInvertedIndex(OOI_NODE** lexicon, unsigned int wordID, unsigned int doc_id, unsigned int position);
void printII(INVERTED_INDEX* ii, int elem);

/* search.h - searching the lexicon and managing query slots.
 * NOTE(review): implementations are not visible here. ooiSearch() returns a
 * RANKS array and reports a count through numResults; who frees the array is
 * not stated - confirm. The slot functions presumably index queryStt[] -
 * confirm.
 */
RANKS* ooiSearch(OOI_NODE* lexicon,unsigned int* wordArray, unsigned int words, int* numResults);
int GetFreeQuerySlot();
int ActiveQuery();
int GetQuerySlotByText(unsigned int* queryArray, unsigned int n);
int GetQuerySlotByTextQuery(char* text_query);

/* parser.h - query parsing and result post-processing.
 * NOTE(review): implementations are not visible here.
 *  - The queryArray parameters are declared with MAXWORDSxQUERY elements.
 *  - queryArrayToText() takes int* while every other function in this header
 *    passes query arrays as unsigned int* - confirm which is intended.
 *  - Ownership of the char* and RANKS* return values is not stated - confirm.
 */
int parseQueries(char* query, unsigned int queryArray[MAXWORDSxQUERY], unsigned int* words);
int GetWordId(char* word);
int parseTextQueries(char* query, unsigned int queryArray[MAXWORDSxQUERY], unsigned int* words);
char* SortRANKSResults(RANKS* results, unsigned int* nResults, int lLimit, int rLimit);
char* queryArrayToText(int* queryArray, int elements);
int CheckUIntArray(unsigned int* a1, unsigned int n1, unsigned int* a2, unsigned int n2);
RANKS* mergeResults(RANKS* r1, unsigned int n1, RANKS* r2, unsigned int n2);
RANKS* limitResults(RANKS* r, unsigned int limit);


/* server.h - listening socket and per-connection handling.
 * NOTE(review): implementations are not visible here.
 */
void CreateHandleConnectionThread(struct sHandleConnection* struct_connection);
int ListenToPort(int port, SOCKET* fd);
int StartOWSServer(unsigned int port);

/* HandleConnection: its return type is `unsigned __stdcall` when WIN32 is
 * defined and `void*` otherwise; it takes a single void* argument
 * (presumably a thread entry point for each platform's thread API - confirm). */
#ifdef WIN32
unsigned __stdcall 
#else
void* 
#endif
HandleConnection(void* connection);

/* NOTE(review): `slot` presumably indexes queryStt[] - confirm. */
void WaitResults(int slot);

/* subnodes.h - communication with the sub-nodes.
 * NOTE(review): implementations are not visible here. The two
 * SendAMessageToAllNodes* variants mirror the two query representations kept
 * in struct __query (unsigned int array vs. text) - confirm.
 */
int setnonblock(SOCKET sock,int to);
int StartUpWinsock();
void EmptySocketBuffer(SOCKET sock);
int ConnectToAllSubNodes();
void SendAMessageToAllNodesOOI(unsigned int* query, int words);
void SendAMessageToAllNodesMysql(char* query);
char* RecvAMessageFromAllNodes();
void CreateRecvMessagesThread();
void InitQueryBuf(int full, int slot);

/* RecvAMessages: return type is `unsigned __stdcall` when WIN32 is defined and
 * `void*` otherwise; single void* argument. */
#ifdef WIN32
unsigned __stdcall 
#else
void* 
#endif
RecvAMessages(void* __nnnn__);


/* CheckSubNodes: same platform-dependent return type and void* argument as
 * RecvAMessages. */
#ifdef WIN32
unsigned __stdcall 
#else
void* 
#endif
CheckSubNodes(void* __nnnn__);


/* rankList.h - operations on arrays of RANKS.
 * NOTE(review): implementations are not visible here; the meaning of the
 * `sort` and `order` selectors of insertion_sortRanks() is not documented in
 * this header - confirm against the definition.
 */
RANKS* InitRankList(int elements);
int InsertDocRank(RANKS* rankList, unsigned int doc, unsigned int rank,int elements);
void insertion_sortRanks(RANKS* rankList, int n, int sort, int order);
int ranksLookForPage(RANKS* rankList, unsigned int page, int elements);

/* sgnl.h - signal handling.
 * NOTE(review): the (int) -> void signature matches a signal() handler;
 * presumably installed as one - confirm. */
void sigdie(int a);

/* ================================================================ */

/* ================================================================ */
/* global */

/* MySQL connection state and connection parameters.
 * NOTE(review): these are definitions, not extern declarations, so this
 * header can only be included from one translation unit.
 * glMysqlPort is stored as text (char[10]) while sqlConnect() takes an int
 * port - a conversion presumably happens at the call site; confirm. */
unsigned int bIsMySQLConnected;
MYSQL glMysql;
char glMysqlHost[50];
char glMysqlPort[10];
char glMysqlUser[50];
char glMysqlPass[50];
char glMysqlDB[50];

/* Lexicon bookkeeping: number_of_elements starts at 0 and actual_size starts
 * at LEXICONWORDSIZE (presumably used count vs. allocated capacity of
 * glLexicon - confirm). */
unsigned int number_of_elements = 0;
unsigned int actual_size = LEXICONWORDSIZE;

/* Global lexicon and global rank list; nRanks presumably counts the entries
 * of glRanks - confirm. */
OOI_NODE* glLexicon;
RANKS*    glRanks;
unsigned int nRanks;

/* List of the sub-nodes' sockets: parallel arrays with one entry per
 * possible sub-node (MAXSUBNODES entries each). */
struct _subNodesStruct {
	unsigned saddrElements;			/* number of sub-nodes (value taken from the .conf) */
	SOCKADDR_IN saddrSubNodes[MAXSUBNODES];	/* address of each sub-node */
	SOCKET socks[MAXSUBNODES];		/* socket of each sub-node */
	unsigned isConnected[MAXSUBNODES];	/* per-sub-node connection flag */
} subNodesStruct;

/* exit signal: global quit flag, initially 0.
 * NOTE(review): if this is written from the sigdie() signal handler and
 * polled by other code, it should probably be `volatile sig_atomic_t` -
 * confirm how it is set and read. */
int glQuit = 0;

/* ================================================================ */

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -