⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 core.c

📁 a meta search engine
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>#include <stdlib.h>																								// per atoi#include <string.h>																								// per strlen, strcat#include <unistd.h>																								// per write#include <sys/types.h>																							// per open#include <sys/stat.h>																							// per open#include <fcntl.h>																								// per open#include <sys/select.h>																							// per select#include <errno.h>																								// per errno#include "macro.h"#include "searchengines.h"#include "core.h"#include "bench.h"#include "parser.h"#include "socketutils.h"#include "cmdline.h"#include "utils.h"char *query;// resetStatistic, printStatistic, printFound ---------------------------------// Reset or print the statistics on the done work// ----------------------------------------------------------------------------void resetStatistic() {	stats.ld_time		= 0;	stats.rx_time		= 0;	stats.rx_bytes		= 0;	stats.tx_time		= 0;	stats.tx_bytes		= 0;	stats.sl_time		= 0;	stats.prs_time		= 0;	stats.prs_result	= 0;	stats.req_result	= 0;	}void printStatistic() {	printf("<STATS>\n");	printf("\t<LOADING time=\"%d\"/>\n",stats.ld_time);	printf("\t<READ bytes=\"%d\"/>\n", stats.rx_bytes);	printf("\t<WRITE time=\"%d\" bytes=\"%d\"/>\n",stats.tx_time, stats.tx_bytes);	printf("\t<SELECT time=\"%d\"/>\n",stats.sl_time);	printf("\t<PARSER time=\"%d\" results=\"%d\"/>\n",stats.prs_time, stats.prs_result);	printf("\t<REQUEST query=\"%s\" result=\"%d\"/>\n",query, stats.req_result);	printf("</STATS>\n");}void printFound(struct engineWork e[], short used) {	short i;	printf("<FOUND>\n");	for (i=0; i<used; i++)		if (e[i].entry.found!=NULL) {			printf("\t<ENGINE name=\"%s\" found=\"%s\"/>\n", e[i].info->name, e[i].entry.found);			free(e[i].entry.found);		}	printf("</FOUND>\n");}void printError(struct engineWork e[], short used) {	short i;	printf("<ERROR>\n");	for (i=0; i<used; i++)		if (e[i].error!=NULL) {			printf("\t<ENGINE name=\"%s\" error=\"%s\"/>\n", e[i].info->name, 
e[i].error);			free(e[i].error);		}	printf("</ERROR>\n");}void printRxTimes(struct engineWork e[], short used) {	short i;	printf("<RXTIMES>\n");	for (i=0; i<used; i++)		printf("\t<ENGINE name=\"%s\" rxTime=\"%d\"/>\n", e[i].info->name, e[i].rxTime);	printf("</RXTIMES>\n");	}void printParsedResults(struct engineWork e[], short used) {	short i;	printf("<PARSER>\n");	for (i=0; i<used; i++)		printf("\t<ENGINE name=\"%s\" requested=\"%d\" parsed=\"%d\"/>\n", e[i].info->name, e[i].requested, e[i].parsed);	printf("</PARSER>\n");	}// ----------------------------------------------------------------------------// setReadInterest, setWriteInterest,  setNoInterest --------------------------// Enable and disable the WRITING or the READING interest for the engine// ----------------------------------------------------------------------------void setReadInterest(struct engineWork *e) {	e->readretry  = 5;	e->writeretry = 0;}void setWriteInterest(struct engineWork *e) {	e->readretry  = 0;	e->writeretry = 5;}void setNoInterest(struct engineWork *e) {	e->readretry  = 0;	e->writeretry = 0;	socketClose(e->socket);																						// close the engine's socket}// ----------------------------------------------------------------------------// engineError ----------------------------------------------------------------// Set  no  interest  for  the  broken engine, and print the error in the FOUND// variable// ----------------------------------------------------------------------------int engineError(struct engineWork *e, char *error) {	e->error = malloc(strlen(error)+strlen(strerror(errno))+4);										// reserve memory for error description	strcpy(e->error,error);																						// copy the passed string	strcat(e->error," (");																						// insert the separator	strcat(e->error,strerror(errno));																		// insert the ERRNO description	strcat(e->error,")");																						// insert the tail chars	setNoInterest(e);																								// reset interest 
in the broken engine	return -1;}// ----------------------------------------------------------------------------// engineInit -----------------------------------------------------------------// Initialize engine's working parameters// ----------------------------------------------------------------------------int engineInit(struct engineWork *e) {	e->entry.url	= NULL;																						// reset newEntry URL pointer	e->entry.title	= NULL;																						// reset newEntry TITLE pointer	e->entry.text	= NULL;																						// reset newEntry TEXT pointer	e->entry.found	= NULL;																						// reset newEntry FOUND pointer	e->entry.rank	= 1;																							// reset the new-entry RANK counter		e->rxTime		= 0;																							// reset the RX time counter	e->error			= NULL;																						// reset the engine ERROR report	e->parsed		= 0;																							// reset the number of requested result	e->requested	= 0;																							// reset the number of parsed result		e->socket = createTCPSocket();																			// create engine TCP socket	if (e->socket==-1) return engineError(e, "createTCPSocket");									// if error, report it and stop engine		setSocketOptions(e->socket);																				// set socket options (NON_BLOCK)		setWriteInterest(e);																							// set WRITING INTEREST	return 1;}// ----------------------------------------------------------------------------// readyToWrite, readyToRead --------------------------------------------------// Return TRUE if the engine have something to read/write and is ready to do it// ----------------------------------------------------------------------------char readyToWrite(struct engineWork e, fd_set *wset) {	return (e.writeretry>0 && FD_ISSET(e.socket, wset));}char readyToRead(struct engineWork e, fd_set *rset) {	return (e.readretry>0 && FD_ISSET(e.socket, rset));}// ----------------------------------------------------------------------------// 
search----------------------------------------------------------------------// Launch the search of the query Q on the given engines, parsing their setting// ----------------------------------------------------------------------------int search(char *q, short used, char *argv[]){	struct engineWork engine[used];																			// array of used search engines	struct timeval timeout;																						// timeout structure for SELECT	short maxfd;																									// maximum socket value	fd_set rset, wset;																							// read & write structures for SELECT	short readysocket, i;	query = q;																										// set the global query var to given	bench(1);																										// start LOADING timer	resetStatistic();																								// reset STATS structure	loadEnginesInformations("engines.dat");																// build the usable web engines list	for (i=0; i<used; i++) {		engineInit(&engine[i]);																					// create TCP-socket, set W interest		engineSetup(argv[i], &engine[i]);																	// setup the engine with given arg		engineConnect(&engine[i]);																				// connect the engine	}	stats.ld_time = bench(1);																					// save LOADING time and reset timer		while (stillAny(engine, used)) {																			// while at least one've smthing to R/W		setTimeout(5000, &timeout);																			// set SELECT timeout		setSocketSet(engine, used, &rset, &wset);															// prepare the FD_SET structures		maxfd = maxEngineSocket(engine, used) + 1;														// calculate the maximum between FDs				bench(1);		readysocket = select(maxfd, &rset, &wset, NULL, &timeout);									// wait for a ready socket		stats.sl_time += bench(1);																				// save time spent in SELECT			switch (readysocket) {			case -1:																									// ERROR, then retry				if (config.verbose) printf("SELECT: failed!\n");				continue;						case 0:																									// TIMEOUT, decrease retrycount & retry				if 
(config.verbose) printf("SELECT: timeout!\n");				decreaseRetry(engine, used);				continue;						default:																									// AT LEAST ONE IS READY				for (i=0; i<used; i++) {																		// look between the used engines					if (readyToWrite(engine[i], &wset)) {													// if it's socket is ready for writing						tx(&engine[i]);																			// try to send waiting datas					}					if (readyToRead(engine[i], &rset)) {													// if it's socket is ready for reading						rx(&engine[i]);																			// try to read waiting datas						if (config.result_parsing) parse(&engine[i]);									// parse what read					}				}				continue;		}	}	printStatistic();																								// print statistics	printRxTimes(engine, used);																				// print RX times for the engines	printParsedResults(engine, used);																		// print statistic about the parser	printFound(engine, used);																					// print grabbed "found results"	printError(engine, used);																					// print encountered errors	freeSearchEnginesMemory();																					// free memory of usable web engines	return 0;}// ----------------------------------------------------------------------------// clearInputBuffer, clearOutputBuffer ----------------------------------------// Clear and initialize INPUT or OUTPUT buffer, and pointers.// ----------------------------------------------------------------------------void clearInputBuffer(struct engineWork *e) {	bzero(e->ib, ibsize);	e->ib_free = e->ib;	e->ib_next = e->ib;}void clearOutputBuffer(struct engineWork *e) {	bzero(e->ob, obsize);	e->ob_next = e->ob;}// ----------------------------------------------------------------------------// dumpToDisk -----------------------------------------------------------------// Append  the  content of the cache to a file on the disk with the name of the// engine host// ----------------------------------------------------------------------------void dumpToDisk(struct 
engineWork *e) {	short f;	f = open(e->info->host, O_CREAT|O_APPEND|O_WRONLY, S_IRUSR|S_IWUSR);	write(f, e->ib_free, strlen(e->ib_free));	close(f);}// ----------------------------------------------------------------------------// enginePageCalc -------------------------------------------------------------// Calculate the %PAGE index for queries.//    0) %PAGE = page//		1) %PAGE = page + 1//    2) %PAGE = page * rpp//    3) %PAGE = (page * rpp) + 1// ----------------------------------------------------------------------------char *enginePageCalc(char pagecalc, short rpp, short page) {	short n;		switch (pagecalc) {		case 0:			n = (page - 1);			break;		case 1:			n = (page - 1) + 1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -