⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 site.cc

📁 100 病毒源碼,原始碼,無毒 ......
💻 CC
字号:
// Larbin// Sebastien Ailleret// 08-02-00 -> 15-05-00#include <unistd.h>#include <errno.h>#include <iostream.h>#include <string.h>#include <assert.h>#include <time.h>#include <fcntl.h>#include <sys/socket.h>#include <netinet/in.h>#include <netdb.h>#include <adns.h>#include <arpa/inet.h>#include <ctype.h>#include "types.h"#include "xutils/debug.h"#include "xutils/Site.h"#include "xutils/text.h"#include "xutils/connexion.h"#include "xutils/ConstantSizedFifoPriority.h"///////////////////////////////////////////////////////////// class Interval////////////////////////////////////////////////////////////** Constructor */Interval::Interval (uint size) {  this->size = size;  pos = 0;  pthread_mutex_init (&lock, NULL);  pthread_cond_init (&nonFull, NULL);}/** Destructor : never used */Interval::~Interval () {  pthread_mutex_destroy (&lock);  pthread_cond_destroy (&nonFull);}/** Ask the permission to put an url */void Interval::putOne () {  pthread_mutex_lock(&lock);  while (pos >= size) {	pthread_cond_wait(&nonFull, &lock);  }  pos++;  pthread_mutex_unlock(&lock);}/** How many urls can we put * block until at least one is possible */uint Interval::putAll () {  pthread_mutex_lock(&lock);  while (pos >= size) {	pthread_cond_wait(&nonFull, &lock);  }  if (pos == size-1) {	pos = size;	pthread_mutex_unlock(&lock);	return 1;  } else {	// This avoid some unnecessary signals (maybe stupid)	assert (pos < size-1);	int res = size-1 - pos;	pos = size-1;	pthread_mutex_unlock(&lock);	return res;  }}/** Warn an url has been retrieved */void Interval::getOne () {  pthread_mutex_lock(&lock);  if (pos-- == size) {	pthread_cond_broadcast(&nonFull);  }  pthread_mutex_unlock(&lock);}///////////////////////////////////////////////////////////// class Site////////////////////////////////////////////////////////////** Constructor : initiate fields used by the program */Site::Site () {  pthread_mutex_init (&lock, NULL);  name = newString("");  forbidden = NULL;  addr = NULL;  in = 0;  out = 0;  size = fifoSiteSize;  tab = new url *[size];  inFifo = false;}/** Destructor : This one is never used */Site::~Site () {  cerr << "Some site is deleted (should not happen\n";  pthread_mutex_destroy (&lock);  delete [] name;  if (forbidden != NULL) {	delete forbidden;  }  if (addr != NULL) {	delete addr;  }  while (in != out) {	delete tab[out];	out = (out+1) % size;  }  delete [] tab;}/** connect to this server using connection conn  * return the state of the socket */char Site::getFds (Connexion *conn) {  // use proxy  if (global::proxyAddr != NULL)	return global::getProxyFds(conn);  // no proxy  assert (addr != NULL);  int fd = socket(AF_INET, SOCK_STREAM, 0);  if (fd < 0)    return EMPTY;  conn->socket = fd;  for (;;) {    fcntl(fd, F_SETFL, O_NONBLOCK);    if (connect(fd, (struct sockaddr*) addr,                sizeof (struct sockaddr_in)) == 0) {      // success      return WRITE;    } else if (errno == EINPROGRESS) {      // would block      return CONNECTING;    } else {      // error      (void) close(fd);      return EMPTY;    }  }}/** Put an url in the fifo * If there are too much, put it back in UrlsInternal (ie on disk) */void Site::putUrl (url *u) {  pthread_mutex_lock(&lock);  if ((in - out + size) % size > maxUrlsBySite	  && global::URLsInternal->getLength() > ramUrls/2) {	// Already enough Urls in memory for this Site	global::URLsInternal->put(u);	global::inter->getOne();  } else {	// All right, put this url inside at the end of the queue	tab[in] = u;	in = (in + 1) % size;	// Change size if necessary	if (in == out) {	  uint i;	  url **tmp = new url*[2*size];	  for (i=out; i<size; i++) {		tmp[i] = tab[i];	  }	  for (i=0; i<in; i++) {		tmp[i+size] = tab[i];	  }	  in += size;	  size *= 2;	  delete [] tab;	  tab = tmp;	}	// Put Site in fifo if not yet in	if (!inFifo) {	  inFifo = true;	  if (!strcmp(name, tab[out]->getHost())		  && port == tab[out]->getPort()		  && lastUpdate + dnsValidTime >= time(NULL)) {		global::okSites->put(this);	  } else {		global::dnsSites->put(this);	  }	}  }  pthread_mutex_unlock(&lock);}/** Put an prioritarian url in the fifo * Up to now, it's very naive * because we have no memory of priority inside the url */void Site::putPriorityUrl (url *u) {  pthread_mutex_lock(&lock);  if (in == out) {	// first url on this site	tab[in] = u;	in = (in+1) % size;  } else {	// store the url in second position (the first might be in use)	uint tmp = out;	out = (out + size - 1) % size;	tab[out] = tab[tmp];	tab[tmp] = u;	// Change size if necessary	if (in == out) {	  uint i;	  url **tmp = new url*[2*size];	  for (i=out; i<size; i++) {		tmp[i] = tab[i];	  }	  for (i=0; i<in; i++) {		tmp[i+size] = tab[i];	  }	  in += size;	  size *= 2;	  delete [] tab;	  tab = tmp;	}  }  // Put Site in fifo if not yet in  if (!inFifo) {	inFifo = true;	if (!strcmp(name, tab[out]->getHost())		&& port == tab[out]->getPort()		&& lastUpdate + dnsValidTime >= time(NULL)) {	  global::okSites->put(this);	} else {	  global::dnsSites->put(this);	}  }  pthread_mutex_unlock(&lock);}/** Get an url from the fifo * resize tab if too big * the lock must be set when calling this method */url *Site::getUrl () {  url *u = tab[out];  out = (out + 1) % size;  if (size > fifoSiteSize && ((in - out + size) % size)*3 < size) {	// if the tab is too big, reduce it	url **tmp = new url*[size/2];	uint i;	for (i=0; ((out+i) % size) != in; i++) {	  tmp[i] = tab[(out+i) % size];	}	out = 0;	in = i;	size /= 2;	delete [] tab;	tab = tmp;  }  return u;}/** fetch the first page in the fifo * never perform dns calls */void Site::fetchNonBlock () {  pthread_mutex_lock(&lock);  if (in == out) {	// no more url to read	// This is possible because this function can be called recursively	// (fetchBlock cannot)	inFifo = false;	pthread_mutex_unlock(&lock);  } else {	if (!strcmp(name, tab[out]->getHost())		&& port == tab[out]->getPort()		&& lastUpdate + dnsValidTime >= time(NULL)) {	  int waitD = lastAccess + global::waitDuration - time(NULL);	  if (waitD > 0) {		// We should wait a little		sleep(waitD);	  }	  // all right, let's go	  urls();	  url *u = getUrl();	  pthread_mutex_unlock(&lock);#ifndef NOSTATS	  if (global::isSpecific          && endWithIgnoreCase(global::privilegedExt, u->getFile())) {		extensionTreated();	  }#endif	  global::inter->getOne();	  goodSite(u);	} else {	  // That's a new site => don't manage it	  pthread_mutex_unlock(&lock);	  global::dnsSites->put(this);	}  }}/** Init a new dns query */void Site::newQuery (uint *nbCalls) {  // Update our stats  newId();  if (global::proxyAddr != NULL) {    // we use a proxy, no need to get the sockaddr    // give anything for going on    siteSeen();    siteDNS();    // Get the robots.txt    dnsOK(new sockaddr_in);  } else if (isdigit(name[0])) {    // the name already in numbers-and-dots notation	siteSeen();	sockaddr_in *saddr = new sockaddr_in;	if (inet_aton(name, &saddr->sin_addr)) {	  // Yes, it is in numbers-and-dots notation	  siteDNS();	  // compute the new addr	  saddr->sin_family = AF_INET;	  saddr->sin_port = htons(port);	  // Get the robots.txt	  dnsOK(saddr);	} else {	  // No, it isn't : this site is a non sense	  delete saddr;	  dnsErr();	}   } else {    (*nbCalls)++;    adns_query quer = NULL;    // adns_query *quer = new adns_query;    crash("Submit an adns query");    int res = adns_submit(global::ads, name,                          (adns_rrtype) adns_r_addr,                          (adns_queryflags) 0,                          this, &quer);    crash("End of submission");    assert (res == 0);  }}/** The dns query ended with success */void Site::dnsAns (adns_answer *ans) {  siteSeen();  assert (addr == NULL);  if (ans->status != adns_s_ok) {	// No addr inet	dnsErr();  } else {	siteDNS();	// compute the new addr	sockaddr_in *saddr = new sockaddr_in;	memcpy (saddr,			&ans->rrs.addr->addr.inet,			sizeof (sockaddr_in));	saddr->sin_family = AF_INET;	saddr->sin_port = htons(port);	// Get the robots.txt	dnsOK(saddr);  }}/** we've got a good dns answer * get the robots.txt */void Site::dnsOK (sockaddr_in *saddr) {  urls();  addr = saddr;  pthread_mutex_lock(&lock);  url *u = getUrl();  pthread_mutex_unlock(&lock);#ifndef NOSTATS  if (global::isSpecific && endWithIgnoreCase(global::privilegedExt, u->getFile())) {	extensionTreated();  }#endif  global::inter->getOne();  stateBlock(14);  Connexion *conn = global::freeConns->getPriority();  stateBlock(15);  char res = getFds(conn);  if (res != EMPTY) {	conn->timeout = time(NULL) + timeoutPage;	if (global::proxyAddr != NULL) {	  conn->request.addString("GET http://");	  conn->request.addString(name);	  char tmp[15];	  sprintf(tmp, ":%u", port);	  conn->request.addString(tmp);	  conn->request.addString("/robots.txt HTTP/1.0\r\nHost: ");	} else {	  conn->request.addString("GET /robots.txt HTTP/1.0\r\nHost: ");	}	conn->request.addString(u->getHost());	conn->request.addString(global::headers);	conn->parser = new robots(u);	conn->pos = 0;	// This must be done in last	// Be careful, There is no lock !!!!	conn->state = res;	// keep lock while fetching  } else {	// Unable to get a socket	fetchFail(u, noConnection);	answers(noConnection);  // stat	delete u;	global::freeConns->put(conn);	fetchNonBlock();  }}/** Cannot get the inet addr */void Site::dnsErr () {  pthread_mutex_lock(&lock);  url *u = getUrl();  while (!strcmp(name, u->getHost())		 && port == u->getPort()) {	fetchFail(u, noConnection);	urls();	answers(noDNS);#ifndef NOSTATS	if (global::isSpecific && endWithIgnoreCase(global::privilegedExt, u->getFile())) {	  extensionTreated();	}#endif	delete u;	global::inter->getOne();	if (in == out) {	  break;	} else {	  u = getUrl();	}  }  if (in != out) {	// put the url in the tab	out = (out + size - 1) % size;	tab[out] = u;	global::dnsSites->put(this);  } else {	inFifo = false;  }  pthread_mutex_unlock(&lock);}/** We have an url on the good site : Connect it */void Site::goodSite (url *u) {  // That's the good site  if (addr == NULL) {	// We didn't manage to get the inet addr of this site	fetchFail(u, noDNS);	answers(noDNS);	delete u;	fetchNonBlock();  } else {	// Connection is possible	Connexion *conn = global::freeConns->get();	connectUrl(conn, u);  }}/* try to connect to a site * and ask for an file * if you modifie this function, don't forget connectThisUrl */void Site::connectUrl (Connexion *conn, url *u) {  if (testRobots(u->getFile())) {	// We're allowed to fetch this one	// open the socket	char res = getFds(conn);	if (res != EMPTY) {	  lastAccess = time(NULL);	  conn->timeout = lastAccess + timeoutPage;	  conn->request.addString("GET ");	  if (global::proxyAddr != NULL) {		char *tmp = u->giveUrl();		conn->request.addString(tmp);		delete [] tmp;	  } else {		conn->request.addString(u->getFile());	  }	  conn->request.addString(" HTTP/1.0\r\nHost: ");	  conn->request.addString(u->getHost());	  conn->request.addString(global::headers);	  conn->parser = new html (u);	  conn->pos = 0;	  // This must be done in last	  // Be careful, There is no lock !!!!	  conn->state = res;	  // We keep the lock while fetching	} else {	  // Unable to connect	  fetchFail(u, noConnection);	  answers(noConnection);	  delete u;	  global::freeConns->put(conn);	  fetchNonBlock();	}  } else {	// We're not welcome on this site	fetchFail(u, forbiddenRobots);	answers(forbiddenRobots);	delete u;	global::freeConns->put(conn);	fetchNonBlock();  }}/* try to connect to a site * and ask for an file * do not perform any freeConns.get (might cause deadlock) * if you modifie this function, don't forget connectUrl */void Site::connectThisUrl (Connexion *conn, url *u) {  if (testRobots(u->getFile())) {	// We're allowed to fetch this one	// open the socket	char res = getFds(conn);	if (res != EMPTY) {	  lastAccess = time(NULL);	  conn->timeout = lastAccess + timeoutPage;	  conn->request.addString("GET ");	  if (global::proxyAddr != NULL) {		char *tmp = u->giveUrl();		conn->request.addString(tmp);		delete [] tmp;	  } else {		conn->request.addString(u->getFile());	  }	  conn->request.addString(" HTTP/1.0\r\nHost: ");	  conn->request.addString(u->getHost());	  conn->request.addString(global::headers);	  conn->parser = new html (u);	  conn->pos = 0;	  // This must be done in last	  // Be careful, There is no lock !!!!	  conn->state = res;	  // We keep the lock while fetching	} else {	  // Unable to connect	  fetchFail(u, noConnection);	  answers(noConnection);	  delete u;	  global::freeConns->put(conn);	  putInFifo();	}  } else {	// We're not welcome on this site	fetchFail(u, forbiddenRobots);	answers(forbiddenRobots);	delete u;	global::freeConns->put(conn);	putInFifo();  }}/** test if a file can be fetched thanks to the robots.txt */bool Site::testRobots(char *file) {  if (forbidden == NULL) {	return true;  } else {	int i=0;	while ((*forbidden)[i] != NULL) {	  if (startWith((*forbidden)[i], file)) {		return false;	  }	  i++;	}	return true;  }}/** Delete the old identity of the site */void Site::newId () {  // The lock protects tab, which can be change by putUrl  pthread_mutex_lock(&lock);  assert (strcmp(name, tab[out]->getHost())		  || port != tab[out]->getPort()		  || lastUpdate + dnsValidTime <= time(NULL));  // Change the identity of this site#ifndef NDEBUG  if (name[0] == 0) {	addsite();  }#endif // NDEBUG  url *u = tab[out];  pthread_mutex_unlock(&lock);  delete [] name;  name = newString(u->getHost());  port = u->getPort();  lastUpdate = time(NULL);  lastAccess = 0;  // Delete old forbidden list  if (forbidden != NULL) {	delete forbidden;	forbidden = NULL;  }  if (addr != NULL) {	delete addr;	addr = NULL;  }}/** After a fetch, decide whether or not the site must be * put in okSites or dnsSites */void Site::putInFifo () {  assert (inFifo);  pthread_mutex_lock(&lock);  if (in == out) {	inFifo = false;  } else if (!strcmp(name, tab[out]->getHost())			 && port == tab[out]->getPort()			 && lastUpdate + dnsValidTime >= time(NULL)) {	global::okSites->put(this);  } else {	global::dnsSites->put(this);  }  pthread_mutex_unlock(&lock);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -