⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 proxyharvest.py

📁 It fetches lists of proxies from the Internet and checks/validates them.
💻 PY
字号:
#!/usr/bin/python## ProxyHarvest.py v1.1# Extract IP:Port from a proxylist site# linux ONLY# code from low1z lurking at darkc0de.com# this code is protected under the gpl# get your copy at <http://www.gnu.org/licenses/>## update from 0.9 - 1.1 notes# - fetch planetlab list & clean our list with it# 	- havent seen such a function in a script yet... # - validate external ip with whatsmyip.com# 	- 3rd party API, but very reliable & nice ... ASP thou# - due to urllib1/2 limitations there is no way yet to except username/passwd input# - socket timeout for validationimport sys, urllib, urllib2, re, httplib, sets, socket, timefrom time import time, localtime, strftimefrom socket import gethostbyaddroutput = 'proxylist.txt'sleeptimer = 3socket.setdefaulttimeout(2)alivelist = []myipadress = urllib.urlopen('http://www.whatismyip.com/automation/n09230945.asp').read()anon_list = []trans_list = []planetlab = []sites = ['http://www.darkc0de.com/cgi-bin/proxies.py',	 'http://www.1proxyfree.com/', 	 'http://www.atomintersoft.com/products/alive-proxy/socks5-list/',	 'http://www.samair.ru/proxy/proxy-01.htm',	 'http://www.proxylist.net/',	 'http://www.proxylists.net/http_highanon.txt']def StripTags(text):	return re.sub(r'<[^>]*?>','', text)def timer():	now = strftime('%H:%M:%S-%d/%b/%Y', localtime())	return nowdef ipcheck(proxy):	try:		pxhandle = urllib2.ProxyHandler({"http": proxy})		opener = urllib2.build_opener(pxhandle)		urllib2.install_opener(opener)		myip = urllib2.urlopen('http://www.whatismyip.com/automation/n09230945.asp').read()		xs =  re.findall(('\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}'), StripTags(myip))		if xs[0] == myipadress or myipadress == myip:			trans_list.append(proxy)			print proxy[:-1],"\t- ALIVE BUT TRANSPARENT - Date :", timer()		elif xs == None:			pass		else:			anon_list.append(proxy)			print proxy[:-1],"\t- ALIVE - EXT-iP :",xs[0], "Date :", timer()	except KeyboardInterrupt:		print "\n\nall your base belongs to CTRL+C\n\n"		sys.exit(0)	except:		passdef 
proxyvalidator(proxylist):        finalcount = 0	for proxy in proxylist:		proxy.replace('\n', '')		try:			proxies = {'http': "http://"+proxy[:-1]}			opener = urllib.FancyURLopener(proxies)			try:				loopchk = opener.open("http://www.google.com").read()# main response check			except:				pass		except(IOError,socket.timeout), detail: 			pass		ipcheck(proxy)				alivelist.append(proxy)		finalcount += 1	return alivelistdef getsamairdotru():	counter = 1	pxycnt = 0	maxpages = 10	urls = []	pfile = file(output, 'a')	print "gathering sites, this takes", 60*sleeptimer/maxpages, "Minutes..."	while counter <= maxpages:		if counter < 10: # workaround for page-01 to page-09			opener = urllib2.build_opener()			opener.addheaders = [('User-agent', 'Mozilla/5.0')]			url = opener.open('http://www.samair.ru/proxy/proxy-0'+repr(counter)+'.htm').read()		else:			opener = urllib2.build_opener()			opener.addheaders = [('User-agent', 'Mozilla/5.0')]			url = opener.open('http://www.samair.ru/proxy/proxy-'+repr(counter)+'.htm').read()		strings = re.findall(('\d{1,3}.\d{1,3}.\d{1,3}.\d{1,3}:\d{1,5}'), StripTags(url))		for string in strings:			pfile.write(string+"\n")			pxycnt = pxycnt+1		if counter == maxpages/2:			print "halfway done..."		
time.sleep(sleeptimer)		counter = counter+1				opener.close()	print pxycnt, "\t: Proxies received from : http://www.samair.ru/proxy/"	pfile.close()def getsinglesitelist(site):        pxycnt = 0        urls = []        pfile = file(output, 'a')        opener = urllib2.build_opener()        opener.addheaders = [('User-agent', 'Mozilla/5.0')]        url = opener.open(site).read()        strings = re.findall(('\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3}[:]\d{1,5}'), StripTags(url))        for string in strings:		pfile.write(string+"\n")                pxycnt = pxycnt+1	print pxycnt, "\t: Proxies recieved from :", site.split("//",3)[1]        opener.close()        pfile.close()def getplanetlabs(): # gets rid of planetlab proxies	opener = urllib2.build_opener()        url = opener.open('http://fall.cs.princeton.edu/codeen/tabulator.cgi?table=table_all').read()	strings = re.findall(('\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3}'), StripTags(url))	for string in strings:		planetlab.append(string)	print "PlanetLab Proxylist Loaded :", len(planetlab), "\n"def cleanup(): # uniq entries in outputfile	pfile = open(output, 'r').readlines()	outfile = file(output, 'w')	sorted = []	finalcount = 0	psremove = 0	for proxy in pfile:		if proxy.split(':',1)[0] not in planetlab:			if proxy not in sorted:				sorted.append(proxy)				outfile.write(proxy)				finalcount += 1		if proxy.split(':',1)[0] in planetlab:			psremove += 1	print "\n", psremove, "\t: useless PlanetLab Servers removed!"	
print finalcount,"\t: unique Proxies harvested check",output, "\n"	print "+-[Starting Validation]-----------------------------------------------------+"	outfile.close()def fileConst(): # construct the final file	fileC = open(output, 'w')	falive = []	fileC.write('+ ANONYMOUS PROXIES\n\n')	for anon in anon_list:		fileC.write(anon)		if anon in alivelist:			alivelist.remove(anon)        fileC.write('\n\n+ TRANSPARENT PROXIES\n\n')        for trans in trans_list:                fileC.write(trans)		if trans in alivelist:			alivelist.remove(trans)	fileC.write('\n\n+ WORKING BUT UNCLEAR PROXIES\n\n')	alivelist.sort()	for alive in alivelist:		fileC.write(alive)	fileC.close()	def helpme():	print "| -s  / -sitecollect   :: gathers proxylists                              |"	print "| -m  / -multipage     :: get incremental pages                           |"	print "| -a  / -all           :: do ALL!!!                                       |"	print "| -vl / - validatelist :: check a file                                    |"	print "+-------------------------------------------------------------------------+"print "+-------------------------------------------------------------------------+"print "|              ProxyHarvest.py 1.1                                        |"print "|            low1z 2009 // darkc0de                                       |"print "+-------------------------------------------------------------------------+"getplanetlabs()print "IP:", myipadress, ":", timer(), "\n"if len(sys.argv) <= 1:        print "\n\t < use -help to get options >\n"        sys.exit(1)for arg in sys.argv[1:]:	validate = 0 # WTF	if arg.lower() == "-h" or arg.lower() == "-help":        	helpme()			if arg.lower() == "-s" or arg.lower() == "-sitecollect":		for site in sites:			try:			        getsinglesitelist(site)			except:				print "Error   :", site		cleanup()	        proxylist = open(output, 'r').readlines()		proxyvalidator(proxylist)	if arg.lower() == "-m" or arg.lower() == 
"-multipage":		getsamairdotru()		cleanup()		print "takes ages to print out good proxies, be PATIENT!"		try:        		proxylist = open(output, 'r').readlines()			proxyvalidator(proxylist)		except:			pass	if arg.lower() == "-a" or arg.lower() == "-all":                for site in sites:                        getsinglesitelist(site)		getsamairdotru()		cleanup()		proxylist = open(output, 'r').readlines()				proxyvalidator()	if arg.lower() == "-vl" or arg.lower() == "-validatelist":                try:			proxyfile = open(sys.argv[2], 'r').readlines()                except(IndexError):                        print "Error: check you proxy file ...\n"		proxyvalidator(proxyfile)fileConst()print "\n+-[ANON LIST]-------------------------------------------------------------+\n"for anon_proxy in anon_list:	try:         	haddr = gethostbyaddr(anon_proxy.split(':',1)[0])	except:		haddr = '-'	print anon_proxy.replace('\n',''), "\t:\t", haddr[0]print "\n", len(anon_list), ": Total tested AnonProxies\n"print "+-[TRANS LIST]--------------------------------------------------------------+\n"for trans_proxy in trans_list:	print trans_proxy.replace('\n','')print "\n", len(trans_list), ": Total tested Transparent Proxies\n"print "+-[OTHER SERVERS]-----------------------------------------------------------+\n"if len(alivelist) > 16:	print "\n", (len(alivelist)-(len(trans_list)+len(anon_list))), "Alive but unverified Servers, check", output	print "to have a look, lines in proxylist are unsorted :-("else:	for alive in alivelist:		if alive not in trans_list:			if alive not in anon_list:				 print alive.replace('\n','')

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -