📄 proxyharvest.py
#!/usr/bin/python
#
# ProxyHarvest.py v1.1
# Extract IP:Port pairs from proxy-list sites
# Linux ONLY
# code from low1z lurking at darkc0de.com
# this code is protected under the GPL
# get your copy at <http://www.gnu.org/licenses/>
#
# update notes from 0.9 to 1.1
# - fetch the PlanetLab node list and clean our list with it
#   (haven't seen such a function in a script yet...)
# - validate the external IP with whatismyip.com
#   (3rd-party API, but very reliable and nice... ASP though)
# - due to urllib/urllib2 limitations there is no way yet to accept username/passwd input
# - socket timeout for validation

import sys, urllib, urllib2, re, socket
from time import sleep, localtime, strftime
from socket import gethostbyaddr

output = 'proxylist.txt'
sleeptimer = 3
socket.setdefaulttimeout(2)

alivelist = []
anon_list = []
trans_list = []
planetlab = []
# our own external IP, used as the reference when classifying proxies
myipaddress = urllib.urlopen('http://www.whatismyip.com/automation/n09230945.asp').read()
sites = ['http://www.darkc0de.com/cgi-bin/proxies.py',
         'http://www.1proxyfree.com/',
         'http://www.atomintersoft.com/products/alive-proxy/socks5-list/',
         'http://www.samair.ru/proxy/proxy-01.htm',
         'http://www.proxylist.net/',
         'http://www.proxylists.net/http_highanon.txt']

def StripTags(text):
    # remove HTML tags before running the IP regexes
    return re.sub(r'<[^>]*?>', '', text)

def timer():
    # timestamp string used in the status output
    return strftime('%H:%M:%S-%d/%b/%Y', localtime())

def ipcheck(proxy):
    # request our own IP through the proxy; if the returned address still matches
    # myipaddress the proxy is transparent, otherwise it hides us (anonymous)
    try:
        pxhandle = urllib2.ProxyHandler({"http": proxy.strip()})
        opener = urllib2.build_opener(pxhandle)
        urllib2.install_opener(opener)
        myip = urllib2.urlopen('http://www.whatismyip.com/automation/n09230945.asp').read()
        xs = re.findall(r'\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3}', StripTags(myip))
        if not xs:
            pass                                        # no IP found in the response
        elif xs[0] == myipaddress or myipaddress == myip:
            trans_list.append(proxy)
            print proxy.strip(), "\t- ALIVE BUT TRANSPARENT - Date :", timer()
        else:
            anon_list.append(proxy)
            print proxy.strip(), "\t- ALIVE - EXT-iP :", xs[0], "Date :", timer()
    except KeyboardInterrupt:
        print "\n\nall your base belongs to CTRL+C\n\n"
        sys.exit(0)
    except:
        pass                                            # dead or unusable proxy

def proxyvalidator(proxylist):
    # quick liveness check against google.com, then classify each proxy with ipcheck()
    finalcount = 0
    for proxy in proxylist:
        try:
            proxies = {'http': "http://" + proxy.strip()}
            opener = urllib.FancyURLopener(proxies)
            try:
                opener.open("http://www.google.com").read()     # main response check
            except:
                pass
        except (IOError, socket.timeout):
            pass
        ipcheck(proxy)
        alivelist.append(proxy)
        finalcount += 1
    return alivelist

def getsamairdotru():
    # walk the paginated list on samair.ru and append every ip:port found to the output file
    counter = 1
    pxycnt = 0
    maxpages = 10
    pfile = open(output, 'a')
    print "gathering pages, this takes about", maxpages * sleeptimer, "seconds..."
    while counter <= maxpages:
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        # pages are named proxy-01.htm .. proxy-10.htm, so zero-pad the counter
        url = opener.open('http://www.samair.ru/proxy/proxy-%02d.htm' % counter).read()
        strings = re.findall(r'\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3}:\d{1,5}', StripTags(url))
        for string in strings:
            pfile.write(string + "\n")
            pxycnt = pxycnt + 1
        if counter == maxpages / 2:
            print "halfway done..."
        sleep(sleeptimer)
        counter = counter + 1
    opener.close()
    print pxycnt, "\t: Proxies received from : http://www.samair.ru/proxy/"
    pfile.close()

def getsinglesitelist(site):
    # scrape a single site for ip:port pairs and append them to the output file
    pxycnt = 0
    pfile = open(output, 'a')
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    url = opener.open(site).read()
    strings = re.findall(r'\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3}[:]\d{1,5}', StripTags(url))
    for string in strings:
        pfile.write(string + "\n")
        pxycnt = pxycnt + 1
    print pxycnt, "\t: Proxies received from :", site.split("//", 3)[1]
    opener.close()
    pfile.close()

def getplanetlabs():
    # load the PlanetLab/CoDeeN node list so cleanup() can drop those hosts later
    opener = urllib2.build_opener()
    url = opener.open('http://fall.cs.princeton.edu/codeen/tabulator.cgi?table=table_all').read()
    strings = re.findall(r'\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3}', StripTags(url))
    for string in strings:
        planetlab.append(string)
    print "PlanetLab Proxylist Loaded :", len(planetlab), "\n"

def cleanup():
    # keep only unique entries in the output file and drop known PlanetLab hosts
    pfile = open(output, 'r').readlines()
    outfile = open(output, 'w')
    seen = []
    finalcount = 0
    psremove = 0
    for proxy in pfile:
        if proxy.split(':', 1)[0] not in planetlab:
            if proxy not in seen:
                seen.append(proxy)
                outfile.write(proxy)
                finalcount += 1
        else:
            psremove += 1
    print "\n", psremove, "\t: useless PlanetLab Servers removed!"
    print finalcount, "\t: unique Proxies harvested, check", output, "\n"
    print "+-[Starting Validation]-----------------------------------------------------+"
    outfile.close()

def fileConst():
    # rewrite the output file, grouped by classification
    fileC = open(output, 'w')
    fileC.write('+ ANONYMOUS PROXIES\n\n')
    for anon in anon_list:
        fileC.write(anon)
        if anon in alivelist:
            alivelist.remove(anon)
    fileC.write('\n\n+ TRANSPARENT PROXIES\n\n')
    for trans in trans_list:
        fileC.write(trans)
        if trans in alivelist:
            alivelist.remove(trans)
    fileC.write('\n\n+ WORKING BUT UNCLEAR PROXIES\n\n')
    alivelist.sort()
    for alive in alivelist:
        fileC.write(alive)
    fileC.close()

def helpme():
    print "| -s  / -sitecollect  :: gathers proxylists |"
    print "| -m  / -multipage    :: get incremental pages |"
    print "| -a  / -all          :: do ALL!!! |"
    print "| -vl / -validatelist :: check a file |"
    print "+-------------------------------------------------------------------------+"

print "+-------------------------------------------------------------------------+"
print "| ProxyHarvest.py 1.1 |"
print "| low1z 2009 // darkc0de |"
print "+-------------------------------------------------------------------------+"
getplanetlabs()
print "IP:", myipaddress, ":", timer(), "\n"

if len(sys.argv) <= 1:
    print "\n\t < use -help to get options >\n"
    sys.exit(1)

for arg in sys.argv[1:]:
    if arg.lower() == "-h" or arg.lower() == "-help":
        helpme()
    if arg.lower() == "-s" or arg.lower() == "-sitecollect":
        for site in sites:
            try:
                getsinglesitelist(site)
            except:
                print "Error :", site
        cleanup()
        proxylist = open(output, 'r').readlines()
        proxyvalidator(proxylist)
    if arg.lower() == "-m" or arg.lower() == "-multipage":
        getsamairdotru()
        cleanup()
        print "takes ages to print out good proxies, be PATIENT!"
        try:
            proxylist = open(output, 'r').readlines()
            proxyvalidator(proxylist)
        except:
            pass
    if arg.lower() == "-a" or arg.lower() == "-all":
        for site in sites:
            getsinglesitelist(site)
        getsamairdotru()
        cleanup()
        proxylist = open(output, 'r').readlines()
        proxyvalidator(proxylist)
    if arg.lower() == "-vl" or arg.lower() == "-validatelist":
        try:
            proxyfile = open(sys.argv[2], 'r').readlines()
        except IndexError:
            print "Error: check your proxy file ...\n"
            sys.exit(1)
        proxyvalidator(proxyfile)

fileConst()

print "\n+-[ANON LIST]-------------------------------------------------------------+\n"
for anon_proxy in anon_list:
    try:
        haddr = gethostbyaddr(anon_proxy.split(':', 1)[0])
    except:
        haddr = '-'
    print anon_proxy.replace('\n', ''), "\t:\t", haddr[0]
print "\n", len(anon_list), ": Total tested Anonymous Proxies\n"

print "+-[TRANS LIST]--------------------------------------------------------------+\n"
for trans_proxy in trans_list:
    print trans_proxy.replace('\n', '')
print "\n", len(trans_list), ": Total tested Transparent Proxies\n"

print "+-[OTHER SERVERS]-----------------------------------------------------------+\n"
if len(alivelist) > 16:
    print "\n", (len(alivelist) - (len(trans_list) + len(anon_list))), "alive but unverified servers, check", output
    print "to have a look, lines in proxylist are unsorted :-("
else:
    for alive in alivelist:
        if alive not in trans_list and alive not in anon_list:
            print alive.replace('\n', '')
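For reference, a minimal sketch of how the harvested list could be consumed afterwards. It assumes the proxylist.txt layout written by fileConst() (section headers such as "+ ANONYMOUS PROXIES" followed by one ip:port entry per line); the file name, the google.com test URL and the five-entry limit are illustrative choices, not part of ProxyHarvest.py itself.

#!/usr/bin/python
# usage sketch (not part of ProxyHarvest.py): pick ip:port entries out of the
# generated proxylist.txt and route a test request through each of them
import re, urllib2, socket

socket.setdefaulttimeout(5)

proxies = []
for line in open('proxylist.txt'):          # assumed output file of ProxyHarvest.py
    line = line.strip()
    # keep only lines that look like ip:port; section headers and blanks are skipped
    if re.match(r'^\d{1,3}([.]\d{1,3}){3}:\d{1,5}$', line):
        proxies.append(line)

for proxy in proxies[:5]:                   # try only the first few entries
    opener = urllib2.build_opener(urllib2.ProxyHandler({'http': 'http://' + proxy}))
    try:
        opener.open('http://www.google.com').read()
        print proxy, "\t- usable"
    except Exception:
        print proxy, "\t- failed"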