📄 connector.py
字号:
""" HarvestManUrlConnector.py - Module to manage and retrieve data
from an internet connection using urllib2. This software is
part of the HARVESTMan(R) program.
Author: Anand B Pillai (anandpillai at letterboxes dot org).
For licensing information see the file LICENSE.txt that
is included in this distribution.
Modification History
====================
Jan 2 2004 Anand 1.4 bug fix version development started.
Feb 10 2004 Anand 1.3.1 bug fix version released.
Added FileHandler handler for local files.
April 20 2004 Anand 1.3.4 bug fix release. Fixed a bug with
configuring proxies.
June 5-9 Anand 1.4 *Rewrote intranet handling routines.
There is no longer a flag specifying
intranet setup in the configuration file.
Instead the program determines whether the
url is in intranet/internet by trying to
resolve the hostname, depending on proxy.
*Reduced dependency of urlparser on
this class. We decide on directory/file like
urls by fetching the actual url here, which
is then set in the original url object.
*Rewrote arguments for connect(...) call.
Added an extra argument for url object.
*Merged connect routines for intranet/extranet
connections.
*Set download status in the url objects. This
is the error number, if any. 0 denotes successful
download.
*Prefixed urllib2 namespace for its methods instead
of doing a separate import of all methods.
*Close file like object returned by urlopen
after extracting data.
"""
import sys
import md5
import socket
import time
import copy
from threading import Lock, Condition, Event
from Queue import Queue, Empty, Full
import urllib2
import urllib
from common import *
# HarvestManUrlParser module
from urlparser import HarvestManUrlParser, HarvestManUrlParserError
from cookiemgr import CookieManager
__protocols__=["http", "ftp"]
class HarvestManNetworkConnector:
    """ This class keeps the internet settings and configures
    the network.

    It stores one proxy server string ('host:port') and one proxy
    credential string ('user:password') per protocol, reads the
    proxy settings from the global config object and installs a
    urllib2 opener built from them. """

    def __init__(self):
        # use proxies flag
        self.__useproxy = 0
        # check for ssl support in python
        self.__initssl = False
        # Number of socket errors
        self.__sockerrs = 0
        # Config object
        self.__cfg = GetObject('config')

        if hasattr(socket, 'ssl'):
            self.__initssl = True
            # __protocols__ is a module level list shared by every
            # instance, so register https only once to avoid piling
            # up duplicate entries when several connectors are made.
            if "https" not in __protocols__:
                __protocols__.append("https")

        self.initiailize_proxy()
        self.__configure()

    def initiailize_proxy(self):
        """ Reset the proxy server and proxy authentication tables
        to their defaults ('proxy:80' and no credentials).

        The (misspelt) method name is kept as it is, since it is
        part of the public interface of this class. """

        proxystring = 'proxy:80'

        # proxy variables
        # dictionary of protocol:proxy values
        self.__proxydict = {"http" : proxystring,
                            "https" : proxystring,
                            "ftp" : proxystring}
        # dictionary of protocol:proxy auth values
        self.__proxyauth = {"http" : '',
                            "https" : '',
                            "ftp" : ''}

    def set_useproxy(self, val=1):
        """ Set the value of use-proxy flag """

        self.__useproxy = val

    def __set_protocol_proxy(self, protocol, proxyserver, proxyport,
                             authinfo, encrypted):
        """ Store the proxy server and, if given, the proxy
        credentials for a single protocol.

        protocol    - key into the proxy tables ('http', 'https', 'ftp')
        proxyserver - proxy host name, passed through bin_decrypt
                      first when 'encrypted' is true
        proxyport   - proxy port, converted using str()
        authinfo    - (username, password) pair; an empty value leaves
                      the stored credentials untouched
        encrypted   - whether server, username and password need to be
                      run through bin_decrypt before use """

        if encrypted:
            proxyserver = bin_decrypt(proxyserver)
        self.__proxydict[protocol] = proxyserver + ':' + str(proxyport)

        if not authinfo:
            return

        try:
            username, passwd = authinfo
        except ValueError:
            # authinfo is not a pair: fall back to empty
            # username and password, as before
            username, passwd = '', ''

        if encrypted:
            username, passwd = bin_decrypt(username), bin_decrypt(passwd)
        self.__proxyauth[protocol] = username + ':' + passwd

    def set_ftp_proxy(self, proxyserver, proxyport, authinfo=(), encrypted=True):
        """ Set ftp proxy """

        self.__set_protocol_proxy("ftp", proxyserver, proxyport,
                                  authinfo, encrypted)

    def set_https_proxy(self, proxyserver, proxyport, authinfo=(), encrypted=True):
        """ Set https(ssl) proxy """

        self.__set_protocol_proxy("https", proxyserver, proxyport,
                                  authinfo, encrypted)

    def set_http_proxy(self, proxyserver, proxyport, authinfo=(), encrypted=True):
        """ Set http proxy """

        self.__set_protocol_proxy("http", proxyserver, proxyport,
                                  authinfo, encrypted)

    def set_proxy(self, server, port, authinfo=(), encrypted=True):
        """ Set generic (all protocols) proxy values.

        For most users, only this method will be called,
        rather than the specific method for each protocol,
        as proxies are normally shared for all tcp/ip protocols.

        The arguments are handed over unchanged to the
        set_<protocol>_proxy method of every protocol listed
        in __protocols__. """

        for p in __protocols__:
            # Look up the protocol specific method by name and
            # call it exactly once.
            setter = getattr(self, 'set_' + p + '_proxy')
            setter(server, port, authinfo, encrypted)

    def set_authinfo(self, username, passwd, encrypted=True):
        """ Set authentication information for proxy.
        Note: If this function is used all protocol specific
        authentication will be replaced by this authentication. """

        if encrypted:
            passwdstring = bin_decrypt(username) + ':' + bin_decrypt(passwd)
        else:
            passwdstring = username + ':' + passwd

        self.__proxyauth = {"http" : passwdstring,
                            "https" : passwdstring,
                            "ftp" : passwdstring}

    def configure_protocols(self):
        """ Just a wrapper """

        self.__configure_protocols()

    def configure_network(self):
        """ Just a wrapper """

        self.__configure_network()

    def __configure(self):
        """ Wrapping up wrappers """

        self.__configure_network()
        self.__configure_protocols()

    def __split_proxy(self, proxy, default_port):
        """ Split a proxy setting into a (server, port) tuple.

        Any 'http://' prefix is removed before looking for the port
        separator, so that the colon of the scheme is never taken
        for the port separator. If the setting has no port, then
        'default_port' is used. The port is returned as an integer;
        a ValueError is raised if it is not numeric. """

        index = proxy.find('http://')
        if index != -1:
            proxy = proxy[(index+7):]

        index = proxy.rfind(':')
        if index != -1:
            return proxy[:index], int(proxy[(index+1):].strip())

        return proxy, int(default_port)

    def __configure_network(self):
        """ Initialise network for the user.

        If the config object names a proxy and does not flag an
        intranet, proxy usage is switched on and the proxy server,
        port and (when both are given) proxy username and password
        are taken from the config object. """

        # First: Configuration of network (proxies/intranet etc)

        # Check for proxies in the config object
        if not self.__cfg.proxy or self.__cfg.intranet:
            return

        self.set_useproxy()

        server, port = self.__split_proxy(self.__cfg.proxy,
                                          self.__cfg.proxyport)
        self.set_proxy(server, port)

        # Set proxy username and password, if specified
        puser, ppasswd = self.__cfg.puser, self.__cfg.ppasswd
        if puser and ppasswd:
            self.set_authinfo(puser, ppasswd)

    def __proxy_url(self, protocol):
        """ Return the proxy url for the given protocol, in the form
        'http://user:password@host:port', or 'http://host:port'
        when no credentials are stored for the protocol. """

        server = self.__proxydict[protocol]
        auth = self.__proxyauth.get(protocol, '')
        # The auth table always has an entry per protocol, so it is
        # the credential string itself that has to be checked and not
        # the table; empty credentials must not produce 'http://@host'.
        if auth:
            return 'http://' + auth + '@' + server

        return 'http://' + server

    def __configure_protocols(self):
        """ Configure protocol handlers.

        Builds a urllib2 opener with the basic auth, http, ftp,
        redirect and file handlers (plus proxy, https and gopher
        handlers when applicable), sets the user agent header and
        installs the opener globally. Returns 0. """

        # Second: Configuration of protocol handlers.

        # TODO: Verify gopher protocol
        authhandler = urllib2.HTTPBasicAuthHandler()

        # Set timeout for sockets to thread timeout. Default socket
        # timeouts came with Python 2.3; check for the feature instead
        # of comparing version strings, which missed versions such as
        # '2.3.4' and everything later.
        if hasattr(socket, 'setdefaulttimeout'):
            socket.setdefaulttimeout(self.__cfg.timeout)

        handlers = [authhandler]

        # If we are behind proxies/firewalls
        if self.__useproxy:
            proxy_support = urllib2.ProxyHandler(
                {"http" : self.__proxy_url("http"),
                 "https" : self.__proxy_url("https"),
                 "ftp" : self.__proxy_url("ftp")})
            handlers.append(proxy_support)

        handlers.append(urllib2.HTTPHandler)
        handlers.append(urllib2.CacheFTPHandler)
        # Not every urllib2 version provides a gopher handler
        if hasattr(urllib2, 'GopherHandler'):
            handlers.append(urllib2.GopherHandler)
        # https only if python has ssl support
        if self.__initssl:
            handlers.append(urllib2.HTTPSHandler)
        handlers.append(urllib2.HTTPRedirectHandler)
        handlers.append(urllib2.FileHandler)

        # build opener and install it
        opener = urllib2.build_opener(*handlers)
        opener.addheaders = [('User-agent', GetObject('USER_AGENT'))]
        urllib2.install_opener(opener)

        return 0

    # Get methods
    def get_useproxy(self):
        """ Find out if we are using proxies """

        return self.__useproxy

    def get_proxy_info(self):
        """ Return the tuple (proxy server dictionary,
        proxy authentication dictionary), both keyed by protocol """

        return (self.__proxydict, self.__proxyauth)

    def is_intranet(self):
        """ Return the intranet setting of the config object """

        return self.__cfg.intranet

    def increment_socket_errors(self, val=1):
        """ Add 'val' to the socket error count """

        self.__sockerrs += val

    def decrement_socket_errors(self, val=1):
        """ Subtract 'val' from the socket error count """

        self.__sockerrs -= val

    def get_socket_errors(self):
        """ Return the current socket error count """

        return self.__sockerrs
class HarvestManUrlConnector:
""" Class which helps to connect to the internet """
def __str__(self):
return `self` # + str(self.__dict__)
def __init__(self):
""" Constructor for this class """
# file like object returned by
# urllib2.urlopen(...)
self.__freq = urllib2.Request('file://')
# data downloaded
self.__data = ''
# error dictionary
self.__error={ 'msg' : '',
'number': 0,
'fatal' : False
}
# for keeping track of bytes downloaded
self.__bytes = 0L
# time to wait before reconnect
# in case of failed connections
self.__sleeptime = 0.5
# local url object
self.__urlobject = None
# global network configurator
self.network_conn = GetObject('connector')
def __proxy_query(self, queryauth=1, queryserver=0):
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -