webagent.pyc_dis
#! /usr/bin/env python
# emacs-mode: -*- python-*-
import logging
import cStringIO
import urlparse
import string
import twisted.internet.interfaces
import twisted.web.client
import twisted.web.http
from zope.interface import implements
from twisted.internet import reactor
from twisted.internet import ssl
import Image
import config
import const
import control
import sslproxy
log = logging.getLogger(const.CONST_APP_LOGGER)
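# Module-level counters of HTTP response statuses seen, keyed by numeric
# status code; updated in the handleStatus_* callbacks below.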
mdWebRspStat = {200: 0, 404: 0}
class clsMyHttpPageGetter(twisted.web.client.HTTPPageGetter):
def __init__(self):
self.coRequest = None
self.cbSuccessGet = True
self.status = None
self.csHost = None
self.followRedirect = True
self.cbRedirected = False
self.cbLostConnection = False
self.headers = {}
self.trunked = False
self.ciReqID = None
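# sendCommand is overridden so the request line goes out as HTTP/1.1 rather
# than the HTTP/1.0 that the base twisted.web.http.HTTPClient would send;
# together with the absolute request URL and the proxy-connection header in
# cnProcess this allows persistent connections through squid.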
def sendCommand(self, command, path):
self.transport.write(("%s %s HTTP/1.1\r\n" % (command,
path)))
def connectionMade(self):
"""
Add this client to the factory's free list.
"""
self.factory.cnAddFreeWebClient(self)
def cnAddConsumer(self, aoConsumer):
"""
Attach the consumer that initiates and will handle this request.
"""
self.coRequest = aoConsumer
self.csHost = aoConsumer.csGetHost()
self.cbRedirected = False
self.ciReqID = self.coRequest.ciID
def cnProcess(self, abRedirect = False, asRedirectURL = None):
"""
Issue one request for the current consumer.
"""
if (self.coRequest is None):
return
lsMethod = "GET"
lsFullURL = self.coRequest.csGetFullURL()
lsHost = self.coRequest.csGetHost()
if abRedirect:
lsFullURL = asRedirectURL
lsHost = urlparse.urlparse(asRedirectURL)[1]
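# The absolute URL stays on the request line even after a redirect, so the
# follow-up request is still routed through the proxy rather than going
# directly to the redirect target.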
self.sendCommand(lsMethod, lsFullURL)
self.sendHeader("Host", lsHost)
self.sendHeader("User-Agent", self.coRequest.csUserAgent)
if (config.use_squid and config.keep_squid_connection):
self.sendHeader("proxy-connection", "keep-alive")
if (len(self.coRequest.csReferer) > 0):
self.sendHeader("referer", self.coRequest.csReferer)
if ((len(self.coRequest.clCookies) > 0) and (lsHost == self.coRequest.csGetHost())):
self.sendHeader("Cookie", "; ".join(self.coRequest.clCookies))
self.endHeaders()
def connectionLost(self, reason):
self.cbLostConnection = True
log.debug("connection is lost")
if (self.status is None):
self.cnFakeRsponse()
self.handleResponseEnd()
self.factory.cnRemoveWebClient(self, self.ciReqID)
self.transport.loseConnection()
def cnFakeRsponse(self):
"""
Fabricate a response when the connection to squid fails, or is dropped
before any response arrives.
"""
self.headers = {}
self.headers["cache-control"] = ('no-store',)
self.cbSuccessGet = True
def handleResponse(self, response):
"""
Callback invoked once the whole response has been received.
"""
if (self.coRequest is None):
return
if self.cbSuccessGet:
self.coRequest.cnSetWebRspHeader(self.headers)
if self.cbRedirected:
log.debug(("task(%d) redirected to %s" % (self.coRequest.ciID,
self.coRequest.csGetFullURL())))
else:
log.debug(("task(%d) %d data read" % (self.coRequest.ciID,
len(response))))
self.coRequest.write(response)
self.coRequest = None
self.cnReset()
def cnReset(self):
self.firstLine = 1
self._HTTPClient__buffer = ""
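# _HTTPClient__buffer is the name-mangled private parse buffer of
# twisted.web.http.HTTPClient; clearing it directly lets this protocol
# instance be reused to parse the next response.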
self.headers = {}
if (not self.cbLostConnection):
self.factory.cnReturnFreeWebClient(self, self.ciReqID)
self.cbSuccessGet = True
self.status = None
self.length = None
def handleStatusDefault(self):
mdWebRspStat[int(self.status)] = (mdWebRspStat.get(int(self.status), 0) + 1)
log.warn(("unhandle status %s for %s" % (self.status,
self.coRequest.csGetFullURL())))
self.cbSuccessGet = False
def handleStatus_301(self):
"""
Handle redirect responses. Note the redirect must still go through squid
and cannot be left to the default parent-class handling, because the parent
would open an extra connection directly to the target URL and add that
connection to our connection pool.
"""
mdWebRspStat[int(self.status)] = (mdWebRspStat.get(int(self.status), 0) + 1)
log.debug(("%s for %s" % (self.status,
self.coRequest.csGetFullURL())))
lsLocation = self.headers.get("location")
if (not lsLocation):
log.warn("location not found,unable to redirect")
self.cbSuccessGet = False
lsURL = lsLocation[0]
if self.followRedirect:
if (not self.coRequest.cbSetRedirect(lsURL)):
self.cbSuccessGet = False
else:
self.factory.cnAddConsumer(self.coRequest)
self.factory.resumeProducing()
self.cbRedirected = True
else:
log.warn("redirect not allow")
def handleStatus_404(self):
mdWebRspStat[404] += 1
log.warn(("404 for %s" % self.coRequest.csGetFullURL()))
self.cbSuccessGet = False
def handleStatus_200(self):
mdWebRspStat[200] += 1
log.debug(("200 for task(%d), size(%s)" % (self.coRequest.ciID,
self.headers.get("content-length", ('not specified',))[0])))
if (self.headers.get("content-length", ('1',))[0] == "0"):
self.transport.loseConnection()
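# A zero-length body produces no further data to drive the response to
# completion, so the connection is closed here; connectionLost() above then
# calls handleResponseEnd() to finish the request.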
self.cbSuccessGet = True
def handleHeader(self, key, value):
if ((key.lower() == "transfer-encoding") and (value.lower() == "chunked")):
self.trunked = True
twisted.web.client.HTTPPageGetter.handleHeader(self, key, value)
def handleResponsePart(self, data):
"""
Extra handling for chunked transfer encoding.
This could be optimized: the whole buffer does not need to be re-parsed
every time new data is appended.
"""
twisted.web.client.HTTPPageGetter.handleResponsePart(self, data)
llData = []
lbTrunkDone = False
if self.trunked:
lsRest = self._HTTPClient__buffer
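# Wait until the terminating zero-size chunk has arrived, then de-chunk the
# whole buffer in one pass with twisted.web.http.fromChunk.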
if (not lsRest.endswith("0\r\n\r\n")):
return
while True:
try:
(lsData, lsRest,) = twisted.web.http.fromChunk(lsRest)
except ValueError:
break
llData.append(lsData)
if (lsRest == "0\r\n\r\n"):
lbTrunkDone = True
break
if lbTrunkDone:
self._HTTPClient__buffer = string.join(llData, "")
self.length = 0
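# The buffer now holds the de-chunked payload; length = 0 signals the base
# HTTPClient that the response is complete once this handler returns.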
handleStatus_201 = handleStatus_200
handleStatus_202 = handleStatus_200
handleStatus_302 = handleStatus_301
handleStatus_303 = handleStatus_301
class clsWebAgent(twisted.web.client.HTTPClientFactory):
implements(twisted.internet.interfaces.IProducer)
protocol = clsMyHttpPageGetter
def __init__(self, aiMaxClient, asHost, aiPort):
self.clConsumer = []
self.clFreeWebClient = []
self.ciMaxWebClient = aiMaxClient
self.csHost = asHost
self.ciPort = aiPort
self.ciCurrentClientCount = 0
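# HTTPClientFactory.__init__ is not called; the cookies, headers and waiting
# attributes it would normally create are recreated by hand below.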
self.cookies = {}
import twisted.python.util
self.headers = twisted.python.util.InsensitiveDict()
self.waiting = 0
self.cdRunningProcess = {}
def cbUseProxy(self, asHost):
"""
Decide whether requests to the given host should go through the proxy.
Returns True if the proxy should be used, False otherwise.
"""
for filter in config.squid_filter:
if filter.match(asHost):
return False
return True
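# A minimal usage sketch (assumptions: config.squid_filter holds compiled
# regular expressions for hosts that should bypass squid, and squid listens
# at the host/port passed to the factory; the pattern below is hypothetical):
#
#     import re
#     config.squid_filter = [re.compile(r"\.intranet\.example$")]
#     loAgent = clsWebAgent(aiMaxClient = 10, asHost = "127.0.0.1", aiPort = 3128)
#     if loAgent.cbUseProxy("www.example.com"):
#         reactor.connectTCP(loAgent.csHost, loAgent.ciPort, loAgent)
#     reactor.run()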