⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 webagent.pyc_dis

📁 Cache服务器模块,可以完成图片的缓存处理功能. 很好用的.
💻 PYC_DIS
📖 第 1 页 / 共 2 页
字号:
#! /usr/bin/env python
# emacs-mode: -*- python-*-

import logging
import cStringIO
import urlparse
import string
import twisted.internet.interfaces
import twisted.web.client
from zope.interface import implements
from twisted.internet import reactor
from twisted.internet import ssl
import Image
import config
import const
import control
import sslproxy
# Module-wide logger shared by all classes in this file.
log = logging.getLogger(const.CONST_APP_LOGGER)
# Histogram of HTTP response status codes seen so far.  200 and 404 are
# pre-seeded because their handlers increment with "+=" and need the key
# to already exist; other statuses are added lazily via dict.get().
mdWebRspStat = {200: 0,
 404: 0}
class clsMyHttpPageGetter(twisted.web.client.HTTPPageGetter):
    """Pooled HTTP page getter.

    A ``twisted.web.client.HTTPPageGetter`` subclass whose connections are
    managed by the owning factory (``clsWebAgent``): on connect it registers
    itself in the factory's free-client list, and after each completed
    response it returns to that pool instead of closing the socket, so the
    same (proxy) connection can serve many requests.
    """

    def __init__(self):
        self.coRequest = None          # consumer driving the current request
        self.cbSuccessGet = True       # response considered usable so far
        self.status = None             # status code (string) once the status line arrives
        self.csHost = None             # host of the current request
        self.followRedirect = True     # honor 301/302/303 Location headers
        self.cbRedirected = False      # a redirect was re-dispatched for this response
        self.cbLostConnection = False  # the transport has been lost
        self.headers = {}
        self.trunked = False           # Transfer-Encoding: chunked seen on this response
        self.ciReqID = None            # request id used for factory bookkeeping



    def sendCommand(self, command, path):
        # Override the base class: advertise HTTP/1.1 (instead of 1.0) so
        # persistent (keep-alive) connections work with the proxy.
        self.transport.write(("%s %s HTTP/1.1\r\n" % (command,
         path)))



    def connectionMade(self):
        """
    Register this connection in the factory's free-client pool.
    """
        self.factory.cnAddFreeWebClient(self)



    def cnAddConsumer(self, aoConsumer):
        """
    Attach the consumer entity that initiates and handles the request.
    """
        self.coRequest = aoConsumer
        self.csHost = aoConsumer.csGetHost()
        self.cbRedirected = False
        self.ciReqID = self.coRequest.ciID



    def cnProcess(self, abRedirect = False, asRedirectURL = None):
        """
    Issue one GET request for the attached consumer.

    When abRedirect is true, asRedirectURL replaces the consumer's own
    URL (used to follow a Location header through the same connection).
    """
        if (self.coRequest is None):
            return
        lsMethod = "GET"
        lsFullURL = self.coRequest.csGetFullURL()
        lsHost = self.coRequest.csGetHost()
        if abRedirect:
            lsFullURL = asRedirectURL
            lsHost = urlparse.urlparse(asRedirectURL)[1]
        self.sendCommand(lsMethod, lsFullURL)
        self.sendHeader("Host", lsHost)
        self.sendHeader("User-Agent", self.coRequest.csUserAgent)
        if (config.use_squid and config.keep_squid_connection):
            self.sendHeader("proxy-connection", "keep-alive")
        if (len(self.coRequest.csReferer) > 0):
            self.sendHeader("referer", self.coRequest.csReferer)
        # Only forward cookies while still on the consumer's original host;
        # a cross-host redirect must not leak them.
        if ((len(self.coRequest.clCookies) > 0) and (lsHost == self.coRequest.csGetHost())):
            self.sendHeader("Cookie", "; ".join(self.coRequest.clCookies))
        self.endHeaders()



    def connectionLost(self, reason):
        self.cbLostConnection = True
        log.debug("connection is lost")
        if (self.status is None):
            # Dropped before any status line arrived: synthesize a response
            # so the consumer still gets its completion callback.
            self.cnFakeRsponse()
        self.handleResponseEnd()
        self.factory.cnRemoveWebClient(self, self.ciReqID)
        self.transport.loseConnection()



    def cnFakeRsponse(self):
        """
    Fabricate a minimal response for when the connection to squid failed
    or was dropped before any data arrived.  Marked no-store so the empty
    result is never cached.
    """
        self.headers = {}
        self.headers["cache-control"] = ('no-store',)
        self.cbSuccessGet = True



    def handleResponse(self, response):
        """
    Callback invoked once the whole response has been received.
    """
        if (self.coRequest is None):
            return
        if self.cbSuccessGet:
            self.coRequest.cnSetWebRspHeader(self.headers)
        if self.cbRedirected:
            # Body belongs to the redirect response; the follow-up request
            # will deliver the real data, so drop it here.
            log.debug(("task(%d) redirected to %s" % (self.coRequest.ciID,
             self.coRequest.csGetFullURL())))
        else:
            log.debug(("task(%d) %d data read" % (self.coRequest.ciID,
             len(response))))
            self.coRequest.write(response)
        self.coRequest = None
        self.cnReset()



    def cnReset(self):
        # Prepare this protocol instance for the next request on the same
        # (kept-alive) socket, then hand it back to the factory pool.
        self.firstLine = 1
        self._HTTPClient__buffer = ""
        self.headers = {}
        if (not self.cbLostConnection):
            self.factory.cnReturnFreeWebClient(self, self.ciReqID)
        self.cbSuccessGet = True
        self.status = None
        self.length = None



    def handleStatusDefault(self):
        # Count the unexpected status and mark the fetch as failed.
        mdWebRspStat[int(self.status)] = (mdWebRspStat.get(int(self.status), 0) + 1)
        log.warn(("unhandle status %s for %s" % (self.status,
         self.coRequest.csGetFullURL())))
        self.cbSuccessGet = False



    def handleStatus_301(self):
        """
    Handle redirects ourselves, still routing through squid.  The default
    parent-class handling must not be used: it would open an extra direct
    connection to the target URL and add that connection to our pool.
    """
        mdWebRspStat[int(self.status)] = (mdWebRspStat.get(int(self.status), 0) + 1)
        log.debug(("%s for %s" % (self.status,
         self.coRequest.csGetFullURL())))
        lsLocation = self.headers.get("location")
        if (not lsLocation):
            log.warn("location not found,unable to redirect")
            self.cbSuccessGet = False
            # BUGFIX: must bail out here.  The original fell through and
            # crashed on lsLocation[0] with lsLocation being None.
            return
        lsURL = lsLocation[0]
        if self.followRedirect:
            if (not self.coRequest.cbSetRedirect(lsURL)):
                self.cbSuccessGet = False
            else:
                # Re-queue the consumer so the redirect target is fetched
                # through the factory (and therefore through squid).
                self.factory.cnAddConsumer(self.coRequest)
                self.factory.resumeProducing()
                self.cbRedirected = True
        else:
            log.warn("redirect not allow")



    def handleStatus_404(self):
        mdWebRspStat[404] += 1
        log.warn(("404 for %s" % self.coRequest.csGetFullURL()))
        self.cbSuccessGet = False



    def handleStatus_200(self):
        mdWebRspStat[200] += 1
        log.debug(("200 for task(%d), size(%s)" % (self.coRequest.ciID,
         self.headers.get("content-length", ('not specified',))[0])))
        if (self.headers.get("content-length", ('1',))[0] == "0"):
            # Empty body: no data callbacks will follow, so close now to
            # trigger response completion.
            self.transport.loseConnection()
        self.cbSuccessGet = True



    def handleHeader(self, key, value):
        # Remember chunked transfer encoding so handleResponsePart can
        # de-chunk the body; then defer to the normal header handling.
        if ((key.lower() == "transfer-encoding") and (value.lower() == "chunked")):
            self.trunked = True
        twisted.web.client.HTTPPageGetter.handleHeader(self, key, value)



    def handleResponsePart(self, data):
        """
    Accumulate body data, with extra handling to decode chunked bodies.

    NOTE(review): once the terminal "0\\r\\n\\r\\n" marker is seen, the
    whole buffer is re-parsed from scratch; this could be optimized to
    parse incrementally instead of on every call.
    """
        twisted.web.client.HTTPPageGetter.handleResponsePart(self, data)
        llData = []
        lbTrunkDone = False
        if self.trunked:
            lsRest = self._HTTPClient__buffer
            # Wait until the chunked stream terminator has arrived.
            if (not lsRest.endswith("0\r\n\r\n")):
                return
            while True:
                try:
                    (lsData, lsRest,) = twisted.web.http.fromChunk(lsRest)
                except ValueError:
                    break
                llData.append(lsData)
                if (lsRest == "0\r\n\r\n"):
                    lbTrunkDone = True
                    break

            if lbTrunkDone:
                # Replace the raw chunked buffer with the decoded payload
                # and zero the remaining length so the response completes.
                self._HTTPClient__buffer = string.join(llData, "")
                self.length = 0


    # These statuses share the handlers above: 201/202 behave like 200,
    # 302/303 behave like 301.
    handleStatus_201 = handleStatus_200
    handleStatus_202 = handleStatus_200
    handleStatus_302 = handleStatus_301
    handleStatus_303 = handleStatus_301

class clsWebAgent(twisted.web.client.HTTPClientFactory):
    implements(twisted.internet.interfaces.IProducer)
    protocol = clsMyHttpPageGetter

    def __init__(self, aiMaxClient, asHost, aiPort):
        """Initialize the agent's connection-pool bookkeeping.

        aiMaxClient -- maximum number of pooled web clients
        asHost, aiPort -- address the pooled clients connect to
        """
        import twisted.python.util
        # Queued consumers waiting for a client, and idle protocol instances.
        self.clConsumer = []
        self.clFreeWebClient = []
        # Pool sizing and the target address.
        self.ciMaxWebClient = aiMaxClient
        self.ciCurrentClientCount = 0
        self.csHost = asHost
        self.ciPort = aiPort
        # State the HTTPClientFactory base class expects to find.
        self.cookies = {}
        self.headers = twisted.python.util.InsensitiveDict()
        self.waiting = 0
        # Requests currently in flight, keyed by request id.
        self.cdRunningProcess = {}



    def cbUseProxy(self, asHost):
        """Decide whether requests to the given host should use the proxy.

        Returns True when the proxy must be used; False when asHost matches
        any bypass pattern in config.squid_filter.
        """
        return not any(loPattern.match(asHost)
                       for loPattern in config.squid_filter)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -