⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 pagesum2.py

📁 该软件根据网络数据生成NetFlow记录。NetFlow可用于网络规划、负载均衡、安全监控等
💻 PY
📖 第 1 页 / 共 2 页
字号:
#! /usr/bin/env python################################################################################                                                                             ##   Copyright 2005 University of Cambridge Computer Laboratory.               ##                                                                             ##   This file is part of Nprobe.                                              ##                                                                             ##   Nprobe is free software; you can redistribute it and/or modify            ##   it under the terms of the GNU General Public License as published by      ##   the Free Software Foundation; either version 2 of the License, or         ##   (at your option) any later version.                                       ##                                                                             ##   Nprobe is distributed in the hope that it will be useful,                 ##   but WITHOUT ANY WARRANTY; without even the implied warranty of            ##   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             ##   GNU General Public License for more details.                              
#                                                                              #
#   You should have received a copy of the GNU General Public License          #
#   along with Nprobe; if not, write to the Free Software                      #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  #
#                                                                              #
################################################################################

# NOTE(review): this is Python 2 code (print statements, `except E, v`,
# `commands`, `Numeric`, `urlparse`, `long`).  The listing is paginated and
# this file contains only page 1 of 2 -- the script is truncated below.

from sys import argv
import getopt
import os
import sys
import re
import commands
from math import sqrt
import Numeric
from urlparse import urlparse
from socket import inet_aton
import np_warnings
from np_treestats import NOB_THRESH
from np_TCPConn import D_BIGDEL
from nprobe import CT_TEXT_HTML, CT_TEXT_XML, http_server_objtype_string
from histo import Histogram, HistogramError

# Number of buckets in the per-accumulator "servers per page" distribution.
N_SERV_BKTS = 1000

# Global list of report lines; filled by rep() and flushed by rep_rep().
replist = []

def rep(s):
    """Queue one line of report text for later output by rep_rep()."""
    replist.append(s)

def rep_now(s):
    """Print a report line immediately and also queue it for the summary."""
    print s
    rep(s)

def rep_per(args):
    """Report a list of (value, label) pairs as percentages of the first pair.

    args[0] is the (total, label) 'boss' entry; each subsequent non-zero
    entry is reported indented with its percentage of that total.  Nothing
    is reported if the total is zero.
    """
    boss_v, boss_t = args[0]
    if not boss_v:
        return
    rep('%d %s:' % (boss_v, boss_t))
    for v, t in args[1:]:
        if v:
            pcf = 100.0/boss_v
            rep('\t%d %s (%.2f%%)' % (v, t, v*pcf))

def rep_rep():
    """Print all queued report lines and write them to the summary file.

    NOTE(review): `sumfilenm` is a global that is not defined anywhere in
    this (truncated) page of the listing -- presumably set by the top-level
    script on page 2; verify before reuse.
    """
    try:
        sf = open(sumfilenm, 'w')
    except IOError, s:
        print 'Couldn\'t open summary file', s
        sys.exit(1)
    for s in replist:
        print s
        sf.write(s + '\n')

# Counter attributes created (zeroed) on every accum instance.
acc_fields = ['totpages', 'nobs', 'nconns', 'npdel', 'npbdel', 'npbdel85',
              'no_del_del', 'sp', 'lp', 'unlinked', 'refr', 'inv', 'rvst']

class accum:
    """Accumulator of page/object/connection statistics for one category.

    Three instances are created at top level ('NOT IN TREES:', 'IN_TREES:',
    'TOTAL:').  Besides the counters in acc_fields it carries three
    per-key dicts of 4-element count lists (ud/sd/cd -- presumably URL,
    server and client keyed; confirm against page 2) and a fixed-size
    servers-per-page distribution.
    """
    def __init__(self, what):
        # `what` is the report heading printed by report().
        self.what = what
        for f in acc_fields:
            setattr(self, f, 0)
        self.ud = {}
        self.sd = {}
        self.cd = {}
        # Distribution: nservd[i] = number of pages using i servers.
        self.nservd = Numeric.zeros(N_SERV_BKTS,)

    def report(self):
        """Queue a formatted statistics report for this accumulator.

        NOTE(review): reads attributes not initialised in __init__
        (nserv, ncli, nurl, nsdel, nsbdel, nudel, nubdel) -- these must be
        assigned externally before report() is called; the code doing so is
        presumably on the missing page 2 of this listing.
        """
        rep(self.what)
        rep('%d servers %d clients %d obs %d conns' \
              % (self.nserv, self.ncli, self.nobs, self.nconns))
        rep_per([(self.nurl, 'URLs')])
        # Page breakdown, relative to total page downloads.
        page_per = [
            (self.totpages, 'Page downloads'),
            (self.sp, 'lt %d obs' % (nob_thresh)),
            (self.lp, 'gt %d obs' % (nob_thresh))
            ]
        single = self.unlinked+self.refr+self.inv+self.rvst
        if single:
            page_per += [
                (single, 'single objects not in trees'),
                (self.unlinked, 'unlinked'),
                (self.refr, 'refreshes'),
                (self.rvst, 'revisits'),
                (self.inv, 'invalid')
                ]
        if self.npdel:
            page_per += [
                (self.npdel, 'delayed'),
                (self.npbdel, 'long delayed'),
                (self.npbdel85, 'long delayed at 85%')
                ]
        if self.no_del_del:
            page_per += [(self.no_del_del, 'delays not adding to total')]
        rep_per(page_per)
        rep_per([(self.nserv, 'Servers'),
                 (self.nsdel, 'delayed'),
                 (self.nsbdel, 'long delayed')
                 ])
        rep_per([(self.nurl, 'URLs'),
                 (self.nudel, 'delayed'),
                 (self.nubdel, 'long delayed')
                 ])
        rep('Distribution of servers/page')
        for i in range(N_SERV_BKTS):
            ns = self.nservd[i]
            if ns:
                rep('  %d %12d' % (i, ns))
        rep('\n')

def accum_tot(a, b, c):
    """Merge accumulators `a` and `b` into `c` (c = a + b).

    The ud/sd/cd dicts are merged entry-wise by summing their 4-element
    count lists; the servers-per-page distribution and the plain counters
    in acc_fields are summed element by element.
    """
    c.ud = {}
    c.ud.update(a.ud)
    for v in b.ud.items():
        s = v[1]
        e = c.ud.setdefault(v[0], [0,0,0,0])
        e[0] += s[0]
        e[1] += s[1]
        e[2] += s[2]
        e[3] += s[3]
    c.sd = {}
    c.sd.update(a.sd)
    for v in b.sd.items():
        s = v[1]
        e = c.sd.setdefault(v[0], [0,0,0,0])
        e[0] += s[0]
        e[1] += s[1]
        e[2] += s[2]
        e[3] += s[3]
    c.cd = {}
    c.cd.update(a.cd)
    for v in b.cd.items():
        s = v[1]
        e = c.cd.setdefault(v[0], [0,0,0,0])
        e[0] += s[0]
        e[1] += s[1]
        e[2] += s[2]
        e[3] += s[3]
    for i in range(N_SERV_BKTS):
        c.nservd[i] = a.nservd[i] + b.nservd[i]
    for f in acc_fields:
        setattr(c, f, getattr(a, f) + getattr(b, f))

def do_pagestuff(sd):
    """Produce the per-server page/object report and histogram data files.

    `sd` maps a server key to a (hdd, pd, od, odd) tuple of per-server
    dicts -- pd keyed by page, od/odd by object.  The exact schema of the
    dict values (indexed positionally below) is defined by the producer of
    `sd`, which is not visible in this listing; the index meanings noted in
    comments are taken from the original inline comments and the field
    ranges actually summed.  Writes histogram files under the global
    `basedir` (links_data/ and types_per_page/) and queues report text
    via rep().
    """
    def dl_accum(d, s, e):
        # Sum value fields s..e (inclusive) across all entries of dict d,
        # returning a list of the e-s+1 column totals.
        a = [0]*(e-s+1)
        for v in d.values():
            n = 0
            for i in range(s, e+1):
                a[n] += v[i]
                n += 1
        return a
    # Descending-order comparator functions for slist.sort(), one per
    # sortable column (by_3_0 etc. index into the nested totals lists).
    def by_0(a, b):
        return int(b[0]-a[0])
    def by_1(a, b):
        return int(b[1]-a[1])
    def by_2(a, b):
        return int(b[2]-a[2])
    def by_3_0(a, b):
        return int(b[3][0]-a[3][0])
    def by_3_1(a, b):
        return int(b[3][1]-a[3][1])
    def by_3_2(a, b):
        return int(b[3][2]-a[3][2])
    def by_4_1(a, b):
        return int(b[4][1]-a[4][1])
    def by_4_2(a, b):
        return int(b[4][2]-a[4][2])
    def by_5_1(a, b):
        return int(b[5][1]-a[5][1])
    def by_5_2(a, b):
        return int(b[5][2]-a[5][2])
    def by_7(a, b):
        return long(b[7]-a[7])
    def add_3(a, b):
        # Element-wise add of the first three fields of b into a.
        a[0] += b[0]
        a[1] += b[1]
        a[2] += b[2]
    def object_type_string(otype):
        # '/' would break the per-type filenames written below.
        return http_server_objtype_string(otype).replace('/', '-')

    slist = []
    tot_bytes = 0
    tot_disc_p = tot_disc_o = tot_disc_oo = 0
    allptots = [0, 0, 0]        # pages: encountered/requested/downloaded totals
    allotots = [0, 0, 0]        # objects
    allootots = [0, 0, 0]       # "others" objects
    plinksd = {}                # link type -> count of links to pages
    intlinksd = {}              # link type -> 4 histograms (see loop below)
    otypesd = {}                # object type -> histogram of count per page
    uhist = Histogram(lower=0, bucketsz=1)   # discrete URL links per page
    page_n = 0
    for s, (hdd, pd, od, odd) in sd.items():
        # Per-server column totals; pd uses fields 1..4, od/odd 0..3.
        ptots = dl_accum(pd, 1, 4)
        add_3(allptots, ptots)
        otots = dl_accum(od, 0, 3)
        add_3(allotots, otots)
        ootots = dl_accum(odd, 0, 3)
        add_3(allootots, ootots)
        # Last accumulated column is a byte count -- TODO confirm schema.
        totb = ptots[-1] + otots[-1] + ootots[-1]
        tot_bytes += totb
        disc_p = len(pd)
        tot_disc_p += disc_p
        disc_o = len(od)
        tot_disc_o += disc_o
        disc_oo = len(odd)
        tot_disc_oo += disc_oo
        slist.append((disc_p, disc_o, disc_oo, ptots, otots, ootots, s, totb))
        for p in pd.values():
            page_n += 1
            uhist.add(p[8])
            ld = p[0] # links to page
            for ltype, n in ld.items():
                plinksd[ltype] = plinksd.setdefault(ltype, 0) + n
            ld = p[5] # links within page
            for ltype, (max_ndisc, nd_disc, followed, dups) in ld.items():
                ent = intlinksd.setdefault(ltype, (Histogram(lower=0, bucketsz=1), Histogram(lower=0, bucketsz=1), Histogram(lower=0, bucketsz=1), Histogram(lower=0, bucketsz=1)))
                # div flags values averaged over p[3] (presumably the
                # number of page downloads -- verify against producer).
                for h, v, div in [
                    (ent[0], max_ndisc, 0),
                    (ent[1], nd_disc, 1),
                    (ent[2], followed, 1),
                    (ent[3], dups, 1)
                    ]:
                    if v:
                        if div:
                            v = v/p[3]
                        h.add(v)
            otypes = p[7]
            for ot, n in otypes.items():
                h = otypesd.setdefault(ot, Histogram(lower=0, bucketsz=1))
                h.add(n)
    slist.sort()
    slist.reverse()
    # "Top servers" tables: one per metric.  Servers below 2.5% of the
    # metric total are lumped into an 'Other' row.
    for lab, srt, f1, f2, tot in [
       ('discrete page references', by_0, 0, None, tot_disc_p),
       ('discrete object references', by_1, 1, None, tot_disc_o),
       ('discrete others object references', by_2, 2, None, tot_disc_oo),
       ('pages encountered', by_3_0, 3, 0, allptots[0]),
       ('pages requested', by_3_1, 3, 1, allptots[1]),
       ('pages downloaded', by_3_2, 3, 2, allptots[2]),
       ('objects requested', by_4_1, 4, 1, allotots[1]),
       ('objects downloaded', by_4_2, 4, 2, allotots[2]),
       ('others objects requested', by_5_1, 5, 1, allootots[1]),
       ('others objects downloaded', by_5_2, 5, 2, allootots[2]),
#       ('bytes downloaded', by_7, 7, None, tot_bytes)
       ]:
        rep('top servers by %s:\n' % (lab))
        slist.sort(srt)
        rest_val = 0
        rest_pc = 0.0
        for stuff in slist:
            if f2 == None:
                val = stuff[f1]
            else:
                val = stuff[f1][f2]
            pc = (val*100.0)/tot
            if pc >= 2.5:
                rep('\t%s %d (%.2f%%)' % (stuff[6], val, pc))
            else:
                rest_val += val
                rest_pc += pc
        rep('\tOther %d (%.2f%%)' % (rest_val, rest_pc))
        rep('\n')
    # links to pages
    rep('Link types to pages encountered:\n')
    pagelinks = [(n, ltype) for ltype, n in plinksd.items()]
    pagelinks.sort()
    pagelinks.reverse()
    rest_val = 0
    rest_pc = 0.0
    for n, ltype in pagelinks:
        pc = (n*100.0)/allptots[0]
        if pc >= 2.5:
            rep('0x%x %d (%.2f%%)' % (ltype, n, pc))
        else:
            rest_val += n
            rest_pc += pc
    rep('Other %d (%.2f%%)' % (rest_val, rest_pc))
    rep('\n')
    # Per-link-type histogram files under <basedir>/links_data.
    linksdir = os.path.join(basedir, 'links_data')
    try:
        os.makedirs(linksdir)
    except OSError,s:
        # Tolerate a pre-existing directory only.
        if str(s).find('File exists') < 0:
            raise
    intlinks = [(lt, lhists) for lt, lhists in intlinksd.items()]
    intlinks.sort()
    comm = 'First column is number of links, second is No. pages occurring'
    for lt, hists in intlinks:
        lts = '0x%x' % (lt)
        for h, fn, tit in [
            (hists[0], 'oa_max', 'max over all sightings of a page'),
            (hists[1], 'ave_seen', 'average No. over page downloads'),
            (hists[2], 'ave_followed', 'average No. followed'),
            (hists[3], 'ave_duplicated', 'average No. duplicated')
            ]:
            fnm = os.path.join(linksdir, '%s-%s' % (lts, fn))
            #f = open(fnm, 'w')
            try:
                h.results(zeros=0, file=fnm, title=tit, comment=comm)
            except HistogramError, s:
                # An empty histogram just means nothing to write.
                if str(s).find('No samples presented') >= 0:
                    continue
                else:
                    raise
    fnm = os.path.join(linksdir, 'disc_urls')
    try:
        uhist.results(zeros=0, file=fnm, title='disc_urls', comment='Max number of discrete URL links of all types over all sightings of a page\n - first column is number of URLs, second is No. pages occurring')
    except HistogramError, s:
        if str(s).find('No samples presented') < 0:
            raise
    # Per-object-type histogram files under <basedir>/types_per_page.
    typesdir = os.path.join(basedir, 'types_per_page')
    try:
        os.makedirs(typesdir)
    except OSError,s:
        if str(s).find('File exists') < 0:
            raise
    # NOTE(review): loop variable `type` shadows the builtin.
    for type, h in otypesd.items():
        typestr = object_type_string(type)
        fnm = os.path.join(typesdir, typestr)
        try:
            h.results(zeros=0, file=fnm, comment='Distribution of downloaded object types per page\n - first column is number of obs. of the type, second column is No. pages occuring', title='Downloaded types distribution')
        except HistogramError, s:
            if str(s).find('No samples presented') < 0:
                raise

# ---------------------------------------------------------------------------
# Top-level script: parse options, derive the output file name from the
# input report file names, and set up the Page_results output directory.
# (Truncated here -- the listing continues on page 2.)

scriptname = os.path.basename(argv[0])
ofnm = None
nob_thresh = NOB_THRESH
try:
    optlist, args = getopt.getopt(sys.argv[1:], 'o:n:')
except getopt.error, s:
    print '%s: %s' % (scriptname, s)
    # NOTE(review): usage() is not defined in this page of the listing --
    # presumably defined on page 2 or this is a latent NameError.
    usage(scriptname)
    sys.exit(1)
for opt in optlist:
    if opt[0] == '-o':
        ofnm = opt[1]
    if opt[0] == '-n':
        # NOTE(review): stored as a string, not int() -- later '%d'
        # formatting of nob_thresh would fail; confirm intent.
        nob_thresh = opt[1]
start_re = re.compile('# Run start = ([0-9]*).*')
start = 0
accums = [accum('NOT IN TREES:'), accum('IN_TREES:'), accum('TOTAL:')]
basedir = os.path.dirname(args[0])
basedir = os.path.join(basedir, 'Page_results')
try:
    os.makedirs(basedir)
except OSError,s:
    if str(s).find('File exists') < 0:
        raise
# Derive a summary file name of the form <first>-<last><suffix> from input
# files named like 'x.rep.NNN-x.rep.MMM.suffix'.
frange = []
suffs = []
pref_re = re.compile('(.\.rep\.\d*)-(.\.rep\.\d*)(\..*)')
print 'files from', os.getcwd(), ':'
rep('files from %s:' % (os.getcwd()))
for fnm in args:
    fnm = os.path.basename(fnm)
    print '   ', os.path.basename(fnm)
    m = pref_re.match(fnm)
    if m:
        #print m.group(1), m.group(2), m.group(3)
        frange.append(m.group(1))
        frange.append(m.group(2))
        suffs.append(m.group(3))
if frange:
    for suff in suffs[1:]:
        if suff != suffs[0]:
            print 'Ouch mixed suffix:', suff
    frange.sort()
    ofnm = frange[0] + '-' + frange[-1] + suffs[0]
    #print frange
else:
    # NOTE(review): relies on `fnm` leaking from the loop above -- only
    # the last argument is considered here.
    pref_re = re.compile('(.\.rep\.\d*)\.(.*)')
    m = pref_re.match(os.path.basename(fnm))
    if m:
        #print m.group(1), m.group(2)
        ofnm = m.group(0)
if not ofnm:
    pref_re = re.compile('.*\.Pages')
    m = pref_re.match(os.path.basename(fnm))

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -