
pagesum2.py

This software generates NetFlow records from network data. NetFlow can be used for network planning, load balancing, security monitoring, and similar tasks.
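#
# Record types handled by the main loop below (one whitespace-separated
# record per input line):
#   'P'                - page download summary
#   'T'                - object (page constituent) download record
#   'U', 'R', 'I', 'V' - single pages outside a download tree, tallied as
#                        acc.unlinked, acc.refr, acc.inv and acc.rvst
# Lines starting with '#' carry the capture start time (start_re) and mark
# the end of a page's object list (inobs).
#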
    if m:
        ofnm = os.path.basename(fnm)
if not ofnm:
    ofnm = raw_input('output files base? - base is %s\n?' % (basedir))
    if not ofnm:
        ofnm = os.path.basename(fnm)
if ofnm[0] == '-':
    ofnm = ofnm[1:]

outfbase = os.path.join(basedir, ofnm)

#print 'writing to'
rep('Output written to:')
writes = [['', []], ['.lt%d' % (nob_thresh), []], ['.gt%d' % (nob_thresh), []]]
ofnms = []
for suff2, write in writes:
    for suff1 in ['.dur', '.dur85', '.ndur', '.ndur85', '.del', '.del85', '.acc_del', '.ddf']:
        outfnm = outfbase + suff1 + suff2
        ofnms.append(outfnm)
        try:
            f = open(outfnm, 'w')
        except IOError, s:
            print 'Couldn\'t open data file', s
            sys.exit(1)
        write.append(f.write)
        #print 'writing to', os.path.basename(outfnm)
        #print outfnm
        rep(outfnm)

sumfilenm = outfbase + '.Summary'
#print 'Summary file is', sumfilenm
rep('Summary file is %s' % (sumfilenm))
#sys.exit(0)

totp = 0
toto = 0
badl = 0
min_tm = 99999999999999999.9999
max_tm = 0
single_unlinked = 0

#
# The following for page and object downloads info gathering
#
pagefirst = inobs = 1 # flags
namecache = {}
#
#
# entries for 'P' keyed entry are:
# {(URL) host: (0/hostdata, 1/page_dict, 2/object_dict, 3/others_object_dict}
#  hostdata = {IP_addrs}
#  page_dict = {(URL path) page: [0/{linktypes (to page)}, 1/times-seen,
#                                 2/-requested, 3/-downloaded,
#                                 4/accum bytes, 5/{linktypes (in page)},
#                                 6/{page type}, 7/{downloaded object types},
#                                 8/max discrete URL links in page]
#  object_dict = {(URL path) object: [0/times-seen,
#                                 1/-requested, 2/-downloaded, 3/accum bytes,
#                                 4/{page_type}]
# others_object_dict = same as object_dict but objects served for pages on different server
#
servs = {'T':{}, 'P':{}, 'U':{}, 'R':{}, 'I':{}, 'V':{}}
line_err = 0
#
#
#
for fnm in args:
    if line_err == 1:
        s = replist.pop()
        rep_now('%s - **File truncated**' % (s))
    line_err = 0

    print os.path.basename(fnm)
    f = open(fnm, 'r')
    lno = -1
    for l in f.readlines():
        lno += 1
        l = l.replace('\n', '')
        if l[0] == '#':
            m = None
            m = start_re.match(l)
            if m:
                strt = long(m.group(1))/1000000.0
                #print 'start = ', start
                if start == 0:
                    start = strt
                strt -= start
                print 'start is', start, 'offset is', strt
            if inobs:
                inobs = 0 # # denotes end of objects
                pass
            continue
        sf = l.rstrip().split(' ')
        intree = 1
        #print sf
        #url, cli, serv, by, no, nc, tm, dur, ndur, dur85, ndur85, dflags = sf
        rt = sf[0]
        if rt == 'T':
            if len(sf) < 14:
                rep_now('Bad line file %s line %d \"%s"' % (fnm, lno, l))
                line_err += 1
                continue
            toto += 1
            o_obno = int(sf[1])
            sf = sf[2:]
            o_url = sf[0]
            o_serv = sf[1]
            o_connorder = int(sf[2])
            o_nbytes = int(sf[3])
            o_ltype = int(sf[4], 16)
            o_type = int(sf[5])
            o_retcode = int(sf[6])
            o_tm = float(sf[7]) + strt
            o_finger = (int(sf[8]), int(sf[9]), int(sf[10]))
            o_iscont = int(sf[11])
            if o_iscont:
                o_alinks = int(sf[12])
                nltypes = int(sf[13])
                #assert len(sf) == 14 + nltypes*5
                if len(sf) != 14 + nltypes*5:
                    rep_now('Bad line file %s line %d \"%s"' % (fnm, lno, l))
                    line_err += 1
                    continue
                ldata = []
                for i in range(nltypes):
                    data = []
                    off = 14 + (i*5)
                    for j in range(5):
                        data.append(int(sf[off+j]))
                    ldata.append(data)
            else:
                #assert len(sf) == 12
                if len(sf) != 12:
                    rep_now('Bad line file %s line %d \"%s"' % (fnm, lno, l))
                    line_err += 1
                    continue
        elif rt == 'P':
            #assert len(sf) == 20
            if len(sf) != 20:
                rep_now('Bad line file %s line %d \"%s"' % (fnm, lno, l))
                line_err += 1
                continue
            totp += 1
            pagenum = int(sf[1])
            sf = sf[2:]
            url = sf[0]
            cli = sf[1]
            serv = sf[2]
            by = int(sf[3])
            no = int(sf[4])
            nc = int(sf[5])
            ltype = int(sf[6], 16)
            ctype = int(sf[7])
            nservs = int(sf[8])
            tm = float(sf[9]) + strt
            dur = int(sf[10])
            ndur = int(sf[11])
            dur85 = int(sf[12])
            ndur85 = int(sf[13])
            dflags = int(sf[14])
            acc_del = int(sf[15])
            delv = long(sf[16])
            cnt_del = int(sf[17])
            pagefirst = inobs = 1
            cont_seen = 0
        elif rt in ['U', 'R', 'I', 'V']:
            #assert len(sf) == 10
            if len(sf) != 10:
                rep_now('Bad line file %s line %d \"%s"' % (fnm, lno, l))
                line_err += 1
                continue
            totp += 1
            intree = 0
            url = sf[1]
            cli = sf[2]
            serv = sf[3]
            by = int(sf[4])
            ctype = int(sf[5])
            no = 1
            nc = 1
            tm = float(sf[6])/1000 + strt
            dur = int(sf[7])
            dur85 = dur
            ndur = int(sf[8])
            ndur85 = ndur
            dflags = int(sf[9])
            acc_del = cnt_del = dur - ndur
            delv = acc_del*acc_del
            nservs = 1
##             if ctype == CT_TEXT_HTML or ctype == CT_TEXT_XML:
##                 #single object page
##                 single_unlinked += 1
##                 intree = 1
##                 ltype = 0
        else:
            print 'pagesum - bad line %s %d: \"%s\"' % (fnm, lno, l)
            badl += 1
            continue

        if rt == 'T':
            up = urlparse(o_url)
            o_host = up[1].split(':')[0]
            if not o_host:
                try:
                    o_host = namecache[o_serv]
                except KeyError:
                    o_host = o_serv
            else:
                namecache[o_serv] = o_host
            obnm = up[2]
            if not obnm:
                obnm = 'NK'

            got = o_retcode == 200 or o_retcode == 206
            if pagefirst and o_host == host and obnm == page: #it's the page root container
                #print 'xx',
                pd[2] += 1
                pd[4] += o_nbytes
                if got:
                    pd[3] += 1
                    ptd = pd[6] # page object type dict
                    ptd[o_type] = ptd.setdefault(o_type, 0) + 1
                    cont_seen = 1
            else: # its a constituent object
                if o_host == host: # served by page host
                    od = hd[2]
                else:
                    od = servs['P'].setdefault(host, ({}, {}, {}, {}))[3]
                odd = od.setdefault(obnm, [0, 0, 0, 0, {}])
                #print 'XXX',
                odd[0] += 1
                odd[1] += 1
                odd[3] += o_nbytes
                if got:
                    odd[2] += 1
                    ptd = odd[4]
                    ptd[o_type] = ptd.setdefault(o_type, 0) + 1
            if got:
                ptd = pd[7]
                ptd[o_type] = ptd.setdefault(o_type, 0) + 1
            if o_iscont and cont_seen:
                # add in page links data (transitive in case of frames)
                pd[8] = max(o_alinks, pd[8])
                dd = pd[5]
                for lt in ldata:
                    ld = dd.setdefault(lt[0], [0, 0, 0, 0])
                    ld[0] = max(ld[0], lt[1])
                    for i in range(2,5):
                        ld[i-1] += lt[i]

            pagefirst = 0
        else:
            if ndur == 0:
                continue
            #print rt, tm
            min_tm = min(tm, min_tm)
            max_tm = max(tm, max_tm)
            acc = accums[intree]
            acc.totpages += 1
            ue = acc.ud.setdefault(url, [0,0,0,0])
            se = acc.sd.setdefault(serv, [0,0,0,0])
            ce = acc.cd.setdefault(cli, [0,0,0,0])
            ue[0] += 1
            se[0] += 1
            ce[0] += 1
            dl = dur - ndur
            dl85 = dur85 - ndur85
            if dl:
                acc.npbdel += 1
                ue[1] += 1
                se[1] += 1
            if dl85:
                acc.npbdel85 += 1
                ue[2] += 1
                se[2] += 1
            if dflags:
                acc.npdel += 1
                ue[3] += 1
                se[3] += 1
            acc.nobs += no
            acc.nconns += nc
            #print totdur, durlessdel, tot85dur, dur85lessdel
            if not intree:
                if no != 1:
                    print 'Single object goof %d objects %s %d: %s' \
                          % (no, fnm, lno, l)
                if rt == 'U':
                    acc.unlinked += 1
                elif rt == 'R':
                    acc.refr += 1
                elif rt == 'I':
                    acc.inv += 1
                elif rt == 'V':
                    acc.rvst += 1
                else:
                    print 'Invalid non-tree reason %s %d: %s' % (fnm, lno, l)
                    sys.exit(1)
            write = writes[0][1]
            write[0]('%.3f\t%d\n' % (tm, dur))
            write[1]('%.3f\t%d\n' % (tm, dur85))
            write[2]('%.3f\t%d\n' % (tm, ndur))
            write[3]('%.3f\t%d\n' % (tm, ndur85))
            if dl:
                write[4]('%.3f\t%d\n' % (tm, dl))
            if acc_del and dur:
                write[6]('%.3f\t%.2f\n' % (tm, ((acc_del/no)*100.0)/ndur))
                write[7]('%.3f\t%.2f\n' % (tm, (sqrt(delv/no))/ndur))
            if dl85:
                write[5]('%.3f\t%d\n' % (tm, dur85-ndur85))
            if no > nob_thresh:
                write = writes[2][1]
                acc.lp += 1
            else:
                write = writes[1][1]
                acc.sp += 1
            write[0]('%.3f\t%d\n' % (tm, dur))
            write[1]('%.3f\t%d\n' % (tm, dur85))
            write[2]('%.3f\t%d\n' % (tm, ndur))
            write[3]('%.3f\t%d\n' % (tm, ndur85))
            if dl:
                write[4]('%.3f\t%d\n' % (tm, dur-ndur))
            if acc_del and dur:
                write[6]('%.3f\t%.2f\n' % (tm, ((acc_del/no)*100.0)/ndur))
                write[7]('%.3f\t%.2f\n' % (tm, (sqrt(delv/no))/ndur))
            if dl85:
                write[5]('%.3f\t%d\n' % (tm, dur85-ndur85))
            if acc_del and not (dur-ndur):
                acc.no_del_del += 1
            acc.nservd[nservs] += 1
            # page/object info gathering
            if rt == 'P':
                up = urlparse(url)
                host = up[1].split(':')[0]
                if not host:
                    try:
                        host = namecache[serv]
                    except KeyError:
                        host = serv
                else:
                    namecache[serv] = host
                page = up[2]
                if not page:
                    page = 'NK'

                hd = servs['P'].setdefault(host, ({}, {}, {}, {}))
                hhd = hd[0]
                # accumulate IP addrs for this (URL) host
                hhd[serv] = hhd.setdefault(serv, 0) + 1
                #accumulate pages from this (URL) host
                pd = hd[1].setdefault(page, [{}, 0, 0, 0, 0, {}, {}, {}, 0])
                #print '%x' % ltype
                ld = pd[0]
                # accumulate link types to page
                ld[ltype] = ld.setdefault(ltype, 0) + 1
                # accumulate times seen
                pd[1] += 1

if not totp:
    print 'No pages in page file(s)'
    sys.exit(1)

print totp, 'pages'
print toto, 'objects'
print '%d/%d bad lines' % (badl, totp)
print 'times:', min_tm, max_tm

accum_tot(accums[0], accums[1], accums[2])

for i in [1, 0, 2]:
    acc = accums[i]
    acc.nurl = len(acc.ud)
    acc.nserv = len(acc.sd)
    acc.ncli = len(acc.cd)
    acc.nsdel = 0
    acc.nsbdel = 0
    for s in acc.sd.values():
        if s[1]:
            acc.nsbdel += 1
        if s[3]:
            acc.nsdel += 1
    acc.nudel = 0
    acc.nubdel = 0
    for u in acc.ud.values():
        if u[1]:
            acc.nubdel += 1
        if u[3]:
            acc.nudel += 1
    acc.report()

userv = 0
itsd = accums[1].sd
for s in accums[0].sd.keys():
    if not itsd.has_key(s):
        userv += 1

uurl = 0
itud = accums[1].ud
for s in accums[0].ud.keys():
    if not itud.has_key(s):
        uurl += 1

rep('%d servers %d urls not seen in trees' % (userv, uurl))
rep('%d single unlinked' % (single_unlinked))

do_pagestuff(servs['P'])

rep_rep()

## for fn in ofnms:
##     #tmpfile = os.tempnam('/tmp')
##     tmpfile = fn + '.sorted'
##     sortcmd = 'sort -n -o %s %s' % (tmpfile, fn)
##     mvcmd = 'mv %s %s ' % (tmpfile, fn)
##     for cmd in [sortcmd, mvcmd]:
##         status, output = commands.getstatusoutput(cmd)
##         if status:
##             print cmd, 'failed with', output
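The nested layout documented in the comment block near the top of the listing (one servs['P'] entry per URL host, holding hostdata, page_dict, object_dict and others_object_dict) is easiest to see on a concrete record. The sketch below uses made-up host, page and object names and hand-picked byte counts; it populates a single entry the same way the 'P' and 'T' branches above do, and is not part of pagesum2.py itself.

# Minimal sketch (hypothetical data, not from any input file):
# build one servs['P'] entry as the 'P' and 'T' branches above do.
servs = {'P': {}}
host, page = 'www.example.com', '/index.html'

# hd = (hostdata, page_dict, object_dict, others_object_dict)
hd = servs['P'].setdefault(host, ({}, {}, {}, {}))
hd[0]['192.0.2.10'] = 1          # hostdata: IP addresses seen for this host

# page_dict entry layout (see the comment block above):
# [linktypes-to-page, seen, requested, downloaded, bytes,
#  linktypes-in-page, page types, downloaded object types, max links]
pd = hd[1].setdefault(page, [{}, 0, 0, 0, 0, {}, {}, {}, 0])
pd[1] += 1                       # times seen
pd[2] += 1                       # times requested
pd[3] += 1                       # times downloaded (HTTP 200/206)
pd[4] += 5120                    # accumulated bytes

# a constituent object served by the page's own host goes into object_dict
od = hd[2].setdefault('/logo.gif', [0, 0, 0, 0, {}])
od[0] += 1                       # times seen
od[1] += 1                       # times requested
od[2] += 1                       # times downloaded
od[3] += 2048                    # accumulated bytes

print servs['P'][host][1][page]  # -> [{}, 1, 1, 1, 5120, {}, {}, {}, 0]

Objects fetched for the page but served by a different host would instead be accumulated under index 3 of that other host's entry (others_object_dict), which is what the setdefault(...)[3] branch in the 'T' handling does.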
