📄 pagesum.py
字号:
#! /usr/bin/env python################################################################################ ## Copyright 2005 University of Cambridge Computer Laboratory. ## ## This file is part of Nprobe. ## ## Nprobe is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## ## Nprobe is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License ## along with Nprobe; if not, write to the Free Software ## Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ## ################################################################################from sys import argvimport getoptimport osimport sysimport reimport commandsfrom math import sqrtimport Numericimport np_warningsfrom np_treestats import NOB_THRESHfrom np_TCPConn import D_BIGDELfrom nprobe import CT_TEXT_HTML, CT_TEXT_XMLN_SERV_BKTS = 1000replist = []def rep(s): replist.append(s)def rep_per(args): boss_v, boss_t = args[0] if not boss_v: return rep('%d %s:' % (boss_v, boss_t)) for v, t in args[1:]: if v: pcf = 100.0/boss_v rep('\t%d %s (%.2f%%)' % (v, t, v*pcf))def rep_rep(): try: sf = open(sumfilenm, 'w') except IOError, s: print 'Couldn\'t open summary file', s sys.exit(1) for s in replist: print s sf.write(s + '\n') acc_fields = ['totpages', 'nobs', 'nconns', 'npdel', 'npbdel', 'npbdel85', 'no_del_del', 'sp', 'lp', 'unlinked', 'refr', 'inv', 'rvst']class accum: def __init__(self, what): self.what = what for f in acc_fields: setattr(self, f, 0) self.ud = {} self.sd = {} self.cd = {} self.nservd = Numeric.zeros(N_SERV_BKTS,) def report(self): rep(self.what) rep('%d servers %d clients %d obs %d conns' \ % (self.nserv, self.ncli, self.nobs, self.nconns)) rep_per([(self.nurl, 'URLs')]) page_per = [ (self.totpages, 'Page downloads'), (self.sp, 'lt %d obs' % (nob_thresh)), (self.lp, 'gt %d obs' % (nob_thresh)) ] single = self.unlinked+self.refr+self.inv+self.rvst if single: page_per += [ (single, 'single objects not in trees'), (self.unlinked, 'unlinked'), (self.refr, 'refreshes'), (self.rvst, 'revisits'), (self.inv, 'invalid') ] if self.npdel: page_per += [ (self.npdel, 'delayed'), (self.npbdel, 'long delayed'), (self.npbdel85, 'long delayed at 85%%') ] if self.no_del_del: page_per += [(self.no_del_del, 'delays not adding to total')] rep_per(page_per) rep_per([(self.nserv, 'Servers'), (self.nsdel, 'delayed'), (self.nsbdel, 'long delayed') ]) rep_per([(self.nurl, 'URLs'), (self.nudel, 'delayed'), (self.nubdel, 'long delayed') ]) rep('Distribution of servers/page') for i in range(N_SERV_BKTS): ns = self.nservd[i] if ns: rep(' %d %12d' % (i, ns)) rep('\n')def accum_tot(a, b, c): c.ud = {} c.ud.update(a.ud) for v in b.ud.items(): s = v[1] e = c.ud.setdefault(v[0], [0,0,0,0]) e[0] += s[0] e[1] += s[1] e[2] += s[2] e[3] += s[3] c.sd = {} c.sd.update(a.sd) for v in b.sd.items(): s = v[1] e = c.sd.setdefault(v[0], [0,0,0,0]) e[0] += s[0] e[1] += s[1] e[2] += s[2] e[3] += s[3] c.cd = {} c.cd.update(a.cd) for v in b.cd.items(): s = v[1] e = c.cd.setdefault(v[0], [0,0,0,0]) e[0] += s[0] e[1] += s[1] e[2] += s[2] e[3] += s[3] for i in range(N_SERV_BKTS): c.nservd[i] = a.nservd[i] + b.nservd[i] for f in acc_fields: setattr(c, f, getattr(a, f) + getattr(b, f)) scriptname = os.path.basename(argv[0])ofnm = Nonenob_thresh = NOB_THRESHtry: optlist, args = getopt.getopt(sys.argv[1:], 'o:n:')except getopt.error, s: print '%s: %s' % (scriptname, s) usage(scriptname) sys.exit(1)for opt in optlist: if opt[0] == '-o': ofnm = opt[1] if opt[0] == '-n': nob_thresh = opt[1]start_re = re.compile('# Run start = ([0-9]*).*')start = 0accums = [accum('NOT IN TREES:'), accum('IN_TREES:'), accum('TOTAL:')]basedir = os.path.dirname(args[0])frange = []suffs = []pref_re = re.compile('(.\.rep\.\d*)-(.\.rep\.\d*)(\..*)')print 'files from', os.getcwd(), ':'rep('files from %s:' % (os.getcwd()))for fnm in args: fnm = os.path.basename(fnm) print ' ', os.path.basename(fnm) m = pref_re.match(fnm) if m: #print m.group(1), m.group(2), m.group(3) frange.append(m.group(1)) frange.append(m.group(2)) suffs.append(m.group(3))if frange: for suff in suffs[1:]: if suff != suffs[0]: print 'Ouch mixed suffix:', suff frange.sort() ofnm = frange[0] + '-' + frange[-1] + suffs[0] #print frangeelse: pref_re = re.compile('(.\.rep\.\d*)\.(.*)') m = pref_re.match(os.path.basename(fnm)) if m: #print m.group(1), m.group(2) ofnm = m.group(0)if not ofnm: pref_re = re.compile('.*\.Pages') m = pref_re.match(os.path.basename(fnm)) if m: ofnm = os.path.basename(fnm)if not ofnm: ofnm = raw_input('output files base? - base is %s\n?' % (basedir)) if not ofnm: ofnm = os.path.basename(fnm)if ofnm[0] == '-': ofnm = ofnm[1:] outfbase = os.path.join(basedir, ofnm)#print 'writing to'rep('Output written to:')writes = [['', []], ['.lt%d' % (nob_thresh), []], ['.gt%d' % (nob_thresh), []]]ofnms = []for suff2, write in writes: for suff1 in ['.dur', '.dur85', '.ndur', '.ndur85', '.del', '.del85', '.acc_del', '.ddf']: outfnm = outfbase + suff1 + suff2 ofnms.append(outfnm) try: f = open(outfnm, 'w') except IOError, s: print 'Couldn\'t open data file', s sys.exit(1) write.append(f.write) #print 'writing to', os.path.basename(outfnm) #print outfnm rep(outfnm)sumfilenm = outfbase + '.Summary'#print 'Summary file is', sumfilenmrep('Summary file is %s' % (sumfilenm))#sys.exit(0)totp = 0badl = 0min_tm = 99999999999999999.9999max_tm = 0single_unlinked = 0for fnm in args: print os.path.basename(fnm) f = open(fnm, 'r') lno = 0 for l in f.readlines(): l = l.replace('\n', '') if l[0] == '#': m = start_re.match(l) if m: strt = long(m.group(1))/1000000.0 #print 'start = ', start if start == 0: start = strt strt -= start print 'start is', start, 'offset is', strt continue sf = l.rstrip().split(' ') totp += 1 intree = 1 #print sf #url, cli, serv, by, no, nc, tm, dur, ndur, dur85, ndur85, dflags = sf le = len(sf) if le == 18: pagenum = int(sf[0]) sf = sf[1:] how = '' url = sf[0] cli = sf[1] serv = sf[2] by = int(sf[3]) no = int(sf[4]) nc = int(sf[5]) lnk = int(sf[6]) ctype = lnk >> 16 ltype = lnk & 0xFFFF nservs = int(sf[7]) tm = float(sf[8]) + strt dur = int(sf[9]) ndur = int(sf[10]) dur85 = int(sf[11]) ndur85 = int(sf[12]) dflags = int(sf[13]) acc_del = int(sf[14]) delv = long(sf[15]) cnt_del = int(sf[16]) elif le == 10: intree = 0 how = sf[0] url = sf[1] cli = sf[2] serv = sf[3] by = int(sf[4]) ctype = int(sf[5]) no = 1 nc = 1 tm = float(sf[6])/1000 + strt dur = int(sf[7]) dur85 = dur ndur = int(sf[8]) ndur85 = ndur dflags = int(sf[9]) acc_del = cnt_del = dur - ndur delv = acc_del*acc_del nservs = 1 ## if ctype == CT_TEXT_HTML or ctype == CT_TEXT_XML:## #single object page## single_unlinked += 1## intree = 1## ltype = 0 else: print 'pagesum - bad line %s %d: %s %d fields' % (fnm, lno, l, le) badl += 1 continue if ndur == 0: continue #print how, tm min_tm = min(tm, min_tm) max_tm = max(tm, max_tm) acc = accums[intree] acc.totpages += 1 ue = acc.ud.setdefault(url, [0,0,0,0]) se = acc.sd.setdefault(serv, [0,0,0,0]) ce = acc.cd.setdefault(cli, [0,0,0,0]) ue[0] += 1 se[0] +=1 ce[0] += 1 dl = dur - ndur dl85 = dur85 - ndur85 if dl: acc.npbdel += 1 ue[1] += 1 se[1] += 1 if dl85: acc.npbdel85 += 1 ue[2] += 1 se[2] += 1 if dflags: acc.npdel += 1 ue[3] += 1 se[3] += 1 acc.nobs += no acc.nconns += nc #print totdur, durlessdel, tot85dur, dur85lessdel if not intree: if no != 1: print 'Single object goof %d objects %s %d: %s' \ % (no, fnm, lno, l) if how == 'U': acc.unlinked += 1 elif how == 'R': acc.refr += 1 elif how == 'I': acc.inv += 1 elif how == 'V': acc.rvst += 1 else: print 'Invalid non-tree reason %s %d: %s' % (fnm, lno, l) sys.exit(1) write = writes[0][1] write[0]('%.3f\t%d\n' % (tm, dur)) write[1]('%.3f\t%d\n' % (tm, dur85)) write[2]('%.3f\t%d\n' % (tm, ndur)) write[3]('%.3f\t%d\n' % (tm, ndur85)) if dl: write[4]('%.3f\t%d\n' % (tm, dl)) if acc_del and dur: write[6]('%.3f\t%.2f\n' % (tm, ((acc_del/no)*100.0)/ndur)) write[7]('%.3f\t%.2f\n' % ( tm, (sqrt(delv/no))/ndur) ) if dl85: write[5]('%.3f\t%d\n' % (tm, dur85-ndur85)) if no > nob_thresh: write = writes[2][1] acc.lp += 1 else: write = writes[1][1] acc.sp += 1 write[0]('%.3f\t%d\n' % (tm, dur)) write[1]('%.3f\t%d\n' % (tm, dur85)) write[2]('%.3f\t%d\n' % (tm, ndur)) write[3]('%.3f\t%d\n' % (tm, ndur85)) if dl: write[4]('%.3f\t%d\n' % (tm, dur-ndur)) if acc_del and dur: write[6]('%.3f\t%.2f\n' % (tm, ((acc_del/no)*100.0)/ndur)) write[7]('%.3f\t%.2f\n' % (tm, (sqrt(delv/no))/ndur)) if dl85: write[5]('%.3f\t%d\n' % (tm, dur85-ndur85)) if acc_del and not (dur-ndur): acc.no_del_del += 1 acc.nservd[nservs] += 1 lno += 1 if not totp: print 'No pages in page file(s)' sys.exit(1)print totp, 'pages'print '%d/%d bad lines' % (badl, totp)print 'times:', min_tm, max_tm accum_tot(accums[0], accums[1], accums[2])for i in [1, 0, 2]: acc = accums[i] acc.nurl = len(acc.ud) acc.nserv = len(acc.sd) acc.ncli = len(acc.cd) acc.nsdel = 0 acc.nsbdel = 0 for s in acc.sd.values(): if s[1]: acc.nsbdel += 1 if s[3]: acc.nsdel += 1 acc.nudel = 0 acc.nubdel = 0 for u in acc.ud.values(): if u[1]: acc.nubdel += 1 if u[3]: acc.nudel += 1 acc.report()userv = 0itsd = accums[1].sdfor s in accums[0].sd.keys(): if not itsd.has_key(s): userv += 1 uurl = 0itud = accums[1].udfor s in accums[0].ud.keys(): if not itud.has_key(s): uurl += 1 rep('%d servers %d urls not seen in trees' % (userv, uurl))rep('%d single unlinked' % (single_unlinked)) rep_rep()## for fn in ofnms:## #tmpfile = os.tempnam('/tmp')## tmpfile = fn + '.sorted'## sortcmd = 'sort -n -o %s %s' % (tmpfile, fn) ## mvcmd = 'mv %s %s ' % (tmpfile, fn)## for cmd in [sortcmd, mvcmd]:## status, output = commands.getstatusoutput(cmd)## if status:## print cmd, 'failed with', output
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -