📄 np_webagents.py
字号:
#! /usr/bin/env python
###############################################################################
#                                                                             #
#   Copyright 2005 University of Cambridge Computer Laboratory.               #
#                                                                             #
#   This file is part of Nprobe.                                              #
#                                                                             #
#   Nprobe is free software; you can redistribute it and/or modify            #
#   it under the terms of the GNU General Public License as published by      #
#   the Free Software Foundation; either version 2 of the License, or         #
#   (at your option) any later version.                                       #
#                                                                             #
#   Nprobe is distributed in the hope that it will be useful,                 #
#   but WITHOUT ANY WARRANTY; without even the implied warranty of            #
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the              #
#   GNU General Public License for more details.                              #
#                                                                             #
#   You should have received a copy of the GNU General Public License         #
#   along with Nprobe; if not, write to the Free Software                     #
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA   #
#                                                                             #
###############################################################################

###############################################################################
#
# Collect and collate Web Server and User Agent Data from HTTP Header Fields
#
# The code below uses only syntax accepted by both Python 2.6+ and Python 3
# (single-argument print(...), "except X as e", "//" for integer division).
#

import re
import sys
from sys import argv
import getopt

# Percentage of the total instance count that report_h() lists entry by
# entry; once that share has been printed the remaining entries are only
# summarised.
REP_TARGET_PERCENT = 100


def _percent(part, total):
    """Return part as a percentage of total, or 0.0 when total is zero."""
    if not total:
        return 0.0
    return (part * 100.0) / total


def _descending(items):
    """Return the items as a new list sorted from largest to smallest."""
    return sorted(items, reverse=True)


def _count_then_agent(pair):
    """Sort key for (count, agent) pairs that tolerates a None agent.

    A None agent orders below every string with the same count, so the
    comparison never has to compare None with a str.
    """
    count, agent = pair
    return (count, agent is not None, agent or '')


def _first_match(patterns, text):
    """Return the match of the first pattern matching text, else None."""
    for pattern in patterns:
        match = pattern.match(text)
        if match:
            return match
    return None


def _server_os(match):
    """Pick the OS label for a server match: os2, else os1, else a default."""
    return match.group('os2') or match.group('os1') or 'Not stated'


def _uagent_os(match):
    """Join the non-empty os1/os2 groups of a user agent match with a space."""
    parts = [part for part in (match.group('os1'), match.group('os2')) if part]
    if not parts:
        return 'Not stated'
    return ' '.join(parts)


class WebAgents:
    """Tally HTTP Server and User-Agent strings and summarise them.

    self.sad and self.uad map a server / user agent string to a dict keyed
    by host (see add_h and get_h).  aggregate_h() condenses those tables into
    the n*_seen, unrec_* and *_tot_inst attributes that report_h() prints.
    """

    def __init__(self):
        self.sad = {}
        self.uad = {}
        self.nservers_seen = []
        self.nuagents_seen = []
        self.unrec_servers = []
        self.unrec_uagents = []
        # Initialised here so report_h() also works before aggregate_h().
        self.servers_tot_inst = 0
        self.uagents_tot_inst = 0
        self.compile_servers()
        self.compile_uagents()

###############################################################################

    def compile_servers(self):
        """Compile the server patterns into self.smatches.

        Every pattern defines the named groups marque, premarque, vers, os1
        and os2 (as empty groups where there is nothing to capture) so that
        any match can be queried for all of them.  Order matters:
        aggregate_h() uses the first pattern that matches.
        """
        patterns = [
            r'^(?P<marque>Apache)'
            r'('
            r'(?P<vers>/\d+\.\d+\.\d+)?'
            r'.+?'
            r'(\s*\((?P<os1>.*?)\))?'
            r'(\s*\((?P<os2>.*?)\))?'
            r'.*'
            r')?'
            r'(?P<premarque>)',

            r'(?P<marque>Microsoft-IIS)/'
            r'(?P<vers>\d+\.\d+)'
            r'(?P<os1>)?'
            r'(?P<os2>)?'
            r'.*'
            r'(?P<premarque>)',

            r'(?P<marque>WindWeb|(((Allegro-Software|ZyXEL)-)?RomPager)|Boa'
            r'|Netscape|thttpd|WebSTAR|(S|s)quid|Lotus-Domino'
            r'|Caudium|AV|CERN|3Com|ARGUS)'
            r'(-(Brew|Commerce|FastTrack))?'
            r'(/(v|V)?(?P<vers>\d+(\.\d+)+)(pre\d)?(\.STABLE\d)?)?'
            r'(?P<os1>)?'
            r'(?P<os2>)?'
            r'.*'
            r'(?P<premarque>)',

            r'(?P<marque>(Cougar|APN HTTPD))'
            r'(\s(v|V)?(?P<vers>\d+(\.\d+)+))?'
            r'(?P<os1>)?'
            r'(?P<os2>)?'
            r'.*'
            r'(?P<premarque>)',

            r'(?P<marque>(Agranat|Virata)-EmWeb)'
            r'(/(?P<vers>R\d+(_\d+)+))?'
            r'(?P<os1>)?'
            r'(?P<os2>)?'
            r'.*'
            r'(?P<premarque>)',

            r'(?P<marque>httpd|Rapidsite/Apa|DCLK|WebLogic)'
            r'(-?(Creative|AdSvr|HttpSvr))?'
            r'(?P<vers>)?'
            r'(?P<os1>)?'
            r'(?P<os2>)?'
            r'.*'
            r'(?P<premarque>)',

            # NOTE(review): the '|' before os2 is a top-level alternation, so
            # a string that merely starts with 'RedHat' also matches here
            # with no marque - confirm that this is intended.
            r'^(?P<premarque>(IBM_HTTP_(Server|SERVER))|(Stronghold))'
            r'/\d(\.\d+)+'
            r'\s+'
            r'(?P<marque>Apache)'
            r'/(?P<vers>\d(\.\d+)+(-dev)?)'
            r'.*?'
            r'.*?'
            r'(\((?P<os1>Unix|Win32)\))|(?P<os2>(RedHat))'
            r'.*?',

            r'^(?P<premarque>NetCache|Oracle)'
            r'.*?'
            r'\('
            r'(?P<marque>NetApp|Apache)'
            r'/(?P<vers>\d(\.\d+)+(\.R\d(D\d)?)?)'
            r'.*'
            r'(?P<os2>)(?P<os1>)',

            r'^(?P<premarque>Oracle)'
            r'.*?'
            r'(?P<marque>Apache)'
            r'/(?P<vers>\d(\.\d+)*)'
            r'\s+'
            r'\((?P<os1>Win32|Unix)\)'
            r'.*'
            r'(?P<os2>)',
        ]
        self.smatches = [re.compile(pattern) for pattern in patterns]

###############################################################################

    def compile_uagents(self):
        """Compile the user agent patterns into self.umatches.

        Same conventions as compile_servers(): every pattern defines marque,
        premarque, vers, os1 and os2, and the first matching pattern wins.
        """
        WINDOWS = (r'((Windows|Win)\s*(((NT|XP)(\s+\d\.\d)?)'
                   r'|98|95|9x|32|XP|2000|ME|Me|CE|95/98/2000/NT))')
        OTHER_OS = (r'Mac_PowerPC|PPC|Linux|Sun|OpenBSD|(OS X)|(IRIX\d*)'
                    r'|SunOS|FreeBSD|OSF1')
        OSS = '%s|%s' % (WINDOWS, OTHER_OS)

        patterns = [
            (r'.*?'
             r'(?P<premarque>)'
             r'(?P<marque>MSIE)'
             r'\s+'
             r'(?P<vers>\d(\.\d+)+[a-z]?);'
             r'.+?'
             r'(?P<os1>%s)'
             r'.*'
             r'(?P<os2>)') % WINDOWS,

            (r'.*?'
             r'\('
             r'.*?'
             r'(?P<os1>Windows|Macintosh|X11)'
             r';.*?'
             r'(?P<os2>%s)'
             r'.*?'
             r'\)'
             r'.*?'
             r'(?P<premarque>)'
             r'(?P<marque>Netscape|(Gecko(?!/\d+ Netscape)))\d?/'
             r'(?P<vers>(\d\.\d+(\.\d*)?)|(\d+))'
             r'.*') % OSS,

            r'.*?'
            r'\('
            r'.*?'
            r'(?P<premarque>)'
            r'(?P<marque>Slurp|grub)'
            r'.*?'
            r'\)'
            r'.*'
            r'(?P<vers>)(?P<os1>)(?P<os2>)',

            r'(?P<premarque>)'
            r'(?P<marque>Googlebot|Scooter|libwww-perl)/'
            r'(?P<vers>\d+(\.\d+)*)'
            r'.*'
            r'(?P<os1>)(?P<os2>)',

            (r'(?P<premarque>)'
             r'^(?P<marque>Opera)/'
             r'(?P<vers>\d\.\d+)'
             r'\s+'
             r'\('
             r'(?P<os1>%s)'
             r'.*?'
             r'\)'
             r'.*'
             r'(?P<os2>)') % OSS,

            (r'.*?'
             r'\(.*?'
             r'(?P<os1>%s)'
             r'.*?\)'
             r'\s+'
             r'(?P<premarque>)'
             r'(?P<marque>Opera)\s+(?P<vers>\d\.\d+)'
             r'.*'
             r'(?P<os2>)') % OSS,

            (r'^Mozilla/\d\.\d+.*?'
             r'(?P<premarque>)(?P<marque>)(?P<vers>)'
             r'\(.*?'
             r'(?P<os1>(Macintosh|Mac|X11))?'
             r'.*?'
             r'(?P<os2>%s)'
             r'.*?'
             r'\)'
             r'.*') % OSS,
        ]
        self.umatches = [re.compile(pattern) for pattern in patterns]

###############################################################################

    def add_h(self, shost, chost, sa, ua):
        """Record one observation of a client/server pair.

        Increments the count held for client host chost under user agent ua
        and the count held for server host shost under server agent sa.
        """
        by_client = self.uad.setdefault(ua, {})
        by_client[chost] = by_client.get(chost, 0) + 1

        by_server = self.sad.setdefault(sa, {})
        # The server table is keyed by the server host; the original code
        # stored this count under chost, so it never accumulated.
        by_server[shost] = by_server.get(shost, 0) + 1

###############################################################################

    def aggregate_h(self):
        """Condense self.sad and self.uad into per-marque summaries.

        For each of 'servers' and 'uagents' this sets three attributes:

        n<var>_seen     list of (hosts, marque, [(hosts, os), ...]) tuples,
                        largest first
        <var>_tot_inst  total number of hosts counted
        unrec_<var>     list of (hosts, agent) for strings that no pattern
                        matched, largest first

        An entry stored under the key None is counted as 'Not known' and is
        removed from the table.
        """
        for var, table, patterns, describe_os in (
                ('servers', self.sad, self.smatches, _server_os),
                ('uagents', self.uad, self.umatches, _uagent_os)):

            summary = {}
            total = 0
            if None in table:
                unknown = len(table.pop(None))
                summary['Not known'] = [unknown, {'Not known': unknown}]
                total = unknown

            unrecognised = []
            for agent, hosts in table.items():
                nhosts = len(hosts)
                total += nhosts

                match = _first_match(patterns, agent)
                if match is None:
                    marque = 'Unrecognised'
                    os_name = 'Not known'
                    unrecognised.append((nhosts, agent))
                else:
                    premarque = match.group('premarque')
                    marque = match.group('marque')
                    if premarque:
                        marque = premarque + '/' + marque
                    if not marque:
                        marque = 'Not stated'
                    os_name = describe_os(match)

                entry = summary.setdefault(marque, [0, {}])
                entry[0] += nhosts
                entry[1][os_name] = entry[1].get(os_name, 0) + nhosts

            ranked = _descending(
                (count, marque,
                 _descending((n, os_name) for os_name, n in by_os.items()))
                for marque, (count, by_os) in summary.items())

            setattr(self, 'n%s_seen' % var, ranked)
            setattr(self, '%s_tot_inst' % var, total)
            setattr(self, 'unrec_%s' % var, _descending(unrecognised))

###############################################################################

    def report_h(self, file=None, verbose=0):
        """Print the summaries built by aggregate_h().

        file     optional writable object that receives a copy of the lines
        verbose  0 prints per-marque totals, 1 adds the per-OS breakdown,
                 2 or more adds the unreported and unrecognised strings
        """
        def emit(text, level=0):
            # NOTE(review): the original indentation was lost; it is assumed
            # that the copy written to file is gated by the same verbosity
            # test as the printed line - confirm.
            if verbose < level:
                return
            print(text)
            if file:
                file.write('%s\n' % text)

        for label, key in (('Servers', 'servers'), ('User Agents', 'uagents')):
            seen = getattr(self, 'n%s_seen' % key)
            unrecognised = getattr(self, 'unrec_%s' % key)
            total = getattr(self, '%s_tot_inst' % key)

            print('')
            emit(label)
            emit('%s\n' % ('=' * len(label)))

            target = (total * REP_TARGET_PERCENT) // 100
            reported = 0
            held_back = []
            held_back_total = 0
            held_back_max = 0
            for count, marque, os_counts in seen:
                if reported < target:
                    emit('%s %d (%.3f%%)'
                         % (marque, count, _percent(count, total)))
                    for os_count, os_name in os_counts:
                        emit('\t%s %d (%.3f%%)'
                             % (os_name, os_count, _percent(os_count, total)),
                             1)
                else:
                    held_back.append((count, marque))
                    held_back_total += count
                    held_back_max = max(held_back_max, count)
                reported += count

            if held_back_total:
                emit('\n%s Recognised but not individually reported %d (%.3f%%)\n'
                     % (label, held_back_total,
                        _percent(held_back_total, total)))
                for count, marque in held_back:
                    emit('%s %d' % (marque, count), 2)
                emit('\tMaximum instance %d (%.3f%%)\n'
                     % (held_back_max, _percent(held_back_max, total)))

            worst = 0
            worst_agent = ''
            emit('\nUnrecognised %s' % label, 2)
            emit('=============%s\n' % ('=' * len(label)), 2)
            for count, agent in unrecognised:
                emit('%s %d' % (agent, count), 2)
                if count > worst:
                    worst = count
                    worst_agent = agent

            emit('\nMaximum unrecognised instance %d (%.3f%%)\n\t\'%s\'\n'
                 % (worst, _percent(worst, total), worst_agent))

###############################################################################

    def save_h(self, dir):
        """Write the agent tables to the file 'agents_seen' inside dir.

        Each section is a heading line followed by '<agent> - <hosts>' lines,
        largest host count first; get_h() reads this format back.  Prints a
        message and exits with status 1 if the file cannot be created.
        """
        try:
            f = open(dir + '/agents_seen', 'w')
        except (IOError, OSError) as s:
            print('np_WebAgents - can\'t create agents file')
            print(str(s))
            sys.exit(1)

        try:
            for lab, table in (('User Agents', self.uad),
                               ('Servers', self.sad)):
                counts = [(len(hosts), agent)
                          for agent, hosts in table.items()]
                counts.sort(key=_count_then_agent, reverse=True)
                f.write('%s\n\n' % lab)
                for n, agent in counts:
                    f.write('%s - %d\n' % (agent, n))
                f.write('\n\n')
        finally:
            f.close()

###############################################################################

    def get_h(self, file):
        """Load agent tables from a file in the format written by save_h().

        Only counts are stored in the file, so each agent is given a
        placeholder host dict with keys 0..n-1.  Lines matching neither a
        section heading nor an agent line are ignored, as are agent lines
        that appear before the first heading.  Prints a message and exits
        with status 1 if the file cannot be opened.
        """
        heading = re.compile(r'^(?P<type>User Agents|Servers)$')
        entry = re.compile(r'^(?P<agent>.*?) - (?P<n>\d+)$')
        sections = {'User Agents': self.uad, 'Servers': self.sad}

        try:
            f = open(file, 'r')
        except (IOError, OSError) as s:
            print('np_WebAgents - can\'t open agents file')
            print(str(s))
            sys.exit(1)

        try:
            table = None
            for line in f:
                match = entry.match(line)
                if match:
                    if table is None:
                        # No heading seen yet: nowhere to file this entry.
                        continue
                    count = int(match.group('n'))
                    table[match.group('agent')] = dict.fromkeys(range(count), 1)
                    continue
                match = heading.match(line)
                if match:
                    table = sections[match.group('type')]
        finally:
            f.close()

###############################################################################


def main():
    """Command line entry point.

    Usage: np_webagents.py [-v]... [-f outfile] agents_file

    -v may be repeated to raise the report verbosity; -f names a file that
    receives a copy of the report.
    """
    verbose = 0
    file = None

    try:
        optlist, args = getopt.getopt(sys.argv[1:], 'vf:')
    except getopt.error as s:
        print('np_WebAgents.py Error')
        print(str(s))
        sys.exit(1)

    try:
        for opt, value in optlist:
            if opt == '-v':
                verbose += 1
            if opt == '-f':
                if file is not None:
                    # A repeated -f replaces the previous output file.
                    file.close()
                    file = None
                try:
                    file = open(value, 'w')
                except (IOError, OSError) as s:
                    print('Error')
                    print(str(s))
                    sys.exit(1)

        if not args:
            print('np_WebAgents.py Error - Agents file not specified')
            sys.exit(1)

        agents = WebAgents()
        agents.get_h(args[0])
        agents.aggregate_h()
        agents.report_h(file=file, verbose=verbose)
    finally:
        if file is not None:
            file.close()


###############################################################################

# Call main when run as script
if __name__ == '__main__':
    main()
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -