📄 robots.pm
字号:
# added Webdup http://www.webdup.com/en/index.html
# added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b
# added WordPress http://wordpress.org/
# added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/
# added Xenu's Link Sleuth (with ')
# added xirq http://www.xirq.com/
# added yoogliFetchAgent http://www.yoogli.com/
# added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/
# -- fix - some robots were reported with _ where _ should have been a space.
# changed Xenu Link Sleuth
# changed microsoft\_url\_control -> microsoft\surl\scontrol
# changed favorites\ssweeper -> favorites\ssweeper
# -- updates
# updated AskJeeves to Ask
# to do MS Search 4.0 Robot
#package AWSROB;
# Robots list was found at http://www.robotstxt.org/wc/active/all.txt
# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
# Rem: To avoid bad detection, some robots' ids were removed from this list:
#   - Robots with ID of 3 letters only
#   - Robots called 'webs' and 'tcl'
# Rem: Some robots mostly used for downloading have also been removed, e.g. wget
# Rem: directhit changed into direct_hit (its real id)
# Rem: calif changed into calif[^r] to avoid confusion with Tiscalifreenet browser
# Rem: fish changed into [^a]fish to avoid confusion with Madsafish browser
# Rem: roadrunner changed into road_runner
# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser
# Rem: voyager changed into ^voyager\/ to avoid excluding voyager and amigavoyager browser
# RobotsSearchIDOrder
# It contains all matching criteria to search for in log fields.
# This list is used to know in which order to search Robot IDs.
# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more
# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more
# Note: Robots IDs are in lower case, ' ' and '+' are changed into '_' and are quoted.
#-------------------------------------------------------
# @RobotsSearchIDOrder_list1: search patterns for the most frequent robots.
# Order is significant: several entries carry a "Must be before ..." note
# because a more specific ID has to be tried before a shorter ID it contains.
# Entries use regex-style escapes (\s, \-, \., ^); presumably the consumer
# matches them as regular expressions - confirm against the caller.
@RobotsSearchIDOrder_list1 = (
# Common robots (In robot file)
'appie',
'architext',
'jeeves',
'bjaaland',
'ferret',
'googlebot',
'gulliver',
'virus\_detector', # Must be before harvest
'harvest',
'htdig',
'linkwalker',
'lycos_',
'moget',
'muscatferret',
'myweb',
'nomad',
'scooter',
'yahoo!\sslurp\schina', # Must come before singular slurp or yahoo
'slurp',
'^voyager\/',
'weblayers',
# Common robots (Not in robot file)
'antibot',
'bruinbot',
'digout4u',
'echo!',
'fast\-webcrawler',
'ia_archiver\-web\.archive\.org', # Must be before ia_archiver to avoid confusion with alexa
'ia_archiver',
'jennybot',
'mercator',
'netcraft',
'msnbot\-media',
'msnbot',
'petersnews',
'unlost_web_crawler',
'voila',
'webbase',
'webcollage',
'cfetch',
'zyborg', # Must be before wisenut
'wisenutbot'
);
@RobotsSearchIDOrder_list2 = (# Less common robots (In robot 
file)'[^a]fish','abcdatos','acme\.spider','ahoythehomepagefinder','alkaline','anthill','arachnophilia','arale','araneo','aretha','ariadne','powermarks','arks','aspider','atn\.txt','atomz','auresys','backrub','bbot','bigbrother','blackwidow','blindekuh','bloodhound','borg\-bot','brightnet','bspider','cactvschemistryspider','calif[^r]','cassandra','cgireader','checkbot','christcrawler','churl','cienciaficcion','collective','combine','conceptbot','coolbot','core','cosmos','cruiser','cusco','cyberspyder','desertrealm','deweb','dienstspider','digger','diibot','direct_hit','dnabot','download_express','dragonbot','dwcp','e\-collector','ebiness','elfinbot','emacs','emcspider','esther','evliyacelebi','fastcrawler','fdse','felix','fetchrover','fido','finnish','fireball','fouineur','francoroute','freecrawl','funnelweb','gama','gazz','gcreep','getbot','geturl','golem','grapnel','griffon','gromit','gulperbot','hambot','havindex','hometown','htmlgobble','hyperdecontextualizer','iajabot','iconoclast','ilse','imagelock','incywincy','informant','infoseek','infoseeksidewinder','infospider','inspectorwww','intelliagent','irobot','iron33','israelisearch','javabee','jbot','jcrawler','jobo','jobot','joebot','jubii','jumpstation','kapsi','katipo','kilroy','ko_yappo_robot','kummhttp','labelgrabber\.txt','larbin','legs','linkidator','linkscan','lockon','logo_gif','macworm','magpie','marvin','mattie','mediafox','merzscope','meshexplorer','mindcrawler','mnogosearch','momspider','monster','motor','muncher','mwdsearch','ndspider','nederland\.zoek','netcarta','netmechanic','netscoop','newscan\-online','nhse','northstar','nzexplorer','objectssearch','occam','octopus','openfind','orb_search','packrat','pageboy','parasite','patric','pegasus','perignator','perlcrawler','phantom','phpdig','piltdownman','pimptrain','pioneer','pitkow','pjspider','plumtreewebaccessor','poppi','portalb','psbot','python','raven','rbse','resumerobot','rhcs','road_runner','robbie','robi','robocrawl','robofox','robozilla','r
overbot','rules','safetynetrobot','search\-info','search_au','searchprocess','senrigan','sgscout','shaggy','shaihulud','sift','simbot','site\-valet','sitetech','skymob','slcrawler','smartspider','snooper','solbot','speedy','spider_monkey','spiderbot','spiderline','spiderman','spiderview','spry','sqworm','ssearcher','suke','sunrise','suntek','sven','tach_bw','tagyu\sagent',
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -