don't log webbots moves

This commit is contained in:
Frédéric Péters 2005-09-26 09:47:58 +00:00
parent fc83f4e9f9
commit 0841cabbba
2 changed files with 193 additions and 0 deletions

179
data/webbots Normal file
View File

@ -0,0 +1,179 @@
AbachoBOT
abcdatos_botlink
http://www.abcdatos.com/botlink/
AESOP_com_SpiderMan
ah-ha.com crawler (crawler@ah-ha.com)
ia_archiver
Scooter
Mercator
Scooter2_Mercator_3-1.0
roach.smo.av.com-1.0
Tv<nn>_Merc_resh_26_1_D-1.0
AltaVista-Intranet
jan.gelin@av.com
FAST-WebCrawler
crawler@fast.no
Wget
Acoon Robot
antibot
Atomz
AxmoRobot
Buscaplus Robi
http://www.buscaplus.com/robi/
CanSeek/
support@canseek.ca
ChristCRAWLER
http://www.christcrawler.com/
Clushbot
http://www.clush.com/bot.html
Crawler
admin@crawler.de
DaAdLe.com ROBOT/
RaBot
Agent-admin/ phortse@hanmail.net
contact/jylee@kies.co.kr
RaBot
Agent-admin/ webmaster@kisco.go.kr
DeepIndex
DittoSpyder
Jack
EARTHCOM.info
Speedy Spider
ArchitextSpider
ArchitectSpider
EuripBot
Arachnoidea
arachnoidea@euroseek.net
EZResult
Fast PartnerSite Crawler
FAST Data Search Crawler
FAST Data Search Document Retriever
KIT-Fireball
france.misesajour.com
FyberSearch
GalaxyBot
http://www.galaxy.com/galaxybot.html
geckobot
GenCrawler
GeonaBot
getRAX
Googlebot
googlebot@googlebot.com
http://googlebot.com/
moget/2.0
moget@goo.ne.jp
Aranha
Slurp.so/1.0
slurp@inktomi.com
Slurp/2.0j
slurp@inktomi.com
www.inktomisearch.com
Slurp/2.0-KiteHourly
slurp@inktomi.com;
www.inktomi.com/slurp.html
Slurp/2.0-OwlWeekly
spider@aeneid.com
www.inktomi.com/slurp.html
Slurp/3.0-AU
slurp@inktomi.com
Toutatis 2.5-2
Hubater
http://www.almaden.ibm.com/cs/crawler
IlTrovatore-Setaccio
IncyWincy
UltraSeek
InfoSeek Sidewinder
Mole2/1.0
webmaster@intags.de
MP3Bot
C-PBWF-ip3000.com-crawler
ip3000.com-crawler
http://www.istarthere.com
spider@istarthere.com
Knowledge.com/
kuloko-bot/0.2
LNSpiderguy
Linknzbot
lookbot
MantraAgent
NetResearchServer
www.loopimprovements.com/robot.html
Lycos_Spider_(T-Rex)
JoocerBot
HenryTheMiragoRobot
MojeekBot
mozDex/
MSNBOT/0.1
http://search.msn.com/msnbot.htm)
Navadoo Crawler
Gulliver
ObjectsSearch/0.01
PicoSearch/
PJspider
DIIbot
nttdirectory_robot
super-robot@super.navi.ocn.ne.jp
griffon
griffon@super.navi.ocn.ne.jp
Spider/maxbot.com
admin@maxbot.com
various (fakes agent on each access)
gazz/1.0
gazz@nttrd.com
???
NationalDirectory-SuperSpider
dloader(NaverRobot)/
dumrobo(NaverRobot)/
Openfind piranha,Shark
robot-response@openfind.com.tw
Openbot/
psbot
www.picsearch.org/bot.html
CrawlerBoy Pinpoint.com
user<n>.ip3000.com
QweeryBot
http://qweerybot.qweery.com)
AlkalineBOT
SeznamBot
Search-10
Fluffy the spider
info@searchhippo.com)
Scrubby/
asterias
speedfind ramBot xtreme
Kototoi/0.1
SearchByUsa
Searchspider/
SightQuestBot/
http://www.sightquest.com/bot.htm
Spider_Monkey/
Surfnomore Spider v1.1
Robot@SuperSnooper.Com
teoma_agent1
teoma_admin@hawkholdings.com
Teradex_Mapper
mapper@teradex.com
ESISmartSpider
Spider TraficDublu
Tutorial Crawler
http://www.tutorgig.com/crawler
updated/0.1beta
crawler@updated.com
UK Searcher Spider
Vivante Link Checker
appie
Nazilla
www.WebWombat.com.au
marvin/infoseek
marvin-team@webseek.de
MuscatFerret
WhizBang! Lab
ZyBorg
(info@WISEnut.com)
WIRE WebRefiner:
webrefiner@wire.co.uk
WSCbot
Yandex
Yellopet-Spider
libwww-perl
Iron33

View File

@ -23,6 +23,16 @@ def disable():
global logger
logger = None
def is_bot():
    """Tell whether the current request was issued by a known web robot.

    The ``HTTP_USER_AGENT`` value of the current request is compared with
    the entries of the ``webbots`` file located in the publisher's data
    directory.  Every non-blank line of that file, once stripped of
    surrounding whitespace, is a case-sensitive substring to look for.

    Returns True as soon as one entry is found inside the user agent;
    False when none matches or when the ``webbots`` file does not exist.
    """
    botfile = os.path.join(get_publisher().data_dir, 'webbots')
    if not os.path.exists(botfile):
        return False
    user_agent = get_request().get_environ('HTTP_USER_AGENT', '')
    # try/finally (rather than a ``with`` block) closes the file on every
    # exit path while staying valid on interpreters without ``with``.
    fd = open(botfile)
    try:
        for line in fd:
            bot_ua_string = line.strip()
            # A blank line strips to '', and '' is a substring of every
            # string: without this guard a single empty line in the file
            # would flag all visitors as bots.
            if bot_ua_string and bot_ua_string in user_agent:
                return True
    finally:
        fd.close()
    return False
def log(lvl, msg, session = None):
if not logger:
return
@ -31,6 +41,10 @@ def log(lvl, msg, session = None):
user_id = session.user
if not user_id:
user_id = 'anonymous'
if is_bot():
user_id = 'bot'
if user_id == 'bot' and lvl < logging.ERROR:
return # don't log bot accesses
address = get_request().get_environ('REMOTE_ADDR', '-')
path = get_request().get_path()
session_id = session.get_session_id() or '[nosession]'