# AWSTATS ROBOTS DATABASE
#-------------------------------------------------------
# If you want to add robots to extend AWStats database detection capabilities,
# you must add an entry in RobotsSearchIDOrder_listx and RobotsHashIDLib.

# The entry in RobotsSearchIDOrder_listx is a Perl regular expression
# (see http://perldoc.perl.org/perlreref.html). AWStats applies these
# expressions to the user agent string in the order given by the lists. The
# first match specifies the robot.
#
# Note: This regular expression must not contain any whitespace.
# Otherwise AWStats will produce lines in the database that
# will be misinterpreted and as a consequence the corresponding data in the
# generated HTML reports will be wrong. If you want to match whitespace in
# the user agent string, use other constructs like '\s', '[[:blank:]]',
# '\p{IsSpace}', '\x20' etc.
#
# The corresponding entry in RobotsHashIDLib contains the regular expression
# as key, followed by a string containing HTML-text. AWStats inserts this
# text into reports to describe the bot. If possible the text should contain
# a link to the bot home page. This makes it easier for sysadmins to find
# the information necessary e.g. to adapt the robots.txt file.
#
# An entry in the RobotsAffiliateLib is not necessary. An entry in this list
# contains as first part the regular expression specifying the bot. The
# second part is a string that gives the Company or product managing the bot.
# This information is not used yet.
#
# There are several sorts of bots that AWStats is not able to detect and
# therefore a considerable amount of bot generated traffic counts
# as user traffic:
#
# a) A crawler that identifies itself in the referrer string, but not in
#    the user agent string. An example is the crawler from semalt.semalt.com.
#
# b) Crawlers that correctly access robots.txt but identify themselves in
#    the user agent string only once or just a few times. Most of the
#    time a user agent string is used that does not contain hints that
#    a bot is involved. An example is the iCjobs spider.
#    msnbot-UDiscovery/2.0b seems to show this behaviour too.
#
#
#
#-------------------------------------------------------
 
# 2018-03-13 RobC 
#              Added 36 robots and one generic ( survey ) using v 7.7 robots file as base.
#              Also moved robot "Obot" into generics so that it is singled out as an individual Robot.         
#

# Modified by makoto_hobbit　ホビット 2020.9.30

# 2016-09-02 RobC 
#              Fixed a few errors and added a few missing bots from awstats 7.5 release.
#
# 2016-08-28 RobC 
#              Complete re-build of this file almost from scratch.
#              dropped many old bots, added many new bots and reordered file.
#              edited and added regex expressions to stop spaces causing problems.
#              You should tune file by placing the most common robots crawling your site at top 
#              in List1.
#
#
#              N.B. many bots need to be in correct order so don't change order without checking if
#              change will cause counts to be allocated to wrong bot. Not always simple.
#
#
# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
#              added dipsie (not tested with real data).
#              added DomainsDB.net http://domainsdb.net/
#              added ia_archiver-web.archive.org (was inadvertently grouped with Alexa traffic)
#              added Nutch (used by looksmart (furl?))
#              added rssImagesBot
#              added Sqworm
#              added t\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e
#              added w3c css-validator
#              added documentation link to bot home pages for above and selected major bots.
#                    In the case of international bots, choose .com page.
#                    Included tool tip (html "title").
#                    To do: parameterize to match both AWStats language and tooltips settings.
#                    To do: add html links for all bots based on current documentation in source
#                           files referenced below.
#              changed '\wbot[\/\-]', to '\wbot[\/\-]' (removed comma)
#              made minor grammar corrections to notes below
# 2005-08-24	added YahooSeeker-Testing
#              	added w3c-checklink
#              	updated url for ask.com
# 2005-08-24   	added Girafabot http://www.girafa.com/
# 2005-08-30   	added PluckFeedCrawler http://www.pluck.com/
#		added Gaisbot/3.0 (robot05@gais.cs.ccu.edu.tw; )
#		added geniebot (wgao@genieknows.com)
#		added BecomeBot link http://www.become.com/site_owners.html
#		added topicblogs http://www.topicblogs.com/
#		added Powermarks; seen used by referrer spam
#		added YahooSeeker
#		added NG/2. http://www.exabot.com/
# 2005-09-15	added link for Walhello appie
#		added bender focused_crawler
#		updated YahooSeeker description (blog crawler)
# 2005-09-16	added link for http://linkchecker.sourceforge.net
# 		added ConveraCrawler/0.9d ( http://www.authoritativeweb.com/crawl)
#		added Blogslive  info@blogslive.com intelliseek.com
#		added BlogPulse (ISSpider-3.0) intelliseek.com
# 2005-09-26	added Feedfetcher-Google (http://www.google.com/feedfetcher.html)
#		added EverbeeCrawler
#		added Yahoo-Blogs http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html
#		added link for Bloglines http://www.bloglines.com
# 2005-10-19	fixed Feedfetcher-Google (http://www.google.com/feedfetcher.html)
# 		added Blogshares Spiders (Synchronized V1.5.1)
#		added yacy
# 2005-11-21	added Argus www.simpy.com
#		added BlogsSay :: RSS Search Crawler (http://www.blogssay.com/)
#		added MJ12bot http://majestic12.co.uk/bot.php
#		added OpenTaggerBot (http://www.opentagger.com/opentaggerbot.htm)
#		added OutfoxBot/0.3 (For internet experiments; outfox.agent@gmail.com)
#		added RufusBot Rufus Web Miner http://64.124.122.252.webaroo.com/feedback.html
#		added Seekbot (http://www.seekbot.net/bot.html)
#		added Yahoo-MMCrawler/3.x (mms-mmcrawler-support@yahoo-inc.com)
#               added link for BaiDuSpider
#		added link for Blogshares Spider
#		added link for StackRambler http://www.rambler.ru/doc/faq.shtml
#		added link for WISENutbot
#		added link for ZyBorg/1.0 (wn-14.zyborg@looksmart.net; http://www.WISEnutbot.com.  Moved location to above wisenut to avoid classification as wisenut
# 2005-12-15
#		added FAST Enteprise Crawler/6 (www dot fastsearch dot com). Note spelling Enteprise not Enterprise.
#		added findlinks http://wortschatz.uni-leipzig.de/findlinks/
#		added IBM Almaden Research Center WebFountain™ http://www.almaden.ibm.com/cs/crawler [hc3]
#		added INFOMINE/8.0 VLCrawler (http://infomine.ucr.edu/useragents)
#		added lmspider (lmspider@scansoft.com) http://www.nuance.com/
#		added noxtrumbot http://www.noxtrum.com/
#		added SandCrawler (Microsoft)
#		added SBIder http://www.sitesell.com/sbider.html
#		added SeznamBot http://fulltext.seznam.cz/
#		added sohu-search http://corp.sohu.com/ (looked for //robots.txt not /robots.txt)
#		added the ruffle SemanticWeb crawler v0.5 - http://www.unreach.net
#		added WebVulnCrawl/1.0 libwww-perl/5.803 (looked for //robots.txt not /robots.txt)
#		added Yahoo! Japan keyoshid http://www.yahoo.co.jp/
#		added Y!J http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html
#		added link for GigaBot
#		added link for MagpieRSS
#		added link for MSIECrawler
# 2005-12-21
#		added aipbot http://www.aipbot.com aipbot@aipbot.com [matthys70 users.sourceforge.net]
#		added Everest-Vulcan Inc./0.1 (R&D project; http://everest.vulcan.com/crawlerhelp)
#		added Fast-Search-Engine http://www.fast-search-engine.com/ [matthys70  users.sourceforge.net]
#		added g2Crawler (nobody@airmail.net) http://crawler.instantnetworks.net/
#		added Jakarta commons-httpclient http://jakarta.apache.org/commons/httpclient/ (hit robots.txt).  May be used as robot or browser - a site may want to remove this entry.
#		added OmniExplorer_Bot http://www.omni-explorer.com/ [matthys70 users.sourceforge.net]
#		added USTC-Semantic-Group ai.ustc.edu.cn/mas/en/research/index.php ?
# 2005-12-22
#		added EARTHCOM.info www.earthcom.info
#		added HTTrack off-line browser 'httrack','HTTrack', http://www.httrack.com/ [Moizes Gabor]
#		added KummHttp http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b [Moizes Gabor]
# 2006-01-01
#		added Dulance http://www.dulance.com/bot.jsp
#		added MojeekBot http://www.mojeek.com/bot.html
#		added nicebot http://www.egghelp.org/setup.htm ?
#		added Snappy http://www.urltrends.com/faq.php
#		added sohu agent
#		added VORTEX http://marty.anstey.ca/robots/vortex/ [matthys70 users.sourceforge.net]
#		added zspider http://feedback.redkolibri.com/
# 2006-01-13
#		added boitho.com-dc http://www.boitho.com/dcbot.html
#		added IRLbot http://irl.cs.tamu.edu/crawler
#		added virus_detector virus_harvester@securecomputing.com
#		added Wavefire http://www.wavefire.com; info@wavefire.com

#		added WebFilter Robot
# 2006-01-24
#		added Shim-Crawler http://www.logos.ic.i.u-tokyo.ac.jp/crawler/; crawl@logos.ic.i.u-tokyo.ac.jp
#		added Exabot exabot.com
#		added LetsCrawl.com http://letscrawl.com
#		added ichiro http://help.goo.ne.jp/door/crawlerE.html
# 2006-01-27    additional 22 robots from a list provided by Moizes Gabor
#		added ALeadSoftbot	http://www.aleadsoft.com/bot.htm
#		added CipinetBot	http://www.cipinet.com/bot.html
#		added Cuasarbot	http://www.cuasar.com/
#		added Dumbot	http://www.dumbfind.com/
#		added Extreme_Picture_Finder	http://www.exisoftware.com/
#		added Fooky.com/ScorpionBot/ScoutOut	http://www.fooky.com/scorpionbots
#		added IlTrovatore-Setaccio	http://www.iltrovatore.it/aiuto/motore_di_ricerca.html	bot@iltrovatore.it
#		added InsurancoBot	http://www.fastspywareremoval.com/
#		added InternetArchive	http://lucene.apache.org/nutch/bot.html 	nutch-agent@lucene.apache.org
#		added KazoomBot	http://www.kazoom.ca/bot.html	kazoombot@kazoom.ca
#		added Kurzor	http://www.easymail.hu/	cursor@easymail.hu
#		added NutchCVS	http://lucene.apache.org/nutch/bot.html	nutch-agent@lucene.apache.org
#		added NutchOSU-VLIB	http://lucene.apache.org/nutch/bot.html	nutch-agent@lucene.apache.org
#		added Orbiter	http://www.dailyorbit.com/bot.htm
#		added PHP_version_tracker	http://www.nexen.net/phpversion/bot.php
#		added SuperBot	http://www.sparkleware.com/superbot/
#		added SynooBot	http://www.synoo.de/bot.html	webmaster@synoo.com
#		added TestBot	http://www.agbrain.com/
#		added TutorGigBot	http://www.tutorgig.info/
#		added WebIndexer	mailto://webindexerv1@yahoo.com
#		added WebMiner	http://64.124.122.252/feedback.html
# 2006-02-01
#		added heritrix https://sourceforge.net/forum/message.php?msg_id=3550202
#		added Zeus Webster Pro https://sourceforge.net/forum/message.php?msg_id=3141164
#               additional robots from a list provided by Moizes Gabor [ mojzi -a-t- free mail hu ]
#		added Candlelight_Favorites_Inspector
#		added DomainChecker
#		added EasyDL
#		added FavOrg
#		added Favorites_Sweeper
#		added Html_Link_Validator
#		added Internet_Ninja
#		added JRTwine_Software_Check_Favorites_Utility
#		fixed Microsoft_URL_Control
#		added miniRank
#		added Missigua_Locator
#		added NPBot
#		added Ocelli
#		added Onet.pl_SA
#		added proodleBot
#		added SearchGuild_DMOZ_Experiment
#		added Susie
#		added Website_Monitoring_Bot
#		added Xenu_Link_Sleuth
# 2006-05-15
#		added ASPseek http://www.aspseek.org/
#		added AdamM Bot http://home.blic.net/adamm/
#		added archive.org_bot http://crawls.archive.org/collections/bncf/crawl.html
#		added arianna.libero.it (Italian Portal/search engine)
#		added Biz360 spider http://www.biz360.com
#		added BlogBridge Service http://www.blogbridge.com/
#		added BlogSearch http://www.icerocket.com/
#		added libcrawl
#		added edgeio-relanshanbottriever http://www.edgeio.com
#		added FeedFlow http://feedflow.com/about
#		added Biblioteca Nazionale Centrale di Firenze (Italian National Archive) http://www.bncf.firenze.sbn.it/raccolta.txt
#		added Java catchall - used by many spam bots
#		added lanshanbot http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_g_l_140406_1%5Cb
#		added msnbot-media http://search.msn.com/msnbot.htm
#		added MT::Telegraph::Agent
#		added Netluchs http://www.netluchs.de/ (German SE bot)
#		added oBot http://www.webmasterworld.com/forum11/1616.htm
#		added Onfolio http://www.onfolio.com/  (IE Toolbar plugin) - hit rss feeds.
#		added ping.blo.gs http://blo.gs/ping.php blog bot
#		added Sphere Scout http://www.sphere.com/
#		added sproose crawler http://www.sproose.com/bot.html
#		added SyndicAPI http://syndicapi.com/bot.html
#		added Yahoo! Mindset http://mindset.research.yahoo.com/
#		added msrabot
#		added Vagabondo & Vagabondo-WAP http://www.wise-guys.nl/Contact/index.php?botselected=webagents)#=uk
#		fixed Missigua Locator detection (Missigua_Locator -> Missigua Locator)
#		changed echo to echo! to avoid conflict with the bonecho (Firefox 2.0) browser.
#			This requires you to reprocess historic logs if you want EchO! to be recognized for older reports.
# 2006-05-17
#		added Alpha Search Agent # 62.152.125.60 Eurologon Srl
#		added Krugle http://www.krugle.com/crawler/info.html the search engine for developers
#		added Octora Beta Bot http://www.octora.com/ # Blog and Rss Search Engine
#		added UbiCrawler http://law.dsi.unimi.it/ubicrawler/
#		added Yahoo! Slurp China http://misc.yahoo.com.cn/help.html
#			You must reprocess old logs for the Yahoo! Slurp China bot to be detected in old reports
# 2006-05-20
#		added 1-More Scanner http://www.myzips.com/software/1-More-Scanner.phtml
#		added Accoona-AI-Agent http://www.accoona.com/
#		added ActiveBookmark http://www.libmaster.com/active_bookmark.php
#		added BIGLOTRON http://www.biglotron.com/robot.html
#		added Bookmark-Manager http://bkm.sourceforge.net/
#		added cbn00glebot
#		added Cerberian Drtrs http://www.pgts.com.au/cgi-bin/psql?robot_info=25240
#		added CFNetwork http://www.cocoadev.com/index.pl?CFNetwork
#		added CheckWeb link validator http://p.duby.free.fr/chkweb.htm
#		added Computer and Automation Research Institute Crawler http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html
#		added ConveraCrawler http://www.authoritativeweb.com/crawl/
#		added ConveraMultiMediaCrawler http://www.authoritativeweb.com/crawl/
#		added CSE HTML Validator Lite Online http://online.htmlvalidator.com/php/onlinevallite.php
#		added Cursor http://adcenter.hu/docs/en/bot.html
#		added Custo http://www.netwu.com/custo/
#		added DataFountains/DMOZ Downloader http://infomine.ucr.edu/
#		added Deepindex http://www.deepindex.net/faq.php
#		added DNSGroup http://www.dnsgroup.com/
#		added DoCoMo http://www.nttdocomo.co.jp/
#		added dumm.de-Bot http://www.dumm.de/
#		added ETS v http://www.freetranslation.com/help/
#		added eventax http://www.eventax.de/
#		added FAST Enterprise Crawler * crawleradmin.t-info@telekom.de http://www.telekom.de/
#		added FAST Enterprise Crawler http://www.fast.no/
#		added FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de http://www.telekom.de/
#		added FeedValidator http://feedvalidator.org/
#		added FilmkameraBot http://www.filmkamera.at/bot.html
#		added Findexa Crawler http://www.findexa.no/gulesider/article26548.ece
#		added Global Fetch http://www.wesonet.com/
#		added GOFORITBOT http://www.goforit.com/about/
#		added GoForIt.com http://www.goforit.com/about/
#		added GPU p2p crawler http://gpu.sourceforge.net/search_engine.php
#		added HooWWWer http://cosco.hiit.fi/search/hoowwwer/
#		added HPPrint
#		added HTMLParser http://htmlparser.sourceforge.net/
#		added Hundesuche.com-Bot http://www.hundesuche.com/
#		added InfoBot http://www.infobot.org/
#		added InfociousBot http://corp.infocious.com/tech_crawler.php
#		added InternetSupervision http://internetsupervision.com/
#		added isearch2006 http://www.yahoo.com.cn/
#		added IUPUI_Research_Bot http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/
#		added KalamBot http://64.124.122.251/feedback.html
#		added kamano.de NewsFeedVerzeichnis http://www.kamano.de/
#		added Kevin http://dznet.com/kevin/
#		added KnowItAll http://www.cs.washington.edu/research/knowitall/
#		added Knowledge.com http://www.knowledge.com/
#		added Kouaa Krawler http://www.kouaa.com/
#		added ksibot http://ego.ms.mff.cuni.cz/
#		added Link Valet Online http://www.htmlhelp.com/tools/valet/
#		added lwp-request http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request
#		added lwp-trivial http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm
#		added MapoftheInternet.com http://MapoftheInternet.com/
#		added Matrix S.p.A. - FAST Enterprise Crawler http://tin.virgilio.it/
#		added Megite http://www.megite.com/
#		added Metaspinner http://index.meta-spinner.de/
#		added Mini-reptile
#		added Misterbot http://www.misterbot.fr/
#		added Miva http://www.miva.com/
#		added Mizzu Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_m_141105_2\b
#		added MSRBOT http://research.microsoft.com/research/sv/msrbot/
#		added MS SharePoint Portal Server - MS Search 4.0 Robot http://support.microsoft.com/default.aspx?scid=kb;en-us;284022
#		added Mydoyouhike http://www.doyouhike.net/my
#		added NASA Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_140506_2\b
#		added NetSprint http://www.netsprint.pl/serwis/
#		added NimbleCrawler http://www.healthline.com/
#		added OpenWebSpider http://www.openwebspider.org/
#		added Oracle Ultra Search http://www.oracle.com/technology/products/ultrasearch/index.html
#		added OSSProxy http://www.marketscore.com/FAQ.Aspx
#		added passwordmaker.org http://passwordmaker.org/
#		added PEAR HTTP Request class http://pear.php.net/
#		added PEERbot http://www.peerbot.com/
#		added PHP version tracker http://www.nexen.net/phpversion/bot.php
#		added PictureOfInternet http://malfunction.org/poi/
#		added plinki http://www.plinki.com/
#		added Port Huron Labs http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1133\b
#		added PostFavorites http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_n_s_1135\b
#		added ProjectWF-java-test-crawler
#		added PyQuery http://sourceforge.net/projects/pyquery/
#		added Schizozilla http://spamhuntress.com/2005/03/18/gizmo/
#		added Scumbot
#		added Sensis Web Crawler http://www.sensis.com.au/
#		added snap.com beta crawler http://www.snap.com/
#		added Steeler http://www.tkl.iis.u-tokyo.ac.jp/~crawler/
#		added STEROID  Download http://faqs.org.ru/progr/pascal/delphi_internet2.htm
#		added Suchfin-Bot http://www.suchfin.de/
#		added Sunrise http://www.sunrisexp.com/
#		added Tagyu Agent http://www.tagyu.com/
#		added Tcl http client package http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm
#		added TeragramCrawlerSURF http://www.teragram.com/
#		added Test Crawler http://netp.ath.cx/
#		added UnChaos Bot Hybrid Web Search Engine http://www.unchaos.com/
#		added unido-bot http://www.unchina.org/unido/unido/our_projects/3_3.html
#		added UniversalFeedParser http://feedparser.org/ (seen from md301000.inktomisearch.com)
#		added updated http://www.updated.com/
#		added Vermut http://vermut.aol.com
#		added versus crawler from eda.baykan@epfl.ch http://www.epfl.ch/Eindex.html
#		added Vespa Crawler (Yahoo Norway?) http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb
#		added VSE http://www.vivisimo.com/
#		added webcrawl.net http://www.webcrawl.net/
#		added Web Downloader http://www.krasu.ru/soft/chuchelo/
#		added Webdup http://www.webdup.com/en/index.html
#		added Wells Search http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b
#		added WordPress http://wordpress.org/
#		added wume crawler http://wume.cse.lehigh.edu/~xiq204/crawler/
#		added Xenu's Link Sleuth (with ')
#		added xirq http://www.xirq.com/
#		added yoogliFetchAgent http://www.yoogli.com/
#		added Z-Add Link Checker http://w3.z-add.co.uk/linkcheck/
#		-- fix - some robots were reported with _ where _ should have been a space.
#		changed Xenu Link Sleuth
#		changed microsoft[_+\s]url[_+\s]control -> microsoft_url_control
#		changed favorites_sweeper -> favorites_sweeper
#		-- updates
#		updated AskJeeves to Ask
# 2012-06-05 Albrecht Mueller
#              added Grabber from SDSC (San Diego Supercomputer Center).
# 2013-09-30 Albrecht Mueller
# AWStats probably cannot detect this bot as it identifies itself in
# the referrer field and not in the user agent string.
#92.113.100.35 - - [29/Sep/2013:17:22:46 +0200] "GET /robots.txt HTTP/1.1" 200 516 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
#92.113.100.35 - - [29/Sep/2013:17:22:49 +0200] "GET /tghome.htm HTTP/1.1" 200 4445 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"
#92.113.100.35 - - [29/Sep/2013:17:22:51 +0200] "GET / HTTP/1.1" 200 5467 "http://extrabot.com/help/frytygativyheku.htm" "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0" "-"

# to do  MS Search 4.0 Robot

#package AWSROB;


# Robots list was found at http://www.robotstxt.org/wc/active/all.txt
# Other robots can be found at http://www.jafsoft.com/searchengines/webbots.html
# Rem: To avoid bad detection, some robot's ids were removed from this list:
#      - Robots with ID of 3 letters only
#      - Robots called 'webs' and 'tcl'
# Rem: directhit changed into direct_hit (its real id)
# Rem: calif changed into calif[^r] to avoid confusion with the Tiscalifreenet browser
# Rem: fish changed into [^a]fish to avoid confusion with the Madsafish browser
# Rem: roadrunner changed into road_runner
# Rem: lycos changed to lycos_ to avoid confusion with lycos-online browser
# Rem: voyager changed into ^voyager\/ to avoid excluding the voyager and amigavoyager browsers

# RobotsSearchIDOrder
# It contains all matching criteria to search for in log fields. This list is
# used to know in which order to search Robot IDs.
# Most frequent ones are in list1, used when LevelForRobotsDetection is 1 or more
# Minor robots are in list2, used when LevelForRobotsDetection is 2 or more
# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+\s]' and are quoted.
#-------------------------------------------------------
# First-level robot patterns (per the notes above: used when
# LevelForRobotsDetection is 1 or more). Each element is the source text of a
# Perl regular expression, written as a single-quoted string with no literal
# whitespace (blanks are spelled \s or [\x20]). The patterns are applied in
# list order and the first match identifies the robot, so a specific pattern
# must stay ahead of any broader pattern that would also match the same
# user agent string.
# NOTE(review): per the file header each pattern is also a key in
# RobotsHashIDLib (not visible in this section) - keep both in sync when
# editing an entry.
@RobotsSearchIDOrder_list1 = (
# --- Japanese robots plus assorted others ---
# Added by Ryu 2007.01.06
'008/',
'another_html\-lint',
'adsenserbot',
'amfibibot',
'baiduimagespider',
'baidumobaider',
'becomejpbot',
'bibliotheca',
'binetx',
'birubot/',
'bizpal\srss\saggregator',
'bookmark\srenewal\scheck\sagent',
'bpimagewalker',
'camelstampede',
'camontspider',
'cazoodlebot',
'charlotte',
'cityreview\srobot',
'co2h2onacl\@gmail\.com',
'coast\swebmaster',
'cococ',
'comaneci_bot',
'crooz',
'businessdbbot',
'dealgates\sbot',
'depspid/',
'diffbrowser/',
'ec2linkfinder',
'ec_favorite',
'empas_robot',
'e\-societyrobot',
'everyfeed-spider',
'faedit/',
'fast\smetaweb\scrawler',
'fastladder\sfeedfetcher',
'feed24\.com',
'feedbringer',
'feedchecker',
'feed\sparser',
'feedpath',
'feedshow',
# Anchored: only matches when the user agent string starts with fly/
'^fly/',
'freshreader',
'g10\sreader',
'gingercrawler',
'goo_search',
'goorssreader',
'grub\-client',
'gsa\-crawler',
'gslfbot',
'hailoobot',
'hatena',
'headline\-reader',
'hobbit\sbbtest\-net',
'html\sget',
'hyperestraier',
'hyperrobot',
'icc\-crawler',
'iframely',
'ilissurf',
'indexpert',
'internal\sdummy\sconnection',
'inweavesearchbot',
'jeeves/',
'jetbot',
'just\-crawler',
'kalooga',
'kb\.rmail',
'keybot',
'keywalkerbot',
'klsh\-pageget/',
'kotoha\.co\.jp',
'libghttp/',
'linguee\sbot',
'livedoorcheckers',
'livedoor\sfeedfetcher',
'livedoor\sscreenshot/',
'livedoor\shttpclient/',
'logict\sipv6\scrawler/',
'maldive\scrawler',
'masagool',
'masscan',
'maxamine\.com\-robot',
'metamojicrawler',
'mfcrawler',
'mogimogi/',
'metalogger',
'mlbot',
'mqbot',
'msr\-isrccrawler',
'multicrawler',
'naver',
'niyonizer',
'ndl\-japan\-research\-robot',
'nerdbynature\.bot',
'nettracker',
'newsalloy',
'www\.notconfigured\.com',
# Requires a whitespace character directly before obot; the unanchored obot/
# pattern appears much later in this list.
'\sobot',
'onetszukaj',
'openbot',
'openseemox\sbot',
'oracle\ssecure\senterprise\ssearch',
'outfoxbot',
'page_verifier',
'pear\shttp_request\sclass',
'paipo\-bot',
'pathtraq',
'pflab',
'pipeliner',
'pixray\-seeker',
'playon\srss\sreader/',
'pockey\-gethtml',
'purebot/',
'rankurbot',
'research\-spider',
's2robot',
# scrapy followed by a slash; a bare scrapy pattern follows later in the list.
'scrapy/',
'search\-hp_bot',
'search17bot',
'seo\.cug\.net\slink\schecker',
'setoozbot',
'shopwiki/',
'siclab',
'sistrix\scrawler',
'sitesucker',
'smart\.apnoti\.com',
'snapbot',
'snoopy\sv',
'sogou\sweb\sspider',
'sonar/', #Added by toshi 2006.04.09
'sonarplus/',
'spamrobot@126\.com',
'^spider/',
'spinn3r',
'sproose/',
'stackrambler',
'statbot@gmail\.com',
'strategic\sboard\sbot',
'swebot',
'sysscan/',
'technoratisnoop',
'techrigybot',
'tencenttraveler', # Must be before msiecrawler
'thriceler',
'^tibot/',
'toread\-crawler/',
'trackback/',
'trademango',
'umn/',
'url\spage\sindexer',
'useragent',
'w3crobot',
'wadaino\.jp\-crawler',
'web-robot',
'webalta',
'webauto',
'webaroobot',
'webdigity\swhois\sservice',
'website\sexplorer',
'wish\-la',
'wish\-project',
'wiwi',
'wwwster/',
'yahoo!-adcrawler',
'zao\-crawler',
'zibber',
# Matches a user agent string that consists of a single dash only
# (presumably how a missing user agent shows up in the log - confirm).
'^-$',
# --- Common robots (In robot file) ---
'bingbot/',
'bingpreview',
'msiecrawler',
'msnbot/',
'msnbot\-media/',
'adidxbot/',
# Must stay ahead of the plain googlebot/ pattern, which also matches it.
'not[\x20]googlebot/',
'googlebot/',
'google[\x20]web[\x20]preview',
'googlebot\-image/',
'googlebot\-mobile/',
'google[\x20]page[\x20]speed',
'google\-sitemaps',
'googlebot\-news',
'googlebot\-video/',
'adsbot\-google[\x20]\(',
'adsbot\-google\-mobile\-apps',
'mediapartners\-google',
'feedfetcher\-google',
'google\-adwords\-instant',
'firefox/1\.5',
# The China variant must stay ahead of the shorter Slurp pattern below.
'yahoo![\x20]slurp[\x20]china',
'yahoo![\x20]slurp',
# Specific Baidu spiders first, then the generic baidu catch-all.
'baiduspider/',
'baiduspider\-image',
'baidu',
# Specific Yandex agents first, then the generic yandex catch-all.
'yandexbot/',
'yandeximages/',
# NOTE(review): the only entry in this list containing upper-case letters,
# while the note above the list says robot IDs are in lower case - confirm
# that matching is case-insensitive, otherwise this entry cannot match a
# lower-cased user agent string.
'YandexImageResizer',
'yandexmetrika/',
'yandexmobilebot/',
'yandex',
'electricmonk/',
'spbot/',
'seznambot/',
'msie8',
'ahrefsbot/',
'fsbot',
'007ac9[\x20]crawler',
'2345explorer/',
'360spider',
'a[\x20]simple[\x20]crawler',
'abrave',
'acapbot/',
'accoona\-ai\-agent/',
'arcemedia',
'adnormcrawlercatchbot/',
'adscanner',
'aihitbot/',
'aipbot/',
'alphabot',
'apache\-httpclient/',
'apexoo[\x20]spider',
'applebot/',
'archive\.org_bot',
'babya[\x20]discoverer',
'barkrowler',
'bdcbot/',
'bellpagesca/',
'benosey[\x20]mohawk[\x20]search',
'bhcbot',
'bidswitchbot',
'bigbozz/',
'binget/',
'bitlybot',
'bl\.uk_lddc_bot/',
'blexbot/',
# NOTE(review): the dot is not escaped here, so it matches any character;
# other entries escape literal dots - confirm whether that is intended.
'bnf.fr_bot',
'boitho\.com\-dc/',
'booglebot',
'businessbot:',
'catchbot/',
'cb/nutch',
'ccbot/',
'cliqzbot/',
'cms[\x20]crawler',
'companybook\-crawler',
'converacrawler/',
'contacts-crawler',
'contxbot',
'cosmos/',
'crawl/nutch',
'crawler4j',
'crazywebcrawler',
'crmnlcrawlagent',
'cse[\x20]html[\x20]validator',
'c\-t[\x20]bot',
'cubot',
'curl/php',
'cyencebot',
'dalvik/',
'datacrawler/',
# daumoa must stay ahead of the shorter daum pattern.
'daumoa',
'daum',
'deepnet[\x20]explorer',
'deusu/',
'digincore',
'discordbot/',
'dispatch/',
'dnyzbot',
'docomo/',
'domain[\x20]re\-animator[\x20]bot',
'domaincrawler/',
'domainmacrocrawler/',
'domainsonocrawler/',
'domainstatsbot/',
'dotbot/',
'duckduckbot-https',
'duckduckgo\-favicons\-bot/',
'elinks/',
'elinks[\x20]\(',
'emailmarketingrobot/',
'emeraldshield\.com[\x20]webbot',
'envolk\[its\]spider/',
'eright',
'esperanzabot',
'exabot/',
'extlinksbot',
'experiancrawluk',
'facebookexternalhit/',
# NOTE(review): both patterns below use literal underscores between the words,
# and the first requires a literal s directly before crawleradmin, which looks
# like a \s that lost its backslash - verify against real user agent strings.
'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de',
'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',
'fast\-webcrawler/',
'feosey[\x20]mohk[\x20]crawler',
'findlinks/',
'findxbot/',
'firephp/',
'firstdirectory\-bot',
'flamingo',
'flippybearbot/',
# Anchored at both ends: the whole user agent string must be foo.
'^foo$',
'freewebmonitoring[\x20]sitechecker/',
'fujilabol',
'furlbot/',
'gaisbot/',
'gallent[\x20]spider',
'garlikcrawler/',
'getintent[\x20]crawler',
'getintentcrawler[\x20]getintent\.com',
'gigabot/',
'gipo\-crawler/nutch',
'girafabot',
'gluten[\x20]free[\x20]crawler/',
'gocrawl',
'gowikibot',
'go\-http\-client/',
'grapeshotcrawler/',
'gsitecrawler/',
'gurujibot/',
'hadibot',
'haosouspider',
'hello[\x20]crawler',
'holmes/',
'houzzbot',
'http_request2/',
'hubspot[\x20]webcrawler',
'hypercrawl/',
# NOTE(review): never the first match - the same pattern without the trailing
# slash appears earlier in this list and matches everything this one matches.
'icc\-crawler/',
'iconoclast',
'idgcrawler/nutch',
'idg/uk',
'idmarch[\x20]automatic\.beta/',
'inbybot',
'incutio[\x20]xml',
'indeedbot',
'influencebot',
'irlbot/',
'issuecrawler',
'istellabot/',
'james[\x20]bot',
'jigsaw/',
'jobfeed',
'jooblebot',
'komodiabot/',
'konqueror/',
'lightspeed',
'linkapediabot',
# Most specific link checker pattern first, then progressively broader ones.
'metager\-linkchecker',
'linkchecker',
'linkcheck',
'linkdexbot/',
'linkedinbot/',
'linkpadbot/',
'links[\x20]\(',
'linksmanager\.com_bot',
'lwp::simple/',
'mail\.ru_bot/',
'makecontact',
'mappy',
'mauibot',
'meanpathbot/',
'mechanize',
'mediatoolkitbot',
'megaindex\.ru/',
'merzscope',
'meta_bot',
'mfibot/',
'microsoft.*discovery',
'missigua_locator',
'mixrankbot',
'mj12bot/',
# mojeekbot/ must stay ahead of the shorter mojeek pattern.
'mojeekbot/',
'mojeek',
'mojolicious',
'mxt/nutch',
'my[\x20]nutch[\x20]spider/',
'myse/nutch',
'naaraa',
'nerdybot',
'netestate[\x20]ne[\x20]crawler',
'netresearchserver/',
'nimbostratus-bot',
'nominet',
'nrlcorpusbuilder/nutch',
'nutch\-1\.4/',
'nutch\-1\.8/',
'nutchcvs/',
'o\.uk[\x20]robot',
'ocrawler;',
'odp[\x20]link[\x20]checker',
'offline[\x20]explorer/',
'omniexplorer_bot/',
'orangebot/',
'orliac',
'outclicksbot',
'pagebiteshyperbot/',
'pcore',
'pdffillerbot/',
'peopleman',
'phantomjs',
'php/5\.2\.8',
'pinterestbot',
'piplbot',
'ploetz[\x20]\+[\x20]zeller',
'plukkie/',
'princetonbot/',
'privacyawarebot/',
'prlog/',
'proximic',
'psbot/',
'psbot\-image',
'python_wk_crawler',
'python\-urllib/',
'qcrawl',
'quick-crawler',
'researchbot',
'roboto',
'rogerbot/',
'rssingbot',
'rukicrawler/',
'safedns[\x20]search[\x20]bot/',
'safednsbot',
# The longer safesearch pattern must stay ahead of the bare safesearch one.
'safesearch[\x20]microdata[\x20]crawler',
'safesearch',
'sbl\-bot',
# Bare scrapy: only reached by strings the earlier scrapy/ pattern did not match.
'scrapy',
'screaming[\x20]frog[\x20]seo[\x20]spider/',
'screenerbot[\x20]crawler[\x20]beta',
'scrubby',
'searchie/',
'securityresearch\.bot',
'seekmo',
'semanticbot',
'semrushbot/',
'semrushbot-si',
'seo\-audit\-check\-bot/',
'seobility',
'seokicks\-robot',
'seolyticscrawler/',
'seostats',
'seosys/nutch',
'seoterritory\.com[\x20]bot',
'serendeputy',
'shim\-crawler',
'siteexplorer/',
'siteexplorer\.info',
'siteimprove',
'slackbot\-linkexpanding',
'smabblerbot/',
# NOTE(review): never the first match - an earlier pattern in this list matches
# the same words separated by \s and without the trailing slash, so it already
# catches every string this one matches.
'sogou[\x20]web[\x20]spider/',
'special_archiver/',
'spiderbot/',
'spuhexbot',
'spyonweb',
'ssearch_bot',
'streamline3bot',
'surdotlybot/',
'surveybot/',
'taiil/nutch',
'tbot\-nutch',
'teeraidbot',
'telegrambot',
'test/nutch',
'test[\x20]spider',
'testcrawler',
'the[\x20]knowledge[\x20]ai',
'tracemyfile',
'trendiction',
# Version with trailing slash first, then the bare name as a fallback.
'turnitinbot/',
'turnitinbot',
'tweetmemebot/',
'ucy/nutch',
'uni-leipzig\.de',
'uptimebot/',
'uptimerobot/',
'url[\x20]checker',
'uxcrawlerbot',
'validator\.nu/',
'vbseo',
'vbulletin[\x20]via[\x20]php',
'vebidoobot',
'vegi[\x20]bot',
'velen',
'viz/nutch',
# voilabot must stay ahead of the shorter voila pattern.
'voilabot',
'voila',
'vortex/',
'voyager/',
'w3c_validator/',
'w3c\-checklink/',
'wbsearchbot/',
'wbsrch/',
'wesee:ads/pagebot',
'wesee:ads/picturebot',
'wesee_bot',
'wget/',
'who\.is[\x20]bot',
'wonderbot/',
# woobot/ must stay ahead of obot/, which is a substring of it and would
# otherwise claim the match.
'woobot/',
'obot/',
'wotbox/',
'xaldon[\x20]webspider',
# Three spellings of the same tool name: blank-separated, underscore-separated,
# and with an apostrophe (escaped for the single-quoted string).
'xenu[\x20]link[\x20]sleuth',
'xenu_link_sleuth',
'xenu\'s_link_sleuth',
'xml[\x20]sitemaps[\x20]generator',
'xovibot/',
'yacybot',
'yahoo[\x20]link[\x20]preview',
'yak',
'yisouspider',
'yoozbot',
'your\-website\-sucks',
'zoominfobot',
'zspider/',
'zumbot/',
# below placed at end to catch some generics
# old robots using firefox < version 11 not identifying themselves as a robot.
# The pattern matches firefox/ followed by a single digit and a dot, or by
# 00 or 10 and a dot. It is the last element of the list, so no trailing comma.
'(firefox/)([0-9]\.|[0-1][0]\.)'
);
# RobotsSearchIDOrder_list2
# Second robot detection list (less common robots). Each entry is a Perl
# regular expression that is applied to the user agent string. Entries are
# tried in the order given and the first match identifies the robot, so a
# more specific pattern is listed before any shorter pattern that would match
# the same string (e.g. 'imagecoccoc' before 'coccoc', 'sohu\-search' before
# 'sohu', 'webwalker' before 'webwalk').
# The patterns must not contain literal whitespace; a blank is written as
# '[\x20]' or '\s'.
@RobotsSearchIDOrder_list2 = (
# Less common robots (In robot file)
# Bare or truncated Mozilla user agent strings. The patterns are anchored with
# '^' (and mostly '$') so that they only match when nothing else follows.
'^mozilla$',
'^mozilla/3\.0\s\(compatible$',
'^mozilla/4\.0$',
'^mozilla/4\.0\s\(compatible;\)$',
'^mozilla/5\.0$',
'^mozilla/5\.0\s\(compatible;$',
'^mozilla/5\.0\s\(en\-us\)$',
'^mozilla/5\.0\sfirefox/3\.0\.5$',
'^mozilla/6\.0[\x20]\(compatible\)$',
'^mozilla/(.*)beta[\x20]\(windows\)',
# Tokens of very old browser and operating system versions. A user agent
# string containing one of them is classified as a robot.
'msie[\x20]2',
'msie[\x20]3',
'msie[\x20]4',
'msie[\x20]5',
'msie[\x20]6',
'msie\+6\.0\;',
'windows[\x20]95',
'windows[\x20]98',

# The entries below are rarely seen; they could be removed to speed up processing.
'a6\-indexer',
'abcdatos',
'abonti\.com',
'acme\.spider',
'activebookmark',
'adamm_bot',
'advbot',
'affectv\.co\.uk',
'ahoythehomepagefinder',
'aleadsoftbot',
'alkaline',
'allrati',
'alltop',
'almaden',
'alpha_search_agent',
'anthill',
'antibot',
'aport',
'appie',
'applesyndication',
'arachnophilia',
'arale',
'araneo',
'architext',
'archive\-de\.com',
'aretha',
'argus',
'ariadne',
'arianna\.libero\.it',
'arks',
'aspider',
'aspseek',
'asterias',
'asynchttpclient',
'atn\.txt',
'atomz',
'auresys',
'awbot',
'backlinktest\.com',
'backrub',
'becomebot',
'bender',
'betabot',
'bigbrother',
'biglotron',
'binglocalsearch',
'bittorrent_bot',
'biz360[_+\s]spider',
'bjaaland',
'blackwidow',
'blindekuh',
'blogbridge[_+\s]service',
'blogged_crawl',
'bloglines',
'bloglovin',
'blogpulse',
'blogsearch',
'blogshares',
'blogslive',
'blogssay',
'bloodhound',
'bncf\.firenze\.sbn\.it/raccolta\.txt',
'bobby',
'bookmark\-manager',
'borg\-bot',
'boris',
'brightnet',
'bruinbot',
'bubing',
'bumblebee',
'butterfly',
'buzztracker',
'cactvschemistryspider',
'calif[^r]',
'candlelight[_+\s]favorites[_+\s]inspector',
'careerbot',
'carpathia',
'cassandra',
'catbot',
'cbn00glebot',
'cerberian\sdrtrs',
'cfetch',
'cgireader',
'chattertrap',
'check_http',
'checkweb_link_validator',
'christcrawler',
'churl',
'cienciaficcion',
'cipinetbot',
'imagecoccoc',
'coccoc',
'coldfusion',
'collective',
'combine',
'commons\-httpclient',
'computer_and_automation_research_institute_crawler',
'conceptbot',
'contentmatch',
'converamultimediacrawler',
'coolbot',
'copubbot',
'core',
'covario',
'cruiser',
'cscrawler',
'cuasarbot',
'cursor',
'cusco',
'custo',
'cyberspyder',
'datafountains/dmoz_downloader',
'dataprovider\.com',
'daviesbot',
'daylifefeedfetcher',
'daypopbot',
'deepindex',
'desertrealm',
'deweb',
'dienstspider',
'digger',
'digout4u',
'diibot',
'dipsie\.bot',
'direct_hit',
'discobot',
'dlvr\.it',
'dnabot',
'dnsgroup',
'doccheckbot',
'checkbot',
'domainappender',
'domainchecker',
'domainsdb\.net',
'download_express',
'dragonbot',
'dreamwidth',
'drupal',
'dulance',
'dumbot',
'dumm\.de\-bot',
'dwcp',
'e\-collector',
'earthcom\.info',
'easydl',
'ebiness',
'eccp',
'echo!',
'edgeio\-retriever',
'elfinbot',
'emacs',
'emcspider',
'enteprise',
# NOTE(review): outside a bracketed class Perl reads '[:blank:]' as the plain
# character class of ':', 'b', 'l', 'a', 'n', 'k', not as the POSIX blank class,
# so this pattern does not match "ernst 2.0". The pattern string is also the
# robot id, so a fix presumably has to change the matching RobotsHashIDLib key
# as well - confirm before changing.
'ernst[:blank:]2\.0',
'esther',
'ets_v',
'eventax',
'everbeecrawler',
'everest\-vulcan',
'evliyacelebi',
'exactseek',
'extreme[_+\s]picture[_+\s]finder',
'ezoom',
'ezresult',
'facebook',
'facebot',
'fast\-search\-engine',
'matrix_s\.p\.a\._\-_fast_enterprise_crawler',
'fast_enterprise_crawler',
'fastbot',
'fastcrawler',
'favicon',
'favorg',
'favorites_sweeper',
'fdse',
'feedburner',
'feedflow',
'feedmyinbox',
'feedroll\.com',
'feedsky',
'feedster',
'feedvalidator',
'feedzira',
'felix',
'ferret',
'fetchbot',
'fetchrover',
'fever/',
'fido',
'filmkamerabot',
'filterdb\.iss\.net',
'finderlein[_+\s]research[_+\s]crawler',
'findexa_crawler',
'finnish',
'fireball',
'firmilybot',
'flexum',
'foaf\-search\.net',
'fooky\.com/scorpionbot',
'fouineur',
'francoroute',
'freecrawl',
'freenews',
'funnelweb',
'g2crawler',
'gama',
'gazz',
'gcreep',
'geniebot',
'genieo',
'geohasher',
'getbot',
'geturl',
'gigablastopensource',
'global_fetch',
'gnodspider',
'goforit\.com',
'goforitbot',
'golem',
'gonzo',
'gougou',
'gpu_p2p_crawler',
'grabber',
'grapeshot',
'grapnel',
'griffon',
'gromit',
'grub',
'gulliver',
'gulperbot',
'hambot',
'hanrss',
'virus[_+\s]detector',		# Must be before harvest
'harvest',
'havindex',
'henrythemiragorobot',
'heritrix',
'hl_ftien_spider',
'hometown',
'hoowwwer',
'hpprint',
'htdig',
'html[_+\s]link[_+\s]validator',
'htmlgobble',
'htmlparser',
'httrack',
'hundesuche\.com\-bot',
'hyperdecontextualizer',
'ia_archiver\-web\.archive\.org',
'ia_archiver',
'iajabot',
'iaskspider',
'i\-bot',
'icarus6j',
'ichiro',
'icjobs\.de',
'ilse',
'iltrovatore\-setaccio',
'imagelock',
'implisensebot',
'inagist',
'incywincy',
'infobot',
'infociousbot',
'infohelfer',
'infomine',
'informant',
'infoseeksidewinder',
'infoseek',
'infospider',
'inspectorwww',
'insurancobot',
'integromedb\.org',
'intelliagent',
'internet[_+\s]ninja',
'internetarchive',
'internetseer',
'internetsupervision',
'ips\-agent',
'irobot',
'iron33',
'isearch2006',
'israelisearch',
'iupui_research_bot',
'izsearch',
'jacobin[\x20]club',
'jakarta',
'jbot',
'jcrawler',
'jennybot',
'jobboerse',
'jobot',
'jobo',
'joebot',
'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility',
'js\-kit',
'jubii',
'jumpstation',
'justview',
'kalambot',
'kamano\.de_newsfeedverzeichnis',
'kapsi',
'katipo',
'kazoombot',
'kevin',
'keyoshid',
'kilroy',
'kinja\-imagebot',
'kinjabot',
'knowitall',
'knowledge\.com',
'ko[_+\s]yappo[_+\s]robot',
'kouaa_krawler',
'krugle',
'ksibot',
'kummhttp',
'kurzor',
'labelgrabber\.txt',
'lanshanbot',
'larbin',
'largesmall[\x20]crawler',
'legs',
'letscrawl\.com',
'libcrawl',
'lilina',
'link_valet_online',
'linkbot',
'linkdex\.com',
'linkidator',
'linkscan',
'linkstats[\x20]bot',
'linkwalker',
'lipperhey',
'livejournal\.com',
'lmspider',
'loadtimebot',
'lockon',
'logo_gif',
'longurl',
'lssrocketcrawler',
'ltbot',
'ltx71',
'lwp\-request',
'lwp\-trivial',
'lycos[_+\s]',
'macworm',
'madaali\.de',
'magpierss',
'magpie',
'mapoftheinternet\.com',
'marvin',
'mattie',
'mediabot',
'mediafox',
'megaindex',
'megite',
'memorybot',
'mercator',
'meshexplorer',
'metager2\-verification\-bot',
'metajobbot',
'bbot',
'metaspinner',
'metauri',
'miadev',
'microsoft[_+\s]url[_+\s]control',
'microsoft[\x20]bits',
'microsoft\-webdav\-miniredir',
'mindcrawler',
'mindupbot',
'mini\-reptile',
'minirank',
'misterbot',
'miva',
'mizzu_labs',
'mnogosearch',
'moget',
'momspider',
'monster',
'motor',
'movabletype',
'ms[_+\s]search[_+\s]6\.0[_+\s]robot',
'ms_search_4\.0_robot',
'msnbot\-udiscovery',
'msrabot',
'msrbot',
'mt::telegraph::agent',
'muncher',
'muscatferret',
'mwdsearch',
'mydoyouhike',
'myweb',
'nagios',
'nasa_search',
'ndspider',
'nederland\.zoek',
'netcarta',
'netcraft',
'netluchs',
'netmechanic',
'netnewswire',
'netscoop',
'netsprint',
'netvibes',
'newrelicpinger',
'newscan\-online',
'newsfox',
'newsgatoronline',
'nextgensearchbot',
'nhse',
'nicebot',
'nimblecrawler',
'ning',
'nomad',
'northstar',
'noxtrumbot',
'npbot',
'nzexplorer',
'objectssearch',
'occam',
'ocelli',
'octopus',
'octora_beta_bot',
'onet\.pl[_+\s]sa',
'onfolio',
'openfind',
'opentaggerbot',
'openwebspider',
'optimizer',
'oracle_ultra_search',
'orb_search',
'orbiter',
'packrat',
'pageboy',
'panscient',
'parasite',
'passwordmaker\.org',
'patric',
'pear_http_request_class',
'peerbot',
'pegasus',
'perignator',
'perman',
'petersnews',
'phantom',
'php[_+\s]version[_+\s]tracker',
'phpcrawl',
'phpdig',
'picmole',
'pictureofinternet',
'piltdownman',
'pimptrain',
'ping\.blo\.gs',
'pingdom',
'pioneer',
'pita',
'pitkow',
'pjspider',
'plinki',
'pluckfeedcrawler',
'feedcrawl',
'plumtreewebaccessor',
'pogodak',
'pompos',
'popdexter',
'port_huron_labs',
'poppi',
'portalb',
'postfavorites',
'postpost',
'postrank',
'powermarks',
'printfulbot',
'proodlebot',
'protopage',
'publiclibraryarchive',
'pyquery',
'python',
'qihoobot',
'quipply',
'qwantify',
'r6\_',
'rambler',
'ratingburner',
'raven',
'rbse',
'redalert',
'regator',
'relevantnoise\.com',
'resumerobot',
'rhcs',
'riddler',
'road_runner',
'robbie',
'robi',
'robocrawl',
'robofox',
'robozilla',
'rojo',
'rome[\x20]client',
'roverbot',
'rpt\-httpclient',
'rssgraffiti',
'rssimagesbot',
'ruffle',
'rufusbot',
'rules',
'safeads\.xyz',
'safetynetrobot',
'sage\+\+',
'sandcrawler',
'savetheworldheritage',
'sbider',
'schizozilla',
'scooter',
'scoutjet',
'scumbot',
'search\-info',
'search_au',
'searchguild[_+\s]dmoz[_+\s]experiment',
'searchmetricsbot',
'searchprocess',
'seekbot',
'semalt',
'senrigan',
'sensis_web_crawler',
'seodiver',
'seokicks\.de',
'seoscanners',
'sgscout',
'shaggy',
'shaihulud',
'shareaholicbot',
'shoutcast',
'sift',
'simbot',
'simplepie',
'sistrix',
'site\-valet',
'sitebot',
'sitedomain\-bot',
'sitetech',
'skimbot',
'skymob',
'slcrawler',
'slurp',
'slysearch',
'smartspider',
'smtbot',
'snap\.com_beta_crawler',
'snappy',
'snooper',
'sohu\-search',
'sohu',
'solbot',
'speedy',
'sphere_scout',
'spider[_+\s]monkey',
'spiderline',
'spiderlytics',
'spiderman',
'spiderview',
'spip',
'sproose_crawler',
'spry',
'sqworm',
'ssearcher',
'steeler',
'steroid__download',
'stq_bot',
'stratagems[\x20]kumo',
'suchfin\-bot',
'suke',
'summify\.com',
'sunrise',
'suntek',
'superbot',
'superfeedr',
'susie',
'sven',
'syndic8',
'syndicapi',
'synoobot',
'synthesio',
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
'tach_bw',
'tagyu_agent',
'tailrank',
'tarantula',
'tarspider',
'tcl_http_client_package',
'techbot',
'technoratibot',
'templeton',
'teoma',
'teragramcrawlersurf',
'test_crawler',
'testbot',
'thumbsniper',
'titan',
'titin',
'tkwww',
'tlspider',
'topblogsinfo',
'topicblogs',
'topix\.net',
'trapit',
'trileet',
'turtlescanner',
'turtle',
'tutorgigbot',
'tweetedtimes',
'twiceler',
'twisted[\x20]pagegetter',
'twitterbot',
'twitterfeed',
'ubicrawler',
'ucsd',
'udmsearch',
'ultraseek',
'um\-ic',
'um\-ln',
'unchaos_bot_hybrid_web_search_engine',
'unido\-bot',
'unisterbot',
'universalfeedparser',
'unlost_web_crawler',
'unwindfetchor',
'updated',
'urlck',
'ustc\-semantic\-group',
'vagabondo\-wap',
'vagabondo',
'valkyrie',
'vermut',
'versus_crawler_from_eda\.baykan@epfl\.ch',
'verticrawl',
'vespa_crawler',
'victoria',
'visionsearch',
'voidbot',
'voltron',
'vse/',
'vwbot',
'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa',
'w3index',
'w3m2',
'wallpaper',
'wanderer',
'wapspider',
'wapspirlider',
'watchmouse',
'wavefire',
'waybackarchive\.org',
'wazzup',
'web_downloader',
'webbandit',
'webbase',
'webcatcher',
'webclipping\.com',
'webcollage',
'webcompass',
'webcopy',
'webcrawl\.net',
'webdup',
'webfetcher',
'webfilter',
'webfoot',
'webinator',
'webindexer',
'weblayers',
'weblinker',
'webminer',
'webmirror',
'webmoose',
'webquest',
'webreader',
'webreaper',
'website[_+\s]monitoring[_+\s]bot',
'websnarf',
'webspider',
'bspider',
'webvac',
'webvulncrawl',
'webwalker',
'webwalk',
'webwatch',
'wells_search',
'wer\-liefert\-was',
'wesee:search',
'wevikabot',
'whatuseek',
'whowhere',
'windows\-rss\-platform',
'wired\-digital',
'zyborg',
'wisenutbot',
'wiumi',
'wmir',
'wolp',
'wombat',
'wonderer',
'woozweb',
'wordpress',
'worm',
'wume_crawler',
'wwwc',
'wwweasel',
'wz101',
'xget',
'xirq',
'xydo',
'y!j',
'yahoo![\x20]searchmonkey',
'yahoo!_mindset',
'yahoo\-blogs',
'yahoo\-mmcrawler',
'yahoo\-newscrawler',
'yahoo[\x20]pipes',
'yahoo\-verticalcrawler',
'yahoocachesystem',
'yahooexternalcache',
'yahoofeedseeker',
'yahooseeker\-testing',
'yahooseeker',
'yahooysmcm',
'yammer',
'yanga',
'yet\-another\-spider',
'yeti/',
'yie8',
'yodaobot',
'yooglifetchagent',
'youdao',
'yourls',
'z\-add_link_checker',
'zealbot',
'zemanta',
'zend_http_client',
'zeus',
'zhuaxia',
# Patterns that rely on character classes or anchors rather than a plain name:
# '[^a]fish' matches 'fish' preceded by any character other than 'a',
# '^[1-3]$' matches a user agent consisting of the single digit 1, 2 or 3.
'[^a]fish',
'[\x20]netseer[\x20]',
'^[1-3]$',
'^finbot',
'^motorola$',
'^msie',
'^webindex$',
'1\-more_scanner',
'w3c\-webcon',
'lwp',
'uri::fetch'
);

# RobotsSearchIDOrder_listgen
# Generic robot detection list. Each entry is a Perl regular expression that
# is applied to the user agent string; entries are tried in the order given
# and the first match identifies the robot. Most entries are short generic
# words (library names such as 'java' or 'perl', or terms such as 'crawl' or
# 'spider'); where a longer pattern contains a generic word it is listed
# before it (e.g. 'perlcrawler' before 'perl', 'nutchosu\-vlib' before 'nutch').
@RobotsSearchIDOrder_listgen = (
# Generic robot
'nbot',
'ng/1\.',
'ng/2\.',
'libwww\-perl',
'cfnetwork',
'urllib',
'javabee',
'projectwf\-java\-test\-crawler',
'java',
'loocalcrawler/nutch',
'nutchosu\-vlib',
'nutch',
'perlcrawler',
'perl',
'robot',
'blog',
'checker',
'crawl',
'discover',
'feed',
'fetcher',
'hunter',
'link',
'scanner',
'seek',
'sitemap',
'spider',
'survey',
'sucker',
'validator',
# 'bot' directly followed or preceded by a separator: whitespace or one of
# _ + : , . ; / \ - (the '\\' in the single-quoted string is one backslash
# in the resulting regular expression).
'bot[\s_+:,\.\;/\\\-]',
'[\s_+:,\.\;/\\\-]bot',
'curl',
'php',
'ruby/',
# Moving oBot here so it doesn't get assigned for other *obot robots
# NOTE(review): this is the only pattern in the list containing an upper-case
# letter - confirm that matching is case-insensitive and that the
# corresponding RobotsHashIDLib key uses the same spelling.
'oBot/',
'no_user_agent'
);


# RobotsHashIDLib
# List of robots names ('robot id','robot clear text')
#-------------------------------------------------------
%RobotsHashIDLib   = (
# Japanese Robots+alpha
# Added by Ryu 2006.03.15
'008/','<a href="http://www.80legs.com/webcrawler.html">80legs web crawler</a>',
'another_html\-lint','Another HTML-lint',
'adsenserbot','<a href="http://adsenser.jp">AdSenserBot</a>',
'amfibibot','Amfibibot',
'baiduimagespider','<a href="http://www.baidu.com/search/spider.html">BaiduImageSpider</a>',
'baidumobaider','<a href="http://www.baidu.jp/spider/">BaiduMobaider</a>',
'becomejpbot','<a href="http://www.become.co.jp/site_owner.html">BecomeJPBot</a>',
'bibliotheca','<a href="http://www.hitachi.co.jp/Prod/comp/soft1/textsearch/product/component/bib21/">Bibliotheca</a>',
'binetx','Binetx',
'birubot/','Birubot',
'bizpal\srss\saggregator','<a href="http://bizpal.jp">BizPal RSS Aggregator</a>',
'bookmark\srenewal\scheck\sagent','<a href="http://www.bookmark.ne.jp">Bookmark Renewal Check Agent</a>',
'bpimagewalker','<a href="http://www.brandprotect.com">BPImageWalker</a>',
'camelstampede','CamelStampede',
'camontspider','<a href="http://epweb2.ph.bham.ac.uk/user/slater/camont/info.html">CamontSpider</a>',
'cazoodlebot','<a href="http://www.cazoodle.com">CazoodleBot</a>',
'charlotte','<a href="http://www.searchme.com/support/">Charlotte</a>',
'cityreview\srobot','<a href="http://www.cityreview.org/crawler/">Cityreview Robot</a>',
'co2h2onacl\@gmail\.com','co2h2onacl@gmail.com Crawler',
'coast\swebmaster','COAST WebMaster',
'cococ','<a href="http://am13.net/wiki/index.php?cococ">cococ</a>',
'comaneci_bot','<a href="http://help.i-know.jp/?crawler">Comaneci bot</a>',
'crooz','DoCoMo CROOZ',
'businessdbbot','<a href="http://www.businessdb.com/bot.php">Sirketce/BusinessDb</a>',
'dealgates\sbot','<a href="http://spider.dealgates.com/bot.html">DealGates Bot</a>',
'depspid/','<a href="http://about.depspid.net">DepSpid</a>',
'diffbrowser/','<a href="http://www010.upp.so-net.ne.jp/suede/diffbrowser.html">DiffBrowser</a>',
'ec2linkfinder','EC2LinkFinder',
'ec_favorite','eC_favorite',
'empas_robot','EMPAS ROBOT',
'e\-societyrobot','<a href="http://www.yama.info.waseda.ac.jp/~yamana/es/">e-SocietyRobot</a>',
'everyfeed-spider','Everyfeed Spider',
'faedit/','<a href="http://www.srcw.net/wiki/index.php?FaEdit">FaEdit</a>',
'fast\smetaweb\scrawler','<a href="http://fastsearch.com">FAST MetaWeb Crawler</a>',
'fastladder\sfeedfetcher','<a href="http://fastladder.com">Fastladder FeedFetcher</a>',
'feed24\.com','<a href="http://feed24.com">Feed24.com</a>',
'feedbringer','<a href="http://feedbringer.net" rel="nofollow" title="Bot home page">FEEDBRINGER</a>',
'feedchecker','FeedChecker',
'feed\sparser','<a href="http://rss-search.net">Feed Parser</a>',
'feedpath','<a href="http://feedpath.jp" rel="nofollow" title="Bot home page">Feedpath</a>',
'feedshow','<a href="http://www.feedshow.com" rel="nofollow" title="Bot home page">Feedshow</a>',
'^fly/','fly sinet.ad.jp',
'freshreader','<a href="http://www.freshreader.com" rel="nofollow" title="Bot home page">FreshReader</a>',
'g10\sreader','<a href="http://wordg10.com" rel="nofollow" title="Bot home page">G10 Reader</a>',
'gingercrawler','<a href="http://www.gingersoftware.com/crawler_agent.htm" rel="nofollow" title="Bot home page">GingerCrawler</a>',
'goo_search','goo search',
'goorssreader','goo RSS Reader',
'grub\-client','Grub Client',
'gsa\-crawler','<a href="http://www.google.com/enterprise/gsa/index.html" rel="nofollow" title="Bot home page">GSA Crawler</a>',
'gslfbot','GSLFbot',
'hailoobot','<a href="http://www.hailoo.com/spider.html" rel="nofollow" title="Bot home page">Hailoobot</a>',
'hatena','<a href="http://a.hatena.ne.jp/help" rel="nofollow" title="Bot home page">はてなアンテナ</a>',
'headline\-reader','Headline-Reader',
'hobbit\sbbtest\-net','<a href="http://hobbitmon.sourceforge.net" rel="nofollow" title="Bot home page">Hobbit bbtest-net</a>',
'html\sget','HTML Get(SPAM)',
'hyperestraier','<a href="http://hyperestraier.sourceforge.net/index.html" rel="nofollow" title="Bot home page">HyperEstraier</a>',
'hyperrobot','HyperRobot InfoWeb',
'icc\-crawler','<a href="http://kc.nict.go.jp/icc/crawl-ja.html">ICC-Crawler</a>',
'iframely','<a href="http://iframely.com">Iframely</a>',
'ilissurf','<a href="http://software.fujitsu.com/jp/ilis_univ/surf/" rel="nofollow" title="iLisSurf - FUJITSU Japan">iLisSurf</a>',
'indexpert','indexpert',
'internal\sdummy\sconnection','internal dummy connection for <a href="http://httpd.apache.org/docs/2.0/mod/mod_dav.html" rel="nofollow" title="Apache モジュール mod_dav">Apache WebDAV</a>',
'inweavesearchbot','<a href="http://www.hitachi-system.co.jp/inweave/">InWeave</a>',
'jetbot','<a href="http://www.jetrun.jp">jetbot</a>',
'just\-crawler','<a href="http://www.justsystems.com/jp/tech/crawler/">JUST-CRAWLER</a>',
'kalooga','<a href="http://www.kalooga.com/info.html?page=crawler">KaloogaBot</a>',
'kb\.rmail','<a href="http://www.r-mail.org">kb.Rmail</a>',
'keybot','<a href="http://www.keybot.com">Keybot Translation Search Machine</a>',
'keywalkerbot','<a href="http://www.keywalker.co.jp/crawl/bot.html">Keywalkerbot</a>',
'klsh\-pageget/','<a href="http://www.kondo-net.gr.jp/klsh/">KLSH-PageGet</a>',
'kotoha\.co\.jp','コトハコ',
'libghttp/','libghttp(SPAM?)',
'linguee\sbot','<a href="http://www.linguee.com">Linguee Bot</a>',
'livedoorcheckers','Livedoor Checkers',
'livedoor\sfeedfetcher','<a href="http://reader.livedoor.com" rel="nofollow" title="Bot home page">livedoor FeedFetcher</a>',
'livedoor\sscreenshot/','<a href="http://reader.livedoor.com" rel="nofollow" title="Bot home page">livedoor ScreenShot</a>',
'livedoor\shttpclient/','livedoor HttpClient',
'logict\sipv6\scrawler/','<a href="http://logict.net" rel="nofollow" title="Bot home page">Logict IPv6 Crawler</a>',
'maldive\scrawler','Maldive crawler',
'masagool','<a href="http://sagool.jp">MaSagool</a>',
'masscan','<a href="https://github.com/robertdavidgraham/masscan">robertdavidgraham/masscan</a>',
'maxamine\.com\-robot','maxamine.com-robot',
'metamojicrawler','<a href="http://www.metamoji.com/jp/crawler.html">MetamojiCrawler</a>',
'metalogger','Metalogger',
'mfcrawler','MFcrawler',
'mogimogi/','mogimogi',
'mlbot','<a href="http://www.metadatalabs.com">MLBot</a>',
'mqbot','<a href="http://metaquerier.cs.uiuc.edu">MQbot</a>',
'msr\-isrccrawler','MSR-ISRCCrawler',
'multicrawler','<a href="http://sw.deri.org/2006/04/multicrawler/robots.html">MultiCrawler</a>',
'naver','NaverBot',
'niyonizer','NIYONIZER',
'ndl\-japan\-research\-robot','国立国会図書館',
'nerdbynature\.bot','<a href="http://www.nerdbynature.net" rel="nofollow" title="NerdByNature">NerdByNature</a>',
'nettracker','NetTracker',
'newsalloy','<a href="http://www.NewsAlloy.com" rel="nofollow" title="Bot home page">NewsAlloy</a>',
'www\.notconfigured\.com','<a href="http://www.notconfigured.com" rel="nofollow" title="Bot home page">www.notconfigured.com Crawler</a>',
'\sobot','oBot',
'onetszukaj','<a href="http://szukaj.onet.pl">OnetSzukaj</a>',
'openbot','Openfind data gatherer',
'openseemox\sbot','<a href="http://www.openseemox.com">OPENSEEMOX</a>',
'oracle\ssecure\senterprise\ssearch','Oracle Secure Enterprise Search',
'outfoxbot','<a href="http://www.yodao.com/help/webmaster/spider/" rel="nofollow" title="YodaoBot">old OutfoxBot</a>',
'page_verifier','<a href="http://www.securecomputing.com/PageVerifier.cfm">page_verifier</a>',
'pear\shttp_request\sclass','<a href="http://pear.php.net">PEAR HTTP_Request class</a>',
'paipo\-bot','<a href="http://paipo.jp">PAIPO-Bot</a>',
'pathtraq','<a href="http://pathtraq.com/about">Pathtraq</a>',
'pflab','pflab',
'pipeliner','PipeLine Spider',
'pixray\-seeker','<a href="http://www.pixray.com/pixraybot">Pixray-Seeker</a>',
'playon\srss\sreader/','<a href="http://playon.jp/rss/">PLAYON RSS READER</a>',
'pockey\-gethtml','Pockey GetHTML',
'purebot/','<a href="http://www.puritysearch.net">Purebot</a>',
'rankurbot','<a href="http://rankur.com">RankurBot</a>',
'research\-spider','<a href="http://www.freedownloadscenter.com/Network_and_Internet/Web_Searching_Tools/Research_Spider.html">Research Spider</a>',
's2robot','S2Robot',
'scrapy/','<a href="http://scrapy.org">Scrapy</a>',
'search\-hp_bot','<a href="http://search-hp.com" rel="nofollow" title="Bot home page">search-hp_bot</a>',
'search17bot','<a href="http://www.search17.com/bot.php" rel="nofollow" title="Bot home page">Search17Bot</a>',
'seo\.cug\.net\slink\schecker','<a href="http://seo.cug.net">seo.cug.net link checker</a>',
'setoozbot','<a href="http://www.setooz.com/bot.html">SETOOZBOT</a>',
'shopwiki/','<a href="http://www.shopwiki.com/wiki/Help:Bot">ShopWiki</a>',
'siclab','siclab',
'sistrix\scrawler','<a href="http://crawler.sistrix.net" rel="nofollow" title="Bot home page">SISTRIX Crawler</a>',
'sitesucker','<a href="http://www.sitesucker.us">SiteSucker</a>',
'smart\.apnoti\.com','<a href="http://smart.apnoti.com/index/aboutApnotiWebCrawler">smart.apnoti.com Robot</a>',
'snapbot','Snapbot',
'snoopy\sv','<a href="http://sourceforge.net/projects/snoopy/">Snoopy</a>',
'sogou\sweb\sspider','<a href="http://www.sogou.com/docs/help/webmasters.htm#07">Sogou web spider</a>',
'sonar/','<a href="http://boxer.ne.jp/product_list/sonar/">Sonar Crawler</a>', #Added by toshi 2006.04.09
'sonarplus/','<a href="http://boxer.ne.jp/product_list/sonar_plus/">Sonar PLUS Crawler</a>',
'spamrobot@126\.com','126.com',
'^spider/','Spider',
'spinn3r','<a href="http://spinn3r.com/robot" rel="nofollow" title="Bot home page">Spinn3r</a>',
'sproose/','<a href="http://www.sproose.com/bot.html" rel="nofollow" title="Bot home page">sproose bot</a>',
'stackrambler','StackRambler',
'statbot@gmail\.com','Gmail Com (Google)',
'strategic\sboard\sbot','<a href="http://www.strategicboard.com" rel="nofollow" title="Bot home page">Strategic Board Bot</a>',
'swebot','<a href="http://swebot.net" rel="nofollow" title="Bot home page">SWEbot</a>',
'sysscan/','<a href="http://swebot.net" rel="nofollow" title="Bot home page">SWEbot</a>',
'technoratisnoop','TechnoratiSnoop(SPAM?)',
'techrigybot','<a href="http://www.techrigy.com" rel="nofollow" title="Bot home page">TechrigyBot</a>',
'tencenttraveler','TencentTraveler', 	# Must be before msiecrawler.
'thriceler','<a href="http://www.kuill.com/robots/spider.html" rel="nofollow" title="Bot home page">Thriceler</a>',
'^tibot/','Tibot',
'toread\-crawler/','<a href="http://news.toread.cc/crawler.php" rel="nofollow" title="Bot home page">Toread-Crawler</a>',
'trackback/','TrackBack(SPAM?)',
'trademango','TradeMango',
'umn/','<a href="http://www.nori-s.net/soft/umn/">URLマネージャ</a>',
'url\spage\sindexer','URL Page Indexer',
'useragent','USERAGENT(SPAM?)',
'w3crobot','Webbot - the Libwww Robot',
'wadaino\.jp\-crawler','<a href="http://wadaino.jp">話題の.jpクローラー</a>',
'web-robot','web-robot',
'webalta','<a href="http://www.webalta.net/ru/about_webmaster.html">WebAlta Crawler</a>',
'webauto','<a href="http://www.yanasoft.co.jp/webautodoc.html">WebAuto</a>',
'webaroobot','<a href="http://www.webaroo.com/rooSiteOwners.html">Webaroo Bot</a>',
'webdigity\swhois\sservice','<a href="http://www.webdigity.com/ws/">webdigity whois service</a>',
'website\sexplorer','<a href="http://www.umechando.com/webex/">Website Explorer</a>',
'wish\-la','<a href="http://wish.slis.tsukuba.ac.jp/jp/">Wish Project(wish la)</a>',
'wish\-project','<a href="http://wish.slis.tsukuba.ac.jp/jp/">Wish Project</a>',
'wiwi','<a href="http://wi2.jp">WiWi</a>',
'wwwster/','<a href="mailto:gue@cis.uni-muenchen.de">wwwster</a>',
'yahoo!-adcrawler','<a href="http://help.yahoo.com/yahoo_adcrawler">Yahoo! Ad Crawler</a>',
'zao\-crawler','Zao Crawler',
'zibber','<a href="http://www.zibb.com/CrawlerInformaion.aspx">Zibb Crawler</a>',
'^-$','-(SPAM?)',
# Common robots (In robot file)
'bingbot/','<a href="http://www.bing.com/bingbot.htm" rel="nofollow" title="Bing home page">Bingbot</a>',
'bingpreview','BingPreview',
'msiecrawler','<a href="http://msdn.microsoft.com/workshop/delivery/offline/linkrel.asp" rel="nofollow" title="Bot home page.">MSIECrawler</a>',
'msnbot/','<a href="http://search.msn.com/msnbot.htm" rel="nofollow" title="Bot home page">MSNBot</a>',
'msnbot\-media/','<a href="http://search.msn.com/msnbot.htm" rel="nofollow" title="Bot home page">MSNBot-media</a>',
'adidxbot/','AdIdxBot Microsoft Ad Quality control',
'not[\x20]googlebot/','NOT Googlebot',
'googlebot/','Googlebot',
'google[\x20]web[\x20]preview','Google Web Preview',
'googlebot\-image/','<a href="http://www.google.com/bot.html" rel="nofollow" title="Bot home page">Googlebot-Image</a>',
'googlebot\-mobile/','Googlebot-Mobile',
'google[\x20]page[\x20]speed','Google Page Speed',
'google\-sitemaps','google-sitemaps',
'googlebot\-news','Googlebot-News',
'googlebot\-video/','Googlebot-Video',
'adsbot\-google[\x20]\(','<a href="http://www.google.com/adsbot.html" rel="nofollow" title="AdsBot-Google home page">AdsBot-Google</a>',
'adsbot\-google\-mobile\-apps','AdsBot-Google-Mobile-Apps',
'mediapartners\-google','<a href="https://adwords.google.com" title="Bot home page">Google AdSense</a>',
'feedfetcher\-google','<a href="http://www.google.com/feedfetcher.html" rel="nofollow" title="Bot home page">Feedfetcher-Google</a>',
'google\-adwords\-instant','Google-Adwords-Instant',
'firefox/1\.5','Nautic Expo using Firefox/1.5',
'yahoo![\x20]slurp[\x20]china','Yahoo! Slurp China',
'yahoo![\x20]slurp','Yahoo! Slurp',
'baiduspider/','<a href="http://www.baidu.com/search/spider.html" rel="nofollow" title="Bot home page">BaiDuSpider</a>',
'baiduspider\-image','Baiduspider-image',
'baidu','Baidu ( catchall )',
'yandexbot/','YandexBot',
'yandeximages/','YandexImages',
'YandexImageResizer','YandexImageResizer',
'yandexmetrika/','YandexMetrika',
'yandexmobilebot/','YandexMobileBot',
'yandex','Yandex ( catchall )',
'electricmonk/','electricmonk',
'spbot/','<a href="http://www.seoprofiler.com/bot" rel="nofollow" title="SEOprofiler Bot">SEOprofiler Bot</a>',
'seznambot/','<a href="http://fulltext.seznam.cz" rel="nofollow" title="Bot home page">SeznamBot</a>',
'msie8','msie8 - ( Rogue Robot )',
'ahrefsbot/','<a href="http://ahrefs.com/robot/" rel="nofollow" title="Bot home page">AhrefsBot</a>',
'fsbot','<a href="http://www.adin.co.jp/fs/">Flex Search</a>',
'007ac9[\x20]crawler','<a href="http://crawler.007ac9.net/" rel="nofollow" title="007ac9 Crawler Page">007ac9 Crawler</a>, seems to belong to <a href="http://www.sistrix.com/" rel="nofollow" title="SISTRIX Home Page">SISTRIX</a>',
'2345explorer/','2345Explorer',
'360spider','<a href="https://www.google.com/search?q=360spider+-Ferrari" title="No home page, using Google search instead">360spider</a>',
'a[\x20]simple[\x20]crawler','A Simple Crawler',
'abrave','Abrave',
'acapbot/','acapbot',
'accoona\-ai\-agent/','<a href="http://www.accoona.com" rel="nofollow" title="Accoona-AI-Agent home page">Accoona-AI-Agent</a>',
'adnormcrawlercatchbot/','AdnormCrawlerCatchBot',
'adscanner','adscanner',
'aihitbot/','aiHitBot',
'aipbot/','<a href="http://www.aipbot.com" rel="nofollow" title="aipbot@aipbot.com Bot home page">aipbot</a>',
'alphabot','AlphaBot',
'apache\-httpclient/','Apache-HttpClient',
'apexoo[\x20]spider','Apexoo Spider',
'applebot/','<a href="http://www.apple.com/go/applebot" rel="nofollow" title="Applebot Home Page">Applebot</a>',
'arcemedia','AdsBot-ArceMedia',
'archive\.org_bot','<a href="http://crawls.archive.org/collections/bncf/crawl.html" rel="nofollow" title="Bot home page">archive.org bot</a>',
'babya[\x20]discoverer','Babya Discoverer',
'barkrowler','Barkrowler',
'bdcbot/','BDCbot',
'bellpagesca/','BellPagesCA',
'benosey[\x20]mohawk[\x20]search','BeNosey Mohawk Search',
'bhcbot','bhcBot',
'bidswitchbot','bidswitchbot',
'bigbozz/','BigBozz',
'binget/','BinGet',
'bitlybot','bit.ly',
'bl\.uk_lddc_bot/','bl.uk_lddc_bot',
'blexbot/','<a href="http://webmeup-crawler.com" rel="nofollow" title="BLEXBot Home Page">BLEXBot</a>, seems to belong to the <a href="http://webmeup.com" rel="nofollow" title="WebMeUp Home Page">WebMeUp backlink tool</a>',
'bnf.fr_bot','bnf.fr_bot',
'boitho\.com\-dc/','<a href="http://www.boitho.com/dcbot.html" rel="nofollow" title="Bot home page">boitho.com-dc</a>',
'booglebot','BoogleBot',
'businessbot:','BusinessBot:',
'catchbot/','CatchBot',
'cb/nutch','CB/Nutch',
'ccbot/','<a href="http://commoncrawl.org/faq/" rel="nofollow" title="Common Crawl FAQ Page">Common Crawl</a>',
'cliqzbot/','<a href="http://cliqz.com/company/cliqzbot" rel="nofollow" title="Cliqzbot Home Page">Cliqzbot</a>',
'cms[\x20]crawler','<a href="http://www.cmscrawler.com" rel="nofollow" title="CMS Crawler Home Page">CMS Crawler</a>',
'companybook\-crawler','Companybook-Crawler',
'converacrawler/','<a href="http://www.authoritativeweb.com/crawl/" rel="nofollow" title="ConveraCrawler home page">ConveraCrawler</a>',
'contacts-crawler','Contacts-Crawler',
'contxbot','contxbot',
'cosmos/','cosmos',
'crmnlcrawlagent','CRMNLCrawlAgent',
'crawl/nutch','crawl/Nutch',
'crawler4j','crawler4j',
'crazywebcrawler', '<a href="http://www.crazywebcrawler.com/" rel="nofollow" title="CrazyWeb Crawler Home Page">CrazyWeb Crawler</a>',
'cse[\x20]html[\x20]validator','<a href="http://online.htmlvalidator.com/php/onlinevallite.php" rel="nofollow" title="CSE HTML Validator Lite Online home page">CSE HTML Validator Lite Online</a>',
'c\-t[\x20]bot','C-T bot',
'cubot','CUBOT',
'curl/php','Curl/PHP',
'cyencebot','cyencebot',
'dalvik/','Dalvik',
'datacrawler/','DataCrawler',
'daumoa','<a href="http://tab.search.daum.net/aboutWebSearch.html" rel="nofollow" title="Daum">Daum</a>',
'daum','daum',
'deepnet[\x20]explorer','Deepnet Explorer',
'deusu/','<a href="https://deusu.de/robot.html" rel="nofollow" title="DeuSu">DeuSu</a>',
'digincore','Digincore',
'discordbot/','Discordbot',
'dispatch/','Dispatch',
'dnyzbot','DnyzBot',
'docomo/','DoCoMo',
'domain[\x20]re\-animator[\x20]bot','Domain Re-Animator Bot',
'domaincrawler/','DomainCrawler',
'domainmacrocrawler/','DomainMacroCrawler',
'domainsonocrawler/','DomainSONOCrawler',
'domainstatsbot/','DomainStatsBot',
'dotbot/','<a href="http://www.opensiteexplorer.org/dotbot" rel="nofollow" title="Home Page">DotBot, Open Site Explorer</a>',
'duckduckbot-https','DuckDuckBot-Https',
'duckduckgo\-favicons\-bot/','<a href="http://duckduckgo.com" rel="nofollow" title="DuckDuckGo Home Page">DuckDuckGo-Favicons-Bot</a>',
'elinks/','ELinks',
'elinks[\x20]\(','ELinks (',
'emailmarketingrobot/','EmailMarketingRobot',
'emeraldshield\.com[\x20]webbot','EmeraldShield.com WebBot',
'envolk\[its\]spider/','envolk ITS spider',
'eright','eright',
'esperanzabot','EsperanzaBot',
'exabot/','<a href="http://www.exabot.com" rel="nofollow" title="Bot home page">Exabot</a>',
'extlinksbot','ExtLinksBot',
'experiancrawluk','ExperianCrawlUK',
'facebookexternalhit/','facebookexternalhit',
'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','<a href="http://www.telekom.de" rel="nofollow" title="FAST Enterprise Crawler * crawleradmin.t-info@telekom.de home page">FAST Enterprise Crawler * crawleradmin.t-info@telekom.de</a>',
'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','<a href="http://www.telekom.de" rel="nofollow" title="FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de home page">FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de</a>',
'fast\-webcrawler/','FAST-WebCrawler',
'feosey[\x20]mohk[\x20]crawler','Feosey Mohk Crawler',
'findlinks/','<a href="http://wortschatz.uni-leipzig.de/findlinks/" rel="nofollow" title="Bot home page">Findlinks</a>',
'findxbot/','Findxbot',
'firephp/','FirePHP',
'firstdirectory\-bot','firstdirectory-bot',
'flamingo','Flamingo_SearchEngine',
'flippybearbot/','FlippyBearBot',
'^foo$','foo',
'freewebmonitoring[\x20]sitechecker/','FreeWebMonitoring SiteChecker',
'fujilabol','fujilabol',
'furlbot/','FurlBot',
'gaisbot/','<a href="http://gais.cs.ccu.edu.tw/robot.php" rel="nofollow" title="Bot home page">Gaisbot</a>',
'gallent[\x20]spider','Gallent Spider',
'garlikcrawler/','GarlikCrawler',
'getintent[\x20]crawler','GetIntent Crawler',
'getintentcrawler[\x20]getintent\.com','GetintentCrawler getintent.com',
'gigabot/','<a href="http://www.gigablast.com/spider.html" rel="nofollow" title="Bot home page">GigaBot</a>',
'gipo\-crawler/nutch','gipo-crawler/Nutch',
'girafabot','<a href="http://www.girafa.com" rel="nofollow" title="Bot home page">Girafabot</a>',
'gluten[\x20]free[\x20]crawler/','Gluten Free Crawler',
'gocrawl','gocrawl',
'gowikibot','Gowikibot',
'go\-http\-client/','Go-http-client',
'grapeshotcrawler/','GrapeshotCrawler',
'gsitecrawler/','GSiteCrawler',
'gurujibot/','GurujiBot',
'hadibot','hadiBot',
'haosouspider','HaosouSpider',
'hello[\x20]crawler','HELLO Crawler',
'holmes/','holmes',
'houzzbot','houzzbot',
'http_request2/','HTTP_Request2',
'hubspot[\x20]webcrawler','HubSpot Webcrawler',
'hypercrawl/','HyperCrawl',
'icc\-crawler/','ICC-Crawler',
'iconoclast','Popular Iconoclast',
'idgcrawler/nutch','IDGCrawler/Nutch',
'idg/uk','IDG/UK',
'idmarch[\x20]automatic\.beta/','<a href="http://www.idmarch.org/bot.html" rel="nofollow" title=" Home Page">IDMARCH</a>',
'inbybot','InbyBot',
'incutio[\x20]xml','Incutio XML',
'indeedbot','IndeedBot',
'influencebot','InfluenceBot',
'irlbot/','<a href="http://irl.cs.tamu.edu/crawler" rel="nofollow" title="Bot home page">IRLbot</a>',
'issuecrawler','IssueCrawler',
'istellabot/','<a href="http://www.tiscali.it" rel="nofollow" title="IstellaBot">IstellaBot</a>',
'james[\x20]bot','<a href="http://cognitiveseo.com/bot.html" rel="nofollow" title="James BOT Home Page">James BOT</a>',
'jigsaw/','Jigsaw',
'jobfeed','JobFeed',
'jooblebot','Jooblebot',
'komodiabot/','KomodiaBot',
'konqueror/','Konqueror',
'lightspeed','Lightspeed',
'linkapediabot','linkapediabot',
'metager\-linkchecker','<a href="https://www.devagroup.pl/blog/roboty-sieciowe/metager-linkchecker" rel="nofollow" title="Bot home page">MetaGer-LinkChecker</a>',
'linkchecker','<a href="http://linkchecker.sourceforge.net" rel="nofollow" title="Bot home page">LinkChecker</a>',
'linkcheck','LinkCheck',
'linkdexbot/','linkdexbot',
'linkedinbot/','LinkedInBot',
'linkpadbot/','LinkpadBot',
'links[\x20]\(','Links (',
'linksmanager\.com_bot','LinksManager.com_bot',
'lwp::simple/','LWP::Simple',
'mail\.ru_bot/','<a href="http://go.mail.ru/help/robots" rel="nofollow" title="Mail.ru bot home page">Mail.ru bot</a>',
'makecontact','makecontact',
'mappy','Mappy Crawler',
'mauibot','MauiBot',
'meanpathbot/','<a href="http://www.meanpath.com/meanpathbot.html" rel="nofollow" title="Meanpathbot Home Page">Meanpathbot</a>',
'mechanize','Mechanize',
'mediatoolkitbot','Mediatoolkitbot',
'megaindex\.ru/','<a href="https://www.megaindex.ru" rel="nofollow" title="MegaIndex.ru Home Page">MegaIndex.ru</a>',
'merzscope','MerzScope',
'meta_bot','Meta_Bot',
'mfibot/','mfibot',
'microsoft.*discovery','<a href="http://support.microsoft.com/kb/838028/en-us" rel="nofollow" title="Microsoft KB838028">Microsoft Office Protocol Discovery</a>/<a href="http://blogs.msdn.com/b/vsofficedeveloper/archive/2008/03/11/office-existence-discovery-protocol.aspx" rel="nofollow" title="Description of the Microsoft Office Existence Discovery">Microsoft Office Existence Discovery</a>',
'missigua_locator','<a href="http://www.webmasterworld.com/forum11/2690.htm" rel="nofollow" title="Missigua_Locator  home page">Missigua_Locator</a>',
'mixrankbot','MixrankBot',
'mj12bot/','<a href="http://majestic12.co.uk/bot.php" rel="nofollow" title="Bot home page.">MJ12bot</a>',
'mojeekbot/','<a href="http://www.mojeek.com/bot.html" rel="nofollow" title="Bot home page.">MojeekBot</a>',
'mojeek','mojeek',
'mojolicious','Mojolicious',
'mxt/nutch','MXT/Nutch',
'my[\x20]nutch[\x20]spider/','My Nutch Spider',
'myse/nutch','myse/Nutch',
'naaraa','Naaraa',
'nerdybot','NerdyBot',
'netestate[\x20]ne[\x20]crawler','<a href="http://www.website-datenbank.de" rel="nofollow" title="Website-Datenbank home page">Website-Datenbank</a>',
'netresearchserver/','<a href="http://loopimprovements.com/robot.html">Net Research Server (NRS)</a>',
'nimbostratus-bot','Nimbostratus-Bot',
'nominet','nominet',
'nrlcorpusbuilder/nutch','NRLCorpusBuilder/Nutch',
'nutch\-1\.4/','nutch-1.4',
'nutch\-1\.8/','nutch-1.8',
'nutchcvs/','<a href="http://lucene.apache.org/nutch/bot.html" rel="nofollow" title="NutchCVS home page">NutchCVS</a>',
'o\.uk[\x20]robot','o.uk robot',
'ocrawler;','ocrawler;',
'odp[\x20]link[\x20]checker','ODP link checker',
'offline[\x20]explorer/','Offline Explorer',
'omniexplorer_bot/','<a href="http://www.omni-explorer.com" rel="nofollow" title="Bot home page.">OmniExplorer Bot</a>',
'orangebot/','OrangeBot, no website, log entry specifies mail address', # support.orangebot@orange.com
'orliac','Orliac',
'outclicksbot','OutclicksBot',
'pagebiteshyperbot/','PageBitesHyperBot',
'pcore','Pcore',
'pdffillerbot/','pdffillerbot',
'peopleman','peopleman',
'phantomjs','PhantomJS',
'php/5\.2\.8','PHP/5.2.8',
'pinterestbot','Pinterestbot',
'piplbot','PiplBot',
'ploetz[\x20]\+[\x20]zeller','Ploetz + Zeller',
'plukkie/','<a href="http://www.botje.com/plukkie.htm" rel="nofollow" title="Plukkie">Plukkie</a>',
'princetonbot/','Princetonbot',
'privacyawarebot/','PrivacyAwareBot',
'prlog/','Prlog',
'proximic','<a href="http://www.proximic.com/info/spider.php" rel="nofollow" title="Proximic Spider home page">Proximic Spider</a>',
'psbot/','<a href="http://www.picsearch.com/bot.html" rel="nofollow" title="Bot home page">psbot</a>',
'psbot\-image','psbot-image',
'python_wk_crawler','python_wk_crawler',
'python\-urllib/','<a href="http://docs.python.org/library/urllib.html" rel="nofollow" title="Tools developed using a Python library">Python-urllib</a>',
'qcrawl','QCrawl',
'quick-crawler','Quick-Crawler',
'researchbot','ResearchBot',
'roboto','roboto',
'rogerbot/','<a href="http://moz.com/help/pro/what-is-rogerbot-" rel="nofollow" title="Rogerbot Home Page">Rogerbot</a>',
'rssingbot','RSSingBot',
'rukicrawler/','RukiCrawler',
'safedns[\x20]search[\x20]bot/','SafeDNS search bot',
'safednsbot','SafeDNSBot',
'safesearch[\x20]microdata[\x20]crawler','SafeSearch microdata crawler',
'safesearch','<a href="https://safesearch.avira.com" rel="nofollow" title="Avira SafeSearch Home Page">Avira SafeSearch</a>',
'sbl\-bot','SBL-BOT',
'scrapy','scrapy',
'screaming[\x20]frog[\x20]seo[\x20]spider/','Screaming Frog SEO Spider',
'screenerbot[\x20]crawler[\x20]beta','ScreenerBot Crawler Beta',
'scrubby','Scrubby',
'searchie/','Searchie',
'securityresearch\.bot','Security Research Bot',
'seekmo','Seekmo',
'semanticbot','semanticbot',
'semrushbot/','<a href="http://www.semrush.com/bot.html" rel="nofollow" title="SemrushBot">SemrushBot</a>',
'semrushbot-si','SemrushBot-SI',
'seo\-audit\-check\-bot/','seo-audit-check-bot',
'seobility','Seobility',
'seokicks\-robot','<a href="http://www.seokicks.de/robot.html">SEOkicks Webcrawler</a>',
'seolyticscrawler/','SEOlyticsCrawler',
'seostats','SEOstats',
'seosys/nutch','Seosys/Nutch',
'seoterritory\.com[\x20]bot','Seoterritory.com.bot',
'serendeputy','serendeputy',
'shim\-crawler','<a href="http://www.logos.ic.i.u-tokyo.ac.jp/crawler/" rel="nofollow" title="crawl@logos.ic.i.u-tokyo.ac.jp Bot home page">Shim-Crawler</a>',
'siteexplorer/','SiteExplorer',
'siteexplorer\.info','<a href="http://siteexplorer.info" rel="nofollow" title="Site Explorer home page">Site Explorer</a>',
'siteimprove','siteimprove',
'slackbot\-linkexpanding','Slackbot-LinkExpanding',
'smabblerbot/','SmabblerBot',
'sogou[\x20]web[\x20]spider/','<a href="http://www.sogou.com">Sogou Spider</a>',
'special_archiver/','special_archiver',
'spiderbot/','Spiderbot',
'spuhexbot','SpuhexBot',
'spyonweb','spyonweb',
'ssearch_bot','<a href="http://www.semantissimo.de" rel="nofollow" title="sSearch Crawler">sSearch Crawler</a>',
'streamline3bot','Streamline3Bot',
'surdotlybot/','SurdotlyBot',
'surveybot/','SurveyBot',
'taiil/nutch','taiil/Nutch',
'tbot\-nutch','tbot-nutch',
'teeraidbot','TeeRaidBot',
'telegrambot','TelegramBot',
'test/nutch','Test/Nutch',
'test[\x20]spider','Test Spider',
'testcrawler','TestCrawler',
'the[\x20]knowledge[\x20]ai', 'The Knowledge AI',
'tracemyfile','tracemyfile',
'trendiction','trendiction',
'turnitinbot/','TurnitinBot',
'turnitinbot','<a href="http://www.turnitin.com/robot/crawlerinfo.html" rel="nofollow" title="TurnitinBot Home Page">Turn It In</a>',
'tweetmemebot/','TweetmemeBot',
'ucy/nutch','UCY/Nutch',
'uni-leipzig\.de','uni-leipzig.de',
'uptimebot/','Uptimebot',
'uptimerobot/','UptimeRobot',
'url[\x20]checker','URL Checker',
'uxcrawlerbot','UXCrawlerBot',
'validator\.nu/','Validator.nu',
'vbseo','vBSEO',
'vbulletin[\x20]via[\x20]php','vBulletin via PHP',
'vebidoobot','vebidoobot',
'vegi[\x20]bot','vegi bot',
'velen','Velen',
'viz/nutch','viz/Nutch',
'voilabot','VoilaBot',
'voila','<a href="http://www.voila.fr/" rel="nofollow" title="Search Engine Home Page">Voila</a>',
'voyager/','voyager',
'vortex/','<a href="http://marty.anstey.ca/projects/robots/vortex/" rel="nofollow" title="Bot home page">VORTEX</a>',
'w3c_validator/','<a href="http://validator.w3.org" rel="nofollow" title="Bot home page">W3C Validator</a>',
'w3c\-checklink/','<a href="http://validator.w3.org/checklink/" rel="nofollow" title="Bot home page">W3C Link Checker</a>',
'wbsearchbot/','<a href="http://www.warebay.com/bot.html" rel="nofollow" title="WBSearchBot">WBSearchBot</a>',
'wbsrch/','WbSrch/',
'wesee:ads/pagebot','WeSEE:Ads/PageBot',
'wesee:ads/picturebot','WeSEE:Ads/PictureBot',
'wesee_bot','WeSEE_Bot',
'wget/','WGet tools',
'who\.is[\x20]bot','Who.is.Bot',
'wonderbot/','wonderbot',
'woobot/','woobot',
'obot/','oBot',
'wotbox/','<a href="http://www.wotbox.com/bot/" rel="nofollow" title="Wotbox Bot Home Page">Wotbox</a>',
'xaldon[\x20]webspider','Xaldon WebSpider',
'xenu[\x20]link[\x20]sleuth','<a href="http://home.snafu.de/tilman/xenulink.html" rel="nofollow" title="Description, Download, FAQ Page">Xenu'. "'" . 's Link Sleuth&trade;</a>, see <a href="http://en.wikipedia.org/wiki/Xenu%27s_Link_Sleuth" rel="nofollow" title="Wikipedia on Xenu'. "'" . 's Link Sleuth">Wikipedia</a>',
'xenu_link_sleuth','<a href="http://home.snafu.de/tilman/xenulink.html" rel="nofollow" title="Xenu Link Sleuth home page">Xenu Link Sleuth</a>',
'xml[\x20]sitemaps[\x20]generator','XML Sitemaps Generator',
'xovibot/','<a href="http://www.xovibot.net" rel="nofollow" title="XoviBot Home Page">XoviBot</a>',
'yacybot','<a href="http://yacy.net/bot.html" rel="nofollow" title="YaCy Home Page">YaCy</a>',
'yahoo[\x20]link[\x20]preview','Yahoo Link Preview',
'yak','yak-linkfluence',
'yisouspider','YisouSpider',
'yoozbot','yoozBot',
'your\-website\-sucks','Your-Website-Sucks',
'zoominfobot','zoominfobot',
'zspider/','<a href="http://feedback.redkolibri.com" rel="nofollow" title="Bot home page">zspider</a>',
'zumbot/','<a href="http://help.zum.com/inquiry" rel="nofollow" title="ZumBot home page">ZumBot</a>',
# The entries below are placed at the end to catch some generic user agents.
'nbot','nbot',
'ng/1\.','<a href="http://www.exabot.com" rel="nofollow" title="Bot home page">NG 1.x (Exalead)</a>', # put at end to avoid false positive
'ng/2\.','<a href="http://www.exabot.com" rel="nofollow" title="Bot home page">NG 2.x (Exalead)</a>', # put at end to avoid false positive
'libwww\-perl','libwww-perl',
'cfnetwork','<a href="http://www.cocoadev.com/index.pl?CFNetwork" rel="nofollow" title="CFNetwork home page">CFNetwork</a>',
'urllib','urllib',
'javabee','JavaBee',
'projectwf\-java\-test\-crawler','ProjectWF-java-test-crawler',
'java',"<a href=\"http://www.projecthoneypot.org/harvester_useragents.php\" rel=\"nofollow\" title=\"Bot home page\">Java ($Message[211])</a>", # put at end to avoid false positive
'loocalcrawler/nutch','loocalcrawler/nutch',
'nutchosu\-vlib','nutchosu-vlib',
'nutch','nutch ( catchall )',
'perlcrawler','perlcrawler',
'perl','perl',
'(firefox/)([0-9]\.|[0-1][0]\.)','Firefox version 10 and lower - various robots',

# Less common robots (In robot file)
'^mozilla$','Mozilla ( Rogue Robot )',
'^mozilla/3\.0\s\(compatible$', 'mozilla/3.0 (compatible - ( Rogue Robot )',
'^mozilla/4\.0$', 'mozilla/4.0 - ( Rogue Robot )',
'^mozilla/4\.0\s\(compatible;\)$', 'mozilla/4.0 (compatible;) - ( Rogue Robot )',
'^mozilla/5\.0$', 'mozilla/5.0 - ( Rogue Robot )',
'^mozilla/5\.0\s\(compatible;$', 'mozilla/5.0 (compatible; - ( Rogue Robot )',
'^mozilla/5\.0\s\(en\-us\)$', 'mozilla/5.0 (en-us) - ( Rogue Robot )',
'^mozilla/5\.0\sfirefox/3\.0\.5$', 'mozilla/5.0 firefox/3.0.5 - ( Rogue Robot )',
'^mozilla/6\.0[\x20]\(compatible\)$','Mozilla/6.0 (compatible) - ( Rogue Robot )',
'^mozilla/(.*)beta[\x20]\(windows\)','Mozilla Beta (Windows) - ( Rogue Robot )',
'msie[\x20]2','MSIE 2 - ( Rogue Robot )',
'msie[\x20]3','MSIE 3 - ( Rogue Robot )',
'msie[\x20]4','MSIE 4 - ( Rogue Robot )',
'msie[\x20]5','MSIE 5 - ( Rogue Robot )',
'msie[\x20]6','MSIE 6 - ( Rogue Robot )',
'msie\+6\.0\;','MSIE+6.0; - ( Rogue Robot)',
'windows[\x20]95','Windows 95 - ( Rogue Robot )',
'windows[\x20]98','Windows 98 - ( Rogue Robot )',

# These entries could be removed to speed up processing, as they are rarely seen.
'a6\-indexer','<a href="http://www.a6corp.com/a6-web-scraping-policy/" rel="nofollow" title="A6-Indexer">A6-Indexer</a>',
'abcdatos','abcdatos',
'abonti\.com','<a href="http://www.abonti.com" rel="nofollow" title="Abonti WebSearch">Abonti WebSearch</a>',
'acme\.spider','Acme.Spider',
'activebookmark','<a href="http://www.libmaster.com/active_bookmark.php" rel="nofollow" title="ActiveBookmark home page">ActiveBookmark</a>',
'adamm_bot','<a href="http://home.blic.net/adamm/" rel="nofollow" title="Bot home page">AdamM Bot</a>',
'advbot','<a href="http://advbot.net/bot.html" rel="nofollow" title="AdvBot Home Page">AdvBot</a>',
'affectv\.co\.uk','<a href="http://www.affectv.co.uk" rel="nofollow" title="affectv.co.uk Home Page">affectv.co.uk</a>',
'ahoythehomepagefinder','ahoythehomepagefinder',
'aleadsoftbot','<a href="http://www.aleadsoft.com/bot.htm" rel="nofollow" title="ALeadSoftbot home page">ALeadSoftbot</a>',
'alkaline','alkaline',
'allrati','Allrati',
'alltop','alltop',
'almaden','<a href="http://www.almaden.ibm.com/cs/crawler" rel="nofollow" title="IBM Almaden Research Center WebFountain&trade; Bot home page">IBM Almaden</a> Research Center WebFountain&trade;',
'alpha_search_agent','Alpha Search Agent',
'anthill','anthill',
'antibot','antibot',
'aport','Aport',
'appie','<a href="http://www.walhello.com" rel="nofollow" title="Bot home page">Walhello appie</a>',
'applesyndication','applesyndication', 
'arachnophilia','arachnophilia',
'arale','arale',
'araneo','araneo',
'architext','architext',
'archive\-de\.com','<a href="http://archive-de.com/bot" rel="nofollow" title="Archive-de.com Home Page">Archive-de.com</a>',
'aretha','aretha',
'argus','<a href="http://www.simpy.com/bot.html" rel="nofollow" title="feedback@simpy.com Bot home page">Argus</a>',
'ariadne','ARIADNE',
'arianna\.libero\.it','<a href="http://arianna.libero.it" rel="nofollow" title="Bot home page">arianna.libero.it</a>',
'arks','arks',
'aspider','ASpider (Associative Spider)',
'aspseek','<a href="http://www.aspseek.org" rel="nofollow" title="Bot home page">ASPseek</a>',
'asterias','Asterias',
'asynchttpclient','asynchttpclient', 
'atn\.txt','ATN Worldwide',
'atomz','Atomz.com Search Robot',
'auresys','AURESYS',
'awbot','AWBot',
'backlinktest\.com','<a href="http://www.backlinktest.com/crawler.html" rel="nofollow" title="BacklinkCrawler">BacklinkCrawler</a>',
'backrub','BackRub',
'becomebot','<a href="http://www.become.com/site_owners.html" rel="nofollow" title="Bot home page">BecomeBot</a>',
'bender','<a href="http://bender.ucr.edu" rel="nofollow" title="Bot home page">bender</a> <a href="http://ivia.ucr.edu/manuals/NiFC/current/index.shtml" rel="nofollow" title="Bot home page">focused_crawler</a>',
'betabot','BetaBot',
'bigbrother','Big Brother',
'biglotron','<a href="http://www.biglotron.com/robot.html" rel="nofollow" title="Bot home page">Biglotron</a>',
'binglocalsearch','BingLocalSearch',
'bittorrent_bot','<a href="http://www.bittorrent.com" rel="nofollow" title="Bot home page">BitTorrent Bot</a>',
'biz360[_+\s]spider','<a href="http://www.biz360.com" rel="nofollow" title="blogsmanager@biz360.com Bot home page">Biz360 spider</a>',
'bjaaland','bjaaland',
'blackwidow','BlackWidow',
'blindekuh','Die Blinde Kuh',
'blogbridge[_+\s]service','<a href="http://www.blogbridge.com" rel="nofollow" title="Bot home page">BlogBridge Service</a>',
'blogged_crawl','blogged_crawl', 
'bloglines','<a href="http://www.bloglines.com" rel="nofollow" title="Bot home page">Bloglines</a>',
'bloglovin','bloglovin',
'blogpulse','<a href="http://www.intelliseek.com" rel="nofollow" title="Bot home page">BlogPulse ISSpider intelliseek.com</a>',
'blogsearch','<a href="http://www.icerocket.com" rel="nofollow" title="Bot home page">BlogSearch</a>',
'blogshares','<a href="http://blogshares.com/help.php?node=7" rel="nofollow" title="Bot home page">Blogshares Spiders</a>',
'blogslive','<a href="http://www.blogslive.com" rel="nofollow" title="info@blogslive.com Bot home page">Blogslive</a>',
'blogssay','<a href="http://www.blogssay.com" rel="nofollow" title="Bot home page">BlogsSay :: RSS Search Crawler</a>',
'bloodhound','Bloodhound',
'bncf\.firenze\.sbn\.it/raccolta\.txt','<a href="http://www.bncf.firenze.sbn.it/raccolta.txt" rel="nofollow" title="Bot home page">Biblioteca Nazionale Centrale di Firenze</a>',
'bobby','Bobby',
'bookmark\-manager','<a href="http://bkm.sourceforge.net" rel="nofollow" title="Bookmark-Manager home page">Bookmark-Manager</a>',
'borg\-bot','Borg-Bot',
'boris','Boris',
'brightnet','bright.net caching robot',
'bruinbot','<a href="http://web.archive.org" rel="nofollow" title="BruinBot home page">The web archive</a>',
'bubing','<a href="http://law.di.unimi.it/BUbiNG.html" rel="nofollow" title="BUbiNG">BUbiNG</a>',
'bumblebee','Bumblebee (relevare.com)',
'butterfly','<a href="http://corp.topsy.com/support/butterfly/">Butterfly Robot</a>',
'buzztracker','buzztracker', 
'cactvschemistryspider','CACTVS Chemistry Spider',
'calif[^r]','Calif',
'candlelight[_+\s]favorites[_+\s]inspector','<a href="http://www.candlelight.com/home.html" rel="nofollow" title="Candlelight_Favorites_Inspector  home page">Candlelight_Favorites_Inspector</a>',
'careerbot','<a href="http://www.career-x.de/bot.html" rel="nofollow" title="CareerBot home page">CareerBot</a>',
'carpathia','carpathia',
'cassandra','Cassandra',
'catbot','catbot',
'cbn00glebot','cbn00glebot',
'cerberian\sdrtrs','<a href="http://www.pgts.com.au/cgi-bin/psql?robot_info=25240" rel="nofollow" title="Bot home page">Cerberian Drtrs</a>',
'cfetch','<a href="http://www.kosmix.com/crawler.html" rel="nofollow" title="kosmix home page">Cfetch</a>',
'cgireader','Digimarc Marcspider/CGI',
'chattertrap','chattertrap', 
'check_http','check_http (nagios)', 
'checkweb_link_validator','<a href="http://p.duby.free.fr/chkweb.htm" rel="nofollow" title="CheckWeb link validator home page">CheckWeb link validator</a>',
'christcrawler','ChristCrawler.com',
'churl','churl',
'cienciaficcion','cIeNcIaFiCcIoN.nEt',
'cipinetbot','<a href="http://www.cipinet.com/bot.html" rel="nofollow" title="CipinetBot home page">CipinetBot</a>',
'imagecoccoc','imagecoccoc',
'coccoc','coccoc',
'coldfusion','coldfusion',
'collective','Collective',
'combine','Combine System',
'commons\-httpclient','<a href="http://jakarta.apache.org/commons/httpclient/" rel="nofollow" title="Bot home page">Jakarta commons-httpclient</a>',
'computer_and_automation_research_institute_crawler','<a href="http://www.ilab.sztaki.hu/~stamas/publications/p184-benczur.html" rel="nofollow" title="Computer and Automation Research Institute Crawler home page">Computer and Automation Research Institute Crawler</a>',
'conceptbot','conceptbot',
'contentmatch','<a href="http://p4p.cn.yahoo.com">Yahoo!China ContentMatch Crawler</a>',
'converamultimediacrawler','<a href="http://www.authoritativeweb.com/crawl/" rel="nofollow" title="ConveraMultiMediaCrawler home page">ConveraMultiMediaCrawler</a>',
'coolbot','coolbot',
'copubbot','<a href="http://www.copub.com/bot.php" rel="nofollow" title="CoPubbot Home Page; Note: Access to bot home page verified July 31 2017">CoPubbot</a>',
'core','Web Core / Roots',
'covario','<a href="http://www.covario.com/ids">CovarioIDS</a>',
'cruiser','Internet Cruiser Robot',
'cscrawler','CsCrawler',
'cuasarbot','<a href="http://www.cuasar.com" rel="nofollow" title="Cuasarbot home page">Cuasarbot</a>',
'cursor','<a href="http://adcenter.hu/docs/en/bot.html" rel="nofollow" title="Cursor home page">Cursor</a>',
'cusco','Cusco',
'custo','<a href="http://www.netwu.com/custo/" rel="nofollow" title="Custo home page">Custo</a>',
'cyberspyder','CyberSpyder Link Test',
'datafountains/dmoz_downloader','<a href="http://infomine.ucr.edu" rel="nofollow" title="DataFountains/DMOZ Downloader home page">DataFountains/DMOZ Downloader</a>',
'dataprovider\.com','<a href="http://www.dataprovider.com" rel="nofollow" title="Dataprovider Site Explorer">Dataprovider Site Explorer</a>',
'daviesbot','DaviesBot',
'daylifefeedfetcher','daylifefeedfetcher', 
'daypopbot','DayPop',
'deepindex','<a href="http://www.deepindex.net/faq.php" rel="nofollow" title="Deepindex home page">Deepindex</a>',
'desertrealm','Desert Realm Spider',
'deweb','DeWeb(c) Katalog/Index',
'dienstspider','DienstSpider',
'digger','Digger',
'digout4u','digout4u',
'diibot','Digital Integrity Robot',
'dipsie\.bot','<a href="http://www.dipsie.com/bot/" rel="nofollow" title="Bot home page">Dipsie</a>',
'direct_hit','Direct Hit Grabber',
'discobot','<a href="http://discoveryengine.com/discobot.html">discobot</a>',
'dlvr\.it','dlvr.it',
'dnabot','DNAbot',
'dnsgroup','<a href="http://www.dnsgroup.com" rel="nofollow" title="DNSGroup home page">DNSGroup</a>',
'doccheckbot','doccheckbot/1.0, known to <a href="http://www.projecthoneypot.org/ip_46.229.160.208" rel="nofollow" title="Info to IP 46.229.160.208">Project Honey Pot</a>',
'checkbot','Checkbot',
'domainappender',  '<a href="http://www.profound.net/domainappender" rel="nofollow" title="DomainAppender Home Page [new window]" target="_blank">DomainAppender</a>',
'domainchecker','<a href="http://net-promoter.com" rel="nofollow" title="DomainChecker home page (not confirmed)">DomainChecker</a>',
'domainsdb\.net','<a href="http://domainsdb.net" rel="nofollow" title="Bot home page">DomainsDB.net</a>',
'download_express','DownLoad Express',
'dragonbot','DragonBot',
'dreamwidth','dreamwidth',
'drupal','Drupal Site', 
'dulance','<a href="http://www.dulance.com/bot.jsp" rel="nofollow" title="Bot home page">Dulance</a>',
'dumbot','<a href="http://www.dumbfind.com" rel="nofollow" title="Dumbot home page">Dumbot</a>',
'dumm\.de\-bot','<a href="http://www.dumm.de" rel="nofollow" title="dumm.de-Bot home page">dumm.de-Bot</a>',
'dwcp','DWCP (Dridus\' Web Cataloging Project)',
'e\-collector','e-collector',
'earthcom\.info','<a href="http://www.earthcom.info" rel="nofollow" title="Bot home page">EARTHCOM.info</a>',
'easydl','easydl',
'ebiness','EbiNess',
'eccp','<a href="http://www.eniro.com" rel="nofollow" title="Eniro Sverige home page">Eniro Sverige, email: search (at) eniro.com</a>',
'echo!','echo!',
'edgeio\-retriever','<a href="http://www.edgeio.com" rel="nofollow" title="Bot home page">edgeio-retriever</a>',
'elfinbot','ELFINBOT',
'emacs','Emacs-w3 Search Engine',
'emcspider','ananzi',
'enteprise','<a href="http://www.fastsearch.com" rel="nofollow" title="Bot home page">Fast Enteprise Crawler</a>',
# NOTE(review): '[:blank:]' outside a bracketed class is parsed by Perl as the
# plain character class [:blank] (one of ':', 'b', 'l', 'a', 'n', 'k'), so this
# key does not match "ernst 2.0". The intended form is presumably
# '[[:blank:]]' or '[\x20]'. The key must stay identical to its
# RobotsSearchIDOrder entry, so change both together. TODO confirm.
'ernst[:blank:]2\.0','Ernst 2.0 (does not provide any further information)',
'esther','Esther',
'ets_v','<a href="http://www.freetranslation.com/help/" rel="nofollow" title="ETS home page">ETS</a> Enterprise Translation Server',
'eventax','<a href="http://www.eventax.de" rel="nofollow" title="eventax home page">eventax</a>',
'everbeecrawler','EverbeeCrawler',
'everest\-vulcan','<a href="http://everest.vulcan.com/crawlerhelp" rel="nofollow" title="Bot home page">Everest-Vulcan</a>',
'evliyacelebi','Evliya Celebi',
'exactseek','ExactSeek Crawler',
'extreme[_+\s]picture[_+\s]finder','<a href="http://www.exisoftware.com" rel="nofollow" title="Extreme_Picture_Finder home page">Extreme_Picture_Finder</a>',
'ezoom','Ezooms',
'ezresult','Ezresult',
'facebook','FaceBook bot',
'facebot','<a href="https://developers.facebook.com/docs/opengraph/howtos/maximizing-distribution-media-content" rel="nofollow" title=" Home Page">Facebot (Facebook bot?)</a>',
'fast\-search\-engine','<a href="http://www.fast-search-engine.com" rel="nofollow" title="Bot home page">Fast-Search-Engine</a> (not fastsearch.com)',
'matrix_s\.p\.a\._\-_fast_enterprise_crawler','<a href="http://tin.virgilio.it" rel="nofollow" title="Matrix S.p.A. - FAST Enterprise Crawler home page">Matrix S.p.A. - FAST Enterprise Crawler</a>',
'fast_enterprise_crawler','<a href="http://www.fast.no" rel="nofollow" title="FAST Enterprise Crawler home page">FAST Enterprise Crawler</a>',
'fastbot','<a href="http://www.fastbot.de" rel="nofollow" title="fastbot Home Page">fastbot</a>',
'fastcrawler','FastCrawler',
'favicon','FavIconizer',
'favorg','<a href="http://www.pcmag.com/article2/0,4149,108438,00.asp" rel="nofollow" title="FavOrg home page">FavOrg</a>',
'favorites_sweeper','<a href="http://www.manitools.com/favsweep/" rel="nofollow" title="Favorites_Sweeper home page">Favorites Sweeper</a>',
'fdse','Fluid Dynamics Search Engine robot',
'feedburner','Feedburner',
'feedcrawl','FeedCrawl by feed@aobo.com',
'feedflow','<a href="http://feedflow.com/about" rel="nofollow" title="Bot home page">FeedFlow</a>',
'feedmyinbox','feedmyinbox', 
'feedroll\.com','feedroll.com',
'feedsky','<a href="http://www.feedsky.com" rel="nofollow" title="Bot home page">FeedSky</a>',
'feedster','<a href="http://www.feedster.com" rel="nofollow" title="Bot home page">Feedster</a>',
'feedvalidator','<a href="http://feedvalidator.org" rel="nofollow" title="FeedValidator home page">FeedValidator</a>',
'feedzira','feedzira',
'felix','Felix IDE',
'ferret','Wild Ferret Web Hopper #1, #2, #3',
'fetchbot','<a href="https://github.com/PuerkitoBio/fetchbot" rel="nofollow" title="Fetchbot Home Page">Fetchbot</a>',
'fetchrover','FetchRover',
'fever/','<a href="http://feedafever.com">Feed a Fever</a>',
'fido','fido',
'filmkamerabot','<a href="http://www.filmkamera.at/bot.html" rel="nofollow" title="FilmkameraBot home page">FilmkameraBot</a>',
'filterdb\.iss\.net','<a href="http://filterdb.iss.net/crawler/" rel="nofollow" title="oBot Home Page">oBot</a>',
'finderlein[_+\s]research[_+\s]crawler','Finderlein Research Crawler 1.0 (no contact information given)',
'findexa_crawler','<a href="http://www.findexa.no/gulesider/article26548.ece" rel="nofollow" title="Findexa Crawler home page">Findexa Crawler</a>',
'finnish','Hämähäkki',
'fireball','KIT-Fireball',
'firmilybot','<a href="http://www.firmily.com/bot.php" rel="nofollow" title="Firmily Bot">Firmily Bot Home page (Website was hacked on Oct. 19, 2013)</a>',
'flexum','Flexum Search Engine',
'foaf\-search\.net','<a href="http://www.foaf-search.net" rel="nofollow" title="Friend of a friend (FOAF) search engine">Friend of a friend (FOAF) search engine</a>',
'fooky\.com/scorpionbot','<a href="http://www.fooky.com/scorpionbots" rel="nofollow" title="Fooky.com/ScorpionBot/ScoutOut home page">Fooky.com/ScorpionBot/ScoutOut</a>',
'fouineur','Fouineur',
'francoroute','Robot Francoroute',
'freecrawl','Freecrawl',
'freenews','freenews', 
'funnelweb','FunnelWeb',
'g2crawler','<a href="http://crawler.instantnetworks.net" rel="nofollow" title="Bot home page (nobody@airmail.net)">G2Crawler</a>',
'gama','gammaSpider, FocusedCrawler',
'gazz','gazz',
'gcreep','GCreep',
'geniebot','<a href="http://www.genieknows.com" rel="nofollow" title="Bot home page">Geniebot</a>',
'genieo','<a href="http://www.genieo.com/webfilter.html" rel="nofollow" title="Genieo">Genieo</a>',
'geohasher','geohasher',
'getbot','GetBot',
'geturl','GetURL',
'gigablastopensource','<a href="http://www.gigablast.com" rel="nofollow" title="Gigablast Home page">GigablastOpenSource</a>, an Open Source Search Engine(<a href="https://github.com/gigablast/open-source-search-engine/wiki" rel="nofollow" title="at GitHub">Wiki</a>)',
'global_fetch','<a href="http://www.wesonet.com" rel="nofollow" title="Global Fetch home page">Global Fetch</a>',
'gnodspider','GNOD Spider',
'goforit\.com','<a href="http://www.goforit.com/about/" rel="nofollow" title="GoForIt.com home page">GoForIt.com</a>',
'goforitbot','<a href="http://www.goforit.com/about/" rel="nofollow" title="GOFORITBOT home page">GOFORITBOT</a>',
'golem','Golem',
'gonzo','<a href="http://www.suchen.de/faq.html" rel="nofollow" title="Bot home page">suchen.de</a>',
'gougou','GouGou',
'gpu_p2p_crawler','<a href="http://gpu.sourceforge.net/search_engine.php" rel="nofollow" title="Bot home page">GPU p2p crawler</a>',
'grabber','<a href="http://www.sdsc.edu" rel="nofollow" title="Seltsame Aktivitaeten vom San Diego Supercomputer Center">Grabber (SDSC)</a>',
'grapeshot','<a href="http://www.grapeshot.co.uk/crawler.php" rel="nofollow" title="Grapeshot Crawler">Grapeshot Crawler</a>',
'grapnel','Grapnel/0.01 Experiment',
'griffon','Griffon',
'gromit','Gromit',
'grub','<a href="http://www.grub.org">Grub.org</a>',
'gulliver','gulliver',
'gulperbot','Gulper Bot',
'hambot','HamBot',
'hanrss','hanrss', 
'harvest','Harvest',
'havindex','havIndex',
'henrythemiragorobot','<a href="http://www.miragorobot.com/scripts/mrinfo.asp" rel="nofollow" title="Bot home page">Mirago</a>',
'heritrix','<a href="http://crawler.archive.org" rel="nofollow" title="(used by a few different companies) Bot home page">Heritrix</a>',
'hl_ftien_spider','<a href="http://www.hylanda.com">Hylanda</a>',
'hometown','Hometown Spider Pro',
'hoowwwer','<a href="http://cosco.hiit.fi/search/hoowwwer/" rel="nofollow" title="HooWWWer home page">HooWWWer</a>',
'hpprint','HPPrint',
'htdig','ht://Dig',
'html[_+\s]link[_+\s]validator','<a href="http://www.lithopssoft.com" rel="nofollow" title="Html_Link_Validator home page">Html_Link_Validator</a>',
'htmlgobble','HTMLgobble',
'htmlparser','<a href="http://htmlparser.sourceforge.net" rel="nofollow" title="HTMLParser home page">HTMLParser</a>',
'httrack','<a href="http://www.httrack.com" rel="nofollow" title="Bot home page">HTTrack off-line browser</a>',
'hundesuche\.com\-bot','<a href="http://www.hundesuche.com" rel="nofollow" title="Hundesuche.com-Bot home page">Hundesuche.com-Bot</a>',
'hyperdecontextualizer','Hyper-Decontextualizer',
'ia_archiver\-web\.archive\.org','<a href="http://web.archive.org" rel="nofollow" title="Bot home page">The web archive (IA Archiver)</a>',
'ia_archiver','<a href="http://www.alexa.com" rel="nofollow" title="Bot home page">Alexa (IA Archiver)</a>',
'iajabot','iajaBot',
'iaskspider','<a href="http://www.iask.com">Sina Iask Spider</a>',
'i\-bot','i-bot',
'icarus6j','Icarus6j, email address in UA string, no website',
'ichiro','<a href="http://help.goo.ne.jp/door/crawlerE.html" rel="nofollow" title="Bot home page">ichiro</a>',
'icjobs\.de','<a href="http://www.icjobs.de" rel="nofollow" title="April 10, 2014: UA contains identification during the first and second page access only.">iCjobs Spider</a> Note: Most traffic counts as user traffic',
'ilse','Ingrid',
'iltrovatore\-setaccio','<a href="http://www.iltrovatore.it/aiuto/motore_di_ricerca.html" rel="nofollow" title="bot@iltrovatore.it IlTrovatore-Setaccio home page">IlTrovatore-Setaccio</a>',
'imagelock','Imagelock',
'implisensebot','<span title="As on Sep. 18, 2015, the user agent string did not contain a web address.">ImplisenseBot</span>',
'inagist','inagist', 
'incywincy','IncyWincy',
'infobot','<a href="http://www.infobot.org" rel="nofollow" title="InfoBot home page">InfoBot</a>',
'infociousbot','<a href="http://corp.infocious.com/tech_crawler.php" rel="nofollow" title="InfociousBot home page">InfociousBot</a>',
'infohelfer','<a href="http://www.infohelfer.de/crawler.php" rel="nofollow" title="Infohelfer home page">Infohelfer</a>',
'infomine','<a href="http://infomine.ucr.edu/useragents/" rel="nofollow" title="Bot home page">INFOMINE VLCrawler</a>',
'informant','Informant',
'infoseeksidewinder','Infoseek Sidewinder',
'infoseek','InfoSeek Robot 1.0',
'infospider','InfoSpiders',
'inspectorwww','Inspector Web',
'insurancobot','<a href="http://www.fastspywareremoval.com" rel="nofollow" title="InsurancoBot home page">InsurancoBot</a>',
'integromedb\.org','<a href="http://www.integromedb.org/Crawler" rel="nofollow" title="IntegromeDB home page">IntegromeDB</a>',
'intelliagent','IntelliAgent',
'internet[_+\s]ninja','<a href="http://www.dti.ne.jp" rel="nofollow" title="Internet_Ninja home page">Internet_Ninja </a>',
'internetarchive','<a href="http://lucene.apache.org/nutch/bot.html" rel="nofollow" title="InternetArchive home page">InternetArchive</a>',
'internetseer','InternetSeer',
'internetsupervision','<a href="http://internetsupervision.com" rel="nofollow" title="InternetSupervision home page">InternetSupervision</a>',
'ips\-agent','ips-agent Verisign(?) - no reliable information found.',
'irobot','I, Robot',
'iron33','Iron33',
'isearch2006','<a href="http://www.yahoo.com.cn" rel="nofollow" title="isearch2006 home page">isearch2006</a>',
'israelisearch','Israeli-search',
'iupui_research_bot','<a href="http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/" rel="nofollow" title="IUPUI_Research_Bot home page">IUPUI_Research_Bot</a>',
'izsearch','<a href="http://izsearch.com" rel="nofollow" title="iZSearch Home Page">iZSearch</a>',
'jacobin[\x20]club','jacobin club',
'jakarta','jakarta',
'jbot','JBot Java Web Robot',
'jcrawler','JCrawler',
'jeeves/','<a href="http://sp.ask.com/docs/about/tech_crawling.html" rel="nofollow" title="Bot home page">Ask</a>',
'jennybot','JennyBot',
'jobboerse','<a href="http://www.xn--jobbrse-d1a.com" rel="nofollow" title="Jobb&ouml;rse Home Page">Jobb&ouml;rse</a>',
'jobot','Jobot',
'jobo','JoBo Java Web Robot',
'joebot','JoeBot',
'jrtwine[_+\s]software[_+\s]check[_+\s]favorites[_+\s]utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" rel="nofollow" title="JRTwine_Software_Check_Favorites_Utility  home page">JRTwine_Software_Check_Favorites_Utility</a>',
'js\-kit','js-kit', 
'jubii','The Jubii Indexing Robot',
'jumpstation','JumpStation',
'justview','JustView',
'kalambot','<a href="http://64.124.122.251/feedback.html" rel="nofollow" title="KalamBot home page">KalamBot</a>',
'kamano\.de_newsfeedverzeichnis','<a href="http://www.kamano.de" rel="nofollow" title="kamano.de NewsFeedVerzeichnis home page">kamano.de NewsFeedVerzeichnis</a>',
'kapsi','image.kapsi.net',
'katipo','Katipo',
'kazoombot','<a href="http://www.kazoom.ca/bot.html" rel="nofollow" title="kazoombot@kazoom.ca KazoomBot home page">KazoomBot</a>',
'kevin','<a href="http://dznet.com/kevin/" rel="nofollow" title="Kevin home page">Kevin</a>',
'keyoshid','<a href="http://www.yahoo.co.jp" rel="nofollow" title="Bot home page">Yahoo! Japan keyoshid robot study</a>',
'kilroy','Kilroy',
'kinja\-imagebot','Kinja Imagebot',
'kinjabot','Kinjabot',
'knowitall','<a href="http://www.cs.washington.edu/research/knowitall/" rel="nofollow" title="KnowItAll home page">KnowItAll</a>',
'knowledge\.com','<a href="http://www.knowledge.com" rel="nofollow" title="Knowledge.com home page">Knowledge.com</a>',
'ko[_+\s]yappo[_+\s]robot','KO_Yappo_Robot',
'kouaa_krawler','<a href="http://www.kouaa.com" rel="nofollow" title="Kouaa Krawler home page">Kouaa Krawler</a>',
'krugle','<a href="http://www.krugle.com/crawler/info.html" rel="nofollow" title="Bot home page">Krugle</a>',
'ksibot','<a href="http://ego.ms.mff.cuni.cz" rel="nofollow" title="Bot home page">ksibot</a>',
'kummhttp','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_g_l_301105_2\b" rel="nofollow" title="Bot documentation page">KummHttp</a>',
'kurzor','<a href="http://www.easymail.hu" rel="nofollow" title="cursor@easymail.hu Kurzor home page">Kurzor</a>',
'labelgrabber\.txt','LabelGrabber',
'lanshanbot','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=%5Cbid_g_l_140406_1%5Cb" rel="nofollow" title="Bot Information">lanshanbot</a>',
'larbin','<a href="http://para.inria.fr/~ailleret/larbin/index-eng.html" rel="nofollow" title="Bot home page">larbin</a>',
'largesmall[\x20]crawler','largesmall crawler',
'legs','legs',
'letscrawl\.com','<a href="http://letscrawl.com" rel="nofollow" title="Bot home page">LetsCrawl.com</a>',
'libcrawl','Crawl libcrawl',
'lilina','Lilina',
'link_valet_online','<a href="http://www.htmlhelp.com/tools/valet/" rel="nofollow" title="Link Valet Online home page">Link Valet Online</a>',
'linkbot','LinkBot',
'linkdex\.com','<a href="http://www.linkdex.com/about/bots/" rel="nofollow" title="Bot home page">Linkdex</a>',
'linkidator','Link Validator',
'linkscan','LinkScan',
'linkstats[\x20]bot','<span title="As on Nov. 7, 2015, the user agent string did not contain a web address.">LinkStats Bot</span>',
'linkwalker','LinkWalker',
'lipperhey','<a href="http://www.lipperhey.com" rel="nofollow" title="Lipperhey SEO Service Home Page">Lipperhey SEO Service</a>',
'livejournal\.com','LiveJournal.com',
'lmspider','<a href="http://www.nuance.com" rel="nofollow" title="Bot home page lmspider@scansoft.com">lmspider</a>',
'loadtimebot','<a href="http://www.loadtime.net/bot.html" rel="nofollow" title="LoadTimeBot Home Page">LoadTimeBot</a>',
'lockon','Lockon',
'logo_gif','logo.gif Crawler',
'longurl','longurl',
'lssrocketcrawler','<span title="Example UA-String &quot;LSSRocketCrawler/1.0 LightspeedSystems&quot;">LSSRocketCrawler (no contact information)</span>',
'ltbot','<a href="http://www.language-tools.com" rel="nofollow" title="Language Tools Home Page">Language Tools Bot (ltbot)</a>',
'ltx71','<a href="http://ltx71.com" rel="nofollow" title="ltx71 Home Page">ltx71</a>',
'lwp\-request','<a href="http://search.cpan.org/~gaas/libwww-perl-5.69/bin/lwp-request" rel="nofollow" title="lwp-request home page">lwp-request</a>',
'lwp\-trivial','<a href="http://search.cpan.org/src/GAAS/libwww-perl-5.805/lib/LWP/Simple.pm" rel="nofollow" title="lwp-trivial home page">lwp-trivial</a>',
'lycos[_+\s]','Lycos',
'macworm','Mac WWWWorm',
'madaali\.de','<a href="http://www.madaali.de/pfadzurbotseite/bot.html" rel="nofollow" title="Link resulted in a 404 Error on Nov 6, 2014">www.madaali.de</a>',
'magpierss','MagpieRSS',
'magpie','<a href="http://magpierss.sf.net" rel="nofollow" title="Bot home page">MagpieRSS</a>',
'mapoftheinternet\.com','<a href="http://MapoftheInternet.com" rel="nofollow" title="MapoftheInternet.com home page">MapoftheInternet.com</a>',
'marvin','marvin/infoseek',
'mattie','Mattie',
'mediabot','<a href="http://isdownload.biz" rel="nofollow" title="MediaBot refers to isdownload.biz">MediaBot</a>',
'mediafox','MediaFox',
'megaindex','<a href="http://megaindex.com/crawler" rel="nofollow" title="MegaIndex Crawler Page">MegaIndex Crawler</a>, seems to belong to <a href="https://www.megaindex.ru" rel="nofollow" title="MegaIndex.ru Home Page">MegaIndex.ru</a>',
'megite','<a href="http://www.megite.com" rel="nofollow" title="Megite home page">Megite</a>',
'memorybot','<a href="http://archivethe.net/en/index.php/about/internet_memory1" rel="nofollow" title="Archivethe.net Home Page">Archivethe.net</a>',
'mercator','Mercator',
'meshexplorer','NEC-MeshExplorer',
'metager2\-verification\-bot','<a href="http://metager2.de/technology.php" rel="nofollow" title="metager2-verification-bot Home Page">metager2-verification-bot</a>',
'metajobbot','<a href="http://www.metajob.de/crawler" rel="nofollow" title="MetaJobBot">MetaJobBot</a>',
'bbot','BBot',
'metaspinner','<a href="http://index.meta-spinner.de" rel="nofollow" title="Metaspinner home page">Metaspinner</a>',
'metauri','metauri',
'miadev','<a href="http://www.mia-marktplatz.de/spider" rel="nofollow" title="MiaDev spider">MiaDev spider</a>',
'microsoft[_+\s]url[_+\s]control','<a href="http://www.webmasterworld.com/forum11/1005.htm" rel="nofollow" title="Microsoft URL Control  home page">Microsoft URL Control</a>',
'microsoft[\x20]bits','<a href="http://msdn.microsoft.com/en-us/library/bb968799%28v=vs.85%29.aspx" rel="nofollow" title="Microsoft Background Intelligent Transfer Service (BITS)?">Microsoft Background Intelligent Transfer Service (BITS)?</a>',
'microsoft\-webdav\-miniredir','microsoft-webdav-miniredir',
'mindcrawler','MindCrawler',
'mindupbot','<a href="http://datenbutler.de" rel="nofollow" title="DATENBUTLER home page">mindUpBot (datenbutler.de)</a>',
'mini\-reptile','Mini-reptile',
'minirank','<a href="http://minirank.com" rel="nofollow" title="miniRank home page">miniRank</a>',
'misterbot','<a href="http://www.misterbot.fr" rel="nofollow" title="Misterbot home page">Misterbot</a>',
'miva','<a href="http://www.miva.com" rel="nofollow" title="Miva home page">Miva</a>',
'mizzu_labs','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_m_141105_2\b" rel="nofollow" title="Mizzu Labs home page">Mizzu Labs</a>',
'mnogosearch','mnoGoSearch search engine software',
'moget','moget',
'momspider','MOMspider',
'monster','Monster',
'motor','Motor',
'movabletype','movabletype',
'ms[_+\s]search[_+\s]6\.0[_+\s]robot','<a href="http://support.microsoft.com/default.aspx?scid=kb;en-us;284022" title="Microsoft Support Page. [new window]" target="_blank">MS Search 6.0 Robot</a> (MS SharePoint Portal Server?)',
'ms_search_4\.0_robot','<a href="http://support.microsoft.com/default.aspx?scid=kb;en-us;284022" rel="nofollow" title="Bot home page.">MS SharePoint Portal Server - MS Search 4.0 Robot</a>',
'msnbot\-udiscovery','<a href="http://search.msn.com/msnbot.htm" rel="nofollow" title="Feb 18, 2015: UA contains indentification during robots.txt access only.">msnbot-UDiscovery</a> Note: AWStats counts most of its traffic as user traffic',
'msrabot','msrabot',
'msrbot','<a href="http://research.microsoft.com/research/sv/msrbot/" rel="nofollow" title="MSRBOT home page">MSRBOT</a>',
'mt::telegraph::agent','MT::Telegraph::Agent',
'muncher','Muncher',
'muscatferret','Muscat Ferret',
'mwdsearch','Mwd.Search',
'mydoyouhike','<a href="http://www.doyouhike.net/my" rel="nofollow" title="Mydoyouhike home page">Mydoyouhike</a>',
'myweb','Internet Shinchakubin',
'nagios','Nagios',
'nasa_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_140506_2\b" rel="nofollow" title="NASA Search home page">NASA Search</a>',
'ndspider','NDSpider',
'nederland\.zoek','Nederland.zoek',
'netcarta','NetCarta WebMap Engine',
'netcraft','<a href="http://www.netcraft.com/survey/" rel="nofollow" title="Bot home page">Netcraft</a>',
'netluchs','<a href="http://www.netluchs.de" rel="nofollow" title="Bot home page.">Netluchs</a>',
'netmechanic','<a href="http://www.netmechanic.com" rel="nofollow" title="Bot home page">NetMechanic</a>',
'netnewswire','netnewswire',
'netscoop','NetScoop',
'netsprint','<a href="http://www.netsprint.pl/serwis/" rel="nofollow" title="NetSprint home page">NetSprint</a>',
'netvibes','<a href="http://www.netvibes.com" rel="nofollow" title="Bot home page">Netvibes</a>',
'newrelicpinger','newrelicpinger',
'newscan\-online','newscan-online',
'newsfox','Fox News',
'newsgatoronline','NewsGator Online',
'nextgensearchbot','nextgensearchbot', 
'nhse','NHSE Web Forager',
'nicebot','<a href="http://www.egghelp.org/setup.htm" rel="nofollow" title="Bot home page (there may be others)">nicebot</a>',
'nimblecrawler','<a href="http://www.healthline.com" rel="nofollow" title="NimbleCrawler home page">NimbleCrawler</a>',
'ning','ning',
'nomad','nomad',
'northstar','The NorthStar Robot',
'noxtrumbot','<a href="http://www.noxtrum.com" rel="nofollow" title="Bot home page">noxtrumbot</a>',
'npbot','<a href="http://www.nameprotect.com/botinfo.html" rel="nofollow" title="NPBot home page">NPBot</a>',
'nzexplorer','nzexplorer',
'objectssearch','ObjectsSearch',
'occam','Occam',
'ocelli','<a href="http://www.globalspec.com/Ocelli/" rel="nofollow" title="Ocelli home page">Ocelli</a>',
'octopus','HKU WWW Octopus',
'octora_beta_bot','<a href="http://www.octora.com" rel="nofollow" title="Bot home page">Octora Beta Bot</a>',
'onet\.pl[_+\s]sa','<a href="http://szukaj.onet.pl" rel="nofollow" title="Onet.pl_SA home page">Onet.pl_SA</a>',
'onfolio','<a href="http://www.onfolio.com" rel="nofollow" title="Bot home page">Onfolio</a>',
'openfind','Openfind data gatherer',
'opentaggerbot','<a href="http://www.opentagger.com/opentaggerbot.htm" rel="nofollow" title="Bot home page">OpenTaggerBot</a>',
'openwebspider','<a href="http://www.openwebspider.org" rel="nofollow" title="OpenWebSpider home page">OpenWebSpider</a>',
'optimizer','<span title="As on Oct. 2, 2015, the user agent string did not contain a web address.">Optimizer</span>',
'oracle_ultra_search','<a href="http://www.oracle.com/technology/products/ultrasearch/index.html" rel="nofollow" title="Oracle Ultra Search home page">Oracle Ultra Search</a>',
'orb_search','Orb Search',
'orbiter','<a href="http://www.dailyorbit.com/bot.htm" rel="nofollow" title="Orbiter home page">Orbiter</a>',
'packrat','Pack Rat',
'pageboy','PageBoy',
'panscient','panscient',
'parasite','ParaSite',
'passwordmaker\.org','<a href="http://passwordmaker.org" rel="nofollow" title="passwordmaker.org home page">passwordmaker.org</a>',
'patric','Patric',
'pear_http_request_class','<a href="http://pear.php.net" rel="nofollow" title="PEAR HTTP Request class home page">PEAR HTTP Request class</a>',
'peerbot','<a href="http://www.peerbot.com" rel="nofollow" title="PEERbot home page">PEERbot</a>',
'pegasus','pegasus',
'perlcrawler','PerlCrawler 1.0',
'perignator','The Peregrinator',
# Modified by Ryu 2006.03.15
#'perman','Perman surfer',
'perman','<a href="http://www.bug.co.jp/nami-nori/" rel="nofollow" title="Bot home page.">波乗野郎</a>',
'petersnews','Petersnews',
'phantom','Phantom',
'php[_+\s]version[_+\s]tracker','<a href="http://www.nexen.net/phpversion/bot.php" rel="nofollow" title="PHP Version Tracker home page">PHP version tracker</a>',
'phpcrawl','<a href="http://phpcrawl.cuab.de" rel="nofollow" title="PHPCrawl Home page">PHPCrawl</a>',
'phpdig','PhpDig',
'picmole','<a href="http://www.picmole.com" rel="nofollow" title="Bot home page.">Specified address www.picmole.com was not reachable on April 21, 2014</a>',
'pictureofinternet','<a href="http://malfunction.org/poi/" rel="nofollow" title="PictureOfInternet home page">PictureOfInternet</a>',
'piltdownman','PiltdownMan',
'pimptrain','Pimptrain.com\'s robot',
'ping\.blo\.gs','<a href="http://blo.gs/ping.php" rel="nofollow" title="Bot home page.">ping.blo.gs</a>',
'pingdom','<a href="http://www.pingdom.com">Pingdom</a>',
'pioneer','Pioneer',
'pita','pita (pain in the ass?)', 
'pitkow','html_analyzer',
'pjspider','Portal Juice Spider',
'plinki','<a href="http://www.plinki.com" rel="nofollow" title="plinki home page">plinki</a>',
'pluckfeedcrawler','<a href="http://www.pluck.com" rel="nofollow" title="Bot home page.">PluckFeedCrawler</a>',
'plumtreewebaccessor','PlumtreeWebAccessor',
'pogodak','<a href="http://www.pogodak.com" rel="nofollow" title="Pogodak home page">Pogodak.com</a>',
'pompos','<a href="http://dir.com/pompos.html" rel="nofollow" title="Bot home page.">Pompos</a>',
'popdexter','Popdexter',
'poppi','Poppi',
'port_huron_labs','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1133\b" rel="nofollow" title="Port Huron Labs home page">Port Huron Labs</a>',
'portalb','PortalB Spider',
'postfavorites','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_n_s_1135\b" rel="nofollow" title="PostFavorites home page">PostFavorites</a>',
'postpost','postpost', 
'postrank','postrank',
'powermarks','<a href="http://www.kaylon.com/power.html" title="Bot home page">Powermarks</a>', # must come before Arks; seen used by referrer spam
'printfulbot','printfulbot', 
'proodlebot','<a href="http://www.proodle.com" rel="nofollow" title="proodleBot home page">proodleBot</a>',
'protopage','<a href="http://www.protopage.com">Protopage</a>',
'publiclibraryarchive','<a href="http://publiclibraryarchive.org" rel="nofollow" title="On 23 June 2014 a page parked at GoDaddy">publiclibraryarchive.org (related to spiderlytics.com and/or waybackarchive.org?)</a>',
'pyquery','<a href="http://sourceforge.net/projects/pyquery/" rel="nofollow" title="PyQuery home page">PyQuery</a>',
'python','<a href="http://docs.python.org/library/urllib.html" title="Tools developed using a Python library" target="_blank">Python-urllib</a>',
'qihoobot','<a href="http://www.qihoo.com" rel="nofollow" title="QihooBot">QihooBot</a>',
'quipply','quipply', 
'qwantify','<a href="https://www.qwant.com" rel="nofollow" title="Qwant Home Page">Qwant</a>',
'r6\_','<a href="http://www.radian6.com/crawler">Radian 6 Crawler</a>',
'rambler','<a href="http://www.rambler.ru/doc/faq.shtml" rel="nofollow" title="Bot home page">StackRambler</a>',
'ratingburner','ratingburner', 
'raven','Raven Search',
'rbse','RBSE Spider',
'redalert','Red Alert',
'regator','regator', 
'relevantnoise\.com','<a href="http://www.relevantnoise.com" rel="nofollow" title="Relevant Noise">Relevant Noise</a>',
'resumerobot','Resume Robot',
'rhcs','RoadHouse Crawling System',
'riddler','<a href="http://riddler.io/about" rel="nofollow" title="Riddler">Riddler</a>',
'road_runner','Road Runner: The ImageScape Robot',
'robbie','Robbie the Robot',
'robi','ComputingSite Robi/1.0',
'robocrawl','RoboCrawl Spider',
'robofox','RoboFox',
'robozilla','Robozilla',
'rojo','<a href="http://rojo.com" rel="nofollow" title="Bot home page">RoJo</a> aggregator',
'rome[\x20]client','rome client',
'roverbot','Roverbot',
'rpt\-httpclient','rpt-httpclient', 
'rssgraffiti','rssgraffiti', 
'rssimagesbot','<a href="http://herbert.groot.jebbink.nl/?app=rssImages" rel="nofollow" title="Bot home page">rssImagesBot</a>',
'ruffle','<a href="http://www.unreach.net" rel="nofollow" title="Bot home page">ruffle SemanticWeb crawler</a>',
'rufusbot','<a href="http://64.124.122.252.webaroo.com/feedback.html" rel="nofollow" title="Bot home page">RufusBot Rufus Web Miner</a>',
'rules','RuLeS',
'safeads\.xyz','<a href="http://www.safeads.xyz" rel="nofollow" title="SafeAds.xyz">SafeAds.xyz</a>',
'safetynetrobot','SafetyNet Robot',
'sage\+\+','sage++',
'sandcrawler','<a href="http://www.microsoft.com" rel="nofollow" title="Bot home page">SandCrawler (Microsoft)</a>',
'savetheworldheritage','<a href="http://savetheworldheritage.org" rel="nofollow" title="On March 4, 2015 a page parked at GoDaddy">savetheworldheritage.org (related to spiderlytics.com, waybackarchive.org and/or publiclibraryarchive.org?)</a>',
'sbider','<a href="http://www.sitesell.com/sbider.html" rel="nofollow" title="Bot home page">SBIder</a>',
'schizozilla','<a href="http://spamhuntress.com/2005/03/18/gizmo/" rel="nofollow" title="Schizozilla home page">Schizozilla</a>',
'scooter','scooter',
'scoutjet','<a href="http://www.scoutjet.com">ScoutJet</a> crawler for <a href="http://blekko.com">Blekko</a>.',
'scumbot','Scumbot',
'search\-info','Sleek',
'search_au','Search.Aus-AU.COM',
'searchguild[_+\s]dmoz[_+\s]experiment','<a href="http://www.searchguild.com" rel="nofollow" title="SearchGuild_DMOZ_Experiment  home page">SearchGuild_DMOZ_Experiment</a>',
'searchmetricsbot','<a href="http://www.searchmetrics.com/en/searchmetrics-bot/" rel="nofollow" title="SearchmetricsBot">SearchmetricsBot</a>',
'searchprocess','SearchProcess',
'seekbot','<a href="http://www.seekbot.net/bot.html" rel="nofollow" title="Bot home page">Seekbot</a>',
'semalt','<a href="http://semalt.semalt.com" rel="nofollow" title="seamalt.com Home Page">seamalt.com</a>',
'senrigan','Senrigan',
'sensis_web_crawler','<a href="http://www.sensis.com.au" rel="nofollow" title="Sensis Web Crawler home page">Sensis Web Crawler</a>',
'seodiver','<a href="http://www.seodiver.com/bot" rel="nofollow" title="SEO DIVER Bot Home Page">SEO DIVER</a>',
'seokicks\.de','<a href="http://www.seokicks.de/robot.html" rel="nofollow" title="SEOkicks Webcrawler home page">SEOkicks Webcrawler</a>',
'seoscanners','<a href="http://seoscanners.net" rel="nofollow" title="On August 4, 2015 a page parked at GoDaddy">seoscanners.net</a> (related to publiclibraryarchive.org and savetheworldheritage.org?)',
'sgscout','SG-Scout',
'shaggy','ShagSeeker',
'shaihulud','Shai\'Hulud',
'shareaholicbot','shareaholicbot',
'shoutcast','Shoutcast Directory Service',
'sift','Sift',
'simbot','Simmany Robot Ver1.0',
'simplepie','simplepie', 
'sistrix','<a href="http://crawler.sistrix.net" rel="nofollow" title="SISTRIX Crawler Page">SISTRIX Crawler</a>',
'site\-valet','Site Valet',
'sitebot','<a href="http://www.sitebot.org/robot/">SiteBot</a>',
'sitedomain\-bot','<a href="http://www.sitedomain.de/sitedomain-bot/" rel="nofollow" title="Sitedomain-Bot Home Page">Sitedomain.de</a>',
'sitetech','SiteTech-Rover',
'skimbot','<a href="http://www.skimlinks.com" rel="nofollow" title="SkimBot">SkimBot</a>',
'skymob','Skymob.com',
'slcrawler','SLCrawler',
'slurp','<a href="http://help.yahoo.com/help/us/ysearch/slurp/" rel="nofollow" title="Bot home page">Yahoo Slurp</a>',
'slysearch','SlySearch',
'smartspider','Smart Spider',
'smtbot','<a href="http://www.similartech.com/smtbot" rel="nofollow" title="SMTBot Home Page">SMTBot</a>',
'snap\.com_beta_crawler','<a href="http://www.snap.com" rel="nofollow" title="snap.com beta crawler home page">snap.com beta crawler</a>',
'snappy','<a href="http://www.urltrends.com/faq.php" rel="nofollow" title="Bot home page">Snappy</a>',
'snooper','Snooper',
'sohu\-search','<a href="http://corp.sohu.com" rel="nofollow" title="Bot home page">sohu-search</a>',
'sohu','<a href="http://corp.sohu.com" rel="nofollow" title="Bot home page">sohu agent</a>',
'solbot','Solbot',
'speedy','<a href="http://www.entireweb.com/about/search_tech/speedyspider/" rel="nofollow" title="Speedy Spider home page">Speedy Spider</a>',
'sphere_scout','<a href="http://www.sphere.com" rel="nofollow" title="Bot home page">Sphere Scout</a>',
'spider[_+\s]monkey','Spider monkey',
'spiderline','Spiderline Crawler',
'spiderlytics','Spiderlytics: No homepage, e-mail only: spider (at) spiderlytics.com',
'spiderman','<a href="http://www.iscrawling.com" rel="nofollow" title="Spiderman home page">Spiderman</a>',
'spiderview','SpiderView&trade;',
'spip','<a href="http://www.spip.net" rel="nofollow" title="SPIP home page">SPIP</a>',
'sproose_crawler','<a href="http://www.sproose.com/bot.html" rel="nofollow" title="Bot home page">sproose crawler</a>',
'spry','Spry Wizard Robot',
'sqworm','<a href="http://www.websense.com" rel="nofollow" title="Bot home page (source: http://www.pgts.com.au)">Sqworm</a>',
'ssearcher','Site Searcher',
'steeler','<a href="http://www.tkl.iis.u-tokyo.ac.jp/~crawler/" rel="nofollow" title="Steeler home page">Steeler</a>',
'steroid__download','<a href="http://faqs.org.ru/progr/pascal/delphi_internet2.htm" rel="nofollow" title="STEROID  Download home page">STEROID  Download</a>',
'stq_bot','<a href="http://www.searchteq.de" rel="nofollow" title="SEARCHTEQ Home Page">SEARCHTEQ</a>',
'stratagems[\x20]kumo','Stratagems Kumo',
'suchfin\-bot','<a href="http://www.suchfin.de" rel="nofollow" title="Suchfin-Bot home page">Suchfin-Bot</a>',
'suke','<a href="http://kensaku.org" rel="nofollow" title="Bot home page">Suke</a>',
'summify\.com','<a href="http://summify.com">summify.com</a>', 
'sunrise','<a href="http://www.sunrisexp.com" rel="nofollow" title="Sunrise home page">Sunrise</a>',
'suntek','suntek search engine',
'superbot','<a href="http://www.sparkleware.com/superbot/" rel="nofollow" title="SuperBot home page">SuperBot</a>',
'superfeedr','superfeedr', 
'susie','<a href="http://www.sync2it.com/bms/susie.php" rel="nofollow" title="Susie home page">Susie</a>',
'sven','Sven',
'syndic8','Syndic8',
'syndicapi','<a href="http://syndicapi.com/bot.html" rel="nofollow" title="Bot home page">SyndicAPI</a>',
'synoobot','<a href="http://www.synoo.de/bot.html" rel="nofollow" title="webmaster@synoo.com SynooBot home page">SynooBot</a>',
'synthesio','synthesio', 
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','<a href="http://www.thunderstone.com" rel="nofollow" title="Bot home page. Used by many.">T-H-U-N-D-E-R-S-T-O-N-E</a>',
'tach_bw','TACH Black Widow',
'tagyu_agent','<a href="http://www.tagyu.com" rel="nofollow" title="Bot home page">Tagyu Agent</a>',
'tailrank','<a href="http://tailrank.com/robot">TailRank</a>',
'tarantula','Tarantula',
'tarspider','tarspider',
'tcl_http_client_package','<a href="http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm" rel="nofollow" title="Tcl http client package home page">Tcl http client package</a>',
'techbot','TechBOT',
'technoratibot','Technoratibot',
'templeton','Templeton',
'teoma','teoma',
'teragramcrawlersurf','<a href="http://www.teragram.com" rel="nofollow" title="TeragramCrawlerSURF home page">TeragramCrawlerSURF</a>',
'test_crawler','<a href="http://netp.ath.cx" rel="nofollow" title="Test Crawler home page">Test Crawler</a>',
'testbot','<a href="http://www.agbrain.com" rel="nofollow" title="TestBot home page">TestBot</a>',
'thumbsniper','<a href="http://thumbsniper.com" rel="nofollow" title="ThumbSniper Home Page">ThumbSniper</a>',
'titan','TITAN',
'titin','TitIn',
'tkwww','The TkWWW Robot',
'tlspider','TLSpider',
'topblogsinfo','topblogsinfo', 
'topicblogs','<a href="http://www.topicblogs.com" rel="nofollow" title="Bot home page">topicblogs</a>',
'topix\.net','topix.net', 
'trapit','trapit', 
'trileet','trileet', 
'turtlescanner','Turtle',
'turtle','Turtle',
'tutorgigbot','<a href="http://www.tutorgig.info" rel="nofollow" title="TutorGigBot home page">TutorGigBot</a>',
'tweetedtimes','<a href="http://tweetedtimes.com">TweetedTimes Bot</a>',
'twiceler','<a href="http://www.cuill.com/twiceler/robot.html" rel="nofollow" title="Twiceler home page">twiceler</a>',
'twisted[\x20]pagegetter','twisted pagegetter',
'twitterbot','<span title="As on Nov. 27, 2015, the user agent string did not contain a web address.">Twitterbot</span>',
'twitterfeed','twitterfeed',
'ubicrawler','<a href="http://law.dsi.unimi.it/ubicrawler/" rel="nofollow" title="Bot home page">UbiCrawler</a>',
'ucsd','UCSD Crawl',
'udmsearch','UdmSearch',
'ultraseek','Ultraseek',
'um\-ic','ubermetrics-technologies.com',
'um\-ln','ubermetrics-technologies.com',
'unchaos_bot_hybrid_web_search_engine','<a href="http://www.unchaos.com" rel="nofollow" title="UnChaos Bot Hybrid Web Search Engine home page">UnChaos Bot Hybrid Web Search Engine</a>',
'unido\-bot','<a href="http://www.unchina.org/unido/unido/our_projects/3_3.html" rel="nofollow" title="unido-bot home page">unido-bot</a>',
'unisterbot','UnisterBot; E-Mail only: crawler (at) unister.de',
'universalfeedparser','<a href="http://feedparser.org" rel="nofollow" title="Bot home page">UniversalFeedParser</a>',
'unlost_web_crawler','Unlost Web Crawler',
'unwindfetchor','unwindfetchor', 
'updated','<a href="http://www.updated.com" rel="nofollow" title="updated home page">updated</a>',
'urlck','URL Check',
'ustc\-semantic\-group','<a href="http://ai.ustc.edu.cn/mas/en/research/index.php" rel="nofollow" title="Bot home page">USTC-Semantic-Group</a>',
'vagabondo\-wap','<a href="http://www.wise-guys.nl/Contact/index.php?botselected=webagents&amp;lang=uk" rel="nofollow" title="Bot home page">Vagabondo-WAP</a>',
'vagabondo','<a href="http://www.wise-guys.nl/Contact/index.php?botselected=webagents&amp;lang=uk" rel="nofollow" title="Bot home page">Vagabondo</a>',
'valkyrie','Valkyrie',
'vermut','<a href="http://vermut.aol.com" rel="nofollow" title="Bot home page">Vermut</a>',
'versus_crawler_from_eda\.baykan@epfl\.ch','<a href="http://www.epfl.ch/Eindex.html" rel="nofollow" title="versus crawler from eda.baykan@epfl.ch home page">versus crawler from eda.baykan@epfl.ch</a>',
'verticrawl','Verticrawl',
'vespa_crawler','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=%5Cbid_t_z_030406_1%5Cb" rel="nofollow" title="Bot home page">Vespa Crawler</a>',
'victoria','Victoria',
'virus[_+\s]detector','<a href="http://www.securecomputing.com" rel="nofollow" title="virus_harvester@securecomputing.com; Bot home page">virus_detector</a>',
'visionsearch','vision-search',
'voidbot','void-bot',
'voltron','<span title="As on Oct. 21, 2015, the user agent string did not contain a web address.">voltron</span>',
'vse/','<a href="http://www.vivisimo.com" rel="nofollow" title="VSE home page">VSE</a>',
'vwbot','VWbot',
'w3c[_+\s]css[_+\s]validator[_+\s]jfouffa','<a href="http://jigsaw.w3.org/css-validator/" rel="nofollow" title="Bot home page">W3C jigsaw CSS Validator</a>',
'w3index','The NWI Robot',
'w3m2','W3M2',
'wallpaper','WallPaper (alias crawlpaper)',
'wanderer','the World Wide Web Wanderer',
'wapspider','w@pSpider by wap4.com',
'wapspirlider','wapspIRLider',
'watchmouse','<a href="http://www.watchmouse.com/en/" rel="nofollow" title="WatcMouse">WatchMouse Website Monitor</a>',
'wavefire','<a href="http://www.wavefire.com" rel="nofollow" title="info@wavefire.com; Bot home page">Wavefire</a>',
'waybackarchive\.org','<span title="Maybe related to spiderlytics.">No website, email: spider(at)waybackarchive.org</span>',
'wazzup','wazzup', 
'web_downloader','<a href="http://www.krasu.ru/soft/chuchelo/" rel="nofollow" title="Web Downloader home page">Web Downloader</a>',
'webbandit','WebBandit Web Spider',
'webbase', 'WebBase',
'webcatcher','WebCatcher',
'webclipping\.com','WebClipping.com',
'webcollage','<a href="http://www.jwz.org/webcollage/" rel="nofollow" title="WebCollage home page">WebCollage</a>',
'webcompass','webcompass',
'webcopy','WebCopy',
'webcrawl\.net','<a href="http://www.webcrawl.net" rel="nofollow" title="webcrawl.net home page">webcrawl.net</a>',
'webdup','<a href="http://www.webdup.com/en/index.html" rel="nofollow" title="Webdup home page">Webdup</a>',
'webfetcher','webfetcher',
'webfilter','<a href="http://www.verso.com/enterprise/netspective/webfilter.asp" rel="nofollow" title="Bot home page">WebFilter</a>',
'webfoot','The Webfoot Robot',
'webinator','Webinator',
'webindexer','<a href="mailto://webindexerv1@yahoo.com" title="WebIndexer home page">WebIndexer</a>',
'weblayers','Weblayers',
'weblinker','WebLinker',
'webminer','<a href="http://64.124.122.252/feedback.html" rel="nofollow" title="WebMiner home page">WebMiner</a>',
'webmirror','WebMirror',
'webmoose','The Web Moose',
'webquest','WebQuest',
'webreader','Digimarc MarcSpider',
'webreaper','WebReaper',
'website[_+\s]monitoring[_+\s]bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" rel="nofollow" title="Website_Monitoring_Bot home page">Website_Monitoring_Bot</a>',
'websnarf','Websnarf',
'webspider','WebSpider',
'webvac','WebVac',
'webvulncrawl','WebVulnCrawl',
'webwalker','WebWalker',
'webwalk','webwalk',
# Other robots reported by users
'^finbot', '<span title="As on Sep. 10, 2015, the user agent string did not contain a web address.">finbot</span>',
'feedfetcher\-google','<a href="http://www.google.com/feedfetcher.html" rel="nofollow" title="Bot home page">Feedfetcher-Google</a>',
'gigablastopensource','<a href="http://www.gigablast.com" rel="nofollow" title="Gigablast Home page">GigablastOpenSource</a>, an Open Source Search Engine(<a href="https://github.com/gigablast/open-source-search-engine/wiki" rel="nofollow" title="at GitHub">Wiki</a>)',
'madaali\.de','<a href="http://www.madaali.de/pfadzurbotseite/bot.html" rel="nofollow" title="Link resulted in a 404 Error on Nov 6, 2014">www.madaali.de</a>',
'mediapartners\-google','<a href="https://adwords.google.com" title="Bot home page">Google AdSense</a>',
# 'Mediapartners-Google (Feb 12, 2015: no additional information in UA String, seems to use <a href="http://www.gigablast.com" rel="nofollow" title="Gigablast Home page">GigablastOpenSource</a>',
# Uses UA string "Mediapartners-Google" only, and there were accesses using a UA string "GigablastOpenSource/1.0" from the same IP address.
# Therefore this is probably not related to Google. (4.3.2015, Albrecht Müller)
'microsoft.*discovery','<a href="http://support.microsoft.com/kb/838028/en-us" rel="nofollow" title="Microsoft KB838028">Microsoft Office Protocol Discovery</a>/<a href="http://blogs.msdn.com/b/vsofficedeveloper/archive/2008/03/11/office-existence-discovery-protocol.aspx" rel="nofollow" title="Description of the Microsoft Office Existence Discovery">Microsoft Office Existence Discovery</a>',
'loocalcrawler/nutch','<a href="https://weluse.de" rel="nofollow" title="https://weluse.de/">LoocalCrawler/Nutch</a>',
'nutchosu\-vlib','<a href="http://lucene.apache.org/nutch/bot.html" rel="nofollow" title="NutchOSU-VLIB home page">NutchOSU-VLIB</a>',
'nutch','<a href="http://lucene.apache.org/nutch/" rel="nofollow" title="Bot home page. Used by many, including Looksmart.">Nutch</a>',
'bspider','BSpider',
'publiclibraryarchive','<a href="http://publiclibraryarchive.org" rel="nofollow" title="On 23 June 2014 a page parked at GoDaddy">publiclibraryarchive.org (related to spiderlytics.com and/or waybackarchive.org?)</a>',
#Observations 2014-06-23
#Domain publiclibraryarchive.org is parked at GoDaddy.com
#from https://www.projecthoneypot.org/
#81.30.151.220's User Agent Strings (honeypot classified this ip as an mail server, active about 6 years ago)
#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
#176.9.138.27's User Agent Strings
#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
#Mozilla/5.0 (compatible; Spiderlytics/1.0; +spider@spiderlytics.com)
#Mozilla/5.0 (compatible; waybackarchive.org/1.0; +spider@waybackarchive.org)
#146.0.32.165's User Agent Strings
#Mozilla/5.0 (compatible; publiclibraryarchive.org/1.0; +crawl@publiclibraryarchive.org)
#Mozilla/5.0 (compatible; savetheworldheritage.org/1.0; +crawl@savetheworldheritage.org)
#Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)
'siteexplorer\.info','<a href="http://siteexplorer.info" rel="nofollow" title="Site Explorer home page">Site Explorer</a>',
'turnitinbot','<a href="http://www.turnitin.com/robot/crawlerinfo.html" rel="nofollow" title="TurnitinBot Home Page">Turn It In</a>',
'turtle','Turtle',
# 2.12.2013 Project Honeypot reports at least one of the IPs used by waybackarchive with a spiderlytics UA string.
# Probably not related to the wayback machine of archive.org.
'xenu\'s_link_sleuth','<a href="http://home.snafu.de/tilman/xenulink.html" rel="nofollow" title="Xenu Link Sleuth home page">Xenu Link Sleuth</a>',
'yandex','<a href="http://yandex.com/bots" rel="nofollow" title="Bot home page">Yandex Bot</a>',
# Other id that are 99% of robots
'libwww\-perl','Perl tool',
'w3c\-webcon','WebCon - the Libwww Command Line Tool',
'lwp','LibWWW-perl',
'webwatch','WebWatch',
'wells_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&amp;template=detail.html&amp;match=\bid_t_z_1484\b" rel="nofollow" title="Wells Search home page">Wells Search</a>',
'wer\-liefert\-was','<a href="http://www.wlw.de/extern/crawler/Wer-liefert-was-Crawler.html" rel="nofollow" title="Page given in UA string gave a 404 Error on July 2, 2015">Wer-liefert-was Crawler</a> Note: AWStats counts most traffic as user traffic',
'wesee:search','<a href="http://www.wesee.com/en/support/bot/" rel="nofollow" title="WeSEE Bot Home Page (gave a 404-Error on Nov. 2, 2013)">WeSEE Bot</a>',
'wevikabot','<a href="http://www.wevika.de" rel="nofollow" title="WeViKa Home Page">WeViKa</a>',
'whatuseek','whatUseek Winona',
'whowhere','WhoWhere Robot',
'windows\-rss\-platform','windows-rss-platform',
'wired\-digital','Wired Digital',
'zyborg','<a href="http://www.WISEnutbot.com" rel="nofollow" title="wn-14.zyborg@looksmart.net Bot home page">ZyBorg</a>',
'wisenutbot','<a href="http://www.WISEnutbot.com" rel="nofollow" title="Bot home page">WISENutbot</a>',
'wiumi','wiumi',
'wmir','w3mir',
'wolp','WebStolperer',
'wombat','The Web Wombat',
'wonderer','Web Wombat Redback Spider',
'woozweb','Woozweb Monitoring',
'wordpress','<a href="http://wordpress.org" rel="nofollow" title="WordPress home page">WordPress</a>',
'worm','The World Wide Web Worm',
'wume_crawler','<a href="http://wume.cse.lehigh.edu/~xiq204/crawler/" rel="nofollow" title="wume crawler home page">wume crawler</a>',
'wwwc','WWWC',
'wwweasel',,'<a href="http://wwweasel.de" rel="nofollow" title="Website_Monitoring_Bot home page">WWWeasel</a>',
'wz101','WebZinger',
'xget','XGET',
'xirq','<a href="http://www.xirq.com" rel="nofollow" title="xirq home page">xirq</a>',
'xydo','xydo', 
'y!j','<a href="http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html" rel="nofollow" title="Bot home page">Y!J Yahoo Japan</a>',
'yahoo![\x20]searchmonkey','Additional Yahoo bots.',
'yahoo!_mindset','<a href="http://mindset.research.yahoo.com" rel="nofollow" title="Bot home page">Yahoo! Mindset</a>',
'yahoo\-blogs','<a href="http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html" rel="nofollow" title="Bot home page">Yahoo-Blogs</a>',
'yahoo\-mmcrawler','<a href="mailto:mms-mmcrawler-support@yahoo-inc.com?subject=Yahoo-MMCrawler Information" title="E-mail Bot">Yahoo-MMCrawler</a>',
'yahoo\-newscrawler','Additional Yahoo bots.',
'yahoo[\x20]pipes','<a href="http://pipes.yahoo.com/pipes/" rel="nofollow" title="Bot home page">Yahoo pipes</a>',
'yahoo\-verticalcrawler','Yahoo Vertical Crawler',
'yahoocachesystem','Additional Yahoo bots.',
'yahooexternalcache','Additional Yahoo bots.',
'yahoofeedseeker','<a href="http://publisher.yahoo.com/rssguide" rel="nofollow" title="Bot home page">Yahoo Feed Seeker</a>',
'yahooseeker\-testing','<a href="http://search.yahoo.com" rel="nofollow" title="Bot home page">YahooSeeker-Testing</a>',
'yahooseeker','<a href="http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html" rel="nofollow" title="Bot home page">YahooSeeker Yahoo! Blog crawler</a>',
'yahooysmcm','Additional Yahoo bots.',
'yammer','yammer', 
'yanga','Yanga WorldSearch Bot',
'yet\-another\-spider','<a href="http://188.40.112.195" rel="nofollow" title="Yet-Another-Spider home page">Yet-Another-Spider</a>',
'yeti/','Yeti',
'yie8','yie8',
'yodaobot','<a href="http://www.yodao.com/help/webmaster/spider/" rel="nofollow" title="YodaoBot">OutfoxBot/YodaoBot</a>',
'yooglifetchagent','<a href="http://www.yoogli.com" rel="nofollow" title="yoogliFetchAgent home page">yoogliFetchAgent</a>',
'youdao','<a href="http://www.youdao.com/help/webmaster/spider/" rel="nofollow" title="Bot home page">youdao</a>', 
'yourls','yourls',
'z\-add_link_checker','<a href="http://w3.z-add.co.uk/linkcheck/" rel="nofollow" title="Z-Add Link Checker home page">Z-Add Link Checker</a>',
'zealbot','ZealBot',
'zemanta','zemanta',
'zend_http_client','Zend Http Client',
'zeus','<a href="http://www.webmasterworld.com/forum11/1840.htm" rel="nofollow" title="Bot documentation">Zeus Webster Pro</a>',
'zhuaxia','<a href="http://www.zhuaxia.com">ZhuaXia</a>',
'[^a]fish','Fish search',
'[\x20]netseer[\x20]','<a href="http://www.netseer.com/crawler.html">Net Seer</a>',
'^[1-3]$',"$Message[217]",
'^finbot', '<span title="As on Sep. 10, 2015, the user agent string did not contain a web address.">finbot</span>',
'^motorola$','Suspected Bot masquerading as "Motorola"',
'^msie',"$Message[216]1", 
'^webindex$', '<span title="As on Oct. 28, 2015, the user agent string did not contain a web address.">WebIndex</span>',
'1\-more_scanner','<a href="http://www.myzips.com/software/1-More-Scanner.phtml" rel="nofollow" title="1-More Scanner home page">1-More Scanner</a>',

# Generic robot
'discover', "$Message[204]discover$Message[205]",
# Additional bots found by Sussex.
'^[1-3]$',"$Message[217]",
#'yandexbot','yandexbot', #already covered by 'yandex'
'uri::fetch','URI::Fetch',
'robot',"$Message[204]robot$Message[205]",
'blog','blog',
'checker',"$Message[204]checker$Message[205]",
'crawl',"$Message[204]crawl$Message[205]",
'discover',"$Message[204]discover$Message[205]",
'feed','feed',
'fetcher','fetcher',
'hunter',"$Message[204]hunter$Message[205]",
'link','link',
'scanner',"$Message[204]scanner$Message[205]",
'seek','seek',
'sitemap','sitemap',
'spider',"$Message[204]spider$Message[205]",
'sucker',"$Message[204]sucker$Message[205]",
'survey','survey',
'validator','validator',
'bot[\s_+:,\.\;/\\\-]',"$Message[204]bot$Message[219]",
'[\s_+:,\.\;/\\\-]bot',"$Message[204]bot$Message[220]",
'curl',"$Message[206].",
'php','A PHP script', 
'ruby/','Ruby script', 
'no_user_agent',"$Message[209]",
# Moving oBot towards the end so it does not pick up other *obot robots
'oBot/','oBot',
# Unknown robots identified by hit on robots.txt
'unknown',"$Message[210]"
);


# RobotsAffiliateLib
# This list tries to tell which search engine (company or product) operates
# a robot. The key is the robot ID, i.e. the same regular expression string
# that identifies the robot in RobotsHashIDLib; the value is the name of the
# affiliated company or product.
#
# Keys are single-quoted Perl strings, so a sequence such as '\-' or '[\x20]'
# is stored literally. A key must therefore be spelled exactly like the
# corresponding RobotsHashIDLib key (including escaping), otherwise a lookup
# by robot ID does not find the entry.
#
# An entry in this list is optional.
#-------------------------------------------------------------
%RobotsAffiliateLib = (
'bingpreview'=>'Bing',
'fast\-webcrawler'=>'AllTheWeb',
'googlebot'=>'Google',
'google\-sitemap'=>'Google',
'msnbot'=>'MSN',
'nutch'=>'Looksmart',
'scooter'=>'AltaVista',
'wisenutbot'=>'Looksmart',
'yahoo\-blogs'=>'Yahoo',
'yahoo\-verticalcrawler'=>'Yahoo',
'yahoofeedseeker'=>'Yahoo',
'yahooseeker\-testing'=>'Yahoo',
'yahooseeker'=>'Yahoo',
'yahoo\-mmcrawler'=>'Yahoo',
'yahoo!_mindset'=>'Yahoo',
'zyborg'=>'Looksmart',
'cfetch'=>'Kosmix',
'^voyager/'=>'Kosmix',
# Additional bots found by Sussex.
'feedfetcher\-google'=>'Google',
'bingbot'=>'MSN',
'twitterbot/'=>'Twitter',
'twitterfeed'=>'Twitter',
# NOTE(review): confirm that this spelling matches the RobotsHashIDLib key
# for Yahoo! Slurp (that entry is outside the reviewed section).
'yahoo!\sslurp'=>'Yahoo',
# The following three keys are spelled like their RobotsHashIDLib entries
# ('[\x20]' instead of '\s', and an escaped hyphen).
'yahoo[\x20]pipes'=>'Yahoo',
'yahoo\-newscrawler'=>'Yahoo',
'yahoocachesystem'=>'Yahoo',
'yahooexternalcache'=>'Yahoo',
'yahoo![\x20]searchmonkey'=>'Yahoo',
'yahooysmcm'=>'Yahoo'
);

# Sanity check.
# Enable this code and run perl robots.pm to check file entries are ok
#-----------------------------------------------------------------------------
#my %listcheck = ();
#foreach my $key (@RobotsSearchIDOrder_list1) {
#	if (! exists($RobotsHashIDLib{$key})) { print "Entry '$key' has been found in RobotsSearchIDOrder_list1 with no value in RobotsHashIDLib\n";}
#	if ($listcheck{$key} == 1) {print "Entry '$key' has been found in RobotsSearchIDOrder_list1 twice\n";}
#	else {
#		$listcheck{$key} = 1;
#		foreach my $key3 (@listcheck) {
#			if ($key =~ /$key3/){
#				print "Entry '$key3' is substring of '$key'. Entry '$key' hits nothing.\n";
#			}
#		}
#		push(@listcheck,$key);
#	}
#}
#foreach my $key (@RobotsSearchIDOrder_list2) {
#	if (! exists($RobotsHashIDLib{$key})) { print "Entry '$key' has been found in RobotsSearchIDOrder_list2 with no value in RobotsHashIDLib\n";}
#	if ($listcheck{$key} == 1) {print "Entry '$key' has been found in RobotsSearchIDOrder_list1 and RobotsSearchIDOrder_list2 twice\n";}
#	else {
#		$listcheck{$key} = 1;
#		foreach my $key3 (@listcheck) {
#			if ($key =~ /$key3/){
#				print "Entry '$key3' is substring of '$key'. Entry '$key' hits nothing.\n";
#			}
#		}
#	}
#}
#foreach my $key (@RobotsSearchIDOrder_listgen) { 
#	if (! exists($RobotsHashIDLib{$key})) { print "Entry '$key' has been found in RobotsSearchIDOrder_listgen with no value in RobotsHashIDLib\n"; }
#	if ($listcheck{$key} == 1) {print "Entry '$key' has been found in RobotsSearchIDOrder_list1, RobotsSearchIDOrder_list2 and RobotsSearchIDOrder_listgen twice\n";}
#	else {
#		$listcheck{$key} = 1;
#		foreach my $key3 (@listcheck) {
#			if ($key =~ /$key3/){
#				print "Entry '$key3' is substring of '$key'. Entry '$key' hits nothing.\n";
#			}
#		}
#	}
#}
#foreach my $key (keys %RobotsHashLib) {
#	my $found=0;
#	foreach my $key2 (values %RobotsHashIDLib) {
#		if ($key eq $key2) { $found=1; last; }
#	}
#	if (! $found) { die "Entry '$key' has been found in RobotsHashLib with no value in RobotsHashIDLib"; }
#}
#
#$listcheck{"unknown"} = 1;
#foreach my $key (keys %RobotsHashIDLib) {
#	if ( $listcheck{$key} != 1) { print "Entry '$key' has been found in RobotsHashIDLib but has not been found in RobotsSearchIDOrder_list1, RobotsSearchIDOrder_list2 and RobotsSearchIDOrder_listgen\n";}
#}
#
#print '@RobotsSearchIDOrder_list1 '.@RobotsSearchIDOrder_list1."\n";
#
#foreach my $key (@RobotsSearchIDOrder_list1) {
#	print "$key\n";
#}
#
#print '@RobotsSearchIDOrder_list2 '.@RobotsSearchIDOrder_list2."\n";
#
#foreach my $key (@RobotsSearchIDOrder_list2) {
#	print "$key\n";
#}
#
#print '@RobotsSearchIDOrder_listgen '.@RobotsSearchIDOrder_listgen."\n";
#
#foreach my $key (@RobotsSearchIDOrder_listgen) {
#	print "$key\n";
#}
#
#print '%RobotsHashIDLib '.keys(%RobotsHashIDLib)."\n";

#foreach my $key (keys %RobotsHashIDLib) {
#	print "$key\n";
#}

# A Perl file loaded with require/use must finish by returning a true value.
1;
