<?php
/*
 * Robot
 *
 * @copyright   Copyright &copy; 2004-2006, Katsumi Saito <katsumi@jo1upk.ymt.prug.or.jp>
 * @version     $Id: robots.cls.php,v 0.5 2006/06/04 01:08:00 upk Exp $
 * @license     http://opensource.org/licenses/gpl-license.php GNU Public License
 * 情報取得先
 * o Robot一覧
 *   http://www.robotstxt.org/wc/active/all.txt
 * o その他のRobot
 *   http://www.jafsoft.com/searchengines/webbots.html
 * o 参考にしたコード(AWStats)
 *   http://awstats.sourceforge.net/
 *   Copyright (C) 2000-2006 - Laurent Destailleur - eldy@users.sourceforge.net
 *   awstats-6.6/wwwroot/cgi-bin/lib/robots.pm
 */

/**
 * Lookup table and matcher that identifies web robots from a User-Agent string.
 *
 * The table is an ordered list of regular-expression fragments. Matching is
 * "first hit wins", so the ORDER of the entries is significant: a specific id
 * must always be listed before any shorter id that would also match it.
 */
class robots
{
	/**
	 * Ordered map of 'robot id' => 'robot clear text'.
	 *
	 * Each key is a PCRE fragment that get_robots_info() wraps in single-quote
	 * delimiters and matches case-insensitively, so a key must never contain
	 * a single quote. A few entries carry an empty clear text; they still
	 * match and are returned with an empty name.
	 */
	var $robots_lib = array(
		// 'robot id' => 'robot clear text'

		// Common robots (In robot file)
		'appie'			=> 'Walhello appie',
		'architext'		=> 'ArchitextSpider',
		'jeeves'		=> 'AskJeeves',
		'bjaaland'		=> 'Bjaaland',
		'muscatferret'		=> 'Muscat Ferret',		// Must be before ferret
		'ferret'		=> 'Wild Ferret Web Hopper #1, #2, #3',
		'googlebot'		=> 'Googlebot',			// Google
		'gulliver'		=> 'Northern Light Gulliver',
		'harvest'		=> 'Harvest',
		'htdig'			=> 'ht://Dig',
		'linkwalker'		=> 'LinkWalker',
		'lycos_'		=> 'Lycos',
		'moget'			=> 'moget',
		'myweb'			=> 'Internet Shinchakubin',
		'nextgensearchbot'	=> 'ZoomInfo Crawler',		// Add UPK
		'nomad'			=> 'Nomad',
		'scooter'		=> 'Scooter',			// AltaVista
		'slurp'			=> 'Inktomi Slurp',
		'^voyager\/'		=> 'Voyager',
		'weblayers'		=> 'weblayers',

		// Common robots (Not in robot file)
		'antibot'		=> 'Antibot',
		'bruinbot'		=> 'The web archive',
		'digout4u'		=> 'Digout4u',
		'echo'			=> 'EchO!',
		'fast\-webcrawler'	=> 'Fast-Webcrawler',		// AllTheWeb
		'ia_archiver\-web\.archive\.org' => '',			// Must be before ia_archiver to avoid confusion with alexa
		'ia_archiver'		=> 'Alexa (IA Archiver)',
		'jennybot'		=> 'JennyBot',
		'mercator'		=> 'Mercator',
		'msnbot'		=> 'MSNBot',			// MSN
		'netcraft'		=> 'Netcraft',
		'petersnews'		=> 'Petersnews',
		'unlost_web_crawler'	=> 'Unlost Web Crawler',
		'voila'			=> 'Voila',
		'webbase'		=> 'WebBase',
		'webcollage'		=> 'WebCollage',
		'cfetch'		=> 'cfetch',
		// Looksmart; must be before wisenut. This key used to be declared twice;
		// the value kept here is the one PHP actually used (the later one).
		'zyborg'		=> 'Zyborg',
		'wisenutbot'		=> 'WISENutbot',		// Looksmart

		// Less common robots (In robot file)
		'[^a]fish'		=> 'Fish search',
		'abcdatos'		=> 'ABCdatos BotLink',
		'acme\.spider'		=> 'Acme.Spider',
		'ahoythehomepagefinder'	=> 'Ahoy! The Homepage Finder',
		'alkaline'		=> 'Alkaline',
		'anthill'		=> 'Anthill',
		'arachnophilia'		=> 'Arachnophilia',
		'arale'			=> 'Arale',
		'araneo'		=> 'Araneo',
		'aretha'		=> 'Aretha',
		'ariadne'		=> 'ARIADNE',
		'powermarks'		=> '',				// Listed before arks, which would otherwise match it
		'arks'			=> 'arks',
		'aspider'		=> 'ASpider (Associative Spider)',
		'atn\.txt'		=> 'ATN Worldwide',
		'atomz'			=> 'Atomz.com Search Robot',
		'auresys'		=> 'AURESYS',
		'backrub'		=> 'BackRub',
		'bbot'			=> 'BBot',
		'bigbrother'		=> 'Big Brother',
		'blackwidow'		=> 'BlackWidow',
		'blindekuh'		=> 'Die Blinde Kuh',
		'bloodhound'		=> 'Bloodhound',
		'borg\-bot'		=> 'Borg-Bot',
		'brightnet'		=> 'bright.net caching robot',
		'webspider'		=> 'WebSpider',			// Must be before bspider
		'bspider'		=> 'BSpider',
		'cactvschemistryspider'	=> 'CACTVS Chemistry Spider',
		'calif[^r]'		=> 'Calif',
		'cassandra'		=> 'Cassandra',
		'cgireader'		=> 'Digimarc Marcspider/CGI',
		'checkbot'		=> 'Checkbot',
		'christcrawler'		=> 'ChristCrawler.com',
		'churl'			=> 'churl',
		'cienciaficcion'	=> 'cIeNcIaFiCcIoN.nEt',
		'collective'		=> 'Collective',
		'combine'		=> 'Combine System',
		'conceptbot'		=> 'Conceptbot',
		'coolbot'		=> 'CoolBot',
		'core'			=> 'Web Core / Roots',
		'cosmos'		=> 'XYLEME Robot',
		'cruiser'		=> 'Internet Cruiser Robot',
		'cusco'			=> 'Cusco',
		'cyberspyder'		=> 'CyberSpyder Link Test',
		'desertrealm'		=> 'Desert Realm Spider',
		'deweb'			=> 'DeWeb(c) Katalog/Index',
		'dienstspider'		=> 'DienstSpider',
		'digger'		=> 'Digger',
		'diibot'		=> 'Digital Integrity Robot',
		'direct_hit'		=> 'Direct Hit Grabber',
		'dnabot'		=> 'DNAbot',
		'download_express'	=> 'DownLoad Express',
		'dragonbot'		=> 'DragonBot',
		'dwcp'			=> 'DWCP (Dridus\' Web Cataloging Project)',
		'e\-collector'		=> 'e-collector',
		'ebiness'		=> 'EbiNess',
		'elfinbot'		=> 'ELFINBOT',
		'emacs'			=> 'Emacs-w3 Search Engine',
		'emcspider'		=> 'ananzi',
		'esther'		=> 'Esther',
		'evliyacelebi'		=> 'Evliya Celebi',
		'fastcrawler'		=> 'FastCrawler',
		'fdse'			=> 'Fluid Dynamics Search Engine robot',
		'felix'			=> 'Felix IDE',
		'fetchrover'		=> 'FetchRover',
		'fido'			=> 'fido',
		'finnish'		=> 'Hämähäkki',			// Was stored as mojibake
		'fireball'		=> 'KIT-Fireball',
		'fouineur'		=> 'Fouineur',
		'francoroute'		=> 'Robot Francoroute',
		'freecrawl'		=> 'Freecrawl',
		'funnelweb'		=> 'FunnelWeb',
		'gama'			=> 'gammaSpider, FocusedCrawler',
		'gazz'			=> 'gazz',
		'gcreep'		=> 'GCreep',
		'getbot'		=> 'GetBot',
		'geturl'		=> 'GetURL',
		'golem'			=> 'Golem',
		'grapnel'		=> 'Grapnel/0.01 Experiment',
		'griffon'		=> 'Griffon',
		'gromit'		=> 'Gromit',
		'gulperbot'		=> 'Gulper Bot',
		'hambot'		=> 'HamBot',
		'havindex'		=> 'havIndex',
		'hometown'		=> 'Hometown Spider Pro',
		'htmlgobble'		=> 'HTMLgobble',
		'hyperdecontextualizer'	=> 'Hyper-Decontextualizer',
		'iajabot'		=> 'iajaBot',
		'iconoclast'		=> 'Popular Iconoclast',
		'ilse'			=> 'Ingrid',
		'imagelock'		=> 'Imagelock',
		'incywincy'		=> 'IncyWincy',
		'informant'		=> 'Informant',
		'infoseeksidewinder'	=> 'Infoseek Sidewinder',	// Must be before infoseek
		'infoseek'		=> 'InfoSeek Robot 1.0',
		'infospider'		=> 'InfoSpiders',
		'inspectorwww'		=> 'Inspector Web',
		'intelliagent'		=> 'IntelliAgent',
		'irobot'		=> 'I, Robot',
		'iron33'		=> 'Iron33',
		'israelisearch'		=> 'Israeli-search',
		'javabee'		=> 'JavaBee',
		'jbot'			=> 'JBot Java Web Robot',
		'jcrawler'		=> 'JCrawler',
		'jobot'			=> 'Jobot',			// Must be before jobo
		'jobo'			=> 'JoBo Java Web Robot',
		'joebot'		=> 'JoeBot',
		'jubii'			=> 'The Jubii Indexing Robot',
		'jumpstation'		=> 'JumpStation',
		'kapsi'			=> 'image.kapsi.net',
		'katipo'		=> 'Katipo',
		'kilroy'		=> 'Kilroy',
		'ko_yappo_robot'	=> 'KO_Yappo_Robot',
		'labelgrabber\.txt'	=> 'LabelGrabber',
		'larbin'		=> 'larbin',
		'legs'			=> 'legs',
		'linkidator'		=> 'Link Validator',
		'linkscan'		=> 'LinkScan',
		'lockon'		=> 'Lockon',
		'logo_gif'		=> 'logo.gif Crawler',
		'macworm'		=> 'Mac WWWWorm',
		'magpierss'		=> 'MagpieRSS',			// Must be before magpie
		'magpie'		=> 'Magpie',
		'marvin'		=> 'marvin/infoseek',
		'mattie'		=> 'Mattie',
		'mediafox'		=> 'MediaFox',
		'merzscope'		=> 'MerzScope',
		'meshexplorer'		=> 'NEC-MeshExplorer',
		'mindcrawler'		=> 'MindCrawler',
		'mnogosearch'		=> 'mnoGoSearch search engine software',
		'momspider'		=> 'MOMspider',
		'monster'		=> 'Monster',
		'motor'			=> 'Motor',
		'muncher'		=> 'Muncher',
		'mwdsearch'		=> 'Mwd.Search',
		'ndspider'		=> 'NDSpider',
		'nederland\.zoek'	=> 'Nederland.zoek',
		'netcarta'		=> 'NetCarta WebMap Engine',
		'netmechanic'		=> 'NetMechanic',
		'netscoop'		=> 'NetScoop',
		'newscan\-online'	=> 'newscan-online',
		'nhse'			=> 'NHSE Web Forager',
		'northstar'		=> 'The NorthStar Robot',
		'nzexplorer'		=> 'nzexplorer',
		'objectssearch'		=> 'ObjectsSearch',
		'occam'			=> 'Occam',
		'octopus'		=> 'HKU WWW Octopus',
		'openfind'		=> 'Openfind data gatherer',
		'orb_search'		=> 'Orb Search',
		'packrat'		=> 'Pack Rat',
		'pageboy'		=> 'PageBoy',
		'parasite'		=> 'ParaSite',
		'patric'		=> 'Patric',
		'pegasus'		=> 'pegasus',
		'perignator'		=> 'The Peregrinator',
		'perlcrawler'		=> 'PerlCrawler 1.0',
		'phantom'		=> 'Phantom',
		'phpdig'		=> 'PhpDig',
		'piltdownman'		=> 'PiltdownMan',
		'pimptrain'		=> 'Pimptrain.com\'s robot',
		'pioneer'		=> 'Pioneer',
		'pitkow'		=> 'html_analyzer',
		'pjspider'		=> 'Portal Juice Spider',
		'plumtreewebaccessor'	=> 'PlumtreeWebAccessor',
		'poppi'			=> 'Poppi',
		'portalb'		=> 'PortalB Spider',
		'psbot'			=> 'psbot',
		'python'		=> 'The Python Robot',
		'raven'			=> 'Raven Search',
		'rbse'			=> 'RBSE Spider',
		'resumerobot'		=> 'Resume Robot',
		'rhcs'			=> 'RoadHouse Crawling System',
		'road_runner'		=> 'Road Runner: The ImageScape Robot',
		'robbie'		=> 'Robbie the Robot',
		'robi'			=> 'ComputingSite Robi/1.0',
		'robocrawl'		=> 'RoboCrawl Spider',
		'robofox'		=> 'RoboFox',
		'robozilla'		=> 'Robozilla',
		'roverbot'		=> 'Roverbot',
		'rules'			=> 'RuLeS',
		'safetynetrobot'	=> 'SafetyNet Robot',
		'search\-info'		=> 'Sleek',
		'search_au'		=> 'Search.Aus-AU.COM',
		'searchprocess'		=> 'SearchProcess',
		'senrigan'		=> 'Senrigan',
		'sgscout'		=> 'SG-Scout',
		'shaggy'		=> 'ShagSeeker',
		'shaihulud'		=> 'Shai\'Hulud',
		'sift'			=> 'Sift',
		'simbot'		=> 'Simmany Robot Ver1.0',
		'site\-valet'		=> 'Site Valet',
		'sitetech'		=> 'SiteTech-Rover',
		'skymob'		=> 'Skymob.com',
		'slcrawler'		=> 'SLCrawler',
		'smartspider'		=> 'Smart Spider',
		'snooper'		=> 'Snooper',
		'solbot'		=> 'Solbot',
		'speedy'		=> 'Speedy Spider',
		'spider_monkey'		=> 'spider_monkey',
		'spiderbot'		=> 'SpiderBot',
		'spiderline'		=> 'Spiderline Crawler',
		'spiderman'		=> 'SpiderMan',
		'spiderview'		=> 'SpiderView(tm)',
		'spry'			=> 'Spry Wizard Robot',
		'sqworm'		=> 'Sqworm',
		'ssearcher'		=> 'Site Searcher',
		'suke'			=> 'Suke',
		'suntek'		=> 'suntek search engine',
		'sven'			=> 'Sven',
		'tach_bw'		=> 'TACH Black Widow',
		'tarantula'		=> 'Tarantula',
		'tarspider'		=> 'tarspider',
		'techbot'		=> 'TechBOT',
		'templeton'		=> 'Templeton',
		'titan'			=> 'TITAN',
		'titin'			=> 'TitIn',
		'tkwww'			=> 'The TkWWW Robot',
		'tlspider'		=> 'TLSpider',
		'ucsd'			=> 'UCSD Crawl',
		'udmsearch'		=> 'UdmSearch',
		'urlck'			=> 'URL Check',
		'valkyrie'		=> 'Valkyrie',
		'verticrawl'		=> 'Verticrawl',
		'victoria'		=> 'Victoria',
		'visionsearch'		=> 'vision-search',
		'voidbot'		=> 'void-bot',
		'vwbot'			=> 'VWbot',
		'w3index'		=> 'The NWI Robot',
		'w3m2'			=> 'W3M2',
		'wallpaper'		=> 'WallPaper (alias crawlpaper)',
		'wanderer'		=> 'the World Wide Web Wanderer',
		'wapspider'		=> 'w@pSpider by wap4.com',
		'webbandit'		=> 'WebBandit Web Spider',
		'webcatcher'		=> 'WebCatcher',
		'webcopy'		=> 'WebCopy',
		'webfetcher'		=> 'webfetcher',
		'webfoot'		=> 'The Webfoot Robot',
		'webinator'		=> 'Webinator',
		'weblinker'		=> 'WebLinker',
		'webmirror'		=> 'WebMirror',
		'webmoose'		=> 'The Web Moose',
		'webquest'		=> 'WebQuest',
		'webreader'		=> 'Digimarc MarcSpider',
		'webreaper'		=> 'WebReaper',
		'websnarf'		=> 'Websnarf',
		'webvac'		=> 'WebVac',
		'webwalker'		=> 'WebWalker',			// Must be before webwalk
		'webwalk'		=> 'webwalk',
		'webwatch'		=> 'WebWatch',
		'whatuseek'		=> 'whatUseek Winona',
		'whowhere'		=> 'WhoWhere Robot',
		'wired\-digital'	=> 'Wired Digital',
		'wmir'			=> 'w3mir',
		'wolp'			=> 'WebStolperer',
		'wombat'		=> 'The Web Wombat',
		'worm'			=> 'The World Wide Web Worm',
		'wwwc'			=> 'WWWC Ver 0.2.5',
		'wz101'			=> 'WebZinger',
		'xget'			=> 'XGET',

		// Other robots reported by users
		'almaden'		=> 'IBM Almaden',
		'aport'			=> 'Aport',
		'argus'			=> 'Argus',			// Must be before nutch
		'asterias'		=> 'Asterias',
		'awbot'			=> 'AWBot',
		'baiduspider'		=> 'BaiDuSpider',
		'becomebot'		=> 'BecomeBot',
		'bender'		=> 'bender',
		'bloglines'		=> 'Bloglines',
		'blogpulse'		=> 'BlogPulse ISSpider intelliseek.com',
		'blogshares'		=> 'Blogshares Spiders',
		'blogslive'		=> 'Blogslive',
		'blogssay'		=> 'BlogsSay :: RSS Search Crawler',
		'bobby'			=> 'Bobby',
		'boris'			=> 'Boris',
		'bumblebee'		=> 'Bumblebee (relevare.com)',
		'converacrawler'	=> 'ConveraCrawler',
		'cscrawler'		=> 'CsCrawler',
		'daviesbot'		=> 'DaviesBot',
		'daypopbot'		=> 'DayPop',
		'dipsie\.bot'		=> 'Dipsie',
		'domainsdb\.net'	=> 'DomainsDB.net',
		'exactseek'		=> 'ExactSeek Crawler',
		'ezresult'		=> 'Ezresult',
		'everbeecrawler'	=> 'EverbeeCrawler',
		'enteprise'		=> 'Fast Enteprise Crawler',
		'feedburner'		=> 'Feedburner',
		'feedfetcher\-google'	=> 'Feedfetcher-Google',
		'feedster'		=> 'Feedster',
		'findlinks'		=> 'findlinks',
		'gaisbot'		=> 'Gaisbot',
		'geniebot'		=> 'geniebot',
		'gigabot'		=> 'GigaBot',
		'girafabot'		=> 'Girafabot',
		'gnodspider'		=> 'GNOD Spider',
		'grub'			=> 'Grub.org',
		'henrythemiragorobot'	=> 'Mirago',
		'holmes'		=> 'Holmes',
		'infomine'		=> 'INFOMINE VLCrawler',
		'internetseer'		=> 'InternetSeer',
		'justview'		=> 'JustView',
		'keyoshid'		=> 'Yahoo! Japan keyoshid robot study',		// Must come before Y!J
		'kinjabot'		=> 'Kinjabot',
		'kinja\-imagebot'	=> 'Kinja Imagebot',
		'linkbot'		=> 'LinkBot',
		'metager\-linkchecker'	=> 'MetaGer LinkChecker',
		'linkchecker'		=> 'LinkChecker',
		'livejournal\.com'	=> 'LiveJournal.com',
		'lmspider'		=> 'lmspider',
		'mediapartners\-google'	=> 'Google AdSense',
		'microsoft_url_control'	=> 'Microsoft URL Control',
		'mj12bot'		=> 'MJ12bot',
		'msiecrawler'		=> 'MSIECrawler',
		'nagios'		=> 'Nagios',
		'newsgatoronline'	=> 'NewsGator Online',
		'noxtrumbot'		=> 'noxtrumbot',
		'nutch'			=> 'Nutch',
		'opentaggerbot'		=> 'OpenTaggerBot',
		'outfoxbot'		=> 'OutfoxBot',
		'perman'		=> 'Perman surfer',
		'pluckfeedcrawler'	=> 'PluckFeedCrawler',
		'pompos'		=> 'Pompos',
		'popdexter'		=> 'Popdexter',
		'rambler'		=> 'StackRambler',
		'redalert'		=> 'Red Alert',
		'rojo'			=> 'RoJo',
		'rssimagesbot'		=> 'rssImagesBot',
		'ruffle'		=> 'ruffle SemanticWeb crawler',
		'rufusbot'		=> 'RufusBot Rufus Web Miner',
		'sandcrawler'		=> 'SandCrawler (Microsoft)',
		'sbider'		=> 'SBIder',
		'seekbot'		=> 'Seekbot',
		'seznambot'		=> 'SeznamBot',
		'shoutcast'		=> 'Shoutcast Directory Service',
		'slysearch'		=> 'SlySearch',
		'sohu-search'		=> 'sohu-search',
		'surveybot'		=> 'SurveyBot',
		'syndic8'		=> 'Syndic8',
		'technoratibot'		=> 'Technoratibot',
		't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e' => 'T-H-U-N-D-E-R-S-T-O-N-E',
		'topicblogs'		=> 'topicblogs',
		'turnitinbot'		=> 'Turn It In',
		'turtlescanner'		=> 'Turtle',			// Must be before turtle
		'turtle'		=> 'Turtle',
		'ultraseek'		=> 'Ultraseek',
		'w3c\-checklink'	=> 'W3C Link Checker',
		'w3c_css_validator_jfouffa' => 'W3C jigsaw CSS Validator',
		'w3c_validator'		=> 'W3C Validator',
		'webclipping\.com'	=> 'WebClipping.com',
		'webcompass'		=> 'webcompass',
		'webvulncrawl'		=> 'WebVulnCrawl',
		'wonderer'		=> 'Web Wombat Redback Spider',
		'y!j'			=> 'Y!J Yahoo Japan',		// Must come after keyoshid Y!J
		'yacy'			=> 'yacy',
		'yahoo\-blogs'		=> 'Yahoo-Blogs',
		'yahoo\-verticalcrawler'=> 'Yahoo Vertical Crawler',	// Yahoo
		'yahoofeedseeker'	=> 'Yahoo Feed Seeker',
		'yahooseeker\-testing'	=> 'YahooSeeker-Testing',
		'yahooseeker'		=> 'YahooSeeker Yahoo! Blog crawler',
		'yahoo\-mmcrawler'	=> 'Yahoo-MMCrawler',
		'yandex'		=> 'Yandex bot',
		'zealbot'		=> 'ZealBot',
		'ng\/1\.'		=> 'NG 1.x (Exalead)',		// put at end to avoid false positive
		'ng\/2\.'		=> 'NG 2.x (Exalead)',		// put at end to avoid false positive

		// UPK Add
		'OmniExplorer_Bot'	=> 'OmniExplorer Bot',
		'ichiro\/'		=> 'goo',
		'moewe'			=> 'doko.jp',

		// Generic root ID (catch-alls; must stay after every specific id)
		'robot'			=> 'Unknown robot (identified by \'robot\')',
		'crawl'			=> 'Unknown robot (identified by \'crawl\')',
		'spider'		=> 'Unknown robot (identified by \'spider\')',
		'\wbot[\/\-]'		=> 'Unknown robot (identified by \'bot/\' or \'bot-\')',

		// Unknown robots identified by hit on robots.txt
		'unknown'		=> 'Unknown robot (identified by hit on \'robots.txt\')',
	);

	/**
	 * Identify a robot from a User-Agent string.
	 *
	 * Every id in $robots_lib is tried in declaration order as a
	 * case-insensitive regular expression; the first one that matches wins.
	 *
	 * @param string|null $ua User-Agent value to inspect.
	 * @return array array(robot id, robot clear text), or array('', '') when
	 *               $ua is null/empty or no id matches.
	 */
	function get_robots_info($ua)
	{
		// A missing or empty User-Agent can never match; skip the regex scan.
		if ($ua === null || $ua === '') {
			return array('', '');
		}

		foreach ($this->robots_lib as $id => $name) {
			// The single quote is the regex delimiter, which is why no id may contain one.
			if (preg_match("'" . $id . "'si", $ua)) {
				return array($id, $name);
			}
		}

		return array('', '');
	}

}

?>
