allinurl / goaccess

GoAccess is a real-time web log analyzer and interactive viewer that runs in a terminal in *nix systems or through your browser.
https://goaccess.io
MIT License
18.12k stars 1.1k forks source link

crawlers list #15

Closed abgit closed 11 years ago

abgit commented 11 years ago

Hi, here is an additional crawler list with 330 more referrer signatures. Feel free to add it to util.c.

{"SeoCheckBot", "Crawlers"},
{"MJ12bot", "Crawlers"},
{"Genieo", "Crawlers"},
{"proximic", "Crawlers"},
{"80legs", "Crawlers"},
{"ExB Language Crawler", "Crawlers"},
{"NaverBot", "Crawlers"},
{"Qirina Hurdler", "Crawlers"},
{"ChangeDetection", "Crawlers"},
{"IntegromeDB", "Crawlers"},
{"GrapeshotCrawler", "Crawlers"},
{"IstellaBot", "Crawlers"},
{"Yahoo!", "Crawlers"},
{"sogou spider", "Crawlers"},
{"AddThis.com", "Crawlers"},
{"Zookabot", "Crawlers"},
{"ShopWiki", "Crawlers"},
{"iCjobs", "Crawlers"},
{"search.KumKie.com", "Crawlers"},
{"ZumBot", "Crawlers"},
{"Netseer", "Crawlers"},
{"magpie-crawler", "Crawlers"},
{"rogerbot", "Crawlers"},
{"ShowyouBot", "Crawlers"},
{"bitlybot", "Crawlers"},
{"Spinn3r", "Crawlers"},
{"Jyxobot", "Crawlers"},
{"Woko", "Crawlers"},
{"coccoc", "Crawlers"},
{"Wotbox", "Crawlers"},
{"Mail.Ru bot", "Crawlers"},
{"Vagabondo", "Crawlers"},
{"uMBot-FC", "Crawlers"},
{"MetaJobBot", "Crawlers"},
{"SearchmetricsBot", "Crawlers"},
{"Infohelfer", "Crawlers"},
{"Peeplo Screenshot Bot", "Crawlers"},
{"Qualidator.com Bot", "Crawlers"},
{"Daumoa", "Crawlers"},
{"Ezooms", "Crawlers"},
{"TurnitinBot", "Crawlers"},
{"NetcraftSurveyAgent", "Crawlers"},
{"Crawler4j", "Crawlers"},
{"Aboundexbot", "Crawlers"},
{"aiHitBot", "Crawlers"},
{"VoilaBot", "Crawlers"},
{"AraBot", "Crawlers"},
{"YioopBot", "Crawlers"},
{"Nuhk", "Crawlers"},
{"yacybot", "Crawlers"},
{"SEOENGBot", "Crawlers"},
{"Najdi.si", "Crawlers"},
{"linkdex.com", "Crawlers"},
{"Yahoo! JAPAN", "Crawlers"},
{"psbot", "Crawlers"},
{"trendictionbot", "Crawlers"},
{"BLEXBot", "Crawlers"},
{"spbot", "Crawlers"},
{"webmastercoffee", "Crawlers"},
{"PaperLiBot", "Crawlers"},
{"QuerySeekerSpider", "Crawlers"},
{"FacebookExternalHit", "Crawlers"},
{"UASlinkChecker", "Crawlers"},
{"Browsershots", "Crawlers"},
{"MetaURI API", "Crawlers"},
{"woriobot", "Crawlers"},
{"oBot", "Crawlers"},
{"FyberSpider", "Crawlers"},
{"Steeler", "Crawlers"},
{"Blekkobot", "Crawlers"},
{"LinkAider", "Crawlers"},
{"SEOkicks-Robot", "Crawlers"},
{"firmilybot", "Crawlers"},
{"netEstate Crawler", "Crawlers"},
{"AMZNKAssocBot", "Crawlers"},
{"OpenindexSpider", "Crawlers"},
{"BacklinkCrawler", "Crawlers"},
{"HubSpot Connect", "Crawlers"},
{"WBSearchBot", "Crawlers"},
{"MetaHeadersBot", "Crawlers"},
{"UnisterBot", "Crawlers"},
{"Strokebot", "Crawlers"},
{"GeliyooBot", "Crawlers"},
{"bot-pge.chlooe.com", "Crawlers"},
{"ownCloud Server Crawler", "Crawlers"},
{"CirrusExplorer", "Crawlers"},
{"bixocrawler", "Crawlers"},
{"ProCogSEOBot", "Crawlers"},
{"Falconsbot", "Crawlers"},
{"Dlvr.it/1.0", "Crawlers"},
{"thumbshots-de-Bot", "Crawlers"},
{"200PleaseBot", "Crawlers"},
{"discoverybot", "Crawlers"},
{"MojeekBot", "Crawlers"},
{"Panscient", "Crawlers"},
{"Plukkie", "Crawlers"},
{"R6 bot", "Crawlers"},
{"bl.uk_lddc_bot", "Crawlers"},
{"SolomonoBot", "Crawlers"},
{"Grahambot", "Crawlers"},
{"Automattic", "Crawlers"},
{"emefgebot", "Crawlers"},
{"SBSearch", "Crawlers"},
{"PiplBot", "Crawlers"},
{"TinEye", "Crawlers"},
{"FlightDeckReportsBot", "Crawlers"},
{"fastbot crawler", "Crawlers"},
{"4seohuntBot", "Crawlers"},
{"Updownerbot", "Crawlers"},
{"JikeSpider", "Crawlers"},
{"NLNZ_IAHarvester2013", "Crawlers"},
{"Nigma.ru", "Crawlers"},
{"wsAnalyzer", "Crawlers"},
{"OpenWebSpider", "Crawlers"},
{"YodaoBot", "Crawlers"},
{"SpiderLing", "Crawlers"},
{"Esribot", "Crawlers"},
{"Thumbshots.ru", "Crawlers"},
{"BlogPulse", "Crawlers"},
{"NextGenSearchBot", "Crawlers"},
{"bot.wsowner.com", "Crawlers"},
{"wscheck.com", "Crawlers"},
{"Qseero", "Crawlers"},
{"drupact", "Crawlers"},
{"HuaweiSymantecSpider", "Crawlers"},
{"PagePeeker", "Crawlers"},
{"HomeTags", "Crawlers"},
{"facebookplatform", "Crawlers"},
{"Pixray-Seeker", "Crawlers"},
{"BDFetch", "Crawlers"},
{"MeMoNewsBot", "Crawlers"},
{"ProCogBot", "Crawlers"},
{"WillyBot", "Crawlers"},
{"peerindex", "Crawlers"},
{"Job Roboter Spider", "Crawlers"},
{"MLBot", "Crawlers"},
{"WebNL", "Crawlers"},
{"Peepowbot", "Crawlers"},
{"Semager", "Crawlers"},
{"MIA Bot", "Crawlers"},
{"Eurobot", "Crawlers"},
{"DripfeedBot", "Crawlers"},
{"webinatorbot", "Crawlers"},
{"Whoismindbot", "Crawlers"},
{"Bad-Neighborhood", "Crawlers"},
{"Hailoobot", "Crawlers"},
{"akula", "Crawlers"},
{"MetamojiCrawler", "Crawlers"},
{"Page2RSS", "Crawlers"},
{"EasyBib AutoCite", "Crawlers"},
{"suggybot", "Crawlers"},
{"NerdByNature.Bot", "Crawlers"},
{"EventGuruBot", "Crawlers"},
{"quickobot", "Crawlers"},
{"gonzo", "Crawlers"},
{"CCBot", "Crawlers"},
{"bnf.fr_bot", "Crawlers"},
{"UptimeRobot", "Crawlers"},
{"Influencebot", "Crawlers"},
{"MSRBOT", "Crawlers"},
{"KeywordDensityRobot", "Crawlers"},
{"heritrix", "Crawlers"},
{"Ronzoobot", "Crawlers"},
{"RyzeCrawler", "Crawlers"},
{"ScoutJet", "Crawlers"},
{"Twikle", "Crawlers"},
{"SWEBot", "Crawlers"},
{"RADaR-Bot", "Crawlers"},
{"DCPbot", "Crawlers"},
{"Castabot", "Crawlers"},
{"percbotspider", "Crawlers"},
{"WeSEE:Search", "Crawlers"},
{"CatchBot", "Crawlers"},
{"imbot", "Crawlers"},
{"EdisterBot", "Crawlers"},
{"WASALive-Bot", "Crawlers"},
{"Accelobot", "Crawlers"},
{"PostPost", "Crawlers"},
{"factbot", "Crawlers"},
{"Setoozbot", "Crawlers"},
{"biwec", "Crawlers"},
{"GarlikCrawler", "Crawlers"},
{"Search17Bot", "Crawlers"},
{"Lijit", "Crawlers"},
{"MetaGeneratorCrawler", "Crawlers"},
{"Robots_Tester", "Crawlers"},
{"JUST-CRAWLER", "Crawlers"},
{"Apercite", "Crawlers"},
{"pmoz.info", "Crawlers"},
{"LemurWebCrawler", "Crawlers"},
{"Covario-IDS", "Crawlers"},
{"Holmes", "Crawlers"},
{"RankurBot", "Crawlers"},
{"DotBot", "Crawlers"},
{"envolk", "Crawlers"},
{"LexxeBot", "Crawlers"},
{"adressendeutschland", "Crawlers"},
{"StackRambler", "Crawlers"},
{"Abrave Spider", "Crawlers"},
{"EvriNid", "Crawlers"},
{"arachnode.net", "Crawlers"},
{"CamontSpider", "Crawlers"},
{"wikiwix-bot", "Crawlers"},
{"Nymesis", "Crawlers"},
{"trendictionbot", "Crawlers"},
{"Sitedomain-Bot", "Crawlers"},
{"SEODat", "Crawlers"},
{"SygolBot", "Crawlers"},
{"Snapbot", "Crawlers"},
{"OpenCalaisSemanticProxy", "Crawlers"},
{"ZookaBot", "Crawlers"},
{"CligooRobot", "Crawlers"},
{"cityreview", "Crawlers"},
{"nworm", "Crawlers"},
{"AboutUsBot", "Crawlers"},
{"ICC-Crawler", "Crawlers"},
{"SBIder", "Crawlers"},
{"TwengaBot", "Crawlers"},
{"Dot TK - spider", "Crawlers"},
{"EuripBot", "Crawlers"},
{"ParchBot", "Crawlers"},
{"Peew", "Crawlers"},
{"AntBot", "Crawlers"},
{"YRSpider", "Crawlers"},
{"Urlfilebot", "Crawlers"},
{"Gaisbot", "Crawlers"},
{"WatchMouse", "Crawlers"},
{"Tagoobot", "Crawlers"},
{"Motoricerca", "Crawlers"},
{"WebWatch", "Crawlers"},
{"urlfan-bot", "Crawlers"},
{"StatoolsBot", "Crawlers"},
{"page_verifier", "Crawlers"},
{"SSLBot", "Crawlers"},
{"SAI Crawler", "Crawlers"},
{"DomainDB", "Crawlers"},
{"LinkWalker", "Crawlers"},
{"WMCAI_robot", "Crawlers"},
{"voyager", "Crawlers"},
{"copyright sheriff", "Crawlers"},
{"Ocelli", "Crawlers"},
{"amibot", "Crawlers"},
{"abby", "Crawlers"},
{"NetResearchServer", "Crawlers"},
{"VideoSurf_bot", "Crawlers"},
{"XML Sitemaps Generator", "Crawlers"},
{"BlinkaCrawler", "Crawlers"},
{"nodestackbot", "Crawlers"},
{"Pompos", "Crawlers"},
{"taptubot", "Crawlers"},
{"BabalooSpider", "Crawlers"},
{"Yaanb", "Crawlers"},
{"Girafabot", "Crawlers"},
{"livedoor ScreenShot", "Crawlers"},
{"eCairn-Grabber", "Crawlers"},
{"FauBot", "Crawlers"},
{"Toread-Crawler", "Crawlers"},
{"Setoozbot", "Crawlers"},
{"MetaURI", "Crawlers"},
{"L.webis", "Crawlers"},
{"Web-sniffer", "Crawlers"},
{"FairShare", "Crawlers"},
{"Ruky-Roboter", "Crawlers"},
{"ThumbShots-Bot", "Crawlers"},
{"BotOnParade", "Crawlers"},
{"Amagit.COM", "Crawlers"},
{"HatenaScreenshot", "Crawlers"},
{"HolmesBot", "Crawlers"},
{"dotSemantic", "Crawlers"},
{"Karneval-Bot", "Crawlers"},
{"HostTracker.com", "Crawlers"},
{"AportWorm", "Crawlers"},
{"XmarksFetch", "Crawlers"},
{"FeedFinder", "Crawlers"},
{"CorpusCrawler", "Crawlers"},
{"Willow Internet Crawler", "Crawlers"},
{"OrgbyBot", "Crawlers"},
{"GingerCrawler", "Crawlers"},
{"pingdom.com_bot", "Crawlers"},
{"baypup", "Crawlers"},
{"Linguee Bot", "Crawlers"},
{"Mp3Bot", "Crawlers"},
{"192.comAgent", "Crawlers"},
{"Surphace Scout", "Crawlers"},
{"WikioFeedBot", "Crawlers"},
{"Szukacz", "Crawlers"},
{"DBLBot", "Crawlers"},
{"Thumbnail.CZ", "Crawlers"},
{"LinguaBot", "Crawlers"},
{"GurujiBot", "Crawlers"},
{"Charlotte", "Crawlers"},
{"50.nu", "Crawlers"},
{"SanszBot", "Crawlers"},
{"moba-crawler", "Crawlers"},
{"HeartRails_Capture", "Crawlers"},
{"SurveyBot", "Crawlers"},
{"MnoGoSearch", "Crawlers"},
{"smart.apnoti.com Robot", "Crawlers"},
{"Topicbot", "Crawlers"},
{"JadynAveBot", "Crawlers"},
{"OsObot", "Crawlers"},
{"WebImages", "Crawlers"},
{"WinWebBot", "Crawlers"},
{"Scooter", "Crawlers"},
{"Scarlett", "Crawlers"},
{"GOFORITBOT", "Crawlers"},
{"DKIMRepBot", "Crawlers"},
{"Yanga", "Crawlers"},
{"DNS-Digger-Explorer", "Crawlers"},
{"Robozilla", "Crawlers"},
{"adidxbot", "Crawlers"},
{"YowedoBot", "Crawlers"},
{"botmobi", "Crawlers"},
{"Fooooo_Web_Video_Crawl", "Crawlers"},
{"UptimeDog", "Crawlers"},
{"^Nail", "Crawlers"},
{"Metaspinner", "Crawlers"},
{"Touche", "Crawlers"},
{"RSSMicro", "Crawlers"},
{"SniffRSS", "Crawlers"},
{"FeedCatBot", "Crawlers"},
{"WebRankSpider", "Crawlers"},
{"Flatland Industries Web Spider", "Crawlers"},
{"DealGates Bot", "Crawlers"},
{"Link Valet Online", "Crawlers"},
{"Shelob", "Crawlers"},
{"Technoratibot", "Crawlers"},
{"Flocke bot", "Crawlers"},
{"FollowSite Bot", "Crawlers"},
{"Visbot", "Crawlers"},

By the way, there is currently a duplicate crawler entry: KaloogaBot.

best,

allinurl commented 11 years ago

Merged. Thanks!