#!/usr/bin/perl
# agent_id - returns the short agent ID
#
# Provides three helpers that reduce a raw HTTP User-Agent string to a short
# label:
#   which_browser($agent) - browser name and version, or "unknown"
#   which_robot($agent)   - robot name and version,   or "unknown"
#   which_OS($agent)      - operating system label,   or "-"

use strict;
use warnings;

# Try to guess which browser and version matches this agent string
#
# Takes the raw agent string as its only argument and returns a short
# "Name Version" label. Returns "unknown" when the string is empty,
# undefined, blacklisted below, or reduces to nothing after cleanup.
sub which_browser {
    my $Browser = defined $_[0] ? $_[0] : '';
    $Browser =~ s/\t/ /g;

    # weird Browsers ... phoney and/or suspicious agent strings
    return "unknown" if ($Browser eq "IE 5.5 Compatible Browser");
    return "unknown" if ($Browser eq "IE5");
    # End weird Browsers

    if ($Browser =~ /^Mozilla/) {
        # The order of these branches matters: the first match wins.
        if ($Browser =~ /(Netscape\S*)/) {
            $Browser = $1;
            $Browser =~ s#6/6# 6#;
        } elsif ($Browser =~ /(Galeon\S*)/) {
            $Browser = $1;
            $Browser =~ s/\)//g;
        } elsif ($Browser =~ /(Opera.*)\s*\[..\]$/) {
            $Browser = $1;
        } elsif ($Browser =~ /Gecko\/200[0-9][01][0-9][0-3][0-9].*Debian/
              || $Browser =~ /Gecko\/200[0-9][01][0-9][0-3][0-9].*WebWasher/) {
            # Keep only the leading "Mozilla/x.y" token.
            $Browser =~ s/ .*$//;
        } elsif ($Browser =~ /Gecko\/200[0-9][01][0-9][0-3][0-9]\s+(.*)/) {
            # Whatever follows the Gecko build date names the real browser.
            $Browser = $1;
        } elsif ($Browser =~ /rv:([0-9.]+).*\)\s+Gecko\/200[0-9][01][0-9][0-3][0-9]/) {
            $Browser = "Mozilla $1";
        } elsif ($Browser =~ /Gecko\/200[0-9][01][0-9][0-3][0-9]/) {
            $Browser =~ s/ .*$//;
        } elsif ($Browser =~ /Crazy\s*Browser\s*(\S+)/) {
            $Browser = "Crazy Browser $1";
        } elsif ($Browser =~ /\(compatible;\s*(.*)/s) {
            # Capture the text after "(compatible;" explicitly instead of
            # reading $', and split it into its ";"-separated tokens.
            my $rest   = $1;
            my @t      = split(/;\s*/, $rest);
            my $first  = defined $t[0] ? $t[0] : '';
            my $second = defined $t[1] ? $t[1] : '';

            if ($first =~ /^MSIE /) {
                if (   $second =~ /^Windows/
                    || $second =~ /^Mac_PowerPC/
                    || $second =~ /^MSNIA/
                    || $second =~ /^MSN/
                    || $second =~ /^AOL/
                    || $second =~ /^CS 2000/) {
                    $Browser = $first;
                } elsif ($second =~ /^Linux/) {
                    # Use what follows the last closing parenthesis.
                    $Browser =~ s/^.*\)//;
                } else {
                    # Second token is taken as the browser name.
                    $Browser = $second;
                }
            } else {
                $Browser = $first;
            }
        } elsif ($Browser =~ /^Mozilla\/(4\.[1-9][0-9]*)/
              || $Browser =~ /^Mozilla\/(4\.0[1-9]+)/) {
            $Browser = "Netscape $1";    # Most probably Netscape?
        }
    } elsif ($Browser =~ /^(\w+)\/([0-9]+\.[0-9]+\.[0-9]+)/
          || $Browser =~ /^(\w+)\/([0-9]+\.[0-9]+)/
          || $Browser =~ /^(\w+)\/([0-9]+)/
          || $Browser =~ /^(\w+)\s*\(([0-9]+\.[0-9]+)/) {
        $Browser = "$1 $2";
    }

    # Generic cleanup; the statements are order-dependent.
    $Browser =~ s/^\S+_Arachne/Arachne/;
    $Browser =~ s/\(.*$//;
    $Browser =~ s/\).*$//;
    $Browser =~ s/\s*$//;
    $Browser =~ s/\s*\[..\]$//;
    $Browser =~ s/-[0-9].*$//;
    $Browser =~ s/;$//;
    $Browser =~ s#/# #;
    $Browser =~ s/^\s*//;
    $Browser =~ s/'/ /g;
    $Browser =~ s/\+$//;

    $Browser = "unknown" unless ($Browser);
    return ($Browser);
}

# ------------------------------------------------------------------------
# Try to guess which robot and version matches this agent string
#
# Takes the raw agent string as its only argument and returns a short label
# with its first letter upper-cased. Returns "unknown" when the result is
# shorter than two characters or starts with a browser/OS token (Mozilla,
# MSIE, Konqueror, Windows, Linux).
sub which_robot {
    my $Robot = defined $_[0] ? $_[0] : '';
    $Robot =~ s/\t/ /g;

    # unorthodox agent strings
    # Each rule tests the value left by the previous one, so order matters.
    $Robot = "MSIECrawler"   if ($Robot =~ /MSIECrawler\)$/);
    $Robot = "Larbin"        if ($Robot =~ /larbin\@unspecified\.mail/);
    $Robot = "Inktomi slurp" if ($Robot =~ /inktomi/i && $Robot =~ /slurp/i);
    $Robot = "$1 $2"         if ($Robot =~ /^Openfind .* (Openbot)\/(\S+)/);
    # Non-greedy skip so a multi-digit major version is captured whole.
    $Robot = "$1 $2.$3"      if ($Robot =~ /^(Scooter).*?([0-9]+)\.([0-9]+)/);
    $Robot = "GulperBot $1"  if ($Robot =~ /Gulper Web Bot (\S+)/);
    $Robot = "Organica $1"   if ($Robot =~ /Mozilla.*Advanced Email Extractor v(\S+)/);
    $Robot = "$1"            if ($Robot =~ /(NutchCrawler)/);
    # Non-greedy skip, and the version must start with a digit, so the first
    # version number after the name is captured rather than a trailing dot.
    $Robot = "$1 $2"         if ($Robot =~ /(VoilaBot).*?([0-9][0-9.]*)/);
    $Robot =~ s#http://##;
    $Robot = "$1" if ($Robot =~ /^www\.(\w+)\.com/);
    $Robot = "$1" if ($Robot =~ /^Mozilla\/4\.0 \(compatible.*\s+(\S+[Bb]ot)\W/);
    # End unorthodox agent strings

    # Strip the Mozilla wrapper and everything after the name/version.
    $Robot =~ s/^Mozilla\/\S+.*compatible.//;
    $Robot =~ s/^Mozilla\/\S+ \(//;
    $Robot =~ s/\// /;
    $Robot =~ s/^\s*//;
    $Robot =~ s/\(.*//;
    $Robot =~ s/\).*//;
    $Robot =~ s/;.*//;
    $Robot =~ s/ - .*$//;
    $Robot =~ s/_$//;
    $Robot =~ s/\s*$//;
    $Robot = "$1" if ($Robot =~ /^www\.(\w+)\.\w+\.com/);
    $Robot =~ s/\W+\S+\.com$//;
    $Robot =~ s/\W+\S+\.org$//;

    # What is left is a browser or OS token, not a robot name.
    $Robot = "unknown"
        if (   length($Robot) < 2
            || $Robot =~ /^Mozilla/
            || $Robot =~ /^MSIE /
            || $Robot =~ /^Konqueror /
            || $Robot =~ /^Windows/
            || $Robot =~ /^Linux/);

    $Robot = "$1$2"  if ($Robot =~ /^(\S+)\.org(.*)/);
    $Robot = "$1$2"  if ($Robot =~ /^(\S+)\.com(.*)/);
    $Robot = "$1 $2" if ($Robot =~ /^(\S+)[ _-]([0-9.x]+)/);
    $Robot = (ucfirst $Robot) unless ($Robot eq "unknown");
    $Robot =~ s/'/ /g;
    return ($Robot);
}

# ------------------------------------------------------------------------
# Try to guess which operating system matches this agent string
#
# Takes the raw agent string as its only argument and returns a short OS
# label. Returns "-" when the string is empty/undefined, when no OS token
# is recognised, or when the candidate contains "www".
sub which_OS {
    my $agent = defined $_[0] ? $_[0] : '';

    # Lexical on purpose: a package-level $OS would be clobbered on every call.
    my $OS = $agent ? $agent : "-";

    if ($OS =~ /^Mozilla\/\S+ \(compatible;(.*)/s) {
        # "(compatible; <browser>; [extra tokens;] <os>; ...)"
        my @t = split(/;/, $1);
        my $i = 1;

        # Skip tokens that may sit between the browser and the OS.
        $i++ if (defined $t[$i] && $t[$i] =~ /MSN/);
        $i++ if (defined $t[$i] && $t[$i] =~ /AOL [0-9]/);
        $i++ if (defined $t[$i] && $t[$i] =~ /CS 2000/);
        $i++ if (defined $t[$i] && $t[$i] =~ /Wal-Mart/);
        $OS = defined $t[$i] ? $t[$i] : '';
    } else {
        if (   $OS =~ /(Linux .*)/
            || $OS =~ /(Windows .*)/
            || $OS =~ /(\S+bsd .*)/
            || $OS =~ /(\S+BSD .*)/
            || $OS =~ /\W+(Win98)/
            || $OS =~ /(Mac OS X)/
            || $OS =~ /(Mac OS)/
            || $OS =~ /(IRIX)/i) {
            $OS = "$1";
        } else {
            $OS = "-";
        }
    }

    $OS =~ s/^\(//;

    # Normalise to friendly names; later rules see the result of earlier ones.
    $OS = "Windows ME"
        if (($OS =~ /Windows 98/ || $OS =~ /-/) && $agent =~ /Win 9x 4\.90/);
    $OS = "Windows 2000" if ($OS =~ /Windows NT 5\.0/);
    $OS = "Windows XP"   if ($OS =~ /Windows NT 5\.1/);
    $OS = "Windows 98"   if ($OS =~ /Win98/);
    $OS = "Windows NT"   if ($OS =~ /Windows NT 4\.0/);
    $OS = "Irix"         if ($OS =~ /IRIX/i);
    $OS = "$1 intel"     if ($OS =~ /(.*BSD) .*[2-6]86/);
    $OS = "Linux intel"  if ($OS =~ /Linux/ && $agent =~ /i[2-6]86/);
    $OS = "-"            if ($OS =~ /www/i);

    $OS =~ s/\).*//;
    $OS =~ s/\;.*//;
    $OS =~ s/\s*$//;
    $OS =~ s/^\s+//;
    $OS =~ s/'/ /g;

    # Use the same "-" sentinel as every other unknown case.
    $OS = "-" unless length $OS;
    return ($OS);
}

1;