tmp>perl -MCPAN -e shell
色々と質問に答えます。
CPAN>install HTML::Entities::Numbered
CPAN>install Unicode::Japanese
CPAN>install Jcode
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/awstats.pl Fri Jan 23 11:16:31 2009 +++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/awstats.pl Sat Feb 07 22:02:36 2009 @@ -31,9 +33,10 @@ use vars qw/ $DEBUGFORCED $NBOFLINESFORBENCHMARK $FRAMEWIDTH $NBOFLASTUPDATELOOKUPTOSAVE $LIMITFLUSH $NEWDAYVISITTIMEOUT $VISITTIMEOUT $NOTSORTEDRECORDTOLERANCE -$WIDTHCOLICON $TOOLTIPON +$WIDTHCOLICON $WIDTHHIT $WIDTHPAGE $WIDTHBANDWIDTH $WIDTHFULLDATE $WIDTHPARCENT $WIDTHYESNO $WIDTHBROWSER $WIDTHOS $TOOLTIPON $lastyearbeforeupdate $lastmonthbeforeupdate $lastdaybeforeupdate $lasthourbeforeupdate $lastdatebeforeupdate +$endtag $OptionSelect /; $DEBUGFORCED=0; # Force debug level to log lesser level into debug.log file (Keep this value to 0) $NBOFLINESFORBENCHMARK=8192; # Benchmark info are printing every NBOFLINESFORBENCHMARK lines (Must be a power of 2) @@ -50,9 +53,17 @@ $NOTSORTEDRECORDTOLERANCE = 20000 ; # Lapse of time to accept a record if not in correct order. 20000 = 2 hour (Default = 20000) $WIDTHCOLICON = 32; +$WIDTHHIT=70; +$WIDTHPAGE=70; +$WIDTHBANDWIDTH=80; +$WIDTHFULLDATE=160; +$WIDTHPARCENT=80; +$WIDTHYESNO=50; +$WIDTHBROWSER=160; +$WIDTHOS=90; $TOOLTIPON = 0; # Tooltips plugin loaded $NOHTML = 0; # Suppress the html headers - +$endtag=">"; # ----- Running variables ----- use vars qw/ $DIR $PROG $Extension
@@ -2502,6 +2493,13 @@
if ( $BuildReportFormat !~ /html|xhtml|xml/i ) {
$BuildReportFormat = 'html';
}
+ if ($BuildReportFormat eq "html") { # HTML 4.01
+ $endtag=">";
+ $OptionSelect = "selected";
+ } else { # XHTML 1.0
+ $endtag=" />";
+ $OptionSelect = "selected=\"selected\"";
+ }
if ( $BuildHistoryFormat !~ /text|xml/ ) { $BuildHistoryFormat = 'text'; }
if ( $SaveDatabaseFilesWithPermissionsForEveryone !~ /[0-1]/ ) {
$SaveDatabaseFilesWithPermissionsForEveryone = 0;
@@ -129,6 +140,7 @@
$DNSLastUpdateCacheFile
$MiscTrackerUrl
$Lang
+$LangHTML
$MaxRowsInHTMLOutput
$MaxLengthOfShownURL
$MaxLengthOfStoredURL
@@ -795,7 +814,7 @@
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Frameset//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd\">\n";
}
print
-"<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"$Lang\">\n";
+"<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"$LangHTML\" lang=\"$LangHTML\">\n";
else {
if ( $FrameName ne 'index' ) {
@@ -806,7 +825,7 @@
print
"<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Frameset//EN\">\n";
}
- print '<html lang="' . $Lang . '"'
+ print '<html lang="' . $LangHTML . '"'
. ( $PageDir ? ' dir="rtl"' : '' ) . ">\n";
}
print "<head>\n";
@@ -9801,6 +9818,9 @@
}
$Lang = 'en';
}
+foreach (keys %LangBrowserToLangAwstats) {
+ if ($Lang eq $LangBrowserToLangAwstats{$_}) { $LangHTML = $_; } #convert Lang to LangHTML
+}
# Check and correct bad parameters
&Check_Config();
@@ -16200,48 +16316,48 @@
# TODO
# Total row
print
-"<tr><td bgcolor=\"#$color_TableBGRowTitle\">$Message[102]</td>";
+"<tr><td class=\"colortab\">$Message[102]</td>";
if ( $ShowMonthStats =~ /U/i ) {
--- C:\awstats-6.9-mod/wwwroot/css/awstats_bw.css Sun Apr 27 17:47:28 2008
+++ C:\AWStats69t_Jpn/wwwroot/css/awstats_bw.css Thu Jan 22 11:26:22 2009
@@ -27,5 +27,21 @@
a:link { color: #001133; text-decoration: none; }
a:visited { color: #001133; text-decoration: none; }
a:hover { color: #444444; text-decoration: underline; }
-div { font: 12px arial,verdana,helvetica; text-align:justify; }
-.ctooltip { position:absolute; top:0px; left:0px; z-index:2; width:380; visibility:hidden; font: 8pt MS Comic Sans,arial,sans-serif; background-color: #EEEEEE; padding: 8px; border: 1px solid black; }
+div { font: 12px arial,verdana,helvetica,sans-serif; text-align:justify; }
+.ctooltip { position:absolute; top:0px; left:0px; z-index:2; width:380px; visibility:hidden; font: 8pt MS Comic Sans,arial,sans-serif; background-color: #EEEEEE; padding: 8px; border: 1px solid black; }
+img {border-width: 0em}
+.vmiddle {vertical-align:middle;}
+table {table-layout:fixed}
+.currentday { font-weight: bold; }
+.colortab { background-color: #ECECEC}
+.colore { background-color: #CEC2E8}
+.colorh { background-color: #66DDEE}
+.colork { background-color: #2EA495}
+.colorp { background-color: #4477DD}
+.colors { background-color: #8888DD}
+.coloru { background-color: #FFAA66}
+.colorv { background-color: #F4F090}
+.colorweekend { background-color: #EAEAEA}
+.colorx { background-color: #C1B2E2}
+.bottom {vertical-align:bottom;}
+.nowrap { white-space:nowrap}
@@ -1035,28 +1013,18 @@
if ( $width == 70 && $QueryString =~ /buildpdf/i ) {
print
-"<table class=\"aws_border\" border=\"0\" cellpadding=\"2\" cellspacing=\"0\" width=\"800\">\n";
+"<table class=\"aws_data\" border=\"1\" cellpadding=\"2\" cellspacing=\"0\" width=\"796\" summary=\"$title1\" id=\"$class\">\n";
}
else {
print
-"<table class=\"aws_border\" border=\"0\" cellpadding=\"2\" cellspacing=\"0\" width=\"100%\">\n";
+"<table class=\"aws_data\" border=\"1\" cellpadding=\"2\" cellspacing=\"0\" width=\"100%\" summary=\"$title1\" id=\"$class\">\n";
}
if ($tooltipnb) {
- print "<tr><td class=\"aws_title\" width=\"$width%\"".Tooltip($tooltipnb,$tooltipnb).">$title ".$extra_head_html."</td>";
- }
- else {
- print "<tr><td class=\"aws_title\" width=\"$width%\">$title ".$extra_head_html."</td>";
- }
- print "<td class=\"aws_blank\"></td></tr>\n";
- print "<tr><td colspan=\"2\">\n";
- if ( $width == 70 && $QueryString =~ /buildpdf/i ) {
- print
-"<table class=\"aws_data\" border=\"1\" cellpadding=\"2\" cellspacing=\"0\" width=\"796\">\n";
+ print "<caption class=\"aws_title\"".Tooltip($tooltipnb,$tooltipnb).">$title ".$extra_head_html."</caption>\n";
}
else {
- print
-"<table class=\"aws_data\" border=\"1\" cellpadding=\"2\" cellspacing=\"0\" width=\"100%\">\n";
+ print "<caption class=\"aws_title\">$title ".$extra_head_html."</caption>\n";
}
}
@@ -7452,9 +7448,10 @@
# Return: encodedstring
#------------------------------------------------------------------------------
sub XMLEncode {
- if ( $BuildReportFormat ne 'xhtml' && $BuildReportFormat ne 'xml' ) {
- return shift;
- }
+# エスケープはHTMLでも必要
+# if ( $BuildReportFormat ne 'xhtml' && $BuildReportFormat ne 'xml' ) {
+# return shift;
+# }
my $string = shift;
$string =~ s/&/&amp;/g;
$string =~ s/</&lt;/g;
@@ -7474,7 +7457,7 @@
sub XMLEncodeForHisto {
my $string = shift;
$string =~ s/\s/%20/g;
- if ($BuildHistoryFormat ne 'xml') { return $string; }
+# if ($BuildHistoryFormat ne 'xml') { return $string; }
$string =~ s/&/&amp;/g;
$string =~ s/</&lt;/g;
$string =~ s/>/&gt;/g;
@@ -12945,13 +12983,13 @@
$NewLinkParams =~ s/(^|&amp;|&)framename=[^&]*//i;
my $NewLinkTarget = '';
if ($DetailedReportsOnNewWindows) {
- $NewLinkTarget = " target=\"awstatsbis\"";
+ $NewLinkTarget = "";
}
if ( ( $FrameName eq 'mainleft' || $FrameName eq 'mainright' )
&& $DetailedReportsOnNewWindows < 2 )
{
$NewLinkParams .= "&amp;framename=mainright";
- $NewLinkTarget = " target=\"mainright\"";
+ $NewLinkTarget = "";
}
$NewLinkParams =~ s/(&amp;|&)+/&amp;/i;
$NewLinkParams =~ s/^&amp;//;
@@ -7711,9 +7708,9 @@
: $lng );
print "<a href=\""
. XMLEncode("$AWScript?${NewLinkParams}lang=$lng")
- . "\"$NewLinkTarget><img src=\"$DirIcons\/flags\/$flag.png\" height=\"14\" border=\"0\""
+ . "\"$NewLinkTarget><img src=\"$DirIcons\/flags\/$flag.png\" height=\"14\" width=\"14\""
. AltTitle("$lngtitle")
- . "$endtag</a> \n";
+ . "$endtag</a>\n";
}
}
}
@@ -13082,18 +13123,31 @@
}
# Logo and flags
+ my $width;
+ my $height;
+ if ($Logo eq "awstats_logo1.png") {
+ $width = 111;
+ $height = 51;
+ } elsif ($Logo eq "awstats_logo5.png") {
+ $width = 230;
+ $height = 54;
+ } elsif ($Logo eq "awstats_logo6.png") {
+ $width = 112;
+ $height = 54;
+ }
if ( $FrameName ne 'mainleft' ) {
if ( $LogoLink =~ "http://awstats.sourceforge.net" ) {
print "<td align=\"right\" rowspan=\"3\"><a href=\""
. XMLEncode($LogoLink)
- . "\" target=\"awstatshome\"><img src=\"$DirIcons/other/$Logo\" border=\"0\""
+ . "\"><img src=\"$DirIcons/other/$Logo\" width=\"$width\" height=\"$height\""
. AltTitle( ucfirst($PROG) . " Web Site" )
. "$endtag</a>";
}
else {
print "<td align=\"right\" rowspan=\"3\"><a href=\""
. XMLEncode($LogoLink)
- . "\" target=\"awstatshome\"><img src=\"$DirIcons/other/$Logo\" border=\"0\"$endtag</a>";
+ . "\"><img src=\"$DirIcons/other/$Logo\" width=\"$width\" height=\"$height\""
+ . AltTitle(ucfirst($PROG)." Web Site")."$endtag</a>";
}
if ( !$StaticLinks ) { print "<br$endtag"; Show_Flag_Links($Lang); }
print "</td>";
@@ -14105,8 +14174,15 @@
$title .= "$Message[9]";
$cpt = ( scalar keys %_host_h );
}
- &tab_head( "$title", 19, 0, 'hosts' );
- print "<tr bgcolor=\"#$color_TableBGRowTitle\"><th>";
+ &tab_head( "$title", 19, 0, 'HOSTS' );
+ print "<col$endtag";
+ &ShowHostInfo('__col__');
+ if ($ShowHostsStats =~ /P/i) { print "<col width=\"$WIDTHPAGE\"$endtag";}
+ if ($ShowHostsStats =~ /H/i) { print "<col width=\"$WIDTHHIT\"$endtag";}
+ if ($ShowHostsStats =~ /B/i) { print "<col width=\"$WIDTHBANDWIDTH\"$endtag";}
+ if ($ShowHostsStats =~ /L/i) { print "<col width=\"$WIDTHFULLDATE\"$endtag";}
+ print "\n";
+ print "<tr class=\"colortab\"><th abbr=\"$Message[79]\">";
if ( $FilterIn{'host'} || $FilterEx{'host'} ) { # With filter
if ( $FilterIn{'host'} ) {
print "$Message[79] '<strong>$FilterIn{'host'}</strong>'";
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/awstats.pl Fri Jan 23 11:16:31 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/awstats.pl Sat Feb 07 22:02:36 2009
@@ -288,7 +302,7 @@
)
= ( 2, 2, 0, 2, 2, 2, 2, 2, 2 );
use vars qw/
- $DirLock $DirCgi $DirConfig $DirData $DirIcons $DirLang $AWScript $ArchiveFileName
+ $DirLock $DirCgi $DirConfig $DirData $DirIcons $DirJs $DirCss $DirLang $AWScript $ArchiveFileName
$AllowAccessFromWebToFollowingIPAddresses $HTMLHeadSection $HTMLEndSection $LinksToWhoIs $LinksToIPWhoIs
$LogFile $LogType $LogFormat $LogSeparator $Logo $LogoLink $StyleSheet $WrapperScript $SiteDomain
$UseHTTPSLinkForUrl $URLQuerySeparators $URLWithAnchor $ErrorMessages $ShowFlagLinks
@@ -296,7 +310,8 @@
(
$DirLock, $DirCgi,
$DirConfig, $DirData,
- $DirIcons, $DirLang,
+ $DirIcons, $DirJs, # JS用ディレクトリ
+ $DirCss, $DirLang, # CSS用ディレクトリ
$AWScript, $ArchiveFileName,
$AllowAccessFromWebToFollowingIPAddresses, $HTMLHeadSection,
$HTMLEndSection, $LinksToWhoIs,
@@ -1813,6 +1793,14 @@
if ( $QueryString !~ /diricons=([^\s&]+)/i ) { $DirIcons = $value; }
next;
}
+ if ($param =~ /^DirJs/) {
+ if ($QueryString !~ /dirjs=([^\s&]+)/i) { $DirJs=$value; }
+ next;
+ }
+ if ($param =~ /^DirCss/) {
+ if ($QueryString !~ /dircss=([^\s&]+)/i) { $DirCss=$value; }
+ next;
+ }
if ( $param =~ /^SiteDomain/ ) {
# No regex test as SiteDomain is always exact value
@@ -2471,6 +2459,9 @@
$DirData ||= '.';
$DirCgi ||= '/cgi-bin';
$DirIcons ||= '/icon';
+ $DirJs ||= '/js';
+ $DirCss ||= '/css';
+ $StyleSheet ||= '/awstatscss/awstats_bw.css';
if ( $DNSLookup !~ /[0-2]/ ) {
error( "DNSLookup parameter is wrong in config/domain file. Value is '$DNSLookup' (should be 0,1 or 2)"
);
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/plugins/tooltips.pm Thu Jan 29 16:51:24 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/plugins/tooltips.pm Thu Jan 29 16:52:31 2009
@@ -104,50 +104,17 @@
$doctop="document.documentElement.scrollTop";
}
- print <<EOF;
-
-<script language="javascript" type="text/javascript">
-function ShowTip(fArg)
-{
- var tooltipOBJ = (document.getElementById) ? document.getElementById('tt' + fArg) : eval("document.all['tt" + fArg + "']");
- if (tooltipOBJ != null) {
- var tooltipLft = ($docwidth?$docwidth:document.body.style.pixelWidth) - (tooltipOBJ.offsetWidth?tooltipOBJ.offsetWidth:(tooltipOBJ.style.pixelWidth?tooltipOBJ.style.pixelWidth:$TOOLTIPWIDTH)) - 30;
- var tooltipTop = 10;
- if (navigator.appName == 'Netscape') {
- tooltipTop = ($doctop>=0?$doctop+10:event.clientY+10);
- tooltipOBJ.style.top = tooltipTop+"px";
- tooltipOBJ.style.left = tooltipLft+"px";
- }
- else {
- tooltipTop = ($doctop>=0?$doctop+10:event.clientY+10);
- tooltipTop = (document.body.scrollTop>=0?document.body.scrollTop+10:event.clientY+10);
-EOF
# Seul IE en HTML a besoin de code supplementaire. IE en xhtml est OK
if ($BuildReportFormat ne 'xhtml' && $BuildReportFormat ne 'xml') {
print <<EOF;
- if ((event.clientX > tooltipLft) && (event.clientY < (tooltipOBJ.scrollHeight?tooltipOBJ.scrollHeight:tooltipOBJ.style.pixelHeight) + 10)) {
- tooltipTop = ($doctop?$doctop:document.body.offsetTop) + event.clientY + 20;
- }
+<script language="javascript" type="text/javascript" src="$DirJs/tooltiph.js"></script>
EOF
- }
+ } else {
print <<EOF;
- tooltipOBJ.style.left = tooltipLft;
- tooltipOBJ.style.top = tooltipTop;
- }
- tooltipOBJ.style.visibility = "visible";
- }
-}
-function HideTip(fArg)
-{
- var tooltipOBJ = (document.getElementById) ? document.getElementById('tt' + fArg) : eval("document.all['tt" + fArg + "']");
- if (tooltipOBJ != null) {
- tooltipOBJ.style.visibility = "hidden";
- }
-}
-</script>
+<script language="javascript" type="text/javascript" src="$DirJs/tooltipx.js"></script>
EOF
-
+ }
}
return 1;
# ----->
@@ -198,6 +165,7 @@
s/#RobotArray#/$aws_NbOfRobots/;
s/#WormsArray#/$aws_NbOfWorms/;
s/#SearchEnginesArray#/$aws_NbOfSearchEngines/;
+ s/#br#/<br$endtag/;
print "$_";
}
}
@@ -747,7 +766,7 @@
if ( $BuildReportFormat eq 'xhtml' || $BuildReportFormat eq 'xml' ) {
print( $ENV{'HTTP_USER_AGENT'} =~ /MSIE|Googlebot/i
? "Content-type:text/html; charset=$newpagecode\n"
- : "Content-type: text/xml; charset=$newpagecode\n" );
+ : "Content-type: application/xhtml+xml; charset=$newpagecode\n" );
}
else { print "Content-type: text/html; charset=$newpagecode\n"; }
@@ -839,9 +858,7 @@
? "<meta http-equiv=\"content-type\" content=\"text/html; charset="
. ( $PageCode ? $PageCode : "iso-8859-1" )
. "\"$endtag\n"
- : "<meta http-equiv=\"content-type\" content=\"text/xml; charset="
- . ( $PageCode ? $PageCode : "iso-8859-1" )
- . "\"$endtag\n" );
+ : "" );
}
else {
print
@@ -851,12 +868,18 @@
}
if ($Expires) {
+ if ( $BuildReportFormat eq 'xhtml' || $BuildReportFormat eq 'xml' ) {
+ if ( $ENV{'HTTP_USER_AGENT'} =~ /MSIE|Googlebot/i ){
print "<meta http-equiv=\"expires\" content=\""
. ( gmtime( $starttime + $Expires ) )
. "\"$endtag\n";
}
+ }
+ }
my @k = keys
%HTMLOutput; # This is to have a unique title and description page
+ if ( $BuildReportFormat eq 'xhtml' || $BuildReportFormat eq 'xml' ) {
+ if ( $ENV{'HTTP_USER_AGENT'} =~ /MSIE|Googlebot/i ){
print "<meta http-equiv=\"description\" content=\""
. ucfirst($PROG)
. " - Advanced Web Statistics for $SiteDomain$periodtitle"
@@ -866,85 +889,32 @@
print
"<meta http-equiv=\"keywords\" content=\"$SiteDomain, free, advanced, realtime, web, server, logfile, log, analyzer, analysis, statistics, stats, perl, analyse, performance, hits, visits\"$endtag\n";
}
+ }
+ }
print "<title>$Message[7] $SiteDomain$periodtitle"
. ( $k[0] ? " - " . $k[0] : "" )
. "</title>\n";
if ( $FrameName ne 'index' ) {
- if ($StyleSheet) {
- print "<link rel=\"stylesheet\" href=\"$StyleSheet\"$endtag\n";
+ if ($TOOLTIPON) {
+ if ( $BuildReportFormat eq 'xhtml' || $BuildReportFormat eq 'xml' ) {
+ if ( $ENV{'HTTP_USER_AGENT'} =~ /MSIE|Googlebot/i ){
+ print "<meta http-equiv=\"Content-Script-Type\" content=\"text/javascript\"$endtag\n";
}
@@ -8203,54 +8206,56 @@
$NewLinkParams =~ s/^&amp;//;
$NewLinkParams =~ s/&amp;$//;
if ($NewLinkParams) { $NewLinkParams = "${NewLinkParams}&amp;"; }
- print "\n<form name=\"FormFilter\" action=\""
+ print "\n<form"
+ . ($BuildReportFormat eq 'html' ? " name=\"FORMFILTER\"" : "")
+ . " id=\"FORMFILTER\" action=\""
. XMLEncode("$AWScript?${NewLinkParams}")
. "\" class=\"aws_border\">\n";
print
@@ -13045,24 +13083,27 @@
my $NewLinkTarget = '';
if ( $FrameName eq 'mainright' ) {
- $NewLinkTarget = " target=\"_parent\"";
+ $NewLinkTarget = "";
}
- print "<form name=\"FormDateFilter\" action=\""
+ print "<form"
+ . ($BuildReportFormat eq 'html' ? " name=\"FORMDATEFILTER\"" : "")
+ . " id=\"FORMDATEFILTER\" action=\""
. XMLEncode("$AWScript?${NewLinkParams}")
- . "\" style=\"padding: 0px 0px 0px 0px; margin-top: 0\"$NewLinkTarget>\n";
+ . "\" style=\"padding: 0px 0px 0px 0px; margin-top: 0\">\n";
}
--- C:\awstats-6.9-mod/wwwroot/css/awstats_bw.css Sun Apr 27 17:47:28 2008
+++ C:\AWStats69t_Jpn/wwwroot/css/awstats_bw.css Thu Jan 22 11:26:22 2009
@@ -10,7 +10,7 @@
border-right-width: 0px;
border-bottom-width: 0px;
}
-.aws_formfield { font: 13px verdana, arial, helvetica; }
+.aws_formfield { font: 13px verdana, arial, helvetica, sans-serif; }
.aws_button {
font-family: arial,verdana,helvetica, sans-serif;
font-size: 12px;
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/awstats.pl Fri Jan 23 11:16:31 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/awstats.pl Sat Feb 07 22:02:36 2009
@@ -10396,7 +10432,7 @@
my $regverfirefox = qr/firefox\/([\d\.]*)/i;
my $regversvn = qr/svn\/([\d\.]*)/i;
my $regvermozilla = qr/mozilla(\/|)([\d\.]*)/i;
- my $regnotie = qr/webtv|omniweb|opera/i;
+ my $regnotie = qr/webtv|omniweb|opera|sleipnir|avant browser|lunascape/i;
my $regnotnetscape = qr/gecko|compatible|opera|galeon|safari/i;
my $regreferer = qr/^(\w+):\/\/([^\/:]+)(:\d+|)/;
my $regreferernoquery = qr/^([^$URLQuerySeparators]+)/;
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/lib/browsers.pm Thu Jan 29 16:15:14 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/lib/browsers.pm Thu Jan 22 16:02:14 2009
@@ -32,6 +33,16 @@
# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_'
#-------------------------------------------------------
@BrowsersSearchIDOrder = (
+# Japanese Browsers
+# Added by Ryu 2007.01.06
+'another_html\-lint',
+'avant\sbrowser',
+'sleipnir',
+'lunascape',
+'j\-phone',
+'softbank',
+'kddi',
+'vodafone',
# Most frequent standard web browsers are first in this list (except msie, netscape and firefox)
'chrome',
'firebird',
@@ -115,6 +126,15 @@
'xaudio',
'xine',
'xmms',
+# RSS Readers added by Makoto Takahashi
+'apple\-pubsub',
+'fenrir\_headline\-reader',
+'goo\srss\sreader',
+'headline\-reader',
+'movabletype',
+'mt\-rssfeed',
+'rssbar',
+'webryreader',
# RSS Readers
'abilon',
'aggrevator',
@@ -213,6 +233,16 @@
# List of browser's name ('browser id in lower case', 'browser text')
#---------------------------------------------------------------
%BrowsersHashIDLib = (
+# Japanese Browsers
+# Added by Ryu 2007.01.06
+'another_html\-lint','Another HTML-lint',
+'avant\sbrowser','Avant Browser',
+'sleipnir','Sleipnir',
+'lunascape','Lunascape',
+'j\-phone','Vodafone (Non-3G)',
+'softbank','Softbank (3G)',
+'kddi','au by KDDI (HTML Compatible)',
+'vodafone','Vodafone (3G)',
# Common web browsers text
'msie','MS Internet Explorer',
'netscape','Netscape',
@@ -301,6 +331,15 @@
'xaudio','Some XAudio Engine based MPEG player (media player)',
'xine','Xine, a free multimedia player (media player)',
'xmms','XMMS (media player)',
+# RSS Readers added by Makoto Takahashi
+'apple\-pubsub','<a href="http://www.apple.com/jp/macosx/features/safari.html" title="Browser home page">Apple-PubSub (RSS Reader)</a>',
+'fenrir\_headline\-reader','Fenrir Headline-Reader Plugin',
+'goo\srss\sreader','Goo_RSS_Reader',
+'headline\-reader', 'Headline-Reader (RSS Reader)',
+'movabletype','MovableType (RSS Reader)',
+'mt\-rssfeed','mt-rssfeed (RSS Reader)',
+'rssbar','RssBar (RSS Reader)',
+'webryreader','WebryReader (RSS Reader)',
# RSS Readers
'abilon','Abilon (RSS Reader)',
'aggrevator', 'Aggrevator (RSS Reader)',
@@ -419,6 +458,15 @@
# file for this browser.
#---------------------------------------------------------------------------
%BrowsersHashIcon = (
+# Japanese Browsers
+# Added by Ryu 2007.01.06
+'avant\sbrowser','avant',
+'sleipnir','sleipnir',
+'lunascape','lunascape',
+'j\-phone','vodafone',
+'softbank','softbank',
+'kddi','au',
+'vodafone','vodafone',
# Standard web browsers
'msie','msie',
'netscape','netscape',
@@ -514,6 +562,16 @@
'webtv','webtv',
# Anonymous Proxy Browsers (can be used as grabbers as well...)
'cjb\.net','cjbnet',
+# RSS Readers added by Makoto Takahashi
+'apple\-pubsub', 'rss',
+'applesyndication', 'rss',
+'fenrir\_headline\-reader','rss',
+'goo\srss\sreader', 'rss',
+'headline\-reader', 'rss',
+'movabletype', 'rss',
+'mt\-rssfeed','rss',
+'rssbar','rss',
+'webryreader','rss',
# RSS Readers
'abilon', 'abilon',
'aggrevator', 'rss',
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/lib/robots.pm Thu Jan 29 16:18:34 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/lib/robots.pm Sun Jan 25 14:10:18 2009
@@ -333,6 +334,130 @@
# Note: Robots IDs are in lower case, '_', ' ' and '+' are changed into '[_+ ]' and are quoted.
#-------------------------------------------------------
@RobotsSearchIDOrder_list1 = (
+# Japanese Robots+alpha
+# Added by Ryu 2007.01.06
+'adsenserbot',
+'amfibibot',
+'baiduimagespider',
+'becomejpbot',
+'bibliotheca',
+'bizpal\srss\saggregator',
+'bookmark\srenewal\scheck\sagent',
+'camelstampede',
+'cazoodlebot',
+'coast\swebmaster',
+'cococ',
+'comaneci_bot',
+'crooz',
+'daumoa',
+'depspid\/',
+'diffbrowser\/',
+'dotbot',
+'ec_favorite',
+'empas_robot',
+'e\-societyrobot',
+'everyfeed-spider',
+'faedit\/',
+'fast\smetaweb\scrawler',
+'fastladder\sfeedfetcher',
+'feed24\.com',
+'feedbringer',
+'feedchecker',
+'feed\sparser',
+'feedpath',
+'feedshow',
+'fly\/',
+'freshreader',
+'fsbot',
+'g10\sreader',
+'goorssreader',
+'grub\-client',
+'gsa\-crawler',
+'hatena',
+'headline\-reader',
+'hobbit\sbbtest\-net',
+'html\sget',
+'hyperestraier',
+'hyperrobot',
+'icc\-crawler',
+'ilissurf',
+'indexpert',
+'internal\sdummy\sconnection',
+'jetbot',
+'kb\.rmail',
+'keywalkerbot',
+'klsh\-pageget\/',
+'kotoha\.co\.jp',
+'libghttp\/',
+'livedoorcheckers',
+'livedoor\sfeedfetcher',
+'livedoor\sscreenshot\/',
+'livedoor\shttpclient\/',
+'maldive\scrawler',
+'masagool',
+'mfcrawler',
+'mogimogi\/',
+'metalogger',
+'mlbot',
+'mqbot',
+'msr\-isrccrawler',
+'multicrawler',
+'naver',
+'niyonizer',
+'ndl\-japan\-research\-robot',
+'netresearchserver',
+'nettracker',
+'netvibes',
+'newsalloy',
+'^obot',
+'onetszukaj',
+'openbot',
+'oracle\ssecure\senterprise\ssearch',
+'outfoxbot',
+'page_verifier',
+'pear\shttp_request\sclass',
+'paipo\-bot',
+'pathtraq',
+'pingdom\sgigrib',
+'pipeliner',
+'playon\srss\sreader\/',
+'pockey\-gethtml',
+'protopage\/',
+'research\-spider',
+'search\-hp_bot',
+'seo\.cug\.net\slink\schecker',
+'shopwiki\/',
+'snapbot',
+'snoopy\sv',
+'sogou\sweb\sspider',
+'sonar\/', #Added by toshi 2006.04.09
+'sonarplus\/',
+'spamrobot@126\.com',
+'sproose\/',
+'stackrambler',
+'strategic\sboard\sbot',
+'statbot@gmail\.com',
+'technoratisnoop',
+'techrigybot',
+'tencenttraveler', # Must be before msiecrawler
+'trackback\/',
+'umn\/',
+'useragent',
+'w3crobot',
+'wadaino\.jp\-crawler',
+'webalta',
+'webauto',
+'webaroobot',
+'webdigity\swhois\sservice',
+'website\sexplorer',
+'wish\-la',
+'wish\-project',
+'wiwi',
+'wwwster\/',
+'yeti\/',
+'zao\-crawler',
+'zibber',
+'^-$',
# Common robots (In robot file)
'appie',
'architext',
@@ -343,12 +468,12 @@
'googlebot',
'google\-sitemaps',
'gulliver',
-'virus[_+ ]detector', # Must be before harvest
+'virus\_detector', # Must be before harvest
'harvest',
'htdig',
'linkwalker',
'lilina',
-'lycos[_+ ]',
+'lycos_',
'moget',
'muscatferret',
'myweb',
@@ -498,7 +623,7 @@
'kapsi',
'katipo',
'kilroy',
-'ko[_+ ]yappo[_+ ]robot',
+'ko_yappo_robot',
'kummhttp',
'labelgrabber\.txt',
'larbin',
@@ -584,7 +709,7 @@
'snooper',
'solbot',
'speedy',
-'spider[_+ ]monkey',
+'spider_monkey',
'spiderbot',
'spiderline',
'spiderman',
@@ -672,8 +797,8 @@
'bender',
'biglotron',
'bittorrent_bot',
-'biz360[_+ ]spider',
-'blogbridge[_+ ]service',
+'biz360\sspider',
+'blogbridge\sservice',
'bloglines',
'blogpulse',
'blogsearch',
@@ -686,9 +811,9 @@
'bookmark\-manager',
'boris',
'bumblebee',
-'candlelight[_+ ]favorites[_+ ]inspector',
+'candlelight\_favorites\_inspector',
'cbn00glebot',
-'cerberian_drtrs',
+'cerberian\sdrtrs',
'cfnetwork',
'cipinetbot',
'checkweb_link_validator',
@@ -707,7 +832,7 @@
'deepindex',
'dipsie\.bot',
'dnsgroup',
-'docomo',
+#'docomo',
'domainchecker',
'domainsdb\.net',
'dulance',
@@ -718,7 +843,7 @@
'edgeio\-retriever',
'ets_v',
'exactseek',
-'extreme[_+ ]picture[_+ ]finder',
+'extreme\_picture\_finder',
'eventax',
'everbeecrawler',
'everest\-vulcan',
@@ -727,7 +852,7 @@
'fast_enterprise_crawler.*crawleradmin\.t\-info@telekom\.de',
'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de',
'matrix_s\.p\.a\._\-_fast_enterprise_crawler', # must come before fast enterprise crawler
-'fast_enterprise_crawler',
+'fast\senterprise\scrawler',
'fast\-search\-engine',
'favorg',
'favorites_sweeper',
@@ -758,7 +883,7 @@
'hoowwwer',
'hpprint',
'htmlparser',
-'html[_+ ]link[_+ ]validator',
+'html\_link\_validator',
'httrack',
'hundesuche\.com\-bot',
'ichiro',
@@ -767,14 +892,14 @@
'infociousbot',
'infomine',
'insurancobot',
-'internet[_+ ]ninja',
+'internet\_ninja',
'internetarchive',
'internetseer',
'internetsupervision',
'irlbot',
'isearch2006',
'iupui_research_bot',
-'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility',
+'jrtwine\_software\_check\_favorites\_utility',
'justview',
'kalambot',
'kamano\.de_newsfeedverzeichnis',
@@ -806,7 +931,7 @@
'mediapartners\-google',
'megite',
'metaspinner',
-'microsoft[_+ ]url[_+ ]control',
+'microsoft\surl\scontrol',
'mini\-reptile',
'minirank',
'missigua_locator',
@@ -816,7 +941,7 @@
'mj12bot',
'mojeekbot',
'msiecrawler',
-'ms_search_4\.0_robot',
+'ms\ssearch\s4\.0\srobot',
'msrabot',
'msrbot',
'mt::telegraph::agent',
@@ -835,8 +960,8 @@
'nutch', # Must come after other nutch versions
'ocelli',
'octora_beta_bot',
-'omniexplorer[_+ ]bot',
-'onet\.pl[_+ ]sa',
+'omniexplorer\_bot',
+'onet\.pl\_sa',
'onfolio',
'opentaggerbot',
'openwebspider',
@@ -848,7 +973,7 @@
'pear_http_request_class',
'peerbot',
'perman',
-'php[_+ ]version[_+ ]tracker',
+'php\sversion\stracker',
'pictureofinternet',
'ping\.blo\.gs',
'plinki',
@@ -870,9 +995,9 @@
'sbider',
'schizozilla',
'scumbot',
-'searchguild[_+ ]dmoz[_+ ]experiment',
+'searchguild\_dmoz\_experiment',
'seekbot',
-'sensis_web_crawler',
+'sensis\sweb\scrawler',
'seznambot',
'shim\-crawler',
'shoutcast',
@@ -881,7 +1006,7 @@
'sohu\-search',
'sohu', # "sohu agent"
'snappy',
-'sphere_scout',
+'sphere\sscout',
'sproose_crawler',
'steeler',
'steroid__download',
@@ -895,7 +1020,7 @@
'tcl_http_client_package',
'technoratibot',
'teragramcrawlersurf',
-'test_crawler',
+'test\scrawler',
'testbot',
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e',
'topicblogs',
@@ -918,7 +1043,7 @@
'vortex',
'vse',
'w3c\-checklink',
-'w3c[_+ ]css[_+ ]validator[_+ ]jfouffa',
+'w3c\_css\_validator\_jfouffa',
'w3c_validator',
'wavefire',
'webclipping\.com',
@@ -929,14 +1054,14 @@
'webfilter',
'webindexer',
'webminer',
-'website[_+ ]monitoring[_+ ]bot',
+'website\_monitoring\_bot',
'webvulncrawl',
'wells_search',
'wonderer',
'wume_crawler',
'wwweasel',
'xenu\'s_link_sleuth',
-'xenu_link_sleuth',
+'xenu\slink\ssleuth',
'xirq',
'y!j', # Must come after keyoshid Y!J
'yacy',
@@ -957,7 +1082,8 @@
'ng\/1\.', # put at end to avoid false positive
'ng\/2\.', # put at end to avoid false positive
'exabot', # put at end to avoid false positive
-'java' # put at end to avoid false positive
+'java', # put at end to avoid false positive
+'uri::fetch'
);
@RobotsSearchIDOrder_listgen = (
# Generic robot
@@ -975,6 +1101,130 @@
# List of robots names ('robot id','robot clear text')
#-------------------------------------------------------
%RobotsHashIDLib = (
+# Japanese Robots+alpha
+# Added by Ryu 2006.03.15
+'adsenserbot','<a href="http://adsenser.jp">AdSenserBot</a>',
+'amfibibot','Amfibibot',
+'baiduimagespider','<a href="http://www.baidu.com/search/spider.html">BaiduImageSpider</a>',
+'becomejpbot','<a href="http://www.become.co.jp/site_owner.html">BecomeJPBot</a>',
+'bibliotheca','<a href="http://www.hitachi.co.jp/Prod/comp/soft1/textsearch/product/component/bib21/">Bibliotheca</a>',
+'bizpal\srss\saggregator','<a href="http://bizpal.jp">BizPal RSS Aggregator</a>',
+'bookmark\srenewal\scheck\sagent','<a href="http://www.bookmark.ne.jp">Bookmark Renewal Check Agent</a>',
+'camelstampede','CamelStampede',
+'cazoodlebot','<a href="http://www.cazoodle.com">CazoodleBot</a>',
+'coast\swebmaster','COAST WebMaster',
+'cococ','<a href="http://am13.net/wiki/index.php?cococ">cococ</a>',
+'comaneci_bot','<a href="http://help.i-know.jp/?crawler">Comaneci bot</a>',
+'crooz','DoCoMo CROOZ',
+'daumoa','<a href="http://www.daum.net">DAUMOA</a>',
+'depspid\/','<a href="http://about.depspid.net">DepSpid</a>',
+'diffbrowser\/','<a href="http://www010.upp.so-net.ne.jp/suede/diffbrowser.html">DiffBrowser</a>',
+'dotbot','<a href="http://www.dotnetdotcom.org">Dotbot</a>',
+'ec_favorite','eC_favorite',
+'empas_robot','EMPAS ROBOT',
+'e\-societyrobot','<a href="http://www.yama.info.waseda.ac.jp/~yamana/es/">e-SocietyRobot</a>',
+'everyfeed-spider','Everyfeed Spider',
+'faedit\/','<a href="http://www.srcw.net/wiki/index.php?FaEdit">FaEdit</a>',
+'fast\smetaweb\scrawler','<a href="http://fastsearch.com">FAST MetaWeb Crawler</a>',
+'fastladder\sfeedfetcher','<a href="http://fastladder.com">Fastladder FeedFetcher</a>',
+'feed24\.com','<a href="http://feed24.com">Feed24.com</a>',
+'feedbringer','<a href="http://feedbringer.net" title="Bot home page">FEEDBRINGER</a>',
+'feedchecker','FeedChecker',
+'feed\sparser','<a href="http://rss-search.net">Feed Parser</a>',
+'feedpath','<a href="http://feedpath.jp" title="Bot home page">Feedpath</a>',
+'feedshow','<a href="http://www.feedshow.com" title="Bot home page">Feedshow</a>',
+'fly\/','fly sinet.ad.jp',
+'freshreader','<a href="http://www.freshreader.com" title="Bot home page">FreshReader</a>',
+'fsbot','<a href="http://www.adin.co.jp/fs/">Flex Search</a>',
+'g10\sreader','<a href="http://wordg10.com" title="Bot home page">G10 Reader</a>',
+'goorssreader','goo RSS Reader',
+'grub\-client','Grub Client',
+'gsa\-crawler','<a href="http://www.google.com/enterprise/gsa/index.html" title="Bot home page">GSA Crawler</a>',
+'hatena','<a href="http://a.hatena.ne.jp/help" title="Bot home page">はてなアンテナ</a>',
+'headline\-reader','Headline-Reader',
+'hobbit\sbbtest\-net','<a href="http://hobbitmon.sourceforge.net" title="Bot home page">Hobbit bbtest-net</a>',
+'html\sget','HTML Get(SPAM)',
+'hyperestraier','<a href="http://hyperestraier.sourceforge.net/index.html" title="Bot home page">HyperEstraier</a>',
+'hyperrobot','HyperRobot InfoWeb',
+'icc\-crawler','<a href="http://kc.nict.go.jp/icc/crawl-ja.html">ICC-Crawler</a>',
+'ilissurf','<a href="http://software.fujitsu.com/jp/ilis_univ/surf/" title="iLisSurf - FUJITSU Japan">iLisSurf</a>',
+'indexpert','indexpert',
+'internal\sdummy\sconnection','internal dummy connection for <a href="http://httpd.apache.org/docs/2.0/mod/mod_dav.html" title="Apache モジュール mod_dav">Apache WebDAV</a>',
+'jetbot','<a href="http://www.jetrun.jp">jetbot</a>',
+'kb\.rmail','<a href="http://www.r-mail.org">kb.Rmail</a>',
+'keywalkerbot','<a href="http://www.keywalker.co.jp/crawl/bot.html">Keywalkerbot</a>',
+'klsh\-pageget\/','<a href="http://www.kondo-net.gr.jp/klsh/">KLSH-PageGet</a>',
+'kotoha\.co\.jp','コトハコ',
+'libghttp\/','libghttp(恐らくSPAM)',
+'livedoorcheckers','Livedoor Checkers',
+'livedoor\sfeedfetcher','<a href="http://reader.livedoor.com" title="Bot home page">livedoor FeedFetcher</a>',
+'livedoor\sscreenshot\/','<a href="http://reader.livedoor.com" title="Bot home page">livedoor ScreenShot</a>',
+'livedoor\shttpclient\/','livedoor HttpClient',
+'maldive\scrawler','Maldive crawler',
+'masagool','<a href="http://sagool.jp">MaSagool</a>',
+'metalogger','Metalogger',
+'mfcrawler','MFcrawler',
+'mogimogi\/','mogimogi',
+'mlbot','<a href="http://www.metadatalabs.com">MLBot</a>',
+'mqbot','<a href="http://metaquerier.cs.uiuc.edu">MQbot</a>',
+'msr\-isrccrawler','MSR-ISRCCrawler',
+'multicrawler','<a href="http://sw.deri.org/2006/04/multicrawler/robots.html">MultiCrawler</a>',
+'naver','NaverBot',
+'niyonizer','NIYONIZER',
+'ndl\-japan\-research\-robot','国立国会図書館',
+'netresearchserver','<a href="http://loopimprovements.com/robot.html">Net Research Server (NRS)</a>',
+'nettracker','NetTracker',
+'netvibes','<a href="http://www.netvibes.com" title="Bot home page">Netvibes</a>',
+'newsalloy','<a href="http://www.NewsAlloy.com" title="Bot home page">NewsAlloy</a>',
+'^obot','oBot',
+'onetszukaj','<a href="http://szukaj.onet.pl">OnetSzukaj</a>',
+'openbot','Openfind data gatherer',
+'oracle\ssecure\senterprise\ssearch','Oracle Secure Enterprise Search',
+'outfoxbot','<a href="http://www.yodao.com/help/webmaster/spider/" title="YodaoBot">old OutfoxBot</a>',
+'page_verifier','<a href="http://www.securecomputing.com/PageVerifier.cfm">page_verifier</a>',
+'pear\shttp_request\sclass','<a href="http://pear.php.net">PEAR HTTP_Request class</a>',
+'paipo\-bot','<a href="http://paipo.jp">PAIPO-Bot</a>',
+'pathtraq','<a href="http://pathtraq.com/about">Pathtraq</a>',
+'pingdom\sgigrib','<a href="http://www.pingdom.com">Pingdom GIGRIB</a>',
+'pipeliner','PipeLine Spider',
+'playon\srss\sreader\/','<a href="http://playon.jp/rss/">PLAYON RSS READER</a>',
+'pockey\-gethtml','Pockey GetHTML',
+'protopage\/','<a href="http://www.protopage.com">Protopage</a>',
+'research\-spider','<a href="http://www.freedownloadscenter.com/Network_and_Internet/Web_Searching_Tools/Research_Spider.html">Research Spider</a>',
+'search\-hp_bot','<a href="http://search-hp.com" title="Bot home page">search-hp_bot</a>',
+'seo\.cug\.net\slink\schecker','<a href="http://seo.cug.net">seo.cug.net link checker</a>',
+'shopwiki\/','<a href="http://www.shopwiki.com/wiki/Help:Bot">ShopWiki</a>',
+'snapbot','Snapbot',
+'snoopy\sv','<a href="http://sourceforge.net/projects/snoopy/">Snoopy</a>',
+'sogou\sweb\sspider','<a href="http://www.sogou.com/docs/help/webmasters.htm#07">Sogou web spider</a>',
+'sonar\/','<a href="http://boxer.ne.jp/product_list/sonar/">Sonar Crawler</a>', #Added by toshi 2006.04.09
+'sonarplus\/','<a href="http://boxer.ne.jp/product_list/sonar_plus/">Sonar PLUS Crawler</a>',
+'spamrobot@126\.com','126.com',
+'sproose\/','<a href="http://www.sproose.com/bot.html" title="Bot home page">sproose bot</a>',
+'stackrambler','StackRambler',
+'statbot@gmail\.com','Gmail Com (Google)',
+'strategic\sboard\sbot','<a href="http://www.strategicboard.com" title="Bot home page">Strategic Board Bot</a>',
+'technoratisnoop','TechnoratiSnoop(恐らくSPAM)',
+'techrigybot','<a href="http://www.techrigy.com" title="Bot home page">TechrigyBot</a>',
+'tencenttraveler','TencentTraveler', # Must be before msiecrawler.
+'trackback\/','TrackBack(恐らくSPAM)',
+'umn\/','<a href="http://www.nori-s.net/soft/umn/">URLマネージャ</a>',
+'useragent','USERAGENT(恐らくSPAM)',
+'w3crobot','W3CRobot',
+'wadaino\.jp\-crawler','<a href="http://wadaino.jp">話題の.jpクローラー</a>',
+'webalta','<a href="http://www.webalta.net/ru/about_webmaster.html">WebAlta Crawler</a>',
+'webauto','<a href="http://www.yanasoft.co.jp/webautodoc.html">WebAuto</a>',
+'webaroobot','<a href="http://www.webaroo.com/rooSiteOwners.html">Webaroo Bot</a>',
+'webdigity\swhois\sservice','<a href="http://www.webdigity.com/ws/">webdigity whois service</a>',
+'website\sexplorer','<a href="http://www.umechando.com/webex/">Website Explorer</a>',
+'wish\-la','<a href="http://wish.slis.tsukuba.ac.jp/jp/">Wish Project(wish la)</a>',
+'wish\-project','<a href="http://wish.slis.tsukuba.ac.jp/jp/">Wish Project</a>',
+'wiwi','<a href="http://wi2.jp">WiWi</a>',
+'wwwster\/','<a href="mailto:gue@cis.uni-muenchen.de">wwwster</a>',
+'yeti\/','Yeti',
+'zao\-crawler','Zao Crawler',
+'zibber','<a href="http://www.zibb.com/CrawlerInformaion.aspx">Zibb Crawler</a>',
+'^-$','-(恐らくSPAM)',
# Common robots (In robot file)
'appie','<a href="http://www.walhello.com" title="Bot home page">Walhello appie</a>',
'architext','ArchitextSpider',
@@ -985,12 +1235,12 @@
'googlebot','<a href="http://www.google.com/bot.html" title="Bot home page">Googlebot</a>',
'google\-sitemaps', 'Google Sitemaps',
'gulliver','Northern Light Gulliver',
-'virus[_+ ]detector','<a href="http://www.securecomputing.com" title="virus_harvester@securecomputing.com; Bot home page">virus_detector</a>',
+'virus\_detector','<a href="http://www.securecomputing.com" title="virus_harvester@securecomputing.com; Bot home page">virus_detector</a>',
'harvest','Harvest',
'htdig','ht://Dig',
'linkwalker','LinkWalker',
'lilina','Lilina',
-'lycos[_+ ]','Lycos',
+'lycos_','Lycos',
'moget','moget',
'muscatferret','Muscat Ferret',
'myweb','Internet Shinchakubin',
@@ -1087,7 +1337,7 @@
'felix','Felix IDE',
'fetchrover','FetchRover',
'fido','fido',
-'finnish','H���ki',
+'finnish','Hämähäkki',
'fireball','KIT-Fireball',
'fouineur','Fouineur',
'francoroute','Robot Francoroute',
@@ -1137,8 +1387,8 @@
'kapsi','image.kapsi.net',
'katipo','Katipo',
'kilroy','Kilroy',
-'ko[_+ ]yappo[_+ ]robot','KO_Yappo_Robot',
-'kummhttp','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b" title="Bot documentation page">KummHttp</a>',
+'ko_yappo_robot','KO_Yappo_Robot',
+'kummhttp','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_g_l_301105_2\b" title="Bot documentation page">KummHttp</a>',
'labelgrabber\.txt','LabelGrabber',
'larbin','<a href="http://para.inria.fr/~ailleret/larbin/index-eng.html" title="Bot home page">larbin</a>',
'legs','legs',
@@ -1226,15 +1476,15 @@
'snooper','Snooper',
'solbot','Solbot',
'speedy','<a href="http://www.entireweb.com/about/search_tech/speedyspider/" title="Speedy Spider home page">Speedy Spider</a>',
-'spider[_+ ]monkey','Spider monkey',
+'spider_monkey','Spider monkey',
'spiderbot','SpiderBot',
'spiderline','Spiderline Crawler',
'spiderman','<a href="http://www.iscrawling.com" title="Spiderman home page">Spiderman</a>',
-'spiderview','SpiderView(tm)',
+'spiderview','SpiderView™',
'spry','Spry Wizard Robot',
'ssearcher','Site Searcher',
-'sqworm','<a href="http://www.websense.com" title="Bot home page (source: http://www.pgts.com.au/)">Sqworm</a>',
-'suke','Suke',
+'sqworm','<a href="http://www.websense.com" title="Bot home page (source: http://www.pgts.com.au)">Sqworm</a>',
+'suke','<a href="http://kensaku.org" title="Bot home page">Suke</a>',
'sunrise','<a href="http://www.sunrisexp.com" title="Sunrise home page">Sunrise</a>',
'suntek','suntek search engine',
'sven','Sven',
@@ -1290,7 +1540,7 @@
'wombat','The Web Wombat',
'wordpress','<a href="http://wordpress.org" title="WordPress home page">WordPress</a>',
'worm','The World Wide Web Worm',
-'wwwc','WWWC Ver 0.2.5',
+'wwwc','WWWC',
'wz101','WebZinger',
'xget','XGET',
# Other robots reported by users
@@ -1314,8 +1564,8 @@
'bender','<a href="http://bender.ucr.edu" title="Bot home page">bender</a> <a href="http://ivia.ucr.edu/manuals/NiFC/current/index.shtml" title="Bot home page">focused_crawler</a>',
'biglotron','<a href="http://www.biglotron.com/robot.html" title="Bot home page">Biglotron</a>',
'bittorrent_bot','<a href="http://www.bittorrent.com" title="Bot home page">BitTorrent Bot</a>',
-'biz360[_+ ]spider','<a href="http://www.biz360.com" title="blogsmanager@biz360.com Bot home page">Biz360 spider</a>',
-'blogbridge[_+ ]service','<a href="http://www.blogbridge.com" title="Bot home page">BlogBridge Service</a>',
+'biz360\sspider','<a href="http://www.biz360.com" title="blogsmanager@biz360.com Bot home page">Biz360 spider</a>',
+'blogbridge\sservice','<a href="http://www.blogbridge.com" title="Bot home page">BlogBridge Service</a>',
'bloglines','<a href="http://www.bloglines.com" title="Bot home page">Bloglines</a>',
'blogpulse','<a href="http://www.intelliseek.com" title="Bot home page">BlogPulse ISSpider intelliseek.com</a>',
'blogsearch','<a href="http://www.icerocket.com" title="Bot home page">BlogSearch</a>',
@@ -1328,9 +1578,9 @@
'bookmark\-manager','<a href="http://bkm.sourceforge.net" title="Bookmark-Manager home page">Bookmark-Manager</a>',
'boris', 'Boris',
'bumblebee', 'Bumblebee (relevare.com)',
-'candlelight[_+ ]favorites[_+ ]inspector','<a href="http://www.candlelight.com/home.html" title="Candlelight_Favorites_Inspector home page">Candlelight_Favorites_Inspector</a>',
+'candlelight\_favorites\_inspector','<a href="http://www.candlelight.com/home.html" title="Candlelight_Favorites_Inspector home page">Candlelight_Favorites_Inspector</a>',
'cbn00glebot','cbn00glebot',
-'cerberian_drtrs','<a href="http://www.pgts.com.au/cgi-bin/psql?robot_info=25240" title="Bot home page">Cerberian Drtrs</a>',
+'cerberian\sdrtrs','<a href="http://www.pgts.com.au/cgi-bin/psql?robot_info=25240" title="Bot home page">Cerberian Drtrs</a>',
'cfnetwork','<a href="http://www.cocoadev.com/index.pl?CFNetwork" title="CFNetwork home page">CFNetwork</a>',
'cipinetbot','<a href="http://www.cipinet.com/bot.html" title="CipinetBot home page">CipinetBot</a>',
'checkweb_link_validator','<a href="http://p.duby.free.fr/chkweb.htm" title="CheckWeb link validator home page">CheckWeb link validator</a>',
@@ -1348,7 +1598,7 @@
'deepindex','<a href="http://www.deepindex.net/faq.php" title="Deepindex home page">Deepindex</a>',
'dipsie\.bot','<a href="http://www.dipsie.com/bot/" title="Bot home page">Dipsie</a>',
'dnsgroup','<a href="http://www.dnsgroup.com" title="DNSGroup home page">DNSGroup</a>',
-'docomo','<a href="http://www.nttdocomo.co.jp" title="DoCoMo home page">DoCoMo</a>',
+#'docomo','<a href="http://www.nttdocomo.co.jp" title="DoCoMo home page">DoCoMo</a>',
'domainchecker','<a href="http://net-promoter.com" title="DomainChecker home page (not confirmed)">DomainChecker</a>',
'domainsdb\.net','<a href="http://domainsdb.net" title="Bot home page">DomainsDB.net</a>',
'dulance','<a href="http://www.dulance.com/bot.jsp" title="Bot home page">Dulance</a>',
@@ -1359,14 +1609,14 @@
'edgeio\-retriever','<a href="http://www.edgeio.com" title="Bot home page">edgeio-retriever</a>',
'ets_v','<a href="http://www.freetranslation.com/help/" title="ETS home page">ETS</a> Enterprise Translation Server',
'exactseek','ExactSeek Crawler',
-'extreme[_+ ]picture[_+ ]finder','<a href="http://www.exisoftware.com" title="Extreme_Picture_Finder home page">Extreme_Picture_Finder</a>',
+'extreme\_picture\_finder','<a href="http://www.exisoftware.com" title="Extreme_Picture_Finder home page">Extreme_Picture_Finder</a>',
'eventax','<a href="http://www.eventax.de" title="eventax home page">eventax</a>',
'everbeecrawler','EverbeeCrawler',
'everest\-vulcan','<a href="http://everest.vulcan.com/crawlerhelp" title="Bot home page">Everest-Vulcan</a>',
'ezresult', 'Ezresult',
'enteprise','<a href="http://www.fastsearch.com" title="Bot home page">Fast Enteprise Crawler</a>',
'fast\-search\-engine','<a href="http://www.fast-search-engine.com" title="Bot home page">Fast-Search-Engine</a> (not fastsearch.com)',
-'fast_enterprise_crawler','<a href="http://www.fast.no" title="FAST Enterprise Crawler home page">FAST Enterprise Crawler</a>',
+'fast\senterprise\scrawler','<a href="http://www.fast.no" title="FAST Enterprise Crawler home page">FAST Enterprise Crawler</a>',
'fast_enterprise_crawler.*scrawleradmin\.t\-info@telekom\.de','<a href="http://www.telekom.de" title="FAST Enterprise Crawler * crawleradmin.t-info@telekom.de home page">FAST Enterprise Crawler * crawleradmin.t-info@telekom.de</a>',
'matrix_s\.p\.a\._\-_fast_enterprise_crawler','<a href="http://tin.virgilio.it" title="Matrix S.p.A. - FAST Enterprise Crawler home page">Matrix S.p.A. - FAST Enterprise Crawler</a>',
'fast_enterprise_crawler.*t\-info_bi_cluster_crawleradmin\.t\-info@telekom\.de','<a href="http://www.telekom.de" title="FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de home page">FAST Enterprise Crawler * T-Info_BI_cluster crawleradmin.t-info@telekom.de</a>',
@@ -1392,30 +1642,30 @@
'goforit\.com','<a href="http://www.goforit.com/about/" title="GoForIt.com home page">GoForIt.com</a>',
'goforitbot','<a href="http://www.goforit.com/about/" title="GOFORITBOT home page">GOFORITBOT</a>',
'gpu_p2p_crawler','<a href="http://gpu.sourceforge.net/search_engine.php" title="Bot home page">GPU p2p crawler</a>',
-'grub','Grub.org',
+'grub','<a href="http://www.grub.org">Grub.org</a>',
'henrythemiragorobot', '<a href="http://www.miragorobot.com/scripts/mrinfo.asp" title="Bot home page">Mirago</a>',
'heritrix','<a href="http://crawler.archive.org" title="(used by a few different companies) Bot home page">Heritrix</a>',
'holmes', 'Holmes',
'hoowwwer','<a href="http://cosco.hiit.fi/search/hoowwwer/" title="HooWWWer home page">HooWWWer</a>',
'hpprint','HPPrint',
'htmlparser','<a href="http://htmlparser.sourceforge.net" title="HTMLParser home page">HTMLParser</a>',
-'html[_+ ]link[_+ ]validator','<a href="http://www.lithopssoft.com/ " title="Html_Link_Validator home page">Html_Link_Validator</a>',
+'html\_link\_validator','<a href="http://www.lithopssoft.com" title="Html_Link_Validator home page">Html_Link_Validator</a>',
'httrack','<a href="http://www.httrack.com" title="Bot home page">HTTrack off-line browser</a>',
'hundesuche\.com\-bot','<a href="http://www.hundesuche.com" title="Hundesuche.com-Bot home page">Hundesuche.com-Bot</a>',
'ichiro','<a href="http://help.goo.ne.jp/door/crawlerE.html" title="Bot home page">ichiro</a>',
'iltrovatore\-setaccio','<a href="http://www.iltrovatore.it/aiuto/motore_di_ricerca.html" title="bot@iltrovatore.it IlTrovatore-Setaccio home page">IlTrovatore-Setaccio</a>',
'infobot','<a href="http://www.infobot.org" title="InfoBot home page">InfoBot</a>',
'infociousbot','<a href="http://corp.infocious.com/tech_crawler.php" title="InfociousBot home page">InfociousBot</a>',
-'infomine','<a href="http://infomine.ucr.edu/useragents" title="Bot home page">INFOMINE VLCrawler</a>',
+'infomine','<a href="http://infomine.ucr.edu/useragents/" title="Bot home page">INFOMINE VLCrawler</a>',
'insurancobot','<a href="http://www.fastspywareremoval.com" title="InsurancoBot home page">InsurancoBot</a>',
-'internet[_+ ]ninja','<a href="http://www.dti.ne.jp/ " title="Internet_Ninja home page">Internet_Ninja </a>',
+'internet\_ninja','<a href="http://www.dti.ne.jp" title="Internet_Ninja home page">Internet_Ninja </a>',
'internetarchive','<a href="http://lucene.apache.org/nutch/bot.html " title="InternetArchive home page">InternetArchive</a>',
'internetseer', 'InternetSeer',
'internetsupervision','<a href="http://internetsupervision.com" title="InternetSupervision home page">InternetSupervision</a>',
'irlbot','<a href="http://irl.cs.tamu.edu/crawler" title="Bot home page">IRLbot</a>',
'isearch2006','<a href="http://www.yahoo.com.cn" title="isearch2006 home page">isearch2006</a>',
'iupui_research_bot','<a href="http://spamhuntress.com/2005/04/25/a-mail-harvester-visits/" title="IUPUI_Research_Bot home page">IUPUI_Research_Bot</a>',
-'jrtwine[_+ ]software[_+ ]check[_+ ]favorites[_+ ]utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility home page">JRTwine_Software_Check_Favorites_Utility</a>',
+'jrtwine\_software\_check\_favorites\_utility','<a href="http://www.jrtwine.com/Products/CheckFavs/" title="JRTwine_Software_Check_Favorites_Utility home page">JRTwine_Software_Check_Favorites_Utility</a>',
'justview', 'JustView',
'kalambot','<a href="http://64.124.122.251/feedback.html" title="KalamBot home page">KalamBot</a>',
'kamano\.de_newsfeedverzeichnis','<a href="http://www.kamano.de" title="kamano.de NewsFeedVerzeichnis home page">kamano.de NewsFeedVerzeichnis</a>',
@@ -1444,7 +1694,7 @@
'megite','<a href="http://www.megite.com" title="Megite home page">Megite</a>',
'metager\-linkchecker','MetaGer LinkChecker',
'metaspinner','<a href="http://index.meta-spinner.de" title="Metaspinner home page">Metaspinner</a>',
-'microsoft[_+ ]url[_+ ]control','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control home page">Microsoft URL Control</a>',
+'microsoft\surl\scontrol','<a href="http://www.webmasterworld.com/forum11/1005.htm" title="Microsoft URL Control home page">Microsoft URL Control</a>',
'minirank','<a href="http://minirank.com" title="miniRank home page">miniRank</a>',
'mini\-reptile','Mini-reptile',
'missigua_locator','<a href="http://www.webmasterworld.com/forum11/2690.htm" title="Missigua_Locator home page">Missigua_Locator</a>',
@@ -1454,7 +1704,7 @@
'mj12bot','<a href="http://majestic12.co.uk/bot.php" title="Bot home page.">MJ12bot</a>',
'mojeekbot','<a href="http://www.mojeek.com/bot.html" title="Bot home page.">MojeekBot</a>',
'msiecrawler','<a href="http://msdn.microsoft.com/workshop/delivery/offline/linkrel.asp" title="Bot home page.">MSIECrawler</a>',
-'ms_search_4\.0_robot','<a href="http://support.microsoft.com/default.aspx?scid=kb;en-us;284022" title="Bot home page.">MS SharePoint Portal Server - MS Search 4.0 Robot</a>',
+'ms\ssearch\s4\.0\srobot','<a href="http://support.microsoft.com/default.aspx?scid=kb;en-us;284022" title="Bot home page.">MS SharePoint Portal Server - MS Search 4.0 Robot</a>',
'msrabot','msrabot',
'msrbot','<a href="http://research.microsoft.com/research/sv/msrbot/" title="MSRBOT home page">MSRBOT</a>',
'mt::telegraph::agent','MT::Telegraph::Agent',
@@ -1473,10 +1723,10 @@
'nutch','<a href="http://lucene.apache.org/nutch/" title="Bot home page. Used by many, including Looksmart.">Nutch</a>',
'ocelli','<a href="http://www.globalspec.com/Ocelli/" title="Ocelli home page">Ocelli</a>',
'octora_beta_bot','<a href="http://www.octora.com" title="Bot home page">Octora Beta Bot</a>',
-'omniexplorer[_+ ]bot','<a href="http://www.omni-explorer.com" title="Bot home page.">OmniExplorer Bot</a>',
-'onet\.pl[_+ ]sa','<a href="http://szukaj.onet.pl" title="Onet.pl_SA home page">Onet.pl_SA</a>',
-'onfolio','<a href="http://www.onfolio.com" title="Bot home page [new window]">Onfolio</a>',
-'opentaggerbot','<a href="http://www.opentagger.com/opentaggerbot.htm" title="Bot home page [new window]">OpenTaggerBot</a>',
+'omniexplorer\_bot','<a href="http://www.omni-explorer.com" title="Bot home page.">OmniExplorer Bot</a>',
+'onet\.pl\_sa','<a href="http://szukaj.onet.pl" title="Onet.pl_SA home page">Onet.pl_SA</a>',
+'onfolio','<a href="http://www.onfolio.com" title="Bot home page">Onfolio</a>',
+'opentaggerbot','<a href="http://www.opentagger.com/opentaggerbot.htm" title="Bot home page">OpenTaggerBot</a>',
'openwebspider','<a href="http://www.openwebspider.org" title="OpenWebSpider home page">OpenWebSpider</a>',
'oracle_ultra_search','<a href="http://www.oracle.com/technology/products/ultrasearch/index.html" title="Oracle Ultra Search home page">Oracle Ultra Search</a>',
'orbiter','<a href="http://www.dailyorbit.com/bot.htm" title="Orbiter home page">Orbiter</a>',
@@ -1485,8 +1735,10 @@
'passwordmaker\.org','<a href="http://passwordmaker.org" title="passwordmaker.org home page">passwordmaker.org</a>',
'pear_http_request_class','<a href="http://pear.php.net" title="PEAR HTTP Request class home page">PEAR HTTP Request class</a>',
'peerbot','<a href="http://www.peerbot.com" title="PEERbot home page">PEERbot</a>',
-'perman', 'Perman surfer',
-'php[_+ ]version[_+ ]tracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP Version Tracker home page">PHP version tracker</a>',
+# Modified by Ryu 2006.03.15
+#'perman', 'Perman surfer',
+'perman', '<a href="http://www.bug.co.jp/nami-nori/" title="Bot home page.">波乗野郎</a>',
+'php\sversion\stracker','<a href="http://www.nexen.net/phpversion/bot.php" title="PHP version tracker home page">PHP version tracker</a>',
'pictureofinternet','<a href="http://malfunction.org/poi/" title="PictureOfInternet home page">PictureOfInternet</a>',
'ping\.blo\.gs','<a href="http://blo.gs/ping.php" title="Bot home page.">ping.blo.gs</a>',
'plinki','<a href="http://www.plinki.com" title="plinki home page">plinki</a>',
@@ -1498,7 +1750,7 @@
'projectwf\-java\-test\-crawler','ProjectWF-java-test-crawler',
'proodlebot','<a href="http://www.proodle.com" title="proodleBot home page">proodleBot</a>',
'pyquery','<a href="http://sourceforge.net/projects/pyquery/" title="PyQuery home page">PyQuery</a>',
-'rambler','<a href="http://www.rambler.ru/doc/faq.shtml" title="Bot home page [new window]">StackRambler</a>',
+'rambler','<a href="http://www.rambler.ru/doc/faq.shtml" title="Bot home page">StackRambler</a>',
'redalert','Red Alert',
'relevantnoise\.com', '<a href="http://www.relevantnoise.com" title="Relevant Noise">Relevant Noise</a>',
'rojo','<a href="http://rojo.com" title="Bot home page">RoJo</a> aggregator',
@@ -1509,9 +1761,9 @@
'sbider','<a href="http://www.sitesell.com/sbider.html" title="Bot home page">SBIder</a>',
'schizozilla','<a href="http://spamhuntress.com/2005/03/18/gizmo/ " title="Schizozilla home page">Schizozilla</a>',
'scumbot','Scumbot',
-'searchguild[_+ ]dmoz[_+ ]experiment','<a href="http://www.searchguild.com" title="SearchGuild_DMOZ_Experiment home page">SearchGuild_DMOZ_Experiment</a>',
-'seekbot','<a href="http://www.seekbot.net/bot.html" title="Bot home page [new window]">Seekbot</a>',
-'sensis_web_crawler','<a href="http://www.sensis.com.au" title="Sensis Web Crawler home page">Sensis Web Crawler</a>',
+'searchguild\_dmoz\_experiment','<a href="http://www.searchguild.com" title="SearchGuild_DMOZ_Experiment home page">SearchGuild_DMOZ_Experiment</a>',
+'seekbot','<a href="http://www.seekbot.net/bot.html" title="Bot home page">Seekbot</a>',
+'sensis\sweb\scrawler','<a href="http://www.sensis.com.au" title="Sensis Web Crawler home page">Sensis Web Crawler</a>',
'seznambot','<a href="http://fulltext.seznam.cz" title="Bot home page">SeznamBot</a>',
'shim\-crawler','<a href="http://www.logos.ic.i.u-tokyo.ac.jp/crawler/" title="crawl@logos.ic.i.u-tokyo.ac.jp Bot home page">Shim-Crawler</a>',
'shoutcast','Shoutcast Directory Service',
@@ -1520,10 +1772,10 @@
'sohu\-search','<a href="http://corp.sohu.com" title="Bot home page">sohu-search</a>',
'sohu','<a href="http://corp.sohu.com" title="Bot home page">sohu agent</a>',
'snappy','<a href="http://www.urltrends.com/faq.php" title="Bot home page">Snappy</a>',
-'sphere_scout','<a href="http://www.sphere.com" title="Bot home page">Sphere Scout</a>',
+'sphere\sscout','<a href="http://www.sphere.com" title="Bot home page">Sphere Scout</a>',
'sproose_crawler','<a href="http://www.sproose.com/bot.html" title="Bot home page">sproose crawler</a>',
'steroid__download','<a href="http://faqs.org.ru/progr/pascal/delphi_internet2.htm" title="STEROID Download home page">STEROID Download</a>',
-'steeler','<a href="http://www.tkl.iis.u-tokyo.ac.jp/~crawler/ " title="Steeler home page">Steeler</a>',
+'steeler','<a href="http://www.tkl.iis.u-tokyo.ac.jp/~crawler/" title="Steeler home page">Steeler</a>',
'suchfin\-bot','<a href="http://www.suchfin.de" title="Suchfin-Bot home page">Suchfin-Bot</a>',
'superbot','<a href="http://www.sparkleware.com/superbot/" title="SuperBot home page">SuperBot</a>',
'surveybot','SurveyBot',
@@ -1534,11 +1786,11 @@
'tcl_http_client_package','<a href="http://www.tcl.tk/man/tcl8.4/TclCmd/http.htm" title="Tcl http client package home page">Tcl http client package</a>',
'technoratibot', 'Technoratibot',
'teragramcrawlersurf','<a href="http://www.teragram.com" title="TeragramCrawlerSURF home page">TeragramCrawlerSURF</a>',
-'test_crawler','<a href="http://netp.ath.cx" title="Test Crawler home page">Test Crawler</a>',
+'test\scrawler','<a href="http://netp.ath.cx" title="Test Crawler home page">Test Crawler</a>',
'testbot','<a href="http://www.agbrain.com" title="TestBot home page">TestBot</a>',
't\-h\-u\-n\-d\-e\-r\-s\-t\-o\-n\-e','<a href="http://www.thunderstone.com" title="Bot home page. Used by many.">T-H-U-N-D-E-R-S-T-O-N-E</a>',
'topicblogs', '<a href="http://www.topicblogs.com" title="Bot home page">topicblogs</a>',
-'turnitinbot','Turn It In',
+'turnitinbot','<a href="http://www.turnitin.com/robot/crawlerinfo.html">TurnitinBot</a>',
'turtle', 'Turtle',
'turtlescanner', 'Turtle',
'tutorgigbot','<a href="http://www.tutorgig.info" title="TutorGigBot home page">TutorGigBot</a>',
@@ -1557,7 +1809,7 @@
'vortex','<a href="http://marty.anstey.ca/projects/robots/vortex/" title="Bot home page">VORTEX</a>',
'vse','<a href="http://www.vivisimo.com" title="VSE home page">VSE</a>',
'w3c\-checklink','<a href="http://validator.w3.org/checklink/" title="Bot home page">W3C Link Checker</a>',
-'w3c[_+ ]css[_+ ]validator[_+ ]jfouffa', '<a href="http://jigsaw.w3.org/css-validator/" title="Bot home page">W3C jigsaw CSS Validator</a>',
+'w3c\_css\_validator\_jfouffa', '<a href="http://jigsaw.w3.org/css-validator/" title="Bot home page">W3C jigsaw CSS Validator</a>',
'w3c_validator','<a href="http://validator.w3.org" title="Bot home page">W3C Validator</a>',
'wavefire','<a href="http://www.wavefire.com" title="info@wavefire.com; Bot home page">Wavefire</a>',
'webclipping\.com', 'WebClipping.com',
@@ -1568,14 +1820,14 @@
'webfilter','<a href="http://www.verso.com/enterprise/netspective/webfilter.asp" title="Bot home page">WebFilter</a>',
'webindexer','<a href="mailto://webindexerv1@yahoo.com" title="WebIndexer home page">WebIndexer</a>',
'webminer','<a href="http://64.124.122.252/feedback.html" title="WebMiner home page">WebMiner</a>',
-'website[_+ ]monitoring[_+ ]bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" title="Website_Monitoring_Bot home page">Website_Monitoring_Bot</a>',
+'website\_monitoring\_bot','<a href="http://InternetSupervision.com/UrlMonitor/3/" title="Website_Monitoring_Bot home page">Website_Monitoring_Bot</a>',
'webvulncrawl', 'WebVulnCrawl',
'wells_search','<a href="http://www.psychedelix.com/cgi-bin/csv2html.pl?data=allagents.csv&template=detail.html&match=\bid_t_z_1484\b " title="Wells Search home page">Wells Search</a>',
'wonderer', 'Web Wombat Redback Spider',
'wume_crawler','<a href="http://wume.cse.lehigh.edu/~xiq204/crawler/ " title="wume crawler home page">wume crawler</a>',
'wwweasel',,'<a href="http://wwweasel.de" title="Website_Monitoring_Bot home page">WWWeasel</a>',
'xenu\'s_link_sleuth','<a href="http://home.snafu.de/tilman/xenulink.html" title="Xenu Link Sleuth home page">Xenu Link Sleuth</a>',
-'xenu_link_sleuth','<a href="http://home.snafu.de/tilman/xenulink.html" title="Xenu Link Sleuth home page">Xenu Link Sleuth</a>',
+'xenu\slink\ssleuth','<a href="http://home.snafu.de/tilman/xenulink.html" title="Xenu Link Sleuth home page">Xenu Link Sleuth</a>',
'xirq','<a href="http://www.xirq.com" title="xirq home page">xirq</a>',
'y!j', '<a href="http://help.yahoo.co.jp/help/jp/search/indexing/indexing-15.html" title="Bot home page">Y!J Yahoo Japan</a>',
'yacy','<a href="http://www.yacy.net/yacy" title="Bot home page">yacy</a>',
@@ -1584,8 +1836,8 @@
'yahoofeedseeker', '<a href="http://publisher.yahoo.com/rssguide" title="Bot home page">Yahoo Feed Seeker</a>',
'yahooseeker\-testing', '<a href="http://search.yahoo.com" title="Bot home page">YahooSeeker-Testing</a>',
'yahooseeker', '<a href="http://help.yahoo.com/help/us/ysearch/crawling/crawling-02.html" title="Bot home page">YahooSeeker Yahoo! Blog crawler</a>',
-'yahoo\-mmcrawler', '<a href="mailto:mms-mmcrawler-support@yahoo-inc.com?subject=Yahoo-MMCrawler Information" title="E-mail Bot">Yahoo-MMCrawler</a>',
-'yahoo!_mindset','<a href="http://mindset.research.yahoo.com" title="Bot home page [new window]">Yahoo! Mindset</a>',
+'yahoo\-mmcrawler', '<a href="mailto:mms-mmcrawler-support@yahoo-inc.com?subject=Yahoo-MMCrawler_Information" title="E-mail Bot">Yahoo-MMCrawler</a>',
+'yahoo!_mindset','<a href="http://mindset.research.yahoo.com" title="Bot home page">Yahoo! Mindset</a>',
'yandex', 'Yandex bot',
'yooglifetchagent','<a href="http://www.yoogli.com" title="yoogliFetchAgent home page">yoogliFetchAgent</a>',
'z\-add_link_checker','<a href="http://w3.z-add.co.uk/linkcheck/" title="Z-Add Link Checker home page">Z-Add Link Checker</a>',
@@ -1596,16 +1848,17 @@
'ng\/1\.','<a href="http://www.exabot.com" title="Bot home page">NG 1.x (Exalead)</a>', # put at end to avoid false positive
'ng\/2\.','<a href="http://www.exabot.com" title="Bot home page">NG 2.x (Exalead)</a>', # put at end to avoid false positive
'exabot','<a href="http://www.exabot.com" title="Bot home page">Exabot</a>', # put at end to avoid false positive
-'java','<a href="http://www.projecthoneypot.org/harvester_useragents.php" title="Bot home page">Java (Often spam bot)</a>', # put at end to avoid false positive
+'java','<a href="http://www.projecthoneypot.org/harvester_useragents.php" title="Bot home page">Java (spam botによく使われる)</a>', # put at end to avoid false positive
# Generic root ID
-'robot', 'Unknown robot (identified by \'robot\')',
-'crawl', 'Unknown robot (identified by \'crawl\')',
-'spider', 'Unknown robot (identified by \'spider\')',
-'bot[+:,\.\;\/\\\-]','Unknown robot (identified by \'bot*\')',
-'[+:,\.\;\/\\\-]bot','Unknown robot (identified by \'*bot\')',
-'no_user_agent','Unknown robot (identified by empty user agent string)',
+'robot', '未分類のrobot (名前に\'robot\'を含む)',
+'crawl', '未分類のrobot (名前に\'crawl\'を含む)',
+'spider', '未分類のrobot (名前に\'spider\'を含む)',
+'bot[+:,\.\;\/\\\-]','未分類のrobot (名前に\'bot*\'を含む)',
+'[+:,\.\;\/\\\-]bot','未分類のrobot (名前に\'*bot\'を含む)',
+'no_user_agent','未分類のrobot (user agent文字列がない)',
+'uri::fetch','URI::Fetch',
# Unknown robots identified by hit on robots.txt
-'unknown', 'Unknown robot (identified by hit on \'robots.txt\')'
+'unknown', '未分類のrobot (\'robots.txt\'への参照で検出)'
);
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/lib/search_engines.pm Mon Feb 09 13:08:16 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/lib/search_engines.pm Fri Jan 23 13:00:27 2009
@@ -6,7 +6,7 @@
# An entry if known in SearchEnginesKnownUrl is also welcome.
#------------------------------------------------------------------------------
# $Revision: 1.46 $ - $Author: eldy $ - $Date: 2008/11/15 14:58:01 $
-
+# Modified by ホビット 2009/1/21
# 2005-08-19 Sean Carlos http://www.antezeta.com/awstats.html
# added minor italian search engines
# arianna http://arianna.libero.it/
@@ -154,6 +154,55 @@
# Note: Regex IDs are in lower case and ' ' and '+' are changed into '_'
#------------------------------------------------------------------------------
@SearchEnginesSearchIDOrder_list1=(
+# Japanese Search Engines
+# Added by Ryu 2007.01.06
+'66\.102\.11\.104',
+'64\.233\.179\.99',
+'72\.14\.235\.132',
+'74\.125\.77\.132',
+'74\.125\.113\.104',
+'209\.85\.(129|135|165|173|175|207)\.104',
+'209\.85\.175\.132',
+'216\.239\.(37\.99|39\.104|41\.104|63\.104)',
+'www\.answers\.com',
+'biglobe\.ne\.jp',
+'cache\.yahoofs\.jp',
+'cgi\.search\.biglobe\.ne\.jp',
+'clusty\.jp',
+'eniro\.fi',
+'excite\.co\.jp',
+'excite-cache\.jp',
+'find\.x0\.to',
+'search\.fresheye\.com',
+'dir\.fresheye\.com',
+'goo\.ne\.jp',
+'google\.co\.jp',
+'iug\.newsing\.jp',
+#'msnscache\.com',
+'mysearch\.myway\.com',
+'naver\.co\.jp',
+'search\.biglobe\.ne\.jp',
+'search\.jp\.aol\.com',
+'search\.livedoor\.com',
+'search\.nifty\.com',
+'search\.msn\.co\.jp',
+'search\.odn\.ne\.jp',
+'search-hp\.com',
+'picsearch\.com',
+'picsearch\.sk',
+'www\.prsearch\.',
+'m\.technorati\.jp',
+'www\.technorati\.jp',
+'technorati\.jp',
+'technorati\.com',
+'tocc\.co\.jp',
+'www\.baidu\.jp',
+'www\.ceek\.jp',
+'www\.dir\.bg',
+'www\.infoseek\.co\.jp',
+'www\.megite\.com',
+'www\.seochat\.com',
+'www\.yahoogle\.jp',
# Major international search engines
'google\.[\w.]+/products',
'base\.google\.',
@@ -362,7 +411,13 @@
'msn\.'=>'hotmail\.msn\.',
'tiscali\.'=>'mail\.tiscali\.',
'yahoo\.'=>'mail\.yahoo\.',
-'yandex\.'=>'direct\.yandex\.'
+'yandex\.'=>'direct\.yandex\.',
+# For Japanese Search Engines
+# Added by Ryu 2006.03.15
+'infoseek\.co\.jp'=>'at\.infoseek\.co\.jp',
+# One entry per key (a repeated hash key keeps only its last value). NOTE(review): 'googlee' looks like a typo for 'google' - confirm.
+'googlee\.'=>'(?:mail|translate)\.googlee\.',
+'goo\.ne\.jp'=>'members\.goo\.ne\.jp'
);
@@ -370,6 +425,55 @@
# Each Search Engine Search ID is associated to an AWStats id string
#------------------------------------------------------------------------------
%SearchEnginesHashID = (
+# Japanese Search Engines
+# Added by Ryu 2007.01.06
+'66\.102\.11\.104','googlejapan',
+'64\.233\.179\.99','googlejapan',
+'72\.14\.235\.132','google',
+'74\.125\.77\.132','google',
+'74\.125\.113\.104','google',
+'209\.85\.(129|135|165|173|175|207)\.104','google',
+'209\.85\.175\.132','google',
+'216\.239\.(37\.99|39\.104|41\.104|63\.104)','googlejapan',
+'www\.answers\.com','answers',
+'biglobe\.ne\.jp','biglobe',
+'cache\.yahoofs\.jp','yahoocache',
+'cgi\.search\.biglobe\.ne\.jp','biglobe',
+'clusty\.jp','clustyjp',
+'eniro\.fi','eniro',
+'excite\.co\.jp','excitejapan',
+'excite-cache\.jp','excitejapan',
+'find\.x0\.to','asamasearch',
+'search\.fresheye\.com','fresheye',
+'dir\.fresheye\.com','fresheyedir',
+'goo\.ne\.jp','goo',
+'google\.co\.jp','google',
+'iug\.newsing\.jp','iug',
+#'msnscache\.com','msn_cache',
+'mysearch\.myway\.com','myway',
+'naver\.co\.jp','naver',
+'search\.biglobe\.ne\.jp','biglobe',
+'search\.jp\.aol\.com','aoljapan',
+'search\.livedoor\.com','livedoor',
+'search\.nifty\.com','nifty',
+'search\.msn\.co\.jp','msnjapan',
+'search\.odn\.ne\.jp','odn',
+'search-hp\.com','searchhp',
+'picsearch\.com','picsearchcom',
+'picsearch\.sk','picsearchsk',
+'www\.prsearch\.','prsearch',
+'m\.technorati\.jp','mtechnoratijp',
+'www\.technorati\.jp','wtechnoratijp',
+'technorati\.jp','technoratijp',
+'technorati\.com','technorati',
+'tocc\.co\.jp','tocc',
+'www\.baidu\.jp','baidujp',
+'www\.ceek\.jp','ceekjp',
+'www\.dir\.bg','dirbg',
+'www\.infoseek\.co\.jp','infoseek',
+'www\.megite\.com','megite',
+'www\.seochat\.com','seochat',
+'www\.yahoogle\.jp','yahoogle',
# Major international search engines
'google\.[\w.]+/products','google_products',
'base\.google\.','google_base',
@@ -641,13 +745,59 @@
# List of search engines that store keyword as page instead of query parameter
#------------------------------------------------------------------------------
%SearchEnginesWithKeysNotInQuery=(
-'a9',1 # www.a9.com/searckey1%20searchkey2
+'a9',1, # www.a9.com/searchkey1%20searchkey2
+'answers',1, # www.answers.com/searchkey1%20searchkey2
+'fresheyedir',1, # dir.fresheye.com/ds/kw/new/searchkey1/
+'iug',1, # iug.newsing.jp/tag/searchkey1?...
+'megite',1, # www.megite.com/discover/searchkey1%20searchkey2
+'yahoogle',1, # www.yahoogle.jp/yahoogle-1-searchkey1%20searchkey2
+'mtechnoratijp',1, # m.technorati.jp/search/searchkey1+searchkey2
+'wtechnoratijp',1, # www.technorati.jp/search/searchkey1+searchkey2
+ # www.technorati.jp/posts/tag/searchkey1+searchkey2
+'technoratijp',1, # technorati.jp/search/searchkey1+searchkey2
+'technorati',1, # technorati.com/posts/tag/searchkey1+searchkey2
);
# SearchEnginesKnownUrl
# Known rules to extract keywords from a referrer search engine URL
#------------------------------------------------------------------------------
%SearchEnginesKnownUrl=(
+# Japanese Search Engines
+# Added by Ryu 2007.01.06
+'answers','www\.answers\.com\/',
+'asamasearch','q=',
+'askjapan','q=',
+'baidujp','wd=',
+'biglobe','q=',
+'ceekjp','q=',
+'dirbg','s=',
+'clustyjp','query=',
+'eniro','q=',
+'excitejapan','search=',
+'fresheye','kw=',
+'fresheyedir','dir\.fresheye\.com\/ds\/kw\/new\/',
+'goo','mt=',
+'googlejapan','(p=|q=cache:[0-9A-Za-z]{12}:|as_p=|as_q=|as_epq=|q=)',
+'infoseek','qt=',
+'iug','iug\.newsing\.jp\/tag\/',
+'livedoor','q=',
+'megite','www\.megite\.com\/discover\/',
+'myway','searchfor=',
+'msnjapan','(q|mt)=',
+'naver','query=',
+'nifty','text=',
+'odn','querystring=',
+'searchhp','kensaku_ward=',
+'picsearchcom','q=',
+'picsearchsk','q=',
+'prsearch','query=',
+'mtechnoratijp','m\.technorati\.jp\/search\/',
+'wtechnoratijp','www\.technorati\.jp\/(?:search|posts/tag)\/',
+'technoratijp','technorati\.jp\/search\/',
+'technorati','technorati\.com\/(?:tag|posts/tag)\/',
+'tocc','qry=',
+'yahoocache','w=',
+'yahoogle','www\.yahoogle\.jp\/yahoogle-1-',
# Most common search engines
'alexa','q=',
'alltheweb','q(|uery)=',
@@ -660,9 +810,9 @@
'google_groups','group\/', # does not work
'google_image','(p|q|as_p|as_q)=',
'google_cache','(p|q|as_p|as_q)=cache:[0-9A-Za-z]{12}:',
-'google','(p|q|as_p|as_q)=',
+'google','(p=|as_p=|as_q=|as_epq=|q=cache:[0-9A-Za-z]{12}:|q=)',
'lycos','query=',
-'msn','q=',
+'msn','(q|mt)=',
'live','q=',
'netscape','search=',
'tiscali','key=',
@@ -671,7 +821,7 @@
'voila','(kw|rdata)=',
'search.com','q=',
'yahoo_mindset','p=',
-'yahoo','p=',
+'yahoo','(p|kw)=',
'sympatico', 'query=',
'excite','search=',
# Minor international search engines
@@ -859,7 +1019,8 @@
# If no rules are known and search in WordsToExtractSearchUrl failed, this will be used to clean URL of not keyword parameters.
#------------------------------------------------------------------------------
@WordsToExtractSearchUrl= ('tn=','ie=','ask=','claus=','general=','key=','kw=','keyword=','keywords=','MT=','p=','q=','qr=','qt=','query=','s=','search=','searchText=','string=','su=','txtsearch=','w=');
-@WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look=');
+# Words after "look" are added by Ryu for Japanese Search Engines.
+@WordsToCleanSearchUrl= ('act=','annuaire=','btng=','cat=','categoria=','cfg=','cof=','cou=','count=','cp=','dd=','domain=','dt=','dw=','enc=','exec=','geo=','hc=','height=','hits=','hl=','hq=','hs=','id=','kl=','lang=','loc=','lr=','matchmode=','medor=','message=','meta=','mode=','order=','page=','par=','pays=','pg=','pos=','prg=','qc=','refer=','sa=','safe=','sc=','sort=','src=','start=','style=','stype=','sum=','tag=','temp=','theme=','type=','url=','user=','width=','what=','\\.x=','\\.y=','y=','look=','target=','collection=','showSummary=','perPage=','next=','DB=','OPE=','Max=','base=','submit=','SearchType=','SESSIONID=','QUERYRULE=','DISPLAYMIN=','RELURLSWITCH=','SORT=','start=','direct=','relwd=','lk=','svx=','nh=','internet=','DC=','submit0=','DEST=','where=','CCM=','NRS=','UNI=','UD0=','FRS=','sv=','rf=','oq=','col=','act\.search=');
# SearchEnginesKnownUTFCoding
# Known parameter that proves a search engine has coded its parameters in UTF-8
@@ -876,6 +1037,44 @@
# 'search_engine_id', 'search_engine_name',
#------------------------------------------------------------------------------
%SearchEnginesHashLib=(
+# Japanese Search Engines
+# Added by Ryu 2006.03.15
+'answers','<a href="http://www.answers.com">Answers.com</a>',
+'googlejapan','<a href="http://www.google.co.jp">Google Japan</a>',
+'biglobe','Biglobe',
+'aoljapan','<a href="http://search.jp.aol.com">AOL Japan</a>',
+'goo','<a href="http://goo.ne.jp">Goo</a>',
+'nifty','<a href="http://www.nifty.com/search/">Nifty</a>',
+'msnjapan','MSN Japan',
+'odn','ODN',
+'clustyjp','<a href="http://clusty.jp">Clusty Japan</a>',
+'eniro','<a href="http://eniro.fi">eniro</a>',
+'excitejapan','Excite Japan',
+'mtechnoratijp','<a href="http://m.technorati.jp">テクノラティモバイル検索</a>',
+'wtechnoratijp','<a href="http://technorati.jp">テクノラティ検索(WWW)</a>',
+'technoratijp','<a href="http://technorati.jp">テクノラティ検索</a>',
+'technorati','<a href="http://technorati.com">Technorati Search</a>',
+'yahoogle','<a href="http://www.yahoogle.jp">yahoogle</a>',
+#'msn_cache','MSN Cache',
+'myway','MyWay',
+'naver','Naver',
+'asamasearch','<a href="http://find.x0.to">Asamasearch</a>',
+'fresheye','<a href="http://fresheye.com">Fresheye</a>',
+'fresheyedir','<a href="http://fresheye.com">Fresheye(Dir)</a>',
+'iug','<a href="http://iug.newsing.jp">iUG-newsing</a>',
+'seochat','SEOChat',
+'tocc','TOCC/Search',
+'searchhp','<a href="http://search-hp.com">Search HP</a>',
+'picsearchcom','<a href="http://www.picsearch.com">picsearch</a>',
+'picsearchsk','<a href="http://www.picsearch.sk">picsearch.sk</a>',
+'prsearch','<a href="http://www.prsearch.biz">prsearch</a>',
+'baidujp','<a href="http://www.baidu.jp">baidu.jp</a>',
+'ceekjp','<a href="http://www.ceek.jp">CEEK.JP</a>',
+'dirbg','<a href="http://dir.dir.bg">Dir.bg</a>',
+'infoseek','<a href="http://www.infoseek.co.jp">Infoseek</a>',
+'yahoocache','Yahoo Cache',
+'askjapan','Ask Jeeves Japan',
+'livedoor','Livedoor',
# Major international search engines
'alexa','<a href="http://www.alexa.com" title="Search Engine Home Page">Alexa</a>',
'alltheweb','<a href="http://www.alltheweb.com" title="Search Engine Home Page">AllTheWeb</a>',
@@ -905,7 +1104,7 @@
# Minor international search engines
'google4counter','<a href="http://www.4-counter.com" title="Search Engine Home Page">4-counter (Google)</a>',
'att','<a href="http://www.att.net" title="Search Engine Home Page">AT&T search (powered by Google)</a>',
-'bungeebonesdotcom','<a href="http://BungeeBones.com/search.php/" title="Search Engine Home Page">BungeeBones</a>',
+'bungeebonesdotcom','<a href="http://BungeeBones.com/search.php" title="Search Engine Home Page">BungeeBones</a>',
'go','Go.com',
'askde','<a href="http://de.ask.com" title="Search Engine Home Page">Ask Deutschland</a>',
'askes','<a href="http://es.ask.com" title="Search Engine Home Page">Ask España</a>', # break out Ask country specific engines.
@@ -717,7 +732,14 @@
'Created by',
'plugins',
'Regions',
- 'Cities'
+ 'Cities',
+ '','','','','','','',
+ '','','','','','','','','','',
+ '','','','','','','','','','',
+ 'dd mmm yyyy',
+ 'mmmbreakyyyy',
+ 'mmm yyyy',
+ 'ddbreakmmm'
);
#------------------------------------------------------------------------------
@@ -7812,9 +7809,14 @@
my $sec = substr( "$date", 12, 2 );
my $dateformat = $Message[78];
- if ( $option == 2 ) {
- $dateformat =~ s/^[^ymd]+//g;
- $dateformat =~ s/[^ymd]+$//g;
+ if ($option == 2) { # full date; default pattern 'dd mmm yyyy'
+ $dateformat = $Message[200];
+ } elsif ($option == 3) { # month over year; default 'mmmbreakyyyy' ("break" becomes a <br> tag below)
+ $dateformat = $Message[201];
+ } elsif ($option == 4) { # month and year on one line; default 'mmm yyyy'
+ $dateformat = $Message[202];
+ } elsif ($option == 5) { # day over month; default 'ddbreakmmm' ("break" becomes a <br> tag below)
+ $dateformat = $Message[203];
}
$dateformat =~ s/yyyy/$year/g;
$dateformat =~ s/yy/$year/g;
@@ -7824,6 +7826,7 @@
$dateformat =~ s/HH/$hour/g;
$dateformat =~ s/MM/$min/g;
$dateformat =~ s/SS/$sec/g;
+ $dateformat =~ s/break/<br$endtag/g;
return "$dateformat";
}
@@ -15699,26 +15805,28 @@
if ( $LogType eq 'W' || $LogType eq 'S' ) {
$w = '17';
$colspan = 6;
+ print "<col width=\"$w%\"$endtag<col width=\"$w%\"$endtag<col width=\"$w%\"$endtag<col width=\"$w%\"$endtag<col width=\"$w%\"$endtag<col width=\"$w%\"$endtag\n";
+ } else {
+ print "<col width=\"$w%\"$endtag<col width=\"$w%\"$endtag<col width=\"$w%\"$endtag<col width=\"$w%\"$endtag<col width=\"$w%\"$endtag\n";
}
# Show first/last
- print "<tr bgcolor=\"#$color_TableBGRowTitle\">";
+ print "<tr class=\"colortab\">";
print
"<td class=\"aws\"><strong>$Message[133]</strong></td><td class=\"aws\" colspan=\""
. ( $colspan - 1 ) . "\">\n";
print( $MonthRequired eq 'all'
? "$Message[6] $YearRequired"
: "$Message[5] "
- . $MonthNumLib{$MonthRequired}
- . " $YearRequired" );
+ . Format_Date("$YearRequired$MonthRequired"."00000000",4));
print "</td></tr>\n";
- print "<tr bgcolor=\"#$color_TableBGRowTitle\">";
+ print "<tr class=\"colortab\">";
print "<td class=\"aws\"><strong>$Message[8]</strong></td>\n";
print "<td class=\"aws\" colspan=\""
. ( $colspan - 1 ) . "\">"
. ( $FirstTime ? Format_Date( $FirstTime, 0 ) : "NA" ) . "</td>";
print "</tr>\n";
- print "<tr bgcolor=\"#$color_TableBGRowTitle\">";
+ print "<tr class=\"colortab\">";
print "<td class=\"aws\"><strong>$Message[9]</strong></td>\n";
print "<td class=\"aws\" colspan=\""
. ( $colspan - 1 ) . "\">"
@@ -16095,12 +16204,12 @@
. ( !$StaticLinks
&& $monthix == $nowmonth
&& $YearRequired == $nowyear
- ? '<font class="currentday">'
+ ? '<span class="currentday">'
: '' );
- print "$MonthNumLib{$monthix}<br$endtag$YearRequired";
+ print Format_Date("$YearRequired$monthix"."00000000",3);
print( !$StaticLinks
&& $monthix == $nowmonth
- && $YearRequired == $nowyear ? '</font>' : '' );
+ && $YearRequired == $nowyear ? '</span>' : '' );
print "</td>";
# }
@@ -16156,12 +16272,12 @@
. ( !$StaticLinks
&& $monthix == $nowmonth
&& $YearRequired == $nowyear
- ? '<font class="currentday">'
+ ? '<span class="currentday">'
: '' );
- print "$MonthNumLib{$monthix} $YearRequired";
+ print Format_Date("$YearRequired$monthix"."00000000",4);
print( !$StaticLinks
&& $monthix == $nowmonth
- && $YearRequired == $nowyear ? '</font>' : '' );
+ && $YearRequired == $nowyear ? '</span>' : '' );
print "</td>";
if ( $ShowMonthStats =~ /U/i ) {
print "<td>",
@@ -16531,25 +16647,20 @@
my $dayofweekcursor = DayOfWeek( $day, $month, $year );
print "<td"
. ( $dayofweekcursor =~ /[06]/
- ? " bgcolor=\"#$color_weekend\""
+ ? " class=\"colorweekend\""
: "" )
. ">";
print( !$StaticLinks
&& $day == $nowday
&& $month == $nowmonth
&& $year == $nowyear
- ? '<font class="currentday">'
+ ? '<span class="currentday">'
: '' );
- print "$day<br$endtag<span style=\"font-size: "
- . ( $FrameName ne 'mainright'
- && $QueryString !~ /buildpdf/i ? "9" : "8" )
- . "px;\">"
- . $MonthNumLib{$month}
- . "</span>";
+ print Format_Date("$year$month$day"."000000",5);
print( !$StaticLinks
&& $day == $nowday
&& $month == $nowmonth
- && $year == $nowyear ? '</font>' : '' );
+ && $year == $nowyear ? '</span>' : '' );
print "</td>\n";
}
print "<td></td>";
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/lang/awstats-en.txt Tue Feb 10 13:45:35 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/lang/awstats-en.txt Tue Feb 10 14:25:36 2009
@@ -174,3 +174,34 @@
message170=plugins
message171=Regions
message172=Cities
+message173=
+message174=
+message175=
+message176=
+message177=
+message178=
+message179=
+message180=
+message181=
+message182=
+message183=
+message184=
+message185=
+message186=
+message187=
+message188=
+message189=
+message190=
+message191=
+message192=
+message193=
+message194=
+message195=
+message196=
+message197=
+message198=
+message199=
+message200=dd mmm yyyy
+message201=mmmbreakyyyy
+message202=mmm yyyy
+message203=ddbreakmmm
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/lang/awstats-jp.txt Sun Apr 27 17:47:24 2008
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/lang/awstats-jp.txt Tue Feb 10 14:24:50 2009
@@ -93,79 +95,115 @@
message89=金曜日
message90=土曜日
message91=曜日
-message92=だれ
-message93=いつ
-message94=認証されたユーザー
+message92=訪問者の属性
+message93=訪問日時
+message94=認証されたユーザ
message95=最小
message96=平均
message97=最大
-message98=Web圧縮
-message99=帯域幅の保存
+message98=mod_zipによる圧縮
+message99=節約されたバイト数
message100=圧縮前
message101=圧縮後
message102=合計
-message103=キーフレーズ
-message104=入り口
+message103=種類の検索文
+message104=最初に閲覧
message105=コード
-message106=平均サイズ
+message106=平均データ長
message107=ニュースグループからのリンク
-message108=Kb
-message109=Mb
-message110=Gb
+message108=kB
+message109=MB
+message110=GB
message111=Grabber
message112=Yes
message113=No
-message114=WhoIs情報
+message114=WhoIsDBの情報
message115=OK
-message116=出口
-message117=訪問の長さ
-message118=ウィンドーを閉じる
+message116=最後に閲覧
+message117=滞在時間
+message118=このウィンドウを閉じる
message119=バイト
-message120=検索文字列(キーフレーズ)
-message121=検索文字列(キーワード)
-message122=検索エンジン
-message123=ホームページ
-message124=他のフレーズ
-message125=他のログイン
-message126=検索エンジン
-message127=ホームページ
-message128=サマリー
-message129=「年」ビューでは精密な数字はありません
+message120=検索文
+message121=検索語
+message122=の検索エンジンから登録されている
+message123=このサイトにリンクしているWebページ
+message124=上記以外の検索文
+message125=他のユーザ(もしくは匿名ユーザ)のログイン
+message126=参照検索エンジン
+message127=参照サイト
+message128=サマリ
+message129=年単位の統計では正確な値は表示されません
message130=データ配列関数
message131=送信者のEMail
message132=受信者のEMail
message133=表示するレポート
message134=エキストラ/マーケティング
message135=画面解像度
-message136=ワーム/ウィルス攻撃
-message137=お気に入りに追加
-message138=日付
-message139=その他
-message140=Java 対応ブラウザー
-message141=Macromedia Director 対応ブラウザー
-message142=Flash 対応ブラウザー
-message143=Real Audio 対応ブラウザー
-message144=Quicktime Audio 対応ブラウザー
-message145=Windows Media 対応ブラウザー
-message146=PDF 対応ブラウザー
+message136=ワーム/ウィルスによる攻撃
+message137=favicon.icoへのヒット
+message138=日別の統計
+message139=その他の情報
+message140=Java 対応ブラウザ
+message141=Macromedia Director 対応ブラウザ
+message142=Flash 対応ブラウザ
+message143=Real Audio 対応ブラウザ
+message144=Quicktime Audio 対応ブラウザ
+message145=Windows Media 対応ブラウザ
+message146=PDF 対応ブラウザ
message147=SMTP エラーコード
message148=国
message149=メール
message150=サイズ
message151=最初
message152=最後
-message153=除外フィルター
-message154=このチャートのコードは訪問者によるアクセスではありませんので他のチャートに含まれていません。
-message155=クラスター
-message156=ロボットによるアクセスは訪問者の閲覧とは違いますので他のチャートに含まれていません。
-message157=+の後の数字は「robots.txt」の表示が成功した回数です。
-message158=ワームによるアクセスは訪問者の閲覧とは違いますので他のチャートに含まれていません。
-message159=閲覧に含まれないアクセスはロボット、ワームなどによるものです。
-message160=閲覧アクセス
-message161=閲覧に含まれないアクセス
-message162=月
+message153=除外フィルタ
+message154=ここにリストされているコードは不可視な訪問者としてカウントされているため、その他のチャートの数値には含まれていません。
+message155=クラスタ
+message156=ここにリストされているロボットは不可視な訪問者としてカウントされているため、その他のチャートの数値には含まれていません。
+message157=+の後に表示されている数値は、“robots.txt”ファイルへの参照で検出されたヒット数です。
+message158=ここにリストされているワームは不可視な訪問者としてカウントされているため、その他のチャートの数値には含まれていません。
+message159=不可視トラフィックとは、ワームやロボット、あるいは特殊なHTTPステータスコードを持つ応答などによって生成されたトラフィックのことです。
+message160=可視トラフィック
+message161=不可視トラフィック
+message162=月別の統計
message163=ワーム
-message164=その他のワーム
-message165=Mails successfully sent
-message166=Mails failed/refused
+message164=種類のワーム
+message165=送信成功メール
+message166=失敗メール
message167=Sensitive targets
+message168=Javascript disabled
+message169=生成
+message170=プラグイン
+message171=地域
+message172=都市
+message173=
+message174=
+message175=
+message176=
+message177=
+message178=
+message179=
+message180=
+message181=
+message182=
+message183=
+message184=
+message185=
+message186=
+message187=
+message188=
+message189=
+message190=
+message191=
+message192=
+message193=
+message194=
+message195=
+message196=
+message197=
+message198=
+message199=
+message200=yyyy年 mmm dd日
+message201=yyyy年breakmmm
+message202=yyyy年 mmm
+message203=mmmbreakdd日
perl Makefile.PL && make && make test && make install
C:\>ppm install http://theoryx5.uwinnipeg.ca/ppms/Geo-IP.ppd
@@ -10382,7 +10402,7 @@
my $regmisc=qr/^$miscquoted/;
my $regfavico=qr/\/favicon\.ico$/i;
my $regrobot=qr/^\/robots\.txt$/i;
- my $regtruncanchor=qr/#(\w*)$/;
+ my $regtruncanchor=qr/#([A-Za-z0-9\-_:\.]*)$/;
my $regtruncurl=qr/([$URLQuerySeparators])(.*)$/;
my $regext=qr/\.(\w{1,6})$/;
my $regdefault;
+ my $regtruncanchor=qr/#([A-Za-z][A-Za-z0-9\-_:\.]*)$/;
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/awstats.pl Fri Jan 23 11:16:31 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/awstats.pl Sat Feb 07 22:02:36 2009
@@ -1578,12 +1546,24 @@
# Return: -1, 0, 1
#------------------------------------------------------------------------------
sub SortBrowsers {
- $a =~ m/^(\w+?)([\d\.]+)?$/;
- my $a_family = $1;
- my @a_ver = split(/\./, $2);
- $b =~ m/^(\w+?)([\d\.]+)?$/;
- my $b_family = $1;
- my @b_ver = split(/\./, $2);
+ my $a_family = $a;
+ my @a_ver = ();
+ foreach my $family ( keys %BrowsersFamily ) {
+ # Split "<family><version>" only when both patterns match, so $1/$2 are never stale or undef
+ next unless ( $a =~ /^$family/i && $a =~ m/^(\D+)([\d\.]+)?$/ );
+ $a_family = $1;
+ @a_ver = defined $2 ? split(/\./, $2) : ();
+ last; # the split does not depend on which family matched, so stop at the first hit
+ }
+ my $b_family = $b;
+ my @b_ver = ();
+ foreach my $family ( keys %BrowsersFamily ) {
+ # Same rule as for $a: keep the whole id as the family when the id cannot be split
+ next unless ( $b =~ /^$family/i && $b =~ m/^(\D+)([\d\.]+)?$/ );
+ $b_family = $1;
+ @b_ver = defined $2 ? split(/\./, $2) : ();
+ last; # the split does not depend on which family matched, so stop at the first hit
+ }
my $compare = 0;
my $done = 0;
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/awstats.pl Fri Jan 23 11:16:31 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/awstats.pl Sat Feb 07 22:02:36 2009
@@ -12136,6 +12172,7 @@
$SearchEnginesKnownUrl{
$tmprefererserver} )
{ # Search engine with known URL syntax
+ my $keyphrasep = "";
foreach my $param (
split(
/&/,
@@ -12163,11 +12200,11 @@
$param =~ tr/ /\+/s;
if ( ( length $param ) > 0 )
{
- $_keyphrases{$param}++;
+ $keyphrasep = $keyphrasep . "+" . "$param";
}
- last;
}
}
+ if ((length $keyphrasep) > 0) { $_keyphrases{$keyphrasep}++; }
}
elsif (
$LevelForKeywordsDetection >= 2 )
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/awstats.pl Fri Jan 23 11:16:31 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/awstats.pl Sat Feb 07 22:02:36 2009
@@ -12224,13 +12261,14 @@
# debug("xxx".$refurl[0]);
# If search engine with key inside page url like a9 (www.a9.com/searchkey1%20searchkey2)
if ( $refurl[0] =~
-/$SearchEnginesKnownUrl{$tmprefererserver}(.*)$/
+/$SearchEnginesKnownUrl{$tmprefererserver}(.*)$SearchEnginesKnownUrlTrail{$tmprefererserver}$/
)
{
my $param = $1;
&ChangeWordSeparatorsIntoSpace(
$param);
$param =~ tr/ /\+/s;
if ( ( length $param ) > 0 ) {
+ $param = "+" . $param; # prefix only non-empty phrases; prefixing before the length test made it always true and counted "+" for empty captures
$_keyphrases{$param}++;
}
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/lib/search_engines.pm Mon Feb 09 13:08:16 2009
+++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/lib/search_engines.pm Fri Jan 23 13:00:27 2009
@@ -847,6 +997,16 @@
'searchch', 'q=', 'bluewin', 'qry='
);
+# SearchEnginesKnownUrlTrail
+# Regex fragment expected at the very end of the referrer URL, after the keywords, for engines whose keys are inside the URL path (appended after the "(.*)" capture)
+#------------------------------------------------------------------------------
+%SearchEnginesKnownUrlTrail=(
+# Japanese Search Engines
+# Added by Hobbit_makoto 2007.01.17
+'fresheyedir','\/',
+'yahoogle','\.html',
+);
+
# SearchEnginesKnownUrlNotFound
# Known rules to extract not found keywords from a referrer search engine URL
#------------------------------------------------------------------------------
--- C:\awstats-6.9-mod/wwwroot/cgi-bin/lib/operating_systems.pm Tue Feb 10 13:30:56 2009 +++ C:\AWStats69t_Jpn/wwwroot/cgi-bin/lib/operating_systems.pm Tue Feb 10 13:42:51 2009 @@ -34,9 +34,6 @@ 'win(.*)95', 'win(.*)16','windows[_+ ]3', # This works for windows_31 and windows_3.1 'win(.*)ce', -'microsoft', -'msie[_+ ]', -'ms[_+ ]frontpage', # Macintosh OS family 'mac[_+ ]os[_+ ]x', 'mac[_+ ]?p', # This works for macppc and mac_ppc and mac_powerpc @@ -92,7 +89,12 @@ 'applesyndication', 'akregator', 'plagger', -'syndirella' +'syndirella', +# Unknown Windows +'microsoft', +'msie[_+ ]', +'ms[_+ ]frontpage', +'sleipnir' );