# wwwstat 2.01 -> 2.01r: add a "Transfers by Referrer" report section.
# The hunks on the line below bump $Version and add the section's configuration
# defaults: $ReferHeader (section title), $SortRefer, $TopRefer and $Do_Refer
# (the last three default to 0, like the existing Ident settings).
# NOTE(review): this copy appears to have had its line breaks collapsed and
# markup inside string literals stripped -- confirm against the original patch
# before applying it.
--- wwwstat.ORG Thu Jan 4 19:17:48 2001 +++ wwwstat Thu Mar 27 23:13:29 2003 @@ -1,6 +1,6 @@ #!/usr/bin/perl5.00503 # ========================================================================== -$Version = 'wwwstat-2.01'; +$Version = 'wwwstat-2.01r'; # # Copyright (c) 1994, 1996 Regents of the University of California. # @@ -227,6 +227,7 @@ $SubdomainHeader = 'Transfers by Reversed Subdomain'; $ArchiveHeader = 'Transfers by URL/Archive Section'; $IdentHeader = 'Transfers by Remote Identity'; + $ReferHeader = 'Transfers by Referrer'; # These Old headers are for reading old summary files @@ -253,6 +254,7 @@ $SortSubdomain = 0; $SortArchive = 0; $SortIdent = 0; + $SortRefer = 0; # If the output of a section is sorted, you may also want to restrict # the output to only the N best in that section. @@ -263,6 +265,7 @@ $TopSubdomain = 0; $TopArchive = 0; $TopIdent = 0; + $TopRefer = 0; # On the other hand, you may want to exclude (0) an entire section. # If set = 2, the top N is done first and then followed by normal section. @@ -282,6 +285,8 @@ $Do_Ident = 0; # Set 1 or 2 ONLY if Ident is ALWAYS desired. + $Do_Refer = 0; # Referrer section: 0 = excluded, 2 = top N then normal section + # The rest of these options are normally only changed on the command-line $Verbose = 0; # Display valid log entries on STDERR? 
@@ -389,11 +394,13 @@ elsif (/^sub(domain)?$/) { $Do_Subdomain = 0; } elsif (/^arc(hive)?$/) { $Do_Archive = 0; } elsif (/^id(ent)?$/) { $Do_Ident = 0; } + elsif (/^refer(er)?$/) { $Do_Refer = 0; } elsif (/^all$/) { $Do_Daily = $Do_Hourly = $Do_Domain = $Do_Subdomain = $Do_Archive = + $Do_Refer = $Do_Ident = 0; } else { &badarg('-no',$_); } } @@ -401,7 +408,7 @@ { # and how to apply the -top and -sort options $scope = 0; $Do_Daily = $Do_Hourly = $Do_Domain = - $Do_Subdomain = $Do_Archive = $Do_Ident = 1; + $Do_Subdomain = $Do_Archive = $Do_Ident = $Do_Refer = 1; } elsif (/^da(y|ily|te)$/) { $scope = 1; $Do_Daily = 1; } elsif (/^hour(ly)?$/) { $scope = 2; $Do_Hourly = 1; } @@ -409,6 +416,7 @@ elsif (/^sub(domain)?$/) { $scope = 4; $Do_Subdomain = 1; } elsif (/^arc(hive)?$/) { $scope = 5; $Do_Archive = 1; } elsif (/^id(ent)?$/) { $scope = 6; $Do_Ident = 1; } + elsif (/^refer(er)?$/) { $scope = 7; $Do_Refer = 1; } elsif (/^link$/) { $InsertLink = 1; # Add anchors to archive @@ -455,8 +463,9 @@ elsif ($scope == 4) { $TopSubdomain = $_; } elsif ($scope == 5) { $TopArchive = $_; } elsif ($scope == 6) { $TopIdent = $_; } + elsif ($scope == 7) { $TopRefer = $_; } else { $TopDaily = $TopHourly = $TopDomain = - $TopSubdomain = $TopArchive = $TopIdent = $_; + $TopSubdomain = $TopArchive = $TopIdent = $TopRefer = $_; } } elsif (/^sort(.*)/) # Change sort method @@ -476,8 +485,9 @@ elsif ($scope == 4) { $SortSubdomain = $sortkey; } elsif ($scope == 5) { $SortArchive = $sortkey; } elsif ($scope == 6) { $SortIdent = $sortkey; } + elsif ($scope == 7) { $SortRefer = $sortkey; } else { $SortDaily = $SortHourly = $SortDomain = - $SortSubdomain = $SortArchive = $SortIdent = $sortkey; + $SortSubdomain = $SortArchive = $SortIdent = $SortRefer = $sortkey; } } elsif (/^both$/) # Do both top N and normal @@ -512,13 +522,18 @@ if (!$TopIdent) { $TopIdent = 10; } if (!$SortIdent) { $SortIdent = 1; } } + elsif ($scope == 7) { + $Do_Refer = 2; + if (!$TopRefer) { $TopRefer = 10; } + if 
(!$SortRefer) { $SortRefer = 1; } + } else { $Do_Daily = $Do_Hourly = $Do_Domain = $Do_Subdomain = $Do_Archive = $Do_Ident = 2; $TopDaily = $TopHourly = $TopDomain = $TopSubdomain = - $TopArchive = $TopIdent = 10; + $TopArchive = $TopIdent = $TopRefer = 10; $SortDaily = $SortHourly = $SortDomain = $SortSubdomain = - $SortArchive = $SortIdent = 1; + $SortArchive = $SortIdent = $SortRefer = 1; $Do_Refer = 2; } } else # End of full-word option arguments @@ -763,6 +778,7 @@ %DomainRequests = (); # per domain %SubdomainRequests = (); # per subdomain %IdentRequests = (); # per ident + %ReferRequests = (); # per referrer host $TotalBytes = 0; # total bytes sent %HourBytes = (); # per hour @@ -771,6 +787,7 @@ %DomainBytes = (); # per domain %SubdomainBytes = (); # per subdomain %IdentBytes = (); # per ident + %ReferBytes = (); # per referrer host $StartTag = "
\n";     # Note that these vars are used by both
     $EndTag   = "
\n"; # input and output of summaries. @@ -1026,6 +1043,7 @@ local($ident, $hour, $date, $method, $htv, $has_head, $idx); local($fname, $rname, $dvalue, $pathkey, @address, $domain, $subdomain); local($saveline); + local($referrer); # optional quoted field after $bytes; undef if the entry has none LINE: while() { @@ -1037,8 +1055,9 @@ # First, parse the logfile entry into its seven basic components # - ($host, $rfc931, $authuser, $timestamp, $request, $status, $bytes) = - /^(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^"]*)\" (\S+) (\S+)/; + ($host, $rfc931, $authuser, $timestamp, $request, $status, $bytes, + $referrer) = + /^(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^"]*)\" (\S+) (\S+)(?: \"([^\"]*)\")?/; #" Now, is this garbage or is it memorex? Note that $bytes can be 0 @@ -1320,6 +1339,14 @@ $IdentRequests{$ident}++; $IdentBytes{$ident} += $bytes; } + if ($Do_Refer) # Add to Referrer Table? + { + if($referrer =~ /^https?\s*:\/\/([^\/]+)/) + { + $ReferRequests{$1}++; + $ReferBytes{$1} += $bytes; + } + } } continue { @@ -1461,6 +1488,7 @@ &output_xref($Do_Subdomain, $TopSubdomain, 'Subdomain', $SubdomainHeader); &output_xref($Do_Archive, $TopArchive, 'Archive', $ArchiveHeader); &output_xref($Do_Ident, $TopIdent, 'Ident', $IdentHeader); + &output_xref($Do_Refer, $TopRefer, 'Refer', $ReferHeader); if ($LastSummary) { local($prevmonth, $prevyear) = &lastmonth($earliest); @@ -1526,6 +1554,14 @@ &output_ident('AllIdent'); } } + if ($Do_Refer) { + &output_refer('Refer'); + if ($Do_Refer == 2) { + $TopRefer = 0; + $SortRefer = 0; + &output_refer('AllRefer'); + } + } print "
\n"; print "
This summary was generated by \n"; @@ -1755,6 +1791,7 @@ # sub output_ident { + local($fmt) = "$StatsFormat %s\n"; local($frag) = @_; local($rqsts, $bytes, $pctrqsts, $pctbytes); local($top) = $TopIdent; @@ -1765,7 +1802,6 @@ print $StartTag; print $StatsHeader, " Remote Identity\n"; print $StatsRule, "------------------------------------\n"; - local($fmt) = "$StatsFormat %s\n"; foreach $ident (sort identcompare keys %IdentRequests) { @@ -1789,6 +1825,43 @@ } # ========================================================================== +# Output the stats for each referrer +# +sub output_refer +{ + local($fmt) = "$StatsFormat %s\n"; + local($frag) = @_; + local($rqsts, $bytes, $pctrqsts, $pctbytes); + local($top) = $TopRefer; + local($prefix) = $top ? "$PrefixTop $top" : $PrefixTotal; + + print "
\n"; + print "

$prefix $ReferHeader

\n"; + print $StartTag; + print $StatsHeader, " Referrer\n"; + print $StatsRule, "------------------------------------\n"; + + foreach $refer (sort refercompare keys %ReferRequests) + { + $rqsts = $ReferRequests{$refer}; + $bytes = $ReferBytes{$refer}; + if ($rqsts == $TotalRequests) { + $pctrqsts = "100.0"; + } else { + $pctrqsts = sprintf("%5.2f", 100*$rqsts/$TotalRequests); + } + if ($bytes == $TotalBytes) { + $pctbytes = "100.0"; + } else { + $pctbytes = sprintf("%5.2f", 100*$bytes/$TotalBytes); + } + printf $fmt, $pctrqsts, $pctbytes, $bytes, $rqsts, $refer; + + last if ($top && (--$top == 0)); + } + print $EndTag; +} + # ========================================================================== # The following sort comparison functions take $a and $b as the two # arguments keys or values to compare. Speed is important here. @@ -1863,6 +1936,17 @@ $sdiff = $IdentRequests{$b} - $IdentRequests{$a}; } else { $sdiff = $IdentBytes{$b} - $IdentBytes{$a}; + } + ($sdiff < 0) ? -1 : ($sdiff > 0) ? 1 : ($a cmp $b); +} + +sub refercompare +{ + if ($SortRefer == 0) { return ($a cmp $b); } + if ($SortRefer == 1) { + $sdiff = $ReferRequests{$b} - $ReferRequests{$a}; + } else { + $sdiff = $ReferBytes{$b} - $ReferBytes{$a}; } ($sdiff < 0) ? -1 : ($sdiff > 0) ? 1 : ($a cmp $b); }