wwwstat-2.01r -- add a "Transfers by Referrer" section to wwwstat-2.01.

What the patch does:
    Adds the settings $ReferHeader, $SortRefer, $TopRefer and $Do_Refer
    (all off by default) and teaches the no/all/top/sort/both option
    handling about a new "refer" scope (scope 7).  The log-entry regex
    captures an optional quoted field after the byte count into $referrer,
    so entries without that field still parse.  When $Do_Refer is set, the
    host part of an http:// or https:// referrer is tallied in
    %ReferRequests and %ReferBytes, and the section is written through
    &output_xref and &output_refer.  &output_refer is called here but is
    not defined in any hunk of this copy.

Notes on this copy:
    Line breaks, leading whitespace and blank context lines were collapsed
    in the copy this was rebuilt from; they have been re-created from the
    hunk line counts and are a best guess.  Apply with "patch -l" and
    compare against a pristine wwwstat-2.01 first.

    Three context lines look as if angle-bracketed text was stripped from
    them ("LINE: while()", and the $StartTag and $EndTag string values).
    They are reproduced as found and need restoring from the original.

    The last hunk is cut short: its header declares 14 new lines, but the
    copy ends inside the trailing context.

--- wwwstat.ORG	Thu Jan 4 19:17:48 2001
+++ wwwstat	Thu Mar 27 23:13:29 2003
@@ -1,6 +1,6 @@
 #!/usr/bin/perl5.00503
 # ==========================================================================
-$Version = 'wwwstat-2.01';
+$Version = 'wwwstat-2.01r';
 #
 # Copyright (c) 1994, 1996 Regents of the University of California.
 #
@@ -227,6 +227,7 @@
     $SubdomainHeader = 'Transfers by Reversed Subdomain';
     $ArchiveHeader = 'Transfers by URL/Archive Section';
     $IdentHeader = 'Transfers by Remote Identity';
+    $ReferHeader = 'Transfers by Referrer';
 
 # These Old headers are for reading old summary files
 
@@ -253,6 +254,7 @@
     $SortSubdomain = 0;
     $SortArchive = 0;
     $SortIdent = 0;
+    $SortRefer = 0;
 
 # If the output of a section is sorted, you may also want to restrict
 # the output to only the N best in that section.
@@ -263,6 +265,7 @@
     $TopSubdomain = 0;
     $TopArchive = 0;
     $TopIdent = 0;
+    $TopRefer = 0;
 
 # On the other hand, you may want to exclude (0) an entire section.
 # If set = 2, the top N is done first and then followed by normal section.
@@ -282,6 +285,8 @@
 
     $Do_Ident = 0; # Set 1 or 2 ONLY if Ident is ALWAYS desired.
 
+    $Do_Refer = 0;
+
 # The rest of these options are normally only changed on the command-line
 
     $Verbose = 0; # Display valid log entries on STDERR?
@@ -389,11 +394,13 @@
             elsif (/^sub(domain)?$/) { $Do_Subdomain = 0; }
             elsif (/^arc(hive)?$/) { $Do_Archive = 0; }
             elsif (/^id(ent)?$/) { $Do_Ident = 0; }
+            elsif (/^refer(er)?$/) { $Do_Refer = 0; }
             elsif (/^all$/)
             {
                 $Do_Daily = $Do_Hourly = $Do_Domain =
                 $Do_Subdomain = $Do_Archive =
+                $Do_Refer =
                 $Do_Ident = 0;
             }
             else { &badarg('-no',$_); }
         }
@@ -401,7 +408,7 @@
         { # and how to apply the -top and -sort options
             $scope = 0;
             $Do_Daily = $Do_Hourly = $Do_Domain =
-            $Do_Subdomain = $Do_Archive = $Do_Ident = 1;
+            $Do_Subdomain = $Do_Archive = $Do_Ident = $Do_Refer = 1;
         }
         elsif (/^da(y|ily|te)$/) { $scope = 1; $Do_Daily = 1; }
         elsif (/^hour(ly)?$/) { $scope = 2; $Do_Hourly = 1; }
@@ -409,6 +416,7 @@
         elsif (/^sub(domain)?$/) { $scope = 4; $Do_Subdomain = 1; }
         elsif (/^arc(hive)?$/) { $scope = 5; $Do_Archive = 1; }
         elsif (/^id(ent)?$/) { $scope = 6; $Do_Ident = 1; }
+        elsif (/^refer(er)?$/) { $scope = 7; $Do_Refer = 1; }
         elsif (/^link$/)
         {
             $InsertLink = 1; # Add anchors to archive
@@ -455,8 +463,9 @@
             elsif ($scope == 4) { $TopSubdomain = $_; }
             elsif ($scope == 5) { $TopArchive = $_; }
             elsif ($scope == 6) { $TopIdent = $_; }
+            elsif ($scope == 7) { $TopRefer = $_; }
             else { $TopDaily = $TopHourly = $TopDomain =
-                   $TopSubdomain = $TopArchive = $TopIdent = $_;
+                   $TopSubdomain = $TopArchive = $TopIdent = $TopRefer = $_;
             }
         }
         elsif (/^sort(.*)/) # Change sort method
@@ -476,8 +485,9 @@
             elsif ($scope == 4) { $SortSubdomain = $sortkey; }
             elsif ($scope == 5) { $SortArchive = $sortkey; }
             elsif ($scope == 6) { $SortIdent = $sortkey; }
+            elsif ($scope == 7) { $SortRefer = $sortkey; }
             else { $SortDaily = $SortHourly = $SortDomain =
-                   $SortSubdomain = $SortArchive = $SortIdent = $sortkey;
+                   $SortSubdomain = $SortArchive = $SortIdent = $SortRefer = $sortkey;
             }
         }
         elsif (/^both$/) # Do both top N and normal
@@ -512,13 +522,18 @@
                 if (!$TopIdent) { $TopIdent = 10; }
                 if (!$SortIdent) { $SortIdent = 1; }
             }
+            elsif ($scope == 7) {
+                $Do_Refer = 2;
+                if (!$TopRefer) { $TopRefer = 10; }
+                if (!$SortRefer) { $SortRefer = 1; }
+            }
             else {
                 $Do_Daily = $Do_Hourly = $Do_Domain =
-                $Do_Subdomain = $Do_Archive = $Do_Ident = 2;
+                $Do_Subdomain = $Do_Archive = $Do_Ident = $Do_Refer = 2;
                 $TopDaily = $TopHourly = $TopDomain = $TopSubdomain =
-                $TopArchive = $TopIdent = 10;
+                $TopArchive = $TopIdent = $TopRefer = 10;
                 $SortDaily = $SortHourly = $SortDomain = $SortSubdomain =
-                $SortArchive = $SortIdent = 1;
+                $SortArchive = $SortIdent = $SortRefer = 1;
             }
         }
         else # End of full-word option arguments
@@ -763,6 +778,7 @@
     %DomainRequests = (); # per domain
     %SubdomainRequests = (); # per subdomain
     %IdentRequests = (); # per ident
+    %ReferRequests = ();
 
     $TotalBytes = 0; # total bytes sent
     %HourBytes = (); # per hour
@@ -771,6 +787,7 @@
     %DomainBytes = (); # per domain
     %SubdomainBytes = (); # per subdomain
     %IdentBytes = (); # per ident
+    %ReferBytes = ();
 
     $StartTag = "\n"; # Note that these vars are used by both
     $EndTag = "\n"; # input and output of summaries.
@@ -1026,6 +1043,7 @@
     local($ident, $hour, $date, $method, $htv, $has_head, $idx);
     local($fname, $rname, $dvalue, $pathkey, @address, $domain, $subdomain);
     local($saveline);
+    local($referrer);
 
     LINE: while()
     {
@@ -1037,8 +1055,9 @@
 
         # First, parse the logfile entry into its seven basic components
         #
-        ($host, $rfc931, $authuser, $timestamp, $request, $status, $bytes) =
-            /^(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^"]*)\" (\S+) (\S+)/;
+        ($host, $rfc931, $authuser, $timestamp, $request, $status, $bytes,
+         $referrer) =
+            /^(\S+) (\S+) (\S+) \[([^\]]*)\] \"([^"]*)\" (\S+) (\S+)(?: \"([^\"]*)\")?/;
 
         #" Now, is this garbage or is it memorex? Note that $bytes can be 0
 
@@ -1320,6 +1339,14 @@
             $IdentRequests{$ident}++;
             $IdentBytes{$ident} += $bytes;
         }
+        if ($Do_Refer) # Add to Referrer Table?
+        {
+            if (defined($referrer) && $referrer =~ /^https?:\/\/([^\/]+)/)
+            {
+                $ReferRequests{$1}++;
+                $ReferBytes{$1} += $bytes;
+            }
+        }
     }
     continue
     {
@@ -1461,6 +1488,7 @@
     &output_xref($Do_Subdomain, $TopSubdomain, 'Subdomain', $SubdomainHeader);
     &output_xref($Do_Archive, $TopArchive, 'Archive', $ArchiveHeader);
     &output_xref($Do_Ident, $TopIdent, 'Ident', $IdentHeader);
+    &output_xref($Do_Refer, $TopRefer, 'Refer', $ReferHeader);
 
     if ($LastSummary) {
         local($prevmonth, $prevyear) = &lastmonth($earliest);
@@ -1526,6 +1554,14 @@
             &output_ident('AllIdent');
         }
     }
+    if ($Do_Refer) {
+        &output_refer('Refer');
+        if ($Do_Refer == 2) {
+            $TopRefer = 0;
+            $SortRefer = 0;
+            &output_refer('AllRefer');
+        }
+    }
     print "