#!/usr/local/bin/perl # access-extract-urls.pl,v 1.19 1996/06/02 03:41:43 wessels Exp # # This file is under RCS control in # /O1/Squid_Central/Scripts/RCS/access-extract-urls.pl,v # # This script is a special case subset of access-extract.pl because # creating a hash of URLs eats up lots of memory. $|=1; $me = $0; @F = split('/', $me); pop @F; $medir = join ('/', @F); $debug=0; push (@INC, $medir); require 'timelocal.pl'; require 'squid-logs.pl'; $usage = "usage: $me -h\n"; # Defaults $HTTPDFormat = 0; $MaxEntries = 50; while (($#ARGV >= $[) && ($ARGV[0] =~ /^-/) && ($_ = shift)) { if ($_ eq '-h') { $HTTPDFormat = 1; } else { warn "$me: $_: unknown option\n"; die $usage; } } $CATS{'UR'} = $MaxEntries; truncate('/var/tmp/UR_COUNT_ALLTCP.dir', 0); truncate('/var/tmp/UR_COUNT_ALLTCP.pag', 0); truncate('/var/tmp/UR_COUNT_ALLUDP.dir', 0); truncate('/var/tmp/UR_COUNT_ALLUDP.pag', 0); truncate('/var/tmp/UR_COUNT_ALL.dir', 0); truncate('/var/tmp/UR_COUNT_ALL.pag', 0); die "dbmopen: $!\n" unless dbmopen(%UR_COUNT_ALLTCP, '/var/tmp/UR_COUNT_ALLTCP', 0660); die "dbmopen: $!\n" unless dbmopen(%UR_COUNT_ALLUDP, '/var/tmp/UR_COUNT_ALLUDP', 0660); die "dbmopen: $!\n" unless dbmopen(%UR_COUNT_ALL, '/var/tmp/UR_COUNT_ALL', 0660); $i = 0; while (<>) { @F = $HTTPDFormat ? &parse_common_log($_) : split; next unless (@F); ($when,$elapsed,$who,$tag,$size,$method,$what,$id,$hier) = @F; ($tag,$code) = split('/', $tag); $count_tcp = $tag =~ /^TCP_/ ? 1 : 0; $count_udp = $tag =~ /^UDP_/ ? 1 : 0; next unless ($count_tcp || $count_udp); print "T=$when,E=$elapsed,A=$who,T=$tag,S=$size,M=$method,R=$what\n" if ($debug); next if (length($what) > 1024); # ndbm barfs $UR_COUNT_ALLTCP{$what} = $UR_COUNT_ALLTCP{$what}+1 if $count_tcp; $UR_COUNT_ALLUDP{$what} = $UR_COUNT_ALLUDP{$what}+1 if $count_udp; $UR_COUNT_ALL{$what} = $UR_COUNT_ALL{$what}+1; printf "# %d %d\n", time, $i if ($debug && ++$i % 10000 == 0); } while (($k,$v) = each %UR_COUNT_ALLTCP) { $ACOUNT{$v}++; } # memory efficient sort... # $max = $CATS{UR} - 1; @counts = (); while (($k,$v) = each %UR_COUNT_ALL) { next unless ($v > 1); #print "$v $k\n"; push(@counts, $v); next unless ($#counts > ($max*20)); #printf "# SORTING %d counts...\n", $#counts+1; @counts = sort { $b <=> $a } @counts; $#counts = $max; print "# COUNTS = ", join(' ', @counts), "\n" if ($debug); } printf "# SORTING %d counts...\n", $#counts+1; @counts = sort { $b <=> $a } @counts; print "# COUNTS = ", join(' ', @counts), "\n"; $thresh = $#counts > $max ? $counts[$max] : 0; print "# THRESH %UR = $thresh\n"; @counts = undef; # dump while (($k,$v) = each %UR_COUNT_ALL) { next if ($v < $thresh); print "UR|$k|COUNT|ALL $v\n"; print "UR|$k|COUNT|ALLTCP $UR_COUNT_ALLTCP{$k}\n" if defined $UR_COUNT_ALLTCP{$k}; print "UR|$k|COUNT|ALLUDP $UR_COUNT_ALLUDP{$k}\n" if defined $UR_COUNT_ALLUDP{$k}; } unlink "/var/tmp/UR_COUNT_ALLTCP.dir"; unlink "/var/tmp/UR_COUNT_ALLTCP.pag"; unlink "/var/tmp/UR_COUNT_ALLUDP.dir"; unlink "/var/tmp/UR_COUNT_ALLUDP.pag"; unlink "/var/tmp/UR_COUNT_ALL.dir"; unlink "/var/tmp/UR_COUNT_ALL.pag"; print "# Access histogram\n"; print "# AC|N number-of-objects-accessed-N-times\n"; while (($k,$v) = each %ACOUNT) { print "AC|$k $v\n"; } exit 0; sub request_count { local($h1) = @_; local($n) = 0; local($k1); local($k2); local($h2); foreach $k1 (keys %{$h1}) { next unless ($k1 eq 'COUNT'); $h2 = \%{${$h1}{$k1}}; foreach $k2 (keys %{$h2}) { next unless ($k2 =~ /^TCP_/ || $k2 =~ /^UDP_/); $n += ${$h2}{$k2}; } } #print "request_count: returning $n\n"; $n; }