Just a quick script to parse through an AltaVista search engine log and generate a file listing which terms were used, and how often. It outputs the terms in descending order of frequency, then in alphabetical order when several terms have the same number of uses. Nothing fancy; it just creates a file to be displayed by another script.
Update:
Edited the sort loop to drop the use of a couple of temporary variables. I left the older code there so it's easier to understand (and because I haven't got round to testing the newer code).
Update:
Take a look at this node; it has some suggestions for tightening up the sorting code at the end of the script. I'll probably get round to rewriting this to use a more efficient algorithm at some point, but for now it works, as long as the files you're using it on aren't huge.
#!/usr/bin/perl -w
use strict;
## Program to parse out the various search terms that are entered into the search engine. ##
use Text::ParseWords;
use CGI;
use Unix::Syslog qw(:macros); # Syslog macros
use Unix::Syslog qw(:subs); # Syslog functions
use Date::Manip; #to get the week of the year
## Generic error function that logs to the syslog.
##
## Takes one argument, the text describing what went wrong, and writes it
## as a LOG_CRIT entry under the "Log Rotate" identity. It only logs: it
## returns nothing and does not stop the script, so a caller that cannot
## carry on must exit by itself.
sub error_with_rotation {
    my $error_mess = shift;
    $error_mess = 'unknown error' unless defined $error_mess;

    openlog('Log Rotate', 0, LOG_SYSLOG);

    ## syslog() treats its second argument as a printf-style format, so the
    ## message is passed as data behind a fixed '%s'. Otherwise a '%' inside
    ## a file name or error text would be read as a format directive.
    syslog(LOG_CRIT, '%s', "Daily Log rotation failed: $error_mess");

    closelog();
    return;
}
## End of error function
## Weekly search log that is scanned for query terms.
my $file_to_be_parsed = "/usr/local/apache/logs/oldlog/weeklysearch.log";

## Date information for the previous day ($p_****). Only the week number
## and the year are needed; together they name the results file.
my $p_year = UnixDate("yesterday", "%Y");
my $p_week = UnixDate("yesterday", "%W");

## Tally of how many times each search term was seen.
my %queryterms;
my $results = "/usr/local/apache/htdocs/stats/$p_week$p_year.log";

## Without the input there is nothing to count, so log the failure and stop
## instead of reading from a handle that was never opened.
open(my $log_fh, '<', $file_to_be_parsed) or do {
    error_with_rotation("Search log parse open failed: $!");
    exit 1;
};

LINE:
while (my $line = <$log_fh>) {
    ## Only lines that mention the search CGI are of interest.
    next LINE if $line !~ /\/cgi-bin\/query/;

    ## Field 5 of the space-separated line is used as the request string;
    ## parse_line keeps a quoted field together and strips its quotes.
    my @words  = parse_line(' ', 0, $line);
    my $phrase = $words[5];
    next LINE unless defined $phrase;    # malformed or short line

    ## The second word of the request is used as the requested URI.
    my @phrase2     = parse_line(' ', 0, $phrase);
    my $request_uri = $phrase2[1];
    next LINE unless defined $request_uri;

    ## Hand CGI only the part after the '?'. Given the whole URI, the first
    ## parameter would be named "/cgi-bin/query?q" rather than "q", so a
    ## search whose 'q' parameter comes first would never be counted.
    my ($query_string) = $request_uri =~ /\?(.*)\z/s;
    next LINE unless defined $query_string;

    my $query       = CGI->new($query_string);
    my $queryphrase = $query->param('q');
    next LINE unless defined $queryphrase && length $queryphrase;

    ## Split the phrase on whitespace, keeping quoted phrases together.
    ## Empty pieces (from repeated whitespace) are ignored; testing for
    ## length rather than truth keeps a search for "0" in the tally.
    for my $term (parse_line('\s', 0, $queryphrase)) {
        $queryterms{$term}++ if defined $term && length $term;
    }
}

## Everything has been read by now, so a failed close is only logged.
close($log_fh)
    or error_with_rotation("Search log parse close failed: $!");
## Sort area. Orders the terms by descending frequency, breaking ties
## alphabetically without regard to case. Perl's built-in sort is used in
## place of a hand-written selection sort over two parallel arrays, whose
## number of comparisons grew with the square of the number of terms.
## The final raw-string comparison only decides between terms that differ
## in case alone, so that their relative order is deterministic.
my @array1 = sort {
       $queryterms{$b} <=> $queryterms{$a}
    || uc($a) cmp uc($b)
    || $a cmp $b
} keys %queryterms;

## Counts in the same order as @array1, so that $array1[$i] and $array2[$i]
## always describe the same term.
my @array2 = @queryterms{@array1};

## Number of distinct terms and a general-purpose index variable; both are
## declared here because statements further down the script may use them.
my $count = @array1;
my $x;
## Write one "term::::count" line per term, in the order held by @array1.
## If the results file cannot be opened, log the failure and stop rather
## than printing every line to an unopened handle.
open(my $out_fh, '>', $results) or do {
    error_with_rotation("Search log results open failed: $!");
    exit 1;
};

for my $i (0 .. $#array1) {
    print {$out_fh} $array1[$i] . "::::" . $array2[$i] . "\n";
}

## Buffered write errors surface at close time, so the result is checked.
close($out_fh)
    or error_with_rotation("Search log results close failed: $!");