#!/usr/bin/perl
###################################################
# ALF: Apache Log Filter (c) 2007 Charlie Harvey #
###################################################
use strict;
use warnings;
use DBD::AnyData;
use AnyData::Format::Weblog;
use AnyData::Format::WeblogVhost;
use AnyData::Format::WeblogCombined;
use Gtk2 -init;
use Gtk2::Ex::Simple::Menu;
use Getopt::Std;
my %opts;
getopts('?cvl:', \%opts);
my $APP_NAME = 'ALF: Apache Log Filter';
my $VERSION = '0.41';
my $USAGE = "$APP_NAME, version $VERSION\nUsage: $0 [cv?][l filename]\n";
# Tweakable options #
my $TABLE = 'log';
my $SEPERATOR = ' | ';
my $QUERY_FONT = 'FreeSans 12';
my $RESULTS_FONT = 'FreeMono 12';
my $LOG_DEFAULT = '/home/charlie/access_log_test';
# Stop tweaking #
my %log_types = (4=>"WeblogCombined", 5=>"WeblogVhost", 6=>'Weblog', );
my $log_type = "WeblogCombined";
my $dbh = DBI->connect('dbi:AnyData:(RaiseError=>0)');
my $window = Gtk2::Window->new ('toplevel');
my $menu = Gtk2::Ex::Simple::Menu->new (
menu_tree => do './alf_menu.pl',
default_callback => \¬_implemented,
);
my $query = Gtk2::Entry->new;
my $col_selecter = Gtk2::ComboBox->new_text;
my $results_view = Gtk2::TextView->new;
my $results = $results_view->get_buffer;
my $scroll_results = Gtk2::ScrolledWindow->new;
my $select_all = Gtk2::Button->new_with_label('Get everything');
my $hits = Gtk2::Button->new_with_label('Total Hits');
my $_404 = Gtk2::Button->new_with_label('404s');
my $short = Gtk2::RadioButton->new_with_label(undef,'Short');
my $long = Gtk2::RadioButton->new_with_label($short,'Vertical');
my $top_box = Gtk2::HBox->new;
my $main_box = Gtk2::VBox->new(0,0);
my $bottom_box = Gtk2::HBox->new(0,15);
my $status_bar = Gtk2::Statusbar->new;
my $log_file = $LOG_DEFAULT;
# Show error dialogue box
sub err {
my $msg = shift;
my $error = Gtk2::MessageDialog->new( $window, 'modal', 'error', 'close', $msg );
$error->signal_connect ( response => \&dialog_close );
$error->run;
$error->destroy;
}
# Show informational dialogue box
sub info {
my $msg = shift;
my $info = Gtk2::MessageDialog->new( $window, 'modal', 'info', 'close', $msg );
$info->signal_connect ( response => \&dialog_close );
$info->run;
$info->destroy;
}
# Actually only used during development - pop up an info box warning that this feature
# isn't done.
sub not_implemented {
info("Not done yet :-(")
}
# Show about dialogue box
sub about {
Gtk2->show_about_dialog(
$window, name=>$APP_NAME, version=>$VERSION, copyright=>"(C) 2007 Charlie Harvey",
authors => "Charlie Harvey",
);
}
# Set status text to arg
sub status {
my $status = shift;
$status_bar->pop(0);
$status_bar->push(0,$status);
}
# Set standard status text
sub changed_status {
status("$APP_NAME, using $log_file as $TABLE ($log_type format)");
}
# Change the log type, call dbh->func for that type, update column selecter
sub set_log_type {
my ($caller,$log_t,$widget)=(@_);
if ($widget) {
return unless $widget->get_active;
}
$log_type = $log_types{$log_t};
$log_type ||= $log_t;
$dbh->func($TABLE , $log_type, $log_file, 'ad_catalog');
$col_selecter->remove_text(0) for (1..10);
my $i = 0;
my @cols = col_names();
$col_selecter->insert_text($i++,$_) for (@cols);
info("Changed to $log_type format parser") if $widget;
changed_status();
}
# Open a new log file
sub open_log {
my $chooser = Gtk2::FileChooserDialog->new(
"Open apache log", $window, 'open',
('Open' => 'ok', 'Cancel' =>'cancel')
);
my $response = $chooser->run;
if ($response eq 'ok') {
$log_file = $chooser->get_filename;
$dbh->func($TABLE , $log_type, $log_file, 'ad_catalog');
changed_status();
}
$chooser->destroy;
}
# Save text from results text_view
sub export_results {
my $success = 0;
my $chooser = Gtk2::FileChooserDialog->new(
"Save filtered log", $window, 'save',
('Export' => 'ok', 'Cancel' =>'cancel')
);
my $response = $chooser->run;
if ($response eq 'ok') {
my $output_file = $chooser->get_filename();
if (-e $output_file) {
err("Can't overwrite existing file $output_file");
} else {
my $length = $results->get_char_count;
my $text_to_export = $results->get_text($results->get_start_iter,$results->get_end_iter,0);
open OUT, ">$output_file" || err("Can't write to file");
print OUT $text_to_export;
close OUT;
$success=1;
}
}
$chooser->destroy;
info("Filtered results exported") if ($success);
}
# Close a dialogue box
sub dialog_close {
my ($self, $response) = @_;
$self->destroy;
}
# Close application
sub close {
Gtk2->main_quit;
}
# When user presses enter, call go with current sql
sub filter_pressed {
my ($widget,$event,$data) = @_;
return unless ($event->keyval==65293);
go();
}
# Run sql that is given as arg
sub go {
my $sql = $query->get_text;
status("Filtering...");
my $res = query($sql);
$results->set_text( $res );
}
# Run select * query
sub select_all_query {
my $sql = "SELECT * FROM $TABLE";
$query->set_text($sql);
$results->set_text(query($sql));
}
# Run hits query
sub hits_query {
my $sql = "SELECT count(client) AS hits, status FROM $TABLE GROUP BY status ORDER BY status";
$query->set_text($sql);
$results->set_text(query($sql));
}
# Run 404 query
sub _404_query {
my $sql = "SELECT * FROM $TABLE WHERE status = 404";
$query->set_text($sql);
$results->set_text(query($sql));
}
# Append column name to query field
sub append_col_to_qry {
my $widget = shift;
my @cols = col_names();
$col_selecter->popdown;
$query->append_text( ' ' . $cols[$widget->get_active] . ' ');
$query->grab_focus;
my $pos=length $query->get_text;
$query->set_position($pos);
0;
}
# Run query
sub query {
my $sql = shift;
my $vl = 0;
if ($sql =~ /\\g\s*$/i) {
$vl = 1;
$sql =~ s/\\g\s*$//i;
}
if ($long->get_active) {
$vl = 1;
}
my $return = '';
my $row_count=0;
my $sth = $dbh->prepare($sql) ||
err("Can't prepare $sql\nIs your SQL syntactically correct?");
$sth->execute ||
err("Can't execute $sql\n" . $sth->errstr);
while (my $x = $sth->fetchrow_hashref) {
if ($vl) {
$return .= sprintf("%10s : %s\n",$_, $x->{$_}) for ( sort keys %$x );
}
else {
$return .= $x->{$_} . $SEPERATOR for ( sort keys %$x);
}
$return .= "\n";
$row_count++;
}
err("Couldn't run SQL.\nMaybe the file you opened wasn't a valid Apache log?\n\n" .
$sth->errstr) if $sth->err;
status("$row_count rows. Query: $sql");
$return;
}
# Return column names for current parser
sub col_names {
my $parser_name = "AnyData::Format::$log_type";
return (sort split /,/, $parser_name->new->{col_names});
}
## Process commandline options
if ($opts{'?'}) {
print $USAGE; exit(0);
}
if ($opts{l}) {
$log_file = $opts{l};
}
if ($opts{v}) {
$log_type = "WeblogVhost";
} elsif ($opts{c}) {
$log_type = "Weblog";
}
# Connect signals up.
$window->signal_connect ( destroy => \&close );
$query->signal_connect ( key_press_event => \&filter_pressed );
$col_selecter->signal_connect ( changed => \&append_col_to_qry );
$select_all->signal_connect ( clicked => \&select_all_query );
$hits->signal_connect ( clicked => \&hits_query );
$_404->signal_connect ( clicked => \&_404_query );
# Initialise widgets
$window->set_border_width(5);
$window->set_default_size(1024,768);
$window->set_title($APP_NAME);
$query->set_width_chars(80);
$query->set_has_frame(1);
$query->modify_font(Gtk2::Pango::FontDescription->from_string($QUERY_FONT));
$col_selecter->set_wrap_width(1);
$results_view->set_editable(0);
$results_view->set_wrap_mode('word-char');
$results_view->set_overwrite(1);
$results_view->modify_font(Gtk2::Pango::FontDescription->from_string($RESULTS_FONT));
$scroll_results->set_policy ('never', 'always');
# Layout
$scroll_results->add($results_view);
$top_box->add($query);
$top_box->add($col_selecter);
$bottom_box->pack_start($short,0,1,1);
$bottom_box->pack_start($long,0,0,1);
$bottom_box->pack_start($select_all,1,1,1);
$bottom_box->pack_start($hits,1,1,1);
$bottom_box->pack_start($_404,1,1,1);
$main_box->pack_start($menu->{widget},0,0,0);
$main_box->pack_start($top_box,0,0,5);
$main_box->pack_start($scroll_results,1,1,5);
$main_box->pack_start($bottom_box,0,0,5);
$main_box->pack_start($status_bar,0,0,5);
$window->add($main_box);
$window->show_all;
# Lets go
set_log_type(undef,$log_type);
Gtk2->main;
0;
__END__
=head1 NAME
ALF: Apache Log Filter. Or alf.pl if you want to be picky.
=head1 SYNOPSIS
./alf.pl [-l /path/to/access.log] [-cv]
=head1 DESCRIPTION
ALF is a filter for people using Apache logging to plain text logs, who still want the
flexibility of being able to search their logs like a database. It relies heavily on Jeff
Zucker's DBD::AnyData to do the clever stuff, and overlays that with a simple GTK2
interface.
You can run SQL queries and see the results in horizontal or vertical formats;
open different logs; run a few everyday queries from buttons; append column names to
your SQL from a dropdown box; switch between parser formats; and export your filtered
results in vertical or horizontal formats.
You'll need to copy WeblogCombined.pm and WeblogVhost.pm into /path/to/AnyData/Format
before ALF will play with you. AFAICS WeblogVhost.pm ought to work on all three types of logs
but I don't have common format logs available to test on. This is very much a learning project
so all comments are welcome.
=head1 OPTIONS
=head2 Commandline options
=over
=item -?
Print usage and exit
=item -l /path/to/access.log
A valid apache log
=item -c
Use common format parser
=item -v
Use vhost parser
=back
=head2 Set in script body
=over
=item $TABLE
The name which you wish to give the table you'll search. Default: 'log'.
=item $SEPERATOR
Field seperator for use when viewing records in horizontal format. Default: ' | '.
=item $QUERY_FONT
Font used in the query box. Default: 'FreeSans 12'.
=item $RESULTS_FONT
Font used to display your results. Default: 'FreeMono 12'.
=item $LOG_DEFAULT
Log that gets filtered if you don't specify one on the commandline. Default: '/home/charlie/access_log_test'
=back
=head1 REQUIREMENTS
perl 5.8.8
DBD::AnyData
AnyData::Format::WeblogVhost
AnyData::Format::WeblogCombined
Gtk2
Gtk2::Ex::Simple::Menu
Getopt::Std
=head1 LICENSE
Copyright (C)2007 Charlie Harvey
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version
2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public
License along with this program; if not, write to the Free
Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
Also available on line: http://www.gnu.org/copyleft/gpl.html
=cut
####
#!/usr/bin/perl
use strict;
use warnings;
my $menu_tree = [
_File => {
item_type => '',
children => [
_Open => {
item_type => '',
extra_data => 'gtk-open',
callback => \&open_log,
callback_action => 0,
accelerator => 'O',
},
_Save => {
item_type => '',
extra_data => 'gtk-save',
callback_action => 1,
callback => \&export_results,
accelerator => 'S',
},
_Quit => {
item_type => '',
extra_data => 'gtk-quit',
callback => \&close,
callback_action => 3,
accelerator => 'Q',
},
],
},
For_mat => {
item_type => '',
children => [
_Combined => {
item_type => '',
callback => \&set_log_type,
callback_action => 4,
groupid => 1,
},
_Vhost => {
item_type => '',
callback => \&set_log_type,
callback_action => 5,
groupid => 1,
},
_Common => {
extra_data => 1,
item_type => '',
callback => \&set_log_type,
callback_action => 6,
groupid => 1,
},
],
},
_Help => {
item_type => '',
children => [
_About => {
item_type => '',
extra_data => 'gtk-about',
callback => \&about,
callback_action => 7,
},
],
},
];
##
##
#########################################################
package AnyData::Format::WeblogVhost;
#########################################################
# AnyData driver for "Vhost Log Format" web log files
# Also supports combined and common log formats.
# Copyright (c) 2007, Charlie
#########################################################
=head1 NAME
AnyData::Format::WeblogVhost - tiedhash & DBI/SQL access to HTTPD Logs
=head1 SYNOPSIS
use AnyData;
my $weblog = adTie( 'Weblog', $filename );
while (my $hit = each %$weblog) {
print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/;
}
# ... other tied hash operations
OR
use DBI
my $dbh = DBI->connect('dbi:AnyData:');
$dbh->func('hits','Weblog','access_log','ad_catalog');
my $hits = $dbh->selectall_arrayref( qq{
SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%'
});
# ... other DBI/SQL read operations
=head1 DESCRIPTION
This is a plug-in format parser for the AnyData and DBD::AnyData modules. You can gain read access to Vhost Log Format
files web server log files (e.g. NCSA or Apache) either through tied hashes or arrays or through SQL database queries.
Fieldnames are taken from the W3 definitions found at, with the addition of client, referer and vhost fields
http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format
remotehost
usernname
authuser
date
request
status
bytes
referer
client
vhost
This module does not currently support writing to weblog files.
Please refer to the documentation for AnyData.pm and DBD::AnyData.pm
for further details.
=head1 LICENCE
Copyright (C)2007 Charlie Harvey
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version
2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public
License along with this program; if not, write to the Free
Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
Also available on line: http://www.gnu.org/copyleft/gpl.html
=cut
use strict;
use AnyData::Format::Base;
use vars qw( @ISA $DEBUG $VERSION);
@AnyData::Format::WeblogVhost::ISA = qw( AnyData::Format::Base );
$DEBUG = 0;
$VERSION = '0.02';
my $vlog_re = qr/^(\S*) (\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;
my $norm_re = qr/^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;
my $ref_client_re = qr/^"(.*?)" "(.*?)".*$/;
sub new {
my $class = shift;
my $self = shift || {};
$self->{col_names} =
'vhost,remotehost,username,authuser,date,request,status,bytes,client,referer';
$self->{record_sep} = "\n";
$self->{key} = 'datestamp';
$self->{keep_first_line} = 1;
return bless $self, $class;
}
sub read_fields {
my $self = shift;
my $str = shift || return undef;
$str =~ s/^\s+//;
$str =~ s/\s+$//;
return undef unless $str;
my @row;
if($str =~ /VLOG=-$/) {
(@row) = $str =~ $vlog_re;
}
else {
(@row) = ('', $str =~ $norm_re);
}
return undef unless defined $row[0];
my($referer,$client) = $row[8] =~ $ref_client_re;
$client ||= '';
$referer ||= '';
($row[8],$row[9])=($client,$referer);
# $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
return @row
}
1;
##
##
#########################################################
package AnyData::Format::WeblogCombined;
#########################################################
# AnyData driver for "Common Log Format" web log files
# copyright (c) 2007, Charlie
#########################################################
=head1 NAME
AnyData::Format::WeblogCombined - tiedhash & DBI/SQL access to HTTPD Logs
=head1 SYNOPSIS
use AnyData;
my $weblog = adTie( 'Weblog', $filename );
while (my $hit = each %$weblog) {
print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/;
}
# ... other tied hash operations
OR
use DBI
my $dbh = DBI->connect('dbi:AnyData:');
$dbh->func('hits','Weblog','access_log','ad_catalog');
my $hits = $dbh->selectall_arrayref( qq{
SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%'
});
# ... other DBI/SQL read operations
=head1 DESCRIPTION
This is a plug-in format parser for the AnyData and DBD::AnyData modules. You can gain read access to Combined Log Format
files web server log files (e.g. NCSA or Apache) either through tied hashes or arrays or through SQL database queries.
Fieldnames are taken from the W3 definitions found at
http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format
remotehost
usernname
authuser
date
request
status
bytes
referer
client
This module does not currently support writing to weblog files.
Please refer to the documentation for AnyData.pm and DBD::AnyData.pm
for further details.
=head1 AUTHOR & COPYRIGHT
(C)Copyright 2007, Charlie
All rights reversed
=cut
use strict;
use AnyData::Format::Base;
use vars qw( @ISA $DEBUG $VERSION);
@AnyData::Format::WeblogCombined::ISA = qw( AnyData::Format::Base );
$DEBUG = 0;
$VERSION = '0.01';
sub new {
my $class = shift;
my $self = shift || {};
$self->{col_names} =
'remotehost,username,authuser,date,request,status,bytes,client,referer';
$self->{record_sep} = "\n";
$self->{key} = 'datestamp';
$self->{keep_first_line} = 1;
return bless $self, $class;
}
sub read_fields {
print "PARSE RECORD\n" if $DEBUG;
my $self = shift;
my $str = shift || return undef;
$str =~ s/^\s+//;
$str =~ s/\s+$//;
return undef unless $str;
my(@row) = $str =~
/^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;
return undef unless defined $row[0];
my($referer,$client) = $row[7] =~ /^"(.*?)" "(.*?)"$/;
$client ||= '';
$referer ||= '';
($row[7],$row[8])=($client,$referer);
# $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
return @row
}
1;