#!/usr/bin/perl ################################################### # ALF: Apache Log Filter (c) 2007 Charlie Harvey # ################################################### use strict; use warnings; use DBD::AnyData; use AnyData::Format::Weblog; use AnyData::Format::WeblogVhost; use AnyData::Format::WeblogCombined; use Gtk2 -init; use Gtk2::Ex::Simple::Menu; use Getopt::Std; my %opts; getopts('?cvl:', \%opts); my $APP_NAME = 'ALF: Apache Log Filter'; my $VERSION = '0.41'; my $USAGE = "$APP_NAME, version $VERSION\nUsage: $0 [cv?][l filename]\n"; # Tweakable options # my $TABLE = 'log'; my $SEPERATOR = ' | '; my $QUERY_FONT = 'FreeSans 12'; my $RESULTS_FONT = 'FreeMono 12'; my $LOG_DEFAULT = '/home/charlie/access_log_test'; # Stop tweaking # my %log_types = (4=>"WeblogCombined", 5=>"WeblogVhost", 6=>'Weblog', ); my $log_type = "WeblogCombined"; my $dbh = DBI->connect('dbi:AnyData:(RaiseError=>0)'); my $window = Gtk2::Window->new ('toplevel'); my $menu = Gtk2::Ex::Simple::Menu->new ( menu_tree => do './alf_menu.pl', default_callback => \¬_implemented, ); my $query = Gtk2::Entry->new; my $col_selecter = Gtk2::ComboBox->new_text; my $results_view = Gtk2::TextView->new; my $results = $results_view->get_buffer; my $scroll_results = Gtk2::ScrolledWindow->new; my $select_all = Gtk2::Button->new_with_label('Get everything'); my $hits = Gtk2::Button->new_with_label('Total Hits'); my $_404 = Gtk2::Button->new_with_label('404s'); my $short = Gtk2::RadioButton->new_with_label(undef,'Short'); my $long = Gtk2::RadioButton->new_with_label($short,'Vertical'); my $top_box = Gtk2::HBox->new; my $main_box = Gtk2::VBox->new(0,0); my $bottom_box = Gtk2::HBox->new(0,15); my $status_bar = Gtk2::Statusbar->new; my $log_file = $LOG_DEFAULT; # Show error dialogue box sub err { my $msg = shift; my $error = Gtk2::MessageDialog->new( $window, 'modal', 'error', 'close', $msg ); $error->signal_connect ( response => \&dialog_close ); $error->run; $error->destroy; } # Show informational dialogue box sub info { my $msg = shift; my $info = Gtk2::MessageDialog->new( $window, 'modal', 'info', 'close', $msg ); $info->signal_connect ( response => \&dialog_close ); $info->run; $info->destroy; } # Actually only used during development - pop up an info box warning that this feature # isn't done. sub not_implemented { info("Not done yet :-(") } # Show about dialogue box sub about { Gtk2->show_about_dialog( $window, name=>$APP_NAME, version=>$VERSION, copyright=>"(C) 2007 Charlie Harvey", authors => "Charlie Harvey", ); } # Set status text to arg sub status { my $status = shift; $status_bar->pop(0); $status_bar->push(0,$status); } # Set standard status text sub changed_status { status("$APP_NAME, using $log_file as $TABLE ($log_type format)"); } # Change the log type, call dbh->func for that type, update column selecter sub set_log_type { my ($caller,$log_t,$widget)=(@_); if ($widget) { return unless $widget->get_active; } $log_type = $log_types{$log_t}; $log_type ||= $log_t; $dbh->func($TABLE , $log_type, $log_file, 'ad_catalog'); $col_selecter->remove_text(0) for (1..10); my $i = 0; my @cols = col_names(); $col_selecter->insert_text($i++,$_) for (@cols); info("Changed to $log_type format parser") if $widget; changed_status(); } # Open a new log file sub open_log { my $chooser = Gtk2::FileChooserDialog->new( "Open apache log", $window, 'open', ('Open' => 'ok', 'Cancel' =>'cancel') ); my $response = $chooser->run; if ($response eq 'ok') { $log_file = $chooser->get_filename; $dbh->func($TABLE , $log_type, $log_file, 'ad_catalog'); changed_status(); } $chooser->destroy; } # Save text from results text_view sub export_results { my $success = 0; my $chooser = Gtk2::FileChooserDialog->new( "Save filtered log", $window, 'save', ('Export' => 'ok', 'Cancel' =>'cancel') ); my $response = $chooser->run; if ($response eq 'ok') { my $output_file = $chooser->get_filename(); if (-e $output_file) { err("Can't overwrite existing file $output_file"); } else { my $length = $results->get_char_count; my $text_to_export = $results->get_text($results->get_start_iter,$results->get_end_iter,0); open OUT, ">$output_file" || err("Can't write to file"); print OUT $text_to_export; close OUT; $success=1; } } $chooser->destroy; info("Filtered results exported") if ($success); } # Close a dialogue box sub dialog_close { my ($self, $response) = @_; $self->destroy; } # Close application sub close { Gtk2->main_quit; } # When user presses enter, call go with current sql sub filter_pressed { my ($widget,$event,$data) = @_; return unless ($event->keyval==65293); go(); } # Run sql that is given as arg sub go { my $sql = $query->get_text; status("Filtering..."); my $res = query($sql); $results->set_text( $res ); } # Run select * query sub select_all_query { my $sql = "SELECT * FROM $TABLE"; $query->set_text($sql); $results->set_text(query($sql)); } # Run hits query sub hits_query { my $sql = "SELECT count(client) AS hits, status FROM $TABLE GROUP BY status ORDER BY status"; $query->set_text($sql); $results->set_text(query($sql)); } # Run 404 query sub _404_query { my $sql = "SELECT * FROM $TABLE WHERE status = 404"; $query->set_text($sql); $results->set_text(query($sql)); } # Append column name to query field sub append_col_to_qry { my $widget = shift; my @cols = col_names(); $col_selecter->popdown; $query->append_text( ' ' . $cols[$widget->get_active] . ' '); $query->grab_focus; my $pos=length $query->get_text; $query->set_position($pos); 0; } # Run query sub query { my $sql = shift; my $vl = 0; if ($sql =~ /\\g\s*$/i) { $vl = 1; $sql =~ s/\\g\s*$//i; } if ($long->get_active) { $vl = 1; } my $return = ''; my $row_count=0; my $sth = $dbh->prepare($sql) || err("Can't prepare $sql\nIs your SQL syntactically correct?"); $sth->execute || err("Can't execute $sql\n" . $sth->errstr); while (my $x = $sth->fetchrow_hashref) { if ($vl) { $return .= sprintf("%10s : %s\n",$_, $x->{$_}) for ( sort keys %$x ); } else { $return .= $x->{$_} . $SEPERATOR for ( sort keys %$x); } $return .= "\n"; $row_count++; } err("Couldn't run SQL.\nMaybe the file you opened wasn't a valid Apache log?\n\n" . $sth->errstr) if $sth->err; status("$row_count rows. Query: $sql"); $return; } # Return column names for current parser sub col_names { my $parser_name = "AnyData::Format::$log_type"; return (sort split /,/, $parser_name->new->{col_names}); } ## Process commandline options if ($opts{'?'}) { print $USAGE; exit(0); } if ($opts{l}) { $log_file = $opts{l}; } if ($opts{v}) { $log_type = "WeblogVhost"; } elsif ($opts{c}) { $log_type = "Weblog"; } # Connect signals up. $window->signal_connect ( destroy => \&close ); $query->signal_connect ( key_press_event => \&filter_pressed ); $col_selecter->signal_connect ( changed => \&append_col_to_qry ); $select_all->signal_connect ( clicked => \&select_all_query ); $hits->signal_connect ( clicked => \&hits_query ); $_404->signal_connect ( clicked => \&_404_query ); # Initialise widgets $window->set_border_width(5); $window->set_default_size(1024,768); $window->set_title($APP_NAME); $query->set_width_chars(80); $query->set_has_frame(1); $query->modify_font(Gtk2::Pango::FontDescription->from_string($QUERY_FONT)); $col_selecter->set_wrap_width(1); $results_view->set_editable(0); $results_view->set_wrap_mode('word-char'); $results_view->set_overwrite(1); $results_view->modify_font(Gtk2::Pango::FontDescription->from_string($RESULTS_FONT)); $scroll_results->set_policy ('never', 'always'); # Layout $scroll_results->add($results_view); $top_box->add($query); $top_box->add($col_selecter); $bottom_box->pack_start($short,0,1,1); $bottom_box->pack_start($long,0,0,1); $bottom_box->pack_start($select_all,1,1,1); $bottom_box->pack_start($hits,1,1,1); $bottom_box->pack_start($_404,1,1,1); $main_box->pack_start($menu->{widget},0,0,0); $main_box->pack_start($top_box,0,0,5); $main_box->pack_start($scroll_results,1,1,5); $main_box->pack_start($bottom_box,0,0,5); $main_box->pack_start($status_bar,0,0,5); $window->add($main_box); $window->show_all; # Lets go set_log_type(undef,$log_type); Gtk2->main; 0; __END__ =head1 NAME ALF: Apache Log Filter. Or alf.pl if you want to be picky. =head1 SYNOPSIS ./alf.pl [-l /path/to/access.log] [-cv] =head1 DESCRIPTION ALF is a filter for people using Apache logging to plain text logs, who still want the flexibility of being able to search their logs like a database. It relies heavily on Jeff Zucker's DBD::AnyData to do the clever stuff, and overlays that with a simple GTK2 interface. You can run SQL queries and see the results in horizontal or vertical formats; open different logs; run a few everyday queries from buttons; append column names to your SQL from a dropdown box; switch between parser formats; and export your filtered results in vertical or horizontal formats. You'll need to copy WeblogCombined.pm and WeblogVhost.pm into /path/to/AnyData/Format before ALF will play with you. AFAICS WeblogVhost.pm ought to work on all three types of logs but I don't have common format logs available to test on. This is very much a learning project so all comments are welcome. =head1 OPTIONS =head2 Commandline options =over =item -? Print usage and exit =item -l /path/to/access.log A valid apache log =item -c Use common format parser =item -v Use vhost parser =back =head2 Set in script body =over =item $TABLE The name which you wish to give the table you'll search. Default: 'log'. =item $SEPERATOR Field seperator for use when viewing records in horizontal format. Default: ' | '. =item $QUERY_FONT Font used in the query box. Default: 'FreeSans 12'. =item $RESULTS_FONT Font used to display your results. Default: 'FreeMono 12'. =item $LOG_DEFAULT Log that gets filtered if you don't specify one on the commandline. Default: '/home/charlie/access_log_test' =back =head1 REQUIREMENTS perl 5.8.8 DBD::AnyData AnyData::Format::WeblogVhost AnyData::Format::WeblogCombined Gtk2 Gtk2::Ex::Simple::Menu Getopt::Std =head1 LICENSE Copyright (C)2007 Charlie Harvey This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Also available on line: http://www.gnu.org/copyleft/gpl.html =cut #### #!/usr/bin/perl use strict; use warnings; my $menu_tree = [ _File => { item_type => '', children => [ _Open => { item_type => '', extra_data => 'gtk-open', callback => \&open_log, callback_action => 0, accelerator => 'O', }, _Save => { item_type => '', extra_data => 'gtk-save', callback_action => 1, callback => \&export_results, accelerator => 'S', }, _Quit => { item_type => '', extra_data => 'gtk-quit', callback => \&close, callback_action => 3, accelerator => 'Q', }, ], }, For_mat => { item_type => '', children => [ _Combined => { item_type => '', callback => \&set_log_type, callback_action => 4, groupid => 1, }, _Vhost => { item_type => '', callback => \&set_log_type, callback_action => 5, groupid => 1, }, _Common => { extra_data => 1, item_type => '', callback => \&set_log_type, callback_action => 6, groupid => 1, }, ], }, _Help => { item_type => '', children => [ _About => { item_type => '', extra_data => 'gtk-about', callback => \&about, callback_action => 7, }, ], }, ]; #### ######################################################### package AnyData::Format::WeblogVhost; ######################################################### # AnyData driver for "Vhost Log Format" web log files # Also supports combined and common log formats. # Copyright (c) 2007, Charlie ######################################################### =head1 NAME AnyData::Format::WeblogVhost - tiedhash & DBI/SQL access to HTTPD Logs =head1 SYNOPSIS use AnyData; my $weblog = adTie( 'Weblog', $filename ); while (my $hit = each %$weblog) { print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/; } # ... other tied hash operations OR use DBI my $dbh = DBI->connect('dbi:AnyData:'); $dbh->func('hits','Weblog','access_log','ad_catalog'); my $hits = $dbh->selectall_arrayref( qq{ SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%' }); # ... other DBI/SQL read operations =head1 DESCRIPTION This is a plug-in format parser for the AnyData and DBD::AnyData modules. You can gain read access to Vhost Log Format files web server log files (e.g. NCSA or Apache) either through tied hashes or arrays or through SQL database queries. Fieldnames are taken from the W3 definitions found at, with the addition of client, referer and vhost fields http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format remotehost usernname authuser date request status bytes referer client vhost This module does not currently support writing to weblog files. Please refer to the documentation for AnyData.pm and DBD::AnyData.pm for further details. =head1 LICENCE Copyright (C)2007 Charlie Harvey This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Also available on line: http://www.gnu.org/copyleft/gpl.html =cut use strict; use AnyData::Format::Base; use vars qw( @ISA $DEBUG $VERSION); @AnyData::Format::WeblogVhost::ISA = qw( AnyData::Format::Base ); $DEBUG = 0; $VERSION = '0.02'; my $vlog_re = qr/^(\S*) (\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/; my $norm_re = qr/^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/; my $ref_client_re = qr/^"(.*?)" "(.*?)".*$/; sub new { my $class = shift; my $self = shift || {}; $self->{col_names} = 'vhost,remotehost,username,authuser,date,request,status,bytes,client,referer'; $self->{record_sep} = "\n"; $self->{key} = 'datestamp'; $self->{keep_first_line} = 1; return bless $self, $class; } sub read_fields { my $self = shift; my $str = shift || return undef; $str =~ s/^\s+//; $str =~ s/\s+$//; return undef unless $str; my @row; if($str =~ /VLOG=-$/) { (@row) = $str =~ $vlog_re; } else { (@row) = ('', $str =~ $norm_re); } return undef unless defined $row[0]; my($referer,$client) = $row[8] =~ $ref_client_re; $client ||= ''; $referer ||= ''; ($row[8],$row[9])=($client,$referer); # $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp return @row } 1; #### ######################################################### package AnyData::Format::WeblogCombined; ######################################################### # AnyData driver for "Common Log Format" web log files # copyright (c) 2007, Charlie ######################################################### =head1 NAME AnyData::Format::WeblogCombined - tiedhash & DBI/SQL access to HTTPD Logs =head1 SYNOPSIS use AnyData; my $weblog = adTie( 'Weblog', $filename ); while (my $hit = each %$weblog) { print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/; } # ... other tied hash operations OR use DBI my $dbh = DBI->connect('dbi:AnyData:'); $dbh->func('hits','Weblog','access_log','ad_catalog'); my $hits = $dbh->selectall_arrayref( qq{ SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%' }); # ... other DBI/SQL read operations =head1 DESCRIPTION This is a plug-in format parser for the AnyData and DBD::AnyData modules. You can gain read access to Combined Log Format files web server log files (e.g. NCSA or Apache) either through tied hashes or arrays or through SQL database queries. Fieldnames are taken from the W3 definitions found at http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format remotehost usernname authuser date request status bytes referer client This module does not currently support writing to weblog files. Please refer to the documentation for AnyData.pm and DBD::AnyData.pm for further details. =head1 AUTHOR & COPYRIGHT (C)Copyright 2007, Charlie All rights reversed =cut use strict; use AnyData::Format::Base; use vars qw( @ISA $DEBUG $VERSION); @AnyData::Format::WeblogCombined::ISA = qw( AnyData::Format::Base ); $DEBUG = 0; $VERSION = '0.01'; sub new { my $class = shift; my $self = shift || {}; $self->{col_names} = 'remotehost,username,authuser,date,request,status,bytes,client,referer'; $self->{record_sep} = "\n"; $self->{key} = 'datestamp'; $self->{keep_first_line} = 1; return bless $self, $class; } sub read_fields { print "PARSE RECORD\n" if $DEBUG; my $self = shift; my $str = shift || return undef; $str =~ s/^\s+//; $str =~ s/\s+$//; return undef unless $str; my(@row) = $str =~ /^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/; return undef unless defined $row[0]; my($referer,$client) = $row[7] =~ /^"(.*?)" "(.*?)"$/; $client ||= ''; $referer ||= ''; ($row[7],$row[8])=($client,$referer); # $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp return @row } 1;