Beefy Boxes and Bandwidth Generously Provided by pair Networks
Welcome to the Monastery
 
PerlMonks  

ALF: Apache Log Filter

by ciderpunx (Vicar)
on Apr 01, 2007 at 15:47 UTC ( [id://607707]=CUFP: print w/replies, xml ) Need Help??

ALF is a project I've been working on to learn some more about Gtk2. It uses the fantastic DBD::AnyData and allows you to run SQL queries on an Apache log. I've written a couple of extra parser plugins so that AnyData can read combined and vhost logs. You need to copy them into /path/to/AnyData/Format.

There's a demo video on the internet archive.

alf.pl

#!/usr/bin/perl
###################################################
# ALF: Apache Log Filter (c) 2007 Charlie Harvey  #
###################################################
use strict;
use warnings;
use DBD::AnyData;
use AnyData::Format::Weblog;
use AnyData::Format::WeblogVhost;
use AnyData::Format::WeblogCombined;
use Gtk2 -init;
use Gtk2::Ex::Simple::Menu;
use Getopt::Std;

my %opts;
getopts('?cvl:', \%opts);

my $APP_NAME = 'ALF: Apache Log Filter';
my $VERSION  = '0.41';
my $USAGE    = "$APP_NAME, version $VERSION\nUsage: $0 [cv?][l filename]\n";

# Tweakable options #
my $TABLE        = 'log';
my $SEPERATOR    = ' | ';
my $QUERY_FONT   = 'FreeSans 12';
my $RESULTS_FONT = 'FreeMono 12';
my $LOG_DEFAULT  = '/home/charlie/access_log_test';
# Stop tweaking #

# GDK keyval of the Return key (GDK_Return, 0xff0d).
my $KEY_RETURN = 65293;

# Maps the callback_action numbers used in alf_menu.pl to parser names.
my %log_types = (
    4 => "WeblogCombined",
    5 => "WeblogVhost",
    6 => 'Weblog',
);
my $log_type = "WeblogCombined";

my $dbh    = DBI->connect('dbi:AnyData:(RaiseError=>0)');
my $window = Gtk2::Window->new('toplevel');

# The menu tree lives in a separate file that is evaluated with do().
# do() returns undef both when the file cannot be read ($! set) and when it
# fails to compile ($@ set), so check explicitly instead of handing undef
# to the menu constructor.
my $menu_tree = do './alf_menu.pl';
die "Can't load menu definition ./alf_menu.pl: " . ( $@ || $! ) . "\n"
    unless $menu_tree;

my $menu = Gtk2::Ex::Simple::Menu->new(
    menu_tree        => $menu_tree,
    default_callback => \&not_implemented,
);

my $query          = Gtk2::Entry->new;
my $col_selecter   = Gtk2::ComboBox->new_text;
my $results_view   = Gtk2::TextView->new;
my $results        = $results_view->get_buffer;
my $scroll_results = Gtk2::ScrolledWindow->new;
my $select_all     = Gtk2::Button->new_with_label('Get everything');
my $hits           = Gtk2::Button->new_with_label('Total Hits');
my $_404           = Gtk2::Button->new_with_label('404s');
my $short          = Gtk2::RadioButton->new_with_label( undef,  'Short' );
my $long           = Gtk2::RadioButton->new_with_label( $short, 'Vertical' );
my $top_box        = Gtk2::HBox->new;
my $main_box       = Gtk2::VBox->new( 0, 0 );
my $bottom_box     = Gtk2::HBox->new( 0, 15 );
my $status_bar     = Gtk2::Statusbar->new;
my $log_file       = $LOG_DEFAULT;

# Show a modal error dialogue box containing $msg and wait for it to be
# dismissed.
sub err {
    my $msg   = shift;
    my $error = Gtk2::MessageDialog->new( $window, 'modal', 'error', 'close', $msg );

    # run() blocks until a response arrives; the dialog is destroyed exactly
    # once, here, rather than from a response handler while run() is active.
    $error->run;
    $error->destroy;
    return;
}

# Show a modal informational dialogue box containing $msg and wait for it
# to be dismissed.
sub info {
    my $msg  = shift;
    my $info = Gtk2::MessageDialog->new( $window, 'modal', 'info', 'close', $msg );
    $info->run;
    $info->destroy;
    return;
}

# Actually only used during development - pop up an info box warning that
# this feature isn't done.
sub not_implemented {
    info("Not done yet :-(");
}

# Show about dialogue box
sub about {
    Gtk2->show_about_dialog(
        $window,
        name      => $APP_NAME,
        version   => $VERSION,
        copyright => "(C) 2007 Charlie Harvey",
        authors   => "Charlie Harvey",
    );
}

# Set status text to arg
sub status {
    my $status = shift;
    $status_bar->pop(0);
    $status_bar->push( 0, $status );
}

# Set standard status text
sub changed_status {
    status("$APP_NAME, using $log_file as $TABLE ($log_type format)");
}

# Change the log type, call dbh->func for that type, update column selecter.
# Called both as a menu callback ($log_t is a callback_action number, $widget
# is the radio item) and directly at start-up ($log_t is a parser name and
# $widget is absent).
sub set_log_type {
    my ( $caller, $log_t, $widget ) = @_;

    # Radio items also fire when they become inactive; ignore those.
    if ($widget) {
        return unless $widget->get_active;
    }

    # Numeric menu actions are translated; anything else is taken verbatim.
    $log_type = $log_types{$log_t} || $log_t;
    $dbh->func( $TABLE, $log_type, $log_file, 'ad_catalog' );

    # Empty the dropdown whatever its current length, then refill it with
    # the column names of the newly selected parser.
    $col_selecter->get_model->clear;
    my $i = 0;
    $col_selecter->insert_text( $i++, $_ ) for col_names();

    info("Changed to $log_type format parser") if $widget;
    changed_status();
}

# Open a new log file
sub open_log {
    my $chooser = Gtk2::FileChooserDialog->new(
        "Open apache log", $window, 'open',
        ( 'Open' => 'ok', 'Cancel' => 'cancel' )
    );
    my $response = $chooser->run;
    if ( $response eq 'ok' ) {
        $log_file = $chooser->get_filename;
        $dbh->func( $TABLE, $log_type, $log_file, 'ad_catalog' );
        changed_status();
    }
    $chooser->destroy;
}

# Save text from results text_view to a file chosen by the user.
# Existing files are never overwritten.
sub export_results {
    my $success = 0;
    my $chooser = Gtk2::FileChooserDialog->new(
        "Save filtered log", $window, 'save',
        ( 'Export' => 'ok', 'Cancel' => 'cancel' )
    );
    my $response = $chooser->run;
    if ( $response eq 'ok' ) {
        my $output_file = $chooser->get_filename();
        if ( -e $output_file ) {
            err("Can't overwrite existing file $output_file");
        }
        else {
            my $text_to_export =
                $results->get_text( $results->get_start_iter, $results->get_end_iter, 0 );

            # Three-arg open on a lexical handle; the result of open itself
            # is tested, so a failure is reported instead of being ignored.
            if ( open my $out, '>', $output_file ) {
                print {$out} $text_to_export;

                # CORE:: because this file defines its own sub close().
                # Buffered write errors only surface when the handle closes.
                if ( CORE::close($out) ) {
                    $success = 1;
                }
                else {
                    err("Can't write to file");
                }
            }
            else {
                err("Can't write to file");
            }
        }
    }
    $chooser->destroy;
    info("Filtered results exported") if ($success);
}

# Close a dialogue box. No longer connected by err()/info(); retained in
# case a handler outside this file still refers to it.
sub dialog_close {
    my ( $self, $response ) = @_;
    $self->destroy;
}

# Close application (referenced by name from alf_menu.pl).
sub close {
    Gtk2->main_quit;
}

# When user presses enter, call go with current sql.
# Returns false so the key press continues to propagate to the entry.
sub filter_pressed {
    my ( $widget, $event, $data ) = @_;
    go() if $event->keyval == $KEY_RETURN;
    return 0;
}

# Run the sql currently in the query field and display the result.
sub go {
    my $sql = $query->get_text;
    status("Filtering...");
    my $res = query($sql);
    $results->set_text($res);
}

# Run select * query
sub select_all_query {
    my $sql = "SELECT * FROM $TABLE";
    $query->set_text($sql);
    $results->set_text( query($sql) );
}

# Run hits query
sub hits_query {
    my $sql = "SELECT count(client) AS hits, status FROM $TABLE GROUP BY status ORDER BY status";
    $query->set_text($sql);
    $results->set_text( query($sql) );
}

# Run 404 query
sub _404_query {
    my $sql = "SELECT * FROM $TABLE WHERE status = 404";
    $query->set_text($sql);
    $results->set_text( query($sql) );
}

# Append the column name picked in the dropdown to the query field and put
# the cursor at the end of the query.
sub append_col_to_qry {
    my $widget = shift;

    # get_active is -1 when nothing is selected (e.g. while the list is
    # being rebuilt); without this guard $cols[-1] would be appended.
    my $active = $widget->get_active;
    return 0 if $active < 0;

    my @cols = col_names();
    $col_selecter->popdown;
    $query->append_text( ' ' . $cols[$active] . ' ' );
    $query->grab_focus;
    $query->set_position( length $query->get_text );
    return 0;
}

# Run query $sql against the current log and return the formatted rows as
# one string. A trailing \g on the query, or the 'Vertical' radio button,
# selects one-field-per-line output. On failure an error dialogue is shown
# and the empty string is returned.
sub query {
    my $sql = shift;

    my $vertical = 0;
    if ( $sql =~ /\\g\s*$/i ) {
        $vertical = 1;
        $sql =~ s/\\g\s*$//i;
    }
    $vertical = 1 if $long->get_active;

    # With RaiseError off, prepare/execute signal failure by returning
    # false; stop at the first failure rather than using a bad handle.
    my $sth = $dbh->prepare($sql);
    unless ($sth) {
        err("Can't prepare $sql\nIs your SQL syntactically correct?");
        return '';
    }
    unless ( $sth->execute ) {
        my $reason = $sth->errstr;
        err( "Can't execute $sql\n" . ( defined $reason ? $reason : '' ) );
        return '';
    }

    my $return    = '';
    my $row_count = 0;
    while ( my $row = $sth->fetchrow_hashref ) {
        for my $col ( sort keys %$row ) {
            # Missing fields are shown as empty rather than raising an
            # uninitialized-value warning.
            my $value = defined $row->{$col} ? $row->{$col} : '';
            $return .=
                $vertical
                ? sprintf( "%10s : %s\n", $col, $value )
                : $value . $SEPERATOR;
        }
        $return .= "\n";
        $row_count++;
    }

    # An error flagged after fetching means the loop ended early.
    err( "Couldn't run SQL.\nMaybe the file you opened wasn't a valid Apache log?\n\n"
            . $sth->errstr )
        if $sth->err;

    status("$row_count rows. Query: $sql");
    return $return;
}

# Return the sorted column names for the current parser.
sub col_names {
    my $parser_name = "AnyData::Format::$log_type";
    return ( sort split /,/, $parser_name->new->{col_names} );
}

## Process commandline options
if ( $opts{'?'} ) {
    print $USAGE;
    exit(0);
}
if ( $opts{l} ) {
    $log_file = $opts{l};
}
if ( $opts{v} ) {
    $log_type = "WeblogVhost";
}
elsif ( $opts{c} ) {
    $log_type = "Weblog";
}

# Connect signals up.
$window->signal_connect( destroy => \&close );
$query->signal_connect( key_press_event => \&filter_pressed );
$col_selecter->signal_connect( changed => \&append_col_to_qry );
$select_all->signal_connect( clicked => \&select_all_query );
$hits->signal_connect( clicked => \&hits_query );
$_404->signal_connect( clicked => \&_404_query );

# Initialise widgets
$window->set_border_width(5);
$window->set_default_size( 1024, 768 );
$window->set_title($APP_NAME);
$query->set_width_chars(80);
$query->set_has_frame(1);
$query->modify_font( Gtk2::Pango::FontDescription->from_string($QUERY_FONT) );
$col_selecter->set_wrap_width(1);
$results_view->set_editable(0);
$results_view->set_wrap_mode('word-char');
$results_view->set_overwrite(1);
$results_view->modify_font( Gtk2::Pango::FontDescription->from_string($RESULTS_FONT) );
$scroll_results->set_policy( 'never', 'always' );

# Layout
$scroll_results->add($results_view);
$top_box->add($query);
$top_box->add($col_selecter);
$bottom_box->pack_start( $short,      0, 1, 1 );
$bottom_box->pack_start( $long,       0, 0, 1 );
$bottom_box->pack_start( $select_all, 1, 1, 1 );
$bottom_box->pack_start( $hits,       1, 1, 1 );
$bottom_box->pack_start( $_404,       1, 1, 1 );
$main_box->pack_start( $menu->{widget}, 0, 0, 0 );
$main_box->pack_start( $top_box,        0, 0, 5 );
$main_box->pack_start( $scroll_results, 1, 1, 5 );
$main_box->pack_start( $bottom_box,     0, 0, 5 );
$main_box->pack_start( $status_bar,     0, 0, 5 );
$window->add($main_box);
$window->show_all;

# Lets go
set_log_type( undef, $log_type );
Gtk2->main;
0;

__END__

=head1 NAME

ALF: Apache Log Filter. Or alf.pl if you want to be picky.

=head1 SYNOPSIS

./alf.pl [-l /path/to/access.log] [-cv]

=head1 DESCRIPTION

ALF is a filter for people using Apache logging to plain text logs, who
still want the flexibility of being able to search their logs like a
database. It relies heavily on Jeff Zucker's DBD::AnyData to do the clever
stuff, and overlays that with a simple GTK2 interface.

You can run SQL queries and see the results in horizontal or vertical
formats; open different logs; run a few everyday queries from buttons;
append column names to your SQL from a dropdown box; switch between parser
formats; and export your filtered results in vertical or horizontal formats.

You'll need to copy WeblogCombined.pm and WeblogVhost.pm into
/path/to/AnyData/Format before ALF will play with you. AFAICS WeblogVhost.pm
ought to work on all three types of logs but I don't have common format logs
available to test on. This is very much a learning project so all comments
are welcome.

=head1 OPTIONS

=head2 Commandline options

=over

=item -?

Print usage and exit

=item -l /path/to/access.log

A valid apache log

=item -c

Use common format parser

=item -v

Use vhost parser

=back

=head2 Set in script body

=over

=item $TABLE

The name which you wish to give the table you'll search. Default: 'log'.

=item $SEPERATOR

Field separator for use when viewing records in horizontal format.
Default: ' | '.

=item $QUERY_FONT

Font used in the query box. Default: 'FreeSans 12'.

=item $RESULTS_FONT

Font used to display your results. Default: 'FreeMono 12'.

=item $LOG_DEFAULT

Log that gets filtered if you don't specify one on the commandline.
Default: '/home/charlie/access_log_test'

=back

=head1 REQUIREMENTS

 perl 5.8.8
 DBD::AnyData
 AnyData::Format::WeblogVhost
 AnyData::Format::WeblogCombined
 Gtk2
 Gtk2::Ex::Simple::Menu
 Getopt::Std

=head1 LICENSE

Copyright (C)2007 Charlie Harvey

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., 59
Temple Place - Suite 330, Boston, MA 02111-1307, USA.

Also available on line: http://www.gnu.org/copyleft/gpl.html

=cut

alf_menu.pl

#!/usr/bin/perl
# Menu definition for alf.pl. This file is evaluated with do(), so the value
# of its final statement (the menu tree array reference) is what the caller
# receives. The callbacks named here are resolved in the caller's package.
use strict;
use warnings;

# File menu: open a log, export the results, quit.
my @file_entries = (
    _Open => {
        item_type       => '<StockItem>',
        extra_data      => 'gtk-open',
        callback        => \&open_log,
        callback_action => 0,
        accelerator     => '<ctrl>O',
    },
    _Save => {
        item_type       => '<StockItem>',
        extra_data      => 'gtk-save',
        callback_action => 1,
        callback        => \&export_results,
        accelerator     => '<ctrl>S',
    },
    _Quit => {
        item_type       => '<StockItem>',
        extra_data      => 'gtk-quit',
        callback        => \&close,
        callback_action => 3,
        accelerator     => '<ctrl>Q',
    },
);

# Format menu: one radio group; each callback_action is passed to
# set_log_type as its second argument.
my @format_entries = (
    _Combined => {
        item_type       => '<RadioItem>',
        callback        => \&set_log_type,
        callback_action => 4,
        groupid         => 1,
    },
    _Vhost => {
        item_type       => '<RadioItem>',
        callback        => \&set_log_type,
        callback_action => 5,
        groupid         => 1,
    },
    _Common => {
        extra_data      => 1,
        item_type       => '<RadioItem>',
        callback        => \&set_log_type,
        callback_action => 6,
        groupid         => 1,
    },
);

# Help menu.
my @help_entries = (
    _About => {
        item_type       => '<StockItem>',
        extra_data      => 'gtk-about',
        callback        => \&about,
        callback_action => 7,
    },
);

my $menu_tree = [
    _File   => { item_type => '<Branch>', children => [@file_entries] },
    For_mat => { item_type => '<Branch>', children => [@format_entries] },
    _Help   => { item_type => '<Branch>', children => [@help_entries] },
];

# Final value of the file: handed back to the do() in alf.pl.
$menu_tree;

WeblogVhost.pm

#########################################################
package AnyData::Format::WeblogVhost;
#########################################################
# AnyData driver for "Vhost Log Format" web log files
# Also supports combined and common log formats.
# Copyright (c) 2007, Charlie <charlie@charlieharvey.com>
#########################################################

=head1 NAME

AnyData::Format::WeblogVhost - tiedhash & DBI/SQL access to HTTPD Logs

=head1 SYNOPSIS

 use AnyData;
 my $weblog = adTie( 'WeblogVhost', $filename );
 while (my $hit = each %$weblog) {
    print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/;
 }
 # ... other tied hash operations

 OR

 use DBI
 my $dbh = DBI->connect('dbi:AnyData:');
 $dbh->func('hits','WeblogVhost','access_log','ad_catalog');
 my $hits = $dbh->selectall_arrayref( qq{
    SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%'
 });
 # ... other DBI/SQL read operations

=head1 DESCRIPTION

This is a plug-in format parser for the AnyData and DBD::AnyData modules.
You can gain read access to Vhost Log Format web server log files (e.g.
NCSA or Apache) either through tied hashes or arrays or through SQL
database queries.

Fieldnames are taken from the W3 definitions found at the address below,
with the addition of client, referer and vhost fields

   http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format

   vhost
   remotehost
   username
   authuser
   date
   request
   status
   bytes
   client
   referer

Lines that do not end in C<VLOG=-> are parsed as combined/common format
and get an empty vhost field. Lines that cannot be parsed at all are
skipped.

This module does not currently support writing to weblog files.

Please refer to the documentation for AnyData.pm and DBD::AnyData.pm
for further details.

=head1 LICENCE

Copyright (C)2007 Charlie Harvey

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc., 59
Temple Place - Suite 330, Boston, MA 02111-1307, USA.

Also available on line: http://www.gnu.org/copyleft/gpl.html

=cut

use strict;
use warnings;
use AnyData::Format::Base;

our ( @ISA, $DEBUG, $VERSION );
@ISA     = qw( AnyData::Format::Base );
$DEBUG   = 0;
$VERSION = '0.02';

# Vhost line: vhost host user authuser [date] "request" status bytes rest
my $vlog_re = qr/^(\S*) (\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;

# Combined/common line: host user authuser [date] "request" status bytes rest
my $norm_re = qr/^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;

# The "rest" of a line: "referer" "client", optionally followed by more text.
my $ref_client_re = qr/^"(.*?)" "(.*?)".*$/;

# Constructor. Accepts an optional hash reference of settings, fills in the
# column names, record separator and key, and blesses it into the class.
sub new {
    my $class = shift;
    my $self  = shift || {};
    $self->{col_names} =
        'vhost,remotehost,username,authuser,date,request,status,bytes,client,referer';
    $self->{record_sep}      = "\n";
    $self->{key}             = 'datestamp';
    $self->{keep_first_line} = 1;
    return bless $self, $class;
}

# Split one log line into the ten fields named in col_names.
# Returns the list of fields, or undef for an empty or unparseable line.
# ('return undef' rather than a bare return is kept from the original;
# NOTE(review): confirm what the AnyData caller expects for skipped lines.)
sub read_fields {
    my $self = shift;
    my $str  = shift || return undef;
    $str =~ s/^\s+//;
    $str =~ s/\s+$//;
    return undef unless $str;

    my @row;
    if ( $str =~ /VLOG=-$/ ) {
        @row = $str =~ $vlog_re;
    }
    else {
        # Only prepend the empty vhost when the line actually matched;
        # otherwise a lone '' would make a malformed line look parsed.
        my @fields = $str =~ $norm_re;
        @row = ( '', @fields ) if @fields;
    }
    return undef unless @row;

    # Element 8 holds everything after the byte count; pull referer and
    # client out of it. Both stay empty for lines without those fields.
    my ( $referer, $client ) = $row[8] =~ $ref_client_re;
    $client  ||= '';
    $referer ||= '';
    ( $row[8], $row[9] ) = ( $client, $referer );

    # $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
    return @row;
}

1;

WeblogCombined.pm

#########################################################
package AnyData::Format::WeblogCombined;
#########################################################
# AnyData driver for "Combined Log Format" web log files
# copyright (c) 2007, Charlie <charlie_ampersat_charlieharvey_fullstop_com>
#########################################################

=head1 NAME

AnyData::Format::WeblogCombined - tiedhash & DBI/SQL access to HTTPD Logs

=head1 SYNOPSIS

 use AnyData;
 my $weblog = adTie( 'WeblogCombined', $filename );
 while (my $hit = each %$weblog) {
    print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/;
 }
 # ... other tied hash operations

 OR

 use DBI
 my $dbh = DBI->connect('dbi:AnyData:');
 $dbh->func('hits','WeblogCombined','access_log','ad_catalog');
 my $hits = $dbh->selectall_arrayref( qq{
    SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%'
 });
 # ... other DBI/SQL read operations

=head1 DESCRIPTION

This is a plug-in format parser for the AnyData and DBD::AnyData modules.
You can gain read access to Combined Log Format web server log files (e.g.
NCSA or Apache) either through tied hashes or arrays or through SQL
database queries.

Fieldnames are taken from the W3 definitions found at the address below,
with the addition of client and referer fields

   http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format

   remotehost
   username
   authuser
   date
   request
   status
   bytes
   client
   referer

This module does not currently support writing to weblog files.

Please refer to the documentation for AnyData.pm and DBD::AnyData.pm
for further details.

=head1 AUTHOR & COPYRIGHT

(C)Copyright 2007, Charlie <charlie_ampersat_charlieharvey_fullstop_com>

All rights reversed

=cut

use strict;
use warnings;
use AnyData::Format::Base;

our ( @ISA, $DEBUG, $VERSION );
@ISA     = qw( AnyData::Format::Base );
$DEBUG   = 0;
$VERSION = '0.01';

# Log line: host user authuser [date] "request" status bytes rest
my $line_re = qr/^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;

# The "rest" of a line must be exactly: "referer" "client"
my $ref_client_re = qr/^"(.*?)" "(.*?)"$/;

# Constructor. Accepts an optional hash reference of settings, fills in the
# column names, record separator and key, and blesses it into the class.
sub new {
    my $class = shift;
    my $self  = shift || {};
    $self->{col_names} =
        'remotehost,username,authuser,date,request,status,bytes,client,referer';
    $self->{record_sep}      = "\n";
    $self->{key}             = 'datestamp';
    $self->{keep_first_line} = 1;
    return bless $self, $class;
}

# Split one log line into the nine fields named in col_names.
# Returns the list of fields, or undef for an empty or unparseable line.
# ('return undef' rather than a bare return is kept from the original;
# NOTE(review): confirm what the AnyData caller expects for skipped lines.)
sub read_fields {
    print "PARSE RECORD\n" if $DEBUG;
    my $self = shift;
    my $str  = shift || return undef;
    $str =~ s/^\s+//;
    $str =~ s/\s+$//;
    return undef unless $str;

    my @row = $str =~ $line_re;
    return undef unless defined $row[0];

    # Element 7 holds everything after the byte count; pull referer and
    # client out of it. Both stay empty when that text is not two quoted
    # strings.
    my ( $referer, $client ) = $row[7] =~ $ref_client_re;
    $client  ||= '';
    $referer ||= '';
    ( $row[7], $row[8] ) = ( $client, $referer );

    # $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
    return @row;
}

1;
--
Linux, perl, punk rock, cider: charlieharvey.org.uk.

Replies are listed 'Best First'.
Re: ALF: Apache Log Filter
by zentara (Archbishop) on Apr 02, 2007 at 09:28 UTC
    Nice demo video. I guess with the rising bandwidth everyone is getting, the demo-video will eventually replace the screenshot. What did you use to make it?

    I'm not really a human, but I play one on earth. Cogito ergo sum a bum
      Thanks zentara. I used xvidcap, which seems to do pretty much everything I wanted it to do - though I also tried Istanbul, which can only produce ogg theoras.
      --
      Linux, perl, punk rock, cider: charlieharvey.org.uk.

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: CUFP [id://607707]
Approved by valdez
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others chanting in the Monastery: (3)
As of 2024-03-29 02:04 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found