ALF is a project I've been working on to learn some more about Gtk2. It uses the fantastic DBD::AnyData and allows you to run SQL queries on an Apache log. I've written a couple of extra parser plugins so that AnyData can read combined and vhost logs. You need to chuck them into /path/to/AnyData/Format.
There's a demo video on the internet archive.
alf.pl
#!/usr/bin/perl
###################################################
# ALF: Apache Log Filter (c) 2007 Charlie Harvey #
###################################################
use strict;
use warnings;
use DBD::AnyData;
use AnyData::Format::Weblog;
use AnyData::Format::WeblogVhost;
use AnyData::Format::WeblogCombined;
use Gtk2 -init;
use Gtk2::Ex::Simple::Menu;
use Getopt::Std;
# Command-line switches accepted by getopts: -? -c -v, and -l which takes
# a value.
my %opts;
getopts('?cvl:', \%opts);

my $APP_NAME = 'ALF: Apache Log Filter';
my $VERSION  = '0.41';
my $USAGE    = "$APP_NAME, version $VERSION\nUsage: $0 [cv?][l filename]\n";

# Tweakable options #
my $TABLE        = 'log';
my $SEPERATOR    = ' | ';
my $QUERY_FONT   = 'FreeSans 12';
my $RESULTS_FONT = 'FreeMono 12';
my $LOG_DEFAULT  = '/home/charlie/access_log_test';
# Stop tweaking #

# Maps the callback_action numbers used in alf_menu.pl to parser names.
my %log_types = (4 => "WeblogCombined", 5 => "WeblogVhost", 6 => 'Weblog', );
my $log_type  = "WeblogCombined";

# NOTE(review): the attribute list sits after the second colon of the DSN;
# DBI documents "dbi:Driver(attrs):" as the attribute syntax. Left as-is
# because RaiseError=>0 is the DBI default anyway -- confirm before moving.
my $dbh = DBI->connect('dbi:AnyData:(RaiseError=>0)')
    or die "Can't connect to DBD::AnyData: $DBI::errstr\n";

my $window = Gtk2::Window->new('toplevel');

# The menu definition lives in a separate file that evaluates to an array
# reference. Fail early with a useful message if it cannot be loaded,
# rather than handing an undefined tree to the menu builder.
my $menu_tree = do './alf_menu.pl';
die "Can't load menu definition ./alf_menu.pl: " . ($@ || $!) . "\n"
    unless ref($menu_tree) eq 'ARRAY';
my $menu = Gtk2::Ex::Simple::Menu->new(
    menu_tree        => $menu_tree,
    default_callback => \&not_implemented,
);

# Widgets
my $query          = Gtk2::Entry->new;
my $col_selecter   = Gtk2::ComboBox->new_text;
my $results_view   = Gtk2::TextView->new;
my $results        = $results_view->get_buffer;
my $scroll_results = Gtk2::ScrolledWindow->new;
my $select_all     = Gtk2::Button->new_with_label('Get everything');
my $hits           = Gtk2::Button->new_with_label('Total Hits');
my $_404           = Gtk2::Button->new_with_label('404s');
my $short          = Gtk2::RadioButton->new_with_label(undef, 'Short');
my $long           = Gtk2::RadioButton->new_with_label($short, 'Vertical');
my $top_box        = Gtk2::HBox->new;
my $main_box       = Gtk2::VBox->new(0, 0);
my $bottom_box     = Gtk2::HBox->new(0, 15);
my $status_bar     = Gtk2::Statusbar->new;

# Log file currently being queried; may be replaced by -l or File/Open.
my $log_file = $LOG_DEFAULT;
# Show a modal error dialogue box containing $msg and wait until the user
# dismisses it.
#   $msg - text to display, shown verbatim
sub err {
    my $msg = shift;
    # The fifth argument of Gtk2::MessageDialog->new is a printf-style
    # format. Pass the message through '%s' so that any '%' it contains
    # (e.g. SQL such as LIKE '%foo%') is not interpreted as a conversion.
    my $error = Gtk2::MessageDialog->new(
        $window, 'modal', 'error', 'close', '%s', $msg
    );
    # run() blocks until a response arrives, so the dialog is destroyed
    # exactly once here; no separate response handler is needed.
    $error->run;
    $error->destroy;
}
# Show a modal informational dialogue box containing $msg and wait until
# the user dismisses it.
#   $msg - text to display, shown verbatim
sub info {
    my $msg = shift;
    # The fifth argument of Gtk2::MessageDialog->new is a printf-style
    # format. Pass the message through '%s' so that any '%' it contains is
    # not interpreted as a conversion.
    my $info = Gtk2::MessageDialog->new(
        $window, 'modal', 'info', 'close', '%s', $msg
    );
    # run() blocks until a response arrives, so the dialog is destroyed
    # exactly once here; no separate response handler is needed.
    $info->run;
    $info->destroy;
}
# Actually only used during development - pop up an info box warning that
# this feature isn't done. Registered as the menu's default callback.
sub not_implemented {
    info("Not done yet :-(");
}
# Show the about dialogue box, with the application name, version,
# copyright line and author, parented on the main window.
sub about {
    Gtk2->show_about_dialog(
        $window,
        name      => $APP_NAME,
        version   => $VERSION,
        copyright => "(C) 2007 Charlie Harvey",
        authors   => "Charlie Harvey",
    );
}
# Replace the message shown in the status bar.
#   first argument - the new status text
# Context id 0 is used for both the pop and the push, so the previous
# message is discarded before the new one is shown.
sub status {
    my ($message) = @_;
    my $context_id = 0;
    $status_bar->pop($context_id);
    $status_bar->push($context_id, $message);
}
# Show the standard status line: application name, the current log file,
# the table name it is exposed as, and the parser format in use.
sub changed_status {
    my $message = sprintf '%s, using %s as %s (%s format)',
        $APP_NAME, $log_file, $TABLE, $log_type;
    status($message);
}
# Change the log type, call dbh->func for that type, and update the column
# selecter.
#   $caller - first callback argument; unused
#   $log_t  - either a key of %log_types (menu callback_action) or a parser
#             name such as "WeblogCombined"
#   $widget - the menu item that fired, if any; when present, the change
#             is ignored unless that item is now active, and a
#             confirmation box is shown afterwards
sub set_log_type {
    my ($caller, $log_t, $widget) = @_;
    if ($widget) {
        return unless $widget->get_active;
    }

    # Numeric menu actions are translated; anything else is taken to be a
    # parser name already.
    $log_type = $log_types{$log_t} || $log_t;
    $dbh->func($TABLE, $log_type, $log_file, 'ad_catalog');

    # Empty the selecter by removing as many rows as it currently holds,
    # rather than assuming there are never more than ten columns.
    my $row_count = $col_selecter->get_model->iter_n_children(undef);
    $col_selecter->remove_text(0) for (1 .. $row_count);

    # Refill it with the column names of the newly selected parser.
    $col_selecter->append_text($_) for col_names();

    info("Changed to $log_type format parser") if $widget;
    changed_status();
}
# Let the user pick a different log file. If the chooser is confirmed, the
# chosen file becomes the current log, is registered with the database
# handle under the current parser, and the status line is refreshed.
sub open_log {
    my $dialog = Gtk2::FileChooserDialog->new(
        "Open apache log",
        $window,
        'open',
        'Open'   => 'ok',
        'Cancel' => 'cancel',
    );

    if ($dialog->run eq 'ok') {
        $log_file = $dialog->get_filename;
        $dbh->func($TABLE, $log_type, $log_file, 'ad_catalog');
        changed_status();
    }

    $dialog->destroy;
}
# Save the text currently shown in the results view to a file chosen by
# the user. Existing files are never overwritten. A confirmation box is
# shown only when the file was written and closed successfully; otherwise
# an error box explains what went wrong.
sub export_results {
    my $success = 0;
    my $chooser = Gtk2::FileChooserDialog->new(
        "Save filtered log", $window, 'save',
        ('Export' => 'ok', 'Cancel' => 'cancel')
    );
    my $response = $chooser->run;
    if ($response eq 'ok') {
        my $output_file = $chooser->get_filename();
        if (-e $output_file) {
            err("Can't overwrite existing file $output_file");
        }
        else {
            my $text_to_export = $results->get_text(
                $results->get_start_iter, $results->get_end_iter, 0
            );
            # Three-argument open with a lexical handle: the file name can
            # never be read as a mode, and a failure is actually detected
            # (the old `open ... || err(...)` bound the || to the string).
            # The buffer text is character data, so encode it on output.
            if (open(my $out, '>:encoding(UTF-8)', $output_file)) {
                my $written = print {$out} $text_to_export;
                # CORE:: is spelled out because this file defines its own
                # sub named close. Buffered write errors surface here.
                my $closed = CORE::close($out);
                if ($written && $closed) {
                    $success = 1;
                }
                else {
                    err("Can't write to file $output_file: $!");
                }
            }
            else {
                err("Can't write to file $output_file: $!");
            }
        }
    }
    $chooser->destroy;
    info("Filtered results exported") if ($success);
}
# Response handler for dialogue boxes: destroys the dialog it is called
# with. The response value passed as the second argument is not examined.
sub dialog_close {
    my ($dialog) = @_;
    return $dialog->destroy;
}
# Quit the application by leaving the Gtk2 main loop. Connected to the
# main window's destroy signal and to the Quit menu item.
sub close {
    return Gtk2->main_quit;
}
# key_press_event handler for the query entry: runs the current query when
# the pressed key is Enter and ignores every other key.
#   arguments: the widget, the key event, and user data (only the event
#   is used)
sub filter_pressed {
    my ($entry, $key_event, $user_data) = @_;
    my $enter_keyval = 65293;    # 0xFF0D, the GDK keysym for Return
    return if $key_event->keyval != $enter_keyval;
    go();
}
# Run whatever SQL is currently typed in the query entry and put the
# formatted output into the results buffer. The status bar shows
# "Filtering..." while the query runs.
sub go {
    my $statement = $query->get_text;
    status("Filtering...");
    $results->set_text( query($statement) );
}
# Button handler: fetch every row of the log table. The SQL is also
# written into the query entry so the user can see and edit it.
sub select_all_query {
    my $statement = "SELECT * FROM $TABLE";
    $query->set_text($statement);
    my $output = query($statement);
    $results->set_text($output);
}
# Button handler: count hits per HTTP status, ordered by status. The SQL
# is also written into the query entry so the user can see and edit it.
sub hits_query {
    my $sql = "SELECT count(client) AS hits, status FROM $TABLE"
            . " GROUP BY status ORDER BY status";
    $query->set_text($sql);
    $results->set_text(query($sql));
}
# Button handler: fetch every row whose status is 404. The SQL is also
# written into the query entry so the user can see and edit it.
sub _404_query {
    my $statement = "SELECT * FROM $TABLE WHERE status = 404";
    $query->set_text($statement);
    my $output = query($statement);
    $results->set_text($output);
}
# "changed" handler for the column selecter: append the chosen column
# name (padded with spaces) to the query entry, then give the entry focus
# with the cursor at the end of the text.
#   first argument - the combo box that fired the signal
# Always returns 0.
sub append_col_to_qry {
    my $widget = shift;

    # get_active is -1 when nothing is selected, which also happens when
    # the list is being emptied and refilled. Indexing with -1 would
    # silently pick the LAST column, so bail out instead.
    my $index = $widget->get_active;
    return 0 if $index < 0;

    my @cols = col_names();
    return 0 unless defined $cols[$index];

    $col_selecter->popdown;
    $query->append_text( ' ' . $cols[$index] . ' ' );
    $query->grab_focus;
    my $pos = length $query->get_text;
    $query->set_position($pos);
    0;
}
# Run an SQL statement against the current log and format the rows as
# text.
#   $sql - the statement; a trailing \g (any case, optional trailing
#          whitespace) selects vertical output and is stripped before the
#          statement is prepared
# Vertical output is also used when the "Vertical" radio button is active.
# In vertical mode each column is printed as "name : value" on its own
# line; otherwise the values are joined on one line, each followed by
# $SEPERATOR. Columns are emitted in sorted name order and every row is
# followed by a newline.
# Returns the formatted text, or '' if the statement could not be prepared
# or executed (an error box is shown in that case).
sub query {
    my $sql = shift;

    my $vertical = 0;
    $vertical = 1 if $sql =~ s/\\g\s*$//i;
    $vertical = 1 if $long->get_active;

    # Stop on failure: continuing used to call ->execute on whatever err()
    # returned, which is not a statement handle.
    my $sth = $dbh->prepare($sql);
    if (!$sth) {
        err("Can't prepare $sql\nIs your SQL syntactically correct?");
        status("Query failed: $sql");
        return '';
    }
    if (!$sth->execute) {
        my $reason = defined $sth->errstr ? $sth->errstr : '';
        err("Can't execute $sql\n" . $reason);
        status("Query failed: $sql");
        return '';
    }

    my $return    = '';
    my $row_count = 0;
    while (my $row = $sth->fetchrow_hashref) {
        for my $col (sort keys %$row) {
            # Undefined fields print as empty strings without a warning.
            my $value = defined $row->{$col} ? $row->{$col} : '';
            $return .= $vertical
                ? sprintf("%10s : %s\n", $col, $value)
                : $value . $SEPERATOR;
        }
        $return .= "\n";
        $row_count++;
    }

    # An error while fetching rows is reported here.
    err("Couldn't run SQL.\nMaybe the file you opened wasn't a valid Apache log?\n\n" .
        $sth->errstr) if $sth->err;
    status("$row_count rows. Query: $sql");
    return $return;
}
# Return the sorted list of column names for the parser currently
# selected in $log_type, read from a fresh parser object's col_names
# entry (a comma-separated string).
sub col_names {
    my $parser = "AnyData::Format::$log_type"->new;
    my @names  = split /,/, $parser->{col_names};
    return sort @names;
}
## Process commandline options
if ($opts{'?'}) {
    print $USAGE;
    exit(0);
}
if ($opts{l}) {
    $log_file = $opts{l};
}
# -v wins over -c if both are given; with neither, the default set where
# $log_type is declared stays in force.
if ($opts{v}) {
    $log_type = "WeblogVhost";
}
elsif ($opts{c}) {
    $log_type = "Weblog";
}

# Connect signals up.
$window->signal_connect       ( destroy         => \&close );
$query->signal_connect        ( key_press_event => \&filter_pressed );
$col_selecter->signal_connect ( changed         => \&append_col_to_qry );
$select_all->signal_connect   ( clicked         => \&select_all_query );
$hits->signal_connect         ( clicked         => \&hits_query );
$_404->signal_connect         ( clicked         => \&_404_query );

# Initialise widgets
$window->set_border_width(5);
$window->set_default_size(1024,768);
$window->set_title($APP_NAME);
$query->set_width_chars(80);
$query->set_has_frame(1);
$query->modify_font(Gtk2::Pango::FontDescription->from_string($QUERY_FONT));
$col_selecter->set_wrap_width(1);
$results_view->set_editable(0);
$results_view->set_wrap_mode('word-char');
$results_view->set_overwrite(1);
$results_view->modify_font(Gtk2::Pango::FontDescription->from_string($RESULTS_FONT));
$scroll_results->set_policy ('never', 'always');

# Layout: menu bar, query row, scrolling results, button row, status bar.
$scroll_results->add($results_view);
$top_box->add($query);
$top_box->add($col_selecter);
$bottom_box->pack_start($short,0,1,1);
$bottom_box->pack_start($long,0,0,1);
$bottom_box->pack_start($select_all,1,1,1);
$bottom_box->pack_start($hits,1,1,1);
$bottom_box->pack_start($_404,1,1,1);
$main_box->pack_start($menu->{widget},0,0,0);
$main_box->pack_start($top_box,0,0,5);
$main_box->pack_start($scroll_results,1,1,5);
$main_box->pack_start($bottom_box,0,0,5);
$main_box->pack_start($status_bar,0,0,5);
$window->add($main_box);
$window->show_all;

# Lets go: register the log with the chosen parser (no widget argument,
# so no confirmation box is shown) and enter the main loop.
set_log_type(undef,$log_type);
Gtk2->main;
0;
__END__
=head1 NAME
ALF: Apache Log Filter. Or alf.pl if you want to be picky.
=head1 SYNOPSIS
./alf.pl [-l /path/to/access.log] [-cv]
=head1 DESCRIPTION
ALF is a filter for people using Apache logging to plain text logs, who still want the
flexibility of being able to search their logs like a database. It relies heavily on Jeff
Zucker's DBD::AnyData to do the clever stuff, and overlays that with a simple GTK2
interface.
You can run SQL queries and see the results in horizontal or vertical formats;
open different logs; run a few everyday queries from buttons; append column names to
your SQL from a dropdown box; switch between parser formats; and export your filtered
results in vertical or horizontal formats.
You'll need to copy WeblogCombined.pm and WeblogVhost.pm into /path/to/AnyData/Format
before ALF will play with you. AFAICS WeblogVhost.pm ought to work on all three types of logs
but I don't have common format logs available to test on. This is very much a learning project
so all comments are welcome.
=head1 OPTIONS
=head2 Commandline options
=over
=item -?
Print usage and exit
=item -l /path/to/access.log
A valid apache log
=item -c
Use common format parser
=item -v
Use vhost parser
=back
=head2 Set in script body
=over
=item $TABLE
The name which you wish to give the table you'll search. Default: 'log'.
=item $SEPERATOR
Field separator for use when viewing records in horizontal format. Default: ' | '.
=item $QUERY_FONT
Font used in the query box. Default: 'FreeSans 12'.
=item $RESULTS_FONT
Font used to display your results. Default: 'FreeMono 12'.
=item $LOG_DEFAULT
Log that gets filtered if you don't specify one on the commandline. Default: '/home/charlie/access_log_test'
=back
=head1 REQUIREMENTS
perl 5.8.8
DBD::AnyData
AnyData::Format::WeblogVhost
AnyData::Format::WeblogCombined
Gtk2
Gtk2::Ex::Simple::Menu
Getopt::Std
=head1 LICENSE
Copyright (C)2007 Charlie Harvey
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version
2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public
License along with this program; if not, write to the Free
Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
Also available on line: http://www.gnu.org/copyleft/gpl.html
=cut
alf_menu.pl
#!/usr/bin/perl
# Menu definition for ALF. This file is evaluated with do() by alf.pl and
# yields an array reference describing three top-level menus (File,
# Format, Help). The callbacks name subs that this file does not define.
use strict;
use warnings;

my $menu_tree = [
    _File => {
        item_type => '<Branch>',
        children  => [
            _Open => {
                item_type       => '<StockItem>',
                extra_data      => 'gtk-open',
                callback        => \&open_log,
                callback_action => 0,
                accelerator     => '<ctrl>O',
            },
            _Save => {
                item_type       => '<StockItem>',
                extra_data      => 'gtk-save',
                callback        => \&export_results,
                callback_action => 1,
                accelerator     => '<ctrl>S',
            },
            _Quit => {
                item_type       => '<StockItem>',
                extra_data      => 'gtk-quit',
                callback        => \&close,
                callback_action => 3,
                accelerator     => '<ctrl>Q',
            },
        ],
    },

    # The three format items share radio group 1; their callback_action
    # numbers are passed to set_log_type.
    For_mat => {
        item_type => '<Branch>',
        children  => [
            _Combined => {
                item_type       => '<RadioItem>',
                callback        => \&set_log_type,
                callback_action => 4,
                groupid         => 1,
            },
            _Vhost => {
                item_type       => '<RadioItem>',
                callback        => \&set_log_type,
                callback_action => 5,
                groupid         => 1,
            },
            _Common => {
                item_type       => '<RadioItem>',
                extra_data      => 1,
                callback        => \&set_log_type,
                callback_action => 6,
                groupid         => 1,
            },
        ],
    },

    _Help => {
        item_type => '<Branch>',
        children  => [
            _About => {
                item_type       => '<StockItem>',
                extra_data      => 'gtk-about',
                callback        => \&about,
                callback_action => 7,
            },
        ],
    },
];

# The value of the last statement is what do() hands back to the caller.
$menu_tree;
WeblogVhost.pm
#########################################################
package AnyData::Format::WeblogVhost;
#########################################################
# AnyData driver for "Vhost Log Format" web log files
# Also supports combined and common log formats.
# Copyright (c) 2007, Charlie <charlie@charlieharvey.com>
#########################################################
=head1 NAME
AnyData::Format::WeblogVhost - tiedhash & DBI/SQL access to HTTPD Logs
=head1 SYNOPSIS
use AnyData;
my $weblog = adTie( 'WeblogVhost', $filename );
while (my $hit = each %$weblog) {
print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/;
}
# ... other tied hash operations
OR
use DBI;
my $dbh = DBI->connect('dbi:AnyData:');
$dbh->func('hits','WeblogVhost','access_log','ad_catalog');
my $hits = $dbh->selectall_arrayref( qq{
SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%'
});
# ... other DBI/SQL read operations
=head1 DESCRIPTION
This is a plug-in format parser for the AnyData and DBD::AnyData modules. You can gain read access to Vhost Log Format
web server log files (e.g. NCSA or Apache) either through tied hashes or arrays or through SQL database queries.
Fieldnames are taken from the W3 definitions found at
http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format
with the addition of client, referer and vhost fields.
remotehost
username
authuser
date
request
status
bytes
referer
client
vhost
This module does not currently support writing to weblog files.
Please refer to the documentation for AnyData.pm and DBD::AnyData.pm
for further details.
=head1 LICENCE
Copyright (C)2007 Charlie Harvey
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version
2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more
details.
You should have received a copy of the GNU General Public
License along with this program; if not, write to the Free
Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.
Also available on line: http://www.gnu.org/copyleft/gpl.html
=cut
use strict;
use AnyData::Format::Base;
# Package globals: parent class, debug switch and module version.
our @ISA     = qw( AnyData::Format::Base );
our $DEBUG   = 0;
our $VERSION = '0.02';

# Line with a leading extra field (used as the vhost): four
# whitespace-free fields, a [bracketed] date, a "quoted" request, two more
# whitespace-free fields, then whatever is left on the line.
my $vlog_re = qr/^(\S*) (\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;

# Same layout without the leading extra field.
my $norm_re = qr/^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;

# Splits the remainder into its first two "quoted" strings and ignores
# anything after them.
my $ref_client_re = qr/^"(.*?)" "(.*?)".*$/;
# Constructor.
#   $class - class to bless into
#   $self  - optional hash reference to use as the object; a fresh hash is
#            used when it is omitted
# Sets the column list, record separator, key and keep_first_line entries
# (overwriting any values already present) and returns the blessed hash.
sub new {
    my $class = shift;
    my $self  = shift || {};
    $self->{col_names} =
        'vhost,remotehost,username,authuser,date,request,status,bytes,client,referer';
    $self->{record_sep} = "\n";
    # NOTE(review): 'datestamp' is not one of the names in col_names
    # (the column is called 'date') -- confirm what AnyData does with key.
    $self->{key}             = 'datestamp';
    $self->{keep_first_line} = 1;
    return bless $self, $class;
}
# Split one log line into its fields.
#   $self - the parser object (not used)
#   $str  - the raw line
# Returns the ten fields in col_names order (vhost, remotehost, username,
# authuser, date, request, status, bytes, client, referer). The vhost is
# the empty string for lines that are not in the vhost layout, and client
# and referer are empty strings when the line carries no quoted pair after
# the byte count.
# Returns undef for a missing, blank or unparseable line.
# NOTE(review): a single undef is assumed to be how AnyData parsers signal
# "no record"; confirm how the caller treats it mid-file.
sub read_fields {
    my $self = shift;
    my $str  = shift || return undef;
    $str =~ s/^\s+//;
    $str =~ s/\s+$//;
    return undef unless $str;

    my @row;
    if ($str =~ /VLOG=-$/) {
        @row = $str =~ $vlog_re;
    }
    else {
        # Only prepend the empty vhost when the line actually matched.
        # Prepending it unconditionally made $row[0] always defined, so
        # lines that failed to parse were returned as rows of undefs.
        my @fields = $str =~ $norm_re;
        @row = ('', @fields) if @fields;
    }
    return undef unless @row;

    # The trailing part holds "referer" "client"; store them in the order
    # col_names lists them (client first).
    my ($referer, $client) = $row[8] =~ $ref_client_re;
    $client  ||= '';
    $referer ||= '';
    ($row[8], $row[9]) = ($client, $referer);
    # $row[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
    return @row;
}
1;
WeblogCombined.pm
#########################################################
package AnyData::Format::WeblogCombined;
#########################################################
# AnyData driver for "Combined Log Format" web log files
# copyright (c) 2007, Charlie <charlie_ampersat_charlieharvey_fullstop_com>
#########################################################
=head1 NAME
AnyData::Format::WeblogCombined - tiedhash & DBI/SQL access to HTTPD Logs
=head1 SYNOPSIS
use AnyData;
my $weblog = adTie( 'WeblogCombined', $filename );
while (my $hit = each %$weblog) {
print $hit->{remotehost},"\n" if $hit->{request} =~ /mypage.html/;
}
# ... other tied hash operations
OR
use DBI;
my $dbh = DBI->connect('dbi:AnyData:');
$dbh->func('hits','WeblogCombined','access_log','ad_catalog');
my $hits = $dbh->selectall_arrayref( qq{
SELECT remotehost FROM hits WHERE request LIKE '%mypage.html%'
});
# ... other DBI/SQL read operations
=head1 DESCRIPTION
This is a plug-in format parser for the AnyData and DBD::AnyData modules. You can gain read access to Combined Log Format
web server log files (e.g. NCSA or Apache) either through tied hashes or arrays or through SQL database queries.
Fieldnames are taken from the W3 definitions found at
http://www.w3.org/Daemon/User/Config/Logging.html#common-logfile-format
remotehost
username
authuser
date
request
status
bytes
referer
client
This module does not currently support writing to weblog files.
Please refer to the documentation for AnyData.pm and DBD::AnyData.pm
for further details.
=head1 AUTHOR & COPYRIGHT
(C)Copyright 2007, Charlie <charlie_ampersat_charlieharvey_fullstop_com>
All rights reversed
=cut
use strict;
use AnyData::Format::Base;
# Package globals: parent class, debug switch (read by read_fields) and
# module version.
our @ISA     = ('AnyData::Format::Base');
our $DEBUG   = 0;
our $VERSION = '0.01';
# Constructor.
#   $class - class to bless into
#   $self  - optional hash reference to use as the object; a fresh hash is
#            used when it is omitted
# Sets the column list, record separator, key and keep_first_line entries
# (overwriting any values already present) and returns the blessed hash.
sub new {
    my $class = shift;
    my $self  = shift || {};
    $self->{col_names} =
        'remotehost,username,authuser,date,request,status,bytes,client,referer';
    $self->{record_sep} = "\n";
    # NOTE(review): 'datestamp' is not one of the names in col_names
    # (the column is called 'date') -- confirm what AnyData does with key.
    $self->{key}             = 'datestamp';
    $self->{keep_first_line} = 1;
    return bless $self, $class;
}
# Split one log line into its nine fields, in col_names order:
# remotehost, username, authuser, date, request, status, bytes, client,
# referer.
#   first argument  - the parser object (not used)
#   second argument - the raw line
# Client and referer come back as empty strings when the text after the
# byte count is not exactly two "quoted" strings.
# Returns undef for a missing, blank or unparseable line.
sub read_fields {
    my ($self, $line) = @_;
    print "PARSE RECORD\n" if $DEBUG;

    return undef unless $line;
    for ($line) {
        s/^\s+//;
        s/\s+$//;
    }
    return undef unless $line;

    my @fields = $line =~
        /^(\S*) (\S*) (\S*) \[([^\]]*)\] "(.*?)" (\S*) (\S*)\s*(.*)$/;
    return undef unless defined $fields[0];

    # The eighth capture holds "referer" "client"; they are stored the
    # other way round, client first.
    my ($referer, $client) = $fields[7] =~ /^"(.*?)" "(.*?)"$/;
    $fields[7] = $client  || '';
    $fields[8] = $referer || '';
    # $fields[3] =~ s/\s*-\s*(\S*)$//; # hide GMT offset on datestamp
    return @fields;
}
1;
Re: ALF: Apache Log Filter
by zentara (Archbishop) on Apr 02, 2007 at 09:28 UTC
|
Nice demo video. I guess with the rising bandwidth everyone is getting, the demo-video will eventually replace the screenshot. What did you use to make it?
| [reply] [Watch: Dir/Any] |
|
Thanks zentara. I used xvidcap, which seems to do pretty much everything I wanted it to do - though I also tried Istanbul, which can only produce ogg theoras.
| [reply] [Watch: Dir/Any] |
|
|