Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl-Sensitive Sunglasses
 
PerlMonks  

spamstats.pl

by tcf03 (Deacon)
on Aug 26, 2006 at 02:10 UTC ( #569766=sourcecode: print w/replies, xml ) Need Help??
Category: Text Processing
Author/Contact Info fiedlert@gmail.com
Description: I needed to parse logs from a Barracuda spam filter, so I wrote this. It's fairly simple, and so far seems to be doing well. It was written for Barracuda's "new" logging method. The hardest part was typing in the action/reason codes.
#!/usr/bin/perl -w

use strict;
#use diagnostics;
#use Data::Dumper;

# Labels for the action code found in each log entry, grouped by the
# service that wrote the entry.  RECV codes start at 0, SEND codes at 1;
# the position of a label in its list determines the code it belongs to.
my @recv_label = (
    'Allowed Message',                  # 0
    'Aborted Message',                  # 1
    'Blocked Message',                  # 2
    'Quarantined Message',              # 3
    'Tagged Message',                   # 4
    'Deferred Message',                 # 5
    'Per-User Quarantined Message',     # 6
    'Whitelisted Message',              # 7
);

my @send_label = (
    'Delivered Message',                # 1
    'Rejected Message',                 # 2
    'Deferred Message',                 # 3
    'Expired Message',                  # 4
);

# Both services also map the placeholder code '-' to 'Undefined'.
my %action = (
    'RECV' => { '-' => 'Undefined',
                map { ( $_ => $recv_label[$_] ) } 0 .. $#recv_label },

    'SEND' => { '-' => 'Undefined',
                map { ( $_ + 1 => $send_label[$_] ) } 0 .. $#send_label },
);

# SCAN entries are looked up in the RECV table: this stores the same hash
# reference under a second key, it does not copy the table.
$action{'SCAN'} = $action{'RECV'};

# Labels for the reason code found in RECV and SCAN log entries.
# Codes listed with an empty label have no description in this table; the
# main loop reports them as unrecognised because the label is false.
# The placeholder code '-' maps to 'Undefined', the same label as code 0.
my %reason = ( 0    => 'Undefined',
               1    => 'Virus',
               2    => 'Banned Attachment',
               3    => 'RBL Match',
               4    => 'Rate Control',
               5    => 'Too Many Message In Session',
               6    => 'Timeout Exceeded',
               7    => 'No Such Domain',
               8    => 'No Such User',
               9    => 'Subject Filter Match',
               10   => '',
               11   => 'Client IP',
               12   => 'Recipient Address Rejected',
               13   => 'No Valid Recipients',
               14   => 'Domain Not Found',
               15   => 'Sender Address Rejected',
               16   => '',
               17   => 'Need Fully Qualified Recipient',
               18   => 'Need Fully Qualified Sender',
               19   => 'Unsupported Command',
               20   => 'MAIL FROM Syntax Error',
               21   => 'Bad Address Syntax',
               22   => 'RCPT TO Syntax Error',
               23   => 'Send EHLO/HELO first',
               24   => 'Need MAIL Command',
               25   => 'Nested Mail Command',
               26   => '',
               27   => 'EHLO/HELO Syntax Error',
               28   => '',
               29   => '',
               30   => 'Mail Protocol Error',
               31   => 'Score',
               32   => '',
               33   => '',
               34   => 'Header Filter Match',
               35   => 'Sender Block/Accept',
               36   => 'Recipient Block/Accept',
               37   => 'Body Filter Match',
               38   => 'Message Size Bypass',
               39   => 'Intention Analysis Match',
               40   => 'SPF/Caller-ID',
               41   => 'Client Host Rejected',
               42   => '',
               43   => '',
               44   => 'Authentication Not Enabled',
               45   => 'Allowed Message Size Exceeded',
               46   => 'Too Many Recipients',
               47   => 'Need RCPT Command',
               48   => 'DATA Syntax Error',
               49   => 'Internal Error',
               50   => 'Too Many Hops',
               51   => '',
               52   => '',
               53   => '',
               54   => '',
               55   => 'Invalid Parameter Syntax',
               56   => 'STARTTLS Syntax Error',
               57   => 'TLS Already Active',
               58   => 'Too Many Errors',
               59   => 'Need STARTTLS First',
               60   => 'Spam Fingerprint Found',
               '-'  => 'Undefined' );


## Check Usage ##
# The single argument must name an existing regular file.
die "usage: spamstats.pl <logfile>\n"
    unless ( defined ($ARGV[0]) and -f $ARGV[0]);

# Three-argument open with an explicit read mode: the name is always
# treated as a plain path, so characters such as '>' or '|' in it cannot
# turn the open into a write or a command pipe.
open my $file, '<', $ARGV[0] or
    die "Unable to open $ARGV[0]: $!\n";

# Counters filled in while the log is read; all start out empty.
#   %bucket - per host, service, action label (and reason label)
#   %USER   - per host, recipient, action label, reason label
#   %VIRUS  - per host and first extra reason field of virus entries
#   %SPAM   - per host and recipient, blocked spam-fingerprint entries
my ( %bucket, %USER, %VIRUS, %SPAM );

# Reason codes of special interest.  Each one must be a key of %reason.
# Only the commented-out filter in the main loop refers to this list.
my @wanted = ( 1, 60 );

# Read the log one line at a time and tally every RECV, SCAN and SEND
# entry.  We rely on autovivification for counting: the nested counter
# hashes spring into existence the first time they are incremented.
#
# Fields are whitespace separated.  Only the host (field 3), the service
# (field 9) and the service-specific fields after it are used.
while ( my $line = <$file> )
{
    # split /\s+/ drops the trailing newline along with the other
    # whitespace, so no chomp is needed.
    my @field = split /\s+/, $line;

    # Too short to carry a service field: nothing to classify.
    next if @field < 10;

    my ( $host, $service, @INFO ) = @field[ 3, 9 .. $#field ];
    # anything after this point can/should be changed depending on your needs.

    # Codes are looked up by the recognised service prefix, so a service
    # name with a suffix cannot autovivify a bogus entry in %action.
    # Counters are still keyed by the service name exactly as logged.
    my ($kind) = $service =~ /^(RECV|SCAN|SEND)/;
    next unless defined $kind;

    if ( $kind eq 'SEND' )
    {
        # SEND layout: encrypted, action, queue id, response...
        my ( undef, $action_ ) = @INFO;

        unless ( defined $action_ )
        {
            warn "Skipping truncated $service entry at line $.\n";
            next;
        }

        # Never use undef as a hash key: give unknown codes a label.
        my $action_label = $action{$kind}{$action_};
        $action_label = "Unknown Action $action_"
            unless defined $action_label;

        $bucket{$host}{$service}{$action_label}{'count'}++;
        next;
    }

    # RECV layout: sender, recipient, action, reason, extra...
    # SCAN layout: encrypted, sender, recipient, score, action, reason, extra...
    my ( $recip, $action_, $reason_, @reasonextra );
    if ( $kind eq 'SCAN' )
    {
        ( undef, undef, $recip, undef, $action_, $reason_, @reasonextra )
            = @INFO;
    }
    else
    {
        ( undef, $recip, $action_, $reason_, @reasonextra ) = @INFO;
    }

    # $reason_ is the last mandatory field; if it is missing the entry
    # was cut short and the earlier fields cannot be trusted either.
    unless ( defined $reason_ )
    {
        warn "Skipping truncated $service entry at line $.\n";
        next;
    }

    my $action_label = $action{$kind}{$action_};
    my $reason_label = $reason{$reason_};

    # Report codes that have no (or an empty) label in the tables.
    print "Action = $action_ Service = $service\n" unless $action_label;
    print "Reason = $reason_  Service = $service\n" unless $reason_label;

    # Codes missing from the tables get an explicit label instead of an
    # undef hash key.  Codes listed with an empty label keep that label.
    $action_label = "Unknown Action $action_" unless defined $action_label;
    $reason_label = "Unknown Reason $reason_" unless defined $reason_label;

    # Codes are compared as strings because the log may contain '-',
    # which triggers a "not numeric" warning under -w when used with ==.
    if ( $reason_ eq '1' )
    {
        # Counted under the first extra reason field of the entry.
        my $virus = defined $reasonextra[0] ? $reasonextra[0] : 'Unknown';
        $VIRUS{$host}{$virus}{'count'}++;
    }

    # Action 2 with reason 60: see the %action and %reason labels.
    $SPAM{$host}{$recip}{'count'}++
        if ( $action_ eq '2' and $reason_ eq '60' );

    $USER{$host}{$recip}{$action_label}{$reason_label}{'count'}++;
        #if ( grep /^$reason_$/, @wanted );

    $bucket{$host}{$service}{$action_label}{$reason_label}{'count'}++;
}

# Example reports...

#for my $host ( keys %VIRUS )
#{
#    print "[$host]\n";
#    for my $virus ( sort keys %{$VIRUS{$host}} )
#    {
#        printf "\t%-50s %3d\n", $virus, $VIRUS{$host}{$virus}{'count'};
#    }                
#}

#for my $host ( keys %USER )
#{
#    print "[$host]\n";
#    for my $user ( keys %{$USER{$host}} )
#    {
#        print " $user\n";
#        for my $action ( keys %{$USER{$host}{$user}} )
#        {
#            print "\t  $action\n";
#            for my $reason ( keys %{$USER{$host}{$user}{$action}} )
#            {
#                print "\t\t$reason", 
#                " $USER{$host}{$user}{$action}{$reason}{'count'}\n";
#            }
#        }
#    }
#}

#for my $host ( keys %SPAM )
#{
#    for my $user ( keys %{ $SPAM{$host} } )
#    {
#        print "$user [$SPAM{$host}{$user}{'count'}]\n";
#    }
#}
Replies are listed 'Best First'.
Re: spamstats.pl
by jwkrahn (Monsignor) on Aug 26, 2006 at 08:24 UTC
    # Define our buckets for collecting data my %bucket = (); my %USER = (); my %VIRUS = (); my %SPAM = ();
    my creates a variable in an empty state. Assigning emptiness to an empty container is redundant.
    # These numbers must be valid reason codes... my @wanted = ( 1, 60 );
    It would be more efficient to use a hash instead of an array there.
    for (<$file>)
    A for loop iterates over a list which means that the entire contents of the file have to be stored in a list in memory before it can be iterated over. It is more efficient to use a while loop which only reads one line at a time.
    while ( <$file> )
    chomp; my ( $month, $day, $time, $host, $process, $clientIP, $MessageID, $timeStart, $timeEnd, $service, @INFO ) = split /\s+/;
    The chomp is superfluous because split /\s+/ removes all whitespace. You are declaring eleven variables but you are only using three. You can use undef as a placeholder:
    my ( undef, undef, undef, $host, undef, undef, undef, undef, undef, $service, @INFO ) = split;
    $USER{$host}{$recip}{$action{$service}{$action_}}{$reason{$reason_}}{'count'}++; #if ( grep /^$reason_$/, @wanted );
    If you had used a hash for wanted then you would not need to use grep.
    $USER{$host}{$recip}{$action{$service}{$action_}}{$reason{$reason_}}{'count'}++; #if exists $wanted{ $reason_ };
Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://569766]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others wandering the Monastery: (4)
As of 2020-06-01 23:40 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?
    Do you really want to know if there is extraterrestrial life?



    Results (12 votes). Check out past polls.

    Notices?