#!/usr/bin/perl
################################################
# Title: crunchrep.pl
# Author: Clovis Sangrail
# Description: The crunchrep.pl Perl script will process one or more input
# data files consisting of GT.M Journal extract log records. It produces a
# report of exceptional transactions for possible further scrutiny by audit
# or security personnel.
#
# Revision History
# Ver Author Date
# --- ------ ----
# 1.0 Clovis Sangrail Apr 2012
# Initial port/rewrite of audit.gawk.
# 1.1 Clovis Sangrail 9-May-2012
# Rewrite regex for Global Variable analysis to handle subscripts
# like """""ABU%"""" . Then change back.
################################################
#####################
# Calling Arguments #
#####################
# See usage function.
###########
# Modules #
###########
use strict;
use Getopt::Std; # Process switches.
use File::Basename;
#############
# Variables #
#############
$| = 1;    # Unbuffered output: flush the selected handle after every write.

# Input file names.
my $NMF = "namefile";    # File of user IDs to report on (default name).
my $ALLF;                # Journal extract log file (set from @ARGV).

# Command-line handling.
my %opts;                # Switch letter => value, filled by getopt.
my $cnt;                 # Count of non-switch arguments left in @ARGV.

# Lookup tables built while reading the input files.
my %rname;               # User ID => real name taken from the passwd entry.
my %matchpid;            # Process ID => user ID that owns it.

# Report data: $rpt{uid}{pid}{global} = [ timestamp, first global, count ].
my %rpt;

# Scratch variables shared by the processing loops.
my @fields;              # Fields of the record currently being split.
my $PWLINE;              # Captured passwd line for the current user ID.
my $uid;                 # User ID currently being processed.
#############
# Functions #
#############
# usage - Display program usage instructions.
#
# Takes no arguments. Prints the help text to the currently selected output
# handle (STDOUT unless changed), using the basename of $0 as the program
# name. The heredoc body must stay flush-left: any leading indentation or
# in-line comment would become part of the printed text.
sub usage
{
    my $prg = basename($0);
    print <<EOF;
$prg usage:
$prg -f <Glbfld#> [ -F <skipregex> -k <keepregex> <names>] <data>
The $prg program reads input files of GT.M Journal Extract Log recs
generated by the MUPIP program and Unix UIDs and produces a report of
transactions performed by the users in the namefile. The <data> file
is required, the <names> file defaults to "namefile" in the current
directory.
Switches and Parameters:
-f = Field number of Global Variable, currently 7 or 9 so far. This
may change with new versions of GT.M and MUPIP. Required switch.
-F = Perl regex of Global Variable names to omit from report. Optional.
-k = Perl regex of Global Variable names for which subscripts will be
preserved and reported on separately. Default is to aggregate all
subscripts into single count. Optional.
<names> File of User ID's (one per line) on which to report. Optional,
defaults to "namefile" in current directory.
<data> File of 01/04/05/10 mupip journal extract log records, sorted by
PID and rectype within PID. Required parameter.
EOF
}
########
# Main #
########
#####################################################################
# Process the switch arguments. (See 'usage{}' function above for a #
# description of the switches and other commandline parameters.) #
#####################################################################
# Read the -f, -F and -k switches (each takes a value) into %opts.
getopt( 'fFk', \%opts );

# -f is the 1-based position of the Global Variable field in a record.
# Only 7 and 9 are accepted; anything else (or no -f at all) is fatal.
my $gfld = $opts{"f"};
unless ( ( 7 == $gfld ) || ( 9 == $gfld ) ) {
    usage();
    die "Invalid logfile global field number\n\n";
}
$gfld--;    # Perl arrays index from zero.

# Optional regexes; both are left undefined when the switch is absent.
my $fltr    = $opts{"F"};    # Globals to leave out of the report.
my $keepsub = $opts{"k"};    # Globals whose subscripts are not aggregated.
##############################
#DBG#$fltr='^\^LOG\(?|^\^BCHLOG\(?|^\^ORSLOG\(?|^\^ZLOG\(?|^\^PROCID\(?|^\^SYSLOG\(?';
#DBG#$keepsub='^\^SCAU\(?';
#DBG#print "\n";
#DBG#print " 'f' switch value: $opts{\"f\"} " . "\n";
#DBG#print " 'F' switch value: $opts{\"F\"} " . "\n";
#DBG#print "fltr = $fltr \n";
#DBG#print " keepsub = $keepsub \n";
#DBG#print " 'k' switch value: $opts{\"k\"} " . "\n";
#DBG#print "\n";
#DBG#exit(0);
##############################
###########################################################################
# Process filename arguments. Error if none. One arg is the data file of  #
# journal records, and the namefile is the default. If two args, then 1st #
# is the namefile, 2nd is the data file.                                  #
###########################################################################
# Work out which files to read from the arguments left after the switches.
#   0 args  -> fatal: the journal extract log file is mandatory.
#   1 arg   -> journal extract log file; the namefile keeps its default.
#   2 args  -> namefile first, journal extract log file second.
#   >2 args -> fatal: previously this fell through silently and left $ALLF
#              undefined, which only surfaced later as a confusing open error.
$cnt = scalar @ARGV;    # Remaining argument count once switches are removed.
if ( 1 == $cnt ) {
    $ALLF = $ARGV[0];
}
elsif ( 2 == $cnt ) {
    ( $NMF, $ALLF ) = @ARGV;
}
elsif ( 0 == $cnt ) {
    usage();
    die "Need a journal extract log file name\n\n";
}
else {
    usage();
    die "Too many file name arguments (expected [<names>] <data>)\n\n";
}
#######################################################
# Open the $NAMES and $ALL files, err out on failure. #
#######################################################
# Open both inputs read-only with the three-argument form of open, so a file
# name taken from the command line is never interpreted as a mode or a pipe
# (a leading '>' or trailing '|' would otherwise truncate a file or run a
# command). The bareword handles NAMES and ALL are kept because the reading
# loops further down use them by name.
# NOTE(review): with this form a file name of "-" is a literal file, not STDIN.
open( NAMES, '<', $NMF )  or die "Cannot open $NMF : $!";
open( ALL,   '<', $ALLF ) or die "Cannot open $ALLF : $!";
############################################################################
# Process NAMES file. This is the list of User IDs (UIDs) on which we wish #
# to report. Read each UID and seek its /etc/passwd entry. If found set    #
# the rname{UID} hash entry to the real name field of the PW line. If not  #
# found set that hash entry to "Name_Not_Found".                           #
############################################################################
# Load %rname from the namefile: one user ID per line, mapped to the real
# name (GECOS) field of that user's passwd entry, or to "Name_Not_Found".
#
# The lookup uses Perl's getpwnam instead of shelling out to grep, because
# the grep call:
#   - interpolated file content into a shell command line,
#   - matched the ID as a substring anywhere in any passwd line, so the
#     wrong user's name could be picked up, and
#   - blocked reading STDIN when the namefile contained a blank line.
while ( my $name = <NAMES> ) {    # Read till EOF.
    chomp $name;
    $name =~ s/^\s+|\s+$//g;      # Ignore stray surrounding whitespace.
    next if $name eq "";          # Skip blank lines.

    my @pw = getpwnam($name);

    # presumably the namefile holds login names; an all-digit entry is also
    # tried as a numeric user ID - TODO confirm against real namefiles.
    @pw = getpwuid($name) if !@pw && $name =~ /^\d+$/;

    if (@pw) {
        # Element 6 of the getpw* list is the GECOS (real name) field.
        $rname{$name} = defined $pw[6] ? $pw[6] : "";
    }
    else {
        $rname{$name} = "Name_Not_Found";    # No passwd entry, say so.
    }
}
######################################
#DBG##my ( $c , $mcnt , $pmil );
#DBG#my $uid;
#DBG#foreach $uid ( sort keys %rname ) {
#DBG# print "uid: $uid , name: \"$rname{$uid}\" \n";
#DBG#}
#DBG#exit(0);
#DBG#$c=0;
#DBG#$mcnt=0;
#DBG#print "\n.";
######################################
############################################################################
# This is the main part. Read each line of the input file of MUPIP journal #
# extract log records. Input file is sorted by PID and by rectype within   #
# each PID. Skip all but '01', '04', '05', and '10' record types. Skip '01'#
# recs if UID (6th field) was not among those in the namefile, else save   #
# PID and set the matchpid entry for that PID to be the UID. For 04/05/10  #
# skip if matchpid entry for that PID not defined, or if Global Var is in  #
# the ignore list. Replace any subscripts with "(..)" unless Global is in  #
# the do-not-aggregate list. Translate timestamp and either make new array #
# entry of [ timestamp, global, cnt=1 ] for this rpt{uid}{pid}{global}     #
# hash or increment count of the existing entry.                           #
############################################################################
# Read every journal extract record from ALL and accumulate the report data:
#   $rpt{uid}{pid}{global} = [ timestamp, global as first seen, update count ]
# Only record types 01, 04, 05 and 10 are used; everything else is ignored.
while (<ALL>) {
    my $curpid;                                  # PID of the current record.
    my ( $wholeglob, $saveglob, $startglob );    # Parts of Global Var.
    my $stamp;                                   # Xlate of Horolog format.

    chomp;                                       # Remove trailing newline.

    ################################
    #DBG#if( 100 == $c++ ) {
    #DBG#  $c=0;
    #DBG#  print ".";
    #DBG#  if( 0 == ( ++$mcnt % 10 ) ) {
    #DBG#    $pmil = $mcnt / 10;
    #DBG#    print "\n $pmil K recs\n";
    #DBG#  }
    #DBG#}
    ################################

    # Split on backslashes. The limit stops splitting at the Global Variable
    # field, so that field keeps the rest of the record, backslashes included.
    @fields = split /\\/, $_, $gfld + 1;

    ################################
    #DBG#print "$_ \nrectype: x$fields[0]x\n";
    #DBG#print ".";
    ################################

    ##################################################################
    # If we find an '01' record, skip if username is not among those #
    # found when the namefile was processed.                         #
    ##################################################################
    if ( "01" eq $fields[0] ) {
        # Skip if this user was not read from the namefile.
        next unless defined( $rname{ $fields[5] } );

        # Save UID matching PID.
        $matchpid{ $fields[3] } = $fields[5];

        ####################################################
        #DBG#print "Inside 01 compare. pid = $fields[3] , matching uid = $matchpid{$fields[3]}\n";
        #DBG#if( defined( $rname{$fields[5]} ) ) {
        #DBG#  print "rectype: $fields[0] ,pid: $fields[3] ,uid: $fields[5]\n";
        #DBG#} else {
        #DBG#  print "skip: $fields[5]\n";
        #DBG#}
        #DBG#} else {
        ####################################################
        next;    # An '01' record carries no Global update.
    }

    ################################
    #DBG# print "$_ \nrectype: x$fields[0]x\n";
    #DBG# print ".";
    ################################

    #####################################################################
    # Global variables are altered by record types '04', '05', and '10'. #
    #####################################################################
    next unless $fields[0] =~ /^(?:04|05|10)$/;

    #DBG#print "Inside 05 compare. pid = $fields[3] , matching uid = $matchpid{$fields[3]}\n";
    #DBG#print "$_ \n";

    # Only PIDs captured from an '01' record of a wanted user are reported.
    next unless defined( $matchpid{ $fields[3] } );
    $curpid = $fields[3];
    $uid    = $matchpid{$curpid};

    ##################################################################
    # Now extract the initial portion of the Global Variable being   #
    # affected by this record, and also extract the whole Variable   #
    # (include any subscripts present).                              #
    ##################################################################
    #DBG#print "Global Field = $fields[$gfld] \n";
    #if($fields[$gfld] =~ /^((\^[%A-Za-z\d]+)($|=|\(([^"]+|(("+)[^"]+\6)+)+\)))/ ) {
    if ( $fields[$gfld] =~ /^((\^[%A-Za-z\d]+)($|=|.*?\)))/ ) {
        $wholeglob = $1;    # Name plus whatever follows up to the first ')'.
        $startglob = $2;    # Name only, e.g. ^ABC.
    }
    else {
        # Report the record that could not be parsed, then skip it.
        print "Warning: Cannot parse $fields[$gfld] for pid $curpid \n";
        #DBG#print "$_ \n";
        next;
    }
    #DBG#print "whole global = $wholeglob \n";

    #################################################################
    # If this Global's start matches the regex, input via switch on #
    # the commandline, of variables to skip, then skip it.          #
    #################################################################
    if ( defined($fltr) && ( $fltr ne "" ) && ( $startglob =~ /$fltr/ ) ) {
        #DBG#print "Skipping global $wholeglob \n";
        next;
    }

    ###############################################################
    # The GTM Journal timestamp is in HoroLog format: DDDDD,SSSSS #
    # where DDDDD = days since 12/31/1840 midnight and SSSSS =    #
    # seconds since midnight. We need to convert this to Timestamp#
    # format (secs since start of 1/1/1970). (Note: 47117 is the  #
    # number of days between midnight 12/31/1840 and 1/1/1970.)   #
    ###############################################################
    if ( $fields[1] =~ /^(\d+),(\d+)/ ) {
        # Convert, including a fixed 6-hour (CST) timezone offset.
        # NOTE(review): the offset is a constant, so it does not follow
        # daylight saving time or other timezones - confirm this is intended.
        $stamp = ( $1 - 47117 ) * 86400 + $2 + 3600 * 6;
    }
    else {
        # Cannot find DDDDD and/or SSSSS. The record is still counted, with
        # $stamp left undefined.
        print "Cannot parse horolog field $fields[1] \n";
    }

    #################################################################
    # Now match the start of the Global variable against the search #
    # regex of variables that we do not aggregate. These are vars   #
    # for which we keep separate counts of each different set of    #
    # subscripts that are modified. If we find a match, then leave  #
    # the variable alone. If no match, then replace any subscripts  #
    # with the string (..) .                                        #
    #################################################################
    $saveglob = "";    # Empty means "this Global was not aggregated".
    if ( defined($keepsub) && ( $keepsub ne "" ) && ( $startglob =~ /$keepsub/ ) ) {
        # Match: preserve the Global together with its subscripts.
        #DBG#print "$wholeglob will not be aggregated \n";
    }
    elsif ( $wholeglob =~ /\(/ ) {
        # Has subscripts: remember the original, aggregate under NAME(..).
        $saveglob  = $wholeglob;
        $wholeglob = $startglob . "(..)";
        #DBG#print "Aggregating $saveglob into $wholeglob \n";
    }

    ##############################################################
    # Finally, if we already have an entry for this (possibly an #
    # aggregated) Global then increment its count. If this is a  #
    # new one then create the initial array                      #
    # [ timestamp, global as first seen, cnt=1 ]                 #
    # for the newly-created hash element to reference.           #
    ##############################################################
    if ( defined( $rpt{$uid}{$curpid}{$wholeglob} ) ) {
        # Have entry, increment count.
        $rpt{$uid}{$curpid}{$wholeglob}[2]++;
        #DBG#print "inc node: uid = $uid pid = $curpid Global = $wholeglob \n";
    }
    else {
        # Create new node, cnt=1. Keep the un-aggregated text when there is
        # one, so a Global updated only once can be shown with subscripts.
        #DBG#print "new node: uid = $uid pid = $curpid Global = $wholeglob tstamp = $stamp \n";
        my $firstseen = ( $saveglob eq "" ) ? $wholeglob : $saveglob;
        $rpt{$uid}{$curpid}{$wholeglob} = [ $stamp, $firstseen, 1 ];
    }
    #DBG#print "Did /04/05/10 \n";
}
####################################################################
# At this point the three-dimensional %rpt hash is loaded with the #
# report data, we're ready to print the report. The %rpt hash is #
# dimensioned as rpt{uid}{pid}{global}. #
####################################################################
# Print the report: one section per user ID, in sorted user ID order. Each
# section has a three-line header, then one line per (PID, Global) pair, then
# a blank line. Lines are sorted as text, which puts them in date/time order
# and, within the same minute, in order of the right-justified PID column.
for my $user ( sort keys %rpt ) {    # Outermost dimension is UIDs.
    my @rptblock;                    # Report lines for current UID.

    #################################
    # Print header for current UID. #
    #################################
    printf "%s: %s\n", $user, $rname{$user};
    printf "YYYYMMDD hh:mm PID Global\n";
    printf "-------------- ------------- ------\n";

    ################################################
    # Within current UID 2nd index is Process IDs. #
    ################################################
    for my $pid ( keys %{ $rpt{$user} } ) {

        ###############################################################
        # Within current PID, 3rd index is each affected GT.M Global. #
        ###############################################################
        for my $glob ( keys %{ $rpt{$user}{$pid} } ) {

            # Each leaf is [ timestamp of 1st access, Global as first seen,
            # count of accesses ].
            my ( $stamp, $firstseen, $cnt ) = @{ $rpt{$user}{$pid}{$glob} };

            # A Global updated exactly once is shown as first seen (with its
            # subscripts); otherwise the hash key is shown, which is the
            # "(..)" form unless the Global was kept un-aggregated.
            my $single  = ( 1 == $cnt );
            my $prtglob = $single ? $firstseen : $glob;

            my @tparts = localtime $stamp;    # Convert to list.

            # YYYYMMDD HH:MM pid Globalname
            my $baseline = sprintf "%4d%02d%02d %02d:%02d%13s %s",
                1900 + $tparts[5], 1 + $tparts[4], $tparts[3],
                $tparts[2], $tparts[1], $pid, $prtglob;

            ############################
            # One or multiple updates? #
            ############################
            my $rptline =
                $single
                ? sprintf( "%s\n", $baseline )    # Single: just add newline.
                : sprintf( "%s\t(%d updates)\n", $baseline, $cnt );

            push( @rptblock, $rptline );    # Load rptblock array.
        }
    }

    #######################################################################
    # Finally, print out the formatted data lines for this UID. Sort will #
    # put them in date/timestamp order, and PID order within that.        #
    #######################################################################
    print sort @rptblock;
    print "\n";    # Skip line before next user.
}