Beefy Boxes and Bandwidth Generously Provided by pair Networks
XP is just a number
 
PerlMonks  

Re: Parsing of the web log file, access_log

by YuckFoo (Abbot)
on Jun 20, 2003 at 22:33 UTC ( [id://267733]=note: print w/replies, xml ) Need Help??


in reply to Parsing of the web log file, access_log

Andy,

Here is how I would do it.

- convert all times to seconds.
- make all times relative to the base time.
- determine a major key, the fifteen minute interval it's in relative to the base time.
- determine a minor key, the one minute interval it's in relative to the major key.
- save memory by processing each 15 minute interval as it completes, in the while loop.

Hope this gets you on track.

YuckFoo

#!/usr/bin/perl

# Count web-log hits per one-minute slot, reported in fifteen-minute groups.
#
# Every timestamp is converted to epoch seconds and made relative to the
# first timestamp seen (the base time).  The "major" key is the index of the
# fifteen-minute interval the hit falls in, counted from the base time; the
# "minor" key is the index of the one-minute slot inside that interval.
# Only one fifteen-minute bucket is held in memory at a time: a bucket is
# reported as soon as a hit belonging to a different interval shows up.
#
# NOTE(review): the UTC offset in the log timestamp (e.g. -0500) is ignored;
# the wall-clock fields are handed to DateTime without a time zone.  That is
# harmless while every line carries the same offset -- confirm for logs that
# span an offset change.

use strict;
use warnings;

use DateTime;

my $MAJOR_SIZE = 15 * 60;                     # seconds per major interval
my $MINOR_NUM  = 15;                          # minor slots per major interval
my $MINOR_SIZE = $MAJOR_SIZE / $MINOR_NUM;    # seconds per minor slot

# Epoch seconds of the first valid timestamp; undef until one has been read.
my $BASETIME;

# Month abbreviation => month number.
my %ABBREVS;
@ABBREVS{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} = (1 .. 12);

# Matches the leading part of "[15/Jun/2003:00:02:27 -0500]" anywhere on the
# line and captures day, month abbreviation, year, hour, minute, second.
my $TIMESTAMP_RE = qr{
    \[ (\d{1,2}) / ([A-Za-z]{3}) / (\d{4})
    : (\d{2}) : (\d{2}) : (\d{2})
}x;

# The fifteen-minute bucket currently being filled:
#   { major => interval index, minors => [ hit count per minute slot ] }
my $bucket;

LINE:
while (my $line = <DATA>) {
    chomp($line);
    next LINE if $line !~ /\S/;    # silently ignore blank lines

    my ($day, $mon, $year, $hour, $min, $sec) = $line =~ $TIMESTAMP_RE;
    unless (defined $day && exists $ABBREVS{$mon}) {
        warn "input line $.: no timestamp found, skipped: $line\n";
        next LINE;
    }

    # DateTime->new dies on out-of-range fields (day 32, hour 25, ...);
    # trap that so one bad line does not abort the whole run.
    my $stamp = eval {
        DateTime->new(
            year   => $year,
            month  => $ABBREVS{$mon},
            day    => $day,
            hour   => $hour,
            minute => $min,
            second => $sec,
        );
    };
    unless (defined $stamp) {
        warn "input line $.: invalid timestamp, skipped: $line\n";
        next LINE;
    }

    my $time = $stamp->epoch();
    $BASETIME = $time unless defined $BASETIME;

    my $relative = $time - $BASETIME;
    if ($relative < 0) {
        # A negative offset would yield a negative slot index below.
        warn "input line $.: earlier than the first timestamp, skipped: $line\n";
        next LINE;
    }

    my $major = int($relative / $MAJOR_SIZE);
    my $minor = int(($relative - ($major * $MAJOR_SIZE)) / $MINOR_SIZE);

    # A hit in a different interval closes the bucket in progress.
    if (defined $bucket && $bucket->{major} != $major) {
        process($bucket);
        $bucket = undef;
    }

    if (!defined $bucket) {
        $bucket = { major => $major, minors => [] };
    }

    $bucket->{minors}[$minor]++;

    print "$line $time $relative $major $minor\n";
}

# Report the final, still-open bucket.
process($bucket) if defined $bucket;

#-----------------------------------------------------------
# process($bucket)
#
# Print one fifteen-minute bucket: the absolute epoch at which the interval
# starts, then one line per minute slot giving the slot's starting epoch and
# its hit count (0 for a slot that received no hits).
#
#   $bucket - hash ref with keys 'major' (interval index relative to
#             $BASETIME) and 'minors' (array ref of per-slot hit counts).
sub process {
    my ($bucket) = @_;

    my $major = ($bucket->{major} * $MAJOR_SIZE) + $BASETIME;
    print "\nmajor: $major\n";

    for my $i (0 .. $MINOR_NUM - 1) {
        my $minor = ($i * $MINOR_SIZE) + $major;
        my $count = defined $bucket->{minors}[$i] ? $bucket->{minors}[$i] : 0;
        print " minor: $minor $count\n";
    }

    print "\n";
    return;
}

__DATA__
[15/Jun/2003:00:02:27 -0500]
[15/Jun/2003:00:03:44 -0500]
[15/Jun/2003:00:03:44 -0500]
[15/Jun/2003:00:03:44 -0500]
[15/Jun/2003:00:07:28 -0500]
[15/Jun/2003:00:08:44 -0500]
[15/Jun/2003:00:08:45 -0500]
[15/Jun/2003:00:08:45 -0500]
[15/Jun/2003:00:12:28 -0500]
[15/Jun/2003:00:13:45 -0500]
[15/Jun/2003:00:13:45 -0500]
[15/Jun/2003:00:13:46 -0500]
[15/Jun/2003:00:17:29 -0500]
[15/Jun/2003:00:18:46 -0500]
[15/Jun/2003:00:18:46 -0500]
[15/Jun/2003:00:18:47 -0500]
[15/Jun/2003:00:22:29 -0500]
[15/Jun/2003:00:23:47 -0500]
[15/Jun/2003:00:23:47 -0500]
[15/Jun/2003:00:23:48 -0500]
[15/Jun/2003:00:27:30 -0500]
[15/Jun/2003:00:28:48 -0500]
[15/Jun/2003:00:28:48 -0500]
[15/Jun/2003:00:28:49 -0500]
[15/Jun/2003:00:32:30 -0500]
[15/Jun/2003:00:33:49 -0500]
[15/Jun/2003:00:33:49 -0500]
[15/Jun/2003:00:33:49 -0500]
[15/Jun/2003:00:37:31 -0500]

Replies are listed 'Best First'.
Re: Re: Parsing of the web log file, access_log
by Andy61 (Initiate) on Jun 20, 2003 at 23:08 UTC
    Thanks for the post. Sure, let me try it out! However, I didn't understand why you were defining two interval sizes, 15 minutes and 1 minute. Also, maybe I didn't understand it well, but with this approach, how do I determine the number of occurrences of the same timestamp? For example, from your data, I could have 2 occurrences of 15/Jun/2003:00:03:44. Maybe some other timestamp has 5 occurrences, and so on?

    Regards

    Andy

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://267733]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others browsing the Monastery: (8)
As of 2024-04-16 07:29 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found