Beefy Boxes and Bandwidth Generously Provided by pair Networks
"be consistent"
 
PerlMonks  

One possible answer

by tilly (Archbishop)
on Aug 04, 2000 at 22:49 UTC ( [id://26255] : note . print w/replies, xml ) Need Help??


in reply to Finding patterns in packet data?

I had an OK idea for finding the patterns of maximum length that appear in at least n of the packets. This could be sped up immensely. You can also keep an array of the top packs seen and change this to, say, return the top 100 length patterns seen.

Coding it was not as easy as I hoped. :-(

use strict; # Takes the minimum count you are interested in and an array of string +s, # returns the list of patterns of the maximum size that can be found i +n # at least that threshold of the strings, sorted by number of strings. # sub find_max_pat { my $min = shift; # Sanity check return () unless $min <= scalar (@_); # %active_pats are the counts of the size of remaining active patt +erns my %active_pats = ('', scalar (@_)); # @searches contain search information for all strings my @searches = map {new Search::Strings($_)} @_; my %last_pats = (); while (%active_pats) { %last_pats = %active_pats; %active_pats = (); # Get counts at the next length foreach my $search (@searches) { foreach my $pat ($search->inc(\%last_pats)) { ++$active_pats{$pat}; } } # Remove patterns below our threshold foreach my $pat (keys %active_pats) { delete $active_pats{$pat} unless ($min <= $active_pats{$pa +t}); } } return keys %last_pats; } package Search::Strings; use strict; sub inc { my $self = shift; my $is_cont = shift; my $str = $self->{str}; my $len = length($str); my $pat_len = ++$self->{pat_len}; my %cur_pat; foreach my $pat (keys %{$self->{pats}}) { next unless exists $is_cont->{$pat}; my $spots = $self->{pats}{$pat}; if ($len < $pat_len + $spots->[-1]) { # This pattern reached the end of the string pop @$spots; } foreach my $spot (@$spots) { push @{$cur_pat{ substr($str, $spot, $pat_len) }}, $spot; } } $self->{pats} = \%cur_pat; return keys %cur_pat; } sub new { my $class = shift; my $self = {}; $self->{str} = shift; $self->{pat_len} = 0; my %pats = ('', [0..(length($self->{str}) - 1)]); $self->{pats} = \%pats; return bless $self, $class; }