#!/usr/bin/env perl =head1 NAME B - Run Longest Common SubString tests =head1 SYNOPSIS lcss.pl file ... =head1 DESCRIPTION Input is a file, files or STDIN. Output is to STDOUT. Example: lcss.pl test.txt =cut use warnings FATAL => 'all'; use strict; use String::LCSS_XS qw(); use String::LCSS qw(); sub lcssN1 (\$\$;$); sub lcssN2 (\$\$;$); sub lcssN3 (\$\$;$); my $cnt = 0; my %data; while (<>) { chomp; if (s/^([stmec])=//) { my $param = $1; if ($param eq 'e') { push @{ $data{$param} }, $_; } elsif ($param eq 'c') { for my $p (qw(s t e)) { die "ERROR in input file: missing $p" unless exists $data{$p}; } # my $actual = String::LCSS_XS::lcss($data{s}, $data{t}); # my $actual = String::LCSS::lcss($data{s}, $data{t}); # my $actual = longest_common_substr($data{s}, $data{t}); my $actual = lc_substr($data{s}, $data{t}); # my $actual = lcssN1($data{s}, $data{t}); # my $actual = lcssN2($data{s}, $data{t}); # my $actual = lcssN3($data{s}, $data{t}); $actual = 'undef' unless defined $actual; check($actual); %data = (); $cnt++; } else { $data{$param} = $_; } } } print "cnt=$cnt\n"; die "ERROR: No checks performed\n" unless $cnt; exit; sub check { my $actual = shift; my $pass = 0; my @expects = @{ $data{e} }; my $num = @expects; for my $expect (@expects) { $pass++ if $actual eq $expect; } unless ($pass) { my $msg = exists $data{m} ? $data{m} : ''; warn "FAILED:"; print "actual='$actual'\n"; for my $expect (@expects) { print "expect='$expect'\n"; } print "s='$data{s}'\n"; print "t='$data{t}'\n"; print "m=$msg\n\n"; } } sub longest_common_substr { #japhy # provided you know there are no NULs my $str = join "\0", @_; my $len = 1; my $match; while ($str =~ m{ ([^\0]{$len,}) (?= [^\0]* \0 [^\0]*? \1 ) }x) { $len = length($match = $1) + 1; } return $match; } sub lcssN3 (\$\$;$) { # BrowserUk, rev. 3 my( $ref1, $ref2, $min ) = @_; my( $swapped, $l1, $l2 ) = ( 0, map length( $$_ ), $ref1, $ref2 ); ( $l2, $ref2, $l1, $ref1, $swapped ) = ( $l1, $ref1, $l2, $ref2, 1 ) if $l1 > $l2; $min = 1 unless defined $min; my $mask = $$ref1 x ( int( $l2 / $l1 ) ); my @match = ''; for my $start ( 0 .. $l1-1 ) { my $masked = substr( $mask, $start, $l2 ) ^ $$ref2; while( $masked =~ m[\0{$min,}]go ) { my $l = $+[ 0 ] - $-[ 0 ]; my $match = substr( $$ref2, $-[ 0 ], $l ); @match = ( $match, ( $-[ 0 ]+$start ) % $l1, $-[ 0 ] ) if $l > length $match[ 0 ]; } } @match[ 2, 1 ] = @match[ 1, 2 ] if $swapped; return unless $match[ 0 ]; return wantarray ? @match : $match[ 0 ]; } sub lcssN2 (\$\$;$) { # BrowserUk, rev. 2 my( $ref1, $ref2, $min ) = @_; my( $swapped, $l1, $l2 ) = ( 0, map length( $$_ ), $ref1, $ref2 ); ( $l2, $ref2, $l1, $ref1, $swapped ) = ( $l1, $ref1, $l2, $ref2, 1 ) if $l1 > $l2; $min = 1 unless defined $min; my $mask = $$ref1 x ( int( $l2 / $l1 ) + 1 ); my @match = ''; for my $start ( 0 .. $l1-1 ) { my $masked = substr( $mask, $start, $l2 ) ^ $$ref2; while( $masked =~ m[\0{$min,}]go ) { my $l = $+[ 0 ] - $-[ 0 ]; my $match = substr( $$ref2, $-[ 0 ], $l ); next unless 1+index $$ref1, $match; @match = ( $match, ( $-[ 0 ]+$start ) % $l1, $-[ 0 ] ) if $l > length $match[ 0 ]; } } @match[ 2, 1 ] = @match[ 1, 2 ] if $swapped; return unless $match[ 0 ]; return wantarray ? @match : $match[ 0 ]; } sub lcssN1 (\$\$;$) { # BrowserUk, rev. 1 my( $ref1, $ref2, $min ) = @_; my( $swapped, $l1, $l2 ) = ( 0, map length( $$_ ), $ref1, $ref2 ); ( $l2, $ref2, $l1, $ref1, $swapped ) = ( $l1, $ref1, $l2, $ref2, 1 ) if $l1 > $l2; $min = 1 unless defined $min; my $mask = $$ref1 x ( int( $l2 / $l1 ) + 1 ); my @match = ''; for my $start ( 0 .. $l1-1 ) { my $masked = substr( $mask, $start, $l2 ) ^ $$ref2; while( $masked =~ m[\0{$min,}]go ) { @match = ( substr( $$ref2, $-[ 0 ], $+[ 0 ] - $-[ 0 ] ), ( $-[ 0 ]+$start ) % $l1, $-[ 0 ] ) if ( $+[ 0 ] - $-[ 0 ] ) > length $match[ 0 ]; } } @match[ 2, 1 ] = @match[ 1, 2 ] if $swapped; return unless $match[ 0 ]; return wantarray ? @match : $match[ 0 ]; } sub lc_substr { # wiki my ($str1, $str2) = @_; my $l_length = 0; # length of longest common substring my $len1 = length $str1; my $len2 = length $str2; my @char1 = (undef, split(//, $str1)); # $str1 as array of chars, indexed from 1 my @char2 = (undef, split(//, $str2)); # $str2 as array of chars, indexed from 1 my @lc_suffix; # "longest common suffix" table my @substrings; # list of common substrings of length $l_length for my $n1 ( 1 .. $len1 ) { for my $n2 ( 1 .. $len2 ) { if ($char1[$n1] eq $char2[$n2]) { # We have found a matching character. Is this the first matching character, or a # continuation of previous matching characters? If the former, then the length of # the previous matching portion is undefined; set to zero. $lc_suffix[$n1-1][$n2-1] ||= 0; # In either case, declare the match to be one character longer than the match of # characters preceding this character. $lc_suffix[$n1][$n2] = $lc_suffix[$n1-1][$n2-1] + 1; # If the resulting substring is longer than our previously recorded max length ... if ($lc_suffix[$n1][$n2] > $l_length) { # ... we record its length as our new max length ... $l_length = $lc_suffix[$n1][$n2]; # ... and clear our result list of shorter substrings. @substrings = (); } # If this substring is equal to our longest ... if ($lc_suffix[$n1][$n2] == $l_length) { # ... add it to our list of solutions. push @substrings, substr($str1, ($n1-$l_length), $l_length); } } } } return $substrings[0]; # return @substrings; } __END__ Input file sytnax: - lines starting with s= define the 1st input string - lines starting with t= define the 2nd input string - lines starting with m= define the test message (optional) - lines starting with e= define the expected output - lines starting with c= trigger the check - all other lines are ignored and can be used for comments - whitespace is significant, except for the trailing newline - limited to "printable" ascii: space to tilde