use strict;
use warnings;

use Algorithm::Diff qw(traverse_sequences);

# Demo of a word-level similarity measure built on Algorithm::Diff.
#
# Each call to compare() below prints the two similarity ratios for a pair
# of strings, separated by '|'.  The output recorded from a run with
# $DEBUG enabled is kept after __DATA__ at the end of the file.

# When true, compare() prints the strings being compared and the
# matched / unmatched token lists to STDOUT.
our $DEBUG = 1;

# Separate the items printed by the demo loop with '|'.  Localized so the
# output separator is restored once this file has finished running.
local $, = '|';

my @demo_pairs = (
    [ 'Hello', 'hello' ],
    [ 'Hello', 'HELLO WORLD' ],
    [
        'The quick brown fox jumped over the lazy dogs.',
        'The quick brown dogs jumped over the lazy fox.',
    ],
    [
        'The quick brown fox jumped over the lazy dogs.',
        'The quick brown fox jumped over the lazy kangaroo.',
    ],
    [
        'The quick brown fox jumped over the lazy dogs.',
        'The quick brown fox jumped, tripped and broke its neck.',
    ],
);

for my $pair (@demo_pairs) {
    print compare( @{$pair} ), "\n";
}

# compare( $str1, $str2 )
#
# Tokenizes both strings with tokenize() and diffs the two token lists
# with Algorithm::Diff::traverse_sequences.
#
# Returns a two-element list:
#   - matched tokens / total tokens of $str1
#   - matched tokens / total tokens of $str2
#
# A string that yields no tokens at all (empty, or punctuation only) gets
# a ratio of 0 for its side instead of dying with a division by zero.
#
# When $DEBUG is true, the inputs and the matched / unmatched token lists
# are printed to STDOUT.
sub compare {
    my ( $str1, $str2 ) = @_;

    print "\nCompare '$str1' <=> '$str2'\n" if $DEBUG;

    my $tok_str1 = tokenize($str1);
    my $tok_str2 = tokenize($str2);

    my ( @match, @str1, @str2 );
    traverse_sequences(
        $tok_str1,
        $tok_str2,
        {
            # Callbacks receive the current index into the first list in
            # $_[0] and into the second list in $_[1].
            MATCH     => sub { push @match, $tok_str1->[ $_[0] ] },
            DISCARD_A => sub { push @str1,  $tok_str1->[ $_[0] ] },
            DISCARD_B => sub { push @str2,  $tok_str2->[ $_[1] ] },
        }
    );

    print "'@match' '@str1' '@str2'\n" if $DEBUG;

    my $total1 = @match + @str1;
    my $total2 = @match + @str2;

    # Guard the divisions: a side without tokens has nothing to match.
    return (
        $total1 ? @match / $total1 : 0,
        $total2 ? @match / $total2 : 0,
    );
}

# tokenize( $str )
#
# Strips every character that is not an ASCII letter, an ASCII digit or
# whitespace, lowercases the result and splits it on runs of whitespace.
# An undefined argument is treated as the empty string.
#
# Returns a reference to the array of tokens (possibly empty).
sub tokenize {
    my ($str) = @_;
    $str = '' unless defined $str;

    # Remove punctuation.  Whitespace of any kind (not only the space
    # character) is kept so that tab- or newline-separated words are not
    # glued together before the split below.
    $str =~ s/[^A-Za-z0-9\s]+//g;

    # Lowercase so the comparison is case-insensitive.
    $str = lc $str;

    # split ' ' splits on any whitespace run and drops leading whitespace.
    return [ split ' ', $str ];
}

__DATA__

Compare 'Hello' <=> 'hello'
'hello' '' ''
1|1|

Compare 'Hello' <=> 'HELLO WORLD'
'hello' '' 'world'
1|0.5|

Compare 'The quick brown fox jumped over the lazy dogs.' <=> 'The quick brown dogs jumped over the lazy fox.'
'the quick brown jumped over the lazy' 'fox dogs' 'dogs fox'
0.777777777777778|0.777777777777778|

Compare 'The quick brown fox jumped over the lazy dogs.' <=> 'The quick brown fox jumped over the lazy kangaroo.'
'the quick brown fox jumped over the lazy' 'dogs' 'kangaroo'
0.888888888888889|0.888888888888889|

Compare 'The quick brown fox jumped over the lazy dogs.' <=> 'The quick brown fox jumped, tripped and broke its neck.'
'the quick brown fox jumped' 'over the lazy dogs' 'tripped and broke its neck'
0.555555555555556|0.5|