print compare( 'Hello', 'hello' ), $/;  # 1
print compare( 'Hello', 'HELLO WORLD' ), $/; # 0.5
print compare( 'The quick brown fox jumped over the lazy dogs.', 
               'The quick brown dogs jumped over the lazy fox.' ), $/; # 1
print compare( 'The quick brown fox jumped over the lazy dogs.', 
               'The quick brown dogs jumped over the lazy kangaroo.' ); # 0.888



sub compare {
    my ( $str1, $str2 ) = @_;
    my $tok_str1 = tokenize($str1);
    my $tok_str2 = tokenize($str2);
    # swap unless @$tok_str1 contains the most tokens
    ($tok_str1, $tok_str2) = ($tok_str2, $tok_str1) if @$tok_str2 > @$tok_str1;
    # make a lookup hash for the smaller numer of tokens in str2
    my %h;
    @h{@$tok_str2} = ();  # slice syntax if fastest
    # now scan str1 for these tokens and count
    my $found = 0;
    for my $tok ( @$tok_str1 ) {
        $found++ if exists $h{$tok};
    }
    my $similarity = $found/@$tok_str1;
  return $similarity;
}



sub tokenize {
    my ($str) = @_;
    # remove punctuation stuff
    $str =~ s/[^A-Za-z0-9 ]+//g;
    # lowercase
    $str = lc $str;
    # magic whitespace split and return array ref
  return [split ' ', $str];
}