#!/usr/bin/perl
use strict;
use warnings;
=pod Removed original file name code to make sample self contained.
my $f1 = shift;
my $f2 = shift;
if (! defined($f1) or ! defined($f2)) {
die "Need two text file names as arguments. \n";
}
=cut
# Sample texts standing in for the two input files.
my $file1Content = <<CONTENT;
red green blue red orange
CONTENT
my $file2Content = <<CONTENT;
yellow orange
red grey purple
CONTENT

# Split one line of text into normalized words.
#
# Punctuation is treated as a word separator, anything that is not an ASCII
# letter or digit is stripped, and the remaining words are lower-cased.
# Empty fragments are dropped.  Both texts go through this same routine so
# that a word is tokenized identically no matter which text it came from.
sub words_in_line {
    my ($line) = @_;

    $line =~ s/[[:punct:]]/ /g;

    my @words;
    # split ' ' skips leading whitespace, so no empty first field appears.
    for my $word (split ' ', $line) {
        $word =~ s/[^A-Za-z0-9]//g;
        push @words, lc $word if length $word;
    }
    return @words;
}

# Return the alphabetically sorted list of distinct words that occur in both
# of the given (already opened, readable) filehandles.
#
# Each handle is read to end-of-file; neither is closed here.
sub common_words {
    my ($fh1, $fh2) = @_;

    # Every word of the first text starts with a count of 1.
    my %results;
    while (my $line = <$fh1>) {
        $results{$_} = 1 for words_in_line($line);
    }

    # Only the first occurrence of a word in the second text bumps the
    # count; repeats are ignored instead of resetting the count, so a word
    # ends up above 1 exactly when it is present in both texts.
    my %seen_in_second;
    while (my $line = <$fh2>) {
        for my $word (words_in_line($line)) {
            next if $seen_in_second{$word}++;
            $results{$word}++;
        }
    }

    my @in_both = sort grep { $results{$_} > 1 } keys %results;
    return @in_both;
}

open my $file1, '<', \$file1Content
    or die "Cannot open first text: $!";
open my $file2, '<', \$file2Content
    or die "Cannot open second text: $!";

my @common = common_words($file1, $file2);

close $file1;
close $file2;

print $_, "\n\n" for @common;
printf "Found %d words in common\n", scalar @common;
Prints:
orange
red
Found 2 words in common
Your post doesn't describe how the script fails. Can you provide sample "file contents" that make it fail, and say what output you expected instead?
Of course, the code can be cleaned up a little:
#!/usr/bin/perl
use strict;
use warnings;
# Sample texts standing in for the two input files.
my $file1Content = <<CONTENT;
red green blue red orange
CONTENT
my $file2Content = <<CONTENT;
yellow orange
red grey purple
CONTENT

# Return the lower-cased words of one line of text.  Runs of non-word
# characters and digits act as separators; the empty leading field that
# split produces when a line starts with a separator is discarded.
sub words_of {
    my ($line) = @_;
    return map { lc($_) } grep { length } split /[\W\d]+/, $line;
}

# Remember every word that appears in the first text.
my %group1;
open my $file1, '<', \$file1Content
    or die "Cannot open first text: $!";
while (my $line = <$file1>) {
    $group1{$_} = 1 for words_of($line);
}
close $file1;

# Collect the words of the second text that were also seen in the first.
# Using a hash makes each common word count once however often it repeats.
my %common;
open my $file2, '<', \$file2Content
    or die "Cannot open second text: $!";
while (my $line = <$file2>) {
    $common{$_} = 1 for grep { exists $group1{$_} } words_of($line);
}
close $file2;

print "$_\n\n" for sort keys %common;
printf "Found %d words in common\n", scalar keys %common;
Premature optimization is the root of all job security
|