For your first two cases the solution is straightforward, but you need to do a bit more to cover the third. Here is one way; it may not be the most efficient, but it is something to build on. I am assuming your focus is on the artists, i.e. you want to rank each artist as opposed to ranking each line.
use strict;
use warnings;
use Data::Dumper;

# Sort hash keys so the dumped structures come out in a stable order.
$Data::Dumper::Sortkeys = 1;

# Known artists. Only the keys are used for matching; the value is a flag.
my %artists = (
    'William Blake' => 1,
    'David Hockney' => 1,
    'Francis Blake' => 1,
    'David Lynch'   => 1,
);

# Input lines to be matched against the artist names.
my @lines = (
    'William Blake',
    'Blake Morrison',
    'david lynch',
    'francis bacon',
    'William Blake and Blake Morrison',
);

# Result: $rank{artist}{line index} = score, where
#   256  the line is exactly the artist's name,
#   128  the line equals the artist's name when case is ignored,
#   n    otherwise: +1 for every word occurrence in the line that also
#        appears in the artist's name (a repeated word counts each time).
# The 256/128 scores are assigned, not added, so they replace any word
# count already accumulated for that artist on that line.
my %rank;

# make artists lower-case for case-insensitive match
my %artists_lc = map { lc($_) => $_ } keys %artists;

# map all the artist words: lower-cased word => { artist name => count }
my %artist_words;
for my $artist ( keys %artists ) {

    # split ' ' splits on runs of whitespace like /\s+/, but also drops
    # leading whitespace, so no empty "word" is ever indexed.
    for my $word ( split ' ', $artist ) {
        $artist_words{ lc $word }{$artist}++;
    }
}

# have a look at the map
print Dumper( \%artist_words );

for my $i ( 0 .. $#lines ) {
    my $line    = $lines[$i];
    my $line_lc = lc $line;

    # partial match: credit every artist whose name contains the word
    for my $word ( split ' ', $line_lc ) {
        my $matches = $artist_words{$word} or next;
        $rank{$_}{$i}++ for keys %$matches;
    }

    # deal with exact and case-insensitive
    # exists/defined rather than truthiness, so an artist flagged with 0
    # or literally named '0' is still recognised.
    if ( exists $artists{$line} ) {
        $rank{$line}{$i} = 256;
    }
    elsif ( defined( my $artist = $artists_lc{$line_lc} ) ) {
        $rank{$artist}{$i} = 128;
    }
}

print Dumper( \%rank );
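Output (of the final Dumper(\%rank) call only; the word map printed first is not shown). The structure is:
{
Artist => {
line_index => score
}
}
and the actual result is: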
{
'David Hockney' => {
'2' => 1
},
'David Lynch' => {
'2' => 128
},
'Francis Blake' => {
'0' => 1,
'1' => 1,
'3' => 1,
'4' => 2
},
'William Blake' => {
'0' => 256,
'1' => 1,
'4' => 3
}
};