Beefy Boxes and Bandwidth Generously Provided by pair Networks
Your skill will accomplish
what the force of many cannot
 
PerlMonks  

Re: using perl to find words for scrabble

by atcroft (Abbot)
on Sep 06, 2019 at 04:01 UTC ( #11105700=note: print w/replies, xml ) Need Help??


in reply to using perl to find words for scrabble

I played with this a little too long, to be honest. (I also wanted to play with an inverted index, although I'm not sure if I coded that correctly or not.)

The code below seems to do the following:

  • Run against a specified word list (default: /usr/share/dict/words).
  • Can filter on word length (min/max) and/or base word score (min/max).
  • Can have words included/excluded from the command line (but will NOT display a value that cannot be created with the tiles you specify).
  • Displays words in descending score order, then in alphabetical (case-INsensitive) order.
  • Coded to run under at least 5.10 and following (possibly even earlier).
#!/usr/bin/perl # vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: use strict; use warnings; use Data::Dumper; use Getopt::Long; $Data::Dumper::Deepcopy = 1; $Data::Dumper::Sortkeys = 1; $| = 1; srand(); my %limit; $limit{len} = { max => 0xFFFFFFFF, min => 0x00000000, }; $limit{score} = { max => 0xFFFFFFFF, min => 0x00000000, }; $limit{word} = { exclude => [], include => [], }; my $wordlist = q{/usr/share/dict/words}; GetOptions( q{exclude:s} => \@{ $limit{word}{exclude} }, q{include:s} => \@{ $limit{word}{include} }, join( q{:}, join( q{|}, q{length-min}, q{lengthmin}, q{len-min}, q{lenmin}, q{min-len}, q{minlen}, q{min-length}, q{minlength} ), q{i}, ) => \$limit{len}{min}, join( q{:}, join( q{|}, q{length-max}, q{lengthmax}, q{len-max}, q{lenmax}, q{max-len}, q{maxlen}, q{max-length}, q{maxlength} ), q{i}, ) => \$limit{len}{max}, join( q{:}, join( q{|}, q{score-min}, q{scoremin}, q{min-score}, q{minscore} ), q{i}, ) => \$limit{score}{min}, join( q{:}, join( q{|}, q{score-max}, q{scoremax}, q{max-score}, q{maxscore} ), q{i}, ) => \$limit{score}{max}, 'word-list|wordlist|wl:s' => \$wordlist, 'help' => sub { help( limit => \%limit, tile => \@ARGV, wordlist => $wordlist, ); }, ); { # Split include and exclude phrases on the following: # commas (',') # semi-colons (';'), and/or # pipe characters ('|') foreach my $str (qw/ include exclude /) { if ( scalar @{ $limit{word}{$str} } ) { my @t_array = (); foreach my $i ( 0 .. $#{ $limit{word}{$str} } ) { foreach my $t ( split /[,;|]/, $limit{word}{$str}[$i] ) { push @t_array, $t; } } @{ $limit{word}{$str} } = sort { $a cmp $b } @t_array; } } # # Enforce $limit{len}{max} > $limit{len}{min} # and $limit{score}{max} > $limit{score}{min} foreach my $str (qw/ len score /) { if ( $limit{$str}{min} > $limit{$str}{max} ) { ( $limit{$str}{min}, $limit{$str}{max}, ) = ( $limit{$str}{max}, $limit{$str}{min}, ); } } } my @tile = @ARGV; # Letter score values nicked from # [id://11105648] by Discipulus my %score; map { $score{$_} = 1; } qw/ a e i l o n r s t u /; map { $score{$_} = 2; } qw/ d g /; map { $score{$_} = 3; } qw/ b c m p /; map { $score{$_} = 4; } qw/ f h v w y /; map { $score{$_} = 5; } qw/ k /; map { $score{$_} = 8; } qw/ j x /; map { $score{$_} = 10; } qw/ q z /; # @dict is an array for which each entry will be a # hash containing: # word: original word from word list # len: length of word in characters # score: total score of all letters, no # modifiers # component: a hash with: # key: a lowercase letter # value: the occurrences of # the letter in the # word my @dict = (); # $word_width will eventually contain the length of # longest word in $wordlist; used for creating output # format my $word_width = -1; create_dictionary( file => $wordlist, maxwidth => \$word_width, dict => \@dict, ); # %found is a HoHoA ($found{key1}{key2}[index]) where: # key1: lower-case letter # key2: occurs number or less times in word # index: index of a particular word from @dict, # containing key1 at least key2 times my %found; build_inverted_index( index => \%found, dict => \@dict, ); my @possible = (); get_raw_possible( dict => \@dict, index => \%found, limit => \%limit, possible => \@possible, tile => \@tile, ); filter_possible( dict => \@dict, index => \%found, limit => \%limit, possible => \@possible, tile => \@tile, ); output_possible( dict => \@dict, limit => \%limit, possible => \@possible, tile => \@tile, width => $word_width, ); # # Subroutines # sub build_inverted_index { my %param = @_; return if ( not defined $param{dict} ); return if ( not defined $param{index} ); my $dict = $param{dict}; my $found = $param{index}; foreach my $i ( 0 .. $#dict ) { my @part = split //, $dict[$i]->{word}; my %count; foreach my $s (@part) { $count{$s}++; } foreach my $s ( keys %count ) { foreach my $j ( 1 .. $count{$s} ) { push @{ $found->{ lc $s }{$j} }, $i; } } } } sub create_dictionary { my %param = @_; return if ( not defined $param{dict} ); return if ( not defined $param{file} ); return if ( not defined $param{maxwidth} ); my $dict = $param{dict}; my $width = $param{maxwidth}; open my $fh, q{<}, $param{file} or die $!; while ( my $line = <$fh> ) { chomp $line; my %hash = (); $hash{word} = $line; $hash{len} = length $line; if ( $hash{len} > $$width ) { $$width = $hash{len}; } { my $w_score = 0; my @part = split //, $line; my %t_hash = (); foreach my $s ( map { lc $_; } @part ) { $t_hash{$s}++; if ( $s =~ m/[a-z]/i ) { $w_score += $score{$s}; } } $hash{component} = \%t_hash; $hash{score} = $w_score; } push @{$dict}, \%hash; } close $fh; @{$dict} = sort { $b->{len} <=> $a->{len} || $a->{word} cmp $b->{word} } @{$dict}; } sub filter_possible { my %param = @_; return if ( not defined $param{dict} ); return if ( not defined $param{index} ); return if ( not defined $param{limit} ); return if ( not defined $param{possible} ); return if ( not defined $param{tile} ); my $dict = $param{dict}; my $limit = $param{limit}; my $possible = $param{possible}; my $tile = $param{tile}; # Get list of dictionary indicies # for exclude and include words my %temp_hash; foreach my $str (qw/ exclude include /) { foreach my $i ( 0 .. $#{ $limit->{word}{$str} } ) { foreach my $j ( 0 .. $#{$dict} ) { if ( lc( $dict->[$j]{word} ) eq lc( $limit->{word}{$str}[$i] ) ) { $temp_hash{$str}{$j}++; } } } } # Discount possible entries on word length @{$possible} = grep { $dict->[$_]{len} >= $limit->{len}{min} } @{$possible}; @{$possible} = grep { $dict->[$_]{len} <= $limit->{len}{max} } @{$possible}; # Discount possible entries on word score @{$possible} = grep { $dict->[$_]{score} >= $limit->{score}{min} } @{$possible}; @{$possible} = grep { $dict->[$_]{score} <= $limit->{score}{max} } @{$possible}; # Discount possible entries in exclude list foreach my $i ( reverse( 0 .. $#{$possible} ) ) { if ( exists $temp_hash{exclude}{ $possible->[$i] } ) { splice @{$possible}, $i, 1; last; } } # Add include list entries back to possible entries foreach my $k ( map { 0 + $_; } keys %{ $temp_hash{include} } ) { push @{$possible}, $k; } @{$possible} = sort { $a <=> $b } get_unique( @{$possible}, ); # %count in the form of: letter => occurrences my %count; foreach my $s (@tile) { $count{ lc $s }++; } # Exclude if not possible with current set of tiles, # even if in include list foreach my $i ( reverse( 0 .. $#{$possible} ) ) { foreach my $s ( keys %{ $dict->[ $possible->[$i] ]{component} } ) { # Letter not in existing tiles if ( not exists $count{ lc $s } ) { splice @{$possible}, $i, 1; last; } # Not enough letters in existing tiles if ( $count{ lc $s } < $dict->[ $possible->[$i] ]{component}{$s} ) { splice @{$possible}, $i, 1; last; } } } @{$possible} = sort { $a <=> $b } get_unique( @{$possible}, ); } sub get_raw_possible { my %param = @_; return if ( not defined $param{dict} ); return if ( not defined $param{index} ); return if ( not defined $param{limit} ); return if ( not defined $param{possible} ); return if ( not defined $param{tile} ); my $dict = $param{dict}; my $found = $param{index}; my $limit = $param{limit}; my $possible = $param{possible}; my $tile = $param{tile}; # Get count of tiles in hand my %count; foreach my $s ( @{$tile} ) { $count{$s}++; } # Get all entries containing 1..$count{$s} for each $s foreach my $s ( keys %count ) { foreach my $i ( 1 .. $count{$s} ) { foreach my $j ( 0 .. $#{ $found->{ lc $s }{$i} } ) { push @{$possible}, $found->{ lc $s }{$i}[$j]; } } } @{$possible} = sort { $a <=> $b } get_unique( @{$possible}, ); } sub get_unique { my (@temp_array) = @_; my %temp_hash; foreach my $s (@temp_array) { $temp_hash{$s}++; } @temp_array = sort keys %temp_hash; return @temp_array; } sub output_possible { my %param = @_; return if ( not defined $param{dict} ); return if ( not defined $param{limit} ); return if ( not defined $param{possible} ); return if ( not defined $param{tile} ); my $dict = $param{dict}; my $limit = $param{limit}; my $possible = $param{possible}; my $tile = $param{tile}; my $width = $param{width}; # Display limitations (if provided) if ( ( ( scalar @{ $limit->{word}{exclude} } ) or ( scalar @{ $limit->{word}{include} } ) ) or ( ( $limit->{len}{max} < 0xFFFFFFFF ) or ( $limit->{len}{min} > 0x00000000 ) ) or ( ( $limit->{score}{max} < 0xFFFFFFFF ) or ( $limit->{score}{min} > 0x00000000 ) ) ) { print qq{Limitations:\n}; if ( ( $limit->{len}{max} < 0xFFFFFFFF ) or ( $limit->{len}{min} > 0x00000000 ) ) { print sprintf qq{\tWord length: %10d (min), %10d (max)\n}, $limit->{len}{min}, $limit->{len}{max}; } if ( ( $limit->{score}{max} < 0xFFFFFFFF ) or ( $limit->{score}{min} > 0x00000000 ) ) { print sprintf qq{\tWord score: %10d (min), %10d (max)\n}, $limit->{score}{min}, $limit->{score}{max}; } if ( scalar @{ $limit->{word}{exclude} } ) { print qq{\tExclude words:\n}; print qq{\t\t}, join( qq{, }, @{ $limit->{word}{exclude} }, ), qq{\n}; } if ( scalar @{ $limit->{word}{include} } ) { print qq{\tInclude words (overrides exclude words):\n}; print qq{\t\t}, join( qq{, }, @{ $limit->{word}{include} }, ), qq{\n}; } } # Display tiles my %count = (); foreach my $s ( map { lc $_; } @tile ) { $count{$s}++; } print qq{Tiles:\n}; print qq{\t}, join( q{, }, map { sprintf qq{%s: %d}, $_, $count{$_}; } sort { lc($a) cmp lc($b) } keys %count, ), qq{\n}; print qq{\n}; my $count = 0; my $index_width = int( ( log( scalar @dict ) / log(10) ) + 0.501 + 1.0 ); my $output_format = qq{%${index_width}d: %${width}s}; $output_format .= q{ (}; my @output_options = (); push @output_options, q{score: %d}; # To add to output line (examples): # push @output_options, q{len: %d}; # push @output_options, q{components: %s}; $output_format .= join( q{, }, @output_options, ); $output_format .= q{) }; $output_format .= qq{\n}; foreach my $i ( sort { $dict[$b]{score} <=> $dict[$a]{score} || lc $dict[$a]{word} cmp lc $dict[$b]{word} } @possible ) { $count++; print sprintf $output_format, $count, $dict[$i]{word}, $dict[$i]{score}, # To add to output line (examples): # $dict[$i]{len}, # join( q{, }, # map { sprintf qq{%s: %d}, $_, # $dict[$i]{component}{$_}; } # sort { lc $a cmp lc $b } # keys %{ $dict[$i]{component} }, # ) ; } } sub help { my %param = @_; return if ( not defined $param{limit} ); return if ( not defined $param{tile} ); return if ( not defined $param{wordlist} ); my $limit = $param{limit}; my $tile = $param{tile}; my $wordlist = $param{wordlist}; print qq{Usage:\n}; print qq{\t$0 --help\n}; print qq{\t$0 [--options] letters\n}; print qq{\n}; print qq{Options:\n}; print qq{\t--help\t\t - Display usage help\n}; print qq{\t--exclude str \t - Word to exclude (*)\n}; print qq{\t--include str \t - Word to include (*)\n}; print qq{\t--minlength n \t - Minimum word length to }; print qq{display\n}; print qq{\t--maxlength n \t - Maximum word length to }; print qq{display\n}; print qq{\t--minscore n \t - Minimum word score to }; print qq{display\n}; print qq{\t--maxscore n \t - Maximum word score to }; print qq{display\n}; print qq{\t--wordlist str\t - Word list to use\n}; print qq{\n}; print qq{\tletters\t - Letters in hand, }; print qq{space separated.\n}; print qq{\n}; print qq{Notes:\n}; print qq{\t* - Entry may be repeated\n}; print qq{\tWords for --include|--exclude may be joined }; print qq{by comma (","), semi-colon (";"), or pipe }; print qq{character ("|").\n}; print qq{\tAn include entry will override }; print qq{an exclude entry.\n}; print qq{\n}; print qq{Current values:\n}; # Display limitations (if provided) if ( ( ( scalar @{ $limit->{word}{exclude} } ) or ( scalar @{ $limit->{word}{include} } ) ) or ( ( $limit->{len}{max} < 0xFFFFFFFF ) or ( $limit->{len}{min} > 0x00000000 ) ) or ( ( $limit->{score}{max} < 0xFFFFFFFF ) or ( $limit->{score}{min} > 0x00000000 ) ) ) { if ( ( $limit->{len}{max} < 0xFFFFFFFF ) or ( $limit->{len}{min} > 0x00000000 ) ) { print sprintf qq{\tWord length: %10d (min), %10d (max)\n}, $limit->{len}{min}, $limit->{len}{max}; } if ( ( $limit->{score}{max} < 0xFFFFFFFF ) or ( $limit->{score}{min} > 0x00000000 ) ) { print sprintf qq{\tWord score: %10d (min), %10d (max)\n}, $limit->{score}{min}, $limit->{score}{max}; } if ( scalar @{ $limit->{word}{exclude} } ) { print qq{\tExclude words:\n}; print qq{\t\t}, join( qq{, }, @{ $limit->{word}{exclude} }, ), qq{\n}; } if ( scalar @{ $limit->{word}{include} } ) { print qq{\tInclude words (overrides exclude words):\n}; print qq{\t\t}, join( qq{, }, @{ $limit->{word}{include} }, ), qq{\n}; } } print qq{\tWord list: }, $wordlist, qq{\n}; print qq{\n}; if ( scalar @{$tile} > 0 ) { # Display tiles my %count = (); foreach my $s ( map { lc $_; } grep { !m/\S{2,}/; } @{$tile} ) { $count{$s}++; } print qq{Tiles:\n}; print qq{\t}, join( q{, }, map { sprintf qq{%s: %d}, $_, $count{$_}; } sort { lc($a) cmp lc($b) } keys %count, ), qq{\n}; } print qq{\n}; exit; }

Enjoyed seeing what others posted on this one.

Hope that helps.

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://11105700]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others contemplating the Monastery: (3)
As of 2020-09-27 00:53 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?
    If at first I donít succeed, I Ö










    Results (142 votes). Check out past polls.

    Notices?