#!/usr/bin/perl -w
#
# based on kernighan and pike's markov chain generator
# in _the practice of programming_, chapter 3
# (http://cm.bell-labs.com/cm/cs/tpop/markov.pl)
#

use strict;

my @words;    # sliding window of input words; it is not emptied
              # between input lines, so prefixes can span lines
my %wordlist; # key: prefix, value: anon hash (k: suffix,
              #                                v: frequency)

# optional leading arguments: prefix length, then number of words to
# generate.  a missing, empty or zero argument selects the default.
# whatever is left in @ARGV afterwards is read as input by <>
#
my $pref_len = shift @ARGV || 2;
my $maxwords = shift @ARGV || 100;

# both must be positive integers.  a negative prefix length would keep
# the word-list loop's "@words > $pref_len" test true forever, and a
# file name given in place of a number would silently count as 0
#
foreach my $arg ( $pref_len, $maxwords ) {
  die "usage: $0 [prefix_length [max_words]] [file ...]\n"
    unless $arg =~ /\A[0-9]+\z/ && $arg > 0;
}

my $entries  = 0; # number of prefix/suffix pairs recorded

# build word list
#
# every run of $pref_len consecutive words is a prefix, and the word
# that follows it is counted as one of its suffixes.  words are stored
# exactly as split returns them (no case folding), and the window of
# pending words carries over from one input line to the next
#
# 'Blessed is the man that walketh not in the counsel'
# %wordlist = ( 'Blessed is' => { 'the' => 1, },
#               'is the'     => { 'man' => 1, },
#               'the man'    => { 'that'=> 1, },
#             );
#
while ( defined( my $line = <> ) ) {
  push @words, split ' ', $line;

  # slide the window for as long as it holds a whole prefix plus the
  # word after it
  #
  until ( @words <= $pref_len ) {
    my $key  = join ' ', @words[ 0 .. $pref_len - 1 ];
    my $next = $words[$pref_len];

    $wordlist{$key}{$next}++;
    $entries++;

    shift @words; # drop the oldest word and move on
  }
}

# change each frequency count to a fraction of its own prefix's total
# (with help from pcb, recipe 2.10)
#
# the weights under one prefix have to sum to 1, because the weighted
# pick compares them against a random number in [0,1).  dividing by the
# global entry count instead leaves each prefix summing to only its
# share of all entries, so most random draws match no suffix at all
# and have to be retried
#
foreach my $href ( values %wordlist ) {
  my $total = 0;
  $total += $_ foreach values %$href;

  # values() aliases the hash's values, so this rescales them in place.
  # $total is at least 1: every stored count was created by ++
  #
  $_ /= $total foreach values %$href;
}

# starting point: a prefix chosen uniformly at random
#
my @prefixes = keys %wordlist;

# no prefixes means the input never held more than $pref_len words;
# stop with a diagnostic rather than carry on with an undefined prefix
#
die "$0: not enough input: need more than $pref_len words\n"
  unless @prefixes;

my $pref = $prefixes[ rand @prefixes ];

# the same prefix kept as a list of words, so that it can be advanced
# one word at a time whatever the prefix length
#
my @pref_words = split ' ', $pref;

print $pref;

# dump out listings: at most $maxwords further words, stopping early
# if the current prefix was never followed by anything in the input
#
for my $i ( 0 .. $maxwords - 1 ) {
  last unless exists $wordlist{$pref};

  my $suf = weighted_suffix();

  print ' ' . $suf;

  # $i counts from 0, so the breaks fall after generated words
  # 1, 11, 21, ...
  #
  print "\n" if $i % 10 == 0;

  # drop the first word of the prefix and append the new one.
  # (a substitution on the joined string needs a space to match, so
  # it can never advance a one-word prefix)
  #
  shift @pref_words;
  push  @pref_words, $suf;
  $pref = join ' ', @pref_words;
}

exit;

# pick the next word for the current prefix
# (weighted choice after pcb, recipe 2.10)
#
# takes no arguments; reads the file-scoped $pref and %wordlist.
# returns one of the suffixes recorded for $pref, chosen at random
# with probability proportional to its weight, or nothing when $pref
# is undefined or has no suffixes recorded
#
sub weighted_suffix {
  return unless defined $pref && exists $wordlist{$pref};

  my $weight_of = $wordlist{$pref};

  # walk a list of the keys rather than calling each() on the hash:
  # each() keeps its position in the shared hash when we return early,
  # so the next call for the same prefix would resume part-way through
  # and skew the choice.  (keys also resets any such stale position)
  #
  my @suffixes = keys %$weight_of;
  return unless @suffixes;

  # scale the random number to the sum of the weights, so one pass is
  # enough whether or not the weights add up to exactly 1
  #
  my $total = 0;
  foreach my $suf (@suffixes) {
    $total += $weight_of->{$suf};
  }

  my $rand = rand $total;

  foreach my $suf (@suffixes) {
    return $suf if ($rand -= $weight_of->{$suf}) < 0;
  }

  # floating-point rounding can leave $rand at or a hair above zero
  # after the last subtraction; settle for the last suffix
  #
  return $suffixes[-1];
}