use strict;
use warnings;
# based on original code written by Ron Starr in May 2000
my $MAXLETTERS = 1000;
my $GRAIN = 3;
my $LETTERS_LINE = 70;
my @textletters;
while(<DATA>) {
# pull in the text, break it into letters, put in letter array
chomp;
my $text = $_ . " ";
# regularize whitespace in order to split text into letters
$text =~ s/^\s+//g; # remove leading blanks
$text =~ s/\s+/ /g; # convert any whitespace to blanks
$text =~ s/ +/ /g; # eliminate any multiple blanks...
push @textletters, split (//, $text); # split text into letters
}
# generate the frequency table
my $loopmax = $#textletters - ($GRAIN - 2) - 1; # calculate outer loop
+ limits
my %frequency_table;
for my $j (0 .. $loopmax - 1) {
# go through all lists of $GRAIN letters in the text
my $key_string = "";
# build the key string (GRAIN - 1) letters
$key_string .= $textletters[$_] for $j .. $j + $GRAIN - 1;
$frequency_table{$key_string} .= $textletters[$j + $GRAIN];
}
# generate the travesty
my @lastletters;
my $wordCount = 0;
my $sentenceCount = 0;
# start with a seed of the first $GRAIN letters from the text
push @lastletters, @textletters[0 .. $GRAIN - 1];
my $line = join '', @textletters[0 .. $GRAIN - 1];
# now, do the actual generation
while ($wordCount < 20 or $sentenceCount < 2) {
# see if the current last letters are in the table
my $key_string = ""; # construct the key string from the lastlette
+rs
$key_string .= join '', @lastletters[0 .. $GRAIN - 1];
if ( exists $frequency_table{$key_string} ) {
# we have possible letters
# split the list of letters that follow the key string
my @possible = split "", $frequency_table{$key_string};
# select the next letter
my $nextletter = $possible[rand @possible];
# adjust the lastletters array
@lastletters[0 .. $GRAIN - 2] = @lastletters[1 .. $GRAIN - 1];
$lastletters[$GRAIN - 1] = $nextletter;
++$wordCount if $nextletter eq ' ';
# Translate spaces to newlines
#$nextletter = "\n" if $nextletter = ' ';
# add letter to buffer and dump buffer if ready for output
$line .= $nextletter;
++$sentenceCount if $line =~ /\.[\n ]$/;
if(length ($line) > $LETTERS_LINE && $line =~ /[ .]$/) {
print "$line\n";
$line = '';
}
} else {
# we drew a blank
# re-seed the generation with the first $GRAIN letters from th
+e text
@lastletters = @textletters[0 .. $GRAIN - 1];
$line = join '', @textletters[0 .. $GRAIN - 1];
}
}
print "$line\n" if length $line;
__DATA__
After much searching I was able to locate a character-level Markov tex
+t
generator: http://www.eskimo.com/~rstarr/poormfa/travesty2.html Howeve
+r, this
program terminates once it has reached a certain number of characters.
+ My
knowledge of Perl is very limited (the concept of hashes escapes me co
+mpletely),
but I think what I need changed is relatively simple. How would this p
+rogram be
modified so that each word generated is printed on its own line (ie. w
+ords are
separated by \n instead of ' ') and it terminates after generating a c
+ertain
number of words, rather than characters?
Prints some variant on:
After generated is relatively), but I need (the com/~rstarr/poormfa/tr
+avesty2.
html Howevery line (ie. How words able to located a characterminates m
+e
com/~rstarr/poormfa/travesty2.html How word generating a charate a cer
+tain
number that I need once its on its on instead of Perl is program be mo
+dified
on its own line (ie.
DWIM is Perl's answer to Gödel
|