# gimme_guten_tables($decoded, $maximum)
#
# Massages the text in $decoded with a series of substitutions, then, for
# each matched entry, probes three candidate download URLs (Plucker, HTML,
# plain text) with test_head() and appends one row of output to
# $guten_tables.  Returns the accumulated $guten_tables string.
#
# Parameters:
#   $decoded - the text to be rewritten and scanned for entries.
#   $maximum - accepted, but never referenced in the visible body.
#
# NOTE(review): this copy looks as though markup inside the regex patterns
# and qq{} templates was stripped (empty pattern in one substitution, a
# substitution fused with a block opener, a closing brace with no visible
# opener, '&' replaced by '&').  As shown it is not valid Perl -- restore
# the patterns from the original source before relying on it.
# NOTE(review): $guten_tables and $count are used without a visible `my`;
# presumably declared elsewhere in the file -- confirm.
sub gimme_guten_tables {
my ($decoded, $maximum) = @_;
# Clean-up substitutions on $decoded.  Most patterns here are missing their
# original delimiting text, so their intent cannot be read off this copy.
$decoded =~ s,
\n(.*?)\n,$1,g;
$decoded =~ s,(.*?)
.*?,$1,g;
$decoded =~ s,,$1,g;
$decoded =~ s,(.*?),$1,g;
# Drop opening and closing <ol> tags.
$decoded =~ s,<\/?ol>,,g;
# Remove the first newline followed by the rest of that line (no /g, and
# '.' does not cross newlines here).
$decoded =~ s,\n.*,,;
# NOTE(review): the next line appears to be the tail of a match condition
# merged with the start of a substitution; the code below reads captures
# $1, $2 and $3 from that lost pattern.
$decoded =~ s,^\n(.*?)(?: \((\d+)\))?<\/a>/) {
# Put a '/' between the individual characters of $1 (split on / */ yields
# single characters), e.g. '12345' -> '1/2/3/4/5'.
my $splitguten = join('/', split(/ */, $1));
# Four-argument substr removes the last two characters from $splitguten
# in place; the removed piece is kept in $clipguten, which is not read
# again in the visible code.
my $clipguten = substr($splitguten, -2, 2, '');
# Use $3 when it is true, otherwise fall back to $1.
my $readmarks = $3 ? $3 : $1;
my $title = $2;
# NOTE(review): if this substitution matches, it replaces the capture
# variables, yet $1 is interpolated into the mirror URLs below -- confirm
# that this is intended.
$title =~ s,by (.*?), by $1,g;
# One record per download format: the URL to probe, its MIME type, a
# human-readable label and a short format tag.
my %gutentypes = (
plucker => {
'mirror' => "http://www.gutenberg.org/cache/plucker/$1/$1",
'content-type' => 'application/prs.plucker',
'string' => 'Plucker',
'format' => 'pdb'
},
html => {
'mirror' => "http://www.gutenberg.org/dirs/$splitguten/$1/$1-h/$1-h.htm",
'content-type' => 'text/html',
'string' => 'Marked-up HTML',
'format' => 'html'
},
text => {
'mirror' => "http://sailor.gutenberg.lib.md.us/$splitguten/$1/$1.txt",
'content-type' => 'text/plain',
'string' => 'Plain text',
'format' => 'txt'
},
);
# Probe each mirror URL and store a 'link' string for the format.  As shown,
# both branches emit only the format tag (the 200 branch adds a newline);
# presumably the 200 branch originally wrapped it in markup -- confirm.
# $type is returned by test_head() but not used in the visible code.
for my $types ( sort keys %gutentypes ) {
my ($status, $type) = test_head($gutentypes{$types}{mirror});
if ($status == 200) {
$gutentypes{$types}{link} =
qq{$gutentypes{$types}{format}\n};
} else {
$gutentypes{$types}{link} =
qq{$gutentypes{$types}{format}};
}
}
# Append one row for this entry: running count, $readmarks, title and the
# three per-format links, then advance the counter.
$guten_tables .= qq{
$count |
$readmarks |
$title
|
$gutentypes{plucker}{link} |
$gutentypes{html}{link} |
$gutentypes{text}{link} |
\n};
$count++;
}
}
# NOTE(review): as shown this replaces '&' with '&' (a no-op); presumably
# the replacement was an HTML entity before extraction -- confirm.
$guten_tables =~ s,\&,\&,g;
# Collapse a newline plus whitespace sitting between '>' and '<'.
$guten_tables =~ s,>\n\s+<,><,g;
return $guten_tables;
}
# test_head($url)
#
# Issues an HTTP HEAD request for $url and reports what the server said.
#
# Parameters:
#   $url - the address to probe.
#
# Returns a two-element list:
#   - the numeric HTTP status code of the response;
#   - the value of the Content-Type response header (undef when absent).
#
# The status code is taken from HTTP::Response->code rather than parsed out
# of status_line with a regex: an unchecked capture would hand back a stale
# $1 from the caller's last successful match if the pattern ever failed.
sub test_head {
my $url = shift;
my $ua = LWP::UserAgent->new();
# Present a browser-like User-Agent string to the server.
$ua->agent('Mozilla/5.0 (Windows; U; Windows NT 5.1;) Firefox/2.0.0.6');
my $request = HTTP::Request->new(HEAD => $url);
my $response = $ua->request($request);
my $code = $response->code;
# Scalar assignment on purpose: header() is context-sensitive, and the
# caller expects exactly one value in this slot.
my $type = $response->header('Content-Type');
return ($code, $type);
}