http://qs321.pair.com?node_id=374801
Category: Fun Stuff
Author/Contact Info
Description: Grabs the complete works of any poet on plagiarist.com and sticks them in a text file (alphabetized by title). Includes _PAGEBREAK_ markers for your favorite word processor's find/replace command.

Quick and dirty.

Invoke with: ./auden.pl NumberOfPoetAccordingToPlagiaristCom OutputFilePrefix

e.g.: ./auden.pl 91 jeffers_robinson
#!/usr/bin/perl

use strict;
use warnings;

# Command-line arguments: the numeric poet ID used in plagiarist.com URLs and
# the prefix used for every file this script creates.
my $poet = $ARGV[0];
my $name = $ARGV[1];

# Report usage problems on STDERR with a non-zero exit status so callers can
# tell the run failed (a plain print + exit() would report success).
if (!$poet || !$name) {
    die "Usage: $0 NumberOfPoetAccordingToPlagiaristCom OutputFilePrefix\n";
}

# Both values end up in URLs, file names and download commands, so refuse
# anything that is not a plain number / plain file-name prefix.
if ($poet !~ m{\A\d+\z}) {
    die "Poet must be a number, got '$poet'.\n";
}
if ($name !~ m{\A[\w./ -]+\z}) {
    die "Output prefix '$name' contains unsupported characters.\n";
}

my $flag = 1;     # true while the last index page still listed poems
my $count = 1;    # index page currently being processed (1-based)
my %hash;         # cleaned poem text, keyed by poem title

print "Going to retrieve poet number $poet, who you say is $name.\n";

# Poem IDs that have already been downloaded.  Used both to avoid appending
# the same poem twice and to stop paging if the site keeps serving index
# pages whose poems have all been seen already.
my %seen_poem;

# Walk the poet's index pages (1, 2, 3, ...) for as long as a page yields at
# least one new poem link.  Every linked poem is downloaded, stripped of
# markup and stored in %hash under its title.
while ($flag) {

    # Assume this is the last page until a new poem link proves otherwise.
    $flag = 0;

    if ($count > 1) {
        print "On page $count.\n";
        $poet = $ARGV[0] . "/$count";
    }

    my $index_file = $name . "_poet.html";
    my $poem_file  = $name . "_temp.html";

    _fetch_to_file("http://plagiarist.com/poetry/poets/$poet/", $index_file);

    # A missing index file means the download produced nothing; stop paging
    # rather than dying so the poems collected so far can still be written.
    my $index_fh;
    if (!open($index_fh, '<', $index_file)) {
        warn "Cannot read $index_file: $!\n";
        last;
    }

    while (my $line = <$index_fh>) {
        next unless $line =~ m{<li><a href="http://plagiarist\.com/poetry/(\d+)/">};

        # Copy the capture right away; later matches would overwrite $1.
        my $poem_id = $1;
        next if $seen_poem{$poem_id}++;

        $flag = 1;
        print "           Getting poem number $poem_id.\n";

        _fetch_to_file("http://plagiarist.com/poetry/$poem_id/", $poem_file);

        my $poem_fh;
        if (!open($poem_fh, '<', $poem_file)) {
            warn "Cannot read $poem_file: $!\n";
            next;
        }
        my $html = do { local $/ = undef; <$poem_fh> };    # slurp whole page
        close($poem_fh);

        my ($title, $text) = _extract_poem($html);
        if (!defined $text) {
            warn "No poem text found for poem number $poem_id; skipping.\n";
            next;
        }

        # Keep untitled poems apart instead of piling them up under one key.
        $title = "Untitled (poem $poem_id)" if !defined $title;

        $hash{$title} .= $text;
    }

    close($index_fh);

    $count++;
}

# Download $url into $file with wget, replacing any previous copy of $file.
# Returns true when wget reported success.  A non-zero wget exit status is
# deliberately not fatal: callers cope with an empty or missing file.
# Dies only when wget itself cannot be started.
sub _fetch_to_file {
    my ($url, $file) = @_;

    # Remove the stale copy first so a failed download cannot be mistaken
    # for fresh content.
    unlink($file);

    # List-form system() bypasses the shell, so neither the URL nor the file
    # name needs quoting and neither can inject shell syntax.
    my $status = system('wget', '-q', '-O', $file, $url);
    die "Cannot run wget: $!\n" if $status == -1;

    return $status == 0;
}

# Pull the poem out of a downloaded poem page.
# Takes the page's HTML (may be undef) and returns (title, text), where text
# is the content of the <div id="poem"> element with tags, comment markers
# and boilerplate lines removed, and title is the content of a <title>
# element found inside that div (undef if there is none).
# Returns an empty list when the page has no poem div.
sub _extract_poem {
    my ($html) = @_;
    return if !defined $html;

    my ($text) = $html =~ m{<div id="poem">(.*?)</div>}ms;
    return if !defined $text;

    # The title has to be captured before the markup is stripped below.
    my ($title) = $text =~ m{<title>(.*?)</title>}ms;

    $text =~ s/(?:<.*?>|\<!--|-->|\n\n\n)//mg;
    $text =~ s/^(?:Submitted by.*|    poem|    )$//mg;

    return ($title, $text);
}

print "Writing it down...\n";

# The downloaded pages are no longer needed; remove them first so they do not
# linger if writing the output fails below.
unlink($name . "_temp.html");
unlink($name . "_poet.html");

# Write every collected poem to "<prefix>.txt", one _PAGEBREAK_ marker per
# poem, ordered alphabetically by title.
my $output_file = $name . ".txt";
unlink($output_file);
open(my $output_fh, '>', $output_file)
    or die "Cannot write $output_file: $!\n";

# Compare case-insensitively so titles are truly alphabetized; the plain
# string comparison only breaks ties to keep the order deterministic.
foreach my $title (sort { lc($a) cmp lc($b) or $a cmp $b } keys(%hash)) {
    print "       $title\n";
    print {$output_fh} "_PAGEBREAK_\n\n$hash{$title}\n";
}

# Buffered write errors (e.g. a full disk) only surface when closing.
close($output_fh) or die "Cannot finish writing $output_file: $!\n";