Category: | web stuff |
Author/Contact Info | Joe Cullin blahblahblah |
Description: | Scans WFMU's MP3 archive RSS Feed for certain show titles, and then downloads those shows.
There's no particular reason to use POE::Component::RSSAggregator rather than XML::RSS::Feed, other than the fact that I heard about the POE version first and was interested in trying something in POE. (Thanks again to everyone who helped me get around the problems caused by my out-of-date POE, in the thread "POE::Component::RSSAggregator breaks LWP::Simple::get".) Also, I heartily recommend this station to everyone! |
use strict;
use warnings;

use POE qw(Component::RSSAggregator);
use LWP::Simple;
use URI;
use File::Basename;

# Watches an MP3 archive RSS feed and downloads every newly announced show
# whose title matches one of the wanted patterns below.

#################################################################
# Configuration

# Case-insensitive alternation of show-title fragments to download.
my $wantedShowsPattern = join(
    '|',
    'sinner',
    'soulville',
    'Laura Cantrell',
    'mister c',
    'billy jam',
    # will get "unshackled..." too?
    'coffee',    # does this feed include coffee2go also?
    'Dave Emory',
    # 7 Second Delay?
    # Ken's show?
    # Pseu Braun?
    # Irwin (calypso 2-3PM)?
);

# Directory the MP3 files are saved into (must end with a slash because the
# file name is appended by plain concatenation).
my $downloadDir = 'E:/wfmu/';

# Feeds handed to the aggregator; 'delay' is passed through unchanged.
my @feeds = (
    {
        url   => "http://wfmu.org/archivefeed/mp3.xml",
        name  => "wfmu_mp3",
        delay => 3600,
    },
);
#################################################################

-d $downloadDir or die("download dir $downloadDir must be created.\n");
-w $downloadDir or die("download dir $downloadDir must be writable.\n");

POE::Session->create(
    inline_states => {
        _start      => \&init_session,
        handle_feed => \&handle_feed,
    },
);

$poe_kernel->run();

# _start handler: creates the RSS aggregator component and registers every
# configured feed with it.
sub init_session {
    my ( $kernel, $heap, $session ) = @_[ KERNEL, HEAP, SESSION ];

    $heap->{rssagg} = POE::Component::RSSAggregator->new(
        alias    => 'rssagg',
        debug    => 1,
        callback => $session->postback("handle_feed"),
        tmpdir   => 'f:/cgi/wfmu/',
    );

    $kernel->post( 'rssagg', 'add_feed', $_ ) for @feeds;
}

# Postback handler invoked by the aggregator. Prints every new headline and
# downloads those whose show name matches $wantedShowsPattern.
sub handle_feed {
    my $feed = $_[ARG1]->[0];
    return unless $feed;

    printf "\n========= %s ===============\n", scalar(localtime);

    for my $headline ( $feed->late_breaking_news ) {
        my $title = $headline->headline();
        $title = '' unless defined $title;
        print $title . "\n";

        # parseHeadline returns nothing on failure; skip instead of
        # dereferencing undef (which is fatal under strict refs).
        my $parsed = parseHeadline($title);
        next unless $parsed;
        next unless $parsed->{'show'} =~ m/$wantedShowsPattern/i;

        print "\n----- DOWNLOADING ... ---------------------\n";
        print " url: " . $headline->url() . "\n";
        processUrl( $headline->url() );
        print "\n";
    }
}

# Given the URL of an .m3u playlist, fetches the playlist, treats its content
# as the URL of an MP3 file and stores that file in $downloadDir.
# Prints progress/diagnostics and returns nothing.
sub processUrl {
    my $url = shift;

    if ( !defined $url || $url !~ /\.m3u/i ) {
        print "Invalid playlist url?\n";
        return;
    }

    print "retrieving m3u file...\n";
    my $mp3Url = LWP::Simple::get($url);

    # get() returns undef on failure; check before interpolating/matching.
    if ( !defined $mp3Url ) {
        print "Either the get failed or the content is unusable?\n";
        return;
    }

    # Playlist content usually ends with a line terminator; strip
    # surrounding whitespace so the URL is clean for getstore().
    $mp3Url =~ s/^\s+//;
    $mp3Url =~ s/\s+$//;
    print "mp3 url: $mp3Url\n";

    if ( $mp3Url !~ /mp3\z/ ) {
        print "Either the get failed or the content is unusable?\n";
        return;
    }

    # example url:
    # http://archive.wfmu.org:5555/archive/BJ/bj070119.mp3
    my $uriObj       = URI->new($mp3Url);
    my $uriPath      = $uriObj->path();
    my $baseFileName = basename($uriPath);
    if ( $baseFileName eq '' ) {
        print "Botched processing of filename?\n";
        return;
    }

    my $mp3File = $downloadDir . $baseFileName;
    print "file: $mp3File\n";
    if ( -e $mp3File ) {
        print "File already exists!\n";
        return;
    }

    print "SAVING MP3 TO FILE...\n";
    my $responseCode = getstore( $mp3Url, $mp3File );

    # Report the real outcome instead of unconditionally claiming success.
    if ( is_success($responseCode) ) {
        print "done saving.\n";
    }
    else {
        print "download failed (HTTP status $responseCode).\n";
    }
    return;
}

# Splits a feed headline of the form
#   "WFMU MP3 Archive: <show> from <Mon> <day>, <year>"
# Returns a hash ref with the key 'show' (plus 'month', 'day' and 'year' from
# the trailing date), or nothing when the headline is empty or unparseable.
sub parseHeadline {
    my $original = shift;
    return if !defined $original || $original eq '';

    my $headline = $original;
    $headline =~ s/^WFMU\sMP3\sArchive:\s+//;

    if (
        $headline =~ s{
            \sfrom
            \s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)
            \s(\d+),
            \s(\d{4})
            $
        }{}six
      )
    {
        my ( $mon, $mday, $year ) = ( $1, $2, $3 );

        # The date is not used by the caller yet; it is returned for
        # whoever wants it later.
        return {
            'show'  => $headline,
            'month' => $mon,
            'day'   => $mday,
            'year'  => $year,
        };
    }

    # Use the saved copy: @_ was already shifted, so $_[0] would be undef.
    print "parse error on headline?\n ( $original )\n";
    return;
}
|
---|
Back to
Code Catacombs