Beefy Boxes and Bandwidth Generously Provided by pair Networks
We don't bite newbies here... much
 
PerlMonks  

download mp3s listed in RSS feed

by blahblahblah (Priest)
on Jan 27, 2007 at 02:36 UTC ( #596817=sourcecode: print w/replies, xml ) Need Help??
Category: web stuff
Author/Contact Info Joe Cullin blahblahblah
Description: Scans WFMU's MP3 archive RSS Feed for certain show titles, and then downloads those shows.

There's no particular reason to use POE::Component::RSSAggregator rather than XML::RSS::Feed, other than the fact that I heard about the POE version first and was interested in trying something in POE. (Thanks again to everyone who helped me get around the problems caused by my out-of-date POE, in the thread "POE::Component::RSSAggregator breaks LWP::Simple::get".)

Also, I heartily recommend this station to everyone!

use strict;
use warnings;
use POE qw(Component::RSSAggregator);

#################################################################

# Regex alternation of show-title fragments. handle_feed matches it
# case-insensitively against the show name parsed from each headline, and
# only matching shows are downloaded.
my $wantedShowsPattern = join ('|',
                   'sinner',
                   'soulville',
                   'Laura Cantrell',
                   'mister c',
                   'billy jam', # will get "unshackled..." too?
                   'coffee',    # does this feed include coffee2go also?
                   'Dave Emory',

                   # 7 Second Delay?
                   # Ken's show?
                   # Pseu Braun?
                   # Irwin (calypso 2-3PM)?
                  );

# Directory the MP3 files are saved into. The trailing slash matters:
# processUrl appends the file name directly to this string.
my $downloadDir = 'E:/wfmu/';

# Feed descriptions handed to the RSS aggregator via 'add_feed'.
# NOTE(review): 'delay' is presumably the polling interval in seconds --
# confirm against the POE::Component::RSSAggregator documentation.
my @feeds = (
         {
          url   => "http://wfmu.org/archivefeed/mp3.xml",
          name  => "wfmu_mp3",
          delay => 3600,
         },
        );

#################################################################

# Fail early, before any feed is polled, if the download directory is
# missing or cannot be written to.
-d $downloadDir or die("download dir $downloadDir must be created.\n");
-w $downloadDir or die("download dir $downloadDir must be writable.\n");

# One session drives everything: _start sets up the aggregator and
# handle_feed receives its callbacks.
POE::Session->create(
             inline_states => {
                       _start      => \&init_session,
                       handle_feed => \&handle_feed,
                      },
            );

# Enter the POE event loop.
$poe_kernel->run();

# POE _start handler. Creates the RSS aggregator component (kept alive in
# the session heap), wires its callback to this session's "handle_feed"
# state, and then registers every entry of @feeds with it.
sub init_session
{
  my $kernel  = $_[KERNEL];
  my $heap    = $_[HEAP];
  my $session = $_[SESSION];

  my %aggregatorArgs = (
    alias    => 'rssagg',
    debug    => 1,
    callback => $session->postback("handle_feed"),
    tmpdir   => 'f:/cgi/wfmu/',
  );
  $heap->{rssagg} = POE::Component::RSSAggregator->new(%aggregatorArgs);

  foreach my $feedSpec (@feeds) {
    $kernel->post( 'rssagg', 'add_feed', $feedSpec );
  }
}

# Aggregator callback (reached through the session postback). Prints every
# new headline of the feed and, for headlines whose show name matches
# $wantedShowsPattern (case-insensitively), hands the headline's URL to
# processUrl for download.
#
# The feed object is taken from the first element of ARG1. Headlines that
# parseHeadline cannot parse are skipped rather than dereferenced, since
# parseHeadline returns nothing for them.
sub handle_feed
{
    my $feed = $_[ARG1]->[0];
    printf "\n========= %s ===============\n", scalar(localtime);
    for my $headline ( $feed->late_breaking_news )
    {
      my $title = $headline->headline();
      $title = '' unless defined $title;
      print $title . "\n";

      # parseHeadline has already reported the problem if this fails.
      my $parsed = parseHeadline($title);
      next unless $parsed;

      next unless $parsed->{'show'} =~ m/$wantedShowsPattern/i;

      print "\n----- DOWNLOADING ... ---------------------\n";
      print "     url:  " . $headline->url() . "\n";

      processUrl($headline->url());

      print "\n";
    }
}

# Downloads the MP3 referenced by a playlist (.m3u) URL into $downloadDir.
#
# Takes the playlist URL as its only argument. The playlist body is
# expected to be a single MP3 URL; surrounding whitespace (including a
# CR/LF line ending) is trimmed before it is checked. The file is saved
# under the base name of the MP3 URL's path and is not downloaded again
# if a file of that name already exists.
#
# Progress and problems are reported on STDOUT; the return value is not
# meaningful.
sub processUrl
{
  use LWP::Simple;
  use URI;
  use File::Basename;

  my $url = shift;
  if ($url !~ /\.m3u/i) {
    print "Invalid playlist url?\n";
    return;
  }

  print "retrieving m3u file...\n";
  my $mp3Url = LWP::Simple::get($url);

  # get() returns undef on failure; check before using the value.
  if (!defined $mp3Url) {
    print "Either the get failed or the content is unusable?\n";
    return;
  }

  # Trim leading/trailing whitespace so a trailing "\r\n" neither defeats
  # the suffix check nor ends up in the download URL.
  $mp3Url =~ s/^\s+|\s+$//g;
  print "mp3 url:  $mp3Url\n";
  if ($mp3Url !~ /\A\S+mp3\z/) {
    print "Either the get failed or the content is unusable?\n";
    return;
  }

  # example url:
  # http://archive.wfmu.org:5555/archive/BJ/bj070119.mp3

  my $uriObj = URI->new($mp3Url);
  my $uriPath = $uriObj->path();
  my $baseFileName = basename($uriPath);

  if ($baseFileName eq '') {
    print "Botched processing of filename?\n";
    return;
  }

  my $mp3File = $downloadDir . $baseFileName;
  print "file: $mp3File\n";

  if (-e $mp3File) {
    print "File already exists!\n";
    return;
  }

  print "SAVING MP3 TO FILE...\n";
  my $responseCode = getstore($mp3Url, $mp3File);
  if (is_success($responseCode)) {
    print "done saving.\n";
  }
  else {
    print "Download failed (HTTP status $responseCode).\n";
    # The file did not exist before this call, so anything there now is a
    # leftover of the failed download; remove it so that a later run is
    # not blocked by the "File already exists!" check.
    unlink $mp3File if -e $mp3File;
  }
  return;
}


# Splits a feed headline of the form
#   "WFMU MP3 Archive: <show> from <Mon> <day>, <year>"
# into its parts.
#
# Returns a hash ref with the key 'show' (the headline minus the optional
# "WFMU MP3 Archive:" prefix and the trailing date) plus 'mon', 'mday' and
# 'year' as captured from the date.
#
# Returns nothing (undef in scalar context) when the headline is undefined
# or empty, or when it does not end in a date; in the latter case a
# diagnostic showing the original headline is printed.
sub parseHeadline
{
  my $headline = shift;
  return if !defined $headline || $headline eq '';

  # Work on a copy so the untouched headline is available for the error
  # message below.
  my $show = $headline;
  $show =~ s/^WFMU\sMP3\sArchive:\s+//;
  if ($show =~ s{
                  \sfrom
                  \s(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)
                  \s(\d+),
                  \s(\d{4})
                  $
                }{}six)
  {
    my ($mon, $mday, $year) = ($1, $2, $3);

    # The date is not used by the caller yet, but it is returned so that
    # it is available when needed.
    return {
            'show' => $show,
            'mon'  => $mon,
            'mday' => $mday,
            'year' => $year,
           };
  }

  print "parse error on headline?\n  ( $headline )\n";
  return;
}

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://596817]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others pondering the Monastery: (3)
As of 2020-09-27 10:37 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?
    If at first I dont succeed, I










    Results (142 votes). Check out past polls.

    Notices?