use strict;             # Always use strict
use warnings;           # ...and warnings
use XML::RSS::Tools;    # Parse the content
use HTML::LinkExtor;    # Extract the links
use HTML::Entities;     # "fix" any entities
use LWP::UserAgent;     # Change the UserAgent

# Fetch an RSS feed, run it through the XSL stylesheet 'rss.xsl', decode any
# HTML entities in the transformed output and print every distinct link found
# in <a> tags, one per line.

my $rss_feed = XML::RSS::Tools->new;

my $ua = LWP::UserAgent->new;
$ua->agent('pps 0.1.83 [rss]');

my $rss      = "http://www.scottishlass.co.uk/rss.xml";
my $request  = HTTP::Request->new(GET => $rss);
my $response = $ua->request($request);

# Friendly messages for the status codes we expect to see most often.
my %errors = (
    '500' => 'Bad hostname supplied',
    '501' => 'Protocol not supported',
    '404' => 'URL not found',
    '403' => 'URL forbidden',
    '401' => 'Authorization failed',
    '400' => 'Bad request found',
    '302' => 'Redirected URL',
);

# Ask the response object for the numeric code instead of scraping it out of
# the status line.
my $status = $response->code;

# Any unsuccessful response is fatal, not only the codes listed above;
# otherwise an error page would be handed to the RSS parser as if it were a
# feed.  Codes listed in %errors keep their own message even if the response
# would otherwise count as successful.
if (defined $errors{$status}) {
    die "ERROR: $errors{$status}\n";
}
elsif (!$response->is_success) {
    die "ERROR: " . $response->status_line . "\n";
}

my $content = $response->content;

# Each XML::RSS::Tools step reports failure through its return value, so stop
# at the first stage that fails rather than printing an empty result.
$rss_feed->rss_string($content) or rss_failure('loading the RSS feed');
$rss_feed->xsl_file('rss.xsl')  or rss_failure('loading rss.xsl');
$rss_feed->transform            or rss_failure('XSL transformation');

my $parsed  = $rss_feed->as_string;
my $decoded = decode_entities($parsed);
parse_links($decoded);
# print $decoded;

# rss_failure($stage)
#   Die with the error recorded by the XML::RSS::Tools object, prefixed with
#   a description of the processing stage that failed.
sub rss_failure {
    my ($stage) = @_;

    my $reason = $rss_feed->as_string('error');
    $reason = 'unknown error' unless defined $reason && length $reason;

    die "ERROR: $stage failed: $reason\n";
}

# parse_links($html)
#   Extract the link attributes of every <a> tag in $html and print each
#   distinct value once, in order of first appearance, one per line.
sub parse_links {
    my $decoded = shift;

    my @links    = ();
    my $callback = sub {
        my ($tag, %attr) = @_;
        return if $tag ne 'a';
        push(@links, values %attr);
    };

    my $p = HTML::LinkExtor->new($callback);
    $p->parse($decoded);
    $p->eof;    # signal end of input so the parser flushes anything buffered

    # Drop repeated links while preserving first-seen order.
    my %seen;
    my @uniq = grep { !$seen{$_}++ } @links;

    # Print the de-duplicated list (the raw @links may contain repeats).
    print join("\n", @uniq), "\n";
}