use strict;
# Debug
use warnings;
# use diagnostics;
# use LWP::Debug qw( + );
use WWW::Mechanize;
use XML::RSS;
use HTML::TableExtract;
use constant CPAN_URL => 'http://search.cpan.org/~%s/?R=D';
use constant DIST_URL => 'http://search.cpan.org/~%s/%s';
# Lookup table: three-letter English month abbreviation => two-digit,
# zero-padded month number ('Jan' => '01' ... 'Dec' => '12').
# A date module would also work; a small table is all this script needs.
# The numbers are generated from the calendar-ordered name list instead of
# being spelled out pair by pair.
my %months = do {
    my $month_number = 0;
    map { $_ => sprintf( '%02d', ++$month_number ) }
        qw( Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec );
};
# Main program: fetch the author's search.cpan.org directory listing, pull
# the module table out of the returned HTML and print it to STDOUT as an
# RSS feed.

# The CPAN id is the only command-line argument.  Check it before calling
# uc() so a missing argument produces just the intended error message and
# not an extra "uninitialized value" warning.
my $cpan_id = $ARGV[ 0 ];
die 'No author (CPAN id) specified.'
    unless defined $cpan_id && length $cpan_id;
my $author = uc $cpan_id;

my $url   = sprintf( CPAN_URL, lc $author );
my $agent = WWW::Mechanize->new;
my $feed  = XML::RSS->new;

# NOTE(review): depth/count are understood to be 0-based in
# HTML::TableExtract, which would make this the third top-level table of
# the page -- the page layout this relies on is not visible here; confirm.
my $parser = HTML::TableExtract->new( depth => 0, count => 2 );

$agent->get( $url );
die "Error fetching $url: " . $agent->response->status_line
    unless $agent->success;

$feed->channel(
    title       => "CPAN Modules by $author",
    link        => $url,
    description =>
        "A listing, sorted by date, of CPAN modules uploaded by $author"
);

$parser->parse( $agent->content );

# NOTE(review): newer HTML::TableExtract releases appear to name this
# method first_table_found -- verify which one the installed version has.
my $state = $parser->first_table_state_found;

# Without a matching table there is nothing to iterate over; fail with a
# clear message instead of a method call on an undefined value.
die "No module table found at $url" unless $state;

my @rows = $state->rows;
shift @rows;    # the first row holds the column headings, not a module

foreach my $row ( @rows ) {

    # A row without a distribution name cannot yield a title or a link.
    next unless defined $row->[ 0 ];

    # Columns used: 0 = distribution name, 1 = description, 3 = date.
    $feed->add_item(
        title       => $row->[ 0 ],
        link        => sprintf( DIST_URL, lc $author, $row->[ 0 ] ),
        description => $row->[ 1 ],
        dc          => {
            date => isodate( $row->[ 3 ] )
        }
    );
}

print $feed->as_string;
# isodate( $text )
#
# Converts a "DD Mon YYYY" date into "YYYY-MM-DD",
# e.g. "07 Dec 2003" => "2003-12-07".
#
# $text   - day, three-letter month abbreviation and four-digit year,
#           separated by whitespace.  Leading, trailing and repeated
#           whitespace is tolerated, the month is matched regardless of
#           letter case, and a one-digit day is zero-padded.
# Returns - the date as a YYYY-MM-DD string.  If the text cannot be parsed
#           a warning is emitted and the empty string is returned.  The
#           result is always exactly one scalar, so the sub is safe to call
#           inside a hash constructor (list context).
sub isodate {
    my ( $text ) = @_;
    $text = '' unless defined $text;

    # split ' ' (a one-space string, not the regex / /) splits on any run
    # of whitespace and discards leading whitespace, so padded cell text
    # does not shift the fields.
    my ( $day, $month_name, $year ) = split ' ', $text;

    my $month
        = defined $month_name
        ? $months{ ucfirst( lc $month_name ) }
        : undef;

    # $month can only be defined when $day is, and the defined() checks run
    # first, so the pattern matches never see an undefined value.
    unless ( defined $month
        && defined $year
        && $day  =~ /\A\d{1,2}\z/
        && $year =~ /\A\d{4}\z/ )
    {
        warn "isodate: cannot parse date '$text'\n";
        return '';
    }

    return sprintf( '%s-%s-%02d', $year, $month, $day );
}
__END__

=head1 NAME

cpandir_asrss - fetch an author's CPAN directory in RSS format

=head1 SYNOPSIS

    cpandir_asrss cpanid > cpanid.rss

=head1 DESCRIPTION

Although there is already a "recent" RSS feed for CPAN
(http://search.cpan.org/rss/search.rss), it returns results for all authors.
If you have a personal page where you would like to display only those
modules which you've authored, this script will generate that feed for you.

The script takes one argument: a CPAN ID (e.g. bricas). It is
case-insensitive.

Using WWW::Mechanize, HTML::TableExtract and XML::RSS, you can fetch, parse
and return an RSS feed for an author's CPAN directory.

=head1 NOTICE

Please do not abuse http://search.cpan.org/ with this script. Consider using
WWW::Mechanize::Cached if you want to use this on a regular basis.

=cut