The idea came up when we asked RedHat how we could get package updates for our licensed RHEL5 servers that were behind a firewall and could not sign into RHN themselves. The answer was: Buy a Satellite or buy a Satellite Proxy, both of which we did not want to do. So we decided to roll our own internal yum repository and needed a way to automatically download the packages.
Therefore I came up with the following script. It uses WWW::Mechanize and friends to get the information from their site and to download any packages that are not (yet) in the destination. Already downloaded packages are tracked in a file and are not downloaded again, so after the initial big download, you only pull the updates.
RedHat sponsors the Open Source community with good money and brains, so please use this only if you have a legal subscription and valid licenses.
Especially the first time you download a channel, it will hammer their servers!
If they find many people doing this over and over again, they might change their website which will make this script fail! Thanks.
#!/usr/bin/perl -w
#
# Name: rhn_channel_download.pl
#
# Description: Download rpm packages from RHN for legal subscriptions
+ that have no access to RHN
#
# This script will connect to the RHN we
+bsite, find any (new) packages that belong
# to a (selectable) channel, download th
+em, check their md5 sums and then send a
# summary mail to a recipient.
#
# RedHat sponsors the Open Source commun
+ity with good money and brains,
# so please use this only if you have a
+legal subscription and valid licenses.
# Especially the first time you download
+ a channel, it will hammer their servers!
# If they find many people doing this ov
+er and over again, they might change their
# website which will make this script f
+ail! Thanks.
#
# Author: svenXY (happy PerlMonk) mail me AT gmx dot net
#
# run with --help for help
#
# 2008-09-10
use strict;
use WWW::Mechanize;
use WWW::Mechanize::FormFiller;
use URI::URL;
use Tie::File;
use Digest::MD5;
use Getopt::Long;
use HTML::TableExtract qw(tree);
use Encode;
############### Variables and setup ##############
my ($verbose, $quiet, $help, $recipient, $channel, $arch, $username, $
+password);
my $destination_dir = '.';
my $rpmlist = '.rpmlist';
my $sender = 'RHN packages update <rhnupdater@yourcorp.com>';
GetOptions ('verbose' => \$verbose,
'quiet' => \$quiet,
'destination:s' => \$destination_dir,
'list:s' => \$rpmlist,
'channel:s' => \$channel,
'architecture:s' => \$arch,
'username:s' => \$username,
'password:s' => \$password,
'help' => \$help,
'sender:s' => \$sender,
'recipient:s' => \$recipient
);
$rpmlist = $destination_dir . '/' . $rpmlist;
usage() if $help;
usage() unless ( defined $username && defined $password );
#####################################
# Tie to rpmlist file and read it in
####################################
warn "Warning: RPM list file ($rpmlist) does not yet exist. An empty l
+ist file will now be created.\n" unless (-f $rpmlist);
tie my @existing_packages, 'Tie::File', $rpmlist or die "Could not tie
+ to $rpmlist: $!";
my %existing_packages = map { $_ => 1 } @existing_packages;
####################################
# HTTP to RHN network
####################################
my $agent = WWW::Mechanize->new( autocheck => 1 );
my $formfiller = WWW::Mechanize::FormFiller->new();
$agent->env_proxy();
#$agent->get('http://www.redhat.com/rhn'); changed html again...
safe_get('https://www.redhat.com/wapps/sso/rhn/login.html?redirect=htt
+p%3A%2F%2Frhn.redhat.com%2Frhn%2FYourRhn.do');
$agent->form_number(1) if $agent->forms and scalar @{$agent->forms};
#$agent->form_number(2);
{ local $^W; $agent->current_form->value('username', $username); };
{ local $^W; $agent->current_form->value('password', $password); };
#$agent->form_number(2);
$agent->submit();
print "Connected to RHN.\n" if $verbose;
################################################################
# get available Channels, architectures as well as the links that lead
+ there
################################################################
# the second is probably better as it is language independant
#$agent->follow_link('text' => 'Channels');
$agent->follow_link( 'url_regex' => qr/channels/ );
my $te = HTML::TableExtract->new( attribs => { 'id' => 'channel-list'
+}, keep_html => 1 );
$te->parse(decode_utf8 $agent->content());
my $table = $te->first_table_found;
my $table_tree = $table->tree;
my @channels;
foreach my $row (0..$table_tree->maxrow()) {
next unless $table_tree->cell($row,0)->as_HTML() =~ /-channel/
+;
(my $rhnchannel = $table_tree->cell($row,0)->as_text() ) =~ s/
+^.*>\s*(.+?)\s*$/$1/g;
if ( defined($channel) ){
next unless $rhnchannel eq $channel;
}
for (@{ $table_tree->cell($row,1)->extract_links('a') }) {
my($link, $element, $attr, $tag) = @$_;
my ($channel_id, $channel_arch) = $element->as_text()
+=~ m/\?cid=(\d+)">(.*)\s$/;
if ( defined($arch) ){
next unless $channel_arch eq $arch;
}
push(@channels, [ $rhnchannel, $channel_arch, $channel
+_id ] );
}
}
################################################################
# ask the user for a channel if more than one has been found
################################################################
my $selection;
if (scalar @channels == 1) {
$selection = 0;
}
else {
my $index = 0;
for (@channels) {
print "[", $index++, "] ", join(' - ', $_->[0], $_->[1
+]), "\n";
}
while ( not defined $selection ){
print "Please select a Channel [0 - ", $index-1, "]: "
+;
$selection = <STDIN>; chomp $selection;
$selection = undef if ($selection =~ /[\D]+/ || $selec
+tion > $index);
}
}
print "Selected channel: $channels[$selection][0] - $channels[$selecti
+on][1] (#$channels[$selection][2])\n" if $verbose;
######################################################################
+#########
# HTTP to rpmlist for the selected channel
######################################################################
+##########
$agent->get(
'https://rhn.redhat.com/network/software/channels/packages.pxt
+'
. '?upper=100000&filter_string=&lower=1&alphabar_column=NVREA&
+cid='
. $channels[$selection][2]);
print "Retrieving package data.\n" if $verbose;
# fixed - thanks to yves-alexis
#my @links = $agent->find_all_links( url_regex => qr/details\.pxt\?pid
+=/ );
my @links = $agent->find_all_links( url_regex => qr/Overview\.do\?pid=
+/ );
my %avail_packages;
my %packages = ( 'new' => [],
'existing' => 0,
'dl_error' => 0,
'md5_error' => 0,
);
print "Downloading new packages.\n" if $verbose;
my $dl_count = 0;
######################################################################
+#########
# work on packages
######################################################################
+##########
for my $link ( @links ) {
my $url = $link->url_abs;
my $rpmname = $link->text;
my $pkg_id;
($pkg_id = $url) =~ s/^.*pid=//;
$avail_packages{$rpmname} = $pkg_id;
##############################################################
+##
# download new packages
##############################################################
+##
if (! $existing_packages{$rpmname} ) {
$agent->get('https://rhn.redhat.com/network/software/p
+ackages/details.pxt?pid=' . $pkg_id);
$agent->content =~ m{<th><strong>MD5 Sum:</strong></t
+h>\s+<td><tt>([a-f0-9]+)</tt></td>}s;
my $md5 = $1;
my $pkg_link = $agent->find_link( text => 'Download Pa
+ckage');
print ++$dl_count . " - Downloading new package: $rpmn
+ame ($pkg_id)" if $verbose;
$agent->get( $pkg_link->url_abs, ':content_file' => "$
+destination_dir/$rpmname.rpm" );
if ($agent->success()){
##############################################
+######
# compare md5 sum from webpage with computed o
+ne
##############################################
+######
if (compare_md5($md5, "$destination_dir/$rpmna
+me.rpm")) {
print " ... success!\n" if $verbose;
push(@existing_packages, $rpmname);
push(@{$packages{'new'}}, $rpmname);
}
else {
print "... MD5 sum mismatch, removing
+package\n" if $verbose;
unlink "$rpmname.rpm";
$packages{'md5_error'}++;
}
}
else {
print " ... failure!\n" if $verbose;
$packages{'dl_error'}++;
}
}
else {
$packages{'existing'}++;
}
}
################################################################
# update rpmlist with added packages
################################################################
@existing_packages = sort {uc($a) cmp uc($b)} @existing_packages;
## remove stupid empty lines...
@existing_packages = grep {!/^$/} @existing_packages;
untie @existing_packages;
################################################################
# generate summary, print it to STDOUT and send it as mail
################################################################
my $sum_new_packages = scalar @{$packages{'new'}};
my $output="############### RPM Package Mirror Tool ##################
+######
### Channel: $channels[$selection][0] - $channels[$selection][1]
### Skriptname: $0
################################################################
$packages{'existing'} packages have not been downloaded again.
$packages{'dl_error'} packages were not properly downloaded.
$packages{'md5_error'} packages were not added as their MD5 sum did no
+t match.
################################################################
$sum_new_packages packages have been added:
################################################################\n"
. join("\n", sort @{$packages{'new'}}) . "\n"
. "################################################################\n"
+;
if ( $verbose || ( $sum_new_packages > 0 && ! $quiet )) {
print $output;
}
if ( $sum_new_packages > 0 && defined($recipient)) {
send_mail($output, $recipient, $sender, $sum_new_packages);
}
################################################################
# you will only want to run createrepo on exit 0...
################################################################
exit 2 unless $sum_new_packages > 0;
exit 0;
######################################################################
+#########
sub send_mail {
######################################################################
+#########
my ($output, $recipient, $sender, $num) = @_;
open(SENDMAIL, "|/usr/lib/sendmail -oi -t -odq")
or die "Can't fork for sendmail: $!\n";
print SENDMAIL <<"EOF";
From: $sender
To: $recipient
Subject: RHN update script added $num packages
$output
EOF
close(SENDMAIL) or warn "sendmail didn't close nicely";
}
######################################################################
+#########
sub compare_md5 {
######################################################################
+#########
my ($md5, $file) = @_;
open(FILE, $file) or
do { warn "Can't open '$file' to calculate md5sum: $!"
+; return 0; };
binmode(FILE);
return (Digest::MD5->new->addfile(*FILE)->hexdigest eq $md5)?1
+:0;
}
######################################################################
+#########
sub usage {
######################################################################
+#########
print<<EOH;
Usage: $0 --username <rhn-username> --password <passwd> [ optional opt
+ions ]
required:
--username the username you got from RHN network
--password the password for RHN Network
optional:
--channel the RHN channel you wish to download from (if
+not set, script will present a selection)
--architecture your Hardware architecture (if not set, script
+ will present a selection)
--destination the directory you want to download to (default
+s to '.')
--listfile The file (without path) with the rpm-list (def
+aults to '.rpmlist' in the destination), will be updated
--recipient Recipient for the mail, set this to a valid em
+ail address otherwise NO MAILS will be sent
--sender Sender of the mail (defauls to some meaningful
+ sender)
--verbose More output (without -v, only summary and some
+ warnings will be displayed)
--quiet Don't print summary
--help This help
EOH
exit 0;
}
ToDo: Add POD; Split up into functions; Interactively query for username & password if none are given as parameters or read parameters from config file. Patches are welcome!