This script will pull a list of the books you currently have checked out from the San Francisco Peninsula Library system. It will email you the list if any of the books are due within a user definable period of time.
The challenging part of this script is that the data we're interested in takes 5 clicks to get to. The original script I wrote scraped all five pages and was very fragile. I rewrote the code to get to the fifth page using WWW::Mechanize (written by petdance) and am very pleased with the result. The fetch_html() subroutine used to be several hundred lines long, now its about a dozen.
#!/usr/bin/perl
use strict;
use 5.006;
use warnings;
use WWW::Mechanize;
use Time::Piece;
use Mail::Mailer;
use HTML::TableExtract;
use Getopt::Long;
use Pod::Usage;
# Default auth info
my $defaultuser = '12345678901234';
my $defaultpass = '1234';
my $defaultemail = 'you@youremail.com';
my $starturl = 'http://catalog.plsinfo.org/ipac20/ipac.jsp?profile
+=';
# Get command line options
my %opt;
GetOptions (\%opt,
'user=s',
'pass=s',
'outfile=s',
'email=s',
'daystowarn=i',
'maxtitlelen=i',
'send_email',
'print_output',
'save_output',
'help',
) or pod2usage(-verbose => 1) && exit;
# Fill in some defaults
$opt{daystowarn} ||= 3;
$opt{user} ||= $defaultuser;
$opt{pass} ||= $defaultpass;
$opt{email} ||= $defaultemail;
$opt{maxtitlelen} ||= 58;
$opt{print_output} = 1 if !($opt{send_email} || $opt{save_output});
$opt{help} && pod2usage(-verbose => 1) && exit;
# Generate the output text
my $html = fetch_html($starturl);
my $data = parse_data_from_html($html);
my $sorted = sort_data_by_date($data);
my $mindays = $sorted->[0][2];
my $output = build_output($sorted);
# Do something with the output text
save_output($output) if $opt{save_output};
print_output($output) if $opt{print_output};
email_output($output,$mindays)
if $opt{send_email} and $mindays < $opt{daystowarn};
######### SUBS ##########
sub fetch_html {
my $url = shift;
my $agent = WWW::Mechanize->new();
$agent->quiet(1);
$agent->get($url);
error($agent,1) unless $agent->status eq 200;
$agent->follow('Your Library Account');
$agent->form(1);
$agent->field('sec1',$opt{user});
$agent->field('sec2',$opt{pass});
$agent->click('button');
error($agent,2) if $agent->content =~ /Login\s+failed/;
$agent->follow('Checked Out');
$agent->follow('Due Date');
return $agent->content;
}
sub parse_data_from_html {
my $html = shift;
my $te = new HTML::TableExtract (
headers => ['Due Date'],
subtables => 1,
);
$te->parse($html);
my (@dates,@titles);
foreach my $ts ($te->table_states) {
my $depth = ($ts->coords)[0];
if ($depth == 2) {
my $title = ($ts->rows)[0]->[0];
$title =~ s|\s+/$||;
$title = substr($title,0,$opt{maxtitlelen})
if length($title) > $opt{maxtitlelen};
push(@titles,$title);
} else {
@dates = map {$_->[0]} $ts->rows;
}
}
my ($checkedout) = $html =~ /Checked\s+Out:\s*(\d+)/;
$checkedout ||= 0;
warn "Dates=" . @dates . "; Titles=" . @titles . "; Checkedout=$chec
+kedout"
if @titles != @dates or @titles != $checkedout;
my @rv;
push(@rv,[$titles[$_],$dates[$_]]) for 0..$#dates;
return \@rv;
}
sub sort_data_by_date {
my $data = shift;
my @rv;
my $today = Time::Piece->strptime(localtime()->ymd, "%Y-%m-%d");
for (@$data) {
my ($title,$datestr) = @$_;
my $date = Time::Piece->strptime($datestr, "%m/%d/%Y");
my $days = int(($date - $today)->days);
push(@rv,[$title,$datestr,$days]);
}
@rv = sort {$a->[2] <=> $b->[2]} @rv;
return \@rv;
}
sub build_output {
my $sorted = shift;
my $output = localtime() . "\n";
for (@$sorted) {
my ($title,$datestr,$days) = @$_;
$output .= sprintf "%2s days (%s) %s\n", $days, $datestr, $title;
}
return $output;
}
sub print_output {
my $output = shift;
print $output;
}
sub save_output {
my $output = shift;
warn "no output file specified" unless $opt{outfile};
if ($output && $opt{outfile}) {
open(OUT,">$opt{outfile}") or die;
print OUT $output;
close(OUT);
}
}
sub email_output {
my $output = shift;
my $mindays = shift;
my $plural = abs($mindays) == 1 ? '' : 's';
my $subject;
if ($mindays < 0) {
$subject = "Library Books Are " . abs($mindays) . " day$plural LAT
+E";
} elsif ($mindays == 0) {
$subject = "Library Books Are Due Today";
} else {
$subject = "Library Books Are Due in $mindays day$plural";
}
my $mailer = Mail::Mailer->new;
my %headers = (To => $opt{email},
Subject => $subject,
);
$mailer->open(\%headers);
print $mailer $output;
$mailer->close;
}
sub error {
my $agent = shift;
my $err = shift;
my $url = $agent->uri;
if ($err == 1) {
warn "ERROR (".$agent->status."): Can't get $url\n";
warn " Library website may be down\n";
} elsif ($err == 2) {
warn "ERROR: Incorrect login information\n";
}
exit;
}
=head1 NAME
booksout.pl
=head1 SYNOPSIS
Print, save, or email a list of books checked out from the
San Francisco Peninsula Library System.
% booksout.pl --user=12345678901234 -pass=1234
=head1 DESCRIPTION
Fetch and print a list of books checked out of the
San Francisco Peninsula Library System
=head1 ARGUMENTS
--user=USER # 14 digit library barcode
--pass=PASS # 4 digit library pin nuber (last part of phone nu
+mber)
--email=EMAIL # email to send booklist to
--outfile=FILENAME # file to save booklist to
--daystowarn=INT # warn this many days in advance
--maxtitlelen=INT # maximum length of title to display in booklist
=head1 OPTIONS
--help # print this help info
--send_email # email booklist if due in less than $daystowarn d
+ays
--save_output # save the booklist to $outfile
--print_output # print booklist
=head1 AUTHOR
blakem
=head1 CREDITS
petdance wrote WWW::Mechanize, which made this script feasible
=head1 BUGS
None that I know of.
=cut
-Blake