#!/usr/bin/perl -w
# Downloads NASA Astronomy Picture of the Day
# with metadata in YAML format (HTML is stripped)
#
# Images are downloaded to a directory and stored
# by date.
#
# Script can also maintain a symlink to the latest
# picture for anyone who cares to use it to any end
#
# Modify the configuration in the main package below

use strict;

package HTML::Parser::AOTD;
use base q/HTML::Parser/;

# Scraper for the APOD page, written against the HTML::Parser method
# callbacks start/text/end.  It is a small state machine:
#
#   date   -> counts <p> start tags; the first text after the second <p>
#             becomes $self->{date}
#   image  -> the href of the next <a> becomes $self->{image}
#   title  -> the first text after a <b> start tag becomes $self->{title}
#   credit -> text is appended to $self->{credit} once $count > 1
#             ($count is bumped by text matching /Image Credit/i and by
#             every </b>); ends at </center>
#   desc   -> same scheme keyed on /Explanation/i, filling $self->{desc};
#             ends at the next <center> start tag
#   done   -> everything else is ignored
#
# $state and $count are file-scoped lexicals, so they are shared by every
# parser instance and are never reset to 'date': one parse per process.
my $state = 'date';
my $count = 0;    # per-state counter, reset on every state change

# Start-tag callback.
#   $tagname - lower-cased tag name
#   $attr    - hash ref of the tag's attributes
# $attrseq and $text are part of the callback signature but unused here.
sub start {
    my ($self, $tagname, $attr, $attrseq, $text) = @_;

    if ($state eq 'date') {
        ++$count if $tagname eq 'p';
    }
    elsif ($state eq 'image') {
        if ($tagname eq 'a') {
            $self->{image} = $attr->{href};
            $state = 'title';
        }
    }
    elsif ($state eq 'title') {
        ++$count if $tagname eq 'b';
    }
    elsif ($state eq 'desc') {
        if ($tagname eq 'center') {
            # ghetto whitespace tidying: fold every run of whitespace
            # (newlines included) into one space, then trim both ends.
            # Skipped when no description text was collected at all.
            if (defined $self->{desc}) {
                for ($self->{desc}) {
                    s/\s+/ /g;
                    s/^ +//;
                    s/ +$//;
                }
            }
            $count = 0;
            $state = 'done';
        }
    }
}

# Text callback: stores or accumulates $text depending on the current state.
sub text {
    my ($self, $text) = @_;

    if ($state eq 'date' && $count == 2) {
        $text =~ s/\s*$//;
        $text =~ s/^\s*//;
        $self->{date} = $text;
        $count = 0;
        $state = 'image';
    }
    elsif ($state eq 'title' && $count) {
        for ($text) {
            s/^ +//;
            s/ +$//;
        }
        $self->{title} = $text;
        $count = 0;
        $state = 'credit';
    }
    elsif ($state eq 'credit') {
        if ($count > 1) {
            chomp $text;
            $self->{credit} .= $text;
        }
        elsif ($text =~ /Image Credit/i) {
            ++$count;
        }
    }
    elsif ($state eq 'desc') {
        if ($count > 1) {
            $self->{desc} .= $text;
        }
        elsif ($text =~ /Explanation/i) {
            ++$count;
        }
    }
}

# End-tag callback: counts </b> in the credit and desc states and moves
# from credit to desc on </center>.
sub end {
    my ($self, $tagname) = @_;

    if ($state eq 'credit') {
        ++$count if $tagname eq 'b';
        if ($tagname eq 'center') {
            $count = 0;
            $state = 'desc';
        }
    }
    elsif ($state eq 'desc') {
        ++$count if $tagname eq 'b';
    }
}

1;

package main;
use LWP::Simple;
use YAML::XS;

# CONFIG
# base url
my $AOTD_URL = q|http://apod.nasa.gov/apod|;
# directory to store data
my $DIRECTORY = q|/home/shared/apod/|;
# name of symlink to current image/yaml
# written for Unix-like systems
# to disable, set to undef
my $LINK = "$DIRECTORY/CURRENT";

# ghetto date conversion
my %month = (
    January   => '01',
    February  => '02',
    March     => '03',
    April     => '04',
    May       => '05',
    June      => '06',
    July      => '07',
    August    => '08',
    September => '09',
    October   => '10',
    November  => '11',
    December  => '12',
);

# NOTE(review): in the copy of this file that was reviewed, everything
# between "print <" in usage() and "new();" was missing (it looks like an
# HTML tag stripper ate the heredoc and the code after it).  The usage
# text, the $arg handling and $AOTD_URL_BASE below are a minimal
# reconstruction inferred from how $arg, $AOTD_URL_BASE and $p are used
# further down -- confirm against the original script.

# Prints a short usage message and exits.
sub usage {
    print <<"EOF";
usage: $0 [YYMMDD]

Without an argument the current picture is downloaded.
EOF
    exit 1;
}

# Optional date of an archived picture.
# NOTE(review): the YYMMDD form is assumed from the public APOD archive
# URL scheme (apYYMMDD.html) -- confirm.
my $arg = shift @ARGV;
usage() if defined $arg && $arg !~ /^\d{6}$/;

# Image links on the page are resolved against the base URL, so keep it
# before $AOTD_URL is pointed at a specific archive page.
my $AOTD_URL_BASE = $AOTD_URL;
$AOTD_URL = "$AOTD_URL_BASE/ap$arg.html" if $arg;

my $p    = HTML::Parser::AOTD->new();
my $html = get($AOTD_URL)
    or die "Could not fetch from $AOTD_URL. Maybe the date is wrong?\n";
$p->parse($html);

# Without these two fields there is nothing sensible to name or download.
die "Could not find the picture date in $AOTD_URL\n"
    unless defined $p->{date} && length $p->{date};
die "Could not find an image link in $AOTD_URL\n"
    unless defined $p->{image} && length $p->{image};

for ($p->{date}) {
    s/ /-/g;
    # ghetto date conversion; an unknown word is left untouched instead
    # of interpolating undef
    s/([A-Za-z]+)/exists $month{$1} ? $month{$1} : $1/e;
}

my $file = "$DIRECTORY/$p->{date}.yaml";
die "$file exists - remove manually to download again\n" if -e $file;

my ($ext) = $p->{image} =~ /\.(\w+)$/;
die "Cannot determine the file extension of $p->{image}\n"
    unless defined $ext;

my $imgfile = "$DIRECTORY/$p->{date}.$ext";

# getstore() returns the HTTP status code; $! is not set by HTTP failures,
# so report the status instead.
my $status = getstore("$AOTD_URL_BASE/$p->{image}", $imgfile);
die "Something went wrong when downloading $p->{image}: HTTP status $status\n"
    unless $status == RC_OK;

open my $fh, '>', $file or die "can't write to $file: $!";
print {$fh} Dump({
    title   => $p->{title},
    credit  => $p->{credit},
    desc    => $p->{desc},
    imgfile => $imgfile,
}) or die "can't write to $file: $!";
close $fh or die "can't close $file: $!";

# Only the current picture (no date argument) moves the CURRENT links.
if ($LINK && !$arg) {
    for my $link ($LINK, "$LINK.yaml") {
        # -e follows symlinks and is false for a dangling link, so test
        # -l as well or the symlink() below fails on a stale link.
        next unless -l $link || -e $link;
        unlink $link or die "can't remove $link: $!";
    }
    symlink($file, "$LINK.yaml")
        or die "can't link $LINK.yaml to $file: $!";
    symlink($imgfile, $LINK)
        or die "can't link $LINK to $imgfile: $!";
}

__END__
`,= (K) 3178 \/\/_____\/ /\ \ / / \ /__/__ ALL RIGHTS REVERSED