Here is a simpler script that uses the get method on
XML::FeedPP items as an alternative to
XML::Rules.
#!/usr/bin/perl -w
use strict;
use warnings;
use XML::FeedPP;
use HTML::TreeBuilder::XPath;
# input
# Feed address. It is assembled from two pieces only to keep the source line
# short; the resulting string must not contain a line break or any wrap
# marker, otherwise the request goes to the wrong address.
my $source = 'http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary'
           . '/4.5_day.atom';

# output
# The report is written to a file in the current working directory; any
# existing file of the same name is truncated by the '>' mode.
my $outfile = "quake.txt";
open my $fh, '>', $outfile
    or die "Cannot open '$outfile' for writing: $!";

# process
# The banner goes out first so it precedes every per-quake record.
report_header();
my $feed = XML::FeedPP->new( $source );
# Write one report record to $fh for every item in the feed, then close the
# report file.
foreach my $quake ( $feed->get_item() ) {
    my $title   = $quake->get('title');
    my $updated = $quake->get('updated');
    my $locn    = $quake->get('georss:point');
    my $summary = $quake->get('summary');
    my $id      = $quake->get('id');

    # A feed item may lack any of these elements; use empty strings so the
    # parsing and the report below never operate on undef.
    for my $field ( $title, $updated, $locn, $summary, $id ) {
        $field = '' unless defined $field;
    }

    # The fixed offsets used as a fallback below assume a title shaped like
    # "M 4.5 - <place>". Match that shape explicitly first so a magnitude
    # that is not exactly three characters wide does not shift the fields.
    my ( $magnitude, $place ) =
        $title =~ m{\A M \s+ ( -? \d+ (?: [.] \d+ )? ) \s+ - \s+ ( .* ) \z}xs;
    if ( !defined $magnitude ) {
        # Unrecognised title: keep the fixed-offset behaviour, but guard the
        # length so substr never reaches outside the string.
        $magnitude = length $title > 2 ? substr( $title, 2, 3 ) : '';
        $place     = length $title > 8 ? substr( $title, 8 )    : '';
    }

    # extract time from summary using XPath
    my $tree = HTML::TreeBuilder::XPath->new_from_content($summary);
    my ( $xpath_time_1, $xpath_time_2 ) = $tree->findvalues('//dd');
    # Release the parse tree explicitly; without this, older HTML::Element
    # versions keep every tree alive until the program exits.
    $tree->delete;

    # extract time using regex
    # Non-greedy captures stop at the first closing tag, and \s* tolerates
    # whitespace between the tags. A failed match leaves both values undef.
    my ( $t1, $t2 ) = $summary =~ m{
        <dt>Time</dt>                   \s*
        <dd>(.*?)\ UTC</dd>             \s*
        <dd>(.*?)\ at\ epicenter</dd>
    }xs;

    # Anything that could not be extracted is reported as an empty value.
    for my $value ( $xpath_time_1, $xpath_time_2, $t1, $t2 ) {
        $value = '' unless defined $value;
    }

    print {$fh} <<"EOF";
ID : $id
Title : $title
Place : $place
Magnitude : $magnitude
Updated : $updated
Location : $locn
Summary : $summary
Time Xpath: $xpath_time_1
$xpath_time_2
Time regex: $t1
: $t2
EOF
}

# Buffered write errors only surface at close time, so check it.
close $fh or die "Cannot close '$outfile': $!";
# report_header()
#
# Writes the four-line '#'-prefixed banner to the file-level $fh handle.
# The last banner line carries the current local time. Takes no arguments;
# the value of the final print is returned implicitly.
sub report_header {
    my $timestamp = localtime;    # scalar context yields a printable string

    my @banner_lines = (
        '# This Quake file created by quake_parsing_9',
        '# Matt Coblentz; Perl version unknown',
        '# For more information, see the USGS website',
        "# Last Updated: $timestamp",
    );

    # Emit the banner as a single string, one newline-terminated line each.
    print {$fh} join( q{}, map { "$_\n" } @banner_lines );
}
poj