HTML::Element=HASH(0xb5ed04) 0.1.1.0
Milky Way Over Piton de l'Eau
/html/body/center[2]/b
/html/body/center[2]/b
/html/body[@link='#0000FF' and @vlink='#7F0F9F' and @alink='#FF0000' and @bgcolor='#F4F4FF' and @text='#000000']/center[2]/b
------------------------------------------------------------------
####
$ scraper apod.html
scraper> d
$VAR1 = {};
scraper> process '/html/body/center/p[2]' => 'Date' => 'TEXT';
scraper> d
$VAR1 = {
'Date' => ' 2012 June 25 '
};
scraper> process '//b' => 'b[]' => 'TEXT';
scraper> y
---
Date: ' 2012 June 25 '
b:
- " Milky Way Over Piton de l'Eau "
- ' Image Credit & Copyright: '
- ' Explanation: '
- ' Help Evaluate APOD: '
- " Tomorrow's picture: "
- ' Authors & editors: '
- 'NASA Official: '
- 'A service of:'
- '&'
scraper> c all
#!c:\perl\5.14.1\bin\MSWin32-x86-multi-thread\perl.exe
use strict;
use Web::Scraper;
use URI;
my $file = \do { my $file = "apod.html"; open my $fh, $file or die "$file: $!"; join '', <$fh> };
my $scraper = scraper {
process '/html/body/center/p[2]' => 'Date' => 'TEXT';
process '//b' => 'b[]' => 'TEXT';
};
my $result = $scraper->scrape($file);
scraper> q
##
##
## Scraper rules for the saved APOD page (apod.html).
## Each rule has the form:  process XPATH => RESULT_KEY => WHAT_TO_EXTRACT;
## 'TEXT' asks for the text of the matched node, '@NAME' for one of its attributes.
my $scraper = scraper {
## Title: text of the node matched by //b[1].
process '//b[1]' => 'Title' => 'TEXT';
## Credit: text of the second <b> inside the second <center> of the body.
process '/html/body/center[2]/b[2]' => 'Credit' => 'TEXT';
## Desc: text of the first <p> that is a direct child of <body>.
process '/html/body/p[1]' => 'Desc' => 'TEXT';
## Date: text of the second <p> inside a <center> of the body.
process '/html/body/center/p[2]' => 'Date' => 'TEXT';
## Earlier attempt, kept for reference: it uses a Perl-style `=~` match inside
## the XPath predicate, which is not XPath 1.0 syntax. The contains() form
## below replaces it.
#~ process q{//a[ @href =~ "image/" ]} => 'Image' => '@HREF';
## Image: href attribute of a link whose href contains "image/".
## NOTE(review): the attribute is spelled '@HREF' in upper case -- confirm the
## HTML tree backend in use looks attributes up case-insensitively.
process q{//a[ contains(@href, "image/") ]} => 'Image' => '@HREF';
};
## NOTE: pass a URI object (rather than a plain filename string) so that the
## scraper downloads (reads) the file itself.
my $url = URI->new( 'file:apod.html' );
## Base URL of the live site, passed as the second argument to scrape();
## presumably intended for resolving relative links such as the image href.
my $base = 'http://apod.nasa.gov/apod/';
## NOTE(review): confirm that scrape() honours the second argument when the
## first argument is a URI object; it may use the fetched URI as the base.
## $ret receives whatever scrape() returns for the rules above (presumably a
## hash reference keyed by Title, Credit, Desc, Date and Image -- verify).
my $ret = $scraper->scrape( $url , $base );