#!/usr/bin/perl
use strict;
use warnings;
use HTML::TokeParser::Simple;
# Document to scan, held inline in this script.
my $html = q{
A
};

# Token parser reading from the in-memory string above.
my $p = HTML::TokeParser::Simple->new(\$html);

# Advance the parser to just past the opening tag of the second <table>.
# The loop also ends early if the document runs out of <table> tags.
my $tables_to_find = 2;
while ($tables_to_find > 0 && $p->get_tag('table')) {
    $tables_to_find--;
}
# Walk the tokens from the parser's current position up to the next
# </table>, mapping the href of the most recent <a> start tag to the
# trimmed text of each <span> that follows it.
#   %href      - result: link target => span text
#   $this_href - href of the last <a> seen; may be undefined when no
#                anchor has been seen yet or the anchor had no href
my (%href, $this_href);
while (my $t = $p->get_token) {
    # The first closing table tag ends the region of interest.
    last if $t->is_end_tag('table');

    if ($t->is_start_tag('a')) {
        $this_href = $t->get_attr('href');
        next;
    }

    next unless $t->is_start_tag('span');

    # Always consume the span text so the parser position advances the
    # same way whether or not the value is stored.
    my $number = $p->get_trimmed_text('/span');

    # Without a known link target there is nothing to key the value on;
    # skipping avoids an "uninitialized value" warning and a bogus ''
    # entry in %href.
    next unless defined $this_href;

    $href{$this_href} = $number;
}
# Print one "target -> value" line per collected entry. The keys are
# sorted because hash key order is unspecified and can differ between
# runs; sorting makes the output deterministic.
for my $key (sort keys %href) {
    print "$key -> $href{$key}\n";
}
####
---------- Capture Output ----------
> "C:\Perl\bin\perl.exe" _new.pl
pdf\8a956f66-1c60-48fc-905c-b49d617aa6c5.pdf -> 110377660
pdf\c76b834e-36e1-497b-b13e-eba2348dc044.pdf -> 110136892
pdf\ae8d51e0-005b-44be-84cb-3c9b57335755.pdf -> 108318866
pdf\37d3e78b-1adb-458b-9e89-0df780909f08.pdf -> 108116112
pdf\e646f948-f78d-4463-a01d-0261aebf70dc.pdf -> 113069066
pdf\6c0a5bb4-143d-4305-957b-796c8193d07a.pdf -> 116815754
> Terminated with exit code 0.