# Walk the HTML token stream and build a two-level lookup:
#
#   $href{ <text of the current <h2>> }{ <href of the last link> } = <span text>
#
# $html must already hold the page markup; it is defined outside this snippet.
my $p = HTML::TokeParser::Simple->new(\$html);

my (%href, $this_href, $number, $letter);

while (my $t = $p->get_token) {

    # An <h2> opens a new section; its trimmed text becomes the outer key.
    if ($t->is_start_tag('h2')) {
        $letter = $p->get_trimmed_text('/h2');
        next;
    }

    if ($t->is_start_tag('a')) {
        # skip bookmarks
        next if $t->get_attr('name');

        # Remember the link target; it is paired with the next <span> seen.
        $this_href = $t->get_attr('href');
        next;
    }

    if ($t->is_start_tag('span')) {
        $number = $p->get_trimmed_text('/span');

        # A span seen before any <h2>, or before any link carrying an href,
        # has nothing to be filed under. Skip it instead of using undef as a
        # hash key (which would create a bogus '' entry and emit warnings).
        next unless defined $letter && defined $this_href;

        $href{$letter}{$this_href} = $number;
        next;
    }
}

####
# Output captured from a run of the script. The code that prints %href is
# not part of this snippet. Kept as comments so it is not parsed as Perl.
#
# ---------- Capture Output ----------
# > "C:\Perl\bin\perl.exe" _new.pl
# A
#     pdf\8a956f66-1c60-48fc-905c-b49d617aa6c5.pdf -> 110377660
#     pdf\c76b834e-36e1-497b-b13e-eba2348dc044.pdf -> 110136892
#     pdf\ae8d51e0-005b-44be-84cb-3c9b57335755.pdf -> 108318866
#     pdf\37d3e78b-1adb-458b-9e89-0df780909f08.pdf -> 108116112
#     pdf\e646f948-f78d-4463-a01d-0261aebf70dc.pdf -> 113069066
#     pdf\6c0a5bb4-143d-4305-957b-796c8193d07a.pdf -> 116815754
# B
#     pdf\8a956f66-1c60-48fc-905c-b49d617aa6c5.pdf -> 110377660
#     pdf\c76b834e-36e1-497b-b13e-eba2348dc044.pdf -> 110136892
#     pdf\ae8d51e0-005b-44be-84cb-3c9b57335755.pdf -> 108318866
#     pdf\37d3e78b-1adb-458b-9e89-0df780909f08.pdf -> 108116112
#     pdf\e646f948-f78d-4463-a01d-0261aebf70dc.pdf -> 113069066
#     pdf\6c0a5bb4-143d-4305-957b-796c8193d07a.pdf -> 116815754
# C
#     pdf\8a956f66-1c60-48fc-905c-b49d617aa6c5.pdf -> 110377660
#     pdf\c76b834e-36e1-497b-b13e-eba2348dc044.pdf -> 110136892
#     pdf\ae8d51e0-005b-44be-84cb-3c9b57335755.pdf -> 108318866
#     pdf\37d3e78b-1adb-458b-9e89-0df780909f08.pdf -> 108116112
#     pdf\e646f948-f78d-4463-a01d-0261aebf70dc.pdf -> 113069066
#     pdf\6c0a5bb4-143d-4305-957b-796c8193d07a.pdf -> 116815754
# > Terminated with exit code 0..