use HTML::TreeBuilder::XPath;

# Print every hyperlink found in $html, one per line, as
# "<href>\t<whitespace-trimmed link text>\n".
# NOTE(review): $html is not defined in this snippet; it must already hold
# the document source when this code runs.
my $p = HTML::TreeBuilder::XPath->new;

# Parser options have to be configured before any input is fed in.
$p->marked_sections(1);    # honour marked sections such as <![CDATA[ ... ]]>
$p->xml_mode(1);           # DEPENDING ON INPUT: XML parsing rules (XHTML/XML);
                           # drop this line when the input is plain HTML

# parse() only feeds a chunk of input to the parser. eof() signals that the
# document is complete, so buffered trailing text is flushed and the tree is
# finalized before it is queried.
$p->parse($html);
$p->eof;

# Select every <a> element that carries an href attribute.
my @links = $p->findnodes('//a[@href]');
for my $link (@links) {
    print $link->attr('href'), "\t", $link->as_text_trimmed, "\n";
}