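This approach relies on well-formed HTML (every <div> must have a matching </div>, because the end of the "full" div is found by counting nested divs):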
#!/usr/bin/perl
# Print the text of every <a> element that sits inside a <div> whose class
# list contains "full", reading the HTML document stored after __DATA__.
#
# The end of a "full" div is located by counting nested <div>/</div> tags,
# so the input must be well-formed: every <div> needs a matching </div>.
use strict;
use warnings;
use HTML::TokeParser;

# Slurp the whole DATA section in a single read.
my $doc = do { local $/; <DATA> };
my $p = HTML::TokeParser->new( \$doc );

DIV:
while ( my $outer = $p->get_tag("div") ) {

    # A <div> may carry no class attribute at all; fall back to '' so the
    # comparison below does not raise an "uninitialized value" warning.
    my $class = $outer->[1]{class};
    $class = '' unless defined $class;

    # The class attribute is a whitespace-separated list of names, so match
    # "full" as one of its tokens (class="full" still matches as before).
    next DIV unless grep { $_ eq 'full' } split ' ', $class;

    # Depth relative to the "full" div: 0 means directly inside it.
    my $nested_div = 0;

    TAG:
    while ( my $inner = $p->get_tag ) {
        my $tag = $inner->[0];

        # keep count of nested divs
        $nested_div++ if $tag eq "div";
        $nested_div-- if $tag eq "/div";

        # Dropping below zero means the "full" div itself has closed.
        last TAG if $nested_div == -1;

        # For a link, emit the text that follows the opening <a> tag.
        print $p->get_text, "\n" if $tag eq "a";
    }
}
__DATA__
<!-- some other divs and so here -->
<div class="full">
<div class="content">
<ul class="topics">
<!-- I want to extract these links, from div class "full" only -->
<li><a href="foobar">foobar</a></li>
<li><a href="foobr2">fobar2</a></li>
<li><a href="fobar3">foobr3</a></li>
</ul>
</div>
</div>
<div class="otherclass">
<div class="content">
<ul class="topics">
<!-- I DO NOT WANT these links -->
<li><a href="fbaor">fbaor</a></li>
<li><a href="fabar2">fabar2</a></li>
<li><a href="fbar3">fbar3</a></li>
</ul>
</div>
</div>
<!-- some other divs and so here -->