#!/usr/bin/perl --
use strict;
use warnings;
use HTML::TreeBuilder;
Main(@ARGV);
exit(0);
sub Main {
if (@_) {
PumpDump(@_);
#~ PumpDump('', qw/look_down criteria/ );
#~ PumpDump('file', qw/look_down criteria/ );
} else {
print "Usage: $0 file _tag div\n\n";
print "Demo1\n";
Demo1();
print "Demo1\n";
Demo3();
} ## end else [ if (@_) ]
} ## end sub Main
sub Demo1 {
my $html = <<'__HTML__';
key1 val1
key2 val2
key3 val3
key4 val4
key5 val5
key6 val6
key7 val7
key8 val8
key9 val9
key10 val10
key11 val11
__HTML__
PumpDump( $html, _tag => qr/table|strong/i );
} ## end sub Demo1
sub Demo3 {
my $html = <<'__HTML__';
educa.ch
Altes Schulhaus Ossingen
Guntibachstrasse 10
8475 Ossingen
Tel:
052 317 15 45
Fax:
052 317 04 42
__HTML__
PumpDump( $html, _tag => qr/div/i );
} ## end sub Demo3
sub HTML::Element::addressx {
return join(
'/',
'', # // ROOT
reverse( # so it starts at the top
map {
my $count = 0;
my $t = $_->tag;
## LEFT CAN BE A STRING
my @left = $_->left;
for my $left (@left) {
eval { $count++ if $left->tag eq $t };
}
if ( $count > 1 ) {
$count = "[$count]";
} else {
$count = '';
}
$t . $count
} $_[0], # self and...
$_[0]->lineage
)
);
} ## end sub HTML::Element::addressx
sub HTML::Element::addressxx {
my (@stuff) = (
map {
my $count = 0;
my $t = $_->tag;
## LEFT CAN BE A STRING
my @left = $_->left;
for my $left (@left) {
eval { $count++ if $left->tag eq $t };
}
if ( my $attid = $_->attr('id') ) {
$count = "[\@id='$attid']";
} elsif ( $count > 1 ) {
$count = "[$count]";
} else {
$count = '';
}
$t . $count
} $_[0], # self and...
$_[0]->lineage
);
#~ use DDS; print Dump(\@stuff),"\n";
use List::MoreUtils qw[ before_incl ];
my $stuff = @stuff;
@stuff = before_incl { /\[\@id/i } @stuff;
return join(
'/',
( $stuff > @stuff ? '/' : '' ),
reverse( # so it starts at the top
@stuff
)
);
} ## end sub HTML::Element::addressxx
sub HTML::Element::addressxX {
my (@stuff) = (
map {
my $e = $_;
my $count = 0;
my $t = $e->tag;
my @left = $e->left;
for my $left (@left) {
eval { $count++ if $left->tag eq $t };
}
if ( my $attid = $e->id ) {
$count = "[\@id='$attid']";
} elsif ( my @att = grep !/^id$/, $e->all_external_attr_names ) {
$count = '['
. join( ' and ',
map { sprintf q!@%s='%s'!, $_, $e->attr($_) } @att )
. ']';
} elsif ( $count > 1 ) {
$count = "[$count]";
} else {
$count = '';
}
$t . $count
} $_[0], # self and...
$_[0]->lineage
);
#~ use DDS; print Dump(\@stuff),"\n";
my $stuff = @stuff;
use List::MoreUtils qw[ before_incl ];
@stuff = before_incl { /\[\@id/i } @stuff;
return join(
'/',
( $stuff > @stuff ? '/' : '' ),
reverse( # so it starts at the top
@stuff
)
);
} ## end sub HTML::Element::addressxX
sub PumpDump {
my ( $html, @lookdown ) = @_;
my $tree = HTML::TreeBuilder->new();
if ( $html =~ / ) {
$tree->parse($html);
} else {
$tree->parse_file($html);
}
$tree->eof;
for my $td ( $tree->look_down(@lookdown) ) {
my $text = $td->as_trimmed_text;
next if $text =~ /^\p{Zs}*$/; ## ysth, nbsp isn't \s
print $td, "\t", $td->address, "\n";
print $text, "\n";
print $td->addressx, "\n";
print $td->addressxx, "\n";
print $td->addressxX, "\n";
print '-' x 66, "\n";
} ## end for my $td ( $tree->look_down...)
$tree->delete;
undef $tree;
print '#' x 66, "\n\n";
} ## end sub PumpDump
__END__