I had a need to compare two fragments of HTML to see if they were equivalent.
This snippet builds two HTML::TreeBuilder representations of the fragments, then recursively compares the contents of the fragments.
To use the snippet, call cmpHtml, passing the two fragments as strings:
print cmpHtml(
'<p><font foo="bar" bar="1">bar 1</font></p>',
'<p><font bar="2" foo="bar">bar 1</font></p>'
);
Or, if you already have two HTML::Element objects that you want to compare, you can call cmpHtmlElt directly:
print cmpHtmlElt ($elt1, $elt2);
# Compare two HTML fragments supplied as strings.
#
# Each fragment is parsed into its own HTML::TreeBuilder tree and the two
# trees are then compared recursively with cmpHtmlElt.
#
# Parameters:
#   $html1, $html2 - the HTML fragments to compare, as strings.
#
# Returns the result of cmpHtmlElt for the two parsed trees.
sub cmpHtml {
    my ($html1, $html2) = @_;

    # Load the parser on demand so the sub works even if the caller has not
    # loaded it; require does nothing for a module that is already loaded.
    require HTML::TreeBuilder;

    my $root1 = HTML::TreeBuilder->new;
    my $root2 = HTML::TreeBuilder->new;

    # elementify () is called after parsing so that both roots are handed to
    # cmpHtmlElt as ordinary tree elements rather than as builder objects.
    $root1->parse_content ($html1);
    $root1->elementify ();
    $root2->parse_content ($html2);
    $root2->elementify ();

    my $cmp = cmpHtmlElt ($root1, $root2);

    # Destroy the trees explicitly. Per the HTML::Element documentation the
    # parent/child links form reference cycles, so without weak references
    # the trees would otherwise never be freed.
    $root1->delete ();
    $root2->delete ();

    return $cmp;
}
# Recursively compare two HTML nodes, cmp-style.
#
# Parameters:
#   $elt1, $elt2 - the nodes to compare. Each may be undef, a plain string
#                  (a text node), or an object providing tag (), all_attr ()
#                  and content_list () (e.g. an HTML::Element).
#
# Returns 0 when the nodes are equivalent, otherwise a negative or positive
# number in the manner of cmp / <=>. Nodes are compared, in order, by:
# definedness, reference type, text (for strings), tag name, number of
# external attributes, attribute names and values (in sorted name order),
# number of children, and finally each pair of children recursively.
#
# Attributes whose names start with "_" are internal bookkeeping and are
# ignored entirely, so the order in which attributes were written and the
# position of an element in its tree do not affect the result.
sub cmpHtmlElt {
    my ($elt1, $elt2) = @_;

    # An undefined node sorts before a defined one; two undefs are equal.
    my $cmp = defined ($elt1) cmp defined ($elt2);
    return $cmp if $cmp;
    return 0 unless defined $elt1;

    # Plain strings (ref gives '') sort before objects; two strings are
    # compared as text.
    $cmp = ref ($elt1) cmp ref ($elt2);
    return $cmp if $cmp;
    return $elt1 cmp $elt2 unless ref $elt1;

    $cmp = $elt1->tag () cmp $elt2->tag ();
    return $cmp if $cmp;

    # Only external attributes take part in the comparison. Sorting the names
    # makes the result independent of hash ordering and keeps the comparison
    # antisymmetric when the two elements carry different attribute names.
    my %attribs1 = $elt1->all_attr ();
    my %attribs2 = $elt2->all_attr ();
    my @names1 = sort grep { !/^_/ } keys %attribs1;
    my @names2 = sort grep { !/^_/ } keys %attribs2;

    $cmp = @names1 <=> @names2;
    return $cmp if $cmp;

    for my $index (0 .. $#names1) {
        $cmp = $names1[$index] cmp $names2[$index];
        return $cmp if $cmp;

        # The names are equal here, so either one indexes both hashes.
        my $name = $names1[$index];
        $cmp = $attribs1{$name} cmp $attribs2{$name};
        return $cmp if $cmp;
    }

    my @children1 = $elt1->content_list ();
    my @children2 = $elt2->content_list ();

    $cmp = @children1 <=> @children2;
    return $cmp if $cmp;

    for my $index (0 .. $#children1) {
        $cmp = cmpHtmlElt ($children1[$index], $children2[$index]);
        return $cmp if $cmp;
    }

    # Everything matched. Return 0 explicitly: the value of a sub that ends
    # on a loop is unspecified.
    return 0;
}