#!/usr/bin/perl --
use strict;
use warnings;
use XML::LibXML 1.70; ## for load_html/load_xml/location
# Script entry point: pass the command-line arguments to Main, then
# terminate with a success status once it returns.
Main(@ARGV);
exit 0;
# Main( $location )
#
# Loads the HTML document at $location (passed to load_html as its
# "location" option, so a file path or a URL as shown in the usage text),
# removes every <table>, <img>, <script>, <style> and <noscript> element,
# and prints the remaining text content of <body> followed by a newline.
#
# Dies with the usage message when no location is supplied, and with an
# explicit error when the parsed document has no <body> element. Parser
# errors raised by XML::LibXML propagate to the caller.
sub Main {
    my $loc = shift or die "
Usage:
$0 ko00010.html
$0 http://example.com/ko00010.html\n\n";

    # recover => 2 lets the parser carry on past malformed markup
    # (see the XML::LibXML::Parser documentation for the recover levels).
    my $parser = XML::LibXML->new( recover => 2 );
    my $dom    = $parser->load_html( location => $loc );

    # Elements whose content should not appear in the extracted text.
    # Kept on a single line: a line break inside the quoted XPath would
    # become part of the expression itself.
    my $unwanted = '//table | //img | //script | //style | //noscript';

    for my $node ( $dom->findnodes($unwanted) ) {
        # unbindNode is XML::LibXML's built-in way to detach a node from
        # its parent; it also works for nodes nested in an already-removed
        # ancestor (e.g. an <img> inside a removed <table>).
        $node->unbindNode;
    }

    # List assignment in scalar context yields the match count, so this
    # dies cleanly instead of calling a method on undef when no <body>
    # was found.
    my ($body) = $dom->findnodes('//body')
        or die "No <body> element found in $loc\n";

    print $body->textContent, "\n";
}
# XML::LibXML::Node::detach( $node )
#
# Helper method added to the XML::LibXML::Node namespace: removes $node
# from its parent by calling removeChild on the parent, and returns
# whatever removeChild returns (the unbound node, per the XML::LibXML::Node
# documentation).
#
# A node that has no parent is already detached; it is returned unchanged
# rather than attempting a method call on undef.
sub XML::LibXML::Node::detach {
    my ($self) = @_;

    my $parent = $self->parentNode
        or return $self;

    return $parent->removeChild($self);
}