use warnings; use strict;
use Getopt::Long
qw( :config gnu_compat no_ignore_case
no_debug
) ;
my %opt =
( 'clobber' => 0
, 'backup-suffix' => '--OLD'
, 'text' => 'lc $_[0]'
, 'url' => undef
) ;
my @files = handle_options();
# Compile code strings.
foreach my $c qw(text url)
{
next unless $opt{$c};
$opt{$c} = eval "sub { $opt{$c} }";
}
# Use the File::Temp in non-OO mode to get both the filename & the
# filehandle; OO mode can return only the filename.
use File::Temp qw( tempfile );
use File::Copy;
use HTML::TreeBuilder;
foreach my $f ( @files )
{
my $message = sanity_check($f);
if ( $message ) { warn $message; next; }
my $tree = make_html_tree($f);
# Make changes.
foreach my $node ( $tree->look_down('_tag' , 'a') )
{
# Change link text.
if ( ref $opt{'text'} && scalar $node->content_refs_list)
{
foreach my $text ( $node->content_refs_list )
{
next if ref $$text;
$$text = $opt{'text'}->($$text);
}
}
# Change link URL.
my $url = $node->attr('href');
$node->attr( 'href' , $opt{'url'}->($url) )
if ref $opt{'url'} && $url ;
}
# Save changes.
my ($fh , $new_name) = open_new_file($f);
print $fh $tree->as_HTML;
$tree = $tree->delete;
close $fh or die "Could not close $new_name\n";
}
exit;
# Given a file name, returns the HTML::TreeBuilder object.
sub make_html_tree
{
my ($file) = @_;
my $tree = HTML::TreeBuilder->new;
$tree->warn(1);
# Configure parsing.
$tree->no_space_compacting(1);
$tree->ignore_ignorable_whitespace(0);
$tree->ignore_unknown(0);
$tree->store_comments(1);
$tree->store_declarations(1);
$tree->store_pis(1);
#
$tree->parse_file($file);
$tree->eof;
$tree->elementify;
return $tree;
}
# Given a file name, checks if it is plain readable file and if clobb
+ering
# specified if it is writable too. Returns undef if sanity is ok.
sub sanity_check
{
my ($f) = @_;
stat $f;
return
!( -f _ && -r _ )
? "'$f' skipped, either it is not a plain file or is unreadable\n"
: $opt{'clobber'} && ! -w _
? "'$f' skipped, clobbering specified but file is not writable\n
+"
: undef;
}
# Returns file handle (to a new file) and optionally a file name.
sub open_new_file
{
my ($old) = @_;
my @open = $opt{'clobber'} ? tempfile() : backup_and_open($old);
# Return file handle in scalar context.
return wantarray ? @open : $open[0];
}
# Copies the given file to backup; opens the copy and returns the fil
+e handle
# and the given file name (to be consistent w/ File::Temp::tempfile()
+).
{
my $FH;
sub backup_and_open
{
my ($old) = @_;
my $backup = $old . $opt{'backup-suffix'};
copy $old , $backup or die "Could not make backup: $!";
open $FH, '>' , $old or die "Could not open '$old' to write: $!";
return ($FH , $old);
}
}
# Handle options.
sub handle_options
{
GetOptions
( 'help|usage' => \$opt{'usage'}
, 'no-clobber' => \$opt{'clobber'}
, 'clobber' => \$opt{'clobber'}
, 'i|suffix' => \$opt{'backup-suffix'}
, 'text=s' => \$opt{'text'}
, 'url=s' => \$opt{'url'}
)
or die 'Wrong option(s) given' , options_doc();
if ( $opt{'usage'} )
{
print "$0 [options] file [file file ...]\n" , options_doc();
exit 0;
}
die "Give at least one HTML file to work with.\n"
unless scalar @ARGV;
# Files to work on.
return @ARGV;
}
# Document the options.
sub options_doc
{
return <<_USAGE_;
Options are ...
-help Show this message.
File Output:
-no-clobber Save the original files as a backups. (Default)
-clobber Overwrite the given files. This overrides the
-no-clobber option.
-suffix Text to append at the end of backup. (Default is
'--OLD'.)
Code (given as a string which is run inside a sub{}; last value
returned from the sub is used as then new value for text or URL):
-text Code to run over text of each link. (Default is to
lowercase the text.)
-url Code to run over url of each link. (Default is
nothing to do.)
_USAGE_
}
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.