Greetings wise Perl monks. My problem is that I am unable to dynamically pass a set of html tags over input command line to HTML::TreeBuilder's look_down() method.
I want to invoke the program something like this:
perl ./download.pl --url 'http://www.gocomics.com/9chickweedlane/2019/
+04/17' --tags div --tags class="comic container js-comic-"
I cannot figure out how to pass the 'class' word and its value "comic container js-comic-" and drop them appropriately into the function call 'look_down()'. Below here is a hardcoded example
(this works just fine if I use it in the program):
@results = $tree->look_down( _tag => "div", "class" => qr(comic co
+ntainer js-comic-) ); # HARDCODED, should by dynamic
Ideally it would be something dynamic like this (forgive the dumb example):
# PROCESSING THE TAG LIST....
my $first_tag = $self->{ tags(0) };
my $second_tag = $self->{ tags(1) };
if( $second_tag =~ "=" )
{ # Split apart the key-value pair
my @words = split /\=/,$second_tag;
}
@results = $tree->look_down( _tag => $first_tag, "$words[0]" => qr("$w
+ords[1]") );
Here is the code in action:
### MAIN PROGRAM
sub main
{
use File::Spec;
use Getopt::Long;
my $url = undef;
my @tags = undef;
GetOptions(
"tags=s" => \@tags,
"url=s" => \$url,
)
or die("Error in command line arguments. $!\n");
my $dlobj = DownloadObject->new( $url, \@tags );
$dlobj->download();
}
### DOWNLOAD OBJECT CLASS
#!/usr/bin/perl -w
use warnings;
use strict;
package DownloadObject;
# Simple Constructor
sub new
{
my $proto = shift;
my $class = ref($proto) || $proto;
my $self = {};
$self->{url} = undef; # URL to target
$self->{tags_list_process_order} = undef; # List of html-tags, in se
+quential order, to process in order to extract the target content
$self->{url} = $_[0];
$self->{tags_list_process_order} = $_[1];
bless ($self, $class);
return $self;
}
sub download
{
my $self = shift;
require LWP::UserAgent;
my $ua = LWP::UserAgent->new;
$ua->agent('Mozilla/5.0');
$ua->timeout(10);
$ua->env_proxy;
my $response = $ua->get( $self->{url} ); # Download the content
if( $response->is_success )
{
use HTML::TreeBuilder 5 -weak;
my $tree = HTML::TreeBuilder->new_from_content( $response->content
+() ); # Put the contents into HTML-Treebuilder
my @results = ();
# THIS IS THE PROBLEM LINE. It is HARDCODED, and I want to make it
+ dynamic.
@results = $tree->look_down( _tag => "div", "class" => qr(comic co
+ntainer js-comic-) );
foreach(@results)
{
say " Data-Image URL: " . $_->attr('data-image');
# Gonna do something with result......
}
}
}
Any help would be greatly appreciated.