$ ./2.quotes.pl Fargo >1.txt No matches for "Fargo" were found. $ ll 1.txt -rw-r--r-- 1 hogan hogan 128858 Apr 12 22:02 1.txt $ cat 2.quotes.pl #!/usr/bin/perl -w use strict; use 5.016; use WWW::Mechanize; use Getopt::Long; use Text::Wrap; use Log::Log4perl; use Data::Dump; my $log_conf = "/home/hogan/Documents/hogan/logs/conf_files/3.conf"; Log::Log4perl::init($log_conf); my $logger = Log::Log4perl->get_logger(); #$logger->level('DEBUG'); my $match = undef; my $random = undef; GetOptions( "match=s" => \$match, "random" => \$random, ) or exit 1; my $movie = shift @ARGV or die "Must specify a movie\n"; my $quotes_page = get_quotes_page($movie); my @quotes = extract_quotes($quotes_page); if ($match) { $match = quotemeta($match); @quotes = grep /$match/i, @quotes; } if ($random) { print $quotes[ rand @quotes ]; } else { print join( "\n", @quotes ); } sub get_quotes_page { my $movie = shift; my $mech = WWW::Mechanize->new; $mech->get("https://www.imdb.com/search/name-text/"); $mech->success or die "Can't get the search page"; open my $fh, '>', '/home/hogan/Documents/hogan/logs/1.form-log.txt' or die "Couldn't open logfile 'form-log.txt': $!"; $mech->dump_forms($fh); my $ret1 = $mech->submit_form( form_number => 2, fields => { title => $movie, restrict => "Movies only", }, ); $logger->info("return1 is $ret1"); # dd $ret1; # yikes if ( $ret1->is_success ) { $logger->info("Supposedly successful so far"); print $ret1->decoded_content; } else { print STDERR $ret1->status_line, "\n"; } my @links = $mech->find_all_links( url_regex => qr[^/Title] ) or die "No matches for \"$movie\" were found.\n"; # Use the first link my ( $url, $title ) = @{ $links[0] }; warn "Checking $title...\n"; $mech->get($url); my $link = $mech->find_link( text_regex => qr/Memorable Quotes/i ) or die qq{"$title" has no quotes in IMDB!\n}; warn "Fetching quotes...\n\n"; $mech->get( $link->[0] ); return $mech->content; } sub extract_quotes { my $page = shift; # Nibble away at the unwanted HTML at the beginnning... $page =~ s/.+Memorable Quotes//si; $page =~ s/.+?( tag my @quotes = split( //, $page ); for my $quote (@quotes) { my @lines = split( /
/, $quote ); for (@lines) { s/<[^>]+>//g; # Strip HTML tags s/\s+/ /g; # Squash whitespace s/^ //; # Strip leading space s/ $//; # Strip trailing space s/"/"/g; # Replace HTML entity quotes # Word-wrap to fit in 72 columns $Text::Wrap::columns = 72; $_ = wrap( '', ' ', $_ ); } $quote = join( "\n", @lines ); } return @quotes; } __END__ $ #### $ cat /home/hogan/Documents/hogan/logs/1.form-log.txt GET https://www.imdb.com/find [nav-search-form] navbar-search-category-select= (checkbox) [*/off|on] q= (text) = (submit) ref_=nv_sr_sm (hidden readonly) POST https://www.imdb.com/search/title-text/ type=plot (option) [*plot/Plot|quotes/Quotes|trivia/Trivia|goofs/Goofs|crazy_credits/Crazy Credits|location/Filming Locations|soundtracks/Soundtracks|versions/Versions] query= (search) = (submit) POST https://www.imdb.com/search/name-text/ type=bio (option) [*bio/Biographies|quotes/Quotes|trivia/Trivia] query= (search) = (submit) $ #### 2020/04/12 22:20:23 INFO return1 is HTTP::Response=HASH(0x5653c7bfc2e8) 2020/04/12 22:20:23 INFO Supposedly successful so far #### $ ./2.quotes.pl Fargo >1.txt No matches for "Fargo" were found. $ ./2.quotes.pl Jaws >1.txt No matches for "Jaws" were found. #### $ ll 1.txt -rw-r--r-- 1 hogan hogan 128858 Apr 12 22:02 1.txt