Beefy Boxes and Bandwidth Generously Provided by pair Networks
Your skill will accomplish
what the force of many cannot
 
PerlMonks  

Re: Combining Ffile:fetch with MySQL

by bliako (Monsignor)
on Jul 19, 2022 at 14:00 UTC ( [id://11145583]=note: print w/replies, xml ) Need Help??


in reply to Combining Ffile:fetch with MySQL

Are you asking how to loop over the results of an SQL select and then feed each result to file::fetch?

use DBI; use File::Fetch; my $dbh = DBI->connect($dsn, $user, $password) or die "Couldn't connect to database: " . DBI->errstr; my $query = "select url FROM tablexxx where date='?'"; # << minor edit my $SQL = $dbh->prepare($query) or die "prepare: ".$dbh->errstr; $SQL-> execute('202207') or die "execute: ".$dbh->errstr; while (my $row = $sth->fetchrow_hashref) { print "url: $row->{url}\n"; # logic to get unique filename for output my $outfile = ...; my $ff = File::Fetch->new( uri => $url ); $ff->fetch(to => $outfile) or die "failed to fetch '$url' to '$outf +ile'."; # or get file contents in a scalar and then process them, e.g. to e +xtract # pdf metadata e.g. date, author, and md5 hash my $contents; $ff->fetch(to => \$contents); # use PDF::API2 my %options; my $pdf = PDF::API2->from_string($contents, %options); my $author = $pdf->author(); # I wish it was that simple... you may + need to sieve through all the metadata }

totally untested, bw, bliako

Replies are listed 'Best First'.
Re^2: Combining Ffile:fetch with MySQL
by justin423 (Scribe) on Jul 24, 2022 at 01:43 UTC

    ok, here is the code I put together from the really helpful response. it runs with no errors, it just doesn't download... I must be missing something.

    $host = "localhost"; $port = "3306"; $dbdriver='mysql'; $tablename = "linktable"; print "Enter year and month to download in format YYYYMM:"; $date = <STDIN>; chomp $date; $dsn = "dbi:$dbdriver:$database:$host:$port"; $path='/data/'; my $dbh = DBI->connect($dsn, $user, $password) or die "Couldn't connect to database: " . DBI->errstr; $dbh-> do("DROP TABLE IF EXISTS LINKTABLE"); $dbh-> do("CREATE TABLE IF NOT EXISTS LINKTABLE(document_id CHAR(9),IN +DEX(DOCUMENT_ID),PUBLISH_DATE CHAR(8),URL CHAR(150))"); $dbh-> do("LOAD DATA LOCAL INFILE '/data/DATA.CSV' INTO TABLE $tablena +me(DOCUMENT_ID,PUBLISH_DATE)"); $dbh-> do("UPDATE LINKTABLE SET URL=CONCAT('www.example.com/document/' +,DOCUMENT_ID,'document.pdf')") ; my $SQL = "select url,document_id FROM $tablename where left(publish_d +ate,6) ='$date'"; my $query = $dbh->prepare($SQL) or die "prepare: ".$dbh->errstr; $query-> execute() or die "execute: ".$dbh->errstr; while ($url,$document_id = $query->fetchrow_array()) { print "url: $row->{url} \n"; my $outfile = '$path.$document_id'; my $ff = File::Fetch->new( uri => url ); $ff->fetch(to => $outfile) or die "failed to fetch '$url' to '$outf +ile'."; #rename("document.pdf","/output/$document_id.pdf") || die "Can't re +name file: ";
      my $dbh = DBI->connect($dsn, $user, $password) or die "Couldn't connect to database: " . DBI->errstr;

      Here you test that the connection has succeeded and bail out if it hasn't. This is good. However, on the subsequent 4 $dbh->do statements you make no such tests. How can you tell if one or more of these have failed?

      my $SQL = "select url,document_id FROM $tablename where left(publish_d +ate,6) ='$date'"; my $query = $dbh->prepare($SQL) or die "prepare: ".$dbh->errstr; $query-> execute() or die "execute: ".$dbh->errstr;

      No reason at all not to use a placeholder here:

      my $SQL = "select url,document_id FROM $tablename where left(publish_d +ate,6) = ?"; my $query = $dbh->prepare($SQL) or die "prepare: ".$dbh->errstr; $query-> execute($date) or die "execute: ".$dbh->errstr;
      it runs with no errors, it just doesn't download

      Does it print the URL correctly each time through the loop or not?


      🦛

        However, on the subsequent 4 $dbh->do statements you make no such tests.

        ... and with a little bit of RTFM, none of those tests is needed. Change

        my $dbh = DBI->connect($dsn, $user, $password) or die "Couldn't connect to database: " . DBI->errstr;

        to

        my $dbh = DBI->connect($dsn, $user, $password, { RaiseError => 1, Prin +tError => 0 }) or die "Couldn't connect to database: " . DBI->errstr;

        and DBI will automatically check for errors, no manual checks needed.

        Alexander

        --
        Today I will gladly share my knowledge and experience, for there are no sweeter words than "I told you so". ;-)
        you are right. it doesn't print the url. it just exits after printing url: But the table has the correct url in the field url.

        so this is the code where there is an issue.

        while ($url = $query->fetchrow_array()) { print "url: $row->{url}\n"; # logic to get unique filename for output my $outfile = $path.$document_id; print $outfile; my $ff = File::Fetch->new( uri => url ); $ff->fetch(to => $outfile) or die "failed to fetch '$url' to '$outf +ile'.";

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://11145583]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others admiring the Monastery: (5)
As of 2024-04-25 10:42 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found