jtech has asked for the wisdom of the Perl Monks concerning the following question:
Hi Perlmonks,
I am new at the Perl world and this is my first post and script
The below script generates .csv files corresponding to each executed query using DBI but, I am not happy with the code redundancy.
Had to search for some examples but didn't find anything. I would appreciate an example using a hash to understand how it works
Is there any nice way to do it?
Thanks in advance
# Create multiples csv file corresponding to each sql query
my $csvfile1 = "csvfile1.csv";
my $csvfile2 = "csvfile2.csv";
my $csvfile3 = "csvfile3.csv";
# DBI CONNECTION
my($dbh) = DBI->connect("dbi:Ingres:$dbname","$user","")
or die "Could not connect to database $dbname\n";
#tb1 select
my ($sth) = $dbh->prepare(SELECT * FROM tb1)
or die "Prepare failed: $DBI::errstr\n";
$sth->execute()
or die "Prepare failed: $DBI::errstr\n";
#create csvfile1 according to tb1 select
open my $fh, ">raw", $csvfile1 or die "$csvfile1: $!";
$fh->print (join(",", @{$sth->{NAME}}), "\n"); #show header
while (my @row = $sth->fetchrow_array())
{
$fh->print (join(",", @row), "\n");
};
close $fh or die "$csvfile1: $!";
$sth->finish();
#tb2 select
my ($sth) = $dbh->prepare(SELECT * FROM tb2)
or die "Prepare failed: $DBI::errstr\n";
$sth->execute()
or die "Prepare failed: $DBI::errstr\n";
#create csvfile2 according to tb2 select
open my $fh, ">raw", $csvfile2 or die "$csvfile2: $!";
$fh->print (join(",", @{$sth->{NAME}}), "\n"); #show header
while (my @row = $sth->fetchrow_array())
{
$fh->print (join(",", @row), "\n");
};
close $fh or die "$csvfile2: $!";
$sth->finish();
#tb3 select
my ($sth) = $dbh->prepare(SELECT * FROM tb3)
or die "Prepare failed: $DBI::errstr\n";
$sth->execute()
or die "Prepare failed: $DBI::errstr\n";
#create csvfile3 according to tb3 select
open my $fh, ">raw", $csvfile3 or die "$csvfile3: $!";
$fh->print (join(",", @{$sth->{NAME}}), "\n"); #show header
while (my @row = $sth->fetchrow_array())
{
$fh->print (join(",", @row), "\n");
};
close $fh or die "$csvfile3: $!";
$sth->finish();
$dbh->disconnect();
Re: Multiple queries on DBI corresponding to multiple csv files?
by poj (Abbot) on Feb 18, 2019 at 11:46 UTC
|
#!/usr/bin/perl
use strict;
use warnings;
use DBI;
use Text::CSV_XS;
# Create multiples csv file corresponding to each sql query
my @query = (
['SELECT * FROM account','accounts.csv'],
['SELECT * FROM facts','facts.csv'],
['SELECT * FROM mytable','mytable.csv'],
);
# DBI CONNECTION
my $dbh = get_dbh();
for (@query){
run_query(@$_);
}
sub run_query {
my ($sql,$csvfile) = @_;
print "Running $sql\n";
open my $fh, '>', $csvfile
or die "Could not open $csvfile: $!";
my $csv = Text::CSV_XS->new ({ binary => 1, eol => "\n" });
my $sth = $dbh->prepare ($sql);
$sth->execute;
$csv->print($fh, $sth->{NAME_lc});
my $count = 1;
while (my $row = $sth->fetchrow_arrayref) {
$csv->print($fh, $row);
++$count;
}
close $fh;
print "$count lines dumped to $csvfile\n";
}
sub get_dbh {
my $dbname = '';
my $user = '****';
my $pw = '';
my $dsn = "dbi:Ingress:$dbname";
my $dbh = DBI->connect($dsn, $user, $pw,
{ RaiseError=>1, AutoCommit=>1 } );
return $dbh;
}
poj | [reply] [d/l] |
|
Nice! But, sadly I don't have permission to install Perl modules on the server, I am afraid that cannot use Text::CSV_XS :(
| [reply] |
|
| [reply] |
|
| [reply] |
|
Re: Multiple queries on DBI corresponding to multiple csv files?
by huck (Prior) on Feb 18, 2019 at 11:19 UTC
|
I'll leave it to others to comment about using other modules to write csv files so proper quoting is performed and just discuss your imediate question
This is untested, demonstration code
use strict;
use warnings;
use DBI;
my @clumps=(
{table=>'tb1',file=>"csvfile1.csv"},
{table=>'tb2',file=>"csvfile2.csv"},
{table=>'tb3',file=>"csvfile3.csv"}
);
my $dbname='a';
my $user='b';
# DBI CONNECTION
my($dbh) = DBI->connect("dbi:Ingres:$dbname","$user","")
or die "Could not connect to database $dbname\n";
for my $clump (@clumps){
my ($sth) = $dbh->prepare('SELECT * FROM '.$clump->{table})
or die "Prepare failed: $DBI::errstr\n";
$sth->execute()
or die "Prepare failed: $DBI::errstr\n";
open my $fh, ">raw", $clump->{file} or die $clump->{file}. ": $!";
$fh->print (join(",", @{$sth->{NAME}}), "\n"); #show header
while (my @row = $sth->fetchrow_array())
{
$fh->print (join(",", @row), "\n");
};
close $fh or die $clump->{file}.": $!";
$sth->finish();
}
$dbh->disconnect();
Notice how i used an array of hashes to contain the table name and outfile file, then cycled thru it to do each table/outfile file pair
Edit: Hippo was right "You've set @clumps as an arrayref but then treat as an array. Typo?"
he saw it as
my @clumps=[
{table=>'tb1',file=>"csvfile1.csv"},
{table=>'tb2',file=>"csvfile2.csv"},
{table=>'tb3',file=>"csvfile3.csv"}
];
| [reply] [d/l] [select] |
Re: Multiple queries on DBI corresponding to multiple csv files?
by hippo (Bishop) on Feb 18, 2019 at 11:17 UTC
|
Welcome to Perl and the Monastery, brother jtech.
I am not happy with the code redundancy.
That you have spotted this and identified it as a problem is a very promising start. Perhaps the first thing to consider is to use subroutines. You can read the details of how to use these in Perl at perlsub. I wouldn't bother going past the first section, "DESCRIPTION" for now. The examples in there should show you the way to go.
There are other things in your script which could be improved (not least that it fails with Can't locate object method "connect" via package "DBI" at 1230078.pl line 9.) but those can certainly wait until the subroutine is in place. Good luck.
| [reply] [d/l] |
Re: Multiple queries on DBI corresponding to multiple csv files?
by jtech (Sexton) on Feb 18, 2019 at 11:55 UTC
|
| [reply] |
Re: Multiple queries on DBI corresponding to multiple csv files?
by Marshall (Canon) on Feb 20, 2019 at 00:25 UTC
|
The basic idea appears to be straight-forward, but there are some complications. I wouldn't use a hash representation for this application.
Some untested code follows...
I would use {RaiseError => 1} in the DBI connect. That way you do not have to check with "or die" for every prepare or whatever. A "die" will happen automatically.
The big problem here is that writing a CSV file can be devilishly tricky when there are imbedded "," in the data field! Like 'James,Bob,Jr' or whatever. I think (if I remember right) that also a string should be in quotes if there are embedded spaces, "Jessie Jones". The full CSV spec is more complicated than you might think. There are also "bad" implementations that don't meet the spec. The Perl CSV module pretty much "comes up with the ball", even for not quite right input. Generating a proper CSV output is easier than reading a "supposed CSV" input, however there is more than one way to screw this up.
Mileage varies, but often I use the pipe "|" character instead of a comma "," for writing a simple CSV file. That way I don't have to worry about using the CSV module. I only do that for files which I privately consume - not for export to other programs. However, I actually receive multi-million line address files that are delimited that way. "Comma Separated Value" is actually a bit of a misnomer. Any character can be used. In Excel, you just say "hey this is a pipe delimited file" and away you go albeit with a few extra mouse clicks required.
If you really want a "solid CSV file", using an actual comma for the field delimiter, I would use the Perl CSV module.
Anyway, I think something like this is similar to your code.
## untested ##
my @tables = qw (tb1 tb2 tb3);
# DBI CONNECTION
my $dbh = DBI->connect("dbi:Ingres:$dbname","$user","",{RaiseError =>
+ 1})
or die "Could not connect to database $dbname\n";
foreach my $table (@tables)
{
my $get_table = $dbh->prepare("SELECT * FROM $table");
$get_table->execute();
my $table_ref = $get_table -> fetchall_arrayref;
open my $file, ">", "CSV$table.csv" or die "unable to open CSV$tabl
+e.csv for writing $!";
print $file join(",", @{$table_ref->{NAME}}),"\n"; #show header - I
+'m not sure this works.
print $file join(',',@$_),"\n" for @$table_ref;
close $file;
}
| [reply] [d/l] |
|
my %attr =
(
AutoCommit => 0, # Require explicit commit or rollback.
PrintError => 1,
ShowErrorStatement => 0,
RaiseError => 0,
HandleError => \&Usage,
);
On my TODO list:
replace "," for "|"
replace Text::CSV_XS for Perl CSV module
Cheers!
| [reply] [d/l] |
|
Happy to see that you are investigating some of these other options!
Normally, you don't have to fiddle with "AutoCommit" as the default is "true", at least it is with SQLite. Basically you want write operations to commit automatically unless you override that behavior explicitly.
A commit operation is very expensive - the DB has to do a lot of work to make sure the data is "really there". The DB will have a limit on the number of transactions per second (basically commits). The number of operations per second will be more than an order of magnitude more than that. One reason to manually control the start/conclusion of a transaction is when doing a lot of inserts at once. For example:
my $addRoster = $dbh->prepare ("INSERT INTO Roster (Call,Owner)
VALUES (?,?)");
$dbh->do("BEGIN"); # start a new transaction for all inserts
foreach my $row (@$info) #the 'roster' sheet of Roster.xls #20 Feb 2
+018
{
...blah...blah...
$addRoster -> execute ($callsign, $owner);
}
$dbh->do("COMMIT"); #massive speed up by making one transaction!!
The above code slams data from an Excel spreadsheet into a DB. The sequence is typical. An SQL statement is prepared outside the loop and then executed many times with different data values. An SQL prepare statement is table specific, so this can't be done with a table name as a variable. The above code has been in production for a year. It takes <1 second to run. If I take out the BEGIN and COMMIT statements, it takes ~10 seconds -> 10x difference. Without the BEGIN and COMMIT statements, each insert would be committed automatically. You can run 1 million inserts as a single transaction and I have code that does that. This can make the difference between say 20 seconds and 4-6 hours! | [reply] [d/l] |
Re: Multiple queries on DBI corresponding to multiple csv files?
by jtech (Sexton) on Feb 19, 2019 at 09:05 UTC
|
I have included some features and still a bit redundant on the condition "if id exists", it is ok for now and open to suggestions.
This is the result:
#!/usr/bin/perl -W
# This script will create multiple csv files based on
# passed arguments "id" and custom "where" clause. Also,
# in the future it will read and display it as
# floating boxes in an HTML page (D3JS does the trick)
use DBI;
use Text::CSV_XS;
# Constant:
# my $htmlpage = "htmlfile_$id"; #TODO: it will build the html file
# Here are the temporary params (TODO: it will use @ARGV)
# This is an important param because it will run bunch of queries
# just for the provided id, if no id was provided then
# it will run the queries for all ids from the table
my $id = "";
# DBI CONNECTION
my $dbh = get_dbh();
# SUBROUTINES
sub run_query
{
my ($sql, $csvfile) = @_;
print "Running $sql\n";
open my $fh, '>', $csvfile or die "Could not open $csvfile: $!";
my $csv = Text::CSV_XS->new ({ binary => 1, eol => "\n" });
my $sth = $dbh->prepare ($sql);
$sth->execute;
$csv->print($fh, $sth->{NAME_lc});
my $count = 1;
while (my $row = $sth->fetchrow_arrayref)
{
$csv->print($fh, $row);
++$count;
}
close $fh;
print "ID: $id, $count lines dumped to $csvfile\n";
}
sub get_dbh
{
my $dbname = "jtech";
my $user = "jtech";
my($dbh) = DBI->connect("dbi:IngresII:$dbname","$user","")
or die "Could not connect to database $dbname\n";
return $dbh;
}
# MAIN PROGRAM
# If the user type an ID it will generate 4 csv files
# that corresponds to each custom SQL query
if ($id)
{
# In this case it generates reports for the typed "id" only
$clause = "id=$id AND";
my @query = (
["SELECT id FROM mytable WHERE $clause salary >= 1","csvfile1_
+$id.csv"],
["SELECT name FROM mytable WHERE $clause salary >= 1","csvfile
+2_$id.csv"],
["SELECT salary FROM mytable WHERE $clause salary >= 1","csvfi
+le3_$id.csv"],
["SELECT dept FROM mytable WHERE $clause salary >= 1","csvfile
+4_$id.csv"],
);
for (@query)
{
run_query(@$_);
}
}
# If there is NO ID provided than it will generate csv files
# corresponding to all ids found with a special clause where
# active ='no' (all IDs will get 4 csv file)
else
{
# Returns a list of all existents "id" (TO BE UPDATED TO USE DBI)
my $cmd = `echo 'SELECT DISTINCT id from mytable\\g\\q' | sql jtec
+h -jtech -S | sed -e 's/ //g'`;
foreach $cmd (split /[\n\r]+/, $cmd)
{
$id = $cmd;
$clause = "active ='no' AND";
my @query = (
["SELECT id FROM mytable WHERE $clause salary >= 1","csvfi
+le1_$id.csv"],
["SELECT name FROM mytable WHERE $clause salary >= 1","csv
+file2_$id.csv"],
["SELECT salary FROM mytable WHERE $clause salary >= 1","c
+svfile3_$id.csv"],
["SELECT dept FROM mytable WHERE $clause salary >= 1","csv
+file4_$id.csv"],
);
for (@query)
{
run_query(@$_);
}
}
}
| [reply] [d/l] |
|
#!/usr/bin/perl
use strict;
use warnings;
use DBI;
use Text::CSV_XS;
# DBI CONNECTION
my $dbh = get_dbh();
my @id = @ARGV;
unless (@id){
# no ids in ARGV
my $sql = "SELECT DISTINCT id from mytable
WHERE active = ?";
@id = map{ $_->[0] } $dbh->selectall_array($sql,undef,'no');
}
# MAIN
for my $id (@id){
my @query = (
["SELECT id FROM mytable WHERE id=? AND salary >= 1","csvfile1
+_$id.csv"],
["SELECT name FROM mytable WHERE id=? AND salary >= 1","csvfile2
+_$id.csv"],
["SELECT salary FROM mytable WHERE id=? AND salary >= 1","csvfile3
+_$id.csv"],
["SELECT dept FROM mytable WHERE id=? AND salary >= 1","csvfile4
+_$id.csv"],
);
for (@query){
run_query(@$_,$id);
}
}
# SUBROUTINES
sub run_query
{
my ($sql, $csvfile,$id) = @_;
print "Running $sql for $id\n";
open my $fh, '>', $csvfile or die "Could not open $csvfile: $!";
my $csv = Text::CSV_XS->new ({ binary => 1, eol => "\n" });
my $sth = $dbh->prepare ($sql);
$sth->execute($id);
$csv->print($fh, $sth->{NAME_lc});
my $count = 1;
while (my $row = $sth->fetchrow_arrayref)
{
$csv->print($fh, $row);
++$count;
}
close $fh;
print "ID: $id, $count lines dumped to $csvfile\n";
}
sub get_dbh
{
my $dbname = "jtech"; my $user = "jtech";
my $dbh = DBI->connect("dbi:IngresII:$dbname",$user,'')
or die "Could not connect to database $dbname\n";
return $dbh;
}
poj | [reply] [d/l] |
|
Good stuff!
I have created the Menu and it is accepting params so, it works fine when the id is typed, but it doesn't work when it is left blank ""
I am struggling with this error message: Can't locate object method "selectall_array" via package "DBI::db" at ./test.pl line 60.
Well, that is the line 60: @id = map{ $_->[0] } $dbh->selectall_array($sql,undef,'no');
what am I doing wrong here?
#!/usr/bin/perl -W
use DBI;
use Text::CSV_XS;
# GLOBALS
@id = "";
$dbname = "";
$user = "";
# MAIN MENU
# Don't allow duplicated arguments and blank/undef arguments
# except for id (in this case blank/undef id means all ids)
foreach my $arg (@ARGV)
{
if ($arg eq '-h' || $arg eq '--help')
{
die "Script usage manual here\n";
exit;
}
if ($arg =~ /^(\w+)$/)
{
die "Error: Multiple databases: $dbname and $arg." if $dbname;
$dbname = $1 ; next;
}
if ($arg =~ /^--?u(\w+)$/)
{
die "Error: Multiple users $user and $arg." if $user;
$user = $1 ; next;
}
if ($arg =~ /^--?id(\d+)$/)
{
die "Error: Multiple ids specified: -id$id[0] and $arg.\n" if
+$id[0];
$id[0] = $1 ; next;
}
elsif ($arg =~ /^--?id(\S+|)$/)
{
die "Error id format, use -id<integer>\n";
}
}
die "No database, type <dbname>\n" unless $dbname;
die "No username, use -u<username>\n" unless $user;
# DBI CONNECTION
my $dbh = get_dbh();
# EXEC
if ($id[0] eq "")
{
my $sql = "SELECT DISTINCT id from mytable
WHERE active = ?";
@id = map{ $_->[0] } $dbh->selectall_array($sql,undef,'no');
}
for my $id (@id)
{
my @query =
(
["SELECT id FROM mytable WHERE id=? AND salary >= 1","csvfile1
+_$id.csv"],
["SELECT name FROM mytable WHERE id=? AND salary >= 1","csvfile2
+_$id.csv"],
["SELECT salary FROM mytable WHERE id=? AND salary >= 1","csvfile3
+_$id.csv"],
["SELECT dept FROM mytable WHERE id=? AND salary >= 1","csvfile4
+_$id.csv"],
);
for (@query)
{
run_query(@$_,$id);
}
}
# SUBROUTINES
sub get_dbh
{
my($dbh) = DBI->connect("dbi:IngresII:$dbname","$user","")
or die "Could not connect to database $dbname\n";
return $dbh;
}
sub run_query
{
my ($sql, $csvfile, $id) = @_;
print "Running $sql for $id\n";
open my $fh, '>', $csvfile or die "Could not open $csvfile: $!";
my $csv = Text::CSV_XS->new ({ binary => 1, eol => "\n" });
my $sth = $dbh->prepare ($sql);
$sth->execute($id);
$csv->print($fh, $sth->{NAME_lc});
my $count = 1;
while (my $row = $sth->fetchrow_arrayref)
{
$csv->print($fh, $row);
++$count;
}
close $fh;
print "ID: $id, $count lines dumped to $csvfile\n";
}
| [reply] [d/l] [select] |
|
|
|
|
|
|
|
|
my @query =
(
[DECLARE GLOBAL TEMPORARY TABLE SESSION.temp SELECT id FROM mytabl
+e ON COMMIT PRESERVE ROWS WITH NORECOVERY, "/dev/null"],
["SELECT name FROM SESSION.temp WHERE id=? AND salary >= 1","csv
+file2_$id.csv"],
}
| [reply] [d/l] |
|
|
|
Multiple queries on DBI corresponding to multiple csv files?
by jtech (Sexton) on Feb 22, 2019 at 13:33 UTC
|
| [reply] |
|
|