#!/usr/bin/env perl
# Extract the bid rows for one jacket from the PDF "pm_11113472_<jacket>.pdf"
# in the current directory and print them with Data::Dumper, one dump per
# PDF page.
#
# Usage: pm_11113472_pdf_parse.pl JACKET_ID
use strict;
use warnings;
# Zero-based positions, in capture order of $re, of the fields that need
# whitespace clean-up after matching.
use constant {
    AMOUNT         => 3,
    ADDL_RATE_PER  => 4,
    DISCOUNT_PRICE => 6,
};
use CAM::PDF;
use Data::Dumper;

my $jacket_id = $ARGV[0];
# Without a jacket ID there is neither a file name nor a usable split pattern.
die "Usage: $0 JACKET_ID\n"
    unless defined $jacket_id && length $jacket_id;

my $pdf_file = "pm_11113472_$jacket_id.pdf";
my $pdf = CAM::PDF::->new($pdf_file)
    or die "Cannot open $pdf_file: $CAM::PDF::errstr\n";

# One bid row, i.e. the text that follows an occurrence of the jacket ID.
# Every group below is a capture; their order defines the field positions
# used by the AMOUNT / ADDL_RATE_PER / DISCOUNT_PRICE constants.
my $re = qr{(?x:
    \A
    \s*?
    ((?:A|)) # Awd
    \s+
    (\d+-\d+) # Contractor Code
    \s+
    ([^\$]+?) # Name
    \s+
    (\$\s[0-9,.]+) # Amount
    \s+
    (\$\s[0-9,.]+\s[A-Z]) # Add'l Rate/PER
    \s+
    ([0-9.]+\s+\d+) # Discount % Days
    \s+
    (\$\s[0-9,.]+) # Discount Price
    \s+
    ([\D]+?) # Bidders Name
    \s+
    (\S+) # Date Received
    \s+
    (\(\d+\)\s\d+-\d+) # Phone Number
)};

for my $page_num (1 .. $pdf->numPages) {
    my $text = $pdf->getPageText($page_num);
    # Guard in case no text comes back for a page; the page then simply
    # dumps as an empty list.
    $text = '' unless defined $text;

    my @lines;
    my $wanted_line = 0;
    # \Q...\E: the jacket ID is a literal separator taken from the command
    # line, not a pattern.
    for my $line (split /\Q$jacket_id\E/, $text) {
        # The first chunk is whatever precedes the first jacket ID on the
        # page, so it is never a bid row.
        next unless $wanted_line++;

        my @fields = $line =~ $re;
        unless (@fields) {
            # Do not emit a row of undefs for text that is not a bid row.
            warn "Page $page_num: skipping text after jacket ID"
                . " that does not look like a bid row\n";
            next;
        }

        # "$ 844.00" -> "$844.00"
        $fields[AMOUNT] =~ y/ //d;
        # Remove only the first space (normally the one after the dollar
        # sign), keeping the separator before the unit letter:
        # "$ 15.00 C" -> "$15.00 C"
        $fields[ADDL_RATE_PER] =~ s/ //;
        $fields[DISCOUNT_PRICE] =~ y/ //d;

        push @lines, [ $jacket_id, @fields ];
    }
    print Dumper(\@lines);
}
####
$ ./pm_11113472_pdf_parse.pl 746810
$VAR1 = [
[
'746810',
'A',
'140-89226',
'UNION HOERMANN PRESS',
'$844.00',
'$15.00 C',
'1 20',
'$835.56',
'Randy Sigman',
'01/22/2020',
'(563) 582-3631'
],
[
'746810',
'',
'190-38407',
'GRAPHIC VISIONS',
'$869.00',
'$140.00 M',
'0.5 20',
'$864.66',
'Howard Roskosky',
'01/22/2020',
'(301) 987-5586'
],
##
##
$ ./pm_11113472_pdf_parse.pl 746810
$VAR1 = [
[
'746810',
'A',
'140-89226',
'UNION HOERMANN PRESS',
'$844.00',
'$15.00 C',
'1 20',
'$835.56',
'Randy Sigman',
'01/22/2020',
'(563) 582-3631'
],
[
'746810',
'',
'190-38407',
'GRAPHIC VISIONS',
'$869.00',
'$140.00 M',
'0.5 20',
'$864.66',
'Howard Roskosky',
'01/22/2020',
'(301) 987-5586'
],
[
'746810',
'',
'040-13121',
'BONADA ENTERPRISES/BLUE EARTH',
'$902.00',
'$0.18 E',
'1 7',
'$902.00',
'fernando',
'01/22/2020',
'(323) 272-6430'
],
[
'746810',
'',
'420-52700',
'LITHO PRESS, INC.',
'$941.00',
'$18.00 C',
'1 20',
'$931.59',
'Tim Sankey',
'01/22/2020',
'(210) 333-1711'
],
[
'746810',
'',
'420-31784',
'GRAFIKSHOP CORP. DBA FALCON',
'$945.00',
'$110.00 M',
'1 20',
'$935.55',
'Mei-Ing Hoffman',
'01/22/2020',
'(713) 977-2555'
],
[
'746810',
'',
'430-08870',
'BKR PRINTING',
'$1,090.00',
'$155.00 M',
'5 20',
'$1,035.50',
'Mark Bengtzen',
'01/22/2020',
'(801) 532-5363'
],
[
'746810',
'',
'190-28460',
'DOYLE PRINTING',
'$1,177.00',
'$227.00 M',
'5 20',
'$1,118.15',
'Michael Carey',
'01/22/2020',
'(301) 991-2637'
],
[
'746810',
'',
'120-71652',
'PRODUCTION PRESS, INC.',
'$1,357.00',
'$232.00 M',
'0.25 20',
'$1,353.61',
'Brad Racey',
'01/22/2020',
'(217) 243-3353'
],
[
'746810',
'',
'450-34976',
'GABRO GRAPHICS INC.',
'$1,940.00',
'$295.00 M',
'2 20',
'$1,901.20',
'Tony Gabro',
'01/22/2020',
'(703) 464-8588'
],
[
'746810',
'',
'130-13540',
'BOWMAN DISPLAY DIGITAL IMAGING',
'$9,327.91',
'$1.86 E',
'0 0',
'$9,327.91',
'Sara Veld',
'01/22/2020',
'(219) 595-6542'
]
];
##
##
$ ./pm_11113472_pdf_parse.pl 746819
$VAR1 = [
[
'746819',
'A',
'120-64255',
'NOOR INTERNATIONAL CORP',
'$387.86',
'$7.75 C',
'1 20',
'$383.98',
'Max Saleem',
'01/23/2020',
'(847) 985-2300'
],
[
'746819',
'',
'040-44026',
'IMAGE SQUARE INC',
'$463.00',
'$0.09 E',
'0 0',
'$463.00',
'Ash Soudbash',
'01/22/2020',
'(310) 586-2333'
],
[
'746819',
'',
'190-43435',
'HUB LABELS, INC.',
'$731.00',
'$14.62 C',
'1 20',
'$723.69',
'Kim Clark',
'01/23/2020',
'(301) 671-2230'
],
[
'746819',
'',
'090-28380',
'DOUGLASS SCREEN PRINTERS',
'$800.00',
'$140.00 M',
'0.5 20',
'$796.00',
'Debbie Carrigan',
'01/23/2020',
'(863) 899-7130'
],
[
'746819',
'',
'480-79295',
'SERIGRAPHIC SCREEN PRINT',
'$800.00',
'$0.16 E',
'0.5 20',
'$796.00',
'Teri Tropple',
'01/22/2020',
'(800) 657-6740'
],
[
'746819',
'',
'120-77235',
'DRI-STICK DECAL/RYDIN DECAL',
'$1,150.00',
'$0.00 N',
'0 0',
'$1,150.00',
'Lori Haberstich',
'01/23/2020',
'(800) 448-1991'
]
];