#!/usr/bin/env perl
#
# Extract bid rows for one jacket from the file "pm_11113472_<jacket_id>.pdf"
# (looked up in the current directory) and print them with Data::Dumper,
# one dump per PDF page.
#
# Usage: pm_11113472_pdf_parse.pl <jacket_id>
#
# Each emitted row is an array ref: the jacket ID followed by the ten fields
# captured by $re below (Awd flag through Phone Number).
#
# Sample runs are kept after the __END__ marker so the file stays valid Perl.

use strict;
use warnings;

# Zero-based positions in the regex capture list of the fields whose
# embedded spaces are cleaned up after a successful match.
use constant {
    AMOUNT         => 3,
    ADDL_RATE_PER  => 4,
    DISCOUNT_PRICE => 6,
};

use CAM::PDF;
use Data::Dumper;

my $jacket_id = $ARGV[0];
die "Usage: $0 <jacket_id>\n"
    unless defined $jacket_id && length $jacket_id;

my $pdf_file = "pm_11113472_$jacket_id.pdf";
my $pdf = CAM::PDF->new($pdf_file)
    or die "Cannot open ${pdf_file}: $CAM::PDF::errstr";

# One bid row, i.e. the text that follows an occurrence of the jacket ID.
# Money fields appear in the extracted text as "$ 123.45" (dollar sign,
# one whitespace character, digits); the spaces are removed after matching.
my $re = qr{(?x:
    \A
    \s*?
    ((?:A|))                    # Awd
    \s+
    (\d+-\d+)                   # Contractor Code
    \s+
    ([^\$]+?)                   # Name
    \s+
    (\$\s[0-9,.]+)              # Amount
    \s+
    (\$\s[0-9,.]+\s[A-Z])       # Add'l Rate/PER
    \s+
    ([0-9.]+\s+\d+)             # Discount % Days
    \s+
    (\$\s[0-9,.]+)              # Discount Price
    \s+
    (\D+?)                      # Bidders Name
    \s+
    (\S+)                       # Date Received
    \s+
    (\(\d+\)\s\d+-\d+)          # Phone Number
)};

for my $page_num (1 .. $pdf->numPages) {
    my $text = $pdf->getPageText($page_num);

    # NOTE(review): getPageText presumably returns undef when a page's
    # content cannot be extracted -- confirm against the CAM::PDF docs.
    # Treat that as an empty page so an (empty) dump is still printed.
    $text = '' unless defined $text;

    # Every occurrence of the jacket ID starts a new chunk.  The text before
    # the first occurrence is discarded.  \Q...\E keeps regex metacharacters
    # in the command-line argument from being interpreted as a pattern.
    my (undef, @chunks) = split /\Q$jacket_id\E/, $text;

    my @lines;
    for my $chunk (@chunks) {
        my @fields = $chunk =~ $re;

        # A failed match yields an empty list; editing $fields[...] then
        # would autovivify undef slots and push a bogus row.
        if (!@fields) {
            warn "Page $page_num: skipping text after jacket ID "
               . "$jacket_id that does not look like a bid row\n";
            next;
        }

        $fields[AMOUNT]         =~ y/ //d;   # "$ 844.00"  -> "$844.00"
        $fields[ADDL_RATE_PER]  =~ s/ //;    # first space only: keeps "$15.00 C"
        $fields[DISCOUNT_PRICE] =~ y/ //d;

        push @lines, [ $jacket_id, @fields ];
    }

    print Dumper(\@lines);
}

__END__

####
$ ./pm_11113472_pdf_parse.pl 746810
$VAR1 = [
[ '746810', 'A', '140-89226', 'UNION HOERMANN PRESS', '$844.00', '$15.00 C', '1 20', '$835.56', 'Randy Sigman', '01/22/2020', '(563) 582-3631' ],
[ '746810', '', '190-38407', 'GRAPHIC VISIONS', '$869.00', '$140.00 M', '0.5 20', '$864.66', 'Howard Roskosky', '01/22/2020', '(301) 987-5586' ],
####
$ ./pm_11113472_pdf_parse.pl 746810
$VAR1 = [
[ '746810', 'A', '140-89226', 'UNION HOERMANN PRESS', '$844.00', '$15.00 C', '1 20', '$835.56', 'Randy Sigman', '01/22/2020', '(563) 582-3631' ],
[ '746810', '', '190-38407', 'GRAPHIC VISIONS', '$869.00', '$140.00 M', '0.5 20', '$864.66', 'Howard Roskosky', '01/22/2020', '(301) 987-5586' ],
[ '746810', '', '040-13121', 'BONADA ENTERPRISES/BLUE EARTH', '$902.00', '$0.18 E', '1 7', '$902.00', 'fernando', '01/22/2020', '(323) 272-6430' ],
[ '746810', '', '420-52700', 'LITHO PRESS, INC.', '$941.00', '$18.00 C', '1 20', '$931.59', 'Tim Sankey', '01/22/2020', '(210) 333-1711' ],
[ '746810', '', '420-31784',
'GRAFIKSHOP CORP. DBA FALCON', '$945.00', '$110.00 M', '1 20', '$935.55', 'Mei-Ing Hoffman', '01/22/2020', '(713) 977-2555' ], [ '746810', '', '430-08870', 'BKR PRINTING', '$1,090.00', '$155.00 M', '5 20', '$1,035.50', 'Mark Bengtzen', '01/22/2020', '(801) 532-5363' ], [ '746810', '', '190-28460', 'DOYLE PRINTING', '$1,177.00', '$227.00 M', '5 20', '$1,118.15', 'Michael Carey', '01/22/2020', '(301) 991-2637' ], [ '746810', '', '120-71652', 'PRODUCTION PRESS, INC.', '$1,357.00', '$232.00 M', '0.25 20', '$1,353.61', 'Brad Racey', '01/22/2020', '(217) 243-3353' ], [ '746810', '', '450-34976', 'GABRO GRAPHICS INC.', '$1,940.00', '$295.00 M', '2 20', '$1,901.20', 'Tony Gabro', '01/22/2020', '(703) 464-8588' ], [ '746810', '', '130-13540', 'BOWMAN DISPLAY DIGITAL IMAGING', '$9,327.91', '$1.86 E', '0 0', '$9,327.91', 'Sara Veld', '01/22/2020', '(219) 595-6542' ] ]; #### $ ./pm_11113472_pdf_parse.pl 746819 $VAR1 = [ [ '746819', 'A', '120-64255', 'NOOR INTERNATIONAL CORP', '$387.86', '$7.75 C', '1 20', '$383.98', 'Max Saleem', '01/23/2020', '(847) 985-2300' ], [ '746819', '', '040-44026', 'IMAGE SQUARE INC', '$463.00', '$0.09 E', '0 0', '$463.00', 'Ash Soudbash', '01/22/2020', '(310) 586-2333' ], [ '746819', '', '190-43435', 'HUB LABELS, INC.', '$731.00', '$14.62 C', '1 20', '$723.69', 'Kim Clark', '01/23/2020', '(301) 671-2230' ], [ '746819', '', '090-28380', 'DOUGLASS SCREEN PRINTERS', '$800.00', '$140.00 M', '0.5 20', '$796.00', 'Debbie Carrigan', '01/23/2020', '(863) 899-7130' ], [ '746819', '', '480-79295', 'SERIGRAPHIC SCREEN PRINT', '$800.00', '$0.16 E', '0.5 20', '$796.00', 'Teri Tropple', '01/22/2020', '(800) 657-6740' ], [ '746819', '', '120-77235', 'DRI-STICK DECAL/RYDIN DECAL', '$1,150.00', '$0.00 N', '0 0', '$1,150.00', 'Lori Haberstich', '01/23/2020', '(800) 448-1991' ] ];