# Record layout, listed in the order the fields appear on a line.
# Each entry is [ label, spec ]:
#   - spec 'COUNT' marks a length-prefixed field: a decimal length, one
#     space, then exactly that many characters (which may contain spaces);
#   - any other spec is a qr// pattern describing the expected shape of a
#     single whitespace-delimited word.
# Labels are used as hash keys in the parsed result, so each must be unique;
# a repeated label would silently overwrite the earlier field's value.
my @line_format = (
    [ 'ssn',       qr/(\d{9})/ ],
    [ 'emp_num',   qr/(\d+)/ ],
    [ 'emp_name',  'COUNT' ],
    [ 'hire_date', qr/(\d{8})/ ],
    [ 'address',   'COUNT' ],       # street address (previously mislabeled 'city')
    [ 'state',     qr/([A-Z]{2})/ ],
    [ 'city',      'COUNT' ],
    [ 'zip',       qr/(\d{5})/ ],
);
####
# parse_line($line, $linenum [, $format])
#
# Parse one record into a hash of named fields.
#
#   $line    - the raw record text
#   $linenum - line number, used only in error messages
#   $format  - optional array ref of [ label, spec ] pairs describing the
#              fields in order; defaults to the file-level @line_format
#
# A spec of 'COUNT' means a length-prefixed field: a decimal count, a single
# space, then exactly that many characters (spaces allowed).  Any other spec
# is treated as a regex that the next whitespace-delimited word must match
# in full.
#
# Returns a hash ref mapping each label to the text extracted for it.
# Dies with a message naming the field and line number when the record does
# not fit the layout:
#   Error #1 - no count at the start of a COUNT field
#   Error #2 - fewer characters remain than the count demands
#   Error #3 - no word left for a regex field
#   Error #4 - the word does not match the field's regex
sub parse_line {
    my ($line, $linenum, $format) = @_;

    # A reference is always true, so || is enough to detect "not supplied".
    $format ||= \@line_format;

    my %parsed;
    foreach my $field (@$format) {
        my ($label, $expected) = @$field;

        if ($expected eq 'COUNT') {
            # The count has to be the very next token.  Anchoring keeps a
            # stray number further along the line from being picked up as
            # the length when the real count is missing.
            if ($line !~ s/\A\s*(\d+) //) {
                die "Error #1 parsing item '$label' (line #$linenum)\n";
            }
            my $count = $1;

            # Take exactly $count characters from the front of what is left;
            # anchored so the match cannot slide past leading characters.
            if ($line !~ s/\A(.{$count})//) {
                die "Error #2 parsing item '$label' (line #$linenum)\n";
            }
            $parsed{$label} = $1;
        }
        else {
            # Pull off the next non-space word ...
            if ($line !~ s/\A\s*(\S+)//) {
                die "Error #3 parsing item '$label' (line #$linenum)\n";
            }
            my $word = $1;

            # ... and require the whole word to match the field's regex.
            if ($word !~ /\A(?:$expected)\z/) {
                die "Error #4 parsing item '$label' (line #$linenum)\n";
            }
            $parsed{$label} = $word;
        }
    }

    return \%parsed;
}
##
##
use Data::Dumper::Concise;
# Demo: parse a single sample record and show the resulting hash.
# The dump is handed to die(), so the script stops here with the dump as its
# error message.
my $line = "123445678 45612 11 Steve Smith 11012015 16 1001 Main Street GA 7 Atlanta 30553";
my $result = parse_line( $line, 1 );
die Dumper($result);
##
##
{
city => "Atlanta",
emp_name => "Steve Smith",
emp_num => 45612,
hire_date => 11012015,
ssn => 123445678,
state => "GA",
zip => 30553
}