use strict;
use warnings;

# Ordered description of one record.  Each entry is [ label, expectation ]:
#   * a qr// pattern  - the field is the next whitespace-delimited word, and
#                       the whole word must match the pattern;
#   * the string 'COUNT' - the field is length-prefixed: a decimal count, one
#                       space, then exactly that many characters (which may
#                       themselves contain spaces).
# Labels must be unique, otherwise a later field overwrites an earlier one in
# the result hash.
my @line_format = (
    [ 'ssn',       qr/(\d{9})/ ],
    [ 'emp_num',   qr/(\d+)/ ],
    [ 'emp_name',  'COUNT' ],
    [ 'hire_date', qr/(\d{8})/ ],
    [ 'address',   'COUNT' ],
    [ 'state',     qr/([A-Z]{2})/ ],
    [ 'city',      'COUNT' ],
    [ 'zip',       qr/(\d{5})/ ],
);

####

# parse_line($line, $linenum)
#
# Splits one record into its fields according to @line_format.
#
#   $line    - the raw record text
#   $linenum - line number, used only in error messages
#
# Returns a hash reference mapping each label to its (string) value.
#
# Dies with a message ending in a newline:
#   Error #1 - a COUNT field does not start with "<digits><space>"
#   Error #2 - fewer than <count> characters remain on the line
#   Error #3 - no word is left for a pattern field
#   Error #4 - the word does not match the field's pattern
sub parse_line {
    my ($line, $linenum) = @_;

    my %parsed;

    foreach my $format (@line_format) {
        my ($label, $expected) = @{$format};

        if ($expected eq 'COUNT') {
            # Pull the COUNT off the beginning of the line.  Anchored so the
            # count can only come from the current position, never from
            # somewhere later in the line.
            $line =~ s/\A\s*(\d+) //
                or die "Error #1 parsing item '$label' (line #$linenum)\n";
            my $count = $1;

            # substr instead of /.{$count}/: a large count would otherwise
            # exceed the regex quantifier limit and abort with a compile
            # error rather than Error #2.  A newline inside the field is
            # rejected, as '.' would have done.
            my $value = substr $line, 0, $count;
            if (length($value) != $count || $value =~ /\n/) {
                die "Error #2 parsing item '$label' (line #$linenum)\n";
            }
            substr($line, 0, $count) = q{};

            $parsed{$label} = $value;
        }
        else {
            # Pull off the next non-space word ...
            $line =~ s/\A\s*(\S+)//
                or die "Error #3 parsing item '$label' (line #$linenum)\n";
            my $word = $1;

            # ... and test it against the field's pattern.  The patterns in
            # @line_format are unanchored, so anchor here to require that the
            # entire word matches.
            if ($word !~ /\A$expected\z/) {
                die "Error #4 parsing item '$label' (line #$linenum)\n";
            }

            $parsed{$label} = $word;
        }
    }

    return \%parsed;
}

####

# Renders a data structure for display.  Prefers Data::Dumper::Concise when it
# is installed; otherwise falls back to core Data::Dumper configured to give
# similarly compact, key-sorted output.
sub _dump {
    my ($data) = @_;

    if (eval { require Data::Dumper::Concise; 1 }) {
        return Data::Dumper::Concise::Dumper($data);
    }

    require Data::Dumper;
    return Data::Dumper->new([$data])
        ->Indent(1)->Sortkeys(1)->Terse(1)->Quotekeys(0)->Useqq(1)->Dump;
}

# Demo: parse one sample record and print the result.
sub main {
    my $line = "123445678 45612 11 Steve Smith 11012015 16 1001 Main Street GA 7 Atlanta 30553";
    my $result = parse_line($line, 1);
    print _dump($result);
    return;
}

# Run the demo only when executed as a script, not when loaded with
# require/do from other code.
main() unless caller;

####
# Expected output:
# {
#   address => "1001 Main Street",
#   city => "Atlanta",
#   emp_name => "Steve Smith",
#   emp_num => 45612,
#   hire_date => 11012015,
#   ssn => 123445678,
#   state => "GA",
#   zip => 30553
# }