#!/usr/bin/env perl
use strict;
use warnings;

# Expand apartment ranges found in two-line address records.
#
# Each record in the DATA section consists of a street line, either
# "<street> APT <unit>" or "<street> APT (Range <low> - <high>)", followed
# by a postal line.  One output line is printed for every apartment covered
# by the record, in the form "<street>, APT <unit>, <postal>".
#
# Expected output for the bundled sample data:
#
#   432 10TH ST, APT 2A, BROOKLYN NY 10598-6601
#   432 10TH ST, APT 2B, BROOKLYN NY 10598-6601
#   432 10TH ST, APT 3A, BROOKLYN NY 10598-6601
#   432 10TH ST, APT 3B, BROOKLYN NY 10598-6601
#   432 10TH ST, APT 4A, BROOKLYN NY 10598-6605
#   432 10TH ST, APT 4B, BROOKLYN NY 10598-6605
#   432 10TH ST, APT 5A, BROOKLYN NY 10598-6605
#   432 10TH ST, APT 5B, BROOKLYN NY 10598-6605
#   432 10TH ST, APT 5C, BROOKLYN NY 10598-6605
#   432 10TH ST, APT 5D, BROOKLYN NY 10598-6605
#   432 10TH ST, APT 6A, BROOKLYN NY 10598-6605

# Return true when the line begins with a digit, i.e. looks like a street line.
sub is_street { return shift =~ m/^\d+/; }

# Return true when the line begins with a word character and ends with a digit.
sub is_postal { return shift =~ m/^\w+.+\d$/; }

# Split a street line into its street part and its apartment specification.
#
# Returns a hash reference { street => ..., apartment => ... } where
# "apartment" is either a single unit ("6A") or a range ("2A - 2B").
# Dies when the line does not contain a recognisable "APT" clause.
sub street_components {
    my $address = shift;

    if ($address =~ m/^
            (.+)                            # Street
            \s+APT\s+                       # APT anchor
            (?:\(Range\s+)?                 # Range syntax
            ([\w\d]+(?:\s+-\s+[\w\d]+)?)    # Apartment number
            \)?                             # Closing range syntax
        $/x
    ) {
        return { street => $1, apartment => $2 };
    }

    die "Street address match failure: <<$address>>\n";
}

# Return the postal line unchanged; kept as a hook for future parsing.
sub postal_component { return shift }

# Expand an apartment specification into the list of individual apartments.
#
# A single unit ("6A") yields a one-element list.  A range ("5A - 5D") is
# split into a leading number and a trailing suffix per endpoint, and every
# number/suffix combination between the endpoints is generated.
#
# Returns an array reference.  Dies when a range endpoint is not of the form
# <digits><word characters>, instead of expanding undefined values.
sub apartment_expand {
    my $apartment_range = shift;

    my ($low, $high) = split /\s*-\s*/, $apartment_range;
    return [$low] unless defined $high && length $high;

    my ($low_num,  $low_alpha)  = $low  =~ m/^(\d+)(\w+)$/;
    my ($high_num, $high_alpha) = $high =~ m/^(\d+)(\w+)$/;
    die "Apartment range match failure: <<$apartment_range>>\n"
        if !defined $low_num || !defined $high_num;

    my @apartments;
    foreach my $num ($low_num .. $high_num) {                # Numeric increment.
        foreach my $letter ($low_alpha .. $high_alpha) {     # Alpha increment.
            push @apartments, "${num}${letter}";
        }
    }
    return \@apartments;
}

my %record;
while (my $line = <DATA>) {
    chomp $line;
    next unless length $line;

    # Classify each line exactly once: a street line that happens to end in
    # a digit must not also be taken as the postal line of the same record.
    if (is_street($line)) {
        $record{'addr'} = street_components($line);
    }
    elsif (is_postal($line)) {
        $record{'postal'} = postal_component($line);
    }

    # Emit the record as soon as both halves have been seen, then reset.
    if (exists $record{'addr'} && exists $record{'postal'}) {
        my $apartments = apartment_expand($record{'addr'}->{'apartment'});
        foreach my $apartment (@$apartments) {
            printf "%s, APT %s, %s\n" =>
                $record{'addr'}->{'street'}, $apartment, $record{'postal'};
        }
        %record = ();
    }
}

__DATA__
432 10TH ST APT (Range 2A - 2B)
BROOKLYN NY 10598-6601
432 10TH ST APT (Range 3A - 3B)
BROOKLYN NY 10598-6601
432 10TH ST APT (Range 4A - 4B)
BROOKLYN NY 10598-6605
432 10TH ST APT (Range 5A - 5D)
BROOKLYN NY 10598-6605
432 10TH ST APT 6A
BROOKLYN NY 10598-6605