use warnings;
use strict;
use Data::Dumper;

# Build an index that maps the Subject of every message in an mbox-style
# archive to the byte offset at which that message starts, then dump the
# index to STDOUT.
#
# Usage: perl script.pl [ARCHIVE]
#   ARCHIVE  path of the archive to index (defaults to "20090101.arch").
#
# Notes:
#   * Messages are delimited by a blank line followed by a "From " line.
#   * If several messages share the same Subject, the last one wins.
#   * Folded (multi-line) Subject headers are not unfolded; only the first
#     physical line is used as the key.

my $archive = @ARGV ? $ARGV[0] : "20090101.arch";

my %subject_offset;
my $last_offset = 0;    # byte offset at which the current message starts

{
    # Divide the file into email chunks: every read returns one message
    # plus the separator that introduces the next one.
    local $/ = "\n\nFrom ";    # note single whitespace at the end

    open(my $FH, "<", $archive) or die("Cannot open '$archive': $!");

    while (my $block = <$FH>) {
        # Only the header section (everything before the first blank line)
        # is searched, so a "Subject: " line quoted in a message body can
        # never be mistaken for the real header.
        my ($headers) = $block =~ /\A(.*?)(?:\n\n|\z)/s;

        if ($headers =~ /\nSubject: ([^\n]*)/) {
            $subject_offset{$1} = $last_offset;
        }

        # Advance for EVERY chunk, including messages without a Subject;
        # otherwise the following message would be recorded at a stale
        # offset.  The separator already consumed the next message's
        # leading "From ", hence minus length("From ").
        $last_offset = tell($FH) - 5;
    }

    close($FH) or die("Cannot close '$archive': $!");
}

# Sorted keys make the dump deterministic from run to run.
$Data::Dumper::Sortkeys = 1;
print Dumper(\%subject_offset);
__END__
$VAR1 = {
          'Test mail 1' => 0,
          'Test mail 2' => 395
        };