Instead of having one regex for each field, you can use the /g modifier to step from one field to the next, and use the (?=EXPR) lookahead syntax to check that what follows your field is another field (or the end of the record) and not data without a field label:
use strict;
use warnings;
use Data::Dumper;

# Parse blank-line-separated records read from the DATA handle.  Each record
# is a sequence of "fieldN: value" entries; a value may continue over several
# lines until the next "fieldN:" line or the end of the record.  The result is
# a hash of hashes ("record K" => { "fieldN" => value }) printed with Dumper.

# Matches one "fieldN: value" entry inside a record.
#   $1 - the field number
#   $2 - the value, possibly spanning several lines
my $regex = qr/
    ^field(\d+):         # a line starting with 'field'; capture its number
    (.*?)\n?             # the shortest text before the ...
    (?=^field\d+:|\z)    # ... next line starting with 'field', or the end of
                         # the record; the lookahead consumes nothing, so the
                         # next match resumes right at that point
/msx;    # m: ^ matches at every line start, s: . matches \n,
         # x: whitespace and comments are ignored inside the regex

my %result;
my $count = 1;

{
    # Paragraph mode: every read returns one blank-line-separated record.
    # 'local' restores the previous separator when this block is left.
    local $/ = "";

    while (my $record = <DATA>) {
        my %hash;

        # In scalar context /g resumes after the previous match, so this
        # walks the record one field at a time.
        while ($record =~ /$regex/g) {
            $hash{"field$1"} = $2;
        }

        # NOTE: the record still carries the separator newlines, so the last
        # field of a record that is followed by a blank line keeps one
        # trailing newline in its value.
        $result{"record ".$count++} = \%hash;
    }
}

# Sort the keys so the dump is printed in a reproducible order.
local $Data::Dumper::Sortkeys = 1;
print Dumper \%result;
__DATA__
field1: data 1 monday
field2: data 2 monday
field3: data 3 monday

field1: data 1 tuesday
field2: data 2 tuesday
tuesday details line 1
tuesday details line 2
field3: data 3 tuesday
$VAR1 = {
'record 1' => {
'field1' => ' data 1 monday',
'field2' => ' data 2 monday',
'field3' => ' data 3 monday
'
},
'record 2' => {
'field1' => ' data 1 tuesday',
'field2' => ' data 2 tuesday
tuesday details line 1
tuesday details line 2',
'field3' => ' data 3 tuesday'
}
};
|