use strict;
use warnings;
use Data::Dumper;

# Extract eBook records from a GUTINDEX listing and dump them.
#
# Usage:  perl <this script> [GUTINDEX-file]
#
# With no argument the file 'GUTINDEX-2004.txt' in the current directory is
# read.  The result printed on STDOUT is a Data::Dumper dump of an array of
# hashes.  Every hash has the keys 'id', 'title' and 'author', plus one key
# for each "[Key: value]" annotation that followed the title line.

@ARGV = 'GUTINDEX-2004.txt' unless @ARGV;
my $file = shift or die "Usage: $0 GUTINDEX-2001.txt";

open my $index_fh, '<', $file or die "Error: Can't open $file : $!";
my @data = @{ parse_gutindex( $index_fh, $file ) };
close $index_fh;

print Dumper( \@data );

# parse_gutindex( $in, $name )
#
# Reads the listing from the open filehandle $in and returns a reference to
# an array of record hashes.
#
#   $in    - filehandle positioned at the start of the listing
#   $name  - file name as it appears in the "End of <name>" line that
#            terminates parsing
#
# Lines are ignored until the first "~ ~ ~ ~ Posting Dates for the below
# eBooks" banner has been seen.  After that:
#   * a line of the form "<title> by <author>   <5 digits + optional char>"
#     starts a new record;
#   * any other line is searched for bracketed annotations, which are
#     attached to the most recent record.
sub parse_gutindex {
    my ( $in, $name ) = @_;

    my @records;
    my $parsing = 0;

    LINE:
    while ( my $line = <$in> ) {
        next LINE if $line =~ /^\s*$/;
        last LINE if $line =~ /^End\s+of\s+\Q$name\E/;
        chomp $line;

        if ( !$parsing ) {
            # An earlier revision triggered on /^Title\s+and\s+Author/
            # instead of the posting-dates banner.
            $parsing = 1
                if $line =~ /^\Q~ ~ ~ ~ Posting Dates for the below eBooks\E/;
            next LINE;
        }

        if ( $line =~ /^(\w+.*?) by (.+?)\s+(\d{5}.?)$/ ) {
            push @records, { id => $3, title => $1, author => $2 };
            next LINE;
        }

        # Always consume the annotations (they may span several lines), but
        # only store them when there is a record to attach them to;
        # indexing $records[-1] on an empty array would be fatal.
        my @notes = _read_annotations( $in, $line );
        next LINE unless @records;

        for my $note (@notes) {
            if ( $note =~ /(\S+):\s*(.+)/s ) {
                # "Key: value" - the key is the run of non-blank characters
                # immediately before the first matching colon.
                $records[-1]{$1} = $2;
            }
            elsif ( $note =~ /,\s\d+$/s ) {
                # No key, but the text ends in ", <digits>": stored as Date.
                $records[-1]{Date} = $note;
            }
        }
    }

    return \@records;
}

# _read_annotations( $in, $line )
#
# Returns the text of every "[...]" annotation that starts on $line.  When a
# bracket is still open at the end of the line, further lines are read from
# $in and appended until the brackets balance, a blank line is reached, or
# the input is exhausted.  Whitespace inside each annotation (including the
# joins between lines) is collapsed to single spaces and trimmed.
sub _read_annotations {
    my ( $in, $line ) = @_;

    # tr with an empty replacement list only counts the characters.
    while ( ( $line =~ tr/[// ) > ( $line =~ tr/]// ) ) {
        my $more = <$in>;

        # Stop at EOF or a blank line so that one unterminated bracket
        # cannot swallow the rest of the listing.
        last if !defined $more or $more =~ /^\s*$/;
        chomp $more;
        $line .= " $more";
    }

    my @annotations;
    while ( $line =~ /\[([^\]]+)/g ) {
        my $text = $1;
        $text =~ s/\s+/ /g;
        $text =~ s/^ | $//g;
        push @annotations, $text if length $text;
    }

    return @annotations;
}