#!/usr/bin/perl
use warnings;
use strict;
use Data::Dumper;
$Data::Dumper::Sortkeys = 1;    # stable key order so the dump is reproducible

# Extracts selected "NAME: value" header fields and the HTML body of the
# EX-21 exhibit from a filing text (sample in __DATA__) and dumps the result.
#
# NOTE(review): the copy of this script under review had every
# angle-bracket token stripped (the <DATA> reads, the <html> start pattern,
# and the tag in front of the file-name pattern).  <DATA> and <html> are
# restored from the surrounding code; <FILENAME>, <TYPE> and <SEQUENCE> are
# restored on the assumption that the input follows the SEC EDGAR
# full-submission layout -- confirm against a real input file.

# get_html($fh)
#
# Continues reading $fh (defaults to the DATA handle) from its current
# position and returns, as a list of chomped lines, everything from the
# first line containing "<html>" through the first following line
# containing "</html>", inclusive.  Reading stops at that closing line so
# later sections of the input are left untouched.  Returns an empty list
# if no "<html>" line is found.
sub get_html {
    my ($fh) = @_;
    $fh = \*DATA unless defined $fh;

    my @html_lines;
    while ( my $line = <$fh> ) {

        # Scalar-context ".." is the flip-flop operator (see [id://525392]):
        # false outside the range, a sequence number inside it, with "E0"
        # appended on the line that closes the range.
        my $in_range = ( $line =~ /<html>/ ) .. ( $line =~ /<\/html>/ );
        next unless $in_range;

        chomp $line;
        push @html_lines, $line;
        last if $in_range =~ /E0\z/;    # closing tag seen: stop here
    }
    return @html_lines;
}

# parse_filing($fh)
#
# Reads $fh line by line and returns a hash reference holding:
#   * one entry per "NAME: value" line seen before the exhibit
#     (NAME is letters/digits/underscores/spaces, value is [\w-]+);
#   * FILENAME - the "...ex21....htm" / "...ex-21....htm" name taken from
#     the <FILENAME> line;
#   * DATA     - array ref of the html lines that follow that line
#     (key absent when no html section was found).
# Parsing stops once the exhibit body has been collected.
sub parse_filing {
    my ($fh) = @_;

    my %hash;
    while ( my $line = <$fh> ) {
        if ( my ( $name, $value ) = $line =~ m/^\s*([\w ]+):\s+([\w-]+)/ ) {
            $hash{$name} = $value;
        }

        if ( my ($filename) =
            $line =~ m/^\s*<FILENAME>\s*(\w+ex-?21\S*\.htm)/i )
        {
            $hash{FILENAME} = $filename;
            my @html_lines = get_html($fh);
            $hash{DATA} = \@html_lines if @html_lines;
            last;
        }
    }
    return \%hash;
}

# Script entry point: parse the embedded sample and dump the result.
sub main {
    print Dumper parse_filing( \*DATA );
    return;
}

# Run only when executed as a script, so the file can also be loaded with
# require for testing without producing output.
main() unless caller;

=pod

Prints:

    $VAR1 = {
              'CENTRAL INDEX KEY' => '0000786368',
              'CONFORMED PERIOD OF REPORT' => '20081231',
              'DATA' => [
                          '<html>',
                          'blah ',
                          'smore blah',
                          'blahblah',
                          ' **** BODY OF TEXT I WISH TO EXTRACT *****',
                          '</html>'
                        ],
              'DATE AS OF CHANGE' => '20090331',
              'FILED AS OF DATE' => '20090331',
              'FILENAME' => 'v144610_ex21.htm',
              'FORM TYPE' => '10-K'
            };

=cut

__DATA__
*********** Sample text ***************
CONFORMED PERIOD OF REPORT: 20081231 ------ individual line I want
FILED AS OF DATE: 20090331 ------ individual line I want
DATE AS OF CHANGE: 20090331 ------ individual line I want
CENTRAL INDEX KEY: 0000786368 ------ individual line I want
FORM TYPE: 10-K ------ individual line I want
Whole buncha text here .
<TYPE>EX-21
<SEQUENCE>7
<FILENAME>v144610_ex21.htm -----------My starting point
<html>
blah 
smore blah
blahblah
 **** BODY OF TEXT I WISH TO EXTRACT *****
</html>
----------- My ending point
**********End of sample text ***********