*********** Sample text ***************
CONFORMED PERIOD OF REPORT:	20081231     <------ individual line I want
FILED AS OF DATE:		20090331     <------ individual line I want
DATE AS OF CHANGE:		20090331     <------ individual line I want

CENTRAL INDEX KEY:		0000786368	<------ individual line I want
		
	FORM TYPE:		10-K	<------ individual line I want

Whole bunch of text here ...

</DOCUMENT>
<DOCUMENT>
<TYPE>EX-21
<SEQUENCE>7
<FILENAME>v144610_ex21.htm      <----------- My starting point
<TEXT>
<html>
  	*************  BODY OF TEXT I WISH TO EXTRACT ****************
</html>
</TEXT>
</DOCUMENT>               <----------- My ending point

**********End of sample text ***********



#!/usr/bin/perl -w
use strict;
use warnings;
use File::stat;
use lib "c:/strawberry/perl/site/lib";

#Input location: the directory whose files will be read;
my $files_dir = 'E:\research\audit fee models\filings\Test';

#Output location for the results;
#Despite the name, the value is the path of a .txt file, not a directory;
my $write_dir = 'E:\research\audit fee models\filings\filenames\filenames.txt';

#Obtain a handle on the input directory, or abort with the OS error;
my $dir_handle;
opendir($dir_handle, $files_dir)
    or die "Can't open directory $!";

#Initialize the variable names.
#Each scalar is declared exactly once: a second "my" for the same name in
#the same scope masks the first and triggers a warning under -w.

#Counters;
my $file_count = 0;
my $line_count = 0;     #incremented once per line read by the file loop

#Header fields filled in from the filing; -99 / "" mean "not seen yet";
my $cik         = -99;  #CENTRAL INDEX KEY, leading zeros stripped
my $form_type   = "";   #FORM TYPE (10-K variants)
my $form        = "";
my $report_date = -99;  #CONFORMED PERIOD OF REPORT

#Exhibit bookkeeping;
my $htm   = "";         #<FILENAME> of the EX-21 exhibit
my $url   = "";
my $slash = '/';

#Loop for reading each file in the input directory;

while (my $filename = readdir($dir_handle))  {
next unless -f $files_dir.'/'.$filename;
print "Processing $filename\n";

#Open the input file; report the OS error so a failure can be diagnosed;
open my $FH_IN, '<', $files_dir.'/'.$filename
    or die "Can't open $filename: $!";

#Per-file state for the block text accumulation;
#@exhibit_lines receives every line found after the EX-21 <FILENAME> line and
#before the next </DOCUMENT> line (both marker lines are left out);
#$in_exhibit is true while the scan is between those two markers;
my @exhibit_lines;
my $in_exhibit = 0;

#Within the file loop, read each line of the current file;
#(The file was already tested with -f before it was opened, so that test is
#not repeated for every line.)
while (my $line = <$FH_IN>) {

    #Stop reading once the line counter passes the cap;
    #NOTE(review): $line_count is not reset anywhere in the visible code, so
    #the cap looks cumulative across files -- confirm that is intended;
    last if $line_count > 500000;

    #Begin extracting header type data from the file;

    #\d+ (not \d*) so a label with no digits cannot overwrite a good value;
    if ($line =~ m/^\s*CENTRAL\s*INDEX\s*KEY:\s*(\d+)/) {
        $cik = $1;
        $cik =~ s/^0+//;                 #drop the zero padding
    }

    #"10k..." and "10-k..." in any letter case, captured to end of line;
    if ($line =~ m/^\s*FORM\s*TYPE:\s*(10-?k.*$)/i) {
        $form_type = $1;
    }

    if ($line =~ m/^\s*CONFORMED\s*PERIOD\s*OF\s*REPORT:\s*(\d+)/) {
        $report_date = $1;
    }

    #End of header type information;

    #Begin block text accumulation;

    if ($line =~ m{^\s*<FILENAME>(.*?ex-?21.*?\.htm)\s*$}i) {
        #Starting point: an exhibit file name containing "ex21" or "ex-21"
        #(any case) and ending in ".htm"; remember the name, start collecting;
        $htm        = $1;
        $in_exhibit = 1;
    }
    elsif ($in_exhibit && $line =~ m{^\s*</DOCUMENT>}i) {
        #Ending point: m{...} delimiters are needed because the pattern
        #itself contains a "/";
        $in_exhibit = 0;
    }
    elsif ($in_exhibit) {
        push @exhibit_lines, $line;
    }

    #End block text accumulation;

    #Update line counter;
    ++$line_count;
}