No, I don't know how to monitor that. I'm not sure this is a memory issue, though: I just bought this desktop, which has 16 GB of memory. Below is the main part of the code.
# For each path in @infiles: open the file and read its entire contents into
# $data. Files that cannot be opened are reported on STDERR and skipped.
# NOTE(review): the closing brace of this loop is not visible in this excerpt.
foreach (@infiles) {
my $data;
# Copy of the current path; not referenced again in the visible code.
my $address = $_;
# NOTE(review): two-argument open on the bareword handle FILE. In this form
# Perl strips leading/trailing whitespace from the path and honours
# redirection syntax after the "<" (e.g. a name starting with "&"), so the
# three-argument form with a lexical handle would be safer. FILE is also never
# closed explicitly in the visible code; re-opening the same handle on the
# next iteration closes the previous file implicitly.
unless(open (FILE,"<$_")){
print STDERR "could not open $_: $!\n";
next;}
# Undefine the input record separator for the rest of this loop body so the
# read below returns the whole file as one string.
local $/;
$data=<FILE>;
# Markup detection: $html is defined when $data contains any of the
# alternatives below (case-insensitive); only then is the clean-up applied.
# NOTE(review): the whitespace-only alternative in this pattern and the
# substitutions further down whose pattern and replacement are the same
# character look like HTML entity names that were decoded when the code was
# pasted. As written they are no-ops (or, for the whitespace one, delete every
# such character). Confirm against the original script.
my($html) = $data =~ /(<p>|<\/P>| |<html>)/im;
if (defined($html)){
# Replace every tag with a newline.
$data =~ s/<[^>]+>/\n/g;
$data =~ s/ //ig;
$data =~ s/</</ig;
$data =~ s/>/>/ig;
$data =~ s/"/"/ig;
$data =~ s/&/&/ig;
# Collapse numeric character references (&#NNN;) to a bare "&".
$data =~ s/&#\d+;/&/ig;
# Turn each "&&" pair into " &".
$data =~ s/&&/ &/ig;
# An "&" sitting between word characters and digits becomes a space.
$data =~ s/(\w+)&(\d+)/$1 $2/ig;
}
# Per-file working variables. $keyword4 and $block2 are declared but not used
# anywhere in the visible code.
# NOTE(review): lines that begin with "+" appear to be continuation markers
# from a line-wrapped paste, not Perl operators; each one presumably belongs on
# the end of the previous line. Confirm and re-join before running.
my($block, $lines, $auditor, $auditorcity, $auditorstate,
$date_audited, $keyword1, $keyword2, $keyword3, $keyword4,
+$block2,);
# Find the phrase that introduces the audit report ($keyword1) or, failing
# that, an "our audits" / "consent to" phrase ($keyword2).
($keyword1) = $data =~ /(have(\s*|\n)audited [^a])/i;
($keyword2) = $data =~ /(our(\s+|\n)audits|consent to)/i;
# Select $block: the empty string when neither phrase was found; otherwise the
# text starting at the matched phrase ($keyword1 takes precedence) and running
# through up to 350 newline-terminated lines.
# NOTE(review): the captured phrase is interpolated back into a pattern
# without \Q...\E. $keyword1 ends with an arbitrary non-"a" character, so a
# regex metacharacter there (e.g. "(") changes or breaks the pattern.
if (!defined($keyword1) && !defined($keyword2)) {($block) =""
+;}
elsif (!defined($keyword1) && defined($keyword2)) {($block) =
+ $data =~ /($keyword2(?:[^\n]*\n){1,350})/im;}
elsif(defined($keyword1)) {($block) = $data =~ /($keyword1(
+?:[^\n]*\n){1,350})/im;}
# Look for a report/consent heading ("... INDEPENDENT ... PUBLIC ACCOUNTANTS",
# "REPORT OF INDEPENDENT AUDITORS", etc.) inside $block.
# NOTE(review): the outer group ends with an empty alternative, so the match
# succeeds with an empty capture wherever no heading starts; $keyword3 then
# looks to be always defined and the elsif branch below unreachable. Verify.
($keyword3) = $block =~ /(((REPORT(\s+|\n)OF|CONSENT OF)|)\s
+*INDEPENDENT\s*(CERTIFIED|registered|registered CERTIFIED|)\s*PUBLIC\
+s*(ACCOUNTANTS|accounting(\s+|\n)firm)|REPORT OF INDEPENDENT ACCOUNTA
+NTS|REPORT OF INDEPENDENT AUDITORS|)/im;
# $lines becomes the part of $block from offset 1 up to just before the first
# occurrence of $keyword3 (the substr starts at 1, so the first character of
# $block is dropped). With an empty $keyword3, index() returns 0 and the
# length argument is -1, i.e. everything except the first and last character.
if (defined($keyword3)) {($lines) = substr ($block,1,index($b
+lock,$keyword3)-1);}
elsif (!defined($keyword3)){($lines) = $block;}
# Primary extraction of auditor name, city, state and audit date from $lines.
# NOTE(review): lines that begin with "+" appear to be paste-wrap continuation
# markers that belong on the end of the previous line; confirm before running.
#
# Auditor: a whole line that starts with a capitalised word, "/word" or
# "& word" and ends with a firm suffix (LLP, L.L.P., LLC, LTD, P.A., P.C., PC),
# optionally followed by a period.
# NOTE(review): "P.A." is written with unescaped dots here and in the patterns
# below, so each dot matches any character.
($auditor) = $lines =~ /^\s*((?:[A-Z]\w+|\/\w+|&\s*\w+).+?\s*
+(LLP|L\.L\.P\.|LLC|LTD|P.A.|P\.C\.|PC)(\.|))$/m;
if(defined ($auditor)){
# City: the text after the auditor name (and an optional second firm-suffixed
# phrase or parenthetical) up to a comma that is not preceded by a digit.
# State: the text after that comma up to the end of the line.
# NOTE(review): $auditor is interpolated as regex source without \Q...\E, so
# ".", "(", "+" and similar characters in the name act as metacharacters.
($auditorcity) = $lines =~ /$auditor\s*(?:(?:.+?\s*(?:LLP|L\
+.L\.P\.|LLC|LTD|P.A.|P\.C\.|PC)(?:\.|))|\(.+\)|)\s*(.+?)(?<![\d]),\s*
+(?:.+?)$/m;
($auditorstate) = $lines =~ /$auditor\s*(?:(?:.+?\s*(?:LLP|L\
+.L\.P\.|LLC|LTD|P.A.|P\.C\.|PC)(?:\.|)|\(.+\)|)\s*.+?(?<![\d]),\s*)(.
++?)$/m;
# Date: "Word D, YYYY" following the state, then end of line, "except" or "(".
# NOTE(review): if $auditorstate is undef it interpolates as an empty string,
# so this pattern is then unanchored; it is also interpolated unescaped.
($date_audited) = $lines =~ /$auditorstate\s*(\w+\s*(\d\d|\d)
+,\s*\d{4})\s*($|except|\()/m;
}
# Fallback when neither city nor state was found: a line of the form
# "Word, Word" that does not end in "LLP", and a date at the start of a line.
# NOTE(review): the $date_audited assignment here replaces any value set
# above, and sets it to undef when this pattern does not match.
if(!defined($auditorcity) && !defined($auditorstate)){
($auditorcity) = $lines =~ /^\s*(\w+)(?<![\d]),\s*(?:\w+)(?
+<!LLP)$/m;
($auditorstate) = $lines =~ /^\s*(?:\w+(?<![\d]),\s*)(\w+)(?
+<!LLP)$/m;
($date_audited) = $lines =~ /^\s*(\w+\s*(\d\d|\d),\s*\d{4})\
+s*($|except|\()/m;
}
# Second-tier fallbacks for city/state and for the auditor name.
# NOTE(review): lines that begin with "+" appear to be paste-wrap continuation
# markers that belong on the end of the previous line; confirm before running.
#
# Auditor known but no city/state yet: accept a line "City, State" where the
# city is one to three words and the line does not end in a digit.
if(defined ($auditor) && !defined($auditorcity) && !defined($
+auditorstate)){
($auditorcity) = $lines =~ /^\s*(\w+|\w+ \w+|\w+ \w+ \w+)(?:
+(?<![\d]),\s*(?:\w+|\w+ \w+|\w+ [^a] \w+))(?<![\d])$/m;
($auditorstate) = $lines =~ /^\s*(?:(?:\w+|\w+ \w+|\w+ \w+ \w
++)[^\d],\s*)(\w+|\w+ \w+|\w+ [^a] \w+)(?<![\d])$/m;}
# Still no city/state but a date is known: accept "City, State" immediately
# followed by that date on the same line.
# NOTE(review): $date_audited is interpolated as regex source unescaped.
if (!defined($auditorcity) && !defined($auditorstate) && defi
+ned($date_audited)){
($auditorcity) = $lines =~ /^\s*(\w+|\w+ \w+|\w+ \w+ \w+)(?:
+(?<![\d]),\s*(?:\w+|\w+ \w+|\w+ [^a] \w+))\s*$date_audited/m;
($auditorstate) = $lines =~ /^\s*(?:(?:\w+|\w+ \w+|\w+ \w+ \w
++)[^\d],\s*)(\w+|\w+ \w+|\w+ [^a] \w+)\s*$date_audited/m;
}
# No auditor but a city is known: take the text that follows two or more
# newlines and precedes the city.
if (!defined($auditor) && defined($auditorcity)){
($auditor) = $lines =~ /\n{2,}\s*(.+?)?\s+$auditorcity/m;
}
# Otherwise take the hyphen-free text between a "/s/" signature marker and
# the city.
if (!defined($auditor) && defined($auditorcity)){
($auditor) = $lines =~ /(?:\/s\/)\s*([^-]+?)\s*$auditorcit
+y/m;}
# Last resort within $lines: hyphen-free text from "/s/" to the end of a line.
if (!defined($auditor)){
($auditor) = $lines =~ /(?:\/s\/)\s*([^-]+?)$/m;}
# Final fallbacks, used when the auditor (or city/state/date) is still unknown.
# NOTE(review): lines that begin with "+" appear to be paste-wrap continuation
# markers that belong on the end of the previous line; confirm before running.
#
# No auditor yet: re-point $lines at the text from "consent to" through up to
# 90 newline-terminated lines of $data, and look there for a line ending in a
# firm suffix. $lines keeps this new value (undef if "consent to" is absent)
# for every step below.
if (!defined($auditor)){
($lines) = $data =~ /(consent to(?:[^\n]*\n){1,90})/im;
($auditor) = $lines =~ /^\s*(.+?\s*(LLP|L\.L\.P\.|LLC|L
+TD|P.A.|P\.C\.|PC))$/m;
# City is the text after the auditor up to the first comma; state is the
# text after that comma to the end of the line.
# NOTE(review): $auditor is interpolated as regex source without \Q...\E.
if(defined ($auditor) && !defined($auditorcity) && !defined($
+auditorstate)){
($auditorcity) = $lines =~ /$auditor\s*(.+?),\s*(?:.+?)$/m;
($auditorstate) = $lines =~ /$auditor\s*(?:.+?,\s*)(.+?)$/m;}
}
# Still nothing: search all of $data for one of a fixed list of firm names,
# optionally followed by "LLP" (case-insensitive).
if (!defined($auditor)){
($auditor) = $data =~ /((PWC|KPMG|ERNST & YOUNG|DELOITTE &
+ TOUCHE|PRICEWATERHOUSECOOPERS|Young LLP)\s*(LLP|))/i;}
# Auditor known, no city/state: "City, State" right after the auditor name,
# where the comma is not preceded by a digit.
if(defined ($auditor) && !defined($auditorcity) && !defined($au
+ditorstate)){
($auditorcity) = $lines =~ /$auditor\s*(.+?)(?<![\d]),\s*(?:.+
+?)$/m;
($auditorstate) = $lines =~ /$auditor\s*(?:.+?(?<![\d]),\s*)(.+
+?)$/m;}
# Same, but allowing a "Word D, YY(YY)" date between the auditor name and the
# "City,State" pair.
if(defined ($auditor) && !defined($auditorcity) && !defined($au
+ditorstate)){
($auditorcity) = $lines =~ /$auditor\s*(?:\w+ (?:\d|\d\d)),\s*
+\d{2,4}\s*(.+?),(?:.+?)$/m;
($auditorstate) = $lines =~ /$auditor\s*(?:\w+ (?:\d|\d\d)),\s*
+\d{2,4}\s*(?:.+?),(.+?)$/m;}
# Date fallback: "Word D, YYYY" at the start of a line, followed by a comma
# or the end of the line.
if(!defined($date_audited)){
($date_audited) = $lines =~ /^\s*(\w+\s*(\d\d|\d),\s*\d{4})(,|
+$)/m;}
# Emit one tab-separated row per input file: auditor, city, state, date.
# OUTFILE is opened outside the visible code. Any field that is still undef
# interpolates as an empty string (with an "uninitialized" warning when
# warnings are enabled).
print OUTFILE "$auditor\t";
print OUTFILE "$auditorcity\t";
print OUTFILE "$auditorstate\t";
print OUTFILE "$date_audited\n";
|