Use a hash to match records between the two files, and store the matched records in an array that you can iterate over multiple times to create the output in the original order. Each input file is read only once.
#!/usr/bin/perl
use strict;
use warnings;
# config
# Input file names come from the command line (first argument = FileA,
# second = FileB) and fall back to defaults when an argument is absent.
# The test is "defined and non-empty" rather than plain truth so that a
# file literally named "0" is not silently replaced by the default.
my $FileA = ( defined $ARGV[0] && length $ARGV[0] ) ? $ARGV[0] : 'FileA.txt';
my $FileB = ( defined $ARGV[1] && length $ARGV[1] ) ? $ARGV[1] : 'FileB.txt';
my $FileC = "result.csv";    # output file name (relative path)
# Read the smaller file (FileB), one record per line:
#   key1,key2,key3,count
# example l103709,bbbbbbb,c_0200,929
# and create the lookup %hash_B, mapping "key1,key2,key3" => count.
# When a key occurs more than once the first occurrence is kept and a
# warning is issued for each later one.
my %hash_B = ();
open my $fh_b, '<', $FileB
    or die "Cannot open $FileB for reading: $!";

# Number of distinct keys stored; blank, malformed and duplicate lines
# are not counted.
my $countB = 0;
while ( my $line = <$fh_b> ) {
    next unless $line =~ /\S/;    # skip blank lines

    # Strip LF or CRLF: a stray "\r" would otherwise stay attached to the
    # last field (the count) and end up inside the output records.
    $line =~ s/\r?\n\z//;

    my ( $look, $sec, $cls, $max ) = split /,/, $line;
    unless ( defined $max ) {

        # Fewer than four fields: there is no count to store.
        warn "WARNING : Malformed line in $FileB at line $.\n";
        next;
    }

    my $key = join ',', $look, $sec, $cls;
    if ( exists $hash_B{$key} ) {
        warn "WARNING : Duplicate key [$key] in $FileB at line $.\n";
    }
    else {
        $hash_B{$key} = $max;
        ++$countB;
    }
}
close $fh_b;
printf "%d lines read from %s\n", $countB, $FileB;
# Scan the larger file (FileA) and match each line against the lookup
# %hash_B built from the smaller FileB. One record per line:
#   key1,key2,key3,att,idx,qtd
# example l103709,bbbbbbb,c_0200,loc,10,1
open my $fh_a, '<', $FileA
    or die "Cannot open $FileA for reading: $!";

my $countA  = 0;     # non-blank lines read from FileA
my $sum_qtd = 0;     # total qtd over all matched lines; use to check output
my %hash_A  = ();    # "key1,key2,key3" => sum of qtd for that key
my @match   = ();    # [ record, qtd ] pairs, kept in FileA order

while ( my $line = <$fh_a> ) {
    next unless $line =~ /\S/;    # skip blank lines

    # Strip LF or CRLF so the last field (qtd) is a clean number.
    $line =~ s/\r?\n\z//;
    ++$countA;

    my ( $look, $sec, $cls, $att, $idx, $qtd ) = split /,/, $line;
    unless ( defined $qtd ) {

        # Fewer than six fields: without qtd the record cannot be summed
        # or expanded, so report it and move on.
        warn "WARNING : Malformed line in $FileA at line $.\n";
        next;
    }

    my $key = join ',', $look, $sec, $cls;
    next unless exists $hash_B{$key};

    # match found
    my $max = $hash_B{$key};
    $hash_A{$key} += $qtd;
    $sum_qtd      += $qtd;

    # The stored record is look,sec,att,idx,max (cls is not part of it).
    my $record = join ',', $look, $sec, $att, $idx, $max;
    push @match, [ $record, $qtd ];    # store for output
}
close $fh_a;
printf "%d lines read from %s\n", $countA, $FileA;
printf "%d matches with %s\n", scalar @match, $FileB;
# Check that, for each matched key, the total of qtd in FileA
# equals max in FileB. Every mismatch is reported and counted.
my $error = 0;

# Keys are sorted so the warnings come out in a repeatable order.
for my $key ( sort keys %hash_A ) {
    next if $hash_A{$key} == $hash_B{$key};
    ++$error;

    # The key is passed as an argument rather than interpolated into the
    # format string, so a literal '%' in a key cannot be misread as a
    # conversion specifier.
    printf "WARNING %s fileA = %d ; fileB = %d\n",
        $key, $hash_A{$key}, $hash_B{$key};
}

if ( $error == 0 ) {
    print "OK - no errors\n";
}
else {
    printf "ERROR - see %d warnings\n", $error;
}
# output records
# The matched records are written round-robin: each pass over @match
# writes one line for every record that still has quantity left and
# decrements that quantity. A record with qtd N therefore produces N
# lines whose trailing counter runs N-1 .. 0, interleaved with the other
# records in their original order.
printf "%d lines expected\n", $sum_qtd;
open my $fh_c, '>', $FileC
    or die "Cannot open $FileC for writing: $!";

my $countC = 0;    # lines written to the output file

# Keep making passes until one writes nothing. Ending on "no progress"
# (rather than on every record reaching exactly zero) guarantees
# termination even when a record starts at zero or below, or has a
# quantity that never lands exactly on zero.
my $wrote = 1;
while ($wrote) {
    $wrote = 0;
    for my $entry (@match) {
        next unless $entry->[1] > 0;
        --$entry->[1];    # update qtd in array
        print {$fh_c} join( ',', $entry->[0], $entry->[1] ), "\n";
        ++$countC;
        $wrote = 1;
    }
}

# Buffered write errors only surface at close, so check it.
close $fh_c or die "Cannot close $FileC: $!";
printf "%d lines written to %s\n", $countC, $FileC;
poj
|