Hello kroach,
I compared two ways of doing this, and letting Perl do the buffering clearly wins. Given the size of your line, though, you may want to look at 'getnum_new' (called by the second subroutine, 'getnum2') to see how to do partial reads from the file. I think both will work for your requirement (memory allowing). Reading a line at a time was about 4-6 times faster.
use strict;
use warnings;
use Benchmark qw(:all);
our ( $eof, $buffer );
# Build a file for testing: 11 lines, each holding 31 random keys
# (integers below 3000) separated by single spaces, then the line number.
open( my $data, ">", "./slurp.txt" ) or die "Can't open ./slurp.txt: $!";
for my $lines ( 0 .. 10 ) {
    my $unit = '';
    for ( 0 .. 30 ) {
        $unit .= int( rand(3000) ) . " ";    # simulate keys
    }
    $unit .= $lines;    # make sure the last field has no trailing space
    print $data "$unit\n";
}

# Buffered write errors only surface at close, so check it.
close $data or die "Can't close ./slurp.txt: $!";

# Sanity check: both readers must produce the same string before we
# bother timing them.
my $sa = getnum1();
my $sb = getnum2();

# print "sa|$sa\n\nsb|$sb\n"; exit;
if ( $sa ne $sb ) { print "Didn't Work!\n"; exit(1); }

# A negative count asks Benchmark to run each case for at least that many
# CPU seconds rather than a fixed number of iterations.
timethese(
    -9,
    {
        case1 => sub { getnum1() },
        case2 => sub { getnum2() },
    },
);
# Line-at-a-time reader: lets Perl do the buffering.
# Reads ./slurp.txt, splits every line on single spaces and returns all
# fields concatenated into one string, each field followed by a comma.
sub getnum1 {
    my $joined = '';
    open( my $in, "<", "./slurp.txt" ) || die "$!";
    while ( my $record = <$in> ) {
        chomp $record;

        # Append every field of this line, comma-terminated.
        $joined .= "$_," for split( /\ /, $record );
    }
    close $in;
    return $joined;
}
# Partial-read variant: pulls one token at a time from getnum_new() and
# joins the tokens with commas. Relies on the package variable $eof, which
# getnum_new() sets once the file is exhausted.
sub getnum2 {
    my $joined = '';
    $eof = 0;
    open( my $inp, "<", "./slurp.txt" ) || die "$!";

    # Always fetch at least once; stop after the call that reports EOF.
    do {
        $joined .= getnum_new( \$inp ) . ',';
    } until ($eof);

    # Drop the comma appended after the final fetch.
    chop $joined;

    close $inp;
    return $joined;
}
# Return the next token from a file, reading it in 1024-byte chunks.
#
# Argument: a reference to a scalar holding an open, readable file handle.
# Returns:  the characters up to (not including) the next space or newline.
#           When the file is exhausted, sets the package variable $eof to 1
#           and returns whatever had been collected so far (possibly '').
# State:    unread input is kept in the package variable $buffer between
#           calls.
# Dies if read() reports an I/O error.
sub getnum_new {
    my $file = shift;
    my $ret  = '';
    our $eof;
    our $buffer;
    while (1) {

        # Refill only when the buffer is really empty. A plain truth test
        # would treat a buffer holding just "0" as empty and the refill
        # would then overwrite (lose) that digit.
        if ( !defined $buffer || !length $buffer ) {
            my $size = read( $$file, $buffer, 1024 );

            # read() returns undef on error, 0 at end of file.
            die "read failed: $!" unless defined $size;
            if ( $size == 0 ) { $eof = 1; return $ret; }
        }

        # Four-argument substr removes and returns the first character.
        my $val = substr( $buffer, 0, 1, '' );
        if ( ( $val eq ' ' ) || ( $val eq "\n" ) ) { return $ret; }
        $ret .= $val;
    }
}
That's one long line :-)
Regards...Ed
"Well done is better than well said." - Benjamin Franklin