Re: Reading a huge input line in parts

Hello kroach,

I tried to compare two ways of doing this, and clearly letting Perl do the buffering wins out. However, given the size of your line, you may want to look at the second approach ('getnum2', which calls the helper 'getnum_new') to see how to do partial reads from the file. I think both will work for your requirement (memory allowing). Reading a line at a time was about 4-6 times faster.

use strict;
use warnings;
use Benchmark qw(:all);

    # Package globals shared by getnum2 and getnum_new:
    #   $eof    - set to 1 by getnum_new when read() returns no more data
    #   $buffer - the not-yet-consumed part of the last chunk read
    our ( $eof, $buffer );

#   Build a file for testing!
    open ( my $data, ">", "./slurp.txt" ) or die "$!";
    for my $lines ( 0..10 )
    {    # 31 random numbers (simulated keys) separated by single spaces,
         # followed by the line number so the line doesn't end with a space.
        my $unit = join( ' ', ( map { int( rand(3000) ) } 0..30 ), $lines );
        print $data "$unit\n";
    }
    # Buffered write errors only show up at close, so check it.
    close $data or die "$!";

    # Sanity check: both readers must produce the same string before
    # it makes sense to compare their speed.
    my $sa = getnum1();
    my $sb = getnum2();
#    print "sa|$sa\n\nsb|$sb\n"; exit;
    if ( $sa ne $sb ) { print "Didn't Work!\n"; exit(1); }


# A negative count tells Benchmark to run each case for at least that
# many CPU seconds instead of a fixed number of iterations.
timethese ( -9 ,
        {
         case1 => sub { getnum1() },
         case2 => sub { getnum2() },
       },
    );


# getnum1 - line-at-a-time reader (Perl does the buffering).
# Reads ./slurp.txt, splits every line on single spaces and returns all
# fields as one string in which each field is followed by a comma.
sub getnum1
{
    open ( my $fh, "<", "./slurp.txt" ) || die "$!";

    my @pieces;
    while ( defined( my $row = readline $fh ) )
    {
        chomp $row;
        # Every field carries its own trailing comma.
        push @pieces, map { "$_," } split( /\ /, $row );
    }
    close $fh;

    return join '', @pieces;
}

# getnum2 - partial-read reader.
# Pulls one value at a time from ./slurp.txt through getnum_new and
# returns them as one string, each value followed by a comma.
# Resets the shared $eof flag before starting.
sub getnum2
{
    $eof = 0;
    my $joined = '';
    open ( my $inp, "<", "./slurp.txt" ) || die "$!";

    # getnum_new raises $eof on the call that runs out of data; that
    # call's result is still appended before the loop stops.
    do
    {    $joined .= getnum_new( \$inp ) . ',';
    } until ( $eof );

    # Drop the comma appended after the final (end-of-file) call.
    chop $joined;

    close $inp;
    return $joined;
}

# getnum_new( \$fh )
# Returns the next value from the file, reading it in chunks of up to
# 1024 bytes instead of a whole line at a time.
#   $file   - reference to a scalar holding an open read handle
#   returns - the characters up to (not including) the next space or
#             newline; at end of input, whatever was collected so far
#             (possibly '') with the package global $eof set to 1
#   dies    - if read() reports an error
# Unconsumed input is kept in the package global $buffer between calls.
sub getnum_new
{   my $file = shift; my $ret = ''; our $eof; our $buffer;
    while( 1 )
    {    # Refill only when the buffer is really empty. A plain truth
         # test would treat a leftover "0" as empty, and read() would
         # then overwrite that digit.
        if ( ! defined $buffer || ! length $buffer )
        {    my $size = read ( $$file, $buffer, 1024 );
            # read() returns undef on error; don't mistake that for EOF.
            if ( ! defined $size ) { die "read failed: $!"; }
            if ( $size == 0 ) { $eof = 1; return $ret; }
        }
        # Four-argument substr removes the first character from $buffer
        # and returns it.
        my $val = substr( $buffer,0,1,'');
        if ( ( $val eq ' ' )||( $val eq "\n" ) ) { return $ret; }
        $ret .= $val;
    }
}
[download]

That's one long line :-)

Regards...Ed

"Well done is better than well said." - Benjamin Franklin

Comment on Re: Reading a huge input line in parts Download Code


Your skill will accomplish what the force of many cannot
	PerlMonks