# gen-long-llil.pl
# Crude program to generate a LLiL test file with long names and counts
# perl gen-long-llil.pl long1.txt 600

use strict;
use warnings;
use autodie;

{
    my $ordmin = ord('a');
    my $ordmax = ord('z') + 1;

    # Generate a random word.
    #   $word  : word prefix
    #   $nchar : the number of random chars (each in 'a' .. 'z') to append
    # Returns the prefix followed by $nchar random lowercase letters.
    sub gen_random_word {
        my ( $word, $nchar ) = @_;
        for ( 1 .. $nchar ) {
            $word .= chr( $ordmin + int( rand( $ordmax - $ordmin ) ) );
        }
        return $word;
    }
}

my $longworda = join '', 'a' .. 'z';
my $longwordz = join '', reverse( 'a' .. 'z' );
my $longcount = 1_000_000;

# Write the long-name test file.
#   $fname   : path of the file to create (truncated if it exists)
#   $howmany : how many times each group of records is repeated
# Each record is "word<TAB>count<NEWLINE>". Dies (via autodie) if the file
# cannot be opened or closed.
sub create_long_test_file {
    my ( $fname, $howmany ) = @_;

    open( my $fh_out, '>', $fname );

    # Some with no randomness
    for ( 1 .. $howmany ) {
        for my $i ( 1 .. 8 ) {
            my $cnt   = $longcount + $i - 1;
            my $worda = $longworda x $i;
            my $wordz = $longwordz x $i;
            print {$fh_out} "$worda\t$cnt\n$wordz\t$cnt\n";
        }
    }

    # Some with randomness
    my $wordlen = 1;
    for ( 1 .. $howmany ) {
        for my $i ( 1 .. 8 ) {
            my $worda = $longworda x $i;
            my $wordz = $longwordz x $i;
            for my $c ( 'a' .. 'z' ) {
                for my $z ( 1 .. 2 ) {
                    my $cnt = $longcount + $z;
                    print {$fh_out} $worda . gen_random_word( $c, $wordlen ) . "\t" . $cnt . "\n";
                    print {$fh_out} $wordz . gen_random_word( $c, $wordlen ) . "\t" . $cnt . "\n";
                }
            }
        }
    }

    # Close explicitly so that a buffered write error is reported by autodie
    # instead of being lost when the handle goes out of scope.
    close($fh_out);
    return;
}

my $outfile = shift;
my $count   = shift;
$outfile or die "usage: $0 outfile count\n";
$count   or die "usage: $0 outfile count\n";
$count =~ /^\d+$/ or die "error: count '$count' is not a number\n";
print "generating short long test file '$outfile' with count '$count'\n";
create_long_test_file( $outfile, $count );
print "file size=", -s $outfile, "\n";

####

> perl gen-long-llil.pl long1.txt 600
generating short long test file 'long1.txt' with count '600'
file size=65616000

> perl gen-long-llil.pl long2.txt 600
generating short long test file 'long2.txt' with count '600'
file size=65616000

> perl gen-long-llil.pl long3.txt 600
generating short long test file 'long3.txt' with count '600'
file size=65616000

####

> perl llil2d.pl big1.txt big2.txt big3.txt long1.txt long2.txt long3.txt >perl2.tmp
llil2d start
get_properties      : 11 secs
sort + output       : 23 secs
total               : 34 secs

> llil2a big1.txt big2.txt big3.txt long1.txt long2.txt long3.txt >cpp2.tmp
llil2 start
get_properties      :  6 secs
sort + output       :  5 secs
total               : 11 secs

> diff cpp2.tmp perl2.tmp

####

# gen-llil.pl
# Crude program to generate a big LLiL test file to use in benchmarks
# On Windows running:
#   perl gen-llil.pl big2.txt 200 3    - produces a test file with size = 35,152,000 bytes
#                                        (lines terminated with "\r\n")
#   perl gen-llil.pl big2.txt 200 3 1  - produces a test file with size = 31,636,800 bytes
#                                        (lines terminated with "\n")
# On Unix, lines are terminated with "\n" and the file size is always 31,636,800 bytes

use strict;
use warnings;
use autodie;

{
    my $ordmin = ord('a');
    my $ordmax = ord('z') + 1;

    # Generate a random word.
    #   $word  : word prefix
    #   $nchar : the number of random chars (each in 'a' .. 'z') to append
    # Returns the prefix followed by $nchar random lowercase letters.
    sub gen_random_word {
        my ( $word, $nchar ) = @_;
        for ( 1 .. $nchar ) {
            $word .= chr( $ordmin + int( rand( $ordmax - $ordmin ) ) );
        }
        return $word;
    }
}

# Write the benchmark test file.
#   $fname   : path of the file to create (truncated if it exists)
#   $count   : number of records written for each prefix 'aaa' .. 'zzz'
#   $wordlen : number of random chars appended to each prefix
#   $fbin    : when true, binmode() is applied to the output handle
# Each record is "word<TAB>1<NEWLINE>". Dies (via autodie) if the file
# cannot be opened or closed.
sub create_test_file {
    my ( $fname, $count, $wordlen, $fbin ) = @_;

    open( my $fh_out, '>', $fname );
    binmode($fh_out) if $fbin;

    for my $c ( 'aaa' .. 'zzz' ) {
        for ( 1 .. $count ) {
            print {$fh_out} gen_random_word( $c, $wordlen ) . "\t" . 1 . "\n";
        }
    }

    # Close explicitly so that a buffered write error is reported by autodie
    # instead of being lost when the handle goes out of scope.
    close($fh_out);
    return;
}

my $outfile = shift;
my $count   = shift;
my $wordlen = shift;
my $fbin    = shift;    # default is to use text stream (not a binary stream)
defined($fbin) or $fbin = 0;
$outfile or die "usage: $0 outfile count wordlen\n";
$count   or die "usage: $0 outfile count wordlen\n";

# wordlen is a required argument; 0 is accepted and means "no random suffix".
defined($wordlen) or die "usage: $0 outfile count wordlen\n";
$count   =~ /^\d+$/ or die "error: count '$count' is not a number\n";
$wordlen =~ /^\d+$/ or die "error: wordlen '$wordlen' is not a number\n";
print "generating test file '$outfile' with count '$count' (binmode=$fbin)\n";
create_test_file( $outfile, $count, $wordlen, $fbin );
print "file size=", -s $outfile, "\n";

####

# gen-long-llil.pl
# Crude program to generate a LLiL test file with long names and counts
# perl gen-long-llil.pl long1.txt 600
# On Windows running:
#   perl gen-long-llil.pl long1.txt 600    - produces a test file with size = 65,616,000 bytes
#                                            (lines terminated with "\r\n")
#   perl gen-long-llil.pl long1.txt 600 1  - produces a test file with size = 65,107,200 bytes
#                                            (lines terminated with "\n")
# On Unix, lines are terminated with "\n" and the file size is always 65,107,200 bytes

use strict;
use warnings;
use autodie;

{
    my $ordmin = ord('a');
    my $ordmax = ord('z') + 1;

    # Generate a random word.
    #   $word  : word prefix
    #   $nchar : the number of random chars (each in 'a' .. 'z') to append
    # Returns the prefix followed by $nchar random lowercase letters.
    sub gen_random_word {
        my ( $word, $nchar ) = @_;
        for ( 1 .. $nchar ) {
            $word .= chr( $ordmin + int( rand( $ordmax - $ordmin ) ) );
        }
        return $word;
    }
}

my $longworda = join '', 'a' .. 'z';
my $longwordz = join '', reverse( 'a' .. 'z' );
my $longcount = 1_000_000;

# Write the long-name test file.
#   $fname   : path of the file to create (truncated if it exists)
#   $howmany : how many times each group of records is repeated
#   $fbin    : when true, binmode() is applied to the output handle
# Each record is "word<TAB>count<NEWLINE>". Dies (via autodie) if the file
# cannot be opened or closed.
sub create_long_test_file {
    my ( $fname, $howmany, $fbin ) = @_;

    open( my $fh_out, '>', $fname );
    binmode($fh_out) if $fbin;

    # Some with no randomness
    for ( 1 .. $howmany ) {
        for my $i ( 1 .. 8 ) {
            my $cnt   = $longcount + $i - 1;
            my $worda = $longworda x $i;
            my $wordz = $longwordz x $i;
            print {$fh_out} "$worda\t$cnt\n$wordz\t$cnt\n";
        }
    }

    # Some with randomness
    my $wordlen = 1;
    for ( 1 .. $howmany ) {
        for my $i ( 1 .. 8 ) {
            my $worda = $longworda x $i;
            my $wordz = $longwordz x $i;
            for my $c ( 'a' .. 'z' ) {
                for my $z ( 1 .. 2 ) {
                    my $cnt = $longcount + $z;
                    print {$fh_out} $worda . gen_random_word( $c, $wordlen ) . "\t" . $cnt . "\n";
                    print {$fh_out} $wordz . gen_random_word( $c, $wordlen ) . "\t" . $cnt . "\n";
                }
            }
        }
    }

    # Close explicitly so that a buffered write error is reported by autodie
    # instead of being lost when the handle goes out of scope.
    close($fh_out);
    return;
}

my $outfile = shift;
my $count   = shift;
my $fbin    = shift;    # default is to use text stream (not a binary stream)
defined($fbin) or $fbin = 0;
$outfile or die "usage: $0 outfile count\n";
$count   or die "usage: $0 outfile count\n";
$count =~ /^\d+$/ or die "error: count '$count' is not a number\n";
print "generating short long test file '$outfile' with count '$count' (binmode=$fbin)\n";
create_long_test_file( $outfile, $count, $fbin );
print "file size=", -s $outfile, "\n";