# gen-long-llil.pl
# Crude program to generate a LLiL test file with long names and counts
# perl gen-long-llil.pl long1.txt 600

use strict;
use warnings;
use autodie;

{
   # Code point of 'a' and the number of letters in 'a' .. 'z'
   my $first_ord = ord('a');
   my $nletters  = ord('z') + 1 - $first_ord;

   # Return $word (the prefix) with $nchar random lowercase letters appended
   sub gen_random_word {
      my ( $word, $nchar ) = @_;
      $word .= chr( $first_ord + int( rand($nletters) ) ) for 1 .. $nchar;
      return $word;
   }
}

# The 26 lowercase letters in ascending order ('abc...xyz')
my $longworda = join '', 'a' .. 'z';
# The same 26 letters in descending order ('zyx...cba')
my $longwordz = join '', reverse('a' .. 'z');
# Base value that create_long_test_file adds small offsets to when writing counts
my $longcount = 1_000_000;

# Write a LLiL test file with long names and large counts to $fname.
#   $fname   - path of the file to create (truncated if it already exists)
#   $howmany - number of times each of the two sections below is repeated
# Section 1 (no randomness): for i in 1..8, the alphabet and the reversed
# alphabet each repeated i times, with count $longcount + i - 1.
# Section 2 (randomness): the same long words followed by a letter 'a'..'z'
# plus one random letter, with counts $longcount + 1 and $longcount + 2.
# Dies (via autodie) if the file cannot be opened or closed.
sub create_long_test_file {
   my $fname   = shift;
   my $howmany = shift;
   open( my $fh_out, '>', $fname );

   # The eight long words are identical on every pass, so build them once
   my @wordsa = map { $longworda x $_ } 1 .. 8;
   my @wordsz = map { $longwordz x $_ } 1 .. 8;

   # Some with no randomness
   for my $h ( 1 .. $howmany ) {
      for my $i ( 0 .. 7 ) {
         my $cnt = $longcount + $i;
         print {$fh_out} "$wordsa[$i]\t$cnt\n$wordsz[$i]\t$cnt\n";
      }
   }

   # Some with randomness
   my $wordlen = 1;
   for my $h ( 1 .. $howmany ) {
      for my $i ( 0 .. 7 ) {
         for my $c ( 'a' .. 'z' ) {
            for my $z ( 1 .. 2 ) {
               my $cnt = $longcount + $z;
               print {$fh_out} $wordsa[$i] . gen_random_word( $c, $wordlen ) . "\t$cnt\n";
               print {$fh_out} $wordsz[$i] . gen_random_word( $c, $wordlen ) . "\t$cnt\n";
            }
         }
      }
   }

   # Close explicitly: buffered write errors (e.g. disk full) only surface at
   # close time, and autodie turns a failed close into an exception. The
   # caller also relies on the data being flushed before it reads the size.
   close($fh_out);
   return;
}

# Command line: outfile count
my $outfile = shift @ARGV;
my $count   = shift @ARGV;

# Both arguments are mandatory, and count must be all digits
my $usage = "usage: $0 outfile count\n";
die $usage unless $outfile;
die $usage unless $count;
die "error: count '$count' is not a number\n" unless $count =~ /^\d+$/;

print "generating short long test file '$outfile' with count '$count'\n";
create_long_test_file( $outfile, $count );

# Report the size of the file just written
my $size = -s $outfile;
print "file size=", $size, "\n";

##</code><code>##

> perl gen-long-llil.pl long1.txt 600
generating short long test file 'long1.txt' with count '600'
file size=65616000

> perl gen-long-llil.pl long2.txt 600
generating short long test file 'long2.txt' with count '600'
file size=65616000

> perl gen-long-llil.pl long3.txt 600
generating short long test file 'long3.txt' with count '600'
file size=65616000

##</code><code>##

> perl llil2d.pl big1.txt big2.txt big3.txt long1.txt long2.txt long3.txt >perl2.tmp
llil2d start
get_properties : 11 secs
sort + output  : 23 secs
total          : 34 secs

> llil2a big1.txt big2.txt big3.txt long1.txt long2.txt long3.txt >cpp2.tmp
llil2 start
get_properties : 6 secs
sort + output  : 5 secs
total          : 11 secs

> diff cpp2.tmp perl2.tmp

##</code><code>##

# gen-llil.pl
# Crude program to generate a big LLiL test file to use in benchmarks
# On Windows running:
#   perl gen-llil.pl big2.txt 200 3   - produces a test file with size = 35,152,000 bytes
#                                       (lines terminated with "\r\n")
#   perl gen-llil.pl big2.txt 200 3 1 - produces a test file with size = 31,636,800 bytes
#                                       (lines terminated with "\n")
# On Unix, lines are terminated with "\n" and the file size is always 31,636,800 bytes

use strict;
use warnings;
use autodie;

{
   # Code point of 'a' and the count of letters from 'a' through 'z'
   my $ord_a    = ord('a');
   my $ord_span = ord('z') - $ord_a + 1;

   # Append $nchar random lowercase letters to the prefix $word and return it
   sub gen_random_word {
      my ( $word, $nchar ) = @_;
      foreach ( 1 .. $nchar ) {
         my $offset = int( rand($ord_span) );
         $word .= chr( $ord_a + $offset );
      }
      return $word;
   }
}

# Write a big LLiL test file to $fname.
#   $fname   - path of the file to create (truncated if it already exists)
#   $count   - number of lines written for each prefix 'aaa' .. 'zzz'
#   $wordlen - number of random letters appended to each prefix
#   $fbin    - if true, binmode is applied to the output handle
# Every line has the form "<prefix><random letters>\t1\n".
# Dies (via autodie) if the file cannot be opened or closed.
sub create_test_file {
   my $fname   = shift;
   my $count   = shift;
   my $wordlen = shift;
   my $fbin    = shift;
   open( my $fh_out, '>', $fname );
   $fbin and binmode($fh_out);
   for my $c ( 'aaa' .. 'zzz' ) {
      for my $i ( 1 .. $count ) {
         print {$fh_out} gen_random_word( $c, $wordlen ) . "\t1\n";
      }
   }

   # Close explicitly: buffered write errors (e.g. disk full) only surface at
   # close time, and autodie turns a failed close into an exception. The
   # caller also relies on the data being flushed before it reads the size.
   close($fh_out);
   return;
}

# Command line: outfile count wordlen [binmode]
my $outfile = shift;
my $count   = shift;
my $wordlen = shift;
my $fbin    = shift;    # default is to use text stream (not a binary stream)
defined($fbin) or $fbin = 0;
$outfile or die "usage: $0 outfile count wordlen\n";
$count   or die "usage: $0 outfile count wordlen\n";

# wordlen is mandatory too (0 is allowed): without it every generated word
# would trigger an "uninitialized value" warning and get no random suffix
defined($wordlen) or die "usage: $0 outfile count wordlen\n";
$count   =~ /^\d+$/ or die "error: count '$count' is not a number\n";
$wordlen =~ /^\d+$/ or die "error: wordlen '$wordlen' is not a number\n";
print "generating test file '$outfile' with count '$count' (binmode=$fbin)\n";
create_test_file($outfile, $count, $wordlen, $fbin);
print "file size=", -s $outfile, "\n";

##</code><code>##

# gen-long-llil.pl
# Crude program to generate a LLiL test file with long names and counts
#   perl gen-long-llil.pl long1.txt 600
# On Windows running:
#   perl gen-long-llil.pl long1.txt 600   - produces a test file with size = 65,616,000 bytes
#                                           (lines terminated with "\r\n")
#   perl gen-long-llil.pl long1.txt 600 1 - produces a test file with size = 65,107,200 bytes
#                                           (lines terminated with "\n")
# On Unix, lines are terminated with "\n" and the file size is always 65,107,200 bytes

use strict;
use warnings;
use autodie;

{
   # Lookup table of the 26 lowercase letters
   my @letters = ( 'a' .. 'z' );

   # Return the prefix $word followed by $nchar randomly chosen letters
   sub gen_random_word {
      my ( $word, $nchar ) = @_;

      # rand @letters yields a value in [0, 26); the subscript truncates it
      $word .= $letters[ rand @letters ] for 1 .. $nchar;
      return $word;
   }
}

# The 26 lowercase letters in ascending order ('abc...xyz')
my $longworda = join '', 'a' .. 'z';
# The same 26 letters in descending order ('zyx...cba')
my $longwordz = join '', reverse('a' .. 'z');
# Base value that create_long_test_file adds small offsets to when writing counts
my $longcount = 1_000_000;

# Write a LLiL test file with long names and large counts to $fname.
#   $fname   - path of the file to create (truncated if it already exists)
#   $howmany - number of times each of the two sections below is repeated
#   $fbin    - if true, binmode is applied to the output handle
# Section 1 (no randomness): for i in 1..8, the alphabet and the reversed
# alphabet each repeated i times, with count $longcount + i - 1.
# Section 2 (randomness): the same long words followed by a letter 'a'..'z'
# plus one random letter, with counts $longcount + 1 and $longcount + 2.
# Dies (via autodie) if the file cannot be opened or closed.
sub create_long_test_file {
   my $fname   = shift;
   my $howmany = shift;
   my $fbin    = shift;
   open( my $fh_out, '>', $fname );
   $fbin and binmode($fh_out);

   # The eight long words are identical on every pass, so build them once
   my @wordsa = map { $longworda x $_ } 1 .. 8;
   my @wordsz = map { $longwordz x $_ } 1 .. 8;

   # Some with no randomness
   for my $h ( 1 .. $howmany ) {
      for my $i ( 0 .. 7 ) {
         my $cnt = $longcount + $i;
         print {$fh_out} "$wordsa[$i]\t$cnt\n$wordsz[$i]\t$cnt\n";
      }
   }

   # Some with randomness
   my $wordlen = 1;
   for my $h ( 1 .. $howmany ) {
      for my $i ( 0 .. 7 ) {
         for my $c ( 'a' .. 'z' ) {
            for my $z ( 1 .. 2 ) {
               my $cnt = $longcount + $z;
               print {$fh_out} $wordsa[$i] . gen_random_word( $c, $wordlen ) . "\t$cnt\n";
               print {$fh_out} $wordsz[$i] . gen_random_word( $c, $wordlen ) . "\t$cnt\n";
            }
         }
      }
   }

   # Close explicitly: buffered write errors (e.g. disk full) only surface at
   # close time, and autodie turns a failed close into an exception. The
   # caller also relies on the data being flushed before it reads the size.
   close($fh_out);
   return;
}

# Command line: outfile count [binmode]
my $outfile = shift @ARGV;
my $count   = shift @ARGV;
my $fbin    = shift @ARGV;

# Without a third argument the output stays a text stream (not binary)
$fbin = 0 unless defined $fbin;

# outfile and count are mandatory, and count must be all digits
my $usage = "usage: $0 outfile count\n";
die $usage unless $outfile;
die $usage unless $count;
die "error: count '$count' is not a number\n" unless $count =~ /^\d+$/;

print "generating short long test file '$outfile' with count '$count' (binmode=$fbin)\n";
create_long_test_file( $outfile, $count, $fbin );

# Report the size of the file just written
my $size = -s $outfile;
print "file size=", $size, "\n";