#!/usr/bin/env perl
#
# Benchmark sketch: generate a set of temporary text files, each containing
# one "A|CHNL_ID|<nn>" line at a random position, then time how long it takes
# to scan every file for that line and move the file into a sub-directory
# named after the channel id.

use strict;
use warnings;
use feature qw/say/;

use File::Temp;
use File::Path qw(make_path);
use File::Spec::Functions qw(catdir catfile);
use Time::HiRes qw(tv_interval gettimeofday);
use Fcntl qw(:flock);

my $FILE_COUNT    = 5_000;    # number of fixture files to generate and process
my $CHANNEL_COUNT = 32;       # channel ids are 00 .. ($CHANNEL_COUNT - 1)

# SETUP - Create $FILE_COUNT files that contain approximately 350k of data
# with the CHNL_ID line randomly distributed in each file.

say "Generating $FILE_COUNT temporary files.";

# Template lines come from the __DATA__ section; the sample CHNL_ID line is
# dropped so that each generated file gets exactly one, inserted below.
my @base_content = grep { !m/^\QA|CHNL_ID|\E\d+\n/ } <DATA>;
die "No template lines found in the __DATA__ section.\n"
    unless @base_content;

# Repeat the template to bulk each file up to a realistic size.
@base_content = (@base_content) x 1024;

# All fixtures live under one self-cleaning temporary directory.
my $td = File::Temp->newdir(
    TEMPLATE => 'pm_tempXXXXX',
    TMPDIR   => 1,
    CLEANUP  => 1,
);

# Pre-create one destination directory per possible channel id.
for my $n (0 .. $CHANNEL_COUNT - 1) {
    make_path(catdir($td->dirname, sprintf("%02d", $n)));
}

for (1 .. $FILE_COUNT) {
    my $rand_ix = int(rand(scalar(@base_content)));
    my $chnl_id = sprintf "%02d", int(rand($CHANNEL_COUNT));

    my $tf = File::Temp->new(
        TEMPLATE => 'pm_XXXXXXXXXXXX',
        SUFFIX   => '.txt',
        DIR      => $td->dirname,
        UNLINK   => 0,
    );

    # Emit the template with the CHNL_ID line inserted immediately before
    # index $rand_ix. When $rand_ix is 0 the leading slice is empty.
    print {$tf} @base_content[0 .. $rand_ix - 1],
        "A|CHNL_ID|$chnl_id\n",
        @base_content[$rand_ix .. $#base_content]
        or die "Cannot write to ", $tf->filename, ": $!";

    # Buffered write errors only surface at close, so check it.
    close $tf
        or die "Cannot close ", $tf->filename, ": $!";
}

# Sample file processor:

say "Processing of $FILE_COUNT files.";

my $t0 = [gettimeofday];

# Matches the channel line and captures the numeric channel id.
my $chnl_id_re = qr/^\QA|CHNL_ID|\E(\d+)$/m;

opendir my $dh, $td->dirname
    or die "Cannot open temporary directory (", $td->dirname, "): $!\n";

FILE:
while (defined(my $dirent = readdir($dh))) {
    next FILE if $dirent =~ m/^\.\.?$/;
    next FILE unless $dirent =~ m/\.txt$/;

    my $path = catfile($td->dirname, $dirent);
    next FILE unless -f $path;

    open my $fh, '<', $path
        or die "Cannot open $path for read: $!";

    # NOTE(review): perldoc -f flock says the fcntl(2) emulation of flock
    # requires write intent for LOCK_EX; confirm the target platform accepts
    # an exclusive lock on a read-only handle.
    flock $fh, LOCK_EX
        or die "Error obtaining a lock on $path: $!";

    while (defined(my $line = <$fh>)) {
        next unless $line =~ $chnl_id_re;

        # Copy the capture right away rather than relying on $1 surviving
        # the calls below.
        my $chnl_id = $1;

        my $target_dir = catdir($td->dirname, $chnl_id);
        make_path($target_dir) unless -d $target_dir;

        my $dest = catfile($target_dir, $dirent);
        rename $path, $dest
            or die "Could not rename $path into $dest: $!";

        # The handle is closed (dropping the lock) only after the move.
        close $fh;
        next FILE;
    }

    warn "Did not find CHNL_ID in $path. Skipping.\n";
    close $fh;
}

closedir $dh;

my $elapsed = tv_interval($t0);

say "Completed processing $FILE_COUNT files in $elapsed seconds.";

__DATA__
A|RCPNT_ID|92299999
A|RCPNT_TYP_CD|QL
A|ALERT_ID|264
A|FROM_ADDR_TX|14084007183
A|RQST_ID|PT201803989898
A|CRTEN_DT|02072018
A|CHNL_ID|17
A|RCPNT_FRST_NM|TESTSMSMIGRATION
A|SBJ_TX|Subject value from CDC
A|CLT_ID|14043
A|ALRT_NM|Order Shipped
A|CNTCT_ADDR|16166354429
A|RCPNT_LAST_NM|MEMBER
A|ORDR_NB|2650249999
A|LOB_CD|PBM
D|QL_922917566|20180313123311|1|TESTSMSMIGRATION MEMBER||