using File::Find to grep for text

Aldebaran has asked for the wisdom of the Perl Monks concerning the following question:

Hello Monks,

I've written a script that attempts to run simulations of the US college men's basketball tournament. What I have "works," representing this tournament as I understand it until I try to use File::Find to see what I've got. I'll put abridged output and source between readmore tags for interested parties.

Output of command ./7.64.pl

round is 1
1.duke, 9.ucf, 5.msST, 13.stlouis, 6.maryland, 3.lsu, 7.louisville, 2.
+miST
1.gonzaga, 8.syracuse, 12.murrayST, 4.flaST, 11.azST, 3.texTech, 7.nev
+ada, 2.mi
1.va, 9.ok, 5.wi, 4.ksST, 11.stmarys, 3.purdue, 7.cincy, 2.tn
1.nc, 9.wa, 5.auburn, 4.ks, 11.ohST, 3.houston, 7.wofford, 2.ky
round is 2
1.duke, 5.msST, 3.lsu, 2.miST
1.gonzaga, 4.flaST, 3.texTech, 2.mi
1.va, 4.ksST, 3.purdue, 2.tn
1.nc, 4.ks, 3.houston, 2.ky
round is 3
1.duke, 3.lsu
1.gonzaga, 2.mi
1.va, 2.tn
4.ks, 2.ky
round is 4
3.lsu
1.gonzaga
1.va
4.ks
finals are 1.gonzaga 1.va
tournament winner is 1.va
----------------
$
[download]

Source listing of 7.64.pl :

#!/usr/bin/perl -w
use 5.011;
use Path::Tiny;
use utf8;
use open OUT => ':utf8';
use Data::Dump;
use POSIX qw(strftime);
binmode STDOUT, 'utf8';

# whereamI
my $path1 = Path::Tiny->cwd;
say "path1 is $path1";

my @region = ( 'east', 'west', 'south', 'midwest' );    #4 different b
+rackets

## main control
# set trials
my $parent;
my $trials       = 15;
my $dummy        = 1;
my $first_second = strftime( "%d-%m-%Y-%H-%M-%S", localtime );

while ( $trials > 0 ) {

  # unique point at which probability is assigned for teams.
  my $ref_bracket = pop_brackets();
  my %vars        = %$ref_bracket;
  my $rvars       = \%vars;

  # create an output file
  my $out_file =
    $path1->child( 'my_data', "$first_second", "$first_second\.$dummy.
+txt" )
    ->touchpath;
  $parent = $out_file->parent;
  say "out_file is $out_file";

  my $teams_left = 16;
  my $round      = 1;
  my @final_four;

  while ( $teams_left > 1 ) {

    say "round is $round";
    $out_file->append_utf8( "round is $round", "\n" );
    my $anzahl = 2;
    for my $r (@region) {

      say "r is $r";

      my $ref_calc = calc_winners( $rvars, $r );
      dd $ref_calc;
      $vars{$r} = $ref_calc;    #update regional bracket with winners

      my @sieger = @$ref_calc;
      say "winners are @sieger";
      $anzahl = scalar @sieger;
      if ( $anzahl == 1 ) {
        push @final_four, $sieger[0];
      }

      my $string_sieger = join( ', ', @sieger );
      say "string sieger is $string_sieger";
      $out_file->append_utf8( $string_sieger, "\n" );
    }

    $dummy += 1;
    $round++;
    $teams_left = $anzahl;

    say "final four are @final_four";

  }    #end for loop
  my $ref_finals = final_four( \@final_four );
  my @finals     = @$ref_finals;
  say "finals are @finals";
  $out_file->append_utf8( "finals are @finals", "\n" );
  my $ref_gewinner = final_four( \@finals );
  my @gewinner     = @$ref_gewinner;
  my $last         = $gewinner[0];
  say "tournament winner is $last";
  $out_file->append_utf8( "tournament winner is $last", "\n" );

  say "-------system out---------";
  system("cat $out_file");
  say "----------------";
  $trials--;

}    #end while loop

## see what we got

use File::Find;

# Get $dirname from first command-line argument
my $dirname = $parent;

find( \&do_process, $dirname );    

my ( $a, $b );

sub do_process {


  if ( -r $_ ) {                

    my $file_name = $_;
    open( my $fh, '<', $file_name );    # Use three-arg open!
    while (<$fh>) {
      chomp();
      if (/\btournament winner is 1.gonzaga\b/i) { $a = "$file_name:$_
+"; }
      if (/\btournament winner is 2.miSTb/i)     { $b = "$file_name:$_
+"; }
    }
  }
}

sub pop_brackets {

  use 5.016;
  use warnings;

  my %vars;
  my @east =
    qw(1.duke 16.ndST 8.vcu 9.ucf 5.msST 12.lib 4.vaTech 13.stlouis 6.
+maryland
    11.belmont 3.lsu 14.yale 7.louisville 10.mn 2.miST 15.bradley);

  my @west =
    qw(1.gonzaga 16.farleigh 8.syracuse 9.baylor 5.marquette 12.murray
+ST 4.flaST 13.vermont 6.buffalo
    11.azST 3.texTech 14.noKY 7.nevada 10.fla 2.mi 15.montana);

  my @south = qw(1.va 16.gardner 8.ms 9.ok 5.wi 12.or 4.ksST 13.UCirv 
+6.nova
    11.stmarys 3.purdue 14.olddominion 7.cincy 10.iowa 2.tn 15.colgate
+);

  my @midwest = qw(1.nc 16.iona 8.utST 9.wa 5.auburn 12.nmST 4.ks 13.n
+e 6.iowaST
    11.ohST 3.houston 14.gaST 7.wofford 10.setonhall 2.ky 15.abilene);

  $vars{east}    = \@east;
  $vars{west}    = \@west;
  $vars{south}   = \@south;
  $vars{midwest} = \@midwest;

  return \%vars;
}

sub calc_winners {

  use 5.016;
  use warnings;
  use Data::Dump;

  my ( $rvars, $region ) = (@_);
  my %vars = %$rvars;

  my $new_ref = $vars{$region};
  my @teams   = @$new_ref;

  my @pairs;
  while (@teams) {
    my $first = shift @teams;
    my $next  = shift @teams;
    push @pairs, "$first vs $next";
  }

  #say "pairs are @pairs";
  my $ref_pairs   = \@pairs;
  my $ref_winners = play_game($ref_pairs);

  return $ref_winners;    # end calc_winners
}

sub play_game {

  use 5.016;
  use warnings;

  my $ref_pairs = shift;
  my @pairs     = @$ref_pairs;
  say "in play_game";

  #say "pairs are @pairs";
  my @winners;
  for my $line (@pairs) {
    if ( $line =~ /^(\d+)\.(\w+) vs (\d+)\.(\w+)$/ ) {

      #say "matched";
      #say "$1 $2 $3 $4";

      my $denominator = $1 + $3;
      my $ratio       = $3 / $denominator;

      #say "ratio was $ratio";
      my $random_number = rand();
      if ( $random_number < $ratio ) {
        push @winners, "$1.$2";
      }
      else {
        push @winners, "$3.$4";
      }

    }

  }
  my $ref_winners = \@winners;

  return $ref_winners;
}    # end play_game

sub final_four {

  use 5.016;
  use warnings;
  use Data::Dump;

  my ($new_ref) = (@_);

  my @teams = @$new_ref;

  my @pairs;
  while (@teams) {
    my $first = shift @teams;
    my $next  = shift @teams;
    push @pairs, "$first vs $next";
  }

  say "pairs are @pairs";
  my $ref_pairs   = \@pairs;
  my $ref_winners = play_game($ref_pairs);

  return $ref_winners;    # final_four
}
__END__
[download]

How do I imitate the grep functionality with perl's File::Find? I expected to see files matched, as 1.gonzaga did win one of those simulated tournaments. These do serve as an SSCCE if you're open to having a governable amount of data created in a mydata/ subdirectory. You can turn trials down to 1 or 2 if you don't want much output.

As much as I don't want Nike, gambling, and unrealized social justice to get in the way of a Spring Classic, I realize that it means as much to you as you decide. That there is a tennis shoe intrigue this time is not normal, the way so much of american life is not normal now.

As for me, I would rather simulate the tourney using perl than gamble on it. But I would be less than sporting if I didn't try to predict a winner, and I'll go with 2.miST .

Thanks for your comment

Comment on using File::Find to grep for text Select or Download Code

Replies are listed 'Best First'.
Re: using File::Find to grep for text by Marshall (Canon) on Apr 04, 2019 at 04:11 UTC
I don't understand how you claim that this works?: `## see what we got use File::Find; # Get $dirname from first command-line argument my $dirname = $parent; find( \&do_process, $dirname ); my ( $a, $b ); sub do_process { if ( -r $_ ) { my $file_name = $_; open( my $fh, '<', $file_name ); # Use three-arg open! while (<$fh>) { chomp(); if (/\btournament winner is 1.gonzaga\b/i) { $a = "$file_name: +$_"; } if (/\btournament winner is 2.miSTb/i) { $b = "$file_name: +$_"; } } } }` [download] Please see: perl doc File::Find File::Find will traverse starting down from a list of directories, calling do_process() for each file name encountered. Note that links and directories are just special kinds of files as far as the directory system is concerned. This -r test looks meaningless to me in a normal sense. Perhaps you mean -f? for example: `find( \&do_process, ($dirname) ); sub do_process { my $file_name = File::Fine::name; #there is no $_ or @_ context her +e .... }` [download] To set $a or $b repeatedly with a different value makes no sense. You need a more complex data structure to save all values. BTW: I would not use $a or $b as scalar values in a user script. These names have special meanings to Perl like in sort() and other places. I would not do anything complicated at all in a do_process() subroutine. Minimize the chance of a "blow up". File::Find does cd's to traverse the directory tree. If you "blow up" at some random point, you are in a different directory than where you started from and that can cause various problems. Read the FAQ and then try again showing a simplified example. This is good: `open( my $fh, '<', $file_name ); # Use three-arg open!` This is better: `open( my $fh, '<', $file_name ) or die "Can't open $file_name for read $!";` Update: I would make this change: Don't use an O/S specific command where there is no need for it. I'm running Windows and your program bombed because of this. Also, system() will launch another process (an "expensive" cpu thing) where there is no need for that either (slows things down). `say "-------system out---------"; #system("cat $out_file"); open my $fh, '<', $out_file or die "can't print $out_file! $!"; print while (<$fh>); close $fh; say "----------------";` [download] I did get your script to run. It creates files underneath a "my_data" directory. There is no need for File::Find You have a single directory of directories: For example: `print "OUTFILES******************\n"; foreach my $directory (glob "my_data/") { print "Directory: $directory\n"; foreach my $file (glob "$directory/*") { print " File=$file\n"; } }` [download] =prints Read more... (9 kB) PS: Why this UTF-8 stuff? I don't see the need for that complication.	[reply] [d/l] [select]
Re^2: using File::Find to grep for text by Aldebaran (Curate) on Apr 04, 2019 at 06:48 UTC
There is no need for File::Find That part is becoming clear. Now that I've seen how you treated this, I used your treatment to get closer to what I want to do here. The part I'm changing out begins with this comment: `## see what we got my $phrase = "round is 4"; my @sought = qw ( 2.miST 3.texTech 1.va 5.auburn); print "OUTFILES******************\n"; foreach my $filename ( glob("./my_data/$first_second/") ) { open my $fh, '<', $filename or die "can't print $filename! $!"; while ( my $line = <$fh> ) { if ( $line =~ m/$phrase/g ) { say "filename is $filename"; } } close $fh; }` [download] So, I'd like to assemble stats on "who made the final four?" As we see here at official ncaa link, there are only 4 teams left, corresponding to round 4 of the tourney. Indeed, they are enumerated in the @sought variable that I assign in the new script. Again, I find myself looking for some elbow grease in dealing with these data: `round is 4 3.lsu 3.texTech 2.tn 1.nc finals are 3.lsu 1.nc tournament winner is 1.nc ---------------- OUTFILES******************* filename is ./my_data/03-04-2019-23-07-21/03-04-2019-23-07-21.1.txt filename is ./my_data/03-04-2019-23-07-21/03-04-2019-23-07-21.13.txt filename is ./my_data/03-04-2019-23-07-21/03-04-2019-23-07-21.17.txt ... $` [download] So, I would like this to match on 3.texTech and report that it matched on one correct team for the final four. The order will always be the same in that 2.miST will be only on the line that follows "round is 4", 3.texTech on the next, 1.va the next, and 5.auburn on the line before the line that starts with "tournament". If I have 15 trials, which one got the most correct in the final four? Why this UTF-8 stuff? No good reason other than that I'm used to it. (I don't know what to cut out without having the wheels fall off.) Thanks for your comments,	[reply] [d/l] [select]
Re^3: using File::Find to grep for text by Marshall (Canon) on Apr 04, 2019 at 08:37 UTC
Still not quite sure about this, but consider this for further improvement: #!/usr/bin/perl use strict; use warnings; use Data::Dump qw(pp); $\|=1; #turn off stdout buffering foreach my $directory (glob "my_data/") { print "Directory: $directory\n"; my %best4; foreach my $file (glob "$directory/.txt") { open(my $fh, "<", "$file") or die "Can't open < $file: $!"; while (<$fh>) { statsRound4($fh,\%best4) if /^round is 4/; } close $fh; } dumpBest4(\%best4); #pp \%best4; #uncomment this to see what it does - good tool } sub statsRound4 #add next 4 team lines to stat table #all of these guys made it to Round 4 { my ($fh, $hash_ref) = @_; for (1..4) { my $team_round4 = <$fh>; chomp $team_round4; $hash_ref->{$team_round4}++; } } sub dumpBest4 #print highest keys/values sorted by descending value { my $hash_ref = shift; my @top_teams = sort{my $myA = $hash_ref->{$a}; my $myB = $hash_ref->{$b}; $myB <=> $myA }keys %$hash_ref; foreach my $team (@top_teams[0..3]) { print "$team\t $hash_ref->{$team}\n"; } } __END__ Directory: my_data/03-04-2019-22-09-24 1.va 12 1.duke 12 1.gonzaga 8 1.nc 4 Directory: my_data/03-04-2019-22-14-09 1.duke 9 1.gonzaga 9 1.nc 8 1.va 7 Directory: my_data/03-04-2019-22-14-55 1.gonzaga 10 1.va 7 1.duke 7 1.nc 7 Directory: my_data/03-04-2019-22-16-53 1.va 8 1.nc 8 1.gonzaga 8 2.ky 5 Directory: my_data/03-04-2019-22-20-00 1.gonzaga 9 1.va 9 1.duke 8 1.nc 7 Directory: my_data/03-04-2019-22-21-01 1.nc 7 1.gonzaga 7 3.lsu 6 1.duke 6 Directory: my_data/03-04-2019-22-24-54 1.duke 9 1.va 7 1.nc 7 1.gonzaga 7 Directory: my_data/03-04-2019-22-26-59 1.va 11 1.duke 9 1.nc 8 3.texTech 5 Directory: my_data/03-04-2019-22-28-51 1.duke 12 2.ky 6 1.va 6 1.gonzaga 5 [download]	[reply] [d/l]
Re^4: using File::Find to grep for text by Aldebaran (Curate) on Apr 06, 2019 at 22:17 UTC


We don't bite newbies here... much
	PerlMonks