Beefy Boxes and Bandwidth Generously Provided by pair Networks
Do you know where your variables are?
 
PerlMonks  

Re: Re: Shell Script Woes (tye's try)

by Limbic~Region (Chancellor)
on Jan 09, 2003 at 20:14 UTC ( [id://225629]=note: print w/replies, xml ) Need Help??


in reply to Re: Shell Script Woes (tye's try)
in thread Shell Script Woes

tye,
Thank you very much for the insight. I have "borrowed" a great deal of your code and came up with the following:

#!/usr/bin/perl -w use strict; use Time::Local; chdir "/var/spool/wt400/gateways/$ARGV[0]" or exit; mkdir "capture", 0755 unless (-d "capture"); my $Dir = $ARGV[1]; my $ListTime = 0; my $BufferSize = 64 * 1024; my $MaxLen = 0; my %Traps; my @GrepList; my $GrepString; my $Counter = 1; my $Size; my $Prev; my $Now; my $NF; while (1) { if ($Counter > 20 || ! @GrepList) { if ( (stat("traplist.$Dir"))[9] gt $ListTime ) { $ListTime = (stat(_))[9]; open (LIST,"traplist.$Dir"); while (<LIST>) { next if ($_ =~ /^Created\t\tExpires/ || $_ =~ /^ *$/); my @Fields = split "\t" , $_; my($mon, $day, $year, $hour, $min) = split ?[-/:]? , $Fields[1]; my $Expiration = timelocal(0, $min, $hour, $day, $mon -1, $year + + 100); $Traps{"$Fields[6]"} = [ $Expiration,$Fields[2],$Fields[5],$Fields +[7] ]; } close (LIST); } } @GrepList = (); $Now = time; foreach my $trap (keys %Traps) { push @GrepList,$Traps{$trap}[3] unless (($Traps{$trap}[0] < $Now && +$Traps{$trap}[1]) || $trap eq "SIZE"); } map { $MaxLen = length($_) if length($_) > $MaxLen } @GrepList; $BufferSize = 2 * $MaxLen if ($BufferSize < 2 * $MaxLen); if (exists $Traps{"SIZE"} && $Traps{"SIZE"}[1]) { $Size = $Traps{"SIZE"}[2] unless ($Traps{"SIZE"}[0] < $Now && $Traps +{"SIZE"}[2] > 0); } exit unless (@GrepList || $Size); $GrepString = join '|', map quotemeta $_, @GrepList; $GrepString = qr/($GrepString)/i; if ($Dir eq "out") { @ARGV = <out/do*>; } elsif ($Dir eq "in") { @ARGV = <in/di*>; } else { @ARGV = <out/do* in/di*> } if (@ARGV) { $/=\$BufferSize; $Prev= ""; while (<>) { $_ =~ tr/\n//d; if(($Prev.$_) =~ /$GrepString/) { ($NF = "$ARGV-$+") =~ s/^.*\///; rename $ARGV , "capture/$NF"; close (ARGV); $Prev = ""; } if (eof) { $Prev = ""; } else { $Prev = substr($_,-$MaxLen); } } } $/ = "\n"; ++$Counter; sleep 3 }

This provides 10X more functionality the original shell script did.
I would appreciate any advice on how it could be made to go faster and still be efficient.

L~R

Replies are listed 'Best First'.
Re^3: Shell Script Woes (review)
by tye (Sage) on Jan 09, 2003 at 21:30 UTC

    First, I'd use more than one space for indentation. I use 4 because I like the way it discourages overly deep nesting of code. Even 2 or 3 would be quite a bit better than 1, IMO.

    $Traps{"$Fields[6]"} = [ $Expiration,$Fields[2],$Fields[5],$Fields[7] ]; can be written $Traps{$Fields[6]} = [ $Expiration, @Fields[­2,5,7] ]; Putting in too many quotes can bite you (though using it as a hash key also does the stringification which would bite you in the same way in this case -- changing an object into a string) so be careful of it.

    You can make the code clearer using a few constants:

    sub EXPIRE() { 0 } sub WHATEVER() { 1 } sub FOOBAR() { 2 } sub FILENAME() { 3 }
    (these make no difference in the running time of the code since they get optimized away at compile time). I'd also avoid 'unless' so push @GrepList,$Traps{$tr­ap}[3] unless (($Traps{$trap}[0] < $Now && $Traps{$trap}[1]) || $trap eq "SIZE"); becomes
    push @GrepList, $Traps{$tr­ap}[FILENAME] if $trap ne "SIZE" and ! $Traps{$trap}[WHATEVER] || $Now <= $Traps{$trap}[EXPIRES];
    for example (I find spacing more effective at conveying grouping than parens, YMMV).

    Don't use map unless you want the list that it builds: map { $MaxLen = length($_) if length($_) > $MaxLen } @GrepList; becomes

    for( @GrepList ) { $MaxLen = length($_) if length($_) > $MaxLen; }
    If you really have a need for single-line code (which is a mistake in my book), then remove the newlines.

    Use local( $/ )= \$BufferSize; and you can drop the $/ = "\n"; line.

    So no speed improvements to offer. (:

                    - tye
      I took a few of your suggestions in formatting, but left some as they were

      The problem I found is the more elements in the GrepString, the longer it took to process (exponential).

      I tore the modified tcgrep from the The Unix Reconstruction Project apart to find out how it was so quick. When I combined that code with the buffer stuff - it started screaming.

      Here is the latest version - I know better than to think it is the last

      #!/usr/bin/perl -w use strict; use Time::Local; chdir "/var/spool/wt400/gateways/$ARGV[0]"; mkdir "capture", 0755 unless (-d "capture"); my $Dir = $ARGV[1]; my $ListTime = 0; my $BufferSize = 64 * 1024; my $MaxLen = 0; my %Traps; my %Vals; my @GrepList; my $Counter = 1; my $Size; my $Prev; my $Now; my $NF; my $Matches; my $matcher; my $match_code; my $String; while (1) { if ($Counter > 20 || ! @GrepList) { if ( (stat("traplist.$Dir"))[9] gt $ListTime ) { $ListTime = (stat(_))[9]; open (LIST,"traplist.$Dir"); while (<LIST>) { next if ($_ =~ /^Created\t\tExpires/ || $_ =~ /^ *$/); my @Fields = split "\t" , $_; my($mon, $day, $year, $hour, $min) = split ?[-/:]? , $Fields[1 +]; my $Expiration = timelocal(0, $min, $hour, $day, $mon - 1, $y +ear + 100); $Traps{"$Fields[6]"} = [ $Expiration,@Fields[2,5,7] ]; } close (LIST); } } @GrepList = (); $Now = time; foreach (keys %Traps) { push @GrepList,$Traps{$_}[3] unless (($Traps{$_}[0] < $Now && $Tra +ps{$_}[1]) || $_ eq "SIZE"); $Vals{$Traps{$_}[3]} = $_; } foreach (@GrepList) { $MaxLen = length($_) if length($_) > $MaxLen; $_ =~ s/(\W)/\\$1/g; $_ = "(?i-xsm)$_"; $match_code .= "(\$Matches = \"$_\") =~ s/\\(\\?i-xsm\\)// if \$Str +ing =~ /$_/;"; } if (exists $Traps{"SIZE"} && $Traps{"SIZE"}[1]) { $Size = $Traps{"SIZE"}[2] unless ($Traps{"SIZE"}[0] < $Now && $Tra +ps{"SIZE"}[2] > 0); } exit unless (@GrepList || $Size); $matcher = eval "sub { $match_code }"; if ($Dir eq "out") { @ARGV = <out/do*>; } elsif ($Dir eq "in") { @ARGV = <in/di*>; } else { @ARGV = <out/do* in/di*> } matchfile($matcher,@ARGV); ++$Counter; sleep 3 } sub matchfile { $matcher = shift; my $file; my $name; local($_); local($/) = \$BufferSize; FILE: while (defined ($file = shift(@_))) { if ($Size && (stat("$file"))[7] >= $Size) { ($NF = "$file-SIZE") =~ s/^.*\///; rename $file , "capture/$NF"; next FILE; } unless (open(FILE, $file)) { next FILE; } $Matches = 0; $Prev = ""; while (<FILE>) { $_ =~ tr/\n//d; $String = $Prev . $_; &{$matcher}(); if ($Matches) { ($NF = "$file-$Vals{$Matches}") =~ s/^.*\///; rename $file , "capture/$NF"; $Prev = ""; next FILE; } if (eof) { $Prev = ""; } else { $Prev = substr($_,-$MaxLen); } } } }
      Any comments (especially in the way of speed improvements) will be greatly appreciated!

      L~R

Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Node Status?
node history
Node Type: note [id://225629]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others pondering the Monastery: (7)
As of 2024-03-29 08:31 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found