$_ = <<"..."; CPD6Z98SB2KQNWV0F7Y1IX4GLRA5MTOJHE3U CXZOL6SUI2WTJ30HF519YPGBRNAK48MQVD7E T8COSQU6I2FJN40DKL157WVGPYXARZ3MBHE9 KNCWVZDSR5420LP91FIQGB7Y3A6J8MOUXTEH XF9C4PSDY62TWJ0QBN17IKG3OH8ALVRM5UEZ D9QCHUSN7TW2YZL0O831FGXIR6JA4P5MVBKE ZC7ISQUPK6N20OLV4T31G9FRXBAWM5YJHED8 Z3C7SJVODL25TRQ01HPWGNKXB4UA68YMI9EF BC9OXDHS2FI5Z6U0TYL1VPGQK7ANR38MEWJ4 K4TCQBHS2ZV7FXU0P8R1YGDON3A6JILM9EW5 ... # $_ = <<"..."; # HOUSEBOAT # COMPUTER # DOUBT # ... use Benchmark; use constant { BACKREF_HACK => 1, LENGTH_HACK => 1 }; $| = 1; my $charclass = common_charclass(/([^\n]+)/); # use re 'debug'; # rx(8,$charclass); # exit; for ( my $len = 1;; ++$len ) { my $rx = rx( $len, $charclass ); my $t0 = Benchmark->new; my @found = /$rx/; my $t1 = Benchmark->new; print timestr( timediff( $t1, $t0 ) ) . " "; if (@found) { print join( '', @found ) . "\n"; } else { print "\n"; last; } } sub uniq { my %seen; return grep !$seen{$_}++, @_; } sub max { my $max = shift @_; $max < $_ and $max = $_ for @_; return $max; } sub common_charclass { my %seen; ++$seen{$_} for map @$_, map [ uniq( split // ) ], @_; my $max = max( values %seen ); my @no = delete @seen{ grep $max != $seen{$_}, keys %seen } or return undef; return do { my ($solo) = keys %seen; $solo } if 1 == scalar keys %seen; return '[' . join( '', sort keys %seen ) . ']'; } sub rx { # 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU) C # 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU) CP # 0 wallclock secs ( 0.00 usr + 0.00 sys = 0.00 CPU) CPM # 0 wallclock secs ( 0.01 usr + 0.00 sys = 0.01 CPU) CPME # 1 wallclock secs ( 0.42 usr + 0.00 sys = 0.42 CPU) CS201 # 3 wallclock secs ( 2.93 usr + 0.00 sys = 2.93 CPU) CS201G # 18 wallclock secs (16.28 usr + 0.00 sys = 16.28 CPU) CS201GA # 78 wallclock secs (72.17 usr + 0.02 sys = 72.19 CPU) CS201GAM # 277 wallclock secs (263.39 usr + 0.08 sys = 263.47 CPU) CS201GAME # 3466 wallclock secs (3260.91 usr + 0.98 sys = 3261.89 CPU) # # real 64m3.333s # user 60m16.157s # sys 0m1.088s my ( $len, $char ) = @_; $char = '\S' if not defined $char; my $pat = "\\A.*?" . ( "($char).*?" x $len ) . "\n"; # Start a new line $pat .= "(?>(?:"; # Make all my assertions about the content of the line $pat .= join '', map { my $capture_num = $_; # Skip past stuff that doesn't match $$_ my $seek = BACKREF_HACK ? "(?>(?:(?!\\$_).)*)" : ".*?"; # Find $$_ my $found_it = "\\$_"; # If I'm too close to the end of the line and don't have # enough characters to match, I'll assert that I need that # many and bail if there aren't enough. my $enough_left_at_end; if ( LENGTH_HACK ) { if ( $_ < $len ) { my $must_be_at_least_this_long = $len - $_; $enough_left_at_end = "(?=.{$must_be_at_least_this_long})"; } else { $enough_left_at_end = ""; } } else { $enough_left_at_end = ''; } # Match *this* "$seek$found_it$enough_left_at_end"; } 1 .. $len; # Skip to the end of the line. $pat .= "(?>.*)\n"; # Repeat lines til I get to the end $pat .= ")+)\\z"; return qr/(?-s)$pat/; }