#!/usr/bin/perl -w use strict; open (DATA,">data") or die "Unable to open data file $!"; for ( 1 .. 1000 ) { my $Length = int(rand 240) + 10; my $string = ""; while (length($string) < $Length) { $string .= chr( int(rand 58) + 65 ); } print DATA "$string\n"; } #### #!/usr/bin/perl -w use strict; use Benchmark 'cmpthese'; cmpthese 150, { 'merlyn' => sub { use Inline C =>; open (DATA,"data") or die "\nUnable to open data file\n"; while () { my $result = UniqueCount($_); } }, 'L~R' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { my %unique; scalar grep { !$unique{$_}++ } split //, $_; } }, 'OM_Zen' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { my $count; my %hashofuniq; my @txt1 = split ('',$_); @hashofuniq{@txt1} = 1; foreach (keys %hashofuniq){ $count++; } } }, 'rob_au' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { my %hash; @hash{ split '', $_ } = 1; scalar keys %hash; } }, 'pfaut' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { my $l = ''; my $u; for (sort split(//,$_)) { $u++,$l=$_ if ($_ ne $l); } } }, 'Coruscate' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { my %same = map { $_, 1 } split //, $_; scalar keys %same; } }, 'tall_man' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { $_ = join('', sort(split //,$_)); $_ =~ s/(.)\1+/$1/g; my $result = length($_); } }, 'Br_Uk-1' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { $_ = join('', sort(split //,$_)); $_ =~ tr/\x00-\xff//s; my $result = length($_); } }, 'Br_Uk-2' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { my @uniq; @uniq[unpack 'U*',$_] = (1)x length $_; scalar (grep defined $_, @uniq); } }, 'Br_Uk-3' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { my @uniq; @uniq[unpack 'C*',$_] = (1)x length $_; scalar (grep defined $_, @uniq); } }, 'Br_Uk-4' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { my @uniq; scalar grep{ ++$uniq[$_] == 1 } unpack('C*',$_); } }, 'Br_Uk-5' => sub { open (DATA,"data") or die "\nUnable to open data file\n"; while () { @_ = $_; scalar grep{ ++$_[$_] == 1 } unpack('C*',$_[0]); } }, 'hgolan30' => sub { use Data::Dumper; open (DATA,"data") or die "\nUnable to open data file\n"; while () { my @tmp=split('',$_); my %count; for(@tmp){ $count{$_} +=1; } Dumper \%count; } }, }; __END__ __C__ int UniqueCount(unsigned char *str) { char counts[256]; int i; int result; /* clear the array */ for (i = 0; i <= 255; i++) { counts[i] = (char) 0; } /* notice the characters */ while (*str) { counts[*str++] = 1; } /* now count the results */ result = 0; for (i = 0; i <= 255; i++) { result += counts[i]; } return result; } #### Benchmark: timing 150 iterations of Br_Uk-1, Br_Uk-2, Br_Uk-3, Br_Uk-4, Br_Uk-5, Coruscate, L~R, OM_Zen, hgolan30, merlyn, pfaut, rob_au, tall_man... Br_Uk-1: 72 wallclock secs (71.99 usr + 0.03 sys = 72.02 CPU) @ 2.08/s (n=150) Br_Uk-2: 43 wallclock secs (42.28 usr + 0.03 sys = 42.31 CPU) @ 3.55/s (n=150) Br_Uk-3: 41 wallclock secs (40.94 usr + 0.03 sys = 40.97 CPU) @ 3.66/s (n=150) Br_Uk-4: 44 wallclock secs (44.46 usr + 0.02 sys = 44.48 CPU) @ 3.37/s (n=150) Br_Uk-5: 48 wallclock secs (47.96 usr + 0.02 sys = 47.98 CPU) @ 3.13/s (n=150) Coruscate: 103 wallclock secs (102.52 usr + 0.04 sys = 102.56 CPU) @ 1.46/s (n=150) L~R: 77 wallclock secs (76.83 usr + 0.03 sys = 76.86 CPU) @ 1.95/s (n=150) OM_Zen: 75 wallclock secs (75.66 usr + 0.03 sys = 75.69 CPU) @ 1.98/s (n=150) hgolan30: 233 wallclock secs (232.13 usr + 0.42 sys = 232.55 CPU) @ 0.65/s (n=150) merlyn: 1 wallclock secs ( 0.96 usr + 0.02 sys = 0.98 CPU) @ 153.06/s (n=150) pfaut: 106 wallclock secs (105.95 usr + 0.04 sys = 105.99 CPU) @ 1.42/s (n=150) rob_au: 48 wallclock secs (47.90 usr + 0.02 sys = 47.92 CPU) @ 3.13/s (n=150) tall_man: 96 wallclock secs (96.52 usr + 0.03 sys = 96.55 CPU) @ 1.55/s (n=150) Rate hgolan30 pfaut Coruscate tall_man L~R OM_Zen Br_Uk-1 Br_Uk-5 rob_au Br_Uk-4 Br_Uk-2 Br_Uk-3 merlyn hgolan30 0.645/s -- -54% -56% -58% -67% -67% -69% -79% -79% -81% -82% -82% -100% pfaut 1.42/s 119% -- -3% -9% -27% -29% -32% -55% -55% -58% -60% -61% -99% Coruscate 1.46/s 127% 3% -- -6% -25% -26% -30% -53% -53% -57% -59% -60% -99% tall_man 1.55/s 141% 10% 6% -- -20% -22% -25% -50% -50% -54% -56% -58% -99% L~R 1.95/s 203% 38% 33% 26% -- -2% -6% -38% -38% -42% -45% -47% -99% OM_Zen 1.98/s 207% 40% 36% 28% 2% -- -5% -37% -37% -41% -44% -46% -99% Br_Uk-1 2.08/s 223% 47% 42% 34% 7% 5% -- -33% -33% -38% -41% -43% -99% Br_Uk-5 3.13/s 385% 121% 114% 101% 60% 58% 50% -- -0% -7% -12% -15% -98% rob_au 3.13/s 385% 121% 114% 101% 60% 58% 50% 0% -- -7% -12% -15% -98% Br_Uk-4 3.37/s 423% 138% 131% 117% 73% 70% 62% 8% 8% -- -5% -8% -98% Br_Uk-2 3.55/s 450% 151% 142% 128% 82% 79% 70% 13% 13% 5% -- -3% -98% Br_Uk-3 3.66/s 468% 159% 150% 136% 88% 85% 76% 17% 17% 9% 3% -- -98% merlyn 153/s 23630% 10715% 10365% 9752% 7743% 7623% 7249% 4796% 4790% 4439% 4217% 4081% --