#!/bin/perl -w

use strict;
use Benchmark( 'cmpthese');

use Encode;
use Text::Iconv;
use Unicode::Map8;
use Unicode::String qw(utf8);

use utf8;

# Name of the target encoding, handed to the Text::Iconv and Unicode::Map8
# constructors below.
my $enc = 'latin1';

# The converter objects are created once, up front, so that the timed code
# only performs the conversion itself.
my $convert_iconv   = Text::Iconv->new('utf8', $enc);
my $convert_unicode = Unicode::Map8->new($enc);

# The sample sentence is the first line of the DATA section.
my $text = <DATA>;
chomp $text;

# One closure per conversion method, keyed by the label shown in the
# benchmark report. The very same closures are used for the sanity check
# and for the timing run.
my %convert_with = (
    'Encode'        => sub { encode("iso-8859-1", $text);               },
    'Text::Iconv'   => sub { $convert_iconv->convert( $text);           },
    'Unicode::Map8' => sub { $convert_unicode->to8 (utf8($text)->ucs2); },
    'regexp'        => sub { latin1( $text);                            },
);

# Sanity check: print what each method produces for the sample sentence.
print "Encode        : ", $convert_with{'Encode'}->(),        "\n";
print "Text::Iconv   : ", $convert_with{'Text::Iconv'}->(),   "\n";
print "Unicode::Map8 : ", $convert_with{'Unicode::Map8'}->(), "\n";
print "regexp        : ", $convert_with{'regexp'}->(),        "\n";

# Timing run: 500000 iterations of every method, reported as a comparison
# table by cmpthese.
cmpthese( 500000, \%convert_with );


# latin1( $text )
#
# Pure-regexp conversion from UTF-8 to Latin-1: every two-byte UTF-8
# sequence that encodes a code point in U+0080..U+00FF is replaced by the
# single character with that code point.
#
#   $text   - string holding UTF-8 encoded bytes
#   returns - the converted copy (the substitution is applied to a local
#             copy of the argument)
#
# Only a lead byte of 0xC2 or 0xC3 followed by a continuation byte
# (0x80-0xBF) is rewritten. 0xC0 and 0xC1 can only start overlong encodings
# of ASCII, and a lead byte followed by a non-continuation byte is not a
# UTF-8 sequence at all; both are left untouched instead of being folded
# into an unrelated character.
sub latin1
  { my $text=shift;
    $text=~s{([\xc2\xc3])([\x80-\xbf])}
            { # the low 2 bits of the lead byte are the top 2 bits of the
              # code point, the low 6 bits of the continuation byte the rest
              chr( ((ord($1) & 0x03) << 6) | (ord($2) & 0x3F) )
            }ge;
    return $text;
  }


__DATA__
texte soupÃ§onnÃ© d'Ãªtre plein de caractÃšres accentuÃ©s

## Output of a sample run ##

Encode        : texte soupçonné d'être plein de caractères accentués
Text::Iconv   : texte soupçonné d'être plein de caractères accentués
Unicode::Map8 : texte soupçonné d'être plein de caractères accentués
regexp        : texte soupçonné d'être plein de caractères accentués

Benchmark: timing 500000 iterations of Encode, Text::Iconv, Unicode::Map8, regexp...
Encode:         6 wallclock secs ( 4.91 usr +  0.02 sys =  4.93 CPU) @ 101419.88/s (n=500000)
Text::Iconv:    2 wallclock secs ( 2.20 usr +  0.00 sys =  2.20 CPU) @ 227272.73/s (n=500000)
Unicode::Map8:  7 wallclock secs ( 7.66 usr +  0.00 sys =  7.66 CPU) @ 65274.15/s (n=500000)
regexp:         6 wallclock secs ( 5.65 usr +  0.01 sys =  5.66 CPU) @ 88339.22/s (n=500000)

               Rate    Unicode::Map8        regexp        Encode   Text::Iconv
Unicode::Map8  65274/s            --          -26%          -36%          -71%
regexp         88339/s           35%            --          -13%          -61%
Encode        101420/s           55%           15%            --          -55%
Text::Iconv   227273/s          248%          157%          124%            --