.\File\AnyEncoding.pm
.\t\AnyEncoding.t
####
#!/usr/bin/perl
=pod
SYNOPSIS
use File::AnyEncoding; # unicode file writer - reader
my $fun1 = new File::AnyEncoding('utf-16le');
my $text1 = "Hello world";
my $filepath1 = "AnyEncoding-test1.txt";
$fun1->write_file($filepath1, $text1); # writes file with specified encoding
my $fun2 = new File::AnyEncoding();
my $text2 = $fun2->read_file($filepath1); # remembers the encoding found in $filepath1
$text2 =~s/world/unicode/; # modify file contents
my $filepath2 = "AnyEncoding-test2.txt";
$fun2->write_file($filepath2, $text2); # writes file with encoding found in $filepath1
AUTHOR
Rudif c/o Perlmonks
=cut
package File::AnyEncoding;
use strict;
use File::BOM qw( :all );
our %supported_encoding = map { $_ => 1 } ( 'NONE', 'UTF-8', 'UTF-16LE' );
sub new {
my $class = shift;
my $enc = shift // 'utf8';
my $self = {};
bless $self, $class;
$self->set_encoding($enc);
return $self;
}
sub set_encoding {
my $self = shift;
my $enc = shift;
unless ( defined $enc && defined $supported_encoding{ $enc } ) {
$enc = 'NONE';
#warn "defaulting to $enc";
}
$self->{encoding} = $enc;
}
sub get_encoding {
my $self = shift;
$self->{encoding};
}
sub write_file {
my $self = shift;
my $filepath = shift;
my $text = join '', @_;
my $enc = $self->{encoding};
my $FH;
if ( $enc eq 'NONE' ) {
open $FH, ">", $filepath;
#open $FH, ">:raw:encoding(UTF-8):crlf:utf8", $filepath;
}
else {
open $FH, ">:raw:encoding($enc):crlf:utf8", $filepath;
print $FH "\x{FEFF}";
}
print $FH $text;
close $FH;
}
sub read_file {
my $self = shift;
my $filepath = shift;
open my $FH, '<:bytes', "$filepath";
my ( $enc, $spillage ) = get_encoding_from_filehandle($FH);
$enc = $self->set_encoding($enc);
if ( $enc eq 'NONE' ) {
#binmode $FH, ":encoding(UTF-8)";
close $FH;
open $FH, '<', "$filepath";
}
else {
binmode $FH, ":encoding($enc)";
}
my @lines = <$FH>;
close $FH;
wantarray ? @lines : join '', @lines;
}
1;
##
##
#!/usr/bin/perl
use strict;
$|++;
use lib('..');
use Data::HexDump;
use File::Path;
my $data = '.\data';
rmtree $data; # remove old data if any
mkdir $data;
use Test::More tests => 28;
use File::AnyEncoding; # under test
use_ok('File::AnyEncoding');
# test utf-16le encoding
{
# create object and write test file with specified encoding
my $encoding = 'UTF-16LE';
my $fan1 = File::AnyEncoding->new($encoding);
isa_ok( $fan1, 'File::AnyEncoding', '$fan1' );
is( $fan1->get_encoding(), 'UTF-16LE', "specified encoding" );
my $text1 = "Hello world \x{263A}\n\n";
my $file1 = "$data\\$encoding.txt";
$fan1->write_file( $file1, $text1 );
my $expected1 = <set_encoding('NONE');
is( $fan1->get_encoding(), 'NONE', "default encoding" );
# read test file and remember encoding
my $text2 = $fan1->read_file($file1);
is( $fan1->get_encoding(), 'UTF-16LE', "detected encoding" );
my $expected2 = 'Hello world \x{263a}\x{d}\x{a}\x{d}\x{a}';
is( my_reasciify($text2), $expected2, "read_file $file1" );
my @lines2 = $fan1->read_file($file1);
is( scalar @lines2, 2, "read_file $file1" );
my $join2 = join( '', @lines2 );
is( my_reasciify($join2), $expected2, "read_file $file1" );
# modify text and write second file using the remebered encoding
$text2 =~ s/world/WORLD/;
$text2 =~ s/\x{263A}/\x{20AC}/;
( my $file2 = $file1 ) =~ s/.txt/-2.txt/;
$fan1->write_file( $file2, $text2 ); # writes file with remembered encoding
my $expected3 = <new($encoding);
isa_ok( $fan1, 'File::AnyEncoding', '$fan1' );
is( $fan1->get_encoding(), 'UTF-8', "specified encoding" );
my $text1 = "Hello world \x{263A}\n\n";
my $file1 = "$data\\$encoding.txt";
$fan1->write_file( $file1, $text1 );
my $expected1 = <set_encoding('NONE');
is( $fan1->get_encoding(), 'NONE', "default encoding" );
# read test file and remember encoding
my $text2 = $fan1->read_file($file1);
is( $fan1->get_encoding(), 'UTF-8', "detected encoding" );
my $expected2 = 'Hello world \x{263a}\x{a}\x{a}';
is( my_reasciify($text2), $expected2, "read_file $file1" );
my @lines2 = $fan1->read_file($file1);
is( scalar @lines2, 2, "read_file $file1" );
my $join2 = join( '', @lines2 );
is( my_reasciify($join2), $expected2, "read_file $file1" );
# modify text and write second file using the remebered encoding
$text2 =~ s/world/WORLD/;
$text2 =~ s/\x{263A}/\x{20AC}/;
( my $file2 = $file1 ) =~ s/.txt/-2.txt/;
$fan1->write_file( $file2, $text2 ); # writes file with remembered encoding
my $expected3 = <new($encoding);
isa_ok( $fan1, 'File::AnyEncoding', '$fan1' );
is( $fan1->get_encoding(), 'NONE', "specified encoding" );
my $text1 = "Hello world \x{263A}\n\n";
my $file1 = "$data\\$encoding.txt";
$fan1->write_file( $file1, $text1 );
my $expected1 = <set_encoding('UTF-8');
is( $fan1->get_encoding(), 'UTF-8', "default encoding" );
# read test file and remember encoding
my $text2 = $fan1->read_file($file1);
is( $fan1->get_encoding(), 'NONE', "detected encoding" );
my $expected2 = 'Hello world \x{e2}\x{98}\x{ba}\x{a}\x{a}';
is( my_reasciify($text2), $expected2, "read_file $file1" );
my @lines2 = $fan1->read_file($file1);
is( scalar @lines2, 2, "read_file $file1" );
my $join2 = join( '', @lines2 );
is( my_reasciify($join2), $expected2, "read_file $file1" );
# modify text and write second file using the remebered encoding
$text2 =~ s/world/WORLD/;
$text2 =~ s/\x{263A}/\x{20AC}/;
( my $file2 = $file1 ) =~ s/.txt/-2.txt/;
$fan1->write_file( $file2, $text2 ); # writes file with remembered encoding
my $expected3 = <file($file);
my $str = '';
while ( local $_ = $f->dump ) {
$str .= $_;
}
$str =~ s/.*00000000/00000000/s;
return $str;
}
# returns sprintf of characters in $string,
# replacing those not printable as ascii by their hex code point numeric value
# similar to sub in File::BOM
sub my_reasciify {
my $string = shift;
$string = join "", map {
my $ord = ord($_);
# ($ord > 127 || ($ord < 32 && $ord != 10))
( $ord > 127 || $ord < 32 )
? sprintf '\x{%x}', $ord
: $_
} split //, $string;
}
__END__