Thanks for the answers. Halfway there. If I get the string myself with getData on a DOM::Node, it looks great, but I still get gibberish when printing the string XML::DOM produces.
xml:
<?xml version="1.0" encoding="utf-8"?>
<Name>IssuéTést</Name>
code:
#!/usr/bin/perl -w
# Reproduction script: parse in.xml with XML::DOM, then print the raw input,
# the data of one text node, and the document as re-serialized by toString().
# Each string is preceded by the state of Perl's internal UTF-8 flag.
use strict;
use XML::DOM;
use Encode;
# Per the `open` pragma documentation: declare :utf8 as the default output
# layer, then let ":std" extend the declared layers to the standard handles.
use open OUT => ":utf8";
use open ":std";

my $XmlParserObj = XML::DOM::Parser->new();

# Slurp the input through a :utf8 layer, so $inStr holds decoded characters.
# Fail loudly instead of silently parsing an empty string when the file is missing.
open(my $in_fh, "<:utf8", "in.xml") or die "Cannot open in.xml: $!";
my $inStr = join("", <$in_fh>);
close($in_fh);

#$inStr = encode("utf8",$inStr); # redundant if I use <:utf8 in open
#$inStr = decode("utf8",$inStr); # make all tested strings get "?" instead of latin chars

# NOTE(review): the parser is handed an already-decoded character string here.
# XML::Parser/expat presumably expects the raw bytes plus the encoding named in
# the XML declaration; this may be related to the garbled toString() output.
# TODO confirm, e.g. by reading with "<:raw" or by using parsefile().
my $doc = $XmlParserObj->parse($inStr);

# Data of the first child node of the first <Name> element.
my $value = $doc->getElementsByTagName("Name")->item(0)->getChildNodes()->item(0)->getData();

# The whole document serialized back to a string by XML::DOM.
my $str = $doc->toString();

#binmode(STDOUT,":utf8"); # redundant
print "is input utf8 ? ",Encode::is_utf8($inStr),"\n";
print "Input:\n".$inStr;
print "is value utf8 ? ",Encode::is_utf8($value),"\n";
print "Value: ".$value."\n";
print "is output utf8 ? ",Encode::is_utf8($str),"\n";
print "Output:\n".$str;
exit(0);
output:
is input utf8 ? 1
Input:
<?xml version="1.0" encoding="utf-8"?>
<Name>IssuéTést</Name>
is value utf8 ? 1
Value: IssuéTést
is output utf8 ? 1
Output:
<?xml version="1.0" encoding="utf-8"?>
<Name>Issu㩔st</Name>
Thanks again