use strict;
use warnings;

use Encode qw( encode decode );

# fix_broken_text($self, $field)
#
# Returns a copy of $field in which the five characters that are special in
# XML markup (& < > " ') are replaced by their predefined entity references.
#
#   $self   Invocant; unused by this routine (it is written as a method).
#   $field  Text to escape.
#
# All characters are replaced in a single pass so that the '&' introduced by
# one replacement can never be escaped a second time by another.
#
# NOTE: every '&' in the input is escaped, including one that already starts
# a valid entity reference ("&amp;" becomes "&amp;amp;"). Only apply this to
# text known to be unescaped.
sub fix_broken_text {
    my ($self, $field) = @_;

    my %entity_for = (
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&apos;',
    );

    $field =~ s/([&<>"'])/$entity_for{$1}/g;

    return $field;
}

# Slurp the whole file as raw bytes and decode it from Windows-1252 into a
# Perl character string. $xml_qfn (the path of the XML file) is not defined
# in this snippet and must be supplied by the surrounding code.
my $decoded_xml;
{
    open(my $fh, '<', $xml_qfn)
        or die "Can't open '$xml_qfn': $!";
    binmode($fh);    # read bytes untouched; decoding is done explicitly below
    local $/;        # undefined record separator: a single read returns the whole file
    $decoded_xml = decode('cp1252', scalar(<$fh>));
}

# ... Try to fix problems with unescaped characters ...

# Re-encode the repaired text as UTF-8 bytes for the parser.
my $encoded_xml = encode('UTF-8', $decoded_xml);

# ... Pass $encoded_xml to parser ...