Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl Monk, Perl Meditation
 
PerlMonks  

comment on

( [id://3333]=superdoc: print w/replies, xml ) Need Help??

Here is a collection of subroutines for converting between:

  • Perl variables (nested data structures),
  • JSON strings,
  • YAML strings,
  • Data::Dumper output strings

I really needed one just recently after my older implementation broke because of unicode content and Data::Dumper's obsession with escaping unicode. And here is what I have whipped up for my own use and anyone else's after posting Convert JSON to Perl and back with unicode and getting pointers from haukex, kcott, an anonymous monk and Corion who solved (hopefully for eternity) how to make Data::Dumper unicode escaping optional,(see Corion's answer Re: Convert JSON to Perl and back with unicode).

Because I don't leave a challenge unchallenged here is the code, in the hope to be released as a module with your comments and suggestions.

package Data::Roundtrip; use 5.006; use strict; use warnings; use Encode qw/encode_utf8 decode_utf8/; use JSON qw/decode_json encode_json/; use Unicode::Escape; use YAML; use Sub::Override; use Data::Dumper qw/Dumper/; use Exporter qw(import); our @EXPORT = qw( _read_from_file _write_to_file _read_from_filehandle _write_to_filehandle perl2json json2perl perl2yaml yaml2perl perl2dump dump2perl ); sub _read_from_file { my $infile = $_[0]; my $FH; if( ! open $FH, '<:encoding(UTF-8)', $infile ){ warn "failed to open file '$infile' for reading, $!"; return undef; } my $contents = _read_from_filehandle($FH); close $FH; return $contents } sub _write_to_file { my $outfile = $_[0]; my $contents = $_[1]; my $FH; if( ! open $FH, '>:encoding(UTF-8)', $outfile ){ warn "failed to open file '$outfile' for reading, $!"; return undef } if( ! _write_to_filehandle($FH, $contents) ){ warn "error, call to + ".'_write_to_filehandle()'." has failed"; close $FH; return undef } close $FH; return $contents } sub _read_from_filehandle { my $FH = $_[0]; # you should open INFH as '<:encoding(UTF-8)' # or if it is STDIN, do binmode STDIN , ':encoding(UTF-8)'; return do { local $/; <$FH> } } sub _write_to_filehandle { my $FH = $_[0]; my $contents = $_[1]; # you should open $OUTFH as >:encoding(UTF-8)' # or if it is STDOUT, do binmode STDOUT , ':encoding(UTF-8)'; print $FH $contents; return 1; } sub perl2json { my $pv = $_[0]; my $params = defined($_[1]) ? $_[1] : {}; my $pretty_printing = exists($params->{'pretty'}) && defined($para +ms->{'pretty'}) ? $params->{'pretty'} : 0 ; my $escape_unicode = exists($params->{'escape-unicode'}) && define +d($params->{'escape-unicode'}) ? $params->{'escape-unicode'} : 0 ; my $json_string; if( $escape_unicode ){ if( $pretty_printing ){ $json_string = JSON->new->utf8(1)->pretty->encode($pv); } else { $json_string = JSON->new->utf8(1)->encode($pv) } $json_string = Unicode::Escape::escape($json_string, 'utf8'); } else { if( $pretty_printing ){ $json_string = JSON->new->utf8(0)->pretty->encode($pv); } else { $json_string = JSON->new->utf8(0)->encode($pv) } } if( ! $json_string ){ warn "perl2json() : error, no json produced +from perl variable"; return undef } return $json_string } sub perl2yaml { my $pv = $_[0]; my $params = defined($_[1]) ? $_[1] : {}; my $pretty_printing = exists($params->{'pretty'}) && defined($para +ms->{'pretty'}) ? $params->{'pretty'} : 0 ; warn "perl2yaml() : pretty-printing is not supported" and $pretty_ +printing=0 if $pretty_printing; my $escape_unicode = exists($params->{'escape-unicode'}) && define +d($params->{'escape-unicode'}) ? $params->{'escape-unicode'} : 0 ; my ($yaml_string, $escaped); if( $escape_unicode ){ if( $pretty_printing ){ #$yaml_string = Data::Format::Pretty::YAML::format_pretty( +$pv); } else { $yaml_string = YAML::Dump($pv) } $yaml_string = Unicode::Escape::escape($yaml_string, 'utf8'); } else { if( $pretty_printing ){ #$yaml_string = Data::Format::Pretty::YAML::format_pretty( +$pv); } else { $yaml_string = YAML::Dump($pv) } } if( ! $yaml_string ){ warn "perl2yaml() : error, no yaml produced +from perl variable"; return undef } # return Encode::decode_utf8($yaml_string) return $yaml_string } sub yaml2perl { my $yaml_string = $_[0]; #my $params = defined($_[1]) ? $_[1] : {}; my $pv = YAML::Load($yaml_string); if( ! $pv ){ warn "yaml2perl() : error, call to YAML::Load() has f +ailed"; return undef } return $pv } sub json2perl { my $json_string = $_[0]; #my $params = defined($_[1]) ? $_[1] : {}; my $pv = JSON::decode_json(Encode::encode_utf8($json_string)); if( ! defined $pv ){ warn "json2perl() : error, call to json2perl +() has failed"; return undef } return $pv; } sub yaml2json { my $yaml_string = $_[0]; my $params = defined($_[1]) ? $_[1] : {}; my $pv = yaml2perl($yaml_string, $params); if( ! $pv ){ warn "yaml2json() : error, call to yaml2perl() has fa +iled"; return undef } my $json = perl2json($pv, $params); if( ! $json ){ warn "yaml2json() : error, call to perl2json() has +failed"; return undef } return $json } sub json2yaml { my $json_string = $_[0]; my $params = defined($_[1]) ? $_[1] : {}; my $pv = json2perl($json_string, $params); if( ! defined $pv ){ warn "json2yaml() : error, call to json2perl +() has failed"; return undef } my $yaml_string = perl2yaml($pv, $params); if( ! defined $yaml_string ){ warn "json2yaml() : error, call to +perl2yaml() has failed"; return undef } return $yaml_string } # this bypasses Data::Dumper's obsession with escaping # non-ascii characters by redefining qquote() sub # The redefinition code is by [Corion] @ Perlmonks and cpan # see https://perlmonks.org/?node_id=11115271 # sub perl2dump { my $pv = $_[0]; my $params = defined($_[1]) ? $_[1] : {}; local $Data::Dumper::Terse = exists($params->{'terse'}) && defined +($params->{'terse'}) ? $params->{'terse'} : 0 ; local $Data::Dumper::Indent = exists($params->{'indent'}) && defin +ed($params->{'indent'}) ? $params->{'indent'} : 0 ; my $ret; if( exists($params->{'dont-bloody-escape-unicode'}) && defined($pa +rams->{'dont-bloody-escape-unicode'}) && ($params->{'dont-bloody-escape-unicode'}==1) ){ local $Data::Dumper::Useperl = 1; local $Data::Dumper::Useqq='utf8'; my $override = Sub::Override->new( 'Data::Dumper::qquote' => \& _qquote_redefinition_by_Corio +n ); $ret = Dumper($pv); # restore the overriden sub $override->restore; } else { $ret = Dumper($pv) } return $ret } sub dump2perl { my $dump_string = $_[0]; #my $params = defined($_[1]) ? $_[1] : {}; $dump_string =~ s/^\$VAR1\s*=\s*//g; my $pv = eval($dump_string); if( $@ || ! defined $pv ){ warn "error, failed to eval() input str +ing alledgedly a perl variable: $@"; return undef } return $pv } # Below code is by [Corion] @ Perlmonks and cpan # see https://perlmonks.org/?node_id=11115271 # it's for redefining Data::Dumper::qquote # (it must be accompanied by # $Data::Dumper::Useperl = 1; # $Data::Dumper::Useqq='utf8'; sub _qquote_redefinition_by_Corion { local($_) = shift; s/([\\\"\@\$])/\\$1/g; return qq("$_") unless /[[:^print:]]/; # fast exit if only printabl +es # Here, there is at least one non-printable to output. First, trans +late the # escapes. s/([\a\b\t\n\f\r\e])/$Data::Dumper::esc{$1}/g; # no need for 3 digits in escape for octals not followed by a digit. s/($Data::Dumper::low_controls)(?!\d)/'\\'.sprintf('%o',ord($1))/eg; # But otherwise use 3 digits s/($Data::Dumper::low_controls)/'\\'.sprintf('%03o',ord($1))/eg; # all but last branch below not supported --BEHAVIOR SUBJECT TO CH +ANGE-- my $high = shift || ""; if ($high eq "iso8859") { # Doesn't escape the Latin1 printables if ($Data::Dumper::IS_ASCII) { s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg; } elsif ($] ge 5.007_003) { my $high_control = utf8::unicode_to_native(0x9F); s/$high_control/sprintf('\\%o',ord($1))/eg; } } elsif ($high eq "utf8") { # Some discussion of what to do here is in # https://rt.perl.org/Ticket/Display.html?id=113088 # use utf8; # $str =~ s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge; } elsif ($high eq "8bit") { # leave it as it is } else { s/([[:^ascii:]])/'\\'.sprintf('%03o',ord($1))/eg; #s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge; } return qq("$_"); } =head1 NAME Data::Roundtrip - convert between Perl data structures, YAML and JSON +with unicode support (I believe ...) =head1 VERSION Version 0.01 =cut our $VERSION = '0.01'; =head1 SYNOPSIS Quick summary of what the module does. Perhaps a little code snippet. use Data::Roundtrip; my $foo = Data::Roundtrip->new(); ... =head1 EXPORT perl2json json2perl perl2yaml yaml2perl perl2dump dump2perl _read_from_file _write_to_file _read_from_filehandle _write_to_filehandle =head1 SUBROUTINES/METHODS =head2 perl2json($perlvar, $optional_paramshashref) Given an input C<$perlvar> (which can be a simple scalar or a nested data structure, but not an object), it will return the equivalent JSON string. In C<$optional_paramshashref> one can specify whether to escape unicode with C<'escape-unicode'=>1> and/or prettify the returned result with C<'pretty'=>1>. The output can fed to L<Data::Roundtrip::json2perl> for getting the Perl variable back. Returns the JSON string on success or C<undef> on failure. =head2 json2perl($jsonstring) Given an input C<$jsonstring> as a string, it will return the equivalent Perl data structure using C<JSON::decode_json(Encode::encode_utf8($jsonstring))>. Returns the Perl data structure on success or C<undef> on failure. =head2 perl2yaml($perlvar, $optional_paramshashref) Given an input C<$perlvar> (which can be a simple scalar or a nested data structure, but not an object), it will return the equivalent YAML string. In C<$optional_paramshashref> one can specify whether to escape unicode with C<'escape-unicode'=>1>. Prettify is not supported yet. The output can fed to L<Data::Roundtrip::yaml2perl> for getting the Perl variable back. Returns the YAML string on success or C<undef> on failure. =head2 yaml2perl($yamlstring) Given an input C<$yamlstring> as a string, it will return the equivalent Perl data structure using C<YAML::Load($yamlstring)> Returns the Perl data structure on success or C<undef> on failure. =head2 perl2dump($perlvar, $optional_paramshashref) Given an input C<$perlvar> (which can be a simple scalar or a nested data structure, but not an object), it will return the equivalent string (via L<Data::Dumper>). In C<$optional_paramshashref> one can specify whether to NOT escape unicode with C<'dont-bloody-escape-unicode'=>1>, and/or use terse output with C<'terse'=>1> and remove all the incessant indentation C<indent'=>1> which unfortunately goes to the other extreme of producing a space-less output, not fit for human consumption. The output can fed to L<Data::Roundtrip::dump2perl> for getting the Perl variable back. Returns the string representation of the input perl variable on success or C<undef> on failure. =head2 json2perl($jsonstring) Given an input C<$jsonstring> as a string, it will return the equivalent Perl data structure using C<JSON::decode_json(Encode::encode_utf8($jsonstring))>. Returns the Perl data structure on success or C<undef> on failure. In C<$optional_paramshashref> one can specify whether to escape unicode with C<'escape-unicode'=>1> and/or prettify the returned result with C<'pretty'=>1>. Returns the yaml string on success or C<undef> on failure. =head2 json2yaml($jsonstring, $optional_paramshashref) Given an input JSON string C<$jsonstring>, it will return the equivalent YAML string L<YAML> by first converting JSON to a Perl variable and then converting that variable to YAML using L<Data::Roundtrip::perl2yaml()> +. All the parameters supported by L<Data::Roundtrip::perl2yaml()> are accepted. Returns the YAML string on success or C<undef> on failure. =head2 yaml2json($yamlstring, $optional_paramshashref) Given an input YAML string C<$yamlstring>, it will return the equivalent YAML string L<YAML> by first converting YAML to a Perl variable and then converting that variable to JSON using L<Data::Roundtrip::perl2json()> +. All the parameters supported by L<Data::Roundtrip::perl2json()> are accepted. Returns the JSON string on success or C<undef> on failure. =head1 AUTHOR Andreas Hadjiprocopis, C<< <bliako at cpan.org> >> =head1 BUGS Please report any bugs or feature requests to C<bug-data-roundtrip at +rt.cpan.org>, or through the web interface at L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue +=Data-Roundtrip>. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc Data::Roundtrip You can also look for information at: =over 4 =item * RT: CPAN's request tracker (report bugs here) L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=Data-Roundtrip> =item * AnnoCPAN: Annotated CPAN documentation L<http://annocpan.org/dist/Data-Roundtrip> =item * CPAN Ratings L<https://cpanratings.perl.org/d/Data-Roundtrip> =item * Search CPAN L<https://metacpan.org/release/Data-Roundtrip> =back =head1 ACKNOWLEDGEMENTS =head1 LICENSE AND COPYRIGHT This software, EXCEPT the portion created by [Corion] @ Perlmonks, is Copyright (c) 2020 by Andreas Hadjiprocopis. This is free software, licensed under: The Artistic License 2.0 (GPL Compatible) =cut 1; # End of Data::Roundtrip

And here is a test script which demonstrates usage and tests unicoded content:

#!perl -T
use 5.006;
use strict;
use warnings;

use utf8;
binmode STDERR, ':encoding(UTF-8)';
binmode STDOUT, ':encoding(UTF-8)';
binmode STDIN,  ':encoding(UTF-8)';
# to avoid wide character in TAP output
# do this before loading Test* modules
use open ':std', ':encoding(utf8)';

use Test::More;
#use Test::Deep;

my $num_tests = 0;

use Data::Roundtrip;

use Data::Dumper qw/Dumper/;

my $abc = "abc-αβγ";
my $xyz = "χψζ-xyz";

my $json_string = <<EOS;
{"$abc":"$xyz"}
EOS
$json_string =~ s/\s*$//;

my $yaml_string = <<EOS;
---
$abc: $xyz
EOS
#$yaml_string =~ s/\s*$//;

my $perl_var = {$abc => $xyz};

# perl2json
my $result = Data::Roundtrip::perl2json($perl_var);
ok(defined $result, "perl2json() called."); $num_tests++;
ok($result eq $json_string, "perl2json() checked (got: '$result', expected: '$json_string')."); $num_tests++;

# json2perl
$result = Data::Roundtrip::json2perl($json_string);
ok(defined $result, "json2perl() called."); $num_tests++;
for (keys %$result){
	ok(exists $perl_var->{$_}, "json2perl() key exists."); $num_tests++;
	ok($perl_var->{$_} eq $result->{$_}, "json2perl() values are the same."); $num_tests++;
}
for (keys %$perl_var){
	ok(exists $result->{$_}, "json2perl() key exists (other way round)."); $num_tests++;
}
# this fails:
#cmp_deeply($perl_var, $result, "json2perl() checked (got: '".Dumper($result)."', expected: ".Dumper($perl_var).")."); $num_tests++;

# perl2yaml
$result = Data::Roundtrip::perl2yaml($perl_var);
ok(defined $result, "perl2yaml() called."); $num_tests++;
ok($result eq $yaml_string, "perl2yaml() checked (got: '$result', expected: '$yaml_string')."); $num_tests++;

# yaml2perl
$result = Data::Roundtrip::yaml2perl($yaml_string);
ok(defined $result, "yaml2perl() called."); $num_tests++;
for (keys %$result){
	ok(exists $perl_var->{$_}, "yaml2perl() key exists."); $num_tests++;
	ok($perl_var->{$_} eq $result->{$_}, "yaml2perl() values are the same."); $num_tests++;
}
for (keys %$perl_var){
	ok(exists $result->{$_}, "yaml2perl() key exists (other way round)."); $num_tests++;
}

# yaml2json
$result = Data::Roundtrip::yaml2json($yaml_string);
ok(defined $result, "yaml2json() called."); $num_tests++;
ok($result eq $json_string, "perl2yaml() checked (got: '$result', expected: '$json_string')."); $num_tests++;

# json2yaml
$result = Data::Roundtrip::json2yaml($json_string);
ok(defined $result, "json2yaml() called."); $num_tests++;
ok($result eq $yaml_string, "perl2yaml() checked (got: '$result', expected: '$yaml_string')."); $num_tests++;

# perl2dump and dump2perl with unicode quoting (default Data::Dumper behaviour)
my $adump = Data::Roundtrip::perl2dump($perl_var, {'terse'=>1});
ok(defined $adump, "perl2dump() called."); $num_tests++;
ok($adump=~/\\x\{3b1\}/, "perl2dump() unicode quoted."); $num_tests++;
# dump2perl
$result = Data::Roundtrip::dump2perl($adump);
ok(defined $result, "dump2perl() called."); $num_tests++;
for (keys %$result){
	ok(exists $perl_var->{$_}, "perl2dump() and dump2perl() key exists."); $num_tests++;
	ok($perl_var->{$_} eq $result->{$_}, "perl2dump() and dump2perl() values are the same."); $num_tests++;
}
for (keys %$perl_var){
	ok(exists $result->{$_}, "perl2dump() and dump2perl() key exists (other way round)."); $num_tests++;
}

# perl2dump and dump2perl WITHOUT unicode quoting
$adump = Data::Roundtrip::perl2dump($perl_var, {'terse'=>1, 'dont-bloody-escape-unicode'=>1});
ok(defined $adump, "perl2dump() called."); $num_tests++;
ok($adump!~/\\x\{3b1\}/, "perl2dump() unicode not quoted."); $num_tests++;
# dump2perl
$result = Data::Roundtrip::dump2perl($adump);
ok(defined $result, "dump2perl() called."); $num_tests++;
for (keys %$result){
	ok(exists $perl_var->{$_}, "perl2dump() and dump2perl() key exists."); $num_tests++;
	ok($perl_var->{$_} eq $result->{$_}, "perl2dump() and dump2perl() values are the same."); $num_tests++;
}
for (keys %$perl_var){
	ok(exists $result->{$_}, "perl2dump() and dump2perl() key exists (other way round)."); $num_tests++;
}

done_testing($num_tests);

bw, bliako


In reply to RFC: Perl<->JSON<->YAML<->Dumper : roundtripping and possibly with unicode by bliako

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others making s'mores by the fire in the courtyard of the Monastery: (3)
As of 2024-04-19 23:40 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found