Beefy Boxes and Bandwidth Generously Provided by pair Networks
No such thing as a small change
 
PerlMonks  

comment on

( #3333=superdoc: print w/replies, xml ) Need Help??

You just write a recursive-descent parser. Doing that is pretty easy. Here's most of one:

#!/usr/bin/perl -w use strict; use Data::Dumper 'Dumper'; my %Data; my $Comment = '/[*]([^*]+|[*]+[^*/])*[*]/'; my $Name = '([a-zA-Z_][a-zA-Z0-9_]*)'; my $Quoted = '"([^"]*)"'; local $/; my $code = <DATA>; parse( \$code ); print Dumper( \%Data ); exit; sub parse { my( $svCode ) = @_; skip( $svCode ); while( $$svCode !~ /\G\z/gc ) { if( $$svCode =~ /\Goptions(?!\w)/gc ) { parseOptions( $svCode ); } elsif( $$svCode =~ /\Gobject(?!\w)/gc ) { parseObject( $svCode ); } else { fail( $svCode, "Expected 'options' or 'object'" ); } skip( $svCode ); } } sub skip { my( $svCode ) = @_; 0 while $$svCode =~ /\G\s+/gc || $$svCode =~ /\G$Comment/gc; } sub expect { my( $svCode, $re, $desc ) = @_; skip( $svCode ); fail( $svCode, "Expected ", $desc ) if $$svCode !~ /\G$re/gc; my $return = $1; skip( $svCode ); return $return; } sub fail { my( $svCode, @error ) = @_; my $pos = pos $$svCode; my $before = substr( $$svCode, 0, $pos ); my $line = 1 + ( $before =~ tr/\n/\n/ ); my $col = 1 + length( $before =~ /([^\n]*)\z/ ? $1 : '' ); my $next = $$svCode =~ /\G([^\n]{1,8})/gc ? $1 : undef; die @error, " at line $line, col $col, before '$next'.\n" if defined $next; my $after = $before =~ /([^\n]{1,8})\z/ ? $1 : undef; die @error, " at line $line, col $col, after '$after'.\n" if defined $after; die @error, " at line $line, col $col.\n"; } sub parseOptions { my( $svCode ) = @_; expect( $svCode, '[{]', "'{' after 'options'" ); while( $$svCode !~ /\G[}]/gc ) { my $name = expect( $svCode, $Name, 'option name' ); if( $$svCode =~ /\G$Name/gc ) { $Data{''}{$name} = $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes $Data{''}{$name} = $1; } elsif( $$svCode =~ /\G[{]/gc ) { parseListOption( $svCode, $name ); } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of options" ); } else { fail( $svCode, "Unsupported value for option '$name'" +); } expect( $svCode, ';', "';' after option '$name'" ); } expect( $svCode, ';', "';' after options" ); } sub parseListOption { my( $svCode, $name ) = @_; skip( $svCode ); my @values; while( $$svCode !~ /\G[}]/gc ) { skip( $svCode ); if( $$svCode =~ /\G$Name/gc ) { push @values, $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes push @values, $1; } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of list of option ' +$name'" ); } else { fail( $svCode, "Unsupported value in list of option '$ +name'" ); } expect( $svCode, ';', "';' after value in list of option ' +$name'" ); } $Data{''}{$name} = \@values; } sub parseListObject { my( $svCode, $obj, $name ) = @_; skip( $svCode ); my @values; while( $$svCode !~ /\G[}]/gc ) { skip( $svCode ); if( $$svCode =~ /\G$Name/gc ) { push @values, $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes push @values, $1; } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of list '$obj'.'$na +me'" ); } else { fail( $svCode, "Unsupported value in list of '$obj'.'$ +name'" ); } expect( $svCode, ';', "';' after value in list of '$obj'.' +$name'" ); } $Data{$obj}{$name} = \@values; } sub parseObject { my( $svCode ) = @_; my $obj = expect( $svCode, $Quoted, 'object name' ); # TODO: Unescape things that can be escaped inside object name +s expect( $svCode, 'in', "'in' after object '$obj'" ); expect( $svCode, '[{]', "'{' for object '$obj'" ); while( $$svCode !~ /\G[}]/gc ) { my $name = expect( $svCode, $Name, "option name for object + '$obj'" ); if( $$svCode =~ /\G$Name/gc ) { $Data{$obj}{$name} = $1; } elsif( $$svCode =~ /\G$Quoted/gc ) { # TODO: Unescape things that can be escaped inside quo +tes $Data{$obj}{$name} = $1; } elsif( $$svCode =~ /\G[{]/gc ) { parseListObject( $svCode, $obj, $name ); } elsif( $$svCode =~ /\G\z/gc ) { fail( $svCode, "Missing '}' at end of object '$obj'" ) +; } else { fail( $svCode, "Unsupported value for '$obj'.'$name'" +); } expect( $svCode, ';', "';' after '$obj'.'$name'" ); } expect( $svCode, ';', "';' after object '$obj'" ); } __END__ /* SOME COMMENT HERE */ /* MORE COMMENT */ /* Description - information */ options { option1 value; option2 value; option3 "value"; option4 { value1; value2; }; }; /* identifier1 ID 123456 */ object "identifier1" in { option1 value; option2 "value"; option3 { value1; value2; }; option4 { value; }; }; /* identifier2 ID 234561 */ object "identifier2" in { option1 value; option2 "value"; option3 { value1; value2; }; option4 { value; }; }; /* identifier3 ID 345612 */ object "identifier3" in { option1 value; option2 "value"; option3 { value1; value2; }; option4 { value; }; }; /* EOF */

And it even runs:

$VAR1 = { '' => { 'option1' => 'value', 'option2' => 'value', 'option3' => 'value', 'option4' => [ 'value1', 'value2' ], }, 'identifier1' => { 'option1' => 'value', 'option2' => 'value', 'option3' => [ 'value1', 'value2' ], 'option4' => [ 'value' ], }, 'identifier2' => { 'option1' => 'value', 'option2' => 'value', 'option3' => [ 'value1', 'value2' ], 'option4' => [ 'value' ], }, 'identifier3' => { 'option1' => 'value', 'option2' => 'value', 'option3' => [ 'value1', 'value2' ], 'option4' => [ 'value' ], }, };

- tye        


In reply to Re: Regex Question (rec desc) by tye
in thread Regex Question by jedikaiti

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Posts are HTML formatted. Put <p> </p> tags around your paragraphs. Put <code> </code> tags around your code and data!
  • Titles consisting of a single word are discouraged, and in most cases are disallowed outright.
  • Read Where should I post X? if you're not absolutely sure you're posting in the right place.
  • Please read these before you post! —
  • Posts may use any of the Perl Monks Approved HTML tags:
    a, abbr, b, big, blockquote, br, caption, center, col, colgroup, dd, del, div, dl, dt, em, font, h1, h2, h3, h4, h5, h6, hr, i, ins, li, ol, p, pre, readmore, small, span, spoiler, strike, strong, sub, sup, table, tbody, td, tfoot, th, thead, tr, tt, u, ul, wbr
  • You may need to use entities for some characters, as follows. (Exception: Within code tags, you can put the characters literally.)
            For:     Use:
    & &amp;
    < &lt;
    > &gt;
    [ &#91;
    ] &#93;
  • Link using PerlMonks shortcuts! What shortcuts can I use for linking?
  • See Writeup Formatting Tips and other pages linked from there for more info.
  • Log In?
    Username:
    Password:

    What's my password?
    Create A New User
    Chatterbox?
    and the web crawler heard nothing...

    How do I use this? | Other CB clients
    Other Users?
    Others perusing the Monastery: (3)
    As of 2020-10-22 12:56 GMT
    Sections?
    Information?
    Find Nodes?
    Leftovers?
      Voting Booth?
      My favourite web site is:












      Results (225 votes). Check out past polls.

      Notices?