use 5.020;
use warnings;
use Regexp::Grammars;

# Sample netlist: one input declaration, one output declaration, then a
# series of checkinst_N(...) instances.  Each port is either connected
# (".portN(signal)") or left bare (".portN").
my $input = <<'END_NETLIST';
input abc10;
output wireax;

checkinst_0(
    .port1(wireY),
    .port2(wireZ),
    .port3(wireX),
    .port4(port711),
    .port10
);

checkinst_2(
    .port5(wireYx),
    .port6(wireZ),
    .port7(wireaX),
    .port8(abc10),
    .port11
);

checkinst_3(
    .port100(wireYd),
    .port101(wireZS),
    .port102(wireXW),
    .port103(port10),
    .port12
);
END_NETLIST

# Grammar for the netlist.  The rule and token names are exactly the hash
# keys that the result-tree walk below reads (start, input, output,
# checkinst, checkinstid, ports, port, portid, parens, identifier).
# Rules match optional whitespace wherever whitespace appears in their
# body; tokens do not.
my $parser = qr{
    <start>

    <rule: start>        <input> <output> <[checkinst]>+
    <rule: input>        input <identifier> ;
    <rule: output>       output <identifier> ;
    <rule: checkinst>    <checkinstid> [(] <ports> [)];
    <rule: ports>        <[port]>+ % ,
    <rule: port>         <portid> <parens>?
    <rule: parens>       [(] <identifier> [)]

    <token: identifier>  \w+\d*
    <token: checkinstid> checkinst_ \d+
    <token: portid>      [.]port \d+
}msx;

# Fail loudly instead of silently skipping all the work below.
$input =~ $parser
    or die "Input does not match the netlist grammar\n";

# Regexp::Grammars leaves the result tree in %/ after a successful match;
# take a lexical handle on it straight away.
my $tree = $/{start};

# Connection map: name => { source => [...], dest => [...] }.
my %r;

# The top-level entry lists the declared input and output signal on both
# sides.
{
    my $input_name  = $tree->{input}{identifier};
    my $output_name = $tree->{output}{identifier};

    $r{top} = {
        source => [ $input_name, $output_name ],
        dest   => [ $input_name, $output_name ],
    };
}

# Per instance: a connected port contributes its name (leading dot removed)
# to "source" and the signal in parentheses to "dest".  Ports without
# parentheses are collected across all instances under "Implicit".
my @implicit;
for my $checkinst ( @{ $tree->{checkinst} } ) {
    my $instance_name = $checkinst->{checkinstid};

    for my $port ( @{ $checkinst->{ports}{port} } ) {
        my $port_name = $port->{portid} =~ s/^\.//r;

        if ( $port->{parens} ) {
            push @{ $r{$instance_name}{dest} },   $port->{parens}{identifier};
            push @{ $r{$instance_name}{source} }, $port_name;
        }
        else {
            push @implicit, $port_name;
        }
    }
}

# Separate copies, so that changing one list later cannot change the other.
$r{Implicit} = {
    source => [@implicit],
    dest   => [@implicit],
};

__END__

Contents of %r that the sample netlist above is meant to produce:

%r = (
    checkinst_0 => {
        dest   => [qw(wireY wireZ wireX port711)],
        source => [qw(port1 port2 port3 port4)],
    },
    checkinst_2 => {
        dest   => [qw(wireYx wireZ wireaX abc10)],
        source => [qw(port5 port6 port7 port8)],
    },
    checkinst_3 => {
        dest   => [qw(wireYd wireZS wireXW port10)],
        source => [qw(port100 port101 port102 port103)],
    },
    Implicit => {
        dest   => [qw(port10 port11 port12)],
        source => [qw(port10 port11 port12)],
    },
    top => {
        dest   => [qw(abc10 wireax)],
        source => [qw(abc10 wireax)],
    },
)