What about using an FSA-based parser?
#!/usr/bin/perl
use strict;
use warnings;
use Text::Diff;
{
    # Number of spaces added for each nesting level; private to indenter().
    my $INDENT_STEP = 4;

    # indenter($expr)
    #
    # Pretty-prints a parenthesised prefix-notation filter (such as the
    # LDAP-style filter used in the sample below) by emitting one token per
    # line, indented according to its nesting depth.
    #
    # The string is scanned left to right with \G-anchored matches, so it
    # behaves like a small state machine recognising three tokens:
    #   "(&", "(|", "(!"   combinator opener: printed, then depth increases
    #   "(attr=value)"     simple test:       printed at the current depth
    #   ")"                combinator closer: depth decreases, then printed
    # Whitespace between tokens is skipped.
    #
    # Parameters:
    #   $expr - the filter string; undef is treated as an empty string.
    #
    # Returns the indented text, every token followed by a newline.  If the
    # scanner meets something it cannot tokenise, the text produced so far is
    # returned and a warning naming the offending offset is emitted.
    sub indenter {
        my ($expr) = @_;
        $expr = '' unless defined $expr;

        my $indent = 0;
        my $result = '';

        # Start scanning from the beginning of the string.
        pos($expr) = undef;

        # The /c flag keeps pos() where it is when a match fails, so each
        # alternative below is tried at the same position.
        while (1) {
            if ($expr =~ m{\G \s* ( \( [&|!] )}smxcg) {
                # Combinator: print, newline, increase indent.
                $result .= (' ' x $indent) . "$1\n";
                $indent += $INDENT_STEP;
            }
            elsif ($expr =~ m{\G \s* ( \( [^)=]+ = [^)]+ \) )}smxcg) {
                # Test: print, newline.
                $result .= (' ' x $indent) . "$1\n";
            }
            elsif ($expr =~ m{\G \s* ( \) )}smxcg) {
                # End of combinator: decrease indent, print, newline.
                $indent -= $INDENT_STEP;

                # An unbalanced ")" must not push the depth below zero: a
                # negative repeat count triggers a warning and would shift
                # every following line one level too far left.
                $indent = 0 if $indent < 0;
                $result .= (' ' x $indent) . "$1\n";
            }
            else {
                last;
            }
        }

        # Anything other than trailing whitespace left at this point is input
        # the scanner could not recognise; say so instead of silently
        # returning truncated output.
        unless ($expr =~ m{\G \s* \z}smxcg) {
            my $offset = pos($expr) || 0;
            warn "indenter: cannot parse filter at offset $offset; output is truncated";
        }

        return $result;
    }
}
# Self-test: run indenter() on a sample filter and compare the result with
# the hand-written expectation below.
#
# The filter is one long string with no line breaks inside it.  It is built
# from concatenated pieces only to keep the source lines short; line-wrap
# markers must never end up inside the literal, because the tokenizer would
# treat them as part of attribute names and values.
my $expr =
      q{(&(&(&(& (mailnickname=*) (| }
    . q{(&(objectCategory=person)(objectClass=user)(!(homeMDB=*))(!(msExchHomeServerName=*)))}
    . q{(&(objectCategory=person)(objectClass=user)(|(homeMDB=*)(msExchHomeServerName=*)))}
    . q{(&(objectCategory=person)(objectClass=contact))}
    . q{(objectCategory=group)(objectCategory=publicFolder)}
    . q{(objectCategory=msExchDynamicDistributionList) )))}
    . q{(objectCategory=contact)(proxyAddresses=smtp:*example.com)))};

# Expected output: one token per line, four spaces per nesting level.  The
# leading whitespace is significant because the comparison below is exact.
my $expected=<<'END_EXPECTED';
(&
    (&
        (&
            (&
                (mailnickname=*)
                (|
                    (&
                        (objectCategory=person)
                        (objectClass=user)
                        (!
                            (homeMDB=*)
                        )
                        (!
                            (msExchHomeServerName=*)
                        )
                    )
                    (&
                        (objectCategory=person)
                        (objectClass=user)
                        (|
                            (homeMDB=*)
                            (msExchHomeServerName=*)
                        )
                    )
                    (&
                        (objectCategory=person)
                        (objectClass=contact)
                    )
                    (objectCategory=group)
                    (objectCategory=publicFolder)
                    (objectCategory=msExchDynamicDistributionList)
                )
            )
        )
        (objectCategory=contact)
        (proxyAddresses=smtp:*example.com)
    )
)
END_EXPECTED

my $ret = indenter($expr);

# Report success, then print a unified diff of actual vs. expected output so
# that any mismatch is visible line by line.
print "ok\n" if $ret eq $expected;
print diff(\$ret, \$expected, {
    STYLE => 'Unified',
});