use strict;
use warnings;

use XML::TokeParser;

# Print every grammar production found in the XML source file, one per line,
# in the form "[N] lhs ::= rhs", where N is a running counter.

my $file = 'REC-xml-19980210.xml';
my $i    = 0;     # running production number (also used by prod() below)

# Fail with a message naming the file instead of crashing later on a method
# call against an undefined parser object.
my $p = XML::TokeParser->new($file)
    or die "Cannot create XML::TokeParser for '$file': $!\n";

my $Ret = "";     # text of the production currently being assembled

while ( defined( my $t = $p->get_token() ) ) {
    # $t->[0] is the token type and $t->[1] the element name.
    my ( $type, $name ) = @{$t}[ 0, 1 ];

    if ( $type eq 'S' and $name eq 'lhs' ) {
        # A new left-hand side starts a new production.
        $i++;
        $Ret = join '', "[$i] ", $p->get_text('/lhs'), " ::= ";
    }
    elsif ( $type eq 'S' and $name eq 'rhs' ) {
        # A production may carry several <rhs> elements; append each one.
        $Ret .= $p->get_text('/rhs');
    }
    elsif ( $type eq 'E' and $name eq 'prod' ) {
        # End of the production: emit it and reset the accumulator.
        print clean($Ret), "\n";
        $Ret = "";
    }
}

# Release the accumulator and the parser explicitly.
undef $Ret;
undef $p;

## mirod already did this, so I'm borrowing

# prod( $twig, $prod )
#
# Prints one production as "[N] lhs ::= rhs" and increments the file-level
# counter $i. Not called anywhere in this script.
# NOTE(review): the signature and the field/children/text calls look like an
# XML::Twig element handler -- confirm before wiring it up.
#
#   $twig - unused here
#   $prod - object providing field('lhs') and children('rhs')
sub prod {
    my ( $twig, $prod ) = @_;

    my $lhs = $prod->field('lhs');
    my $rhs = join '', map { $_->text } $prod->children('rhs');

    $i++;
    my $prod_text = "[$i] $lhs ::= $rhs";
    print clean($prod_text) . "\n";
}

# clean( $string )
#
# Returns a copy of $string normalised for single-line display:
#   - every "\xc2\xa0" byte pair is replaced by a plain space,
#   - every run of whitespace is collapsed to a single space,
#   - trailing whitespace is removed (leading whitespace is kept).
sub clean {
    my ($string) = @_;

    $string =~ s/\xc2\xa0/ /sg;
    $string =~ s/\s+/ /g;
    $string =~ s{\s$}{}g;

    return $string;
}