#!perl -w
package Parse::Report;
use 5.006;
use strict;
our $VERSION = '0.03';
=head1 NAME
Parse::Report.pm - read in fixed-width ascii text using C<format>-like
+ pictures.
=head1 DESCRIPTION
A utility to parse a fixed-width ascii text report by passing
in a picture like that used by C<format>.
=head1 SYNOPSIS
use Parse::Report;
use YAML; # or Data::Dumper if you insist ;->
my $parser=Parse::Report->(<<'PARSER');
Subject: @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$subject
Index: @<<<<<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$index, $description
Priority: @<<<<<<<<<< Date: @<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$priority, $date, $description
From: @<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$from, $description
Assigned to: @<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$programmer, $description
~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$description
~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$description
~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$description
~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$description
~ ^<<<<<<<<<<<<<<<<<<<<<<<...
$description
PARSER
my @results=$parser->parse(<<'TEXT');
Subject: A very strange bug with Parse::Report
Index: 12345 This bug occurs occasionally
Priority: Low-ish Date: 20020814 for no reason. Well, maybe
From: osfameron there *is* a reason, who
Assigned to: osfameron knows what goes on in the
mind of bugs!
Subject: Another odd bug with Parse::Report
Index: 12346 No idea why this one happens
Priority: High Date: 20020814 pretty bad luck is all if
From: osfameron ask me really!
Assigned to: osfameron
TEXT
print Dump(\@results);
=cut
package Parse::Report;
use strict;
use YAML qw(:all);
my $parser=Parse::Report->new(<<'PARSER');
Subject: @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$subject
Index: @<<<<<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$index, $description
Priority: @<<<<<<<<<< Date: @<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$priority, $date, $description
From: @<<<<<<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$from, $description
Assigned to: @<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$programmer, $description
~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$description
~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$description
~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$description
~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$description
~ ^<<<<<<<<<<<<<<<<<<<<<<<...
$description
PARSER
my @results=$parser->parse(<<'TEXT');
Subject: A very strange bug with Parse::Report
Index: 12345 This bug occurs occasionally
Priority: Low-ish Date: 20020814 for no reason. Well, maybe
From: osfameron there *is* a reason, who
Assigned to: osfameron knows what goes on in the
+ mind of bugs!
Subject: Another odd bug with Parse::Report
Index: 12346 No idea why this one happens
Priority: High Date: 20020814 pretty bad luck is all if
From: osfameron ask me really!
Assigned to: osfameron
TEXT
print Dump(\@results);
sub new {
my $class=shift;
my $re='';
my (@process, @process2);
my %trim=(
'' => \<rim,
'<' => \<rim,
'>' => \&rtrim,
'|' => \&ctrim,
);
my @template=split /\n/, shift;
while (my $format=shift @template) {
my $optional=($format=~/~/);
if ($format=~/[@^]/) {
my $vars = shift @template;
(my @vars) = ($vars=~/\w+/g);
# escape any special characters
$format=~s/(?![@^<>| ])(\W)/\\$1/g;
# change the placeholders into capturing parentheses
$format=~s/(([@^])([<>|]*))/ push @process, [$2,substr($3,
+0,1), shift @vars];
'(.{'. length($1). '})' /eg;
# deal with the special case of a capture at the end of a
+string.
# the outputter may not have printed all the necessary whi
+tespace,
# so modify regex to account for this.
$format=~s/\(\.\{(\d+)}\)\s*$/(.{0,$1})/;
}
$format="^$format\\n";
$format="(?:$format)?" if $optional;
$re.=$format;
}
for my $process (@process) {
push @process2, sub {
my $value=shift || '';
my $result=shift;
my ($vartype, $align, $varname)=@$process;
#warn "ALIGN '$align'=> '$trim{$align}'";
$trim{$align}->($value);
if ($vartype eq '@') {
$result->{$varname} = $value;
} else {
$result->{$varname} ||='';
$result->{$varname}.=" " if $result->{$varname};
$result->{$varname}.= $value;
}
return $result;
};
}
return bless {
re => qr/$re/,
process => \@process2,
}, $class;
}
sub parse {
my $parser = shift;
my $text = shift;
my $re=$parser->{re};
my @results;
while ((my @vars)=($text=~/($re)/m)) {
my @process=@{$parser->{process}};
my $result= {};
my $match=shift @vars;
# consume the matched report
substr($text,0,length$match)='';
for (@vars) {
(shift @process)->($_, $result);
}
push @results, $result;
}
return @results;
}
###########
sub ltrim { $_[0]=~s/\s+$// }
sub rtrim { $_[0]=~s/\s+$// }
sub ctrim { $_[0]=~s/^\s*(.*?)\s*$/$1/ }
###########
=head1 BUGS, TODO
Many. This is alpha code, not complete, and not fully tested.
(Though it is the first module I've written where I've tried to
write tests from the beginning - it's very odd, but I'd recommend
it). (Though hard to keep up: I didn't bother with this version,
bad BAD module author!)
No attempt is made to parse number formats (###.##) as yet.
=head1 AUTHOR, VERSION, LICENSE
osfameron - osfperl@osfameron.abelgratis.co.uk
Version 0.01 - Alpha version. Not recommended or guaranteed safe
for anything.
This code may be freely distributed under the same conditions as Perl
+itself.
=cut
#####
1; # return a true value
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.