comment on

package Prst;

use strict;
use encoding 'utf8';
use Carp;

# Functions callable from the template (name => code ref). Assigned by
# init() and consulted whenever a template tag calls a function.
my $funs;
# Iterators of the loops currently being executed (name => current value).
# Entries are added and removed by dispatch_loop().
my $loop_iterators = +{};

# Initialization function. Must be called before any other function of this
# module is used.
#
# Parameters:
#   $_[0] - hash ref mapping the function names used in the template to the
#           code refs implementing them.
# Returns a true value on success.
# Croaks when the argument is not a hash reference. The functions registered
# by an earlier successful call are kept in that case.
sub init {
    my $handlers = shift;

    # Validate before touching the module state, so that a failed call cannot
    # leave a non-hash value behind for the other functions to trip over.
    ref($handlers) eq 'HASH'
        or croak 'Reference to hash containing HTML handling functions needed when initializing Prst.';

    $funs = $handlers;
    return ref $funs;
}

# The main function. Expects an open filehandle, reads all of its lines and
# has them processed, i.e. function calls and for loops are expanded and the
# result is printed.
#
# Parameters:
#   $_[0] - open filehandle of the template.
# Croaks when init() has not been called successfully or when no filehandle
# is passed.
sub pp {
    ref $funs
        or croak 'Module uninitialized. Call Prst::init($functions_ref) first.';
    my $html_file = shift
        || croak 'No HTML to preprocess passed to Prst::pp';

    # The whole template is read up front because process() consumes an
    # array of lines.
    my @lines = <$html_file>;

    return process(\@lines);
}

# The core function. Takes an array ref containing the lines to preprocess.
# For every line it determines the type (with parse()) and either prints the
# expanded line to STDOUT or calls the loop dispatcher (when a for loop
# starts at the current line).
#
# Parameters:
#   $_[0] - array ref of lines; the array is emptied in the process.
# Croaks when the argument is not an array reference.
sub process {
    my $source = shift;
    ref($source) eq 'ARRAY'
        or croak 'Prst::process got an invalid source to process. Array ref expected.';

    # The lines are consumed destructively instead of being iterated over:
    # dispatch_loop() removes the body of a loop from this very array, so
    # those lines are not seen a second time here.
    # The loop tests the array, not the line, so that a line consisting of
    # just "0" does not end the processing prematurely.
    while (@$source) {
        my $line = shift @$source;
        my $res  = parse($line);

        if ($res->{'type'} eq 'for') {
            dispatch_loop($source, $res);
        }
        elsif ($res->{'type'} eq 'raw') {
            print $res->{'content'};
        }
        else {
            die 'Internal Error: Unknown parse result.';
        }
    }
    return;
}

# Determines the type of a line, i.e. whether it starts a loop.
#
# Parameters:
#   $_[0] - the line to classify; must be defined and non-empty.
# Returns a hash ref which is either
#   { type => 'for', iterator => NAME, loop_list => ARRAY REF }
# for a loop start line, or
#   { type => 'raw', content => EXPANDED LINE }
# for any other line.
# Croaks when no line is given.
sub parse {
    my $line = shift;
    # A defined/length test rather than a truth test, so that a line
    # consisting of just "0" is not mistaken for a missing argument.
    (defined $line and length $line)
        or croak 'Prst::parse got nothing to parse.';

    # starts_loop() returns an empty list unless the line starts a loop.
    my @loop = starts_loop($line);
    if (@loop) {
        my ($iterator, $loop_list) = @loop;
        return +{
            type      => 'for',
            iterator  => $iterator,
            loop_list => $loop_list,
        };
    }

    return +{
        type    => 'raw',
        content => expand($line),
    };
}

# Expands every <% ... %> tag on a line.
#
# A tag holds either a function call such as <% f([i] 1 2 3) %>, which is
# replaced by the return value of the function registered under that name,
# or a plain loop iterator reference such as <% [i] %>, which is replaced by
# the current value of that iterator.
#
# Parameters:
#   $_[0] - the line to expand; must be defined and non-empty.
# Returns the line with all tags replaced.
# Croaks when no line is given, when an unknown function is called and when
# a tag contains neither a function call nor an iterator reference.
sub expand {
    my $line = shift;
    # A defined/length test rather than a truth test, so that a line
    # consisting of just "0" is accepted.
    (defined $line and length $line)
        or croak 'Prst::expand got nothing to expand.';

    # The first remaining tag on the line is always the one handled. The
    # replacement becomes part of $line, so a tag contained in the
    # replacement text is expanded by a later iteration as well.
    while ($line =~ m/<%\s*(.*?)\s*%>/) {
        my $call = $1;
        my $res;

        # tags like <% f([i] 1 2 3) %>
        if ($call =~ /^(\w+)\(\s*(((\[\w+\]|\w+)\s+)*(\[\w+\]|\w+))?\s*\)$/) {
            # collect the function name and the parameters right away,
            # before another match can overwrite the capture variables
            my ($func_name, $parameters) = ($1, $2);
            my @parameters = parse_parameters($parameters);

            # check if we know this function
            unless (exists $funs->{ $func_name } and ref $funs->{ $func_name } eq 'CODE') {
                croak "Unknown function '$func_name' called in HTML on line:\n$line\n";
            }
            # and call it
            $res = $funs->{ $func_name }(@parameters);
        }
        # tags like <% [i] %>
        elsif ($call =~ /^\[(\w+)\]$/) {
            $res = get_iterator_value($1);
        }
        # tags with <% unexpected content %>
        else {
            croak "Unknown call ($call) made on line:\n$line\n";
        }

        # A function returning nothing expands to the empty string.
        $res = '' unless defined $res;

        # replace the first <% ... %> with the function call result or the
        # iterator value so the loop can move on to the next tag
        $line =~ s/<%.*?%>/$res/;
    }
    return $line;
}

# Recognizes a loop start line by its syntax alone; nothing is evaluated.
#
# Both forms are accepted, with optional whitespace around the tag:
#   <% for i f(a b c) %>        a function call provides the list
#   <% for name (John Mary) %>  the list is given literally
# The parenthesized list may contain barewords and bracketed iterator
# references, separated by whitespace.
#
# Returns ($iterator, $func_name, $parameters) for a loop start line, where
# $func_name is undef for the literal form and $parameters is undef for an
# empty list. Returns an empty list for any other line.
sub _match_loop_start {
    my $line = shift;
    defined $line or return;

    my @parts = $line =~ m{
        \A \s* <% \s* for \s+
        (\w+) \s+                           # iterator name
        (\w+)?                              # function name, if any
        \( \s*
        (                                   # parameter list, if any
            (?: (?: \[\w+\] | \w+ ) \s+ )*
            (?: \[\w+\] | \w+ )
        )?
        \s* \)
        \s* %> \s* \z
    }x;
    return @parts;
}

# Checks whether a line is a loop start.
#
# Parameters:
#   $_[0] - the line to check; must be defined and non-empty.
# Returns the iterator name and an array ref of the values the iterator is
# to traverse, or an empty list when the line does not start a loop.
# Bracketed iterator references in the list are replaced by the current
# iterator values, and for the function form the function is called to
# obtain the values.
# Croaks when no line is given and when the function named in the loop start
# tag is unknown.
sub starts_loop {
    my $line = shift;
    (defined $line and length $line)
        or croak 'Prst::starts_loop got nothing to check.';

    my @parts = _match_loop_start($line);
    # Nothing is returned for other lines. An explicit undef is not returned
    # because the function is called in list context.
    @parts or return;

    my ($iterator, $func_name, $parameters) = @parts;
    my @parameters = parse_parameters($parameters);

    # Lines like <% for name (John [name2] Mary Angus) %>:
    # the list itself is what the loop traverses.
    defined $func_name
        or return ($iterator, \@parameters);

    # Lines like <% for i f(a b c) %>: check if we know this function ...
    unless (exists $funs->{ $func_name } and ref $funs->{ $func_name } eq 'CODE') {
        croak "Unknown function '$func_name' called at loop start on line:\n$line\n";
    }
    # ... and traverse whatever it returns.
    my @loop_list = $funs->{ $func_name }(@parameters);
    return ($iterator, \@loop_list);
}

# Checks whether a line is a loop end, e.g. <% endfor %>.
#
# Parameters:
#   $_[0] - the line to check; must be defined and non-empty.
# Returns 1 for a loop end line and nothing (false) otherwise.
# Croaks when no line is given.
sub ends_loop {
    my $line = shift;
    (defined $line and length $line)
        or croak 'Prst::ends_loop got nothing to check.';

    return 1 if $line =~ /^\s*<%\s*endfor\s*%>\s*$/;

    # implicit false on non-match
    return;
}

# This function takes care of handling for loops. It removes the lines up to
# the end of this for loop (including optional inner loops) from the source
# and has these lines processed once for every value of the loop list,
# setting the loop iterator to that value.
#
# Parameters:
#   $_[0] - array ref with the source lines following the loop start line;
#           the loop body and the matching end tag are removed from it.
#   $_[1] - hash ref with the 'for' parse result of the loop start line
#           (keys type, iterator and loop_list).
# Croaks on invalid parameters, when the iterator name is already used by an
# enclosing loop and when the source ends before the loop does.
sub dispatch_loop {
    my $source = shift;
    ref($source) eq 'ARRAY'
        or croak 'Array ref with source required as param 1  at Prst::dispatch_loop.';
    my $res = shift;
    ref($res) eq 'HASH'
        or croak 'Hash ref with parse result requires as param 2 at Prst::dispatch_loop.';
    exists $res->{'type'} and $res->{'type'} eq 'for'
        or croak 'Prst::dispatch_loop called on other than "for" parse result,';
    exists $res->{'iterator'} and length $res->{'iterator'}
        or croak 'Prst::dispatch_loop called on a parse result with invalid iterator';
    exists $res->{'loop_list'} and ref $res->{'loop_list'} eq 'ARRAY'
        or croak 'Prst::dispatch_loop called on a parse result with invalid loop list';

    my $iterator  = $res->{'iterator'};
    my $loop_list = $res->{'loop_list'};

    # check if the iterator is not used by another (outer) loop
    if (exists $loop_iterators->{ $iterator }) {
        croak "Duplicite loop iterator '$iterator'.";
    }

    # fetch the loop guts
    my @loop_guts;
    my $nest_level = 1;
    while ($nest_level) {
        # Test the array, not the line: a line consisting of just "0" is
        # false but is no reason to report an unterminated loop.
        @$source or croak 'Syntax error: unterminated loop';
        my $line = shift @$source;

        # Inner loops are only recognized by their syntax here. They must
        # not be evaluated yet: their functions would be called needlessly
        # and their lists may refer to iterators which have no value before
        # the enclosing loop is executed.
        my @inner_loop = _match_loop_start($line);
        if (@inner_loop) {
            $nest_level++;
        }
        elsif (ends_loop($line)) {
            $nest_level--;
        }
        push @loop_guts, $line;
    }
    # get rid of the <% endfor %>
    pop @loop_guts;

    # execute the loop
    for my $value (@$loop_list) {
        # local() removes the iterator again when the iteration is left,
        # even if processing the loop body dies.
        local $loop_iterators->{ $iterator } = $value;
        # process() empties the array it is given, so it gets a fresh copy
        # of the loop body each time.
        my @guts_to_process = @loop_guts;
        process(\@guts_to_process);
    }
    return;
}

# Looks up the current value of a loop iterator.
#
# Parameters:
#   $_[0] - name of the iterator; must be defined and non-empty.
# Returns the value the iterator currently has.
# Croaks when no name is given and when no loop with that iterator is being
# executed.
sub get_iterator_value {
    my $iterator_name = shift;
    # A defined/length test rather than a truth test, so that an iterator
    # named "0" can be looked up.
    (defined $iterator_name and length $iterator_name)
        or croak 'Prst::get_iterator_value expects an identifier.';

    exists $loop_iterators->{ $iterator_name }
        or croak "Unknown loop iterator '$iterator_name'";

    return $loop_iterators->{ $iterator_name };
}

# This function parses what appears in parentheses, which is barewords and
# [bracketed] [barewords], all separated by whitespace.
#
# Parameters:
#   $_[0] - the text between the parentheses; may be undef or empty.
# Returns a list of the words, in which the bracketed ones are replaced by
# the current values of the iterators they name. Returns an empty list when
# there are no parameters.
# An unknown iterator makes get_iterator_value() croak.
sub parse_parameters {
    my $parameters = shift;
    # Only undef means "no parameters"; the single parameter "0" is false
    # but must not be dropped.
    defined $parameters or return ();

    # split ' ' skips leading whitespace, so surrounding whitespace never
    # yields an empty parameter.
    my @parameters = split ' ', $parameters;
    for my $parameter (@parameters) {
        if ($parameter =~ /^\[(\w+)\]$/) {
            $parameter = get_iterator_value($1);
        }
    }
    return @parameters;
}

=encoding utf8



=head1 Name

Prst - Preprocessor for static HTML

=head1 Synopsis

 use Prst;

 my $functions = +{
    foo => sub { ... },
    bar => sub { ... },
 };

 open my $html_file, '<', '/path/to/html_file'
     or die "Cannot open the template: $!";

 Prst::init($functions);
 Prst::pp($html_file);

=head1 Description

Prst is a preprocessor for arbitrary text files. It somewhat resembles PHP,
although it is by no means intended to be used for dynamic server-side page
generation (hence static).

Prst needs two files to run: a program and a template. The template is the
file which is to be preprocessed and the program is where the functions are
defined.

Prst evaluates the content of the <% tags %> and replaces them with the
resulting text.

=head1 Template Syntax

The template is a text file. Its content is printed to standard output.
Wherever the tag <% ... %> occurs, it is a directive to the preprocessor.

 <% foo() %>
 <% bar(param1 param2 param3) %>

The above are function calls. A function call is an identifier immediately
followed by a pair of parentheses. Within the parentheses, a
whitespace-delimited list of parameters can occur. Only one function call can
be placed within the <% tag %>. Any number of function calls can be on a line.

 <% for iter list(params) %>
     ...
     <% [iter] %>
     <% foo(param1 [iter] param2) %>
     ...
 <% endfor %>

The above is a loop. The loop starts with a line where a start tag and
optional whitespace occur. No other characters are allowed on a loop-starting
line. The start tag has two alternative forms:

1) The opening tag '<%', the keyword 'for', the loop iterator, a function call
and the closing tag '%>'.

Examples:

 <% for file get_files() %>
 <% for i range(1 10) %>

2) The opening tag '<%', the keyword 'for', the loop iterator, mandatory
whitespace, left parenthesis, whitespace-delimited list, right parenthesis
and the closing tag '%>'.

Examples:

 <% for n (1 2 3 4 5) %>
 <% for girl (Ann Betty Cathie) %>

The loop ends with the end tag <% endfor %>. The end tag must also be the only
non-whitespace on its line. Loops can be nested.

The iterator can be referred to within the <% tag %> by surrounding its name
with [brackets]. There can be no whitespace between the brackets and the
iterator. Such a reference can occur: 1) as the only expression in the tag,
2) in a function's parameter list, 3) in a loop start tag list.

Examples:

 <% [n] %>
 <% [girl] %>

 <% func([n]) %>
 <% to_uppercase(html [girl] usa) %>
 <% for i range(1 [n]) %>

 <% for i ([girl] is pretty) %>

=head1 Methods

=head2 init

The module must be initialized before use. The init function takes one
parameter - a hash reference. Its keys are the function names used in the
template. The values are references to the code to be run upon calling the
function.

=head2 pp

pp stands for preprocess. It takes one argument - an open filehandle to the
template.

The text in the template is analyzed. Lines not containing the <% tags %> are
printed to standard output unchanged. In case of function calls, the name of
the function is looked up in the hash which init() received, the code it
points to is run with the parameters specified in the function call and the
whole <% tag %> is replaced with the text the code returns. In case of plain
loop [iterator] references, the <% tag %> is replaced by the current iterator
value.

In case of loops, the iteration list is first determined. The iteration
list is whatever the function in the start tag returns or, in the case of the
second form of loop invocation, the list is taken directly from the
declaration. Then, the lines up to the matching end tag are gathered and
evaluated, setting the iterator to each value from the iteration list. The
start tag and end tag lines are discarded.

=head1 Example

Generate an HTML table with three names for each of the letters a to d.

=head2 The program - nametable.pl

 #!/usr/bin/perl

 use strict;
 use warnings;
 use encoding 'utf8';
 use Prst;

 my $functions = +{
     range => sub {
         my ($left, $right) = @_;
         return ($left .. $right);
     },
     name => sub {
         my ($letter, $number) = @_;
         $number--;
         my %names = (
             a => ['Alice', 'Amelie',  'Ann',      ],
             b => ['Betty', 'Beverly', 'Brooke',   ],
             c => ['Cindy', 'Cynthia', 'Catherine',],
             d => ['Daisy', 'Diana',   'Deborah',  ],
         );
         return $names{ $letter }[ $number ];
     },
 };

 open (my $file, '<', 'template.html');

 Prst::init($functions);
 Prst::pp($file);

=head2 The template - template.html

 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

 <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
 <head>
     <title>names</title>
     <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
 </head>
 <body>
     <table>
         <tr>
         <% for letter (a b c d) %>
             <th><% [letter] %></th>
         <% endfor %>
         </tr>
     <% for number range(1 3) %>
         <tr>
         <% for letter (a b c d) %>
             <td><% name([letter] [number]) %></td>
         <% endfor %>
         </tr>
     <% endfor %>
     </table>
 </body>
 </html>

=head2 The command

 perl nametable.pl > index.html

This will create the file index.html, which will contain the table.

=head1 Author

Oldrich Kruza aka Sixtease <Oldrich.Kruza@sixtease.net>

=head1 Version

0.1

=head1 Copyright

Copyright (c) 2006 Oldrich Kruza
All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut
[download]
In reply to Prst by Sixtease
Are you posting in the right place? Check out Where do I post X? to know for sure.
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
Want more info? How to link or How to display code and escape characters are good places to start.

more useful options
	PerlMonks