Beefy Boxes and Bandwidth Generously Provided by pair Networks
Clear questions and runnable code
get the best and fastest answer


by Sixtease (Friar)
on Mar 11, 2007 at 23:14 UTC ( #604260=sourcecode: print w/replies, xml ) Need Help??
Category: HTML utility
Author/Contact Info ,

Prst stands for "Preprocessor for Static HTML".

Prst is for generating a HTML page with a lot of code that would be cumbersome to write by hand. It's like PHP only you write in Perl and it's not intended for dynamic page generation.

To generate a webpage, you write a template where inside <% %> tags you call perl functions and their output is substituted for the tag.

See Prst's webpage.

package Prst;

use strict;
use encoding 'utf8';
use Carp;

my $funs;
my $loop_iterators = +{};

# Initialization function. Needed to be called before using other func
# The only parameter expected is a hash ref which contains functions t
+o be
# called from html.
sub init {
    $funs = shift;
    ref $funs or croak 'Reference to hash containing HTML handling fun
+ctions needed when initializing Prst.';

# The main function. Expects an open filehandle, which it then process
# That is, expands function calls and for loops.
sub pp {
    ref $funs or croak 'Module uninitialized. Call Prst::init($functio
+ns_ref) first.';
    my $html_file = shift || croak 'No HTML to preprocess passed to Pr
    my @lines = <$html_file>;


# The core function. Takes an array ref containing the lines to prepro
# It determines the type (with parse()) and either prints the line to 
# or calls a loop dispatcher (in case a for loop is being started at t
+he current line).
sub process {
    my $source = shift;
    ref $source eq 'ARRAY' or croak 'Prst::process got an invalid sour
+ce to process. Array ref expected.';

    # The source must be iterated like this because other instances
    # may process the same list and we don't want to repeat ourselves.
+ :-)
    while (my $line = shift @$source) {
        my $res = parse($line);
        if ($res->{'type'} eq 'for') {
            dispatch_loop($source, $res);
        elsif ($res->{'type'} eq 'raw') {
            print $res->{'content'};
        else {
            die 'Internal Error: Unknown parse result.';

# Determines the type of the line (whether it starts a loop).
# An appropriate result hash is synthetized and returned.
sub parse {
    my $line = shift || croak 'Prst::parse got nothing to parse.';
    my ($iterator, $loop_list);

    if (($iterator, $loop_list) = starts_loop($line)) {
        return +{
            type => 'for',
            iterator => $iterator,
            loop_list => $loop_list,
    else {
        return +{
            type => 'raw',
            content => expand($line),

# This function expands function calls and for loop iterator
# references in HTML.
sub expand {
    my $line = shift || croak 'Prst::expand got nothing to expand.';

    # The first <% ... %> tag on the line is always found and replaced
    while ($line =~ m/<%\s*(.*?)\s*%>/) {
        my $call = $1;
        my $res;

        # lines like <% f([i] 1 2 3) %>
        if ($call =~ /^(\w+)\(\s*(((\[\w+\]|\w+)\s+)*(\[\w+\]|\w+))?\s
+*\)$/) {   # a function call
            # collect the function name and parameters
            my $func_name = $1;
            my $parameters = $2;
            my @parameters = parse_parameters($parameters);
            # check if we know this function
            unless (exists $funs->{ $func_name } and ref $funs->{ $fun
+c_name } eq 'CODE') {
                croak "Unknown function called in HTML on line:\n$line
            # and call it
            $res = $funs->{ $func_name }(@parameters);

        # lines like <% [i] %>
        elsif ($call =~ /^\[(\w+)\]$/) {  # a loop iterator
            my $iterator_name = $1;
            # store the iterator value
            $res = get_iterator_value($iterator_name);
        # lines with <% and unexpected content %>
        else {
            croak "Unknown call ($call) made on line:\n$line\n";

        # replace the first <% ... %> with the function call result or
+ the iterator value
        # so we can move on with the loop on the string
        $line =~ s/<%.*?%>/$res/;
    return $line;

# Checks whether a line is a loop start.
# If so, returns the iterator name and the values it is to traverse.
sub starts_loop {
    my $line = shift || croak 'Prst::starts_loop got nothing to check.

    # The line could be e.g. <% for i f(a b c) %>
    if ($line =~ /^\s*<%\s*for\s+(\w+)\s+(\w+)\(\s*([\w\s]*)\)\s*%>$/)
+ {
        # fetch the iterator name, the function name and the parameter
        my $iterator = $1;
        my $func_name = $2;
        my $parameters = $3;
        my @parameters = parse_parameters($parameters);
        # check if we know this function
        unless (exists $funs->{ $func_name } and ref $funs->{ $func_na
+me } eq 'CODE') {
            croak "Unknown function '$func_name' called at loop start 
+on line:\n$line\n";
        # call the function
        my @loop_list = $funs->{ $func_name }(@parameters);
        # return the result
        return ($iterator, \@loop_list);

    # Lines like <% for name (John [name2] Mary Angus) %>
    elsif ($line =~ /^\s*<%\s*for\s+(\w+)\s+\(\s*([\w\s]*)\)\s*%>$/) {
        my $iterator = $1;
        my $parameters = $2;
        my @parameters = parse_parameters($parameters);
        return ($iterator, \@parameters);
    # return undef otherwise. An explicit undef is not returned becaus
+e the function
    # is called in list context.

sub ends_loop {
    my $line = shift || croak 'Prst::ends_loop got nothing to check.';

    # e.g. <% endfor %>
    if ($line =~ /^\s*<%\s*endfor\s*%>\s*$/) {
        return 1;

    # implicit false on non-match

# This function takes care of handling for loops. It tears the lines u
+p to the
# end of this for loop (containing optional inner loops) and has these
+ lines
# processed, actualizing the for loop iterator.
sub dispatch_loop {
    my $source = shift;
    ref $source eq 'ARRAY' or croak 'Array ref with source required as
+ param 1  at Prst::dispatch_loop.';
    my $res = shift;
    ref $res eq 'HASH' or croak 'Hash ref with parse result requires a
+s param 2 at Prst::dispatch_loop.';
    exists $res->{'type'} and $res->{'type'} eq 'for'
    or croak 'Prst::dispatch_loop called on other than "for" parse res
    exists $res->{'iterator'} and length $res->{'iterator'}
    or croak 'Prst::dispatch_loop called on a parse result with invali
+d iterator';
    exists $res->{'loop_list'} and ref $res->{'loop_list'} eq 'ARRAY'
    or croak 'Prst::dispatch_loop called on a parse result with invali
+d loop list';

    my $iterator = $res->{'iterator'};
    my $loop_list = $res->{'loop_list'};

    # check if the iterator is not used by another (outer) loop
    if (exists $loop_iterators->{ $iterator }) {
        croak "Duplicite loop iterator '$iterator'.";

    # fetch the loop guts
    my @loop_guts;
    my $nest_level = 1;
    while ($nest_level) {
        my $line = shift @$source or croak 'Syntax error: unterminated
+ loop';
        if (starts_loop($line)) {
        elsif (ends_loop($line)) {
        push @loop_guts, $line;
    # get rid of the <%endfor%>
    pop @loop_guts;

    # execute the loop
    for my $i (@$loop_list) {
        $loop_iterators->{ $iterator } = $i;
        my @guts_to_process = @loop_guts;
    delete $loop_iterators->{ $iterator };

sub get_iterator_value {
    my $iterator_name = shift || croak 'Prst::get_iterator_value expec
+ts an identifier.';

    unless (exists $loop_iterators->{ $iterator_name }) {
        croak "Unknown loop iterator '$iterator_name'";
    return $loop_iterators->{ $iterator_name };

# This function parses what appears in parentheses, which is barewords
+ and
# [bracketed] [barewords] all separated by whitespace.
# Returns a list of the words. Expands the bracketed ones to the itera
sub parse_parameters {
    my $parameters = shift or return ();

    $parameters =~ /^\s*(.*?)\s*$/;
    my @parameters = split /\s+/, $parameters;
    for my $parameter (@parameters) {
        if ($parameter =~ /^\[(\w+)\]$/) {
            $parameter = get_iterator_value($1);
    return @parameters;

=encoding utf8

=head1 Name

Prst - Preprocessor for static HTML

=head1 Synopsis

 use Prst;

 my $functions = +{
    foo => sub { ... },
    bar => sub { ... },

 open my $html_file, '/path/to/html_file';


=head1 Description

Prst is a preprocessor for arbitrary text files. It somewhat resembles
+ PHP,
although it is by no means intended to be used for dynamic server-side
+ page
generation (hence static).

Prst needs two files to run: A program and a template. The template is
+ the
file which is to be preprocessed and the program is where the function
+s are

Prst evaluates the content of the <% tags %>  and replaces them with t
resulting text.

=head1 Template Syntax

The template is a text file. Its content is printed to standard output
Wherever the tag <% ... %> occurs, it is a directive to the preprocess

 <% foo() %>
 <% bar(param1 param2 param3) %>

The above are function calls. A function call is an identifier immedia
followed by a pair of parentheses. Within the parentheses,
whitespace-delimited list of parameters can occur. Only one function c
+all can
be placed within the <% tag %>. Any number of function calls can be on
+ a line.

 <% for iter list(params) %>
     <% [iter] %>
     <% foo(param1 [iter] param2) %>
 <% endfor %>

The above is a loop. The loop starts with a line where a start tag and
optional whitespace occur. No other characters are allowed on a loop-s
line. The start tag has two alternative forms:

1) The opening tag '<%', the loop iterator, a function call and the cl
tag '%>'.


 <% for file get_files() %>
 <% for i range(1 10) %>

2) The opening tag '<%', the loop iterator, mandatory whitespace, left
parenthesis, whitespace-delimited list, right parenthesis and the clos
+ing tag


 <% for n (1 2 3 4 5) %>
 <% for girl (Ann Betty Cathie) %>

The loop ends with the end tag <% endfor %>. The end tag must also be 
+the only
non-whitespace on its line. Loops can be nested.

The iterator can be referred to within the <% tag %> by surrounding it
+s name
with [brackets]. There can be no whitespace between the brackets and t
iterator. Such reference can occur: 1) As the only expression in the t
+ag, 2)
in a function's parameter list, 3) in a loop start tag list.


 <% [n] %>
 <% [girl] %>

 <% func([n]) %>
 <% to_uppercase(html [girl] usa) %>
 <% for i range(1 [n])

 <% for i ([girl] is pretty) %>

=head1 Methods

=head2 init

The module must be initialized before use. The init function takes one
parameter - a hash reference. Its keys are the function names used in 
template. The values are references to the code to be run upon calling
+ the

=head2 pp

pp stands for preprocess. It takes one argument - an open filehandle t
+o the

The text in the template is analyzed. Lines not containing the <% tags
+ %> are
printed to standard output unchanged. In case of function calls, the n
+ame of
the function is searched in the hash which init() received, the code i
+t points
to is run with the parameters specified in the function call and the w
<% tag %> is replaced with the text the code returns. In case of plain
+ loop
[iterator] references, the <% tag %> is replaced by the current iterat

In case of loops, the iteration list is first determined. The iteratio
list is whatever the function in the start tag returns or in the case 
+of the
second form of loop invocation, the list is directly taken from the
declaration. Then, the lines up to the matching end tag are gathered a
evaluated, setting the iterator to each value from the iteration list.
+ The
start tag and end tag lines are discarded.

=head1 Example

Generate a html table with three names starting with letters a up to d

=head2 The program -


 use strict;
 use warnings;
 use encoding 'utf8';
 use Prst;

 my $functions = +{
     range => sub {
         my ($left, $right) = @_;
         return ($left .. $right);
     name => sub {
         my ($letter, $number) = @_;
         my %names = (
             a => ['Alice', 'Amelie',  'Ann',      ],
             b => ['Betty', 'Beverly', 'Brooke',   ],
             c => ['Cindy', 'Cynthia', 'Catherine',],
             d => ['Daisy', 'Diana',   'Deborah',  ],
         return $names{ $letter }[ $number ];

 open (my $file, '<', 'template.html');


=head2 The template - template.html

 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"

 <html xmlns="" lang="en">
     <meta http-equiv="Content-Type" content="text/html; charset=UTF-8
+" />
         <% for letter (a b c d) %>
             <th><% [letter] %></th>
         <% endfor %>
     <% for number range(1 3) %>
         <% for letter (a b c d) %>
             <td><% name([letter] [number]) %></td>
         <% endfor %>
     <% endfor %>

=head2 The command

 perl > index.html

This will create the file index.html, which will contain the table.

=head1 Author

Oldrich Kruza aka Sixtease <>

=head1 Version


=head1 Copyright

Copyright (c) 2006 Oldrich Kruza
All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

Replies are listed 'Best First'.
Re: Prst
by Fletch (Chancellor) on Mar 12, 2007 at 00:47 UTC

    Sounds similar to tpage and/or ttree from Template Toolkit. If there's anything the world needs more of, it's Perl HTML templating systems . . . :)

      Yeah I thought I hadn't come with anything new. I had it written already so I thought I'd post it anyway. Thanks for those links.
Re: Prst
by explorer (Chaplain) on Mar 12, 2007 at 08:35 UTC

Log In?

What's my password?
Create A New User
Node Status?
node history
Node Type: sourcecode [id://604260]
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others about the Monastery: (10)
As of 2020-09-23 16:12 GMT
Find Nodes?
    Voting Booth?
    If at first I donít succeed, I Ö

    Results (131 votes). Check out past polls.