package Prst;

use strict;
use encoding 'utf8';
use Carp;

# Table of template-callable functions (name => code ref), stored by init()
# and consulted by expand() and starts_loop().
my $funs;
# Currently active loop iterators (iterator name => current value). Entries
# are set and removed by dispatch_loop() and read by get_iterator_value().
# NOTE(review): no trailing "1;" is visible in this file, so this assignment
# appears to be the last statement executed at load time and therefore the
# value `require` sees -- keep it last and true, or add an explicit "1;".
# TODO confirm.
my $loop_iterators = +{};

# Initialization function. Must be called before any other function of this
# module.
#
# Parameters:
#   $functions - hash ref mapping the function names usable in templates to
#                the code refs implementing them.
#
# Returns a true value on success. Croaks when the argument is not a hash
# reference (plain or blessed); a previously registered function table is
# left untouched in that case.
sub init {
    my $functions = shift;

    # reftype() looks through blessing, so hash-based objects stay accepted,
    # while array/code/scalar refs -- which would only fail later, when a
    # function is looked up -- are rejected right here.
    require Scalar::Util;
    my $type = Scalar::Util::reftype($functions);
    defined $type and $type eq 'HASH'
        or croak 'Reference to hash containing HTML handling functions needed when initializing Prst.';

    # Store only after validation so a bad call cannot clobber a good table.
    $funs = $functions;
    return 1;
}

# Entry point: preprocesses a whole template.
#
# Parameters:
#   $html_file - open filehandle of the template to read.
#
# All lines are read from the handle and handed to process(), which prints
# the expanded text to STDOUT. Croaks when init() has not stored a function
# table yet or when no filehandle is passed.
sub pp {
    my ($html_file) = @_;

    croak 'Module uninitialized. Call Prst::init($functions_ref) first.'
        unless ref $funs;
    croak 'No HTML to preprocess passed to Prst::pp'
        unless $html_file;

    # process() consumes its input array, so give it a private one.
    my $lines = [ <$html_file> ];
    return process($lines);
}

# Works through a list of template lines.
#
# Parameters:
#   $source - array ref of lines; it is consumed (emptied) while processing.
#
# Every line is classified by parse(). Raw lines are printed to STDOUT in
# their expanded form; a loop start hands the remaining lines over to
# dispatch_loop(). Croaks unless $source is an array ref.
sub process {
    my ($source) = @_;
    croak 'Prst::process got an invalid source to process. Array ref expected.'
        unless ref $source eq 'ARRAY';

    # Lines are shifted off the shared array rather than iterated in place:
    # dispatch_loop() removes the body of a loop from the very same array,
    # so whatever it took must not be seen here again.
    # Note that iteration ends at the first false line (e.g. a final "0"
    # without a newline), not only when the array is exhausted.
    while (my $line = shift @$source) {
        my $parsed = parse($line);
        my $kind   = $parsed->{'type'};

        if ($kind eq 'raw') {
            print $parsed->{'content'};
            next;
        }
        if ($kind eq 'for') {
            dispatch_loop($source, $parsed);
            next;
        }
        die 'Internal Error: Unknown parse result.';
    }
}

# Classifies a single template line and returns a hash ref describing it:
#   { type => 'for', iterator => $name, loop_list => \@values }
#       when the line starts a loop (as decided by starts_loop()), or
#   { type => 'raw', content => $expanded_line }
#       for every other line, with its tags already expanded by expand().
# Croaks when called with a false (empty) line.
sub parse {
    my ($line) = @_;
    croak 'Prst::parse got nothing to parse.' unless $line;

    # starts_loop() answers with an empty list for anything but a loop start.
    my @loop = starts_loop($line);
    if (@loop) {
        my ($iterator, $loop_list) = @loop;
        return {
            type      => 'for',
            iterator  => $iterator,
            loop_list => $loop_list,
        };
    }

    return {
        type    => 'raw',
        content => expand($line),
    };
}

# Replaces every <% ... %> tag on a line with its value and returns the
# resulting line.
#
# Two kinds of tag content are understood:
#   name(arg [iter] ...)  - calls the function registered under "name" (in
#                           scalar context) with the parsed arguments
#   [iter]                - the current value of the loop iterator "iter"
# Any other tag content makes the function croak, as does an unregistered
# function name or a false (empty) line argument.
#
# The line is searched from its start again after each substitution, so a
# tag contained in a substituted value is expanded as well.
sub expand {
    my ($line) = @_;
    croak 'Prst::expand got nothing to expand.' unless $line;

    # Each pass handles the leftmost tag still present on the line.
    while ($line =~ m/<%\s*(.*?)\s*%>/) {
        my $call = $1;
        my $replacement;

        if ($call =~ /^(\w+)\(\s*(((\[\w+\]|\w+)\s+)*(\[\w+\]|\w+))?\s*\)$/) {
            # A function call such as <% f([i] 1 2 3) %>.
            my ($func_name, $raw_args) = ($1, $2);
            my @args = parse_parameters($raw_args);

            my $is_callable = exists($funs->{ $func_name })
                && ref($funs->{ $func_name }) eq 'CODE';
            croak "Unknown function called in HTML on line:\n$line\n"
                if not $is_callable;

            $replacement = $funs->{ $func_name }->(@args);
        }
        elsif ($call =~ /^\[(\w+)\]$/) {
            # A bare iterator reference such as <% [i] %>.
            my $iterator_name = $1;
            $replacement = get_iterator_value($iterator_name);
        }
        else {
            croak "Unknown call ($call) made on line:\n$line\n";
        }

        # Swap the leftmost tag for its value; the loop then looks for the
        # next one.
        $line =~ s/<%.*?%>/$replacement/;
    }

    return $line;
}

# Checks whether a line is a loop start.
#
# Two forms are recognised. In both, the tag must be the only non-whitespace
# on the line, and list items may be barewords or [iterator] references:
#   <% for i f(a [j] c) %>  - the loop list is whatever the registered
#                             function f returns (called in list context)
#                             for the given parameters
#   <% for i (a [j] c) %>   - the loop list is given literally
#
# Returns ($iterator_name, \@loop_list) for a loop start and an empty list
# otherwise. Croaks on a false (empty) line, on an unregistered function
# and -- through parse_parameters() -- on a reference to an unknown iterator.
sub starts_loop {
    my $line = shift || croak 'Prst::starts_loop got nothing to check.';

    # One list item, and a (possibly empty) whitespace-separated list of them.
    my $item = qr/\[\w+\]|\w+/;
    my $list = qr/(?:$item(?:\s+$item)*)?/;

    # Capture 2 is the function name; it is empty for the literal-list form.
    # Whitespace (including a CR) is tolerated after the closing %>, just as
    # ends_loop() tolerates it after <% endfor %>.
    if ($line =~ /^\s*<%\s*for\s+(\w+)\s+(\w*)\(\s*($list)\s*\)\s*%>\s*$/) {
        my ($iterator, $func_name, $parameters) = ($1, $2, $3);
        my @parameters = parse_parameters($parameters);

        # Literal list: the expanded parameters are the loop list.
        return ($iterator, \@parameters) unless length $func_name;

        # check if we know this function
        unless (exists $funs->{ $func_name } and ref $funs->{ $func_name } eq 'CODE') {
            croak "Unknown function '$func_name' called at loop start on line:\n$line\n";
        }
        my @loop_list = $funs->{ $func_name }(@parameters);
        return ($iterator, \@loop_list);
    }

    # A bare return yields the empty list the (list-context) callers test for.
    return;
}

# Checks whether a line is a loop end, i.e. consists of nothing but an
# <% endfor %> tag and optional whitespace.
# Returns 1 if so, and an empty list (undef in scalar context) otherwise.
# Croaks when called with a false (empty) line.
sub ends_loop {
    my ($line) = @_;
    croak 'Prst::ends_loop got nothing to check.' unless $line;

    return 1 if $line =~ /^\s*<%\s*endfor\s*%>\s*$/;
    return;
}

# Executes one for loop.
#
# Parameters:
#   $source - array ref of the lines following the loop start tag; the loop
#             body and its <% endfor %> line are removed from it.
#   $res    - the parse() result of the loop start line, i.e. a hash ref
#             with type 'for', the iterator name and the loop list.
#
# The body (including any nested loops) is collected up to the matching
# <% endfor %> and then processed once per loop list value, with the
# iterator set to that value. Croaks on invalid arguments, on an iterator
# name already used by an enclosing loop and on a missing <% endfor %>.
sub dispatch_loop {
    my $source = shift;
    ref $source eq 'ARRAY' or croak 'Array ref with source required as param 1  at Prst::dispatch_loop.';
    my $res = shift;
    ref $res eq 'HASH' or croak 'Hash ref with parse result requires as param 2 at Prst::dispatch_loop.';
    exists $res->{'type'} and $res->{'type'} eq 'for'
    or croak 'Prst::dispatch_loop called on other than "for" parse result,';
    exists $res->{'iterator'} and length $res->{'iterator'}
    or croak 'Prst::dispatch_loop called on a parse result with invalid iterator';
    exists $res->{'loop_list'} and ref $res->{'loop_list'} eq 'ARRAY'
    or croak 'Prst::dispatch_loop called on a parse result with invalid loop list';

    my $iterator = $res->{'iterator'};
    my $loop_list = $res->{'loop_list'};

    # check if the iterator is not used by another (outer) loop
    if (exists $loop_iterators->{ $iterator }) {
        croak "Duplicite loop iterator '$iterator'.";
    }

    # Fetch the loop guts. Nested loop starts are only *recognised* here,
    # not evaluated: evaluating them would call their list functions as a
    # side effect of scanning and would try to resolve [iterator] references
    # before any iterator has a value.
    my @loop_guts;
    my $nest_level = 1;
    while ($nest_level) {
        my $line = shift @$source or croak 'Syntax error: unterminated loop';
        if (_looks_like_loop_start($line)) {
            $nest_level++;
        }
        elsif (ends_loop($line)) {
            $nest_level--;
        }
        push @loop_guts, $line;
    }
    # get rid of the <%endfor%>
    pop @loop_guts;

    # Execute the loop. local() removes the iterator entry again when this
    # sub is left -- also when the body dies -- so a caller that traps the
    # error does not find a stale iterator registered.
    local $loop_iterators->{ $iterator };
    for my $value (@$loop_list) {
        $loop_iterators->{ $iterator } = $value;
        # process() consumes its input, so every pass gets a fresh copy
        my @guts_to_process = @loop_guts;
        process(\@guts_to_process);
    }
    return;
}

# Purely syntactic test whether a line has the shape of a loop start tag
# (either form, list items being barewords or [iterator] references).
# Nothing is evaluated. Returns 1 or 0.
sub _looks_like_loop_start {
    my ($line) = @_;

    my $item = qr/\[\w+\]|\w+/;
    my $list = qr/(?:$item(?:\s+$item)*)?/;

    return $line =~ /^\s*<%\s*for\s+\w+\s+\w*\(\s*$list\s*\)\s*%>\s*$/ ? 1 : 0;
}

# Returns the current value of the loop iterator with the given name.
#
# Parameters:
#   $iterator_name - name of the iterator, without the [brackets].
#
# Croaks when no name is given or when no active loop uses that name.
sub get_iterator_value {
    my $iterator_name = shift;

    # Test for emptiness rather than truth: "0" is a valid name (it matches
    # \w+ and passes dispatch_loop()'s length check), so it must be
    # resolvable here as well.
    defined $iterator_name and length $iterator_name
        or croak 'Prst::get_iterator_value expects an identifier.';

    unless (exists $loop_iterators->{ $iterator_name }) {
        croak "Unknown loop iterator '$iterator_name'";
    }
    return $loop_iterators->{ $iterator_name };
}

# Parses what appears between the parentheses of a call or a loop list:
# barewords and [bracketed] iterator references separated by whitespace.
#
# Parameters:
#   $parameters - the raw text between the parentheses; may be undef or
#                 empty when there are no parameters.
#
# Returns the list of parameters, with every [name] replaced by the current
# value of the iterator "name" (get_iterator_value() croaks if it is
# unknown). Returns an empty list when there is nothing to parse.
sub parse_parameters {
    my $parameters = shift;
    return () unless defined $parameters;

    # split ' ' splits on runs of whitespace and ignores leading whitespace,
    # so padding never produces an empty parameter. Testing definedness
    # instead of truth keeps a lone "0" from being mistaken for "nothing".
    my @parameters = split ' ', $parameters;
    for my $parameter (@parameters) {
        if ($parameter =~ /^\[(\w+)\]$/) {
            $parameter = get_iterator_value($1);
        }
    }
    return @parameters;
}

=encoding utf8



=head1 Name

Prst - Preprocessor for static HTML

=head1 Synopsis

 use Prst;

 my $functions = +{
    foo => sub { ... },
    bar => sub { ... },
 };

 open my $html_file, '<', '/path/to/html_file' or die "Cannot open template: $!";

 Prst::init($functions);
 Prst::pp($html_file);

=head1 Description

Prst is a preprocessor for arbitrary text files. It somewhat resembles PHP,
although it is by no means intended to be used for dynamic server-side page
generation (hence static).

Prst needs two files to run: A program and a template. The template is the
file which is to be preprocessed and the program is where the functions are
defined.

Prst evaluates the content of the <% tags %> and replaces them with the
resulting text.

=head1 Template Syntax

The template is a text file. Its content is printed to standard output.
Wherever the tag <% ... %> occurs, it is a directive to the preprocessor.

 <% foo() %>
 <% bar(param1 param2 param3) %>

The above are function calls. A function call is an identifier immediately
followed by a pair of parentheses. Within the parentheses, a
whitespace-delimited list of parameters can occur. Only one function call can
be placed within a <% tag %>, but any number of tags can occur on one line.

 <% for iter list(params) %>
     ...
     <% [iter] %>
     <% foo(param1 [iter] param2) %>
     ...
 <% endfor %>

The above is a loop. The loop starts with a line where a start tag and
optional whitespace occur. No other characters are allowed on a loop-starting
line. The start tag has two alternative forms:

1) The opening tag '<%', the keyword 'for', the loop iterator, a function
call and the closing tag '%>'.

Examples:

 <% for file get_files() %>
 <% for i range(1 10) %>

2) The opening tag '<%', the keyword 'for', the loop iterator, mandatory
whitespace, left parenthesis, whitespace-delimited list, right parenthesis
and the closing tag '%>'.

Examples:

 <% for n (1 2 3 4 5) %>
 <% for girl (Ann Betty Cathie) %>

The loop ends with the end tag <% endfor %>. The end tag must also be the only
non-whitespace on its line. Loops can be nested.

The iterator can be referred to within the <% tag %> by surrounding its name
with [brackets]. There can be no whitespace between the brackets and the
iterator. Such reference can occur: 1) As the only expression in the tag, 2)
in a function's parameter list, 3) in a loop start tag list.

Examples:

 <% [n] %>
 <% [girl] %>

 <% func([n]) %>
 <% to_uppercase(html [girl] usa) %>
 <% for i range(1 [n]) %>

 <% for i ([girl] is pretty) %>

=head1 Methods

=head2 init

The module must be initialized before use. The init function takes one
parameter - a hash reference. Its keys are the function names used in the
template. The values are references to the code to be run upon calling the
function.

=head2 pp

pp stands for preprocess. It takes one argument - an open filehandle to the
template.

The text in the template is analyzed. Lines not containing the <% tags %> are
printed to standard output unchanged. In case of function calls, the name of
the function is searched in the hash which init() received, the code it points
to is run with the parameters specified in the function call and the whole
<% tag %> is replaced with the text the code returns. In case of plain loop
[iterator] references, the <% tag %> is replaced by the current iterator
value.

In case of loops, the iteration list is first determined. The iteration
list is whatever the function in the start tag returns or in the case of the
second form of loop invocation, the list is directly taken from the
declaration. Then, the lines up to the matching end tag are gathered and
evaluated, setting the iterator to each value from the iteration list. The
start tag and end tag lines are discarded.

=head1 Example

Generate an HTML table with three names for each of the letters a to d.

=head2 The program - nametable.pl

 #!/usr/bin/perl

 use strict;
 use warnings;
 use encoding 'utf8';
 use Prst;

 my $functions = +{
     range => sub {
         my ($left, $right) = @_;
         return ($left .. $right);
     },
     name => sub {
         my ($letter, $number) = @_;
         $number--;
         my %names = (
             a => ['Alice', 'Amelie',  'Ann',      ],
             b => ['Betty', 'Beverly', 'Brooke',   ],
             c => ['Cindy', 'Cynthia', 'Catherine',],
             d => ['Daisy', 'Diana',   'Deborah',  ],
         );
         return $names{ $letter }[ $number ];
     },
 };

 open (my $file, '<', 'template.html') or die "Cannot open template.html: $!";

 Prst::init($functions);
 Prst::pp($file);

=head2 The template - template.html

 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

 <html xmlns="http://www.w3.org/1999/xhtml" lang="en">
 <head>
     <title>names</title>
     <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
 </head>
 <body>
     <table>
         <tr>
         <% for letter (a b c d) %>
             <th><% [letter] %></th>
         <% endfor %>
         </tr>
     <% for number range(1 3) %>
         <tr>
         <% for letter (a b c d) %>
             <td><% name([letter] [number]) %></td>
         <% endfor %>
         </tr>
     <% endfor %>
     </table>
 </body>
 </html>

=head2 The command

 perl nametable.pl > index.html

This will create the file index.html, which will contain the table.

=head1 Author

Oldrich Kruza aka Sixtease <Oldrich.Kruza@sixtease.net>

=head1 Version

0.1

=head1 Copyright

Copyright (c) 2006 Oldrich Kruza
All rights reserved.

This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut