package Prst;
use strict;
use encoding 'utf8';
use Carp;
my $funs;
my $loop_iterators = +{};
# Initialization function. Needed to be called before using other func
+tions.
# The only parameter expected is a hash ref which contains functions t
+o be
# called from html.
sub init {
$funs = shift;
ref $funs or croak 'Reference to hash containing HTML handling fun
+ctions needed when initializing Prst.';
}
# The main function. Expects an open filehandle, which it then process
+es.
# That is, expands function calls and for loops.
sub pp {
ref $funs or croak 'Module uninitialized. Call Prst::init($functio
+ns_ref) first.';
my $html_file = shift || croak 'No HTML to preprocess passed to Pr
+st::pp';
my @lines = <$html_file>;
process(\@lines);
}
# The core function. Takes an array ref containing the lines to prepro
+cess.
# It determines the type (with parse()) and either prints the line to
+STDOUT
# or calls a loop dispatcher (in case a for loop is being started at t
+he current line).
sub process {
my $source = shift;
ref $source eq 'ARRAY' or croak 'Prst::process got an invalid sour
+ce to process. Array ref expected.';
# The source must be iterated like this because other instances
# may process the same list and we don't want to repeat ourselves.
+ :-)
while (my $line = shift @$source) {
my $res = parse($line);
if ($res->{'type'} eq 'for') {
dispatch_loop($source, $res);
}
elsif ($res->{'type'} eq 'raw') {
print $res->{'content'};
}
else {
die 'Internal Error: Unknown parse result.';
}
}
}
# Determines the type of the line (whether it starts a loop).
# An appropriate result hash is synthetized and returned.
sub parse {
my $line = shift || croak 'Prst::parse got nothing to parse.';
my ($iterator, $loop_list);
if (($iterator, $loop_list) = starts_loop($line)) {
return +{
type => 'for',
iterator => $iterator,
loop_list => $loop_list,
};
}
else {
return +{
type => 'raw',
content => expand($line),
};
}
}
# This function expands function calls and for loop iterator
# references in HTML.
sub expand {
my $line = shift || croak 'Prst::expand got nothing to expand.';
# The first <% ... %> tag on the line is always found and replaced
while ($line =~ m/<%\s*(.*?)\s*%>/) {
my $call = $1;
my $res;
# lines like <% f([i] 1 2 3) %>
if ($call =~ /^(\w+)\(\s*(((\[\w+\]|\w+)\s+)*(\[\w+\]|\w+))?\s
+*\)$/) { # a function call
# collect the function name and parameters
my $func_name = $1;
my $parameters = $2;
my @parameters = parse_parameters($parameters);
# check if we know this function
unless (exists $funs->{ $func_name } and ref $funs->{ $fun
+c_name } eq 'CODE') {
croak "Unknown function called in HTML on line:\n$line
+\n";
}
# and call it
$res = $funs->{ $func_name }(@parameters);
}
# lines like <% [i] %>
elsif ($call =~ /^\[(\w+)\]$/) { # a loop iterator
my $iterator_name = $1;
# store the iterator value
$res = get_iterator_value($iterator_name);
}
# lines with <% and unexpected content %>
else {
croak "Unknown call ($call) made on line:\n$line\n";
}
# replace the first <% ... %> with the function call result or
+ the iterator value
# so we can move on with the loop on the string
$line =~ s/<%.*?%>/$res/;
}
return $line;
}
# Checks whether a line is a loop start.
# If so, returns the iterator name and the values it is to traverse.
sub starts_loop {
my $line = shift || croak 'Prst::starts_loop got nothing to check.
+';
# The line could be e.g. <% for i f(a b c) %>
if ($line =~ /^\s*<%\s*for\s+(\w+)\s+(\w+)\(\s*([\w\s]*)\)\s*%>$/)
+ {
# fetch the iterator name, the function name and the parameter
+s
my $iterator = $1;
my $func_name = $2;
my $parameters = $3;
my @parameters = parse_parameters($parameters);
# check if we know this function
unless (exists $funs->{ $func_name } and ref $funs->{ $func_na
+me } eq 'CODE') {
croak "Unknown function '$func_name' called at loop start
+on line:\n$line\n";
}
# call the function
my @loop_list = $funs->{ $func_name }(@parameters);
# return the result
return ($iterator, \@loop_list);
}
# Lines like <% for name (John [name2] Mary Angus) %>
elsif ($line =~ /^\s*<%\s*for\s+(\w+)\s+\(\s*([\w\s]*)\)\s*%>$/) {
my $iterator = $1;
my $parameters = $2;
my @parameters = parse_parameters($parameters);
return ($iterator, \@parameters);
}
# return undef otherwise. An explicit undef is not returned becaus
+e the function
# is called in list context.
return;
}
sub ends_loop {
my $line = shift || croak 'Prst::ends_loop got nothing to check.';
# e.g. <% endfor %>
if ($line =~ /^\s*<%\s*endfor\s*%>\s*$/) {
return 1;
}
# implicit false on non-match
return;
}
# This function takes care of handling for loops. It tears the lines u
+p to the
# end of this for loop (containing optional inner loops) and has these
+ lines
# processed, actualizing the for loop iterator.
sub dispatch_loop {
my $source = shift;
ref $source eq 'ARRAY' or croak 'Array ref with source required as
+ param 1 at Prst::dispatch_loop.';
my $res = shift;
ref $res eq 'HASH' or croak 'Hash ref with parse result requires a
+s param 2 at Prst::dispatch_loop.';
exists $res->{'type'} and $res->{'type'} eq 'for'
or croak 'Prst::dispatch_loop called on other than "for" parse res
+ult,';
exists $res->{'iterator'} and length $res->{'iterator'}
or croak 'Prst::dispatch_loop called on a parse result with invali
+d iterator';
exists $res->{'loop_list'} and ref $res->{'loop_list'} eq 'ARRAY'
or croak 'Prst::dispatch_loop called on a parse result with invali
+d loop list';
my $iterator = $res->{'iterator'};
my $loop_list = $res->{'loop_list'};
# check if the iterator is not used by another (outer) loop
if (exists $loop_iterators->{ $iterator }) {
croak "Duplicite loop iterator '$iterator'.";
}
# fetch the loop guts
my @loop_guts;
my $nest_level = 1;
while ($nest_level) {
my $line = shift @$source or croak 'Syntax error: unterminated
+ loop';
if (starts_loop($line)) {
$nest_level++;
}
elsif (ends_loop($line)) {
$nest_level--;
}
push @loop_guts, $line;
}
# get rid of the <%endfor%>
pop @loop_guts;
# execute the loop
for my $i (@$loop_list) {
$loop_iterators->{ $iterator } = $i;
my @guts_to_process = @loop_guts;
process(\@guts_to_process);
}
delete $loop_iterators->{ $iterator };
}
sub get_iterator_value {
my $iterator_name = shift || croak 'Prst::get_iterator_value expec
+ts an identifier.';
unless (exists $loop_iterators->{ $iterator_name }) {
croak "Unknown loop iterator '$iterator_name'";
}
return $loop_iterators->{ $iterator_name };
}
# This function parses what appears in parentheses, which is barewords
+ and
# [bracketed] [barewords] all separated by whitespace.
# Returns a list of the words. Expands the bracketed ones to the itera
+tors.
sub parse_parameters {
my $parameters = shift or return ();
$parameters =~ /^\s*(.*?)\s*$/;
my @parameters = split /\s+/, $parameters;
for my $parameter (@parameters) {
if ($parameter =~ /^\[(\w+)\]$/) {
$parameter = get_iterator_value($1);
}
}
return @parameters;
}
=encoding utf8
=head1 Name
Prst - Preprocessor for static HTML
=head1 Synopsis
use Prst;
my $functions = +{
foo => sub { ... },
bar => sub { ... },
}
open my $html_file, '/path/to/html_file';
Prst::init($functions);
Prst::pp($html_file);
=head1 Description
Prst is a preprocessor for arbitrary text files. It somewhat resembles
+ PHP,
although it is by no means intended to be used for dynamic server-side
+ page
generation (hence static).
Prst needs two files to run: A program and a template. The template is
+ the
file which is to be preprocessed and the program is where the function
+s are
defined.
Prst evaluates the content of the <% tags %> and replaces them with t
+he
resulting text.
=head1 Template Syntax
The template is a text file. Its content is printed to standard output
+.
Wherever the tag <% ... %> occurs, it is a directive to the preprocess
+or.
<% foo() %>
<% bar(param1 param2 param3) %>
The above are function calls. A function call is an identifier immedia
+tely
followed by a pair of parentheses. Within the parentheses,
whitespace-delimited list of parameters can occur. Only one function c
+all can
be placed within the <% tag %>. Any number of function calls can be on
+ a line.
<% for iter list(params) %>
...
<% [iter] %>
<% foo(param1 [iter] param2) %>
...
<% endfor %>
The above is a loop. The loop starts with a line where a start tag and
optional whitespace occur. No other characters are allowed on a loop-s
+tarting
line. The start tag has two alternative forms:
1) The opening tag '<%', the loop iterator, a function call and the cl
+osing
tag '%>'.
Examples:
<% for file get_files() %>
<% for i range(1 10) %>
2) The opening tag '<%', the loop iterator, mandatory whitespace, left
parenthesis, whitespace-delimited list, right parenthesis and the clos
+ing tag
'%>'.
Examples:
<% for n (1 2 3 4 5) %>
<% for girl (Ann Betty Cathie) %>
The loop ends with the end tag <% endfor %>. The end tag must also be
+the only
non-whitespace on its line. Loops can be nested.
The iterator can be referred to within the <% tag %> by surrounding it
+s name
with [brackets]. There can be no whitespace between the brackets and t
+he
iterator. Such reference can occur: 1) As the only expression in the t
+ag, 2)
in a function's parameter list, 3) in a loop start tag list.
Examples:
<% [n] %>
<% [girl] %>
<% func([n]) %>
<% to_uppercase(html [girl] usa) %>
<% for i range(1 [n])
<% for i ([girl] is pretty) %>
=head1 Methods
=head2 init
The module must be initialized before use. The init function takes one
parameter - a hash reference. Its keys are the function names used in
+the
template. The values are references to the code to be run upon calling
+ the
function.
=head2 pp
pp stands for preprocess. It takes one argument - an open filehandle t
+o the
template.
The text in the template is analyzed. Lines not containing the <% tags
+ %> are
printed to standard output unchanged. In case of function calls, the n
+ame of
the function is searched in the hash which init() received, the code i
+t points
to is run with the parameters specified in the function call and the w
+hole
<% tag %> is replaced with the text the code returns. In case of plain
+ loop
[iterator] references, the <% tag %> is replaced by the current iterat
+or
value.
In case of loops, the iteration list is first determined. The iteratio
+n
list is whatever the function in the start tag returns or in the case
+of the
second form of loop invocation, the list is directly taken from the
declaration. Then, the lines up to the matching end tag are gathered a
+nd
evaluated, setting the iterator to each value from the iteration list.
+ The
start tag and end tag lines are discarded.
=head1 Example
Generate a html table with three names starting with letters a up to d
+.
=head2 The program - nametable.pl
#!/usr/bin/perl
use strict;
use warnings;
use encoding 'utf8';
use Prst;
my $functions = +{
range => sub {
my ($left, $right) = @_;
return ($left .. $right);
},
name => sub {
my ($letter, $number) = @_;
$number--;
my %names = (
a => ['Alice', 'Amelie', 'Ann', ],
b => ['Betty', 'Beverly', 'Brooke', ],
c => ['Cindy', 'Cynthia', 'Catherine',],
d => ['Daisy', 'Diana', 'Deborah', ],
);
return $names{ $letter }[ $number ];
},
};
open (my $file, '<', 'template.html');
Prst::init($functions);
Prst::pp($file);
=head2 The template - template.html
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<title>names</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8
+" />
</head>
<body>
<table>
<tr>
<% for letter (a b c d) %>
<th><% [letter] %></th>
<% endfor %>
</tr>
<% for number range(1 3) %>
<tr>
<% for letter (a b c d) %>
<td><% name([letter] [number]) %></td>
<% endfor %>
</tr>
<% endfor %>
</table>
</body>
</html>
=head2 The command
perl nametable.pl > index.html
This will create the file index.html, which will contain the table.
=head1 Author
Oldrich Kruza aka Sixtease <Oldrich.Kruza@sixtease.net>
=head1 Version
0.1
=head1 Copyright
Copyright (c) 2006 Oldrich Kruza
All rights reserved.
This program is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.
=cut
|