package Prst; use strict; use encoding 'utf8'; use Carp; my $funs; my $loop_iterators = +{}; # Initialization function. Needed to be called before using other functions. # The only parameter expected is a hash ref which contains functions to be # called from html. sub init { $funs = shift; ref $funs or croak 'Reference to hash containing HTML handling functions needed when initializing Prst.'; } # The main function. Expects an open filehandle, which it then processes. # That is, expands function calls and for loops. sub pp { ref $funs or croak 'Module uninitialized. Call Prst::init($functions_ref) first.'; my $html_file = shift || croak 'No HTML to preprocess passed to Prst::pp'; my @lines = <$html_file>; process(\@lines); } # The core function. Takes an array ref containing the lines to preprocess. # It determines the type (with parse()) and either prints the line to STDOUT # or calls a loop dispatcher (in case a for loop is being started at the current line). sub process { my $source = shift; ref $source eq 'ARRAY' or croak 'Prst::process got an invalid source to process. Array ref expected.'; # The source must be iterated like this because other instances # may process the same list and we don't want to repeat ourselves. :-) while (my $line = shift @$source) { my $res = parse($line); if ($res->{'type'} eq 'for') { dispatch_loop($source, $res); } elsif ($res->{'type'} eq 'raw') { print $res->{'content'}; } else { die 'Internal Error: Unknown parse result.'; } } } # Determines the type of the line (whether it starts a loop). # An appropriate result hash is synthetized and returned. sub parse { my $line = shift || croak 'Prst::parse got nothing to parse.'; my ($iterator, $loop_list); if (($iterator, $loop_list) = starts_loop($line)) { return +{ type => 'for', iterator => $iterator, loop_list => $loop_list, }; } else { return +{ type => 'raw', content => expand($line), }; } } # This function expands function calls and for loop iterator # references in HTML. sub expand { my $line = shift || croak 'Prst::expand got nothing to expand.'; # The first <% ... %> tag on the line is always found and replaced while ($line =~ m/<%\s*(.*?)\s*%>/) { my $call = $1; my $res; # lines like <% f([i] 1 2 3) %> if ($call =~ /^(\w+)\(\s*(((\[\w+\]|\w+)\s+)*(\[\w+\]|\w+))?\s*\)$/) { # a function call # collect the function name and parameters my $func_name = $1; my $parameters = $2; my @parameters = parse_parameters($parameters); # check if we know this function unless (exists $funs->{ $func_name } and ref $funs->{ $func_name } eq 'CODE') { croak "Unknown function called in HTML on line:\n$line\n"; } # and call it $res = $funs->{ $func_name }(@parameters); } # lines like <% [i] %> elsif ($call =~ /^\[(\w+)\]$/) { # a loop iterator my $iterator_name = $1; # store the iterator value $res = get_iterator_value($iterator_name); } # lines with <% and unexpected content %> else { croak "Unknown call ($call) made on line:\n$line\n"; } # replace the first <% ... %> with the function call result or the iterator value # so we can move on with the loop on the string $line =~ s/<%.*?%>/$res/; } return $line; } # Checks whether a line is a loop start. # If so, returns the iterator name and the values it is to traverse. sub starts_loop { my $line = shift || croak 'Prst::starts_loop got nothing to check.'; # The line could be e.g. <% for i f(a b c) %> if ($line =~ /^\s*<%\s*for\s+(\w+)\s+(\w+)\(\s*([\w\s]*)\)\s*%>$/) { # fetch the iterator name, the function name and the parameters my $iterator = $1; my $func_name = $2; my $parameters = $3; my @parameters = parse_parameters($parameters); # check if we know this function unless (exists $funs->{ $func_name } and ref $funs->{ $func_name } eq 'CODE') { croak "Unknown function '$func_name' called at loop start on line:\n$line\n"; } # call the function my @loop_list = $funs->{ $func_name }(@parameters); # return the result return ($iterator, \@loop_list); } # Lines like <% for name (John [name2] Mary Angus) %> elsif ($line =~ /^\s*<%\s*for\s+(\w+)\s+\(\s*([\w\s]*)\)\s*%>$/) { my $iterator = $1; my $parameters = $2; my @parameters = parse_parameters($parameters); return ($iterator, \@parameters); } # return undef otherwise. An explicit undef is not returned because the function # is called in list context. return; } sub ends_loop { my $line = shift || croak 'Prst::ends_loop got nothing to check.'; # e.g. <% endfor %> if ($line =~ /^\s*<%\s*endfor\s*%>\s*$/) { return 1; } # implicit false on non-match return; } # This function takes care of handling for loops. It tears the lines up to the # end of this for loop (containing optional inner loops) and has these lines # processed, actualizing the for loop iterator. sub dispatch_loop { my $source = shift; ref $source eq 'ARRAY' or croak 'Array ref with source required as param 1 at Prst::dispatch_loop.'; my $res = shift; ref $res eq 'HASH' or croak 'Hash ref with parse result requires as param 2 at Prst::dispatch_loop.'; exists $res->{'type'} and $res->{'type'} eq 'for' or croak 'Prst::dispatch_loop called on other than "for" parse result,'; exists $res->{'iterator'} and length $res->{'iterator'} or croak 'Prst::dispatch_loop called on a parse result with invalid iterator'; exists $res->{'loop_list'} and ref $res->{'loop_list'} eq 'ARRAY' or croak 'Prst::dispatch_loop called on a parse result with invalid loop list'; my $iterator = $res->{'iterator'}; my $loop_list = $res->{'loop_list'}; # check if the iterator is not used by another (outer) loop if (exists $loop_iterators->{ $iterator }) { croak "Duplicite loop iterator '$iterator'."; } # fetch the loop guts my @loop_guts; my $nest_level = 1; while ($nest_level) { my $line = shift @$source or croak 'Syntax error: unterminated loop'; if (starts_loop($line)) { $nest_level++; } elsif (ends_loop($line)) { $nest_level--; } push @loop_guts, $line; } # get rid of the <%endfor%> pop @loop_guts; # execute the loop for my $i (@$loop_list) { $loop_iterators->{ $iterator } = $i; my @guts_to_process = @loop_guts; process(\@guts_to_process); } delete $loop_iterators->{ $iterator }; } sub get_iterator_value { my $iterator_name = shift || croak 'Prst::get_iterator_value expects an identifier.'; unless (exists $loop_iterators->{ $iterator_name }) { croak "Unknown loop iterator '$iterator_name'"; } return $loop_iterators->{ $iterator_name }; } # This function parses what appears in parentheses, which is barewords and # [bracketed] [barewords] all separated by whitespace. # Returns a list of the words. Expands the bracketed ones to the iterators. sub parse_parameters { my $parameters = shift or return (); $parameters =~ /^\s*(.*?)\s*$/; my @parameters = split /\s+/, $parameters; for my $parameter (@parameters) { if ($parameter =~ /^\[(\w+)\]$/) { $parameter = get_iterator_value($1); } } return @parameters; } =encoding utf8 =head1 Name Prst - Preprocessor for static HTML =head1 Synopsis use Prst; my $functions = +{ foo => sub { ... }, bar => sub { ... }, } open my $html_file, '/path/to/html_file'; Prst::init($functions); Prst::pp($html_file); =head1 Description Prst is a preprocessor for arbitrary text files. It somewhat resembles PHP, although it is by no means intended to be used for dynamic server-side page generation (hence static). Prst needs two files to run: A program and a template. The template is the file which is to be preprocessed and the program is where the functions are defined. Prst evaluates the content of the <% tags %> and replaces them with the resulting text. =head1 Template Syntax The template is a text file. Its content is printed to standard output. Wherever the tag <% ... %> occurs, it is a directive to the preprocessor. <% foo() %> <% bar(param1 param2 param3) %> The above are function calls. A function call is an identifier immediately followed by a pair of parentheses. Within the parentheses, whitespace-delimited list of parameters can occur. Only one function call can be placed within the <% tag %>. Any number of function calls can be on a line. <% for iter list(params) %> ... <% [iter] %> <% foo(param1 [iter] param2) %> ... <% endfor %> The above is a loop. The loop starts with a line where a start tag and optional whitespace occur. No other characters are allowed on a loop-starting line. The start tag has two alternative forms: 1) The opening tag '<%', the loop iterator, a function call and the closing tag '%>'. Examples: <% for file get_files() %> <% for i range(1 10) %> 2) The opening tag '<%', the loop iterator, mandatory whitespace, left parenthesis, whitespace-delimited list, right parenthesis and the closing tag '%>'. Examples: <% for n (1 2 3 4 5) %> <% for girl (Ann Betty Cathie) %> The loop ends with the end tag <% endfor %>. The end tag must also be the only non-whitespace on its line. Loops can be nested. The iterator can be referred to within the <% tag %> by surrounding its name with [brackets]. There can be no whitespace between the brackets and the iterator. Such reference can occur: 1) As the only expression in the tag, 2) in a function's parameter list, 3) in a loop start tag list. Examples: <% [n] %> <% [girl] %> <% func([n]) %> <% to_uppercase(html [girl] usa) %> <% for i range(1 [n]) <% for i ([girl] is pretty) %> =head1 Methods =head2 init The module must be initialized before use. The init function takes one parameter - a hash reference. Its keys are the function names used in the template. The values are references to the code to be run upon calling the function. =head2 pp pp stands for preprocess. It takes one argument - an open filehandle to the template. The text in the template is analyzed. Lines not containing the <% tags %> are printed to standard output unchanged. In case of function calls, the name of the function is searched in the hash which init() received, the code it points to is run with the parameters specified in the function call and the whole <% tag %> is replaced with the text the code returns. In case of plain loop [iterator] references, the <% tag %> is replaced by the current iterator value. In case of loops, the iteration list is first determined. The iteration list is whatever the function in the start tag returns or in the case of the second form of loop invocation, the list is directly taken from the declaration. Then, the lines up to the matching end tag are gathered and evaluated, setting the iterator to each value from the iteration list. The start tag and end tag lines are discarded. =head1 Example Generate a html table with three names starting with letters a up to d. =head2 The program - nametable.pl #!/usr/bin/perl use strict; use warnings; use encoding 'utf8'; use Prst; my $functions = +{ range => sub { my ($left, $right) = @_; return ($left .. $right); }, name => sub { my ($letter, $number) = @_; $number--; my %names = ( a => ['Alice', 'Amelie', 'Ann', ], b => ['Betty', 'Beverly', 'Brooke', ], c => ['Cindy', 'Cynthia', 'Catherine',], d => ['Daisy', 'Diana', 'Deborah', ], ); return $names{ $letter }[ $number ]; }, }; open (my $file, '<', 'template.html'); Prst::init($functions); Prst::pp($file); =head2 The template - template.html names <% for letter (a b c d) %> <% endfor %> <% for number range(1 3) %> <% for letter (a b c d) %> <% endfor %> <% endfor %>
<% [letter] %>
<% name([letter] [number]) %>
=head2 The command perl nametable.pl > index.html This will create the file index.html, which will contain the table. =head1 Author Oldrich Kruza aka Sixtease =head1 Version 0.1 =head1 Copyright Copyright (c) 2006 Oldrich Kruza All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut