package MarkupHandler; use Carp; use HTML::Entities qw(encode_entities); use Exporter; @ISA = 'Exporter'; @EXPORT_OK = qw( ret_bal_tag_handlers ret_escape_code ret_ok_attr_val ret_scrubber ); use strict; use vars qw(@open_tags); # Gets the value of a tag attribute sub get_attr_val { my $t_ref = shift; if ( $$t_ref =~ / \G\s*=\s*( [^\s>"'][^\s>]* | # Unquoted "[^"]*" | # Double-quoted '[^']*' # Single-quoted ) /gx ) { return $1; } else { return ('', "no value found"); } } sub ret_bal_tag_handlers { my @out = (); while (@_) { my $tag = shift; my $name = $tag; if (ref($tag)) { if (exists $tag->{name}) { $name = $tag->{name}; } else { confess("Tags must have name attributes"); } } else { $tag = {name => $name}; } my $attribs = shift; if (@$attribs) { push @out, "<$name", ret_tag_open($name, @$attribs); } else { push @out, "<$name>", ret_const_tag_open($name); } # Rewrite handler? if (exists $tag->{pre}) { push @out, wrap_handler($tag->{pre}, pop(@out), ''); } push @out, "", ret_tag_close($name); if (exists $tag->{post}) { push @out, wrap_handler('', pop(@out), $tag->{post}); } } return @out; } # Many tags have no attributes allowed. Handle # them efficiently. sub ret_const_tag_open { my $tag = shift; return sub { push @open_tags, $tag; return "<$tag>"; }; } # Returns the basic "literal escape" that you see for # code. sub ret_escape_code { my $end_pat = shift; my $name = shift; return ( $name, sub { my $t_ref = shift; if ($$t_ref =~ m=\G(.*?)$end_pat=gs) { return "
" . encode_entities($1) . "
"; } else { return show_err("Unmatched $name tag found"); } } ); } # Generate an attribute handler based on an "ok value" test. # Note that quotes on the attribute will exist in the # value passed to the ok test. sub ret_ok_attr_val { my $ok_test = shift; wrap_handler( '', \&get_attr_val, sub { my $text = shift; if ($ok_test->($text)) { $text; } else { return ('', "Illegal val '$text'"); } } ); } # Pass a list of case/handler pairs, returns an anonymous # sub that processes those pairs. sub ret_scrubber { my %handler = @_; # Sanity check foreach my $case (keys %handler) { unless (UNIVERSAL::isa($handler{$case}, 'CODE')) { carp("Case '$case' dropped - handlers must be functions"); delete $handler{$case}; } } $handler{pre} ||= sub {return '';}; $handler{post} ||= sub { return join '', map "", reverse @open_tags; }; # Sorted in reverse so that '($t_ref); if (@err) { pos($$t_ref) = 0; $scrubbed .= show_err(@err); } else { $scrubbed .= $chunk; # Obscure bug fix. You cannot match 2 zero # length patterns in a row, this resets the # flag so you can. pos($$t_ref) = pos($$t_ref); } } unless (pos($$t_ref)) { if (length($$t_ref) == $pos) { # EXIT HERE # return $scrubbed . $handler{post}->($t_ref); } else { my $char = substr($$t_ref, $pos, 1); pos($$t_ref) = $pos + 1; $scrubbed .= encode_entities($char); } } } confess("I have no idea how I got here!"); } } # Returns a sub that closes a tag sub ret_tag_close { my $tag = shift; return sub { my @searched; while (@open_tags) { my $open = pop(@open_tags); push @searched, $open; if ($open eq $tag) { # Close em! return join '', map "", @searched; } } # No you cannot close a tag you didn't open! @open_tags = reverse @searched; pos(${$_[0]}) = 0; return show_err("Unmatched close tag "); }; } # The general open tag sub ret_tag_open { my $tag = shift; my %attr_test; foreach (@_) { if (ref($_)) { foreach my $attrib (keys %$_) { $attr_test{lc($attrib)} = $_->{$attrib}; } } else { $attr_test{lc($_)} = \&get_attr_val; } } return sub { my $t_ref = shift; my $text = "<$tag"; while ($$t_ref =~ /\G(?:\s+(\w+)|\s*>)/g) { if (defined($1)) { my $attrib = lc($1); if (exists $attr_test{$attrib}) { my ($chunk, @err) = $attr_test{$attrib}->($t_ref); if (@err) { return show_err( "While processing '$attrib' in <$tag>:", @err ); } else { $text .= " $attrib=$chunk"; } } else { pos($$t_ref) = 0; return show_err( "Tag '$tag' cannot accept attribute '$attrib'" ); } } else { $text .= ">"; push @open_tags, $tag; return $text; } } return show_err("Unended <$tag> detected"); }; } sub show_err { if (wantarray()) { return ('', @_); } else { my $err = encode_entities(join ' ', grep length($_), @_); return "

$err

"; } } sub wrap_handler { my $pre = shift() || sub {''}; my $fn = shift(); my $post = shift() || sub {@_}; return sub { my $t_ref = shift; my ($text, @err) = $pre->($t_ref); if (@err) { return show_err($text, @err); } (my $chunk, @err) = $fn->($t_ref); @err ? show_err("$text$chunk", @err) : $post->("$text$chunk"); }; } 1;