#!/usr/bin/perl -w
require 5.004;
use strict;
use CGI;
use File::Basename;
use HTML::TokeParser;
my $doc = shift or &usage;
my $basename = basename( $doc );
$basename =~ s/\..*$//;
my $p = HTML::TokeParser->new($doc) || die "Can't open: $!";
my $formnum = 1;
######--------------------------------------######
# Begin user config section #
######--------------------------------------######
# The following variables should be set by the user to control
+
# the output of the code generator
#
# The only thing you *need* to set it the shebang line. The other opt
+ions
# are just for configuration.
#
# The following variable MUST only containt letters, numbers, or under
+scores:
# qw/ $taint_pfx $cgi_obj $err_var $err_sub $log_sub /
# If you slip up, the script will die rather than produce bad code.
# This is the prefix of all variables that need to be untainted.
# should be letters, numbers, or underscores.
my $taint_pfx = 'tainted_';
# set this to false to have OO cgi code written out.
my $cgi_std = 0;
# if $cgi_std is set to false, use this to specify the variable
# name of the CGI object (e.g. 'q' becomes 'my $q = CGI->new;')
my $cgi_obj = 'q';
# This is the shebang line that will be used.
# If left blank, it will be skipped.
my $shebang = '#!/usr/bin/perl -wT';
# use this for the name of the hash that contains the errors
my $err_var = 'errors';
# set this to true to have the program print the &error stub
my $print_err = 1;
# set this to the name of your error handling routine
my $err_sub = 'error';
# if $err_sub is true, this will be the stub of your security
# log routine. If the form has been tampered with (i.e. data
# in @safe_types does not untaint), the use this to log the info.
my $log_sub = 'sec_log';
# set this to true for lower case variable names
# If your forms elements that have the same name except
# for case, this could cause problems.
my $lc_names = 1;
######--------------------------------------######
# End user config section #
######--------------------------------------######
# These are the form elements for which we can *safely* create regular
+ expressions
# for untainting. Do not change this array unless you know what you a
+re doing.
my @safe_types = qw/ hidden checkbox radio select submit /;
my %element; # holds all of the form element types, names, and v
+alues
my %select; # holds select form elements so we know if we've se
+en them
my @element_order; # order that elements appear in the form. It's not
+ really
# needed, but we do this so that variables in gener
+ated code
# appear roughly in the same order as the form.
my $select_token; # holds the select token when parsing <select> valu
+es
# walk through document and get each tag
# we're building a list of form elements here
while (my $token = $p->get_tag) {
my $tag = $token->[0];
if ( my $form_pos = ( $tag eq 'form' .. $tag eq '/form' ) ) {
# Oh! We're in a form. Start looking for stuff.
if ( $form_pos != 1 and substr( $form_pos, -2 ) ne 'E0' ) {
add_input_element( $token ) if $tag eq 'input';
# <select> is a pain, so we need to handle it differently
if ( my $select_pos = ( $tag eq 'select' .. $tag eq '/sele
+ct' ) ) {
$select_token = $token if $tag eq 'select';
if ( $select_pos != 1 and substr( $select_pos, -2 ) ne
+ 'E0' ) {
add_select_element( $token, $p, $select_token ) if
+ $tag eq 'option';
} elsif ( substr( $select_pos, -2 ) eq 'E0' ) {
# we've finished the <select>, so add it to
# %select so we knows we've seen it.
$select{ $select_token->[1]->{ 'name' } } = '';
}
} # end if (select)
foreach ( qw/ textarea button / ) {
add_generic_element( $token ) if $tag eq $_;
}
} elsif ( substr( $form_pos, -2 ) eq 'E0' ) {
# we've finished the form, so let's write the document, cl
+ear the vars,
# and start looking for more forms.
&write_template;
%element = ();
@element_order = ();
$formnum++;
}
} # end if (form)
}
# This is to extract necessary data from form element
# that will later be added to %element
sub add_generic_element {
my $token = shift;
my $tag = $token->[0];
my $name = $token->[1]->{ 'name' } || '';
my $value = $token->[1]->{ 'value' } || '';
# I don't want to pass 'text' defaults as they can often be huge
if ( $tag eq 'textarea' ) {
$value = '';
}
update_element_hash( $name, $tag, $value, 1 );
}
# <select> needs to be handled different.
sub add_select_element {
my ( $token, $p, $select_token ) = @_;
my $name = $select_token->[1]->{ 'name' } || '';
my $value = $token->[1]->{ 'value' } || '';
# The following is because an <option> with a select will
# assume the value of the text if no value is specified
if ( ! $value ) {
$value = $p->get_trimmed_text;
}
# If <select> has the 'multiple' attribute or we've previously
# encountered a <select> with the same name, set multiple to true
my $multiple = exists $select_token->[1]->{ 'multiple' } ? 1 :
exists $select{ $name } ? 1 :
0 ;
update_element_hash( $name, 'select', $value, $multiple );
}
# Need to format the data from the input element
sub add_input_element {
my $token = shift;
my $type = $token->[1]->{ 'type' } || '';
my $name = $token->[1]->{ 'name' } || '';
my $value = $token->[1]->{ 'value' } || '';
my $multiple = $type eq 'radio' ? 0 : 1;
update_element_hash( $name, $type, $value, $multiple );
}
# here's where the formatted form element data is actually added
# to the element hash.
sub update_element_hash {
my ( $name, $type, $value, $multiple ) = @_;
return if $type eq 'reset'; # This only affects the form, not the
+script
$value =~ s/\n/\\n/g;
if ( $name ) {
if ( ! exists $element{ $name } ) {
$element{ $name }{ 'multiple' } = 0;
$element{ $name }{ 'type' } = $type;
$element{ $name }{ 'value' } = $value ? [ $value ] : []
+;
push @element_order, $name;
} else {
$element{ $name }{ 'multiple' } += $multiple;
push @{ $element{ $name }{ 'value' } }, $value if $value;
}
}
}
# Duh!
sub usage {
print <<" END_HERE";
Usage: formparse.pl some.html
END_HERE
exit;
}
# Oh, goody! We actually get to start writing the code :)
sub write_template {
my $filename = "${basename}_form_${formnum}.cgi";
my $cgi_var = $cgi_std ? '' : "\$${cgi_obj}->";
my $cgi_line = 'use CGI';
$cgi_line .= $cgi_std ? " qw/:standard/;\n" : ";\n";
my $max_var_length = 1;
foreach ( keys %element ) {
$max_var_length = length if length > $max_var_length;
}
open OUT, "> $filename" or die "Can't open $filename for writing:
+$!";
print OUT $shebang."\n" if $shebang;
print OUT &template;
print OUT "use strict;\n";
print OUT $cgi_line;
if ( ! $cgi_std ) {
print OUT "my \$$cgi_obj = CGI->new;\n";
}
print OUT "my \%$err_var;\n";
print OUT "\n# Grab all data\n";
# Here's where we print param() calls
foreach my $element ( @element_order ) {
my $var_name = get_var_name( $element );
my $data_type = $element{ $element }{ 'multiple' } ? '@' : '$
+';
my $default = $element{ $element }{ 'multiple' } ? '()' : "'
+'";
print OUT qq/my ${data_type}${taint_pfx}${var_name} / .
' ' x ( $max_var_length - length $var_name ) .
qq/= ${cgi_var}param( '$element' )/ .
' ' x ( $max_var_length - length $var_name ) .
qq/ || $default; # $element{ $element }{ 'type' }\n/
+;
}
print OUT <<" END_HERE";
# The following is just a rough "fill in" template for untainting your
+ data.
# It will need to be customized to suit your particular needs. You'll
+ need
# to create regular expressions to untaint your data and if you skimp
+on this,
# it's at your peril!!!
END_HERE
# here's where we print the untainting template
foreach my $element ( @element_order ) {
my $var_name = get_var_name( $element );
my $type = $element{ $element }{ 'type' };
my $multiple = $element{ $element }{ 'multiple' };
if ( $multiple ) {
# Ooh, multiple values. Need to untaint an array.
print OUT qq!my \@${var_name}; # $type values: ! . join( "
+,", @{ $element{ $element }{ 'value' } } ) . "\n";
print OUT "foreach ( 0 .. \$#${taint_pfx}${var_name} ) {\n
+".
qq! ( \$${var_name}\[\$_] ) ! .
untainting_code( $element, $var_name, $multiple, $ty
+pe, '[$_]' ) .
"}\n\n";
} else {
# Untainting a scalar.
print OUT qq!# $type values: ! . join( ",", @{ $element{ $
+element }{ 'value' } } ) . "\n";
print OUT qq!my ( \$${var_name} ) ! .
untainting_code( $element, $var_name, $multiple,
+ $type, '' ) . "\n";
}
}
print OUT err_stub() if $print_err;
close OUT or die "Can't close $filename: $!";
}
# return the code that will actually go in the untainting routine
sub untainting_code {
my ( $element, $var_name, $multiple, $type, $index ) = @_;
my $var_prefix = $multiple ? '@' : '$';
return qq!= ( ! . untainting_regexes( $element, $var_name, $multip
+le ) . " )\n" .
qq! or \$${err_var}\{ '$var_name' } = [ "$type", \\$
+{var_prefix}${taint_pfx}${var_name}, "You must supply a proper value
+for '$var_name'. Allowed characters are letters, numbers, or punctua
+tion." ];\n!;
}
# need to create the regexes for untainting.
sub untainting_regexes {
my ( $element, $var_name, $multiple ) = @_;
my $type = $element{ $element }{ 'type' };
my $code;
# Can't create a safe regex, so we insert code to kill the script
+unless
# the programmer creates his or her own regex
$code = '"" ), die # could not auto-create regex # ';
return $code if ! grep { /$type/ } @safe_types;
my @values = @{ $element{ $element }{ 'value' } };
@values = return_unique_items( @values ) if @values;
if ( scalar @values > 1 ) {
my ( $all_length_of_one, $formatted_values ) = escape_values(
+\@values );
my $array_index = $multiple ? '[$_]' : '';
if ( $all_length_of_one ) {
# we're returning a character class
$code = qq!\$${taint_pfx}${var_name}$array_index =~ /^(! .
+ '['. join( '', @$formatted_values ) .'])$/';
} else {
# we have multiple values that a character class is not su
+itable for,
# so we return separate regex tests for each value
#my $offset = $multiple ? length( $taint_pfx ) + 17 : leng
+th( $taint_pfx ) + 12;
my $offset = $multiple ? 18 : 13;
$code = qq!\$${taint_pfx}${var_name}$array_index =~ /^(! .
+ $$formatted_values[0] . ")\$/ or \n";
for ( 1 .. scalar @$formatted_values - 1 ) {
$code .= ' ' x ( $offset + length $var_name );
$code .= qq!\$${taint_pfx}${var_name}$array_index =~ /
+^(! . $values[$_] . ")\$/";
$code .= " or \n" if $_ < scalar @$formatted_values -
+1;
}
}
} else {
$code = qq!\$${taint_pfx}${var_name} =~ /^(! . quotemeta( $val
+ues[0] ) . ')$/';
}
return $code;
}
###############################################################
# if you want to maintain this, please note that this appears #
# to have subs within subs. Look closer: HERE docs. Rather #
# tricky. #
###############################################################
sub err_stub {
my $code =<<END_CODE;
$err_sub( \\\%$err_var ) if \%$err_var;
sub $err_sub {
my \$err_hash = shift;
# \$err_hash is a reference to an anoymous hash. Keys are form at
+tribute names and values
# are an anonymous array: [ element type, value, error_message ]
# Example:
# \$err_hash = {
# 'username' => [ 'text', '????', "You must supply a proper
+value for 'username'..." ]
# }
# Might be generated from: <input type="text" name="username">
END_CODE
$code .= " my \@safe_elements = qw/ ";
foreach ( @safe_types ) {
$code .= "$_ " if $_ ne 'checkbox'; # skip checkbox as it's po
+ssible for no data to be sent
}
$code .= "/;\n";
$code .=<<'END_CODE';
foreach my $key ( keys %$err_hash ) {
if ( grep { /$$err_hash{ $key }->[0]/ } @safe_elements ) {
# The value of an element whose value shouldn't change app
+ears to be wrong
sec_log( $$err_hash{ $key }, $key );
} else {
# insert error handling code here
}
}
}
END_CODE
$code .= "sub $log_sub {\n";
$code .=<<'END_CODE';
my ( $bad_data, $element_name ) = @_;
# $bad_data is a reference to an anoymous array.
# [ element type, value, error_message ]
# Example: [ 'text', '%#$!#$', "You must supply a proper value for
+ 'username'..." ]
# Might be generated from: <input type="text" name="username">
}
END_CODE
}
##########################
# End 'HERE' doc section #
##########################
# return a safe variable name based upon the element name
sub get_var_name {
my $var_name = shift;
$var_name = lc $var_name if $lc_names;
$var_name =~ s/\W/_/g; # nice and safe variable names
return $var_name;
}
# escape data for created regexes
sub escape_values {
# if all values are only one character in length, we'll escape cha
+racters
# with special meaning in a character class. Otherwise, we'll esc
+ape all
# non-word characters.
my $values = shift;
my $all_length_of_one = 1;
foreach ( @$values ) {
$all_length_of_one = 0, last if length > 1;
}
if ( $all_length_of_one ) {
# We don't want a negated character class!
if ( $$values[0] eq '^' ) {
$$values[0] = '\^';
}
for ( 1 .. scalar @$values - 1 ) {
# quote ']', '\', and '-' in character class
if ( $$values[ $_ ] =~ /[\]\\\-]/ ) {
$$values[ $_ ] = '\\' . $$values[ $_ ];
}
}
} else {
@$values = map { quotemeta( $_ ) } @$values if @$values;
}
return ( $all_length_of_one, $values );
}
# remove duplicates from array
sub return_unique_items {
my %seen;
grep { ! $seen{ $_ }++ } @_;
}
# This is the 'comment' template written at the top of the code
sub template {
my ( $day, $month, $year ) = (localtime)[3..5];
$year += 1900;
my @months = qw/January February March April May June
July August September October November Decemb
+er/;
return <<END_HERE;
#
# Program:
# Author:
# Date Created:
# Purpose:
#
# Inputs:
#
# Outputs:
#
# --------------- Maintenance log ------------------------
# Date: $months[ $month ] $day, $year
# Programmer: parse_form.pl
# Action: automatic code generation template
# --------------------------------------------------------
# Date:
# Programmer:
# Action:
# --------------------------------------------------------
END_HERE
}
# If the user config information is bad, kill the script rather than w
+rite bad code
sub validate {
unless( $taint_pfx =~ /^\w+$/ ) { bad_config( '$taint_pfx' ) };
unless( $cgi_obj =~ /^\w+$/ ) { bad_config( '$cgi_obj' ) };
unless( $err_var =~ /^\w+$/ ) { bad_config( '$err_var' ) };
unless( $err_sub =~ /^\w+$/ ) { bad_config( '$err_sub' ) };
unless( $log_sub =~ /^\w+$/ ) { bad_config( '$log_sub' ) };
if ( $HTML::Parser::VERSION < 3.25 ) {
warn "\n You have HTML::Parser version $HTML::Parser::VERSI
+ON, which is out of date.\n".
" XHTML may not parse correctly\n".
" If you cannot upgrade \$HTML::Parser, you will need
+to carefully double-check\n".
" that all form elements have been included.";
}
}
# This is where the script is actually killed
sub bad_config {
my $config_var = shift;
die "$config_var must only containt letters, numbers, or underscor
+es:\n\t".
"$config_var =~ /^\\w+\$/ or die;";
}
__END__
=head1 NAME
parse_form.pl Quick and easy form handling code
=head1 USAGE
parse_form.pl some.html
=head1 DESCRIPTION
One of the most tedious tasks of writing CGI scripts is creating the f
+orm handling routines.
The larger the form, the more tedious this can be. Typically, this is
+ a series of 'param'
calls followed by a bunch of rather boring untainting expressions. Th
+is program will read
an HTML document and produce a series of files, one per form in the sc
+ript.
Each file will be the beginnings of the CGI script used to process eac
+h form in the HTML
document. Each file has several logical sections:
=over 4
=item Shebang Line
The programmer sets the C<$shebang> variable to point to the Perl inte
+rpreter and add
necessary switches:
my $shebang = '/usr/bin/perl -wT';
If C<$shebang> is set to a false value (e.g., the empty string), then
+the shebang line will
not be included.
=item Header Template
The C<&template> subroutine should be altered to fit the programmer's
+shop standards. This
template is a series of comments added to the beginning of the output
+files, after the
shebang line, which identify the programmer, author, purpose of the pr
+ogram, etc.
=item C<use> Statements
After the template, it is assumed the the programmer will C<use strict
+> and C<use CGI>.
If the programmer sets the C<$cgi_std> variable to false, then this se
+ction will be:
use strict;
use CGI;
my $q = CGI->new;
The variable name for the CGI object is set with the C<$cgi_obj> varia
+ble.
=item Grabbing Form Data
The next section will be a series of C<CGI::param> calls. These will
+grab in all data from
the CGI form the variable name set to whatever value is assigned to C<
+$taint_pfx> followed
by the C<name> attribute in each CGI element. To ensure that safe var
+iable names are created,
the C<name> attribute will be parsed through the following regex:
$var_name = s/\W/_/g;
Here is an example of a text field and the resulting C<param> call (as
+sumes using the
function-oriented version of CGI.pm:
<input type="text" name="first name">
my $_first_name = param( 'first name' ); # text
Note the the type of form element generating the data is appended as a
+ comment. If
C<parse_form.pl> determines that more than one value could be for a pa
+rticular name,
it will use an array instead of a scalar:
<select name="sports" multiple>
<option>Football
<option>Basketball</option>
<option>Other
</select>
my @_sports = param( 'sports' ); # select
Note that the HTML is sloppy. "Basketball" has a closing E<lt>/option
+E<gt>, while the
other options do not. C<parse_form.pl> does not care.
=item Untainting the data
Untainting the form data is perhaps one of the most onerous tasks and
+it's an easy one
to skip (whether through false laziness or genuine oversight). C<pars
+e_for.pl> creates
a series of stub untainting methods. These B<are not fully functional
+>!!! Blindly
untainting data is foolish. Instead, we create generic routines that
+the programmer will
need to complete. However, for certain types of form elements, the fo
+rm data should be
relatively simple to untaint. These are elements for which the data i
+s already specified
and should not change:
=over 4
=item *
hidden
=item *
checkbox
=item *
radio
=item *
select
=item *
submit
=back
For these five items, the program will read the default values and cre
+ate a regular expression
to match this data. If the default values are all single character, i
+t will use a character class
to match the data. Otherwise, it will use alternation. For the C<@_s
+ports> example above, the
following untainting code will be created:
my @sports; # select values: Football,Basketball,Other
foreach ( 0 .. $#_sports ) {
( $sports[$_] ) = ( $_sports[$_] =~ /^(Football)$/ or
$_sports[$_] =~ /^(Basketball)$/ or
$_sports[$_] =~ /^(Other)$/ )
or push @{ $errors{ 'sports' } }, "You must supply a proper v
+alue for 'sports'. Allowed characters are letters, numbers, or punct
+uation.";
}
The first line is a comment listing the type of form element and any d
+efault values that were
provided by the form and a declaration of the variable to hold the unt
+ainted data. The subsequent
lines represent the untainting code that's generated..
If you prefer, you could use alternation for the regex:
( $sports[$_] ) = ( $_sports[$_] =~ /^(Football|Basketball|Other)
+$/ )
I have elected not to do this as alternation is much less efficient.
+If you have many values,
particularly with larger forms, this could have a a significant impact
+ on your script's performance.
For simple alternation, however, you I<probably> wouldn't notice much
+of a speed difference.
It is recommended, however, that you go through the regular expression
+s manually and optimize them.
The untainting code for days of the month might be something like:
my ( $user_datesuse2_day ) = ( $tainted_user_datesuse_day =~ /^(01)$/
+ or
$tainted_user_datesuse_day =~ /^(02)$/
+ or
$tainted_user_datesuse_day =~ /^(03)$/
+ or
...
$tainted_user_datesuse_day =~ /^(31)$/
+ )
Needless to say, that's a waste! A quick optimization produces someth
+ing like:
my ( $user_datesuse_day ) = ( $tainted_user_datesuse_day =~ /^([0-2]\
+d|3[01])$/ )
For form elements other than the five listed above, the code will be s
+lightly different. This is
to force the programmer to actually go through and add all of these re
+gexes. In the C<$_first_name>
example above, the following untainting code will be created:
# text values:
my ( $first_name ) = ( "" ), die # could not auto-create regex # )
What this does is set C<$first_name> to the empty string and kill the
+script. This is to force the
programmer to create the regex manually. Also, with large forms, it e
+nsures that you are less likely
to overlook the untainting.
=back
=head1 User Configuration
No point in having auto-generated code unless you have some control of
+ the output. At the beginning
of the program is a 'user config' section. The following variables ca
+n be changed by the user to
affect the output of the script.
=over 4
=item C<$taint_pfx>
This is the prefix of all variables that need to be untainted. Must m
+atch /^\w+$/
my $taint_pfx = 'tainted_';
=item C<$cgi_std>
Set this to false to have OO cgi code written out.
my $cgi_std = 0;
=item C<$cgi_obj>
If C<$cgi_std> is set to false, use this to specify the variable name
+of the CGI object
(e.g. 'q' becomes C<my $q = CGI->new;>). Must match /^\w+$/
my $cgi_obj = 'q';
=item C<$shebang>
This is the shebang line that will be used. If left blank, it will b
+e skipped.
my $shebang = '#!/usr/bin/perl -wT';
=item C<$err_var>
Use this for the name of the hash that will contain the errors generat
+ed by scalars that
do not untaint. Must match /^\w+$/
my $err_var = 'errors';
=item C<$print_err>
Set this to true to have the program print the C<&error> stub.
my $print_err = 1;
=item C<$err_sub>
Set this to the name of your error handling routine. Must match /^\w+
+$/
my $err_sub = 'error';
=item C<$log_sub>
If C<$err_sub> is true, this will be the stub of your security log rou
+tine. If the form has
been tampered with (i.e. data in C<@safe_types> does not untaint), the
+ use this to log the info.
Must match /^\w+$/
my $log_sub = 'sec_log';
=item C<$lc_names>
Set this to true for lower case variable names. If your forms element
+s that have the same name
except for case, this could cause problems.
my $lc_names = 1;
=back
=head1 Sample Input and Output
=head2 Input: HTML document named test.html
Note the the following document has very poorly-formed HTML. Not all
+element attributes are quoted.
The case varies on input elements and whether or not a closing E<lt>/o
+ptionE<gt> tag has been used
varies.
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>This is for a form parsing test</title>
</head>
<body>
<p>I don't think anyone will even <em>look</em> at this docum
+ent.</p>
This is a line with no tags.
<form action='C:\Inetpub\wwwroot\garban\cgi-bin\cookie.cgi' m
+ethod=post enctype='multipart/form-data'>
<input type='hidden' name=somename value="asdf">
<input type=text name=name value=Ovid size="30" maxsize="
+30">
<br />
<br>
<input type="checkbox" name="group1" value="1" checked />
+ box 1 group 1
<br>
<input type="checkbox" NAME="group1" value="2"> box 2 gro
+up 1
<br>
<input type="password" name="pass"> Password
<br />
<select name="sports">
<option>Tiddly winks
<option>Mud wrestling</option>
</select>
<br>
<textarea name="test">Some text</textarea>
<br>
<input type="radio" name="radio1" value="^" checked /> ra
+dio 1 group 1
<input type="radio" name="radio1" value="^" checked /> ra
+dio 1 group 1
<input type="radio" name="radio1" value="^" checked /> ra
+dio 1 group 1
<input type="radio" name="radio1" value="\" checked /> ra
+dio 1 group 1
<input type="radio" name="radio1" value="]" checked /> ra
+dio 1 group 1
<input type="radio" name="radio1" value="1" checked /> ra
+dio 1 group 1
<br>
<input type="radio" NAME="radio1" value="2"> radio 2 grou
+p 1
<!-- a comment -->
<br />
<SELEct name="religions" multiple>
<option>Democrat
<option>Republican</option>
</select>
<textarea name="test">Some text</textarea>
<br />
<select name="asdf">
<option>Dasdfsdf
<option>asfdasdfasdf</option>
</select>
<br>
<select name="sports">
<option>Twister
<option>Jello wrestling</option>
</select>
<br>
<input type="submit" name=".submit" value="why bother?">
<br>
<input type="radio" name="radio1" value="3" checked /> ra
+dio 3 group 1
<br>
<input type="radio" NAME="radio1" value="4"> radio 4 gro
+up 1
</form>
</body>
</html>
=head2 Output: document named test_form_1.cgi
Note that from a 60 line HTML document, we have instantly generated a
+124 line Perl script.
#!/usr/bin/perl -wT
#
# Program:
# Author:
# Date Created:
# Purpose:
#
# Inputs:
#
# Outputs:
#
# --------------- Maintenance log ------------------------
# Date: June 14, 2001
# Programmer: parse_form.pl
# Action: automatic code generation template
# --------------------------------------------------------
# Date:
# Programmer:
# Action:
# --------------------------------------------------------
use strict;
use CGI;
my $q = CGI->new;
my %errors;
# Grab all data
my $tainted_somename = $q->param( 'somename' ) || ''; # hidden
my $tainted_name = $q->param( 'name' ) || ''; # text
my @tainted_group1 = $q->param( 'group1' ) || (); # checkbox
my $tainted_pass = $q->param( 'pass' ) || ''; # password
my @tainted_sports = $q->param( 'sports' ) || (); # select
my @tainted_test = $q->param( 'test' ) || (); # textarea
my $tainted_radio1 = $q->param( 'radio1' ) || ''; # radio
my @tainted_religions = $q->param( 'religions' ) || (); # select
my $tainted_asdf = $q->param( 'asdf' ) || ''; # select
my $tainted__submit = $q->param( '.submit' ) || ''; # submit
# The following is just a rough "fill in" template for untainting you
+r data.
# It will need to be customized to suit your particular needs. You'l
+l need
# to create regular expressions to untaint your data and if you skimp
+ on this,
# it's at your peril!!!
# hidden values: asdf
my ( $somename ) = ( $tainted_somename =~ /^(asdf)$/ )
or $errors{ 'somename' } = [ "hidden", \$tainted_somename, "Y
+ou must supply a proper value for 'somename'. Allowed characters are
+ letters, numbers, or punctuation." ];
# text values: Ovid
my ( $name ) = ( "" ), die # could not auto-create regex # )
or $errors{ 'name' } = [ "text", \$tainted_name, "You must su
+pply a proper value for 'name'. Allowed characters are letters, numb
+ers, or punctuation." ];
my @group1; # checkbox values: 1,2
foreach ( 0 .. $#tainted_group1 ) {
( $group1[$_] ) = ( $tainted_group1[$_] =~ /^([12])$/ )
or $errors{ 'group1' } = [ "checkbox", \@tainted_group1, "You
+ must supply a proper value for 'group1'. Allowed characters are let
+ters, numbers, or punctuation." ];
}
# password values:
my ( $pass ) = ( "" ), die # could not auto-create regex # )
or $errors{ 'pass' } = [ "password", \$tainted_pass, "You mus
+t supply a proper value for 'pass'. Allowed characters are letters,
+numbers, or punctuation." ];
my @sports; # select values: Tiddly winks,Mud wrestling,Twister,Jello
+ wrestling
foreach ( 0 .. $#tainted_sports ) {
( $sports[$_] ) = ( $tainted_sports[$_] =~ /^(Tiddly\ winks)$/ or
+
$tainted_sports[$_] =~ /^(Mud\ wrestling)$/ o
+r
$tainted_sports[$_] =~ /^(Twister)$/ or
$tainted_sports[$_] =~ /^(Jello\ wrestling)$/
+ )
or $errors{ 'sports' } = [ "select", \@tainted_sports, "You m
+ust supply a proper value for 'sports'. Allowed characters are lette
+rs, numbers, or punctuation." ];
}
my @test; # textarea values:
foreach ( 0 .. $#tainted_test ) {
( $test[$_] ) = ( "" ), die # could not auto-create regex # )
or $errors{ 'test' } = [ "textarea", \@tainted_test, "You mus
+t supply a proper value for 'test'. Allowed characters are letters,
+numbers, or punctuation." ];
}
# radio values: ^,^,^,\,],1,2,3,4
my ( $radio1 ) = ( $tainted_radio1 =~ /^([\^\\\]1234])$/ )
or $errors{ 'radio1' } = [ "radio", \$tainted_radio1, "You mu
+st supply a proper value for 'radio1'. Allowed characters are letter
+s, numbers, or punctuation." ];
my @religions; # select values: Democrat,Republican
foreach ( 0 .. $#tainted_religions ) {
( $religions[$_] ) = ( $tainted_religions[$_] =~ /^(Democrat)$/ o
+r
$tainted_religions[$_] =~ /^(Republican)$/
+ )
or $errors{ 'religions' } = [ "select", \@tainted_religions,
+"You must supply a proper value for 'religions'. Allowed characters
+are letters, numbers, or punctuation." ];
}
# select values: Dasdfsdf,asfdasdfasdf
my ( $asdf ) = ( $tainted_asdf =~ /^(Dasdfsdf)$/ or
$tainted_asdf =~ /^(asfdasdfasdf)$/ )
or $errors{ 'asdf' } = [ "select", \$tainted_asdf, "You must
+supply a proper value for 'asdf'. Allowed characters are letters, nu
+mbers, or punctuation." ];
# submit values: why bother?
my ( $_submit ) = ( $tainted__submit =~ /^(why\ bother\?)$/ )
or $errors{ '_submit' } = [ "submit", \$tainted__submit, "You
+ must supply a proper value for '_submit'. Allowed characters are le
+tters, numbers, or punctuation." ];
error( \%errors ) if %errors;
sub error {
my $err_hash = shift;
# $err_hash is a reference to an anoymous hash. Keys are form at
+tribute names and values
# are an anonymous array: [ element type, value, error_message ]
# Example:
# $err_hash = {
# 'username' => [ 'text', '????', "You must supply a proper
+ value for 'username'..." ]
# }
# Might be generated from: <input type="text" name="username">
my @safe_elements = qw/ hidden radio select submit /;
foreach my $key ( keys %$err_hash ) {
if ( grep { /$$err_hash{ $key }->[0]/ } @safe_elements ) {
# The value of an element whose value shouldn't change ap
+pears to be wrong
sec_log( $$err_hash{ $key }, $key );
} else {
# insert error handling code here
}
}
}
sub sec_log {
my ( $bad_data, $element_name ) = @_;
# $bad_data is a reference to an anoymous array.
# [ element type, value, error_message ]
# Example: [ 'text', '%#$!#$', "You must supply a proper value fo
+r 'username'..." ]
# Might be generated from: <input type="text" name="username">
}
=head1 COPYRIGHT
Copyright (c) 2001 Curtis A. Poe. All rights reserved.
This program is free software; you may redistribute it and/or modify i
+t under
the same terms as Perl itself. The author does not warranty this code
+ for any
particular purpose and strongly recommends that the programmer using t
+his code
have a thorough understanding of Perl security issues. The author acc
+epts
absolutely no responsibility for problems arising from this code. If
+you do
not agree to these terms, please do not use this code.
=head1 AUTHOR
Curtis A. Poe <poec@yahoo.com>
Address bug reports and comments to: poec@yahoo.com. When sending bug
+ reports,
please provide the HTML the program was run against.
=head1 MISCELLANEOUS
This program does not produce production-ready code. It is merely int
+ended to simplify the
process of creating code to read in form data. The untainting section
+ will need to be reviewed
carefully and modified to suit the programmer's needs. Further, no I<
+actual> error handling is
provided in the event that a regex fails to match. As the needs can v
+ary widely, I have
not tried to implement this. You have the error stubs that will have
+an error hash passed. The
rest is up to you.
Note that code is generated with spaces for indenting instead of tabs.
+ As different editors render
tabs at different widths, I felt it prudent to go for safety, as a spa
+ce is a space is a space.
Use of C<$HTML::Parser::VERSION> which is less than 3.25 may result in
+ incomplete output as older
versions do not always handle XHTML correctly. It is the programmer's
+ responsibility to verify
that the output of this code matches the programmer's needs. Naturall
+y, bugs may exist and it's
possible that some form elements may not be properly dealt with by thi
+s code.
The author strongly recommends that you use a validator such as Tidy (
+L<http://www.w3.org/People/Raggett/tidy/>)
to validate your HTML. C<HTML::Parser> does an excellent job of deali
+ng with 'dirty' HTML, but
improperly formatted documents with overlapping E<lt>formE<gt> element
+s (amongst other things),
will cause unpredictable output.
=cut
|