I'm a Perl Newbie, and this is my first attempt at a Perl script.
Below is the source code.
I would like to make this work with the filename to be processed as a command line arugment, however every time I've tried using the while(<>){ } I see in the Perl books, the program runs once for every line in the file.
I know this code is clunky, and probably a lot longer than it needs to be, suggestions on shortening it would also be appreciated.
####################################################
# InvClean.pl : Raw Scanned File Processing program
# Version 1.0
# Written by Robb Pickinpaugh
# 01/31/2002
# for use on Windows NT
####################################################
use strict;
# Get Filename to process.
my $processfilename='';
print "\nEnter filename to process (type exit to quit): ";
chomp ($processfilename = <STDIN>);
###########################################
#
# Setting the Rules for Processing
#
###########################################
###########################################
#
# This sets to name of the file to which
# the corrected data will be saved
#
###########################################
my $cleanfilename = "$processfilename.clean";
###########################################
#
# This sets the numeric value
# for the "usual" starting character
# for each line in the raw file
#
###########################################
my $correctstartchar = 16;
############################################
#
# This sets the "usual" starting length for
# lines starting with the "usual" starting
# character.
#
############################################
my $correctstartlength = 16;
############################################
#
# This sets the correct length of lines
# after they have been stripped of extra
# characters.
#
############################################
my $correctcleanlength = 13;
##############################################
#
# This sets the length of lines that do not
# include the extra stop and start characters
# that are sometimes included in scanned data
#
##############################################
my $typedlength = 14;
###############################################
#
# Do not change these values, they are used to
# report the number of lines read, and written
#
###############################################
my $rawfilelength = 0;
my $cleanfilelength = 0;
###########################
#
# Call Processing Routine
#
###########################
&ProcessFile;
#############################################
#
# Report number of lines read from raw file,
# and written to "cleaned" file.
#
#############################################
print "$rawfilelength lines read from $processfilename\n";
print "$cleanfilelength lines written to $cleanfilename\n";
#################################
#
# Actual Processing of the File
#
#################################
sub ProcessFile {
my $data='';
my $datalength=0;
my $startchar='';
open (RAWFILE, "$processfilename") || die "cannot open: $!";
open (CLEANFILE, ">$cleanfilename") || die "cannot open: $!";
while (<RAWFILE>){
$rawfilelength++;
$data = $_;
$datalength = length($data);
$startchar = ord($data);
if ($startchar == $correctstartchar){
if($datalength == $correctstartlength){
chomp $data;
chop $data;
$data = reverse ($data);
chop $data;
$data = reverse ($data);
}else{
next;
}
if (length($data) == $correctcleanlength){
print CLEANFILE "$data\n";
$cleanfilelength++;
}
}elsif ($datalength == $typedlength){
print CLEANFILE "$data";
$cleanfilelength++;
}elsif ($datalength > $correctcleanlength) {
my $datalengthtrack = $datalength;
chomp $data;
$datalengthtrack--;
chop $data;
$datalengthtrack--;
$data = reverse ($data);
while ($datalengthtrack > $correctcleanlength){
chop $data;
$datalengthtrack--;
}
$data = reverse ($data);
print CLEANFILE "$data\n";
$cleanfilelength++;
}elsif ($datalength < $correctcleanlength) {
next;
}
}
close (RAWFILE) || die "cannot close $processfilename: $!";
close (CLEANFILE) || die "cannot close $cleanfilename: $!";
}
print "\a";
exit(0);