Re^4: Code Critique

A tidied up version. Not perfect and not enough subs. Dealing with multi input and multi output subs is something I haven't mastered yet but I've learnt a lot tonight.
###This script takes lines in the following format


#Report Type:Defined Patient Summary
#Ordered By:  Numeric
#Report Name:CKD all details report
#Divisions:  None
#Creation Date:  27/09/2010 09:26:16
#Search Name:  CKD audit population
#Search Population:   2238
#
#Mr  Mickey Mouse  - Patient No: 5
#
#Patient ID  :5
#Date Of Birth  :07/02/1960
#Sex  :M
#NHS Number  :1234567890
#Postcode  :LL53 6EA
#AllValues
#Assigned Date,Term,Description,Value
#15/09/2010,Serum calcium,,2.43 mmol/L
#15/09/2010,Serum inorganic phosphate,,0.89 mmol/L
#15/09/2010,Serum total protein,,63 g/L
#15/09/2010,Serum albumin,,43 g/L


#First Outfile Individual test results
#5,Serum calcium,15/09/2010,2.43
#5,Serum inorganic phosphate,15/09/2010,0.89
#5,Serum total protein,15/09/2010,63
#5,Serum albumin,15/09/2010,43

#Second outfile - a register of all tests ( no duplicates)
#Serum calcium
#5,Serum inorganic phosphate
#5,Serum total protein
#5,Serum albumin


#It creates a lookup file called lookup.csv in the format
#5,1234567890,LL53 6EA,07/02/1960,M
#Usage is  the input file name, output file name

      

        use strict;
        

my $ID;
my $nhs;
my $postcode;
my $dob;
my $sex;
my %hash = ();
my @keys;
my @sorted;
my @lines;

if ( $#ARGV != 2 ) { die "usage $0 InputFileName OutputFileNameForData
+ OutputFileNameForTestNames \n"; }




open( INFILE,  "< $ARGV[0]" ) || die "Cannot open $ARGV[0] $!\n";
open( OUTFILE, "> $ARGV[1]" ) || die "Cannot open $ARGV[1] $!\n";
open( OUTFILE2, "> $ARGV[2]" ) || die "Cannot open $ARGV[2] $!\n";
my $path = "lookup.CSV";
open( OUTFILE3, "> $path" ) || die "Cannot open LOOKUP.CSV $!\n";



my $lookup = "";
#my $n=0; # just a debug variable


while (<INFILE>) {
#$n=$n+1; 


    my $line = $_;
    if ( $line =~ m/^Patient ID.*?(\d+)/ ) {
        $ID    = $1;
        my $blank = "";

        print $ID. "\n";

        %hash = (); #Find a new patient so blank everything out 
    }
    elsif ( $line =~ m/^NHS.*?(\d+)/ ) {
        chomp;
        $nhs = $1;
    }

    elsif ( $line =~ m/^Date Of Birth/ ) {
        my $dob = $line;
        $dob =~ s/Date Of Birth  ://;
        chomp $dob;
    }

    elsif ( $line =~ m/^Sex/ ) {
        my $sex = $line;
        $sex =~ s/Sex  ://;
        chomp $sex;
    }

    elsif ( $line =~ m/^Post/ ) {
        $postcode = $line;
        $postcode =~ s/Postcode  :(\w+)/$1/;
        chomp $postcode;
        $lookup =
            $lookup 
          . $ID . "," 
          . $nhs . ","
          . $postcode . ","
          . $dob . ","
          . $sex . "\n"; #Postcode is the last value in the identifier
+ initial array, so at this point we can print out the lookup

        #$age = <STDIN>; # pause
        #print $lookup;
    }

    elsif ( $line =~ m/^\d\d\/\d\d.*?/ ) { #if a line is a value line

       # print "$line";
        my @names    = split( ',', $line );
        my $date     = $names[0];  #date of test
        my $index    = $names[1]; # is test name
        my $nonsense = $names[2]; # not used
        my $val      = $names[3]; # value
        my $test     = $names[3]; 
       
        chomp $val;
        chomp $test;
        
        $test = tidyup_raw_data_test($test);
        $index =  tidyup_raw_data_index($index);
            
        my $testother = "$test" . "0";
   
       
        $index =~ s/\s$testother\s.*//;
        $index =~ s/\s$test\s.*//;
        $index =~ s/$testother//;
        $index =~ s/$test//;   
       
       
        $index = delete_superfluous_testname_data ( $index );
        $val = delete_units ( $val );
        # print "-$test-  substituted    -$index-  \n";

        if ( $val != "" ) {    #if the value is not blank
            $hash{$index} = $hash{$index} . $val . ",";


#print $n."line no".$test."....test....".$val."\n"; a debug
#my $age = <STDIN>; # a pause for debug       

          push @lines,
                $ID . "," 
              . $index . "," 
              . $date . "," 
              . $val
              . "\n";    #HanDY TO TEST
            push @keys, $index . "\n";
        }
    }
}
close(INFILE);

#print "Sorting and collating records.....takes some time.....\n Wait 
+for this window to close before continuing \n ";
my @unique = ();
my %seen   = ();
@unique = grep { !$seen{$_}++ } @keys;
@sorted = sort { $a cmp $b } @unique;

#print @sorted;

print OUTFILE @lines;
print "\n$. lines of data Processed. ";
close(OUTFILE);

print OUTFILE2 @sorted;
close(OUTFILE2);

print OUTFILE3 $lookup;
close(OUTFILE3);

sub tidyup_raw_data_test {

        my $name=shift;
        $name  =~ s/\s$//;  
        $name  =~ s/\<|(\)|\()|\/|\^|\.|\*|//g;    #substitute all ope
+rators
        $name  =~ s/\s0//;    
        return $name;
}

sub delete_units { # Deletes units from the end of a value


my $name=shift;
        $name =~s/10\^.*\/L//; #Why on earth has the ^ replaced * in t
+he dataset.  ah well
        $name =~ s/mL\/min\/1.73m2//; 
        $name =~ s/10\*.*//;  
        $name =~ s/109L//;
        $name =~ tr/[0-9][.]//cd;
        return $name;
}

sub delete_superfluous_testname_data {
my $name= shift;
        $name =~ s/\s+$//;   
        $name =~ s/\.$//;  
        $name =~ s/\s$//;    
        $name =~ s/(OE -\w*)\d/$1/;           
        $name =~ s/\s\d+\scm$//;
        $name =~ s/\s+$//;  
        
        return $name;
}
        
        
        sub tidyup_raw_data_index {
my $name= shift;
        $name =~ s/(\d)(\.00)(\s)/$1.$3/;     #delet     .00s
        $name =~ s/(\d\.\d)(0)(\s)/$1.$3/;    #delet     .x0s
        $name =~ s/(\d)(\.0)(\s)/$1.$3/;      #delet e.0s
        $name =~ s/\s(\.\d)/0.$1/; #add a 0 in front of a period  This
+ is so that the units match the old units and that replace works
        $name =~ s/\<|(\)|\()|\/|\^|\.|\*|//g; 
                return $name;
}
[download]
Comment on Re^4: Code Critique Download Code

Do you know where your variables are?
	PerlMonks