###This script takes lines in the following format #Report Type:Defined Patient Summary #Ordered By: Numeric #Report Name:CKD all details report #Divisions: None #Creation Date: 27/09/2010 09:26:16 #Search Name: CKD audit population #Search Population: 2238 # #Mr Mickey Mouse - Patient No: 5 # #Patient ID :5 #Date Of Birth :07/02/1960 #Sex :M #NHS Number :1234567890 #Postcode :LL53 6EA #AllValues #Assigned Date,Term,Description,Value #15/09/2010,Serum calcium,,2.43 mmol/L #15/09/2010,Serum inorganic phosphate,,0.89 mmol/L #15/09/2010,Serum total protein,,63 g/L #15/09/2010,Serum albumin,,43 g/L #First Outfile Individual test results #5,Serum calcium,15/09/2010,2.43 #5,Serum inorganic phosphate,15/09/2010,0.89 #5,Serum total protein,15/09/2010,63 #5,Serum albumin,15/09/2010,43 #Second outfile - a register of all tests ( no duplicates) #Serum calcium #5,Serum inorganic phosphate #5,Serum total protein #5,Serum albumin #It creates a lookup file called lookup.csv in the format #5,1234567890,LL53 6EA,07/02/1960,M #Usage is the input file name, output file name use strict; my $ID; my $nhs; my $postcode; my $dob; my $sex; my %hash = (); my @keys; my @sorted; my @lines; if ( $#ARGV != 2 ) { die "usage $0 InputFileName OutputFileNameForData OutputFileNameForTestNames \n"; } open( INFILE, "< $ARGV[0]" ) || die "Cannot open $ARGV[0] $!\n"; open( OUTFILE, "> $ARGV[1]" ) || die "Cannot open $ARGV[1] $!\n"; open( OUTFILE2, "> $ARGV[2]" ) || die "Cannot open $ARGV[2] $!\n"; my $path = "lookup.CSV"; open( OUTFILE3, "> $path" ) || die "Cannot open LOOKUP.CSV $!\n"; my $lookup = ""; #my $n=0; # just a debug variable while () { #$n=$n+1; my $line = $_; if ( $line =~ m/^Patient ID.*?(\d+)/ ) { $ID = $1; my $blank = ""; print $ID. "\n"; %hash = (); #Find a new patient so blank everything out } elsif ( $line =~ m/^NHS.*?(\d+)/ ) { chomp; $nhs = $1; } elsif ( $line =~ m/^Date Of Birth/ ) { my $dob = $line; $dob =~ s/Date Of Birth ://; chomp $dob; } elsif ( $line =~ m/^Sex/ ) { my $sex = $line; $sex =~ s/Sex ://; chomp $sex; } elsif ( $line =~ m/^Post/ ) { $postcode = $line; $postcode =~ s/Postcode :(\w+)/$1/; chomp $postcode; $lookup = $lookup . $ID . "," . $nhs . "," . $postcode . "," . $dob . "," . $sex . "\n"; #Postcode is the last value in the identifier initial array, so at this point we can print out the lookup #$age = ; # pause #print $lookup; } elsif ( $line =~ m/^\d\d\/\d\d.*?/ ) { #if a line is a value line # print "$line"; my @names = split( ',', $line ); my $date = $names[0]; #date of test my $index = $names[1]; # is test name my $nonsense = $names[2]; # not used my $val = $names[3]; # value my $test = $names[3]; chomp $val; chomp $test; $test = tidyup_raw_data_test($test); $index = tidyup_raw_data_index($index); my $testother = "$test" . "0"; $index =~ s/\s$testother\s.*//; $index =~ s/\s$test\s.*//; $index =~ s/$testother//; $index =~ s/$test//; $index = delete_superfluous_testname_data ( $index ); $val = delete_units ( $val ); # print "-$test- substituted -$index- \n"; if ( $val != "" ) { #if the value is not blank $hash{$index} = $hash{$index} . $val . ","; #print $n."line no".$test."....test....".$val."\n"; a debug #my $age = ; # a pause for debug push @lines, $ID . "," . $index . "," . $date . "," . $val . "\n"; #HanDY TO TEST push @keys, $index . "\n"; } } } close(INFILE); #print "Sorting and collating records.....takes some time.....\n Wait for this window to close before continuing \n "; my @unique = (); my %seen = (); @unique = grep { !$seen{$_}++ } @keys; @sorted = sort { $a cmp $b } @unique; #print @sorted; print OUTFILE @lines; print "\n$. lines of data Processed. "; close(OUTFILE); print OUTFILE2 @sorted; close(OUTFILE2); print OUTFILE3 $lookup; close(OUTFILE3); sub tidyup_raw_data_test { my $name=shift; $name =~ s/\s$//; $name =~ s/\<|(\)|\()|\/|\^|\.|\*|//g; #substitute all operators $name =~ s/\s0//; return $name; } sub delete_units { # Deletes units from the end of a value my $name=shift; $name =~s/10\^.*\/L//; #Why on earth has the ^ replaced * in the dataset. ah well $name =~ s/mL\/min\/1.73m2//; $name =~ s/10\*.*//; $name =~ s/109L//; $name =~ tr/[0-9][.]//cd; return $name; } sub delete_superfluous_testname_data { my $name= shift; $name =~ s/\s+$//; $name =~ s/\.$//; $name =~ s/\s$//; $name =~ s/(OE -\w*)\d/$1/; $name =~ s/\s\d+\scm$//; $name =~ s/\s+$//; return $name; } sub tidyup_raw_data_index { my $name= shift; $name =~ s/(\d)(\.00)(\s)/$1.$3/; #delet .00s $name =~ s/(\d\.\d)(0)(\s)/$1.$3/; #delet .x0s $name =~ s/(\d)(\.0)(\s)/$1.$3/; #delet e.0s $name =~ s/\s(\.\d)/0.$1/; #add a 0 in front of a period This is so that the units match the old units and that replace works $name =~ s/\<|(\)|\()|\/|\^|\.|\*|//g; return $name; }