Hello monks,
I have some problems with troubleshooting my script.. Because my input files have so MANY lines, I had to make mock input files to make sure my output file looks good. So my program works well with my mock input files, but when I use large input files (lines about 360k), my foreach loop works for only 357k lines.. I did not see any problem with my original input files. I would very appreciate if you could locate the problem. Let me know if you need any further information. :)
## Script description: This program takes input and match the chromoso
+me location from Bowtie parser file,
# then adds the read # to the window (also includi
+ng neighboring windows)
## Perl interpreter command
# use warnings;
use strict;
## Hashes initialized
my %input;
my %bowtieP;
## Open files
# file1 - input: create window # from position
#my $file1 = shift; # Input (chr # and position - base pair)
open (FILE1, "/data/GAII/prostate_cells/SNP/Prostate_SNP_filtered.txt"
+);
my $head = <FILE1>; # If there is a header in the input #1, include th
+is code
while (<FILE1>) {
chomp;
my $input_orig = $_;
my @line = split /\s+/, $_;
my $chr = $line[1];
my $w = (int($line[2]/100))*100; # Window # created
my $w1 = $w-100;
my $w2 = $w+100;
my $pos = "chr$chr\_$w"; # Get new variable chr_win for inpu
+t
$input{$pos}[0] = $input_orig;
$input{$pos}[1] = $pos;
$input{$pos}[2] = 0; # Read value initialized as 0
$input{$pos}[3] = $chr;
$input{$pos}[4] = $w;
$input{$pos}[5] = "chr$chr\_$w1";
$input{$pos}[6] = "chr$chr\_$w2";
$input{$pos}[7] = 0; # Read value for win-100
$input{$pos}[8] = 0; # Read value for win+100
}
close FILE1;
# file2 - bowtie parser output
#my $file2 = shift; # Bowtie Parser Input file (chr #, window #, and r
+ead value)
open (FILE2, "/data/GAII/prostate_cells/DU145v2_Bow_Per100.txt");
while (<FILE2>) {
chomp;
my @line = split /\s+/, $_;
my $chr = $line[0];
my $w = $line[1];
my $read = $line[3];
my $pos = "$chr\_$w"; # Get new variable chr_win for bowtie parse
+r
$bowtieP{$pos}[0] = $pos;
$bowtieP{$pos}[1] = $read;
}
close FILE2;
open (OUT, "> test.txt"); ### Change if file name changes
print OUT "Name\tChr\tPosition\tGenTrain Score\tPrEC\tPrEC alleles\tRW
+PE\tRWPE alleles\tLNCaP\tLNCaP alleles\tDU145\tDU145 alleles\tchr_win
+\tread\tchr_win-100\tread-100\tchr_win+100\tread+100\ttotal\n";
foreach my $pos (keys %input) {
if (exists $bowtieP{$input{$pos}[1]}[0]) {
$input{$pos}[2]=$bowtieP{$pos}[1]; # Change read value from
+ input (initialized as 0) to the read value from bowtie parser data
}
if (exists $bowtieP{$input{$pos}[5]}[1]) {
$input{$pos}[7] = $bowtieP{$input{$pos}[5]}[1]; # Change r
+ead value from input (initialized as 0) to the read value from bowtie
+ parser data
}
if (exists $bowtieP{$input{$pos}[6]}[1]) {
$input{$pos}[8] = $bowtieP{$input{$pos}[6]}[1]; # Change r
+ead value from input (initialized as 0) to the read value from bowtie
+ parser data
}
my $total = $input{$pos}[2] + $input{$pos}[7] + $input{$pos}[8];
print OUT "$input{$pos}[0]\t$input{$pos}[1]\t$input{$pos}[2]\t$
+input{$pos}[5]\t$input{$pos}[7]\t$input{$pos}[6]\t$input{$pos}[8]\t$t
+otal\n";
}
close OUT;
exit;
OMG IT'S PERL!!
~(o.o~) (~o.o)~
Perl and a blind date both require regular expression.. -_-