#!/usr/bin/perl use warnings; use strict; use Getopt::Long; use List::Util qw(shuffle); my $k=6; my $window=31; my $bins=2*$window+$k; my %ktc; my $sequence_fname; my $wiggle_fname; my $permute=0; my $normalize_fname; my $out='default_out'; GetOptions ( "o=s" => \$out, "s=s" => \$sequence_fname, "wig=s" => \$wiggle_fname, "n=s" => \$normalize_fname, "p=i" => \$permute); ############initialize %ktc $out=$out.'counts.tab'; my $letters=[ qw(A C G T N) ]; my @words=&combin($letters,$k ); foreach(@words){ @{$ktc{$_}}= ((0)x$bins) ; } ########## ##########Load normalization file data open(NORM,"<$normalize_fname")||die "Cannot open normalization file\n"; my %normalize; while (){ my @a=split(/\t/,$_); $normalize{$a[0]}=$a[1]; } close NORM; ##########Load position counts my %counts; open (CTS,'<',$wiggle_fname)||die "Cannot open wiggle file\n"; my $cur_chr=''; my $pos=''; my $counts=''; while (){ if($_=~/variableStep chrom=(.*?)$/){ $cur_chr=$1; $counts{$cur_chr}={}; } else{ ($pos,$counts)=split(/\t/,$_); $counts{$cur_chr}{$pos}=$counts; } } close CTS; ####### #########Map counts to orfs my $test=0; if(!($permute)){ open(SEQ,'<',$sequence_fname)||die "Cannot open sequence file\n"; while (my @line=split (/\t/,)){ my $ct=0; my $start=$line[1]; if ($start < $line[2]){##Watson Strand until ($start > $line[2]-$window){ for(my $i=0;$i<$bins;++$i){ if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){ ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]}; } else{ ++$test; } } ++$start; ++$ct; } } else{##Crick Strand until($start < $line[2]+$window){ for(my $i=0;$i<$bins;++$i){ if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){ ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]}; } else{ ++$test; } } --$start; ++$ct; } } } foreach my $key(keys %ktc){ if($key=~/N/){ next; } print $key,"\t", "@{$ktc{$key}}\n"; } } else{ for(my $r=0;$r<$permute;++$r){#for all permutation iterations foreach my $key(keys %counts){#for each chromosome my @values = shuffle(values %{$counts{$key}});#make a array of values $counts{$key}{$_} = shift @values for keys %{$counts{$key}};#permute the values to a read position in the chromosome } foreach(@words){ @{$ktc{$_}}= ((0)x$bins) ; } open(SEQ,'<',$sequence_fname)||die "Cannot open sequence file\n";#open the sequence file while (my @line=split (/\t/,)){#for each sequence record the nmer information my $ct=0; my $start=$line[1]; if ($start < $line[2]){##Watson Strand until ($start > $line[2]-$window){ for(my $i=0;$i<$bins;++$i){ if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){ ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]}; } else{ ++$test; } } ++$start; ++$ct; } } else{##Crick Strand until($start < $line[2]+$window){ for(my $i=0;$i<$bins;++$i){ if(defined $normalize{$line[3]} && $normalize{$line[3]} > 0 && $counts{$line[0]}{$start-$window+$i}){ ${$ktc{substr($line[4],$ct,$k)}}[$i]+=$counts{$line[0]}{$start-$window+$i}/$normalize{$line[3]}; } else{ ++$test; } } --$start; ++$ct; } } } close SEQ; open(OUT,">>$out")||die "Cannot open $out\n"; foreach my $key(keys %ktc){ if($key=~/N/){#skip if the nmer contains an N # $ktc{$key}=((0)x$bins);#reset the hash next; } print OUT $key,"\t","@{$ktc{$key}}\n";#print result # $ktc{$key}=((0)x$bins);#reset hash } close OUT; } } sub combin{ my ($data, $q) = @_; return if $q < 1; my $results = $data; while (--$q) { my @new; for my $letter (@$data) { push @new, map { $letter . $_ } @$results; } # end for $letter in @$data $results = \@new; } # end while --$q is not 0 return @$results; } # end ordered_combinations