#!/usr/bin/perl
#
# Scan a FASTA file for sequence windows that end in "GG" and write each
# distinct window once, as a numbered FASTA record, to the output file.
#
# A window is 21 characters long: 9 arbitrary characters, followed by
# 10 characters drawn from A/T/C/G, followed by "GG".  Header lines
# (those starting with '>') are skipped.
#
# Limitations visible in the code:
#   * Matching is done one input line at a time, so a window that spans a
#     line break in the FASTA file is never found.
#   * The scan uses /g in scalar context, which resumes at the end of the
#     previous match, so the windows found on a line do not overlap.
#   * Only upper-case A/T/C/G satisfy the 10-base part of the pattern.
#
# NOTE(review): uniqueness is keyed on the whole 21-character capture, not
# only on its final 12 characters, although the output file name mentions
# 12-mers -- confirm which is intended.

use strict;
use warnings;

my $input_file  = '/scratch/Drosophila/dmel-all-chromosome-r6.02.fasta';
my $output_file = 'unique12KmersEndingGG.fasta';

open my $FASTA, '<', $input_file
    or die "Cannot open '$input_file' because: $!";
open my $KMERS, '>', $output_file
    or die "Cannot open '$output_file' because: $!";

my $count = 0;       # running record number used in the '>crispr_N' headers
my %unique_data;     # captured window => number of times it has been seen

while ( my $line = <$FASTA> ) {

    # FASTA header lines carry no sequence data.
    next if $line =~ /^>/;

    # NOTE(review): \K only trims $&; it does not change $1 or where the
    # next /g match starts.  Kept as in the original -- confirm whether
    # overlapping matches were intended.
    while ( $line =~ / ( .{9} [ATCG]{10} G \K G ) /gsx ) {

        # The postfix condition is evaluated first, so $count is only
        # incremented for windows that are actually written.
        print {$KMERS} '>crispr_', ++$count, "\n$1\n"
            unless $unique_data{$1}++;
    }
}

close $FASTA;

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close must be checked.
close $KMERS
    or die "Cannot close '$output_file' because: $!";