# https://www.perlmonks.org/?node_id=11110379
# usage: perl create_db.pl
use strict;
use warnings;
use KyotoCabinet;
# Build db.kch from NR.fasta: one record per FASTA entry, keyed by the first
# whitespace-delimited token of the header line, with the sequence lines
# concatenated as the value. Any db.kch from a previous run is removed first.
unlink 'db.kch';

# construct the database object
my $db = KyotoCabinet::DB->new();

# open the database
if (! $db->open('db.kch#msiz=128m', $db->OWRITER | $db->OCREATE)) {
    die "open error (db): ", $db->error;
}

open my $fh, '<', 'NR.fasta' or die "open error: $!\n";

my $nrid = '';    # header token of the entry being read (keeps the leading '>')
my $seq  = '';    # sequence collected so far for that entry

# Write the entry collected so far. Nothing is written while $seq is empty
# (e.g. before the first header). A failed write aborts the run instead of
# silently leaving a record out of the database.
my $store = sub {
    return if !length $seq;
    $db->set($nrid, $seq)
        or die "set error (db): ", $db->error;
    return;
};

while ( my $line = <$fh> ) {
    chomp $line;
    $line =~ s/\r$//;    # tolerate CRLF line endings

    if ( $line =~ /^>/ ) {
        # A new header ends the previous entry.
        $store->();
        $nrid = ( split /\s/, $line, 2 )[0];
        $seq  = '';
    }
    else {
        $seq .= $line;
    }
}

# The last entry has no following header to trigger its store.
$store->();

close $fh;

# Report a failed close rather than ignoring it.
if (! $db->close) {
    die "close error (db): ", $db->error;
}
####
# https://www.perlmonks.org/?node_id=11110379
# usage: perl search_db.pl > Outfile.txt
use strict;
use warnings;
use KyotoCabinet;
# For every peptide in peptides.txt, scan all records of db.kch and print
# "<peptide>\t<comma-separated keys>" for the records whose value contains the
# peptide. Peptides without any hit produce no output line.

# construct the database object
my $db = KyotoCabinet::DB->new();

# open the database
if (! $db->open('db.kch#msiz=128m', $db->OREADER | $db->ONOLOCK)) {
    die "open error (db): ", $db->error;
}

my $cur = $db->cursor;

open my $fh, '<', 'peptides.txt' or die "open error: $!\n";

while ( my $pep = <$fh> ) {
    chomp $pep;
    $pep =~ s/\r$//;    # tolerate CRLF line endings, as create_db.pl does

    # index($val, '') is 0 for every value, so a blank line would "match"
    # the whole database; skip it.
    next if !length $pep;

    my @ids;
    $cur->jump;    # first record
    while ( my ($key, $val) = $cur->get(1) ) {    # get(1): read, then step
        push @ids, $key if index($val, $pep) >= 0;
    }

    print "$pep\t", join(',', @ids), "\n" if @ids;
}

close $fh;
$cur->disable;
$db->close;
##
##
# https://www.perlmonks.org/?node_id=11110379
# usage: perl search_db_mce.pl > Outfile.txt
use strict;
use warnings;
use KyotoCabinet;
use MCE;
# Parallel variant of the peptide search: MCE hands each worker one line of
# peptides.txt at a time (chunk_size => 1); every worker holds its own
# read-only handle and cursor on db.kch, opened in user_begin.

# construct the database object
my $db = KyotoCabinet::DB->new();
my $cur;

my $mce = MCE->new(
    max_workers => MCE::Util::get_ncpu(),
    chunk_size  => 1,
    init_relay  => 1,

    user_begin => sub {
        # open the database
        if (! $db->open('db.kch#msiz=128m', $db->OREADER | $db->ONOLOCK)) {
            die "open error (db): ", $db->error;
        }
        $cur = $db->cursor;
    },

    user_end => sub {
        # close the database
        $cur->disable;
        $db->close;
    },

    user_func => sub {
        my $pep = $_;
        chomp $pep;
        $pep =~ s/\r$//;    # tolerate CRLF line endings

        # A blank line is not searched: index($val, '') is 0 for every
        # value, so it would otherwise "match" the whole database.
        my @ids;
        if ( length $pep ) {
            $cur->jump;    # first record
            while ( my ($key, $val) = $cur->get(1) ) {
                push @ids, $key if index($val, $pep) >= 0;
            }
        }

        # output serially, one worker at a time. The relay is entered even
        # when there is nothing to print (including skipped blank lines),
        # as the original did, so the per-chunk ordering is not disturbed.
        MCE::relay {
            print "$pep\t", join(',', @ids), "\n" if @ids;
        };
    }
);

$mce->process('peptides.txt');
$mce->shutdown;
##
##
# https://www.perlmonks.org/?node_id=11110379
# usage: perl search_db_chunk.pl > Outfile.txt
use strict;
use warnings;
use KyotoCabinet;
use MCE;
# Chunked parallel variant of the peptide search: each worker receives up to
# seven lines of peptides.txt at once and tests all of them during a single
# pass over db.kch, instead of one pass per peptide.

# construct the database object
my $db = KyotoCabinet::DB->new();
my $cur;

my $mce = MCE->new(
    max_workers => MCE::Util::get_ncpu(),
    chunk_size  => 7,
    init_relay  => 1,

    user_begin => sub {
        # open the database
        if (! $db->open('db.kch#msiz=128m', $db->OREADER | $db->ONOLOCK)) {
            die "open error (db): ", $db->error;
        }
        $cur = $db->cursor;
    },

    user_end => sub {
        # close the database
        $cur->disable;
        $db->close;
    },

    user_func => sub {
        my ( $mce, $chunk_ref, $chunk_id ) = @_;

        chomp @$chunk_ref;
        s/\r$// for @$chunk_ref;    # tolerate CRLF line endings

        # One result list per chunk position. Keying the results by peptide
        # text instead would merge duplicate peptides within a chunk and
        # append every matching key once per duplicate.
        my @ids_for = map { [] } @$chunk_ref;

        # Positions worth searching: a blank line is left out because
        # index($val, '') is 0 for every value.
        my @wanted = grep { length $chunk_ref->[$_] } 0 .. $#{$chunk_ref};

        if (@wanted) {
            $cur->jump;    # first record
            while ( my ($key, $val) = $cur->get(1) ) {
                for my $i (@wanted) {
                    push @{ $ids_for[$i] }, $key
                        if index($val, $chunk_ref->[$i]) >= 0;
                }
            }
        }

        # output serially, one worker at a time. The relay is entered even
        # when the chunk has nothing to print, as the original did.
        MCE::relay {
            for my $i ( 0 .. $#{$chunk_ref} ) {
                next if !@{ $ids_for[$i] };
                print "$chunk_ref->[$i]\t", join(',', @{ $ids_for[$i] }), "\n";
            }
        };
    }
);

$mce->process('peptides.txt');
$mce->shutdown;
##
##
GAAGGACTGGGACCA >NR_000001,>NR_006611
AGGCTGCGGCAGGAC >NR_062102
GTGAGCCGGGCAGAG >NR_089584
AGGGGGGGTTGCTGA >NR_036454,>NR_068535,>NR_097889
CTGACATGCGGCGCA >NR_087289
GTGCATCGATGGCCG >NR_005535
GGGGTCAAGCGAACC >NR_076289,>NR_087856
TGAGACGGCGAACCT >NR_064242
AGCGACAAAGGAAAC >NR_045865
AGGTGCAACCATGGA >NR_046602,>NR_056869
GAGTAAACCGCGCGA >NR_093455
AACGACTGAACAGCG >NR_070693
ACGCGTAATTCGATA >NR_080086
GATGAGCGGAGCACT >NR_070118
CGTAGCGAAACCGAG >NR_092384,>NR_098291
GGGGGGGAGGTCCGA >NR_021671,>NR_036907,>NR_080961
AGGGAGGGGGGTTGT >NR_026207
ATGGGGCAGACGCGA >NR_072314