# https://fallabs.com/kyotocabinet/api/

# Inline::C variant: binds open_db, close_db and search_db to Perl using
# the Kyoto Cabinet C API.

use Inline C => config => inc  => '-I/usr/local/include';
use Inline C => config => libs => '-L/usr/local/lib -lkyotocabinet';

use Inline C => <<'EOC';

#include <kclangc.h>
#include <string.h>

// A single database handle and a single cursor are shared by every call.
KCDB  *db  = NULL;
KCCUR *cur = NULL;

// open the database
// Opens 'file' as a reader without locking and creates the shared cursor.
// Returns 1 on success, 0 on failure.

int open_db(char* file) {
   db = kcdbnew();

   if (!kcdbopen(db, file, KCOREADER | KCONOLOCK)) {
      // release the handle so a failed open does not leak it
      kcdbdel(db);
      db = NULL;
      return 0;
   }

   cur = kcdbcursor(db);

   return 1;
}

// close the database
// Releases the cursor first, then closes and destroys the database handle.
// Returns 1 on success, 0 if the close failed or nothing was open.

int close_db() {
   int ok;

   if (cur) {
      kccurdel(cur);
      cur = NULL;   // never leave a dangling cursor behind
   }

   if (!db) return 0;

   ok = kcdbclose(db) ? 1 : 0;

   // the handle created by kcdbnew must be destroyed even if close failed
   kcdbdel(db);
   db = NULL;

   return ok;
}

// search records containing substring
// Walks every record from the first one and collects the keys whose value
// contains 'substr'. Returns a reference to an array of matching keys; the
// array is empty when nothing matches or when no database is open.

SV * search_db(char* substr) {
   AV *ret = newAV();
   char *kbuf;
   const char *cvbuf;
   size_t ksiz, vsiz;

   if (cur) {
      kccurjump(cur);

      // kccurget returns the key region and points cvbuf into that same
      // region; passing 1 steps the cursor to the next record
      while ((kbuf = kccurget(cur, &ksiz, &cvbuf, &vsiz, 1)) != NULL) {
         if (strstr(cvbuf, substr) != NULL) {
            av_push(ret, newSVpvn(kbuf, ksiz));
         }
         kcfree(kbuf);
      }
   }

   return newRV_noinc((SV *) ret);
}

EOC

####

# https://fallabs.com/kyotocabinet/api/

# Inline::CPP variant: same three functions using the Kyoto Cabinet C++ API.

use Inline CPP => config => inc  => '-I/usr/local/include';
use Inline CPP => config => libs => '-L/usr/local/lib -lkyotocabinet';

use Inline CPP => <<'EOCPP';

#undef do_open
#undef do_close

#include <kcpolydb.h>
#include <string.h>

using namespace std;
using namespace kyotocabinet;

// A single database object and a single cursor are shared by every call.
PolyDB db;
DB::Cursor *cur = NULL;

// open the database
// Opens 'file' as a reader without locking and creates the shared cursor.
// Returns 1 on success, 0 on failure.

int open_db(char* file) {
   if (!db.open(file, PolyDB::OREADER | PolyDB::ONOLOCK)) return 0;

   cur = db.cursor();

   return 1;
}

// close the database
// Deletes the cursor, then closes the database.
// Returns 1 on success, 0 if the close failed.

int close_db() {
   delete cur;    // deleting a null pointer is a no-op
   cur = NULL;    // guard against a double delete on a repeated call

   if (!db.close()) return 0;

   return 1;
}

// search records containing substring
// Walks every record from the first one and collects the keys whose value
// contains 'substr'. Returns a reference to an array of matching keys; the
// array is empty when nothing matches or when no database is open.

SV * search_db(char* substr) {
   AV *ret = newAV();
   string ckey, cvalue;

   if (cur) {
      cur->jump();

      // the third argument steps the cursor to the next record
      while (cur->get(&ckey, &cvalue, true)) {
         if (strstr(cvalue.c_str(), substr) != NULL) {
            av_push(ret, newSVpvn(ckey.c_str(), ckey.length()));
         }
      }
   }

   return newRV_noinc((SV *) ret);
}

EOCPP

####

# https://www.perlmonks.org/?node_id=11110379
# usage: perl search_db_inline_c.pl > Outfile.txt

use strict;
use warnings;

# insert the Inline::C or Inline::CPP code here

open_db('db.kch#msiz=128m') or die "db.kch: open error\n";

open my $fh, '<', 'peptides.txt' or
   die "open error: $!\n";

# one peptide per input line; print it with the matching ids, if any
while ( my $pep = <$fh> ) {
   chomp $pep;
   my $ids = search_db($pep);
   print "$pep\t", join(',', @$ids), "\n" if @$ids;
}

close $fh;
close_db();

####

# https://www.perlmonks.org/?node_id=11110379
# usage: perl search_db_inline_c_mce.pl > Outfile.txt

use strict;
use warnings;

# insert the Inline::C or Inline::CPP code here

use MCE;

my $mce = MCE->new(
   max_workers => MCE::Util::get_ncpu(),
   chunk_size  => 1,
   init_relay  => 1,

   # each worker opens the database when it starts
   user_begin => sub {
      open_db('db.kch#msiz=128m') or die "db.kch: open error\n";
   },

   # and closes it when it ends
   user_end => sub {
      close_db();
   },

   # with chunk_size 1, $_ holds a single input line
   user_func => sub {
      my $pep = $_;
      chomp $pep;
      my $ids = search_db($pep);

      # output serially, one worker at a time
      MCE::relay {
         print "$pep\t", join(',', @$ids), "\n" if @$ids;
      };
   }
);

$mce->process('peptides.txt');
$mce->shutdown;

####

GAAGGACTGGGACCA >NR_000001,>NR_006611
AGGCTGCGGCAGGAC >NR_062102
GTGAGCCGGGCAGAG >NR_089584
AGGGGGGGTTGCTGA >NR_036454,>NR_068535,>NR_097889
CTGACATGCGGCGCA >NR_087289
GTGCATCGATGGCCG >NR_005535
GGGGTCAAGCGAACC >NR_076289,>NR_087856
TGAGACGGCGAACCT >NR_064242
AGCGACAAAGGAAAC >NR_045865
AGGTGCAACCATGGA >NR_046602,>NR_056869
GAGTAAACCGCGCGA >NR_093455
AACGACTGAACAGCG >NR_070693
ACGCGTAATTCGATA >NR_080086
GATGAGCGGAGCACT >NR_070118
CGTAGCGAAACCGAG >NR_092384,>NR_098291
GGGGGGGAGGTCCGA >NR_021671,>NR_036907,>NR_080961
AGGGAGGGGGGTTGT >NR_026207
ATGGGGCAGACGCGA >NR_072314

####

62.883 seconds - op's code
33.594 seconds - search_db.pl
6.367 seconds - search_db_mce.pl
4.991 seconds - search_db_chunk.pl
8.093 seconds - search_db_inline_c.pl
1.530 seconds - search_db_inline_c_mce.pl

####

36.684 seconds - search_db.pl
7.403 seconds - search_db_mce.pl
5.986 seconds - search_db_chunk.pl
11.009 seconds - search_db_inline_c.pl
2.188 seconds - search_db_inline_c_mce.pl

####

28.857 seconds - search_db.pl
6.130 seconds - search_db_mce.pl
5.219 seconds - search_db_chunk.pl
27.322 seconds - search_db_inline_c.pl
5.808 seconds - search_db_inline_c_mce.pl