Beefy Boxes and Bandwidth Generously Provided by pair Networks
Perl: the Markov chain saw
 
PerlMonks  

Re^3: Problem in RAM usage while threading the program

by marioroy (Parson)
on Dec 23, 2019 at 01:35 UTC ( #11110528=note: print w/replies, xml ) Need Help??


in reply to Re^2: Problem in RAM usage while threading the program
in thread Problem in RAM usage while threading the program

This is a continuation of Re: Problem in RAM usage while threading the program.

Update: Added Inline::CPP code.

Part Seven: Inline::C and Inline::CPP demonstrations.

There was one more thing to try: of all things, Inline::C / Inline::CPP on Linux, to see what improvement that would bring.

Inline::C

# https://fallabs.com/kyotocabinet/api/
#
# Inline::C bindings around the Kyoto Cabinet C API. Exposes three functions
# to Perl:
#   open_db($file)     - open the database read-only; returns 1 on success, 0 on failure
#   close_db()         - release the cursor and database; returns 1 on success, 0 on failure
#   search_db($substr) - returns an array ref of keys whose value contains $substr

use Inline C => config => inc  => '-I/usr/local/include';
use Inline C => config => libs => '-L/usr/local/lib -lkyotocabinet';

use Inline C => <<'EOC';

#include <string.h>
#include <kclangc.h>

// One database handle and one cursor per process; NULL means "not open".
KCDB  *db  = NULL;
KCCUR *cur = NULL;

// open the database (read-only, no file locking)
int open_db(char* file) {
    db = kcdbnew();
    if (!kcdbopen(db, file, KCOREADER | KCONOLOCK)) {
        // do not leak the database object when the open fails
        kcdbdel(db);
        db = NULL;
        return 0;
    }
    cur = kcdbcursor(db);
    return 1;
}

// close the database; safe to call more than once or without a prior open
int close_db() {
    int ok;

    // the cursor is released before the database it belongs to
    if (cur) {
        kccurdel(cur);
        cur = NULL;
    }
    if (!db) return 0;

    ok = kcdbclose(db) ? 1 : 0;
    kcdbdel(db);
    db = NULL;
    return ok;
}

// search records containing substring; returns a reference to an array of keys
SV * search_db(char* substr) {
    AV *ret = newAV();
    char *kbuf;
    size_t ksiz, vsiz;
    const char *cvbuf;

    // without an open cursor there is nothing to scan: return an empty list
    if (cur) {
        kccurjump(cur);   // rewind to the first record

        // kccurget returns a malloc'ed key region and sets cvbuf to point at the
        // value inside that same region; per the Kyoto Cabinet API docs both are
        // NUL-terminated, so strstr is usable. The final 1 steps the cursor.
        while ((kbuf = kccurget(cur, &ksiz, &cvbuf, &vsiz, 1)) != NULL) {
            if (strstr(cvbuf, substr) != NULL) {
                av_push(ret, newSVpvn(kbuf, ksiz));
            }
            kcfree(kbuf);
        }
    }

    return newRV_noinc((SV *) ret);
}

EOC

Inline::CPP

# https://fallabs.com/kyotocabinet/api/
#
# Inline::CPP bindings around the Kyoto Cabinet C++ API (PolyDB). Exposes
# three functions to Perl:
#   open_db($file)     - open the database read-only; returns 1 on success, 0 on failure
#   close_db()         - release the cursor and close the database; returns 1 / 0
#   search_db($substr) - returns an array ref of keys whose value contains $substr

use Inline CPP => config => inc  => '-I/usr/local/include';
use Inline CPP => config => libs => '-L/usr/local/lib -lkyotocabinet';

use Inline CPP => <<'EOCPP';

// NOTE(review): presumably these macros come from Perl's headers and clash
// with identifiers used by the C++ headers below -- confirm before removing.
#undef do_open
#undef do_close

#include <string.h>
#include <kcpolydb.h>

using namespace std;
using namespace kyotocabinet;

// One database object and one cursor per process; a NULL cursor means "not open".
PolyDB db;
DB::Cursor *cur = NULL;

// open the database (read-only, no file locking)
int open_db(char* file) {
    if (!db.open(file, PolyDB::OREADER | PolyDB::ONOLOCK)) return 0;
    cur = db.cursor();
    return 1;
}

// close the database; safe to call more than once
int close_db() {
    // release the cursor before closing; reset the pointer so that a repeated
    // call does not delete it twice (deleting NULL is a no-op)
    delete cur;
    cur = NULL;
    if (!db.close()) return 0;
    return 1;
}

// search records containing substring; returns a reference to an array of keys
SV * search_db(char* substr) {
    AV *ret = newAV();
    string ckey, cvalue;

    // without an open cursor there is nothing to scan: return an empty list
    if (cur != NULL) {
        cur->jump();   // rewind to the first record

        // get() copies the current key/value and, with step == true, advances
        while (cur->get(&ckey, &cvalue, true)) {
            if (strstr(cvalue.c_str(), substr) != NULL) {
                av_push(ret, newSVpvn(ckey.c_str(), ckey.length()));
            }
        }
    }

    return newRV_noinc((SV *) ret);
}

EOCPP

Serial

# https://www.perlmonks.org/?node_id=11110379
# usage: perl search_db_inline_c.pl > Outfile.txt
#
# Serial search: for every line of peptides.txt, print the line followed by a
# tab and the comma-separated keys of all database records whose value
# contains it. Lines without any match produce no output.

use strict;
use warnings;

# insert the Inline::C or Inline::CPP code here

open_db('db.kch#msiz=128m') or die "db.kch: open error\n";

open my $fh, '<', 'peptides.txt' or die "open error: $!\n";

while ( my $pep = <$fh> ) {
    chomp $pep;

    # An empty pattern is a substring of every value, so a blank line would
    # report every record in the database; skip it instead.
    next unless length $pep;

    my $ids = search_db($pep);
    print "$pep\t", join(',', @$ids), "\n" if @$ids;
}

close $fh;
close_db() or warn "db.kch: close error\n";

Parallel

# https://www.perlmonks.org/?node_id=11110379
# usage: perl search_db_inline_c_mce.pl > Outfile.txt
#
# Parallel search: same output as the serial script, but the lines of
# peptides.txt are distributed one at a time across MCE workers. Each worker
# opens its own database handle, and output is serialized through MCE::relay.

use strict;
use warnings;

# insert the Inline::C or Inline::CPP code here

use MCE;

my $mce = MCE->new(
    max_workers => MCE::Util::get_ncpu(),
    chunk_size  => 1,      # one input line per user_func call
    init_relay  => 1,      # enables MCE::relay for ordered output

    # every worker opens (and later closes) its own database handle
    user_begin => sub {
        open_db('db.kch#msiz=128m') or die "db.kch: open error\n";
    },
    user_end => sub {
        close_db() or warn "db.kch: close error\n";
    },

    user_func => sub {
        my $pep = $_;
        chomp $pep;

        # An empty pattern is a substring of every value, so a blank line
        # would report every record; treat it as "no match". Do not return
        # early here: MCE::relay below is still called for this chunk so the
        # relay chain keeps advancing.
        my $ids = length $pep ? search_db($pep) : [];

        # output serially, one worker at a time
        MCE::relay {
            print "$pep\t", join(',', @$ids), "\n" if @$ids;
        };
    }
);

$mce->process('peptides.txt');
$mce->shutdown;

Outfile.txt

GAAGGACTGGGACCA >NR_000001,>NR_006611 AGGCTGCGGCAGGAC >NR_062102 GTGAGCCGGGCAGAG >NR_089584 AGGGGGGGTTGCTGA >NR_036454,>NR_068535,>NR_097889 CTGACATGCGGCGCA >NR_087289 GTGCATCGATGGCCG >NR_005535 GGGGTCAAGCGAACC >NR_076289,>NR_087856 TGAGACGGCGAACCT >NR_064242 AGCGACAAAGGAAAC >NR_045865 AGGTGCAACCATGGA >NR_046602,>NR_056869 GAGTAAACCGCGCGA >NR_093455 AACGACTGAACAGCG >NR_070693 ACGCGTAATTCGATA >NR_080086 GATGAGCGGAGCACT >NR_070118 CGTAGCGAAACCGAG >NR_092384,>NR_098291 GGGGGGGAGGTCCGA >NR_021671,>NR_036907,>NR_080961 AGGGAGGGGGGTTGT >NR_026207 ATGGGGCAGACGCGA >NR_072314

Benchmark - 8-core VM, CentOS 7.7

62.883 seconds - op's code 33.594 seconds - search_db.pl 6.367 seconds - search_db_mce.pl 4.991 seconds - search_db_chunk.pl 8.093 seconds - search_db_inline_c.pl 1.530 seconds - search_db_inline_c_mce.pl

Benchmark - 8-core VM, Xubuntu 18.04.3

36.684 seconds - search_db.pl 7.403 seconds - search_db_mce.pl 5.986 seconds - search_db_chunk.pl 11.009 seconds - search_db_inline_c.pl 2.188 seconds - search_db_inline_c_mce.pl

Benchmark - 8-core, macOS Mojave 10.14.6

28.857 seconds - search_db.pl 6.130 seconds - search_db_mce.pl 5.219 seconds - search_db_chunk.pl 27.322 seconds - search_db_inline_c.pl 5.808 seconds - search_db_inline_c_mce.pl

This completes the exercise. It boggles my mind comparing CentOS vs. Xubuntu. Ditto for Inline::C running faster on Linux than on macOS. Testing was done with Perl 5.30.1. The virtualization is handled by VMware Fusion.

Regards, Mario

Log In?
Username:
Password:

What's my password?
Create A New User
Node Status?
node history
Node Type: note [id://11110528]
help
Chatterbox?
and the web crawler heard nothing...

How do I use this? | Other CB clients
Other Users?
Others contemplating the Monastery: (6)
As of 2020-09-24 14:55 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?
    If at first I don't succeed, I …










    Results (134 votes). Check out past polls.

    Notices?