Beefy Boxes and Bandwidth Generously Provided by pair Networks
Think about Loose Coupling
 
PerlMonks  

comment on

( [id://3333]=superdoc: print w/replies, xml ) Need Help??

This is a continuation of Re: Problem in RAM usage while threading the program.

Update: Added Inline::CPP code.

Part Seven Inline::C and Inline::CPP demonstrations.

There was one more thing to try. Of all things, Inline::C / CPP on Linux to see what improvements that would bring.

Inline::C

# https://fallabs.com/kyotocabinet/api/ use Inline C => config => inc => '-I/usr/local/include'; use Inline C => config => libs => '-L/usr/local/lib -lkyotocabinet'; use Inline C => <<'EOC'; #include <string.h> #include <kclangc.h> KCDB *db; KCCUR *cur; // open the database int open_db(char* file) { db = kcdbnew(); if (!kcdbopen(db, file, KCOREADER | KCONOLOCK)) return 0; cur = kcdbcursor(db); return 1; } // close the database int close_db() { if (cur) kccurdel(cur); if (!kcdbclose(db)) return 0; return 1; } // search records containing substring SV * search_db(char* substr) { AV *ret = newAV(); char *kbuf, *vbuf; size_t ksiz, vsiz; const char *cvbuf; kccurjump(cur); while ((kbuf = kccurget(cur, &ksiz, &cvbuf, &vsiz, 1)) != NULL) { if (strstr(cvbuf, substr) != NULL) { av_push(ret, newSVpvn(kbuf, ksiz)); } kcfree(kbuf); } return newRV_noinc((SV *) ret); } EOC

Inline::CPP

# https://fallabs.com/kyotocabinet/api/ use Inline CPP => config => inc => '-I/usr/local/include'; use Inline CPP => config => libs => '-L/usr/local/lib -lkyotocabinet'; use Inline CPP => <<'EOCPP'; #undef do_open #undef do_close #include <string.h> #include <kcpolydb.h> using namespace std; using namespace kyotocabinet; PolyDB db; DB::Cursor *cur; // open the database int open_db(char* file) { if (!db.open(file, PolyDB::OREADER | PolyDB::ONOLOCK)) return 0; cur = db.cursor(); return 1; } // close the database int close_db() { if (cur) delete cur; if (!db.close()) return 0; return 1; } // search records containing substring SV * search_db(char* substr) { AV *ret = newAV(); string ckey, cvalue; cur->jump(); while (cur->get(&ckey, &cvalue, true)) { if (strstr(cvalue.c_str(), substr) != NULL) { av_push(ret, newSVpvn(ckey.c_str(), ckey.length())); } } return newRV_noinc((SV *) ret); } EOCPP

Serial

# https://www.perlmonks.org/?node_id=11110379 # usage: perl search_db_inline_c.pl > Outfile.txt use strict; use warnings; # insert the Inline::C or Inline::CPP code here open_db('db.kch#msiz=128m') or die "db.kch: open error\n"; open my $fh, '<', 'peptides.txt' or die "open error: $!\n"; while ( my $pep = <$fh> ) { chomp $pep; my $ids = search_db($pep); print "$pep\t", join(',', @$ids), "\n" if @$ids; } close $fh; close_db();

Parallel

# https://www.perlmonks.org/?node_id=11110379 # usage: perl search_db_inline_c_mce.pl > Outfile.txt use strict; use warnings; # insert the Inline::C or Inline::CPP code here use MCE; my $mce = MCE->new( max_workers => MCE::Util::get_ncpu(), chunk_size => 1, init_relay => 1, user_begin => sub { open_db('db.kch#msiz=128m') or die "db.kch: open error\n"; }, user_end => sub { close_db(); }, user_func => sub { my $pep = $_; chomp $pep; my $ids = search_db($pep); # output serially, one worker at a time MCE::relay { print "$pep\t", join(',', @$ids), "\n" if @$ids; }; } ); $mce->process('peptides.txt'); $mce->shutdown;

Outfile.txt

GAAGGACTGGGACCA >NR_000001,>NR_006611 AGGCTGCGGCAGGAC >NR_062102 GTGAGCCGGGCAGAG >NR_089584 AGGGGGGGTTGCTGA >NR_036454,>NR_068535,>NR_097889 CTGACATGCGGCGCA >NR_087289 GTGCATCGATGGCCG >NR_005535 GGGGTCAAGCGAACC >NR_076289,>NR_087856 TGAGACGGCGAACCT >NR_064242 AGCGACAAAGGAAAC >NR_045865 AGGTGCAACCATGGA >NR_046602,>NR_056869 GAGTAAACCGCGCGA >NR_093455 AACGACTGAACAGCG >NR_070693 ACGCGTAATTCGATA >NR_080086 GATGAGCGGAGCACT >NR_070118 CGTAGCGAAACCGAG >NR_092384,>NR_098291 GGGGGGGAGGTCCGA >NR_021671,>NR_036907,>NR_080961 AGGGAGGGGGGTTGT >NR_026207 ATGGGGCAGACGCGA >NR_072314

Benchmark - 8-core VM, CentOS 7.7

62.883 seconds - op's code 33.594 seconds - search_db.pl 6.367 seconds - search_db_mce.pl 4.991 seconds - search_db_chunk.pl 8.093 seconds - search_db_inline_c.pl 1.530 seconds - search_db_inline_c_mce.pl

Benchmark - 8-core VM, Xubuntu 18.04.3

36.684 seconds - search_db.pl 7.403 seconds - search_db_mce.pl 5.986 seconds - search_db_chunk.pl 11.009 seconds - search_db_inline_c.pl 2.188 seconds - search_db_inline_c_mce.pl

Benchmark - 8-core, macOS Mojave 10.14.6

28.857 seconds - search_db.pl 6.130 seconds - search_db_mce.pl 5.219 seconds - search_db_chunk.pl 27.322 seconds - search_db_inline_c.pl 5.808 seconds - search_db_inline_c_mce.pl

This completes the exercise. It boggles my mind comparing CentOS vs. Xubuntu. Ditto, Inline::C running faster on Linux vs. macOS. Testing was done with Perl 5.30.1. The virtualization is handled by VMware Fusion.

Regards, Mario


In reply to Re^3: Problem in RAM usage while threading the program by marioroy
in thread Problem in RAM usage while threading the program by beherasan

Title:
Use:  <p> text here (a paragraph) </p>
and:  <code> code here </code>
to format your post; it's "PerlMonks-approved HTML":



  • Are you posting in the right place? Check out Where do I post X? to know for sure.
  • Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
    <code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
  • Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
  • Want more info? How to link or How to display code and escape characters are good places to start.
Log In?
Username:
Password:

What's my password?
Create A New User
Domain Nodelet?
Chatterbox?
and the web crawler heard nothing...

How do I use this?Last hourOther CB clients
Other Users?
Others scrutinizing the Monastery: (5)
As of 2024-04-19 23:20 GMT
Sections?
Information?
Find Nodes?
Leftovers?
    Voting Booth?

    No recent polls found