The following code demonstrates how to pass parameters to a function (do_saxpy()) living in Inline::C and how to get the results back, both as arrayrefs (see also: Inline::C::Cookbook).
Edit: caveat: a "return NULL" from the C function does not translate back to undef in Perl-space. Is there an AV-equivalent of &PL_sv_undef?
#!/usr/bin/perl
# by bliako @ PerlMonks.org
# date: 01-Jul-2021
# see https://perlmonks.org/?node_id=11134582
# lame example for utilising GPGPU via Inline::C
# TODO: extend to taking params and returning back results
use strict;
use warnings;
use FindBin;
use Inline C => Config =>
cc => $FindBin::Bin.'/nvcc-compile.pl',
ld => $FindBin::Bin.'/nvcc-link.pl',
;
use Inline C => <<'EOC';
// from https://developer.nvidia.com/blog/easy-introduction-cuda-c-and-c/
// NOTE: don't use main(void), use main()!!!
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
AV *do_saxpy(int N, SV *_x, SV *_y);
int array_numelts(SV *array);
// Kernel: y[k] = a*x[k] + y[k] for a single element k per thread.
//   n : number of elements to process
//   a : scalar multiplier applied to x
//   x : input vector (read only here)
//   y : input/output vector, updated in place
// The element handled by a thread is blockIdx.x*blockDim.x + threadIdx.x,
// i.e. only the x dimension of the launch configuration is used. Threads
// whose element index falls at or beyond n do nothing, so the launch may
// contain more threads than there are elements.
__global__
void saxpy(int n, double a, double *x, double *y)
{
    const int idx = blockIdx.x*blockDim.x + threadIdx.x;

    // surplus threads of the last block have no element to work on
    if (idx >= n) return;

    y[idx] = a*x[idx] + y[idx];
}
/*
 * Validates that `array` is a reference to a Perl array and reports its size.
 *
 * Returns -1 when `array` is not a reference, when it references something
 * other than an array, or when the array is empty.
 * Otherwise returns the value of av_len(), which is the HIGHEST INDEX of the
 * array (element count minus one), not the element count itself: a
 * one-element array yields 0.
 */
int array_numelts(SV *array){
    int top_index;

    /* must be a reference at all ... */
    if( !SvROK(array) ) return -1;
    /* ... and the referenced thing must be an array */
    if( SvTYPE(SvRV(array)) != SVt_PVAV ) return -1;

    top_index = av_len((AV *)SvRV(array));
    /* av_len() is negative for an empty array: reported as failure too */
    if( top_index < 0 ) return -1;

    return top_index;
}
/*
 * do_saxpy(N, _x, _y) : computes 2.0*x[i] + y[i] on the GPU for the first N
 * elements of two Perl arrays and returns the N results.
 *
 * Parameters:
 *   N  : number of elements to process, must be positive
 *   _x : reference to a Perl array holding at least N numbers
 *   _y : reference to a Perl array holding at least N numbers
 *
 * Returns a new (mortal) AV with N numeric elements; the caller's arrays
 * are not modified.
 *
 * Errors: every failure (bad arguments, out of memory, any failing CUDA
 * call) is raised with croak(), i.e. as a Perl exception that the caller
 * can trap with eval{}. NULL is deliberately never returned, because a NULL
 * AV* is not translated into undef on the way back to Perl.
 *
 * Side effects: prints every input pair and the maximum absolute difference
 * between the GPU results and a reference computed on the host to stdout.
 */
AV* do_saxpy(
    int N,
    SV *_x,
    SV *_y
)
{
    const double a = 2.0;
    double *x = NULL, *y = NULL, *d_x = NULL, *d_y = NULL;
    double maxError = 0.0, diff = 0.0;
    AV *ret = NULL, *deref_x = NULL, *deref_y = NULL;
    SV **elem = NULL;
    size_t nbytes = 0;
    cudaError_t cuda_status = cudaSuccess;
    const char *failure = NULL;
    int i = 0;

    /* argument checks come first: nothing is allocated yet, so croak()
       can be called directly without leaking anything */
    if( N <= 0 ) croak("do_saxpy() : error, N must be positive.");
    if( (array_numelts(_x) < 0) || (array_numelts(_y) < 0) )
        croak("do_saxpy() : error, x and y must be references to non-empty arrays.");
    deref_x = (AV *)SvRV(_x);
    deref_y = (AV *)SvRV(_y);
    /* av_len() is the highest index, hence the +1 to get the element count */
    if( (av_len(deref_x) + 1 < N) || (av_len(deref_y) + 1 < N) )
        croak("do_saxpy() : error, x and y must each hold at least N=%d elements.", N);

    /* mortal, so it is also released if we croak() further down */
    ret = newAV();
    sv_2mortal((SV*)ret);

    nbytes = (size_t)N * sizeof(double);
    x = (double*)malloc(nbytes);
    y = (double*)malloc(nbytes);
    if( (x == NULL) || (y == NULL) ){ failure = "malloc (host buffers)"; goto cleanup; }

    /* copy the Perl arrays into plain C buffers */
    for(i=0;i<N;i++){
        /* av_fetch() yields NULL for a slot that was never assigned:
           treat such holes as 0 instead of dereferencing NULL */
        elem = av_fetch(deref_x, i, 0);
        x[i] = (elem != NULL) ? SvNV(*elem) : 0.0;
        elem = av_fetch(deref_y, i, 0);
        y[i] = (elem != NULL) ? SvNV(*elem) : 0.0;
        printf("do_saxpy() : got in x[%d]=%lf and y[%d]=%lf\n", i, x[i], i, y[i]);
    }

    cuda_status = cudaMalloc((void **)&d_x, nbytes);
    if( cuda_status != cudaSuccess ){ failure = "cudaMalloc (d_x)"; goto cleanup; }
    cuda_status = cudaMalloc((void **)&d_y, nbytes);
    if( cuda_status != cudaSuccess ){ failure = "cudaMalloc (d_y)"; goto cleanup; }

    cuda_status = cudaMemcpy(d_x, x, nbytes, cudaMemcpyHostToDevice);
    if( cuda_status != cudaSuccess ){ failure = "cudaMemcpy (x to device)"; goto cleanup; }
    cuda_status = cudaMemcpy(d_y, y, nbytes, cudaMemcpyHostToDevice);
    if( cuda_status != cudaSuccess ){ failure = "cudaMemcpy (y to device)"; goto cleanup; }

    /* the inputs are on the device now: reuse x[] to hold the host-side
       reference result that the GPU output is compared against below */
    for(i=0;i<N;i++){
        x[i] = a*x[i] + y[i];
    }

    /* Perform SAXPY on N elements, 256 threads per block;
       (N-1)/256+1 is the rounded-up block count without overflowing int */
    saxpy<<<(N-1)/256 + 1, 256>>>(N, a, d_x, d_y);
    /* a kernel launch returns nothing, launch errors are fetched explicitly */
    cuda_status = cudaGetLastError();
    if( cuda_status != cudaSuccess ){ failure = "saxpy kernel launch"; goto cleanup; }

    /* this copies data from GPU (d_y) onto CPU memory, we use y because
       it's just sitting there and no longer needed */
    cuda_status = cudaMemcpy(y, d_y, nbytes, cudaMemcpyDeviceToHost);
    if( cuda_status != cudaSuccess ){ failure = "cudaMemcpy (y to host)"; goto cleanup; }

    /* hand the results back as elements of the returned array */
    for(i=0;i<N;i++){
        av_push(ret, newSVnv(y[i]));
    }

    /* largest absolute deviation of the GPU result from the host reference */
    for(i=0;i<N;i++){
        diff = fabs(y[i] - x[i]);
        if( diff > maxError ) maxError = diff;
    }
    printf("do_saxpy() : Max error: %f\n", maxError);

cleanup:
    /* single exit path; cudaFree() and free() both accept NULL */
    cudaFree(d_x);
    cudaFree(d_y);
    free(x);
    free(y);
    if( failure != NULL ){
        if( cuda_status != cudaSuccess )
            croak("do_saxpy() : error, %s has failed: %s", failure, cudaGetErrorString(cuda_status));
        croak("do_saxpy() : error, %s has failed.", failure);
    }
    return ret;
}
EOC
# Driver: build two vectors of $N random numbers, hand them to the C function
# do_saxpy() as array references and print the array reference it returns.
my $N = 100; #1<<20;
my @x = map { rand() } 1..$N;
my @y = map { rand() } 1..$N;
# eval{} so that an exception raised inside do_saxpy() is reported here
# in the same way as an undef return value
my $results = eval { do_saxpy($N, \@x, \@y) };
# anything other than an array reference counts as failure: it must not
# reach the @$results dereference below
if( ! defined($results) || ref($results) ne 'ARRAY' ){
    my $reason = defined($@) ? $@ : '';
    $reason =~ s/\s+\z//;
    print STDERR "$0 : error, call to do_saxpy() has failed.\n";
    print STDERR "$0 : reason : $reason\n" if length $reason;
    exit(1);
}
# print, not printf: $0 is interpolated and must not be used as a format
print "$0 : back to perl-code ...\n";
print "$0 : (perl-code) : got back result :\n".join("\n", @$results)."\n";
bw, bliako
-
Are you posting in the right place? Check out Where do I post X? to know for sure.
-
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big>
<blockquote> <br /> <dd>
<dl> <dt> <em> <font>
<h1> <h2> <h3> <h4>
<h5> <h6> <hr /> <i>
<li> <nbsp> <ol> <p>
<small> <strike> <strong>
<sub> <sup> <table>
<td> <th> <tr> <tt>
<u> <ul>
-
Snippets of code should be wrapped in
<code> tags not
<pre> tags. In fact, <pre>
tags should generally be avoided. If they must
be used, extreme care should be
taken to ensure that their contents do not
have long lines (<70 chars), in order to prevent
horizontal scrolling (and possible janitor
intervention).
-
Want more info? How to link
or How to display code and escape characters
are good places to start.