comment on

The following code demonstrates how to pass parameters to the function (do_saxpy()) living in Inline::C and how to get back the results. Both as arrayrefs. (see also: Inline::C::Cookbook)

Edit: caveat: returning NULL does not translate back to undef in perl-space. Is there an AV-equivalent for &PL_sv_undef?

#!/usr/bin/perl

# by bliako @ PerlMonks.org
# date: 01-Jul-2021
# see https://perlmonks.org/?node_id=11134582
# lame example for utilising GPGPU via Inline::C
# TODO: extend to taking params and returning back results

use strict;
use warnings;

use FindBin;

use Inline C => Config =>
    cc => $FindBin::Bin.'/nvcc-compile.pl',
    ld => $FindBin::Bin.'/nvcc-link.pl',
;

use Inline C => <<'EOC';
// from https://developer.nvidia.com/blog/easy-introduction-cuda-c-and-c/
// NOTE: don't use main(void), use main()!!!
#include <stdio.h>

AV *do_saxpy(int N, SV *_x, SV *_y);
int array_numelts(SV *array);

/*
 * saxpy kernel: y[i] = a*x[i] + y[i] for every i in [0, n).
 *
 * n : number of elements to process (nothing is done when n <= 0)
 * a : scalar multiplier
 * x : device pointer to at least n input values (read only)
 * y : device pointer to at least n values, updated in place
 *
 * Launch layout: 1-D grid of 1-D blocks. The loop strides by the total
 * number of launched threads, so any grid/block size yields a correct
 * result, including a grid smaller than n. Indices are kept in size_t
 * so they cannot wrap when n is close to INT_MAX.
 */
__global__
void saxpy(int n, double a, double *x, double *y)
{
  const size_t count  = (n > 0) ? (size_t)n : 0;
  const size_t stride = (size_t)gridDim.x * blockDim.x;
  size_t i;

  for (i = (size_t)blockIdx.x * blockDim.x + threadIdx.x; i < count; i += stride) {
    y[i] = a*x[i] + y[i];
  }
}

/*
 * Returns the number of elements of the Perl array referenced by
 * `array`, or -1 when `array` is not a reference, does not refer to an
 * array, or refers to an empty array.
 */
int array_numelts(SV *array){
    SSize_t top;

    if( !SvROK(array) ) return -1;
    if( SvTYPE(SvRV(array)) != SVt_PVAV ) return -1;

    /* av_len() yields the highest index (-1 for an empty array),
       not the element count, hence the +1 below */
    top = av_len((AV *)SvRV(array));
    if( top < 0 ) return -1;

    return (int)(top + 1);
}

/*
 * Computes y[i] = 2*x[i] + y[i] on the GPU for the first N elements of
 * two Perl arrays and returns the results as an arrayref.
 *
 * N  : number of elements to process, must be positive and must not
 *      exceed the length of either input array
 * _x : reference to a Perl array of numbers
 * _y : reference to a Perl array of numbers
 *
 * Returns a (mortal) AV holding the N results.
 *
 * Errors (bad arguments, out of memory, any CUDA failure) are raised
 * with croak() instead of returning NULL, because a NULL AV* does not
 * translate back to undef on the Perl side. All host and device
 * buffers are released before croaking.
 *
 * Side effects: prints every input pair and the maximum absolute
 * difference between the GPU result and a host-side reference to
 * stdout.
 */
AV* do_saxpy(
    int N,
    SV *_x,
    SV *_y
)
{
  const double a = 2.0;
  double *x = NULL, *y = NULL, *expected = NULL;
  double *d_x = NULL, *d_y = NULL;
  double maxError = 0.0, diff;
  AV *deref_x, *deref_y;
  SV **elem;
  size_t nbytes;
  unsigned int nblocks;
  const char *failure = NULL;
  cudaError_t cuerr = cudaSuccess;
  int i;

  AV *ret = newAV();
  /* NOTE(review): presumably the typemap takes its own reference when
     it wraps the AV into an arrayref (see Inline::C::Cookbook), so the
     AV itself is made mortal here - confirm against the typemap */
  sv_2mortal((SV*)ret);

  /* nothing is allocated yet, so croaking right away leaks nothing */
  if( N <= 0 ) croak("do_saxpy() : error, N must be positive.");

  if( (array_numelts(_x) < 0) || (array_numelts(_y) < 0) )
    croak("do_saxpy() : error, x and y must be references to non-empty arrays.");

  deref_x = (AV *)SvRV(_x);
  deref_y = (AV *)SvRV(_y);

  /* av_len() is the highest valid index: N elements need av_len() >= N-1 */
  if( (av_len(deref_x) < N-1) || (av_len(deref_y) < N-1) )
    croak("do_saxpy() : error, x and y must each hold at least N=%d elements.", N);

  nbytes = (size_t)N * sizeof(double);

  x = (double*)malloc(nbytes);
  y = (double*)malloc(nbytes);
  expected = (double*)malloc(nbytes);
  if( (x == NULL) || (y == NULL) || (expected == NULL) ){
    failure = "out of host memory";
    goto cleanup;
  }

  if( (cuerr = cudaMalloc((void **)&d_x, nbytes)) != cudaSuccess ){
    failure = "cudaMalloc() for x has failed";
    goto cleanup;
  }
  if( (cuerr = cudaMalloc((void **)&d_y, nbytes)) != cudaSuccess ){
    failure = "cudaMalloc() for y has failed";
    goto cleanup;
  }

  for(i=0;i<N;i++){
    /* av_fetch() returns NULL for a slot that was never assigned;
       such a slot is read as 0.0 */
    elem = av_fetch(deref_x, i, 0);
    x[i] = (elem != NULL) ? SvNV(*elem) : 0.0;
    elem = av_fetch(deref_y, i, 0);
    y[i] = (elem != NULL) ? SvNV(*elem) : 0.0;
    printf("do_saxpy() : got in x[%d]=%lf and y[%d]=%lf\n", i, x[i], i, y[i]);
    /* host-side reference, used below to measure the GPU error */
    expected[i] = a*x[i] + y[i];
  }

  if( (cuerr = cudaMemcpy(d_x, x, nbytes, cudaMemcpyHostToDevice)) != cudaSuccess ){
    failure = "copying x to the device has failed";
    goto cleanup;
  }
  if( (cuerr = cudaMemcpy(d_y, y, nbytes, cudaMemcpyHostToDevice)) != cudaSuccess ){
    failure = "copying y to the device has failed";
    goto cleanup;
  }

  /* Perform SAXPY on N elements, 256 threads per block (ceil-div grid) */
  nblocks = (unsigned int)(((size_t)N + 255) / 256);
  saxpy<<<nblocks, 256>>>(N, a, d_x, d_y);
  /* a kernel launch returns nothing; launch errors are fetched here */
  if( (cuerr = cudaGetLastError()) != cudaSuccess ){
    failure = "launching the saxpy kernel has failed";
    goto cleanup;
  }

  /* this copies the results from GPU memory (d_y) into CPU memory; y is
     reused as the destination because its input values are no longer
     needed */
  if( (cuerr = cudaMemcpy(y, d_y, nbytes, cudaMemcpyDeviceToHost)) != cudaSuccess ){
    failure = "copying the results back from the device has failed";
    goto cleanup;
  }

  /* fill the array to be returned and measure the largest deviation
     from the host-side reference */
  av_extend(ret, N-1);
  for(i=0;i<N;i++){
    av_push(ret, newSVnv(y[i]));
    diff = fabs(y[i] - expected[i]);
    if( diff > maxError ) maxError = diff;
  }
  printf("do_saxpy() : Max error: %f\n", maxError);

cleanup:
  /* cudaFree(NULL) and free(NULL) are no-ops, so this is safe on every path */
  cudaFree(d_x);
  cudaFree(d_y);
  free(x);
  free(y);
  free(expected);

  if( failure != NULL ){
    if( cuerr != cudaSuccess )
      croak("do_saxpy() : error, %s: %s", failure, cudaGetErrorString(cuerr));
    croak("do_saxpy() : error, %s", failure);
  }

  return ret;
}
EOC

# number of elements to process
my $N = 100; #1<<20;

# two input vectors of $N random numbers each
my @x = map { rand() } 1..$N;
my @y = map { rand() } 1..$N;

# both inputs are passed as arrayrefs, the results come back as an arrayref
my $err = do_saxpy($N, \@x, \@y);
if( ! defined $err ){ print STDERR "$0 : error, call to do_saxpy() has failed.\n"; exit(1); }
# print instead of printf: $0 is interpolated into the string and must
# not be interpreted as a format (it may contain '%')
print "$0 : back to perl-code ...\n";
print "$0 : (perl-code) : got back result :\n".join("\n", @$err)."\n";
[download]

bw, bliako

In reply to Re: Compile and run cuda code on the GPU via Perl's Inline::C - passing parameters by bliako
in thread Compile and possibly run cuda code on the GPU via Perl's Inline::C by bliako

Are you posting in the right place? Check out Where do I post X? to know for sure.
Posts may use any of the Perl Monks Approved HTML tags. Currently these include the following:
<code> <a> <b> <big> <blockquote> <br /> <dd> <dl> <dt> <em> <font> <h1> <h2> <h3> <h4> <h5> <h6> <hr /> <i> <li> <nbsp> <ol> <p> <small> <strike> <strong> <sub> <sup> <table> <td> <th> <tr> <tt> <u> <ul>
Snippets of code should be wrapped in <code> tags not <pre> tags. In fact, <pre> tags should generally be avoided. If they must be used, extreme care should be taken to ensure that their contents do not have long lines (<70 chars), in order to prevent horizontal scrolling (and possible janitor intervention).
Want more info? How to link or How to display code and escape characters are good places to start.


Think about Loose Coupling
	PerlMonks