#!/usr/bin/perl # by bliako @ PerlMonks.org # date: 01-Jul-2021 # see https://perlmonks.org/?node_id=11134582 # lame example for utilising GPGPU via Inline::C # TODO: extend to taking params and returning back results use strict; use warnings; use FindBin; use Inline C => Config => cc => $FindBin::Bin.'/nvcc-compile.pl', ld => $FindBin::Bin.'/nvcc-link.pl', ; use Inline C => <<'EOC'; // from https://developer.nvidia.com/blog/easy-introduction-cuda-c-and-c/ // NOTE: don't use main(void), use main()!!! #include AV *do_saxpy(int N, SV *_x, SV *_y); int array_numelts(SV *array); __global__ void saxpy(int n, double a, double *x, double *y) { int i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) y[i] = a*x[i] + y[i]; } int array_numelts(SV *array){ int numelts; if( (!SvROK(array)) || (SvTYPE(SvRV(array)) != SVt_PVAV) || ((numelts = av_len((AV *)SvRV(array))) < 0) ) return -1; return numelts; } /* returns an arrayref of results */ AV* do_saxpy( int N, SV *_x, SV *_y ) { double *x, *y, *d_x, *d_y; int nX, nY, i; AV *ret = newAV(); sv_2mortal((SV*)ret); if( N <= 0 ){ fprintf(stderr, "error, N must be positive.\n"); return NULL; } if( ((nX=array_numelts(_x))<0) ||((nY=array_numelts(_y))<0) ){ fprintf(stderr, "err\n"); return NULL; } x = (double*)malloc(N*sizeof(double)); y = (double*)malloc(N*sizeof(double)); cudaMalloc(&d_x, N*sizeof(double)); cudaMalloc(&d_y, N*sizeof(double)); AV *deref_x = (AV *)SvRV(_x), *deref_y = (AV *)SvRV(_y); SV **dummy; for(i=0;i>>(N, 2.0f, d_x, d_y); // this copies data from GPU (dy) onto CPU memory, we use y because // it's just sitting there and no longer needed cudaMemcpy(y, d_y, N*sizeof(double), cudaMemcpyDeviceToHost); /* add some rubbish to return back as array ref */ for(i=0;i