#!/usr/bin/perl # by bliako @ PerlMonks.org # date: 01-Jul-2021 # see https://perlmonks.org/?node_id=11134582 # lame example for utilising GPGPU via Inline::C # TODO: extend to taking params and returning back results use strict; use warnings; use FindBin; use Inline C => Config => cc => $FindBin::Bin.'/nvcc-compile.pl', ld => $FindBin::Bin.'/nvcc-link.pl', ; use Inline C => <<'EOC'; // from https://developer.nvidia.com/blog/easy-introduction-cuda-c-and-c/ #include __global__ void saxpy(int n, float a, float *x, float *y) { int i = blockIdx.x*blockDim.x + threadIdx.x; if (i < n) y[i] = a*x[i] + y[i]; } int main() { int N = 1<<20; float *x, *y, *d_x, *d_y; x = (float*)malloc(N*sizeof(float)); y = (float*)malloc(N*sizeof(float)); cudaMalloc(&d_x, N*sizeof(float)); cudaMalloc(&d_y, N*sizeof(float)); for (int i = 0; i < N; i++) { x[i] = 1.0f; y[i] = 2.0f; } cudaMemcpy(d_x, x, N*sizeof(float), cudaMemcpyHostToDevice); cudaMemcpy(d_y, y, N*sizeof(float), cudaMemcpyHostToDevice); // Perform SAXPY on 1M elements saxpy<<<(N+255)/256, 256>>>(N, 2.0f, d_x, d_y); cudaMemcpy(y, d_y, N*sizeof(float), cudaMemcpyDeviceToHost); float maxError = 0.0f; for (int i = 0; i < N; i++) maxError = max(maxError, abs(y[i]-4.0f)); printf("Max error: %f\n", maxError); cudaFree(d_x); cudaFree(d_y); free(x); free(y); return 0; // << late edit! } EOC main;