/// <summary>
/// Flattens a stack of 2-D matrices, uploads it to the GPU, launches the
/// <c>matrixSumCude</c> kernel and returns the kernel's output buffer.
/// </summary>
/// <param name="matrix">
/// Stack of 2-D matrices. The plane dimensions are read from <c>matrix[0]</c> only.
/// NOTE(review): presumably every plane must have the same shape — confirm against
/// <c>ToLinearArray</c> and the kernel.
/// </param>
/// <returns>
/// A flat array of <c>Y * X</c> doubles copied back from the device, where <c>Y</c> and
/// <c>X</c> are the two dimensions of <c>matrix[0]</c>. An empty array is returned when
/// there are no planes or the planes have no elements.
/// NOTE(review): element layout (row-major vs. column-major) is decided by the kernel,
/// which is not visible here — verify before indexing the result.
/// </returns>
static double[] SumMatrixManagedCuda(double[][,] matrix)
{
    // Nothing to process: avoid indexing matrix[0] on an empty stack.
    if (matrix.Length == 0)
    {
        return new double[0];
    }

    int depth = matrix.Length;
    int rows = matrix[0].GetLength(0);
    int cols = matrix[0].GetLength(1);

    var result = new double[rows * cols];

    // Zero-sized planes would request a 0-byte device allocation; skip the GPU round trip.
    if (result.Length == 0)
    {
        return result;
    }

    var flattened = ToLinearArray(matrix);

    // Compute size is given as (X, Y) = (columns, rows) of a single plane.
    matrixSumCude.SetComputeSize((uint)cols, (uint)rows);

    var deviceInput = cntxt.AllocateMemory(flattened.Length * sizeof(double));
    try
    {
        var deviceOutput = cntxt.AllocateMemory(result.Length * sizeof(double));
        try
        {
            cntxt.CopyToDevice(deviceInput, flattened);

            // Upload the zero-initialised result so the output buffer starts from a known state.
            cntxt.CopyToDevice(deviceOutput, result);

            // Kernel argument order: output buffer, input buffer, X, Y, Z.
            matrixSumCude.Run(deviceOutput, deviceInput, cols, rows, depth);

            cntxt.CopyToHost<double>(result, deviceOutput);
        }
        finally
        {
            // Release device memory even if a copy or the kernel launch throws.
            cntxt.FreeMemory(deviceOutput);
        }
    }
    finally
    {
        cntxt.FreeMemory(deviceInput);
    }

    return result;
}