/// <summary>
/// Tests whether a computed complex signal matches a reference signal within a tolerance.
/// </summary>
/// <param name="h_convolved_signal_ref">Reference values; its length fixes how many elements are compared.</param>
/// <param name="h_convolved_signal">Values under test; indexed over the reference length.</param>
/// <param name="eps">Exclusive upper bound for the error measure.</param>
/// <returns>
/// True when sqrt(sum(|ref - value|^2) / referenceLength / 2) is strictly smaller than <paramref name="eps"/>.
/// </returns>
static bool sdkCompareL2fe(cuFloatComplex[] h_convolved_signal_ref, cuFloatComplex[] h_convolved_signal, float eps)
{
    int count = h_convolved_signal_ref.Length;
    float squaredError = 0;

    for (int idx = 0; idx < count; idx++)
    {
        cuFloatComplex delta = h_convolved_signal_ref[idx] - h_convolved_signal[idx];
        squaredError += delta.real * delta.real + delta.imag * delta.imag;
    }

    // Mean over the element count, halved, then square-rooted.
    double error = Math.Sqrt(squaredError / count / 2.0f);
    return error < eps;
}
/// <summary>
/// Sets up the static simulation state: space properties, kernels, the two wave-function
/// device buffers, the FFT, index and position grids, the spectral factor table and the mask.
/// </summary>
/// <param name="volSize">Volume size per axis, forwarded to <c>SpaceProperties</c>.</param>
/// <param name="volRes">Volume resolution per axis, forwarded to <c>SpaceProperties</c>.</param>
/// <param name="hbar">Forwarded to <c>SpaceProperties</c>.</param>
/// <param name="dt">Forwarded to <c>SpaceProperties</c>.</param>
public static void Init(int[] volSize, int[] volRes, float hbar, float dt)
{
    properties = new SpaceProperties(volSize, volRes, hbar, dt);
    ISFKernels.Init(properties);

    psi1 = new CudaDeviceVariable<cuFloatComplex>(properties.num);
    psi2 = new CudaDeviceVariable<cuFloatComplex>(properties.num);

    FFT.init(properties.resx, properties.resy, properties.resz);

    // Per-axis index vectors 0..res-1 and the 3D grids built from them.
    _ix = Enumerable.Range(0, properties.resx).ToArray();
    _iy = Enumerable.Range(0, properties.resy).ToArray();
    _iz = Enumerable.Range(0, properties.resz).ToArray();

    var grid = Matlab.ndgrid(_ix, _iy, _iz);
    _iix = grid.x;
    _iiy = grid.y;
    _iiz = grid.z;

    // Position of every cell: grid index scaled by the cell spacing of its axis.
    properties.px = new float[_iix.GetLength(0), _iix.GetLength(1), _iix.GetLength(2)];
    properties.py = new float[_iiy.GetLength(0), _iiy.GetLength(1), _iiy.GetLength(2)];
    properties.pz = new float[_iiz.GetLength(0), _iiz.GetLength(1), _iiz.GetLength(2)];

    for (int a = 0; a < properties.resx; a++)
    {
        for (int b = 0; b < properties.resy; b++)
        {
            for (int c = 0; c < properties.resz; c++)
            {
                properties.px[a, b, c] = (_iix[a, b, c]) * properties.dx;
                properties.py[a, b, c] = (_iiy[a, b, c]) * properties.dy;
                properties.pz[a, b, c] = (_iiz[a, b, c]) * properties.dz;
            }
        }
    }

    // sin(pi * index / res) / spacing, one table per axis.
    _sx = _iix.Select3D((value, a, b, c) => (float)Math.Sin((float)Math.PI * value / properties.resx) / properties.dx);
    _sy = _iiy.Select3D((value, a, b, c) => (float)Math.Sin((float)Math.PI * value / properties.resy) / properties.dy);
    _sz = _iiz.Select3D((value, a, b, c) => (float)Math.Sin((float)Math.PI * value / properties.resz) / properties.dz);

    // Factor table: -0.25 / (sx^2 + sy^2 + sz^2), stored as a purely real complex number.
    cuFloatComplex[,,] hostFactors = _iix.Select3D((value, a, b, c) =>
    {
        double sx2 = Math.Pow(_sx[a, b, c], 2);
        double sy2 = Math.Pow(_sy[a, b, c], 2);
        double sz2 = Math.Pow(_sz[a, b, c], 2);
        return new cuFloatComplex((float)(-0.25 / (sx2 + sy2 + sz2)), 0);
    });

    // The denominator is zero at the origin, so that entry is replaced by zero.
    hostFactors[0, 0, 0] = new cuFloatComplex(0, 0);

    _fac = new CudaDeviceVariable<cuFloatComplex>(properties.num);
    _fac.CopyToDevice(hostFactors);

    var hostMask = new cuFloatComplex[properties.resx, properties.resy, properties.resz];
    build_schroedinger(hostMask);

    _mask = new CudaDeviceVariable<cuFloatComplex>(properties.num);
    _mask.CopyToDevice(hostMask);
}
/// <summary>
/// Builds zero-padded copies of the signal and of the filter kernel, both of the same new length.
/// The kernel is stored wrapped: its upper part at the front, its lower part at the very end.
/// </summary>
/// <param name="signal">Input signal.</param>
/// <param name="padded_signal">Receives a new array holding the signal followed by zeros.</param>
/// <param name="signal_size">Number of signal elements to copy.</param>
/// <param name="filter_kernel">Input filter kernel.</param>
/// <param name="padded_filter_kernel">Receives a new array holding the wrapped kernel.</param>
/// <param name="filter_kernel_size">Number of kernel elements.</param>
/// <returns>The padded length, <c>signal_size + (filter_kernel_size - filter_kernel_size / 2)</c>.</returns>
static int PadData(cuFloatComplex[] signal, ref cuFloatComplex[] padded_signal, int signal_size,
                   cuFloatComplex[] filter_kernel, ref cuFloatComplex[] padded_filter_kernel, int filter_kernel_size)
{
    int lowerCount = filter_kernel_size / 2;
    int upperCount = filter_kernel_size - lowerCount;
    int paddedLength = signal_size + upperCount;

    // Signal: original samples first, the remainder stays zero-initialised.
    padded_signal = new cuFloatComplex[paddedLength];
    Array.Copy(signal, 0, padded_signal, 0, signal_size);

    // Kernel: elements from lowerCount onwards go to the front ...
    padded_filter_kernel = new cuFloatComplex[paddedLength];
    Array.Copy(filter_kernel, lowerCount, padded_filter_kernel, 0, upperCount);

    // ... and the first lowerCount elements wrap around to the tail.
    int tailStart = paddedLength - lowerCount;
    Array.Copy(filter_kernel, 0, padded_filter_kernel, tailStart, lowerCount);

    return paddedLength;
}
/// <summary>
/// Multiplies <paramref name="psi"/> in place by a phase factor exp(i * alpha) on every grid cell.
/// alpha is non-zero only for cells whose squared distance from the axis through
/// <paramref name="center"/> along <paramref name="normal"/> is below r^2 and whose signed
/// distance z along that axis satisfies |z| &lt;= d / 2:
/// alpha = -pi * (2z/d - 1) for z &gt; 0 and alpha = -pi * (2z/d + 1) for z &lt;= 0.
/// </summary>
/// <param name="psi">Field to modify; indexed over properties.resx/resy/resz.</param>
/// <param name="center">Three components of the centre point.</param>
/// <param name="normal">Three components of the axis direction; need not be unit length and is not modified.</param>
/// <param name="r">Radius used for the distance-from-axis test.</param>
/// <param name="d">Thickness along the axis over which the phase is applied.</param>
public static void add_circle(cuFloatComplex[,,] psi, float[] center, float[] normal, float r, float d)
{
    // Normalise into locals: writing the unit vector back into `normal` would silently
    // change the caller's array.
    float norm = (float)Math.Sqrt(Math.Pow(normal[0], 2) + Math.Pow(normal[1], 2) + Math.Pow(normal[2], 2));
    float nx = normal[0] / norm;
    float ny = normal[1] / norm;
    float nz = normal[2] / norm;

    for (int i = 0; i < properties.resx; i++)
    {
        for (int j = 0; j < properties.resy; j++)
        {
            for (int k = 0; k < properties.resz; k++)
            {
                float rx = properties.px[i, j, k] - center[0];
                float ry = properties.py[i, j, k] - center[1];
                float rz = properties.pz[i, j, k] - center[2];

                // Signed distance of the cell from the centre, measured along the axis.
                float z = rx * nx + ry * ny + rz * nz;

                float alpha = 0;
                // |r|^2 - z^2 is the squared distance from the axis.
                if (rx * rx + ry * ry + rz * rz - z * z < r * r)
                {
                    if (z > 0 && z <= d / 2)
                    {
                        alpha = (float)-Math.PI * (2 * z / d - 1);
                    }
                    else if (z <= 0 && z >= -d / 2)
                    {
                        alpha = (float)-Math.PI * (2 * z / d + 1);
                    }
                }

                Complex value = new Complex(psi[i, j, k].real, psi[i, j, k].imag);
                value *= Complex.Exp(Complex.ImaginaryOne * alpha);
                psi[i, j, k] = new cuFloatComplex((float)value.Real, (float)value.Imaginary);
            }
        }
    }
}
/// <summary>
/// For every cell flagged in <c>isJet</c>, replaces both wave-function components by their
/// current magnitude times exp(i * phase), where
/// phase = kvec . position - omega * t. Cells not flagged are left untouched.
/// A result with a NaN or infinite component is replaced by (1, 1) for
/// <paramref name="psi1"/> and (0.01, 0.01) for <paramref name="psi2"/>.
/// </summary>
/// <param name="psi1">First component, modified in place; its dimensions drive the loops.</param>
/// <param name="psi2">Second component, modified in place.</param>
/// <param name="t">Time used in the phase term.</param>
public void constraint(cuFloatComplex[,,] psi1, cuFloatComplex[,,] psi2, float t)
{
    for (int i = 0; i < psi1.GetLength(0); i++)
    {
        for (int j = 0; j < psi1.GetLength(1); j++)
        {
            for (int k = 0; k < psi1.GetLength(2); k++)
            {
                // Only jet cells are rewritten, so skip the arithmetic everywhere else.
                if (!isJet[i, j, k])
                {
                    continue;
                }

                Complex amp1 = Complex.Sqrt(psi1[i, j, k].real * psi1[i, j, k].real + psi1[i, j, k].imag * psi1[i, j, k].imag);
                Complex amp2 = Complex.Sqrt(psi2[i, j, k].real * psi2[i, j, k].real + psi2[i, j, k].imag * psi2[i, j, k].imag);

                float phase = kvec[0] * _px[i, j, k] + kvec[1] * _py[i, j, k] + kvec[2] * _pz[i, j, k] - omega * t;
                Complex rotation = Complex.Exp(Complex.ImaginaryOne * phase);

                Complex tmp1 = amp1 * rotation;
                Complex tmp2 = amp2 * rotation;

                cuFloatComplex next1 = new cuFloatComplex((float)tmp1.Real, (float)tmp1.Imaginary);
                cuFloatComplex next2 = new cuFloatComplex((float)tmp2.Real, (float)tmp2.Imaginary);

                // Both components are checked: a finite real part does not guarantee a finite imaginary part.
                if (HasNonFiniteComponent(next1))
                {
                    next1 = new cuFloatComplex(1, 1);
                }
                if (HasNonFiniteComponent(next2))
                {
                    next2 = new cuFloatComplex(0.01f, 0.01f);
                }

                psi1[i, j, k] = next1;
                psi2[i, j, k] = next2;
            }
        }
    }
}

/// <summary>Returns true when either component of <paramref name="value"/> is NaN or infinite.</summary>
private static bool HasNonFiniteComponent(cuFloatComplex value)
{
    return float.IsInfinity(value.real) || float.IsNaN(value.real)
        || float.IsInfinity(value.imag) || float.IsNaN(value.imag);
}
/// <summary>
/// Fills <paramref name="tmpMask"/> with exp(i * lambda * dt / 2) per cell, where
/// lambda = -4 * pi^2 * hbar * (kx^2 + ky^2 + kz^2) and each k component is
/// (grid index - res / 2) / size for its axis.
/// </summary>
/// <param name="tmpMask">Destination array; written for every index of the <c>_iix</c> grid.</param>
private static void build_schroedinger(cuFloatComplex[,,] tmpMask)
{
    var scale = -4 * Math.Pow(Math.PI, 2) * properties.hbar;

    int countX = _iix.GetLength(0);
    int countY = _iix.GetLength(1);
    int countZ = _iix.GetLength(2);

    for (int a = 0; a < countX; a++)
    {
        for (int b = 0; b < countY; b++)
        {
            for (int c = 0; c < countZ; c++)
            {
                // Centre the index range around zero before scaling by the box size.
                float kx = (_iix[a, b, c] - (float)properties.resx / 2) / properties.sizex;
                float ky = (_iiy[a, b, c] - (float)properties.resy / 2) / properties.sizey;
                float kz = (_iiz[a, b, c] - (float)properties.resz / 2) / properties.sizez;

                var lambda = scale * (Math.Pow(kx, 2) + Math.Pow(ky, 2) + Math.Pow(kz, 2));
                var phasor = Complex.Exp(Complex.ImaginaryOne * lambda * properties.dt / 2f);

                tmpMask[a, b, c] = new cuFloatComplex((float)phasor.Real, (float)phasor.Imaginary);
            }
        }
    }
}
/// <summary>
/// Reference convolution computed on the host. Each output element is the sum of
/// signal[i + j] * filter_kernel[minRadius - j] over the kernel window, skipping
/// positions that fall outside the signal.
/// </summary>
/// <param name="signal">Input signal.</param>
/// <param name="signal_size">Number of signal (and output) elements.</param>
/// <param name="filter_kernel">Filter kernel.</param>
/// <param name="filter_kernel_size">Number of kernel elements.</param>
/// <param name="filtered_signal">Output array; its first <paramref name="signal_size"/> elements are overwritten.</param>
static void Convolve(cuFloatComplex[] signal, int signal_size, cuFloatComplex[] filter_kernel,
                     int filter_kernel_size, cuFloatComplex[] filtered_signal)
{
    int minRadius = filter_kernel_size / 2;
    int maxRadius = filter_kernel_size - minRadius;

    for (int outIdx = 0; outIdx < signal_size; ++outIdx)
    {
        filtered_signal[outIdx].imag = 0;
        filtered_signal[outIdx].real = 0;

        // Clamp the window so that outIdx + offset always stays inside [0, signal_size).
        int firstOffset = Math.Max(-maxRadius + 1, -outIdx);
        int lastOffset = Math.Min(minRadius, signal_size - 1 - outIdx);

        for (int offset = firstOffset; offset <= lastOffset; ++offset)
        {
            filtered_signal[outIdx] += signal[outIdx + offset] * filter_kernel[minRadius - offset];
        }
    }
}
// Extern declaration (no managed body) for cublasCgemmStridedBatched, returning a CublasStatus.
// alpha and beta are single cuFloatComplex values passed by reference; A, B and C are CUdeviceptr
// values, each accompanied by an int leading dimension and a signed 64-bit stride.
// NOTE(review): presumably the strided-batched complex GEMM of cuBLAS — confirm parameter semantics against the cuBLAS docs.
public static extern CublasStatus cublasCgemmStridedBatched(CudaBlasHandle handle,
    Operation transa,
    Operation transb,
    int m,
    int n,
    int k,
    ref cuFloatComplex alpha, /* host or device pointer */
    CUdeviceptr A,
    int lda,
    long strideA, /* purposely signed */
    CUdeviceptr B,
    int ldb,
    long strideB,
    ref cuFloatComplex beta, /* host or device pointer */
    CUdeviceptr C,
    int ldc,
    long strideC,
    int batchCount);
// Extern declaration (no managed body) for cuMemcpyDtoH_v2, returning a CUResult.
// This overload takes the host destination as a single cuFloatComplex passed by reference,
// the source as a CUdeviceptr and a SizeT byte count.
// NOTE(review): presumably a device-to-host copy, as the name suggests — confirm what ByteCount must be for this overload.
public static extern CUResult cuMemcpyDtoH_v2(ref cuFloatComplex dstHost, CUdeviceptr srcDevice, SizeT ByteCount);
// Extern declaration (no managed body) for cusparseChybmv, returning a cusparseStatus.
// alpha and beta are single cuFloatComplex values passed by reference; x and y are CUdeviceptr values;
// descrA and hybA are passed as cusparseMatDescr / cusparseHybMat.
// NOTE(review): presumably the HYB-format product y = alpha * op(A) * x + beta * y — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseChybmv(cusparseContext handle, cusparseOperation transA, ref cuFloatComplex alpha, cusparseMatDescr descrA, cusparseHybMat hybA, CUdeviceptr x, ref cuFloatComplex beta, CUdeviceptr y);
/// <summary>
/// simpleCUFFT sample: convolves a random signal with a random filter kernel on the device
/// (forward FFT of both, pointwise multiply-and-scale kernel, inverse FFT), computes the same
/// convolution on the host, and prints whether the two results agree within 1e-5.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const int SIGNAL_SIZE = 50;
    const int FILTER_KERNEL_SIZE = 11;

    Console.WriteLine("[simpleCUFFT] is starting...");

    var assembly = Assembly.GetExecutingAssembly();
    var resourceName = "simpleCUFFT.simpleCUFFTKernel.ptx";

    bool bTestResult;

    // `using` guarantees the context, buffers and plan are released even when a call throws.
    // Disposal order (plan, filter buffer, signal buffer, context) matches the original manual cleanup.
    using (CudaContext ctx = new CudaContext(0))
    {
        CudaKernel ComplexPointwiseMulAndScale;
        using (Stream stream = assembly.GetManifestResourceStream(resourceName))
        {
            // GetManifestResourceStream returns null when the resource is missing.
            if (stream == null)
            {
                throw new InvalidOperationException("Embedded resource not found: " + resourceName);
            }
            ComplexPointwiseMulAndScale = ctx.LoadKernelPTX(stream, "ComplexPointwiseMulAndScale");
        }

        // Fixed seed so the generated signal and filter are reproducible.
        Random rand = new Random(0);

        // Host signal: random real part, zero imaginary part.
        cuFloatComplex[] h_signal = new cuFloatComplex[SIGNAL_SIZE];
        for (int i = 0; i < SIGNAL_SIZE; ++i)
        {
            h_signal[i].real = (float)rand.NextDouble();
            h_signal[i].imag = 0;
        }

        // Host filter kernel, initialised the same way.
        cuFloatComplex[] h_filter_kernel = new cuFloatComplex[FILTER_KERNEL_SIZE];
        for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
        {
            h_filter_kernel[i].real = (float)rand.NextDouble();
            h_filter_kernel[i].imag = 0;
        }

        // Pad signal and filter kernel to a common length.
        cuFloatComplex[] h_padded_signal = null;
        cuFloatComplex[] h_padded_filter_kernel = null;
        int new_size = PadData(h_signal, ref h_padded_signal, SIGNAL_SIZE,
                               h_filter_kernel, ref h_padded_filter_kernel, FILTER_KERNEL_SIZE);

        using (CudaDeviceVariable<cuFloatComplex> d_signal = new CudaDeviceVariable<cuFloatComplex>(new_size))
        using (CudaDeviceVariable<cuFloatComplex> d_filter_kernel = new CudaDeviceVariable<cuFloatComplex>(new_size))
        using (CudaFFTPlan1D plan = new CudaFFTPlan1D(new_size, cufftType.C2C, 1))
        {
            // Copy host memory to device.
            d_signal.CopyToDevice(h_padded_signal);
            d_filter_kernel.CopyToDevice(h_padded_filter_kernel);

            // Transform signal and kernel.
            Console.WriteLine("Transforming signal cufftExecC2C");
            plan.Exec(d_signal.DevicePointer, TransformDirection.Forward);
            plan.Exec(d_filter_kernel.DevicePointer, TransformDirection.Forward);

            // Multiply the coefficients together and normalize the result.
            Console.WriteLine("Launching ComplexPointwiseMulAndScale<<< >>>");
            ComplexPointwiseMulAndScale.BlockDimensions = 256;
            ComplexPointwiseMulAndScale.GridDimensions = 32;
            ComplexPointwiseMulAndScale.Run(d_signal.DevicePointer, d_filter_kernel.DevicePointer, new_size, 1.0f / new_size);

            // Transform signal back.
            Console.WriteLine("Transforming signal back cufftExecC2C");
            plan.Exec(d_signal.DevicePointer, TransformDirection.Inverse);

            // Copy device memory to host (conversion from the device variable to an array).
            cuFloatComplex[] h_convolved_signal = d_signal;

            // Reference convolution on the host.
            cuFloatComplex[] h_convolved_signal_ref = new cuFloatComplex[SIGNAL_SIZE];
            Convolve(h_signal, SIGNAL_SIZE, h_filter_kernel, FILTER_KERNEL_SIZE, h_convolved_signal_ref);

            // Check result.
            bTestResult = sdkCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 1e-5f);
        }
    }

    if (bTestResult)
    {
        Console.WriteLine("Test Passed");
    }
    else
    {
        Console.WriteLine("Test Failed");
    }
}
// Extern declaration (no managed body) for cusparseCcsrgeam, returning a cusparseStatus.
// alpha and beta are single cuFloatComplex values passed by reference. Matrices A, B and C are each
// given as a descriptor plus three CUdeviceptr values (csrVal, csrRowPtr, csrColInd); A and B also carry an nnz count.
// NOTE(review): presumably the CSR sum C = alpha * A + beta * B — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCcsrgeam(cusparseContext handle, int m, int n, ref cuFloatComplex alpha, cusparseMatDescr descrA, int nnzA, CUdeviceptr csrValA, CUdeviceptr csrRowPtrA, CUdeviceptr csrColIndA, ref cuFloatComplex beta, cusparseMatDescr descrB, int nnzB, CUdeviceptr csrValB, CUdeviceptr csrRowPtrB, CUdeviceptr csrColIndB, cusparseMatDescr descrC, CUdeviceptr csrValC, CUdeviceptr csrRowPtrC, CUdeviceptr csrColIndC);
// Extern declaration (no managed body) for cusparseCbsrxmv, returning a cusparseStatus.
// alpha and beta are single cuFloatComplex values passed by reference; the matrix is passed as a descriptor
// plus five CUdeviceptr values (bsrVal, bsrMaskPtr, bsrRowPtr, bsrEndPtr, bsrColInd) and an int blockDim; x and y are CUdeviceptr values.
// NOTE(review): presumably the masked BSR matrix-vector product — confirm sizeOfMask / mask semantics against the cuSPARSE docs.
public static extern cusparseStatus cusparseCbsrxmv(cusparseContext handle, cusparseDirection dirA, cusparseOperation transA, int sizeOfMask, int mb, int nb, int nnzb, ref cuFloatComplex alpha, cusparseMatDescr descrA, CUdeviceptr bsrValA, CUdeviceptr bsrMaskPtrA, CUdeviceptr bsrRowPtrA, CUdeviceptr bsrEndPtrA, CUdeviceptr bsrColIndA, int blockDim, CUdeviceptr x, ref cuFloatComplex beta, CUdeviceptr y);
// Extern declaration (no managed body) for cusparseCaxpyi, returning a cusparseStatus.
// alpha is a single cuFloatComplex passed by reference; xVal, xInd and y are CUdeviceptr values; idxBase selects the index base.
// NOTE(review): presumably y = y + alpha * x for a sparse x given as values plus indices — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCaxpyi(cusparseContext handle, int nnz, ref cuFloatComplex alpha, CUdeviceptr xVal, CUdeviceptr xInd, CUdeviceptr y, cusparseIndexBase idxBase);
// Extern declaration (no managed body) for cusparseCcsrgemm2_bufferSizeExt, returning a cusparseStatus.
// Takes only the row-pointer and column-index CUdeviceptr values of A, B and D (no value arrays),
// alpha and beta by reference, a csrgemm2Info, and writes a size through the ref SizeT pBufferSizeInBytes.
// NOTE(review): presumably reports the work-buffer size needed by cusparseCcsrgemm2 — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCcsrgemm2_bufferSizeExt(cusparseContext handle, int m, int n, int k, ref cuFloatComplex alpha, cusparseMatDescr descrA, int nnzA, CUdeviceptr csrSortedRowPtrA, CUdeviceptr csrSortedColIndA, cusparseMatDescr descrB, int nnzB, CUdeviceptr csrSortedRowPtrB, CUdeviceptr csrSortedColIndB, ref cuFloatComplex beta, cusparseMatDescr descrD, int nnzD, CUdeviceptr csrSortedRowPtrD, CUdeviceptr csrSortedColIndD, csrgemm2Info info, ref SizeT pBufferSizeInBytes);
// Extern declaration (no managed body) for cusparseCcsrmm2, returning a cusparseStatus.
// alpha and beta are single cuFloatComplex values passed by reference; A is given as a descriptor plus
// three CSR CUdeviceptr values; B and C are CUdeviceptr values with int leading dimensions ldb / ldc.
// NOTE(review): presumably C = alpha * op(A) * op(B) + beta * C with sparse A and dense B, C — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCcsrmm2(cusparseContext handle, cusparseOperation transa, cusparseOperation transb, int m, int n, int k, int nnz, ref cuFloatComplex alpha, cusparseMatDescr descrA, CUdeviceptr csrValA, CUdeviceptr csrRowPtrA, CUdeviceptr csrColIndA, CUdeviceptr B, int ldb, ref cuFloatComplex beta, CUdeviceptr C, int ldc);
// Extern declaration (no managed body) for cusparseCbsrsm2_solve, returning a cusparseStatus.
// alpha is a single cuFloatComplex passed by reference; the matrix is given as a descriptor plus three BSR
// CUdeviceptr values and an int blockSize; X and Y are CUdeviceptr values with leading dimensions ldx / ldy;
// a bsrsm2Info, a cusparseSolvePolicy and a CUdeviceptr work buffer are also passed.
// NOTE(review): presumably the solve phase of a BSR triangular solve with multiple right-hand sides — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCbsrsm2_solve(cusparseContext handle, cusparseDirection dirA, cusparseOperation transA, cusparseOperation transXY, int mb, int n, int nnzb, ref cuFloatComplex alpha, cusparseMatDescr descrA, CUdeviceptr bsrVal, CUdeviceptr bsrRowPtr, CUdeviceptr bsrColInd, int blockSize, bsrsm2Info info, CUdeviceptr X, int ldx, CUdeviceptr Y, int ldy, cusparseSolvePolicy policy, CUdeviceptr pBuffer);
// Extern declaration (no managed body) for cusparseCcsrsv2_solve, returning a cusparseStatus.
// alpha is a single cuFloatComplex passed by reference; the matrix is given as a descriptor plus three CSR
// CUdeviceptr values; x and y are CUdeviceptr values; a csrsv2Info, a cusparseSolvePolicy and a CUdeviceptr work buffer are also passed.
// NOTE(review): presumably the solve phase of a CSR triangular solve — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCcsrsv2_solve(cusparseContext handle, cusparseOperation transA, int m, int nnz, ref cuFloatComplex alpha, cusparseMatDescr descra, CUdeviceptr csrValA, CUdeviceptr csrRowPtrA, CUdeviceptr csrColIndA, csrsv2Info info, CUdeviceptr x, CUdeviceptr y, cusparseSolvePolicy policy, CUdeviceptr pBuffer);
/// <summary>
/// Compares a computed complex signal against a reference using a scaled L2 error.
/// </summary>
/// <param name="h_convolved_signal_ref">Reference values; the comparison runs over its length.</param>
/// <param name="h_convolved_signal">Values under test; read at the same indices as the reference.</param>
/// <param name="eps">Tolerance; the error must be strictly below it.</param>
/// <returns>True when sqrt(sum(|ref - value|^2) / referenceLength / 2) &lt; <paramref name="eps"/>.</returns>
static bool sdkCompareL2fe(cuFloatComplex[] h_convolved_signal_ref, cuFloatComplex[] h_convolved_signal, float eps)
{
    float accumulated = 0;
    int position = 0;

    while (position < h_convolved_signal_ref.Length)
    {
        cuFloatComplex gap = h_convolved_signal_ref[position] - h_convolved_signal[position];
        accumulated += gap.real * gap.real + gap.imag * gap.imag;
        position++;
    }

    // Mean over the reference length, halved, then square-rooted.
    double measure = Math.Sqrt(accumulated / h_convolved_signal_ref.Length / 2.0f);
    return measure < eps;
}
/// <summary>
/// Produces padded copies of the signal and the filter kernel with a shared length.
/// The signal is followed by zeros; the kernel is split at <c>filter_kernel_size / 2</c>, with the
/// second part placed at index 0 and the first part placed at the end of the padded array.
/// </summary>
/// <param name="signal">Input signal.</param>
/// <param name="padded_signal">Set to a newly allocated padded signal.</param>
/// <param name="signal_size">Number of signal elements to copy.</param>
/// <param name="filter_kernel">Input filter kernel.</param>
/// <param name="padded_filter_kernel">Set to a newly allocated padded, wrapped kernel.</param>
/// <param name="filter_kernel_size">Number of kernel elements.</param>
/// <returns>The length of both padded arrays.</returns>
static int PadData(cuFloatComplex[] signal, ref cuFloatComplex[] padded_signal, int signal_size,
                   cuFloatComplex[] filter_kernel, ref cuFloatComplex[] padded_filter_kernel, int filter_kernel_size)
{
    int headLength = filter_kernel_size / 2;                 // elements before the split point
    int tailLength = filter_kernel_size - headLength;        // elements from the split point onwards
    int total = signal_size + tailLength;

    padded_signal = new cuFloatComplex[total];
    Array.Copy(signal, 0, padded_signal, 0, signal_size);

    padded_filter_kernel = new cuFloatComplex[total];
    // Part from the split point onwards goes to the start of the padded kernel.
    Array.Copy(filter_kernel, headLength, padded_filter_kernel, 0, tailLength);
    // Part before the split point wraps to the end.
    Array.Copy(filter_kernel, 0, padded_filter_kernel, (total - headLength), headLength);

    return total;
}
// Extern declaration (no managed body) for cusparseCcsr2csr_compress, returning a cusparseStatus.
// Input and output matrices are passed as CUdeviceptr triples (values, column indices, row pointers);
// tol is a cuFloatComplex passed by value.
// NOTE(review): presumably drops entries below the tolerance and writes the compressed CSR — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCcsr2csr_compress(cusparseContext handle,
    int m, /* number of rows */
    int n,
    cusparseMatDescr descra,
    CUdeviceptr inVal, /* csr values array - the elements which are below a certain tolerance will be removed */
    CUdeviceptr inColInd,
    CUdeviceptr inRowPtr, /* corresponding input noncompressed row pointer */
    int inNnz,
    CUdeviceptr nnzPerRow, /* output: returns number of nonzeros per row */
    CUdeviceptr outVal,
    CUdeviceptr outColInd,
    CUdeviceptr outRowPtr,
    cuFloatComplex tol);
/// <summary>
/// Demo driver: initialises the ISF state and the particle system, builds the two initial
/// wave-function components (a plane wave with two circles added to the first component),
/// seeds particles, runs 100 update steps and prints the first 20 particle positions.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // ---- Parameters ----
    int[] vol_size = { 10, 5, 5 };      // box size
    int[] vol_res = { 64, 32, 32 };     // volume resolution
    float hbar = 0.1f;                  // Planck constant
    float dt = 1f / 24f;                // time step
    int tmax = 85;
    float[] background_vel = { -0.2f, 0, 0 };

    float r1 = 1.5f;
    float r2 = 0.9f;
    float[] n1 = { -1, 0, 0 };
    float[] n2 = { -1, 0, 0 };
    float[] cen1 = { vol_size[0] / 2f, vol_size[1] / 2f, vol_size[2] / 2f };
    float[] cen2 = { vol_size[0] / 2f, vol_size[1] / 2f, vol_size[2] / 2f };

    int n_particles = 10000;

    // ---- Initialisation ----
    ISF.Init(vol_size, vol_res, hbar, dt);
    Particles.init(n_particles);

    // Initial psi: plane wave exp(i * k . p); the second component is scaled by 0.01.
    float[] kvec =
    {
        background_vel[0] / hbar,
        background_vel[1] / hbar,
        background_vel[2] / hbar
    };

    var tmp1 = new cuFloatComplex[ISF.properties.resx, ISF.properties.resy, ISF.properties.resz];
    var tmp2 = new cuFloatComplex[ISF.properties.resx, ISF.properties.resy, ISF.properties.resz];

    for (int a = 0; a < vol_res[0]; a++)
    {
        for (int b = 0; b < vol_res[1]; b++)
        {
            for (int c = 0; c < vol_res[2]; c++)
            {
                float phase = kvec[0] * ISF.properties.px[a, b, c]
                            + kvec[1] * ISF.properties.py[a, b, c]
                            + kvec[2] * ISF.properties.pz[a, b, c];
                Complex wave = Complex.Exp(Complex.ImaginaryOne * phase);

                tmp1[a, b, c] = new cuFloatComplex((float)wave.Real, (float)wave.Imaginary);
                tmp2[a, b, c] = new cuFloatComplex((float)(wave.Real * 0.01), (float)(wave.Imaginary * 0.01));
            }
        }
    }

    float d = ISF.properties.dx * 5;
    ISF.add_circle(tmp1, cen1, n1, r1, d);
    ISF.add_circle(tmp1, cen2, n2, r2, d);

    ISF.psi1.CopyToDevice(tmp1);
    ISF.psi2.CopyToDevice(tmp2);
    ISF.Normalize();
    ISF.PressureProject();

    // Particles: x fixed at 5, y and z drawn uniformly from [0.5, 4.5).
    var x = new float[n_particles];
    var y = new float[n_particles];
    var z = new float[n_particles];
    Random rnd = new Random();
    for (int p = 0; p < n_particles; p++)
    {
        y[p] = (float)(rnd.NextDouble() * 4 + 0.5);
        z[p] = (float)(rnd.NextDouble() * 4 + 0.5);
        x[p] = 5;
    }
    Particles.add_particles(x, y, z, n_particles);

    Velocity vel = new Velocity(ISF.properties.resx, ISF.properties.resy, ISF.properties.resz);

    // ---- Main iteration ----
    Console.Out.WriteLine("Start");
    // NOTE(review): itermax is computed but the loop below runs a fixed 100 steps — confirm which is intended.
    int itermax = (int)Math.Ceiling(tmax / dt);
    for (int step = 0; step < 100; step++)
    {
        // Incompressible Schroedinger flow step.
        ISF.update_space();

        // Particle update.
        ISF.update_velocities(vel);
        Particles.calculate_movement(vel);
    }

    float[] xx = Particles.x;
    float[] yy = Particles.y;
    float[] zz = Particles.z;

    for (int p = 0; p < 20; p++)
    {
        Console.Out.WriteLine(xx[p] + " " + yy[p] + " " + zz[p]);
    }
}
// Extern declaration (no managed body) for cusparseChybsv_solve, returning a cusparseStatus.
// alpha is a single cuFloatComplex passed by reference; the matrix is passed as descra / hybA together with a
// cusparseSolveAnalysisInfo; x and y are CUdeviceptr values.
// NOTE(review): presumably the solve phase of a HYB-format triangular solve — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseChybsv_solve(cusparseContext handle, cusparseOperation trans, ref cuFloatComplex alpha, cusparseMatDescr descra, cusparseHybMat hybA, cusparseSolveAnalysisInfo info, CUdeviceptr x, CUdeviceptr y);
/// <summary>
/// simpleCUFFT sample: convolves a random signal with a random filter kernel on the device
/// (forward FFT of both, pointwise multiply-and-scale kernel, inverse FFT), computes the same
/// convolution on the host, and prints whether the two results agree within 1e-5.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    const int SIGNAL_SIZE = 50;
    const int FILTER_KERNEL_SIZE = 11;

    Console.WriteLine("[simpleCUFFT] is starting...");

    var assembly = Assembly.GetExecutingAssembly();
    var resourceName = "simpleCUFFT.simpleCUFFTKernel.ptx";

    bool bTestResult;

    // `using` releases the context, buffers and plan even when a call throws.
    // Disposal order (plan, filter buffer, signal buffer, context) matches the original manual cleanup.
    using (CudaContext ctx = new CudaContext(0))
    {
        CudaKernel ComplexPointwiseMulAndScale;
        using (Stream stream = assembly.GetManifestResourceStream(resourceName))
        {
            // GetManifestResourceStream returns null when the resource is missing.
            if (stream == null)
            {
                throw new InvalidOperationException("Embedded resource not found: " + resourceName);
            }
            ComplexPointwiseMulAndScale = ctx.LoadKernelPTX(stream, "ComplexPointwiseMulAndScale");
        }

        // Fixed seed so the generated signal and filter are reproducible.
        Random rand = new Random(0);

        // Host signal: random real part, zero imaginary part.
        cuFloatComplex[] h_signal = new cuFloatComplex[SIGNAL_SIZE];
        for (int i = 0; i < SIGNAL_SIZE; ++i)
        {
            h_signal[i].real = (float)rand.NextDouble();
            h_signal[i].imag = 0;
        }

        // Host filter kernel, initialised the same way.
        cuFloatComplex[] h_filter_kernel = new cuFloatComplex[FILTER_KERNEL_SIZE];
        for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
        {
            h_filter_kernel[i].real = (float)rand.NextDouble();
            h_filter_kernel[i].imag = 0;
        }

        // Pad signal and filter kernel to a common length.
        cuFloatComplex[] h_padded_signal = null;
        cuFloatComplex[] h_padded_filter_kernel = null;
        int new_size = PadData(h_signal, ref h_padded_signal, SIGNAL_SIZE,
                               h_filter_kernel, ref h_padded_filter_kernel, FILTER_KERNEL_SIZE);

        using (CudaDeviceVariable<cuFloatComplex> d_signal = new CudaDeviceVariable<cuFloatComplex>(new_size))
        using (CudaDeviceVariable<cuFloatComplex> d_filter_kernel = new CudaDeviceVariable<cuFloatComplex>(new_size))
        using (CudaFFTPlan1D plan = new CudaFFTPlan1D(new_size, cufftType.C2C, 1))
        {
            // Copy host memory to device.
            d_signal.CopyToDevice(h_padded_signal);
            d_filter_kernel.CopyToDevice(h_padded_filter_kernel);

            // Transform signal and kernel.
            Console.WriteLine("Transforming signal cufftExecC2C");
            plan.Exec(d_signal.DevicePointer, TransformDirection.Forward);
            plan.Exec(d_filter_kernel.DevicePointer, TransformDirection.Forward);

            // Multiply the coefficients together and normalize the result.
            Console.WriteLine("Launching ComplexPointwiseMulAndScale<<< >>>");
            ComplexPointwiseMulAndScale.BlockDimensions = 256;
            ComplexPointwiseMulAndScale.GridDimensions = 32;
            ComplexPointwiseMulAndScale.Run(d_signal.DevicePointer, d_filter_kernel.DevicePointer, new_size, 1.0f / new_size);

            // Transform signal back.
            Console.WriteLine("Transforming signal back cufftExecC2C");
            plan.Exec(d_signal.DevicePointer, TransformDirection.Inverse);

            // Copy device memory to host (conversion from the device variable to an array).
            cuFloatComplex[] h_convolved_signal = d_signal;

            // Reference convolution on the host.
            cuFloatComplex[] h_convolved_signal_ref = new cuFloatComplex[SIGNAL_SIZE];
            Convolve(h_signal, SIGNAL_SIZE, h_filter_kernel, FILTER_KERNEL_SIZE, h_convolved_signal_ref);

            // Check result.
            bTestResult = sdkCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 1e-5f);
        }
    }

    if (bTestResult)
    {
        Console.WriteLine("Test Passed");
    }
    else
    {
        Console.WriteLine("Test Failed");
    }
}
// Extern declaration (no managed body) for cusparseCbsrmm, returning a cusparseStatus.
// alpha and beta are single cuFloatComplex values passed by reference; A is given as a descriptor plus three BSR
// CUdeviceptr values and an int blockSize; B and C are CUdeviceptr values with int leading dimensions ldb / ldc.
// NOTE(review): presumably C = alpha * op(A) * op(B) + beta * C with BSR-format A — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCbsrmm(cusparseContext handle, cusparseDirection dirA, cusparseOperation transA, cusparseOperation transB, int mb, int n, int kb, int nnzb, ref cuFloatComplex alpha, cusparseMatDescr descrA, CUdeviceptr bsrValA, CUdeviceptr bsrRowPtrA, CUdeviceptr bsrColIndA, int blockSize, CUdeviceptr B, int ldb, ref cuFloatComplex beta, CUdeviceptr C, int ldc);
// Extern declaration (no managed body) for cublasCrotg_v2, returning a CublasStatus.
// All four operands are passed by reference: a, b and s as cuFloatComplex, c as float.
// NOTE(review): presumably the complex Givens-rotation setup of cuBLAS — confirm which operands are overwritten against the cuBLAS docs.
public static extern CublasStatus cublasCrotg_v2(CudaBlasHandle handle,
    ref cuFloatComplex a, /* host or device pointer */
    ref cuFloatComplex b, /* host or device pointer */
    ref float c, /* host or device pointer */
    ref cuFloatComplex s);
// Extern declaration (no managed body) for cusparseCbsrilu02_numericBoost, returning a cusparseStatus.
// Takes a bsrilu02Info, an int enable flag, a double tolerance passed by reference and a cuFloatComplex boost value passed by reference.
// Note that tol is declared as double even though boost_val is single-precision complex.
// NOTE(review): presumably configures numeric boosting for the BSR ILU(0) factorisation — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCbsrilu02_numericBoost(cusparseContext handle, bsrilu02Info info, int enable_boost, ref double tol, ref cuFloatComplex boost_val);
// Extern declaration (no managed body) for cublasCsyrkEx, returning a CublasStatus.
// alpha and beta are single cuFloatComplex values passed by reference; A and C are CUdeviceptr values, each with
// an explicit cudaDataType and an int leading dimension.
// NOTE(review): presumably the symmetric rank-k update with caller-specified storage types — confirm against the cuBLAS docs.
public static extern CublasStatus cublasCsyrkEx( CudaBlasHandle handle, FillMode uplo, Operation trans, int n, int k, ref cuFloatComplex alpha, /* host or device pointer */ CUdeviceptr A, cudaDataType Atype, int lda, ref cuFloatComplex beta, /* host or device pointer */ CUdeviceptr C, cudaDataType Ctype, int ldc);
// Extern declaration (no managed body) for cusparseCdotci, returning a cusparseStatus.
// xVal, xInd and y are CUdeviceptr values; the result is written through a cuFloatComplex passed by reference.
// NOTE(review): presumably the conjugated dot product of a sparse x with a dense y — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCdotci(cusparseContext handle, int nnz, CUdeviceptr xVal, CUdeviceptr xInd, CUdeviceptr y, ref cuFloatComplex resultDevHostPtr, cusparseIndexBase idxBase);
// Extern declaration (no managed body) for cublasCtrsmBatched, returning a CublasStatus.
// alpha is a single cuFloatComplex passed by reference; A and B are CUdeviceptr values with int leading dimensions; batchCount is an int.
// NOTE(review): presumably a batched triangular solve where A and B point to arrays of matrix pointers — confirm against the cuBLAS docs.
public static extern CublasStatus cublasCtrsmBatched( CudaBlasHandle handle, SideMode side, FillMode uplo, Operation trans, DiagType diag, int m, int n, ref cuFloatComplex alpha, /*Host or Device Pointer*/ CUdeviceptr A, int lda, CUdeviceptr B, int ldb, int batchCount);
// Extern declaration (no managed body) for cusparseCcsrgemm2, returning a cusparseStatus.
// alpha and beta are single cuFloatComplex values passed by reference. Matrices A, B, D and C are each given as a
// descriptor plus three CUdeviceptr values (values, row pointers, column indices); A, B and D also carry an nnz count.
// A csrgemm2Info and a CUdeviceptr work buffer are passed last.
// NOTE(review): presumably C = alpha * A * B + beta * D in CSR format — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCcsrgemm2(cusparseContext handle, int m, int n, int k, ref cuFloatComplex alpha, cusparseMatDescr descrA, int nnzA, CUdeviceptr csrSortedValA, CUdeviceptr csrSortedRowPtrA, CUdeviceptr csrSortedColIndA, cusparseMatDescr descrB, int nnzB, CUdeviceptr csrSortedValB, CUdeviceptr csrSortedRowPtrB, CUdeviceptr csrSortedColIndB, ref cuFloatComplex beta, cusparseMatDescr descrD, int nnzD, CUdeviceptr csrSortedValD, CUdeviceptr csrSortedRowPtrD, CUdeviceptr csrSortedColIndD, cusparseMatDescr descrC, CUdeviceptr csrSortedValC, CUdeviceptr csrSortedRowPtrC, CUdeviceptr csrSortedColIndC, csrgemm2Info info, CUdeviceptr pBuffer);
// Extern declaration (no managed body) for cublasCdotu_v2, returning a CublasStatus.
// x and y are CUdeviceptr values marked [In], each with an int increment; the result is written through a cuFloatComplex passed by reference.
// NOTE(review): presumably the unconjugated complex dot product — confirm against the cuBLAS docs.
public static extern CublasStatus cublasCdotu_v2(CudaBlasHandle handle, int n, [In] CUdeviceptr x, int incx, [In] CUdeviceptr y, int incy, ref cuFloatComplex result);
// Extern declaration (no managed body) for cusparseCgemvi, returning a cusparseStatus.
// alpha and beta are single cuFloatComplex values passed by reference; A is a CUdeviceptr with int leading dimension lda;
// xVal, xInd, y and pBuffer are CUdeviceptr values; idxBase selects the index base.
// NOTE(review): presumably y = alpha * op(A) * x + beta * y with dense A and sparse x — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCgemvi(cusparseContext handle, cusparseOperation transA, int m, int n, ref cuFloatComplex alpha, /* host or device pointer */ CUdeviceptr A, int lda, int nnz, CUdeviceptr xVal, CUdeviceptr xInd, ref cuFloatComplex beta, /* host or device pointer */ CUdeviceptr y, cusparseIndexBase idxBase, CUdeviceptr pBuffer);
// Extern declaration (no managed body) for cublasCgemm3m, returning a CublasStatus.
// alpha and beta are single cuFloatComplex values passed by reference; A, B and C are CUdeviceptr values with int leading dimensions.
// NOTE(review): presumably the complex GEMM variant of the same name in cuBLAS — confirm against the cuBLAS docs.
public static extern CublasStatus cublasCgemm3m(CudaBlasHandle handle, Operation transa, Operation transb, int m, int n, int k, ref cuFloatComplex alpha, /* host or device pointer */ CUdeviceptr A, int lda, CUdeviceptr B, int ldb, ref cuFloatComplex beta, /* host or device pointer */ CUdeviceptr C, int ldc);
// Extern declaration (no managed body) for cusparseCcsrmv, returning a cusparseStatus.
// alpha and beta are single cuFloatComplex values passed by reference; A is given as a descriptor plus three CSR
// CUdeviceptr values; x and y are CUdeviceptr values.
// NOTE(review): presumably y = alpha * op(A) * x + beta * y with CSR-format A — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCcsrmv(cusparseContext handle, cusparseOperation transA, int m, int n, int nnz, ref cuFloatComplex alpha, cusparseMatDescr descrA, CUdeviceptr csrValA, CUdeviceptr csrRowPtrA, CUdeviceptr csrColIndA, CUdeviceptr x, ref cuFloatComplex beta, CUdeviceptr y);
// Extern declaration (no managed body) for cublasCgemmBatched, returning a CublasStatus.
// alpha and beta are single cuFloatComplex values passed by reference; Aarray, Barray and Carray are CUdeviceptr values
// with int leading dimensions; batchCount is an int.
// NOTE(review): presumably a batched GEMM where each *array argument points to an array of matrix pointers — confirm against the cuBLAS docs.
public static extern CublasStatus cublasCgemmBatched(CudaBlasHandle handle, Operation transa, Operation transb, int m, int n, int k, ref cuFloatComplex alpha, /* host or device pointer */ CUdeviceptr Aarray, int lda, CUdeviceptr Barray, int ldb, ref cuFloatComplex beta, /* host or device pointer */ CUdeviceptr Carray, int ldc, int batchCount);
// Extern declaration (no managed body) for cusparseCcsrsv_solve, returning a cusparseStatus.
// alpha is a single cuFloatComplex passed by reference; A is given as a descriptor plus three CSR CUdeviceptr values
// together with a cusparseSolveAnalysisInfo; x and y are CUdeviceptr values.
// NOTE(review): presumably the solve phase of a CSR triangular solve — confirm against the cuSPARSE docs.
public static extern cusparseStatus cusparseCcsrsv_solve(cusparseContext handle, cusparseOperation transA, int m, ref cuFloatComplex alpha, cusparseMatDescr descrA, CUdeviceptr csrValA, CUdeviceptr csrRowPtrA, CUdeviceptr csrColIndA, cusparseSolveAnalysisInfo info, CUdeviceptr x, CUdeviceptr y);
// Extern declaration (no managed body) for cublasCgemmEx, returning a CublasStatus.
// alpha and beta are single cuFloatComplex values passed by reference; A, B and C are CUdeviceptr values, each with
// an explicit cudaDataType and an int leading dimension.
// NOTE(review): presumably a complex GEMM with caller-specified storage types — confirm against the cuBLAS docs.
public static extern CublasStatus cublasCgemmEx(CudaBlasHandle handle, Operation transa, Operation transb, int m, int n, int k, ref cuFloatComplex alpha, CUdeviceptr A, cudaDataType Atype, int lda, CUdeviceptr B, cudaDataType Btype, int ldb, ref cuFloatComplex beta, CUdeviceptr C, cudaDataType Ctype, int ldc);