Esempio n. 1
0
        // Compares a result against a reference element by element and reports whether the
        // root-mean-square style error stays below eps.
        //   h_convolved_signal_ref - reference values; its length determines how many
        //                            elements are compared
        //   h_convolved_signal     - values under test; must hold at least as many elements
        //   eps                    - exclusive upper bound for the error measure
        // Returns true when sqrt(sum(|ref - test|^2) / count / 2) < eps.
        static bool sdkCompareL2fe(cuFloatComplex[] h_convolved_signal_ref, cuFloatComplex[] h_convolved_signal, float eps)
        {
            int   count        = h_convolved_signal_ref.Length;
            float squaredError = 0;
            int   index        = 0;

            while (index < count)
            {
                cuFloatComplex delta = h_convolved_signal_ref[index] - h_convolved_signal[index];
                squaredError += delta.real * delta.real + delta.imag * delta.imag;
                index++;
            }

            // Accumulation and scaling stay in single precision; only the root is taken in double.
            double error = Math.Sqrt(squaredError / count / 2.0f);
            return error < eps;
        }
Esempio n. 2
0
        // Builds the static simulation state:
        //   * grid properties and the kernel helper,
        //   * device buffers psi1 / psi2 with properties.num elements each,
        //   * the FFT helper for the grid resolution,
        //   * per-cell index grids and position tables (px, py, pz),
        //   * the _fac table (zeroed at index [0,0,0]) and the _mask table filled by
        //     build_schroedinger, both uploaded to the device.
        // volSize / volRes are expected to hold three entries each (one per axis);
        // hbar and dt are forwarded unchanged to SpaceProperties.
        public static void Init(int[] volSize, int[] volRes, float hbar, float dt)
        {
            properties = new SpaceProperties(volSize, volRes, hbar, dt);
            ISFKernels.Init(properties);

            psi1 = new CudaDeviceVariable<cuFloatComplex>(properties.num);
            psi2 = new CudaDeviceVariable<cuFloatComplex>(properties.num);

            FFT.init(properties.resx, properties.resy, properties.resz);

            // One index vector per axis, expanded to full 3D index grids.
            _ix = Enumerable.Range(0, properties.resx).ToArray();
            _iy = Enumerable.Range(0, properties.resy).ToArray();
            _iz = Enumerable.Range(0, properties.resz).ToArray();
            var grid = Matlab.ndgrid(_ix, _iy, _iz);

            _iix = grid.x;
            _iiy = grid.y;
            _iiz = grid.z;

            // Cell positions: grid index scaled by the cell size of the matching axis.
            properties.px = new float[_iix.GetLength(0), _iix.GetLength(1), _iix.GetLength(2)];
            properties.py = new float[_iiy.GetLength(0), _iiy.GetLength(1), _iiy.GetLength(2)];
            properties.pz = new float[_iiz.GetLength(0), _iiz.GetLength(1), _iiz.GetLength(2)];
            for (int a = 0; a < properties.resx; a++)
            {
                for (int b = 0; b < properties.resy; b++)
                {
                    for (int c = 0; c < properties.resz; c++)
                    {
                        properties.px[a, b, c] = (_iix[a, b, c]) * properties.dx;
                        properties.py[a, b, c] = (_iiy[a, b, c]) * properties.dy;
                        properties.pz[a, b, c] = (_iiz[a, b, c]) * properties.dz;
                    }
                }
            }

            // sin(pi * index / resolution) / cellSize for each axis.
            _sx = _iix.Select3D((e, i, j, k) => (float)Math.Sin((float)Math.PI * e / properties.resx) / properties.dx);
            _sy = _iiy.Select3D((e, i, j, k) => (float)Math.Sin((float)Math.PI * e / properties.resy) / properties.dy);
            _sz = _iiz.Select3D((e, i, j, k) => (float)Math.Sin((float)Math.PI * e / properties.resz) / properties.dz);

            // -0.25 / (sx^2 + sy^2 + sz^2); the denominator vanishes at [0,0,0], so that
            // entry is overwritten with zero right afterwards.
            cuFloatComplex[,,] hostFac = _iix.Select3D((e, i, j, k) =>
            {
                double denominator = Math.Pow(_sx[i, j, k], 2) + Math.Pow(_sy[i, j, k], 2) + Math.Pow(_sz[i, j, k], 2);
                return new cuFloatComplex((float)(-0.25 / denominator), 0);
            });
            hostFac[0, 0, 0] = new cuFloatComplex(0, 0);
            _fac = new CudaDeviceVariable<cuFloatComplex>(properties.num);
            _fac.CopyToDevice(hostFac);

            // Host-side mask, filled in place and then uploaded.
            var hostMask = new cuFloatComplex[properties.resx, properties.resy, properties.resz];

            build_schroedinger(hostMask);
            _mask = new CudaDeviceVariable<cuFloatComplex>(properties.num);
            _mask.CopyToDevice(hostMask);
        }
Esempio n. 3
0
        // Zero-pads the signal and re-arranges the filter kernel into buffers of equal length.
        //   signal / signal_size               - input samples and how many of them to copy
        //   padded_signal                      - receives a new array: the signal followed by zeros
        //   filter_kernel / filter_kernel_size - filter taps and their count
        //   padded_filter_kernel               - receives a new array: the taps from the centre
        //                                        onward at the front, the taps before the centre
        //                                        at the very end, zeros in between
        // Returns the common length of both new arrays
        // (signal_size + filter_kernel_size - filter_kernel_size / 2).
        static int PadData(cuFloatComplex[] signal, ref cuFloatComplex[] padded_signal, int signal_size,
                           cuFloatComplex[] filter_kernel, ref cuFloatComplex[] padded_filter_kernel, int filter_kernel_size)
        {
            int tapsBeforeCentre = filter_kernel_size / 2;
            int tapsFromCentre   = filter_kernel_size - tapsBeforeCentre;
            int paddedLength     = signal_size + tapsFromCentre;

            // Signal: copied to the front, the tail stays default-initialised (zero).
            padded_signal = new cuFloatComplex[paddedLength];
            Array.Copy(signal, padded_signal, signal_size);

            // Filter: second half first, first half wrapped around to the end.
            padded_filter_kernel = new cuFloatComplex[paddedLength];
            Array.Copy(filter_kernel, tapsBeforeCentre, padded_filter_kernel, 0, tapsFromCentre);

            int wrapStart = paddedLength - tapsBeforeCentre;
            Array.Copy(filter_kernel, 0, padded_filter_kernel, wrapStart, tapsBeforeCentre);

            return paddedLength;
        }
Esempio n. 4
0
        // Multiplies psi in place by a phase factor exp(i * alpha) that is non-trivial only
        // inside a disc-shaped slab around `center`.
        //   psi    - wave-function samples, modified in place; indexed like properties.px/py/pz
        //   center - slab centre (x, y, z); at least three entries
        //   normal - slab axis (x, y, z); at least three entries. It need not be unit length
        //            and is NOT modified (a normalised copy is used internally).
        //   r      - radius of the slab measured perpendicular to the axis
        //   d      - slab thickness along the axis
        // For cells within radius r of the axis and within d/2 of the centre plane, alpha runs
        // linearly in the signed axial distance z; everywhere else alpha is 0 and the cell
        // value is multiplied by exp(0).
        // A zero-length normal makes every comparison fail (NaN), so psi keeps its values.
        public static void add_circle(cuFloatComplex[,,] psi, float[] center, float[] normal, float r, float d)
        {
            float norm = (float)Math.Sqrt(Math.Pow(normal[0], 2) + Math.Pow(normal[1], 2) + Math.Pow(normal[2], 2));

            // Normalise into locals instead of writing back into the caller's array.
            float nx = normal[0] / norm;
            float ny = normal[1] / norm;
            float nz = normal[2] / norm;

            float   alpha, rx, ry, rz, z;
            Complex tmp;

            for (int i = 0; i < properties.resx; i++)
            {
                for (int j = 0; j < properties.resy; j++)
                {
                    for (int k = 0; k < properties.resz; k++)
                    {
                        // Offset of this cell from the slab centre.
                        rx    = properties.px[i, j, k] - center[0];
                        ry    = properties.py[i, j, k] - center[1];
                        rz    = properties.pz[i, j, k] - center[2];
                        alpha = 0;

                        // Signed distance along the axis; |r|^2 - z^2 is the squared radial distance.
                        z = rx * nx + ry * ny + rz * nz;
                        if (rx * rx + ry * ry + rz * rz - z * z < r * r)
                        {
                            if (z > 0 && z <= d / 2)
                            {
                                alpha = (float)-Math.PI * (2 * z / d - 1);
                            }

                            if (z <= 0 && z >= -d / 2)
                            {
                                alpha = (float)-Math.PI * (2 * z / d + 1);
                            }
                        }

                        tmp          = new Complex(psi[i, j, k].real, psi[i, j, k].imag);
                        tmp         *= Complex.Exp(Complex.ImaginaryOne * alpha);
                        psi[i, j, k] = new cuFloatComplex((float)tmp.Real, (float)tmp.Imaginary);
                    }
                }
            }
        }
Esempio n. 5
0
        // Re-phases both wave-function components inside the jet region.
        //   psi1, psi2 - component samples, modified in place; psi2 must be at least as
        //                large as psi1 in every dimension
        //   t          - time used in the phase term
        // For every cell flagged in isJet the magnitude of each component is kept and its
        // phase is replaced by kvec . position - omega * t. Cells outside the jet are left
        // untouched. If the real part of a re-phased value is infinite or NaN, the cell is
        // reset to (1, 1) for psi1 and (0.01, 0.01) for psi2.
        public void constraint(cuFloatComplex[,,] psi1, cuFloatComplex[,,] psi2, float t)
        {
            float   phase;
            Complex amp1, amp2;
            Complex tmp1, tmp2;
            Complex rotation;

            for (int i = 0; i < psi1.GetLength(0); i++)
            {
                for (int j = 0; j < psi1.GetLength(1); j++)
                {
                    for (int k = 0; k < psi1.GetLength(2); k++)
                    {
                        // Magnitudes and phase are only needed for jet cells, so skip the
                        // square roots and the exponential everywhere else.
                        if (!isJet[i, j, k])
                        {
                            continue;
                        }

                        amp1  = Complex.Sqrt(psi1[i, j, k].real * psi1[i, j, k].real + psi1[i, j, k].imag * psi1[i, j, k].imag);
                        amp2  = Complex.Sqrt(psi2[i, j, k].real * psi2[i, j, k].real + psi2[i, j, k].imag * psi2[i, j, k].imag);
                        phase = kvec[0] * _px[i, j, k] + kvec[1] * _py[i, j, k] + kvec[2] * _pz[i, j, k] - omega * t;

                        // Both components share the same phase factor; evaluate it once.
                        rotation = Complex.Exp(Complex.ImaginaryOne * phase);

                        tmp1          = amp1 * rotation;
                        tmp2          = amp2 * rotation;
                        psi1[i, j, k] = new cuFloatComplex((float)tmp1.Real, (float)tmp1.Imaginary);
                        psi2[i, j, k] = new cuFloatComplex((float)tmp2.Real, (float)tmp2.Imaginary);

                        // Fallback values for cells whose result is not a finite number.
                        if (float.IsInfinity(psi1[i, j, k].real) || float.IsNaN(psi1[i, j, k].real))
                        {
                            psi1[i, j, k].real = 1;
                            psi1[i, j, k].imag = 1;
                        }

                        if (float.IsInfinity(psi2[i, j, k].real) || float.IsNaN(psi2[i, j, k].real))
                        {
                            psi2[i, j, k].real = (float)0.01;
                            psi2[i, j, k].imag = (float)0.01;
                        }
                    }
                }
            }
        }
Esempio n. 6
0
        // Fills tmpMask with exp(i * lambda * dt / 2) for every grid cell, where
        //   lambda = -4 * pi^2 * hbar * (kx^2 + ky^2 + kz^2)
        // and kx, ky, kz are the grid indices shifted by half the resolution and divided by
        // the box size of the matching axis.
        //   tmpMask - destination array, written in place; must cover the index range of _iix
        private static void build_schroedinger(cuFloatComplex[,,] tmpMask)
        {
            var scale = -4 * Math.Pow(Math.PI, 2) * properties.hbar;

            int countX = _iix.GetLength(0);
            int countY = _iix.GetLength(1);
            int countZ = _iix.GetLength(2);

            for (int a = 0; a < countX; a++)
            {
                for (int b = 0; b < countY; b++)
                {
                    for (int c = 0; c < countZ; c++)
                    {
                        // Centred wave numbers for this cell.
                        float kx = (_iix[a, b, c] - (float)properties.resx / 2) / properties.sizex;
                        float ky = (_iiy[a, b, c] - (float)properties.resy / 2) / properties.sizey;
                        float kz = (_iiz[a, b, c] - (float)properties.resz / 2) / properties.sizez;

                        var lambda   = scale * (Math.Pow(kx, 2) + Math.Pow(ky, 2) + Math.Pow(kz, 2));
                        var rotation = Complex.Exp(Complex.ImaginaryOne * lambda * properties.dt / 2f);

                        tmpMask[a, b, c] = new cuFloatComplex((float)rotation.Real, (float)rotation.Imaginary);
                    }
                }
            }
        }
Esempio n. 7
0
        // Reference convolution computed on the host.
        //   signal / signal_size               - input samples and the number of outputs to produce
        //   filter_kernel / filter_kernel_size - filter taps and their count
        //   filtered_signal                    - receives the result; must hold at least
        //                                        signal_size elements
        // Samples outside [0, signal_size) are treated as absent (they contribute nothing).
        static void Convolve(cuFloatComplex[] signal, int signal_size,
              cuFloatComplex[] filter_kernel, int filter_kernel_size,
              cuFloatComplex[] filtered_signal)
        {
            int minRadius = filter_kernel_size / 2;
            int maxRadius = filter_kernel_size - minRadius;

            for (int outIndex = 0; outIndex < signal_size; ++outIndex)
            {
                // Reset the accumulator slot.
                filtered_signal[outIndex].imag = 0;
                filtered_signal[outIndex].real = 0;

                // Clamp the window [outIndex - maxRadius + 1, outIndex + minRadius] to the signal.
                int firstSample = Math.Max(0, outIndex - maxRadius + 1);
                int lastSample  = Math.Min(signal_size - 1, outIndex + minRadius);

                for (int sample = firstSample; sample <= lastSample; ++sample)
                {
                    int tap = minRadius - (sample - outIndex);
                    filtered_signal[outIndex] = filtered_signal[outIndex] + signal[sample] * filter_kernel[tap];
                }
            }
        }
        // External (native) function declaration; the call returns a CublasStatus.
        // alpha and beta are passed by reference; A, B and C are device pointers, each with a
        // leading dimension (lda/ldb/ldc) and a 64-bit signed stride (strideA/strideB/strideC).
        // NOTE(review): presumably binds the cuBLAS routine of the same name (single-precision
        // complex strided-batched GEMM) - confirm against the cuBLAS documentation.
        public static extern CublasStatus cublasCgemmStridedBatched(CudaBlasHandle handle,
																 Operation transa,
																 Operation transb,
																 int m,
																 int n,
																 int k,
																 ref cuFloatComplex alpha,  // host or device pointer
																 CUdeviceptr A,
																 int lda,
																 long strideA,   // purposely signed 
																 CUdeviceptr B,
																 int ldb,
																 long strideB,
																 ref cuFloatComplex beta,   // host or device pointer
																 CUdeviceptr C,
																 int ldc,
																 long strideC,
																 int batchCount);
Esempio n. 9
0
 // External (native) function declaration; returns a CUResult.
 // dstHost is a single cuFloatComplex passed by reference, srcDevice a device pointer and
 // ByteCount a size value.
 // NOTE(review): presumably the driver-API device-to-host copy - confirm that ByteCount never
 // exceeds the size of the referenced destination value.
 public static extern CUResult cuMemcpyDtoH_v2(ref cuFloatComplex dstHost, CUdeviceptr srcDevice, SizeT ByteCount);
		// External (native) function declaration; returns a cusparseStatus.
		// alpha and beta are passed by reference; x and y are device pointers; the matrix is
		// supplied through descrA and hybA.
		// NOTE(review): presumably the cuSPARSE HYB-format matrix-vector product for
		// single-precision complex data - confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseChybmv(cusparseContext handle, cusparseOperation transA, ref cuFloatComplex alpha, cusparseMatDescr descrA, cusparseHybMat hybA, CUdeviceptr x, ref cuFloatComplex beta, CUdeviceptr y);
Esempio n. 11
0
        // Entry point of the simpleCUFFT sample: convolves a random signal with a random
        // filter on the GPU (forward FFT, point-wise multiply and scale, inverse FFT) and
        // checks the result against the host implementation in Convolve.
        // Prints "Test Passed" or "Test Failed".
        // Throws InvalidOperationException when the embedded PTX resource is missing.
        static void Main(string[] args)
        {
            int SIGNAL_SIZE        = 50;
            int FILTER_KERNEL_SIZE = 11;

            Console.WriteLine("[simpleCUFFT] is starting...");

            var assembly     = Assembly.GetExecutingAssembly();
            var resourceName = "simpleCUFFT.simpleCUFFTKernel.ptx";

            bool bTestResult;

            // All GPU resources live in using blocks so they are released even when a
            // CUDA call throws; disposal order matches the original manual cleanup
            // (plan, filter buffer, signal buffer, context).
            using (CudaContext ctx = new CudaContext(0))
            {
                CudaKernel ComplexPointwiseMulAndScale;

                using (Stream stream = assembly.GetManifestResourceStream(resourceName))
                {
                    // GetManifestResourceStream returns null for an unknown resource name.
                    if (stream == null)
                    {
                        throw new InvalidOperationException(
                                  "Embedded resource '" + resourceName + "' not found. Available resources: " +
                                  string.Join(", ", assembly.GetManifestResourceNames()));
                    }

                    ComplexPointwiseMulAndScale = ctx.LoadKernelPTX(stream, "ComplexPointwiseMulAndScale");
                }

                // Allocate host memory for the signal
                cuFloatComplex[] h_signal = new cuFloatComplex[SIGNAL_SIZE]; // cuFloatComplex gives us complex multiplication in the host reference code

                Random rand = new Random(0);

                // Initialize the memory for the signal
                for (int i = 0; i < SIGNAL_SIZE; ++i)
                {
                    h_signal[i].real = (float)rand.NextDouble();
                    h_signal[i].imag = 0;
                }

                // Allocate host memory for the filter
                cuFloatComplex[] h_filter_kernel = new cuFloatComplex[FILTER_KERNEL_SIZE];

                // Initialize the memory for the filter
                for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
                {
                    h_filter_kernel[i].real = (float)rand.NextDouble();
                    h_filter_kernel[i].imag = 0;
                }

                // Pad signal and filter kernel
                cuFloatComplex[] h_padded_signal        = null;
                cuFloatComplex[] h_padded_filter_kernel = null;
                int new_size = PadData(h_signal, ref h_padded_signal, SIGNAL_SIZE,
                                       h_filter_kernel, ref h_padded_filter_kernel, FILTER_KERNEL_SIZE);

                // Device buffers for signal and filter kernel plus the 1D complex-to-complex plan
                using (CudaDeviceVariable <cuFloatComplex> d_signal = new CudaDeviceVariable <cuFloatComplex>(new_size))
                using (CudaDeviceVariable <cuFloatComplex> d_filter_kernel = new CudaDeviceVariable <cuFloatComplex>(new_size))
                using (CudaFFTPlan1D plan = new CudaFFTPlan1D(new_size, cufftType.C2C, 1))
                {
                    // Copy host memory to device
                    d_signal.CopyToDevice(h_padded_signal);
                    d_filter_kernel.CopyToDevice(h_padded_filter_kernel);

                    // Transform signal and kernel
                    Console.WriteLine("Transforming signal cufftExecC2C");
                    plan.Exec(d_signal.DevicePointer, TransformDirection.Forward);
                    plan.Exec(d_filter_kernel.DevicePointer, TransformDirection.Forward);

                    // Multiply the coefficients together and normalize the result
                    Console.WriteLine("Launching ComplexPointwiseMulAndScale<<< >>>");
                    ComplexPointwiseMulAndScale.BlockDimensions = 256;
                    ComplexPointwiseMulAndScale.GridDimensions  = 32;
                    ComplexPointwiseMulAndScale.Run(d_signal.DevicePointer, d_filter_kernel.DevicePointer, new_size, 1.0f / new_size);

                    // Transform signal back
                    Console.WriteLine("Transforming signal back cufftExecC2C");
                    plan.Exec(d_signal.DevicePointer, TransformDirection.Inverse);

                    // Copy device memory to host (conversion to a host array performs the copy)
                    cuFloatComplex[] h_convolved_signal = d_signal;

                    // Allocate host memory for the convolution result
                    cuFloatComplex[] h_convolved_signal_ref = new cuFloatComplex[SIGNAL_SIZE];

                    // Convolve on the host
                    Convolve(h_signal, SIGNAL_SIZE,
                             h_filter_kernel, FILTER_KERNEL_SIZE,
                             h_convolved_signal_ref);

                    // check result
                    bTestResult = sdkCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 1e-5f);
                }
            }

            if (bTestResult)
            {
                Console.WriteLine("Test Passed");
            }
            else
            {
                Console.WriteLine("Test Failed");
            }
        }
		// External (native) function declaration; returns a cusparseStatus.
		// Takes two CSR inputs (A and B: descriptor, non-zero count, value / row-pointer /
		// column-index device pointers), each with a by-reference scalar (alpha, beta), and
		// the device pointers of a CSR output C.
		// NOTE(review): presumably the cuSPARSE sparse matrix addition for single-precision
		// complex data - confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseCcsrgeam(cusparseContext handle,
											int m,
											int n,
											ref cuFloatComplex alpha,
											cusparseMatDescr descrA,
											int nnzA,
											CUdeviceptr csrValA,
											CUdeviceptr csrRowPtrA,
											CUdeviceptr csrColIndA,
											ref cuFloatComplex beta,
											cusparseMatDescr descrB,
											int nnzB,
											CUdeviceptr csrValB,
											CUdeviceptr csrRowPtrB,
											CUdeviceptr csrColIndB,
											cusparseMatDescr descrC,
											CUdeviceptr csrValC,
											CUdeviceptr csrRowPtrC,
											CUdeviceptr csrColIndC);
		// External (native) function declaration; returns a cusparseStatus.
		// The matrix is given in a block format through device pointers (values, mask,
		// row-start, row-end and column-index arrays) plus blockDim; alpha and beta are passed
		// by reference; x and y are device pointers.
		// NOTE(review): presumably the cuSPARSE masked block-sparse matrix-vector product for
		// single-precision complex data - confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseCbsrxmv(cusparseContext handle,
											cusparseDirection dirA,
											cusparseOperation transA,
											int sizeOfMask,
											int mb,
											int nb,
											int nnzb,
											ref cuFloatComplex alpha,
											cusparseMatDescr descrA,
											CUdeviceptr bsrValA,
											CUdeviceptr bsrMaskPtrA,
											CUdeviceptr bsrRowPtrA,
											CUdeviceptr bsrEndPtrA,
											CUdeviceptr bsrColIndA,
											int blockDim,
											CUdeviceptr x,
											ref cuFloatComplex beta,
											CUdeviceptr y);
		// External (native) function declaration; returns a cusparseStatus.
		// alpha is passed by reference; xVal, xInd and y are device pointers; idxBase selects
		// the index base.
		// NOTE(review): presumably the cuSPARSE sparse-vector axpy for single-precision complex
		// data - confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseCaxpyi(cusparseContext handle, int nnz, ref cuFloatComplex alpha, CUdeviceptr xVal, CUdeviceptr xInd, CUdeviceptr y, cusparseIndexBase idxBase);
		// External (native) function declaration; returns a cusparseStatus.
		// Receives only the structure (row pointers and column indices, no values) of the CSR
		// matrices A, B and D, the by-reference scalars alpha and beta, and an info object;
		// pBufferSizeInBytes is passed by reference so the callee can write a size into it.
		// NOTE(review): presumably the workspace-size query belonging to cusparseCcsrgemm2 -
		// confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseCcsrgemm2_bufferSizeExt(cusparseContext handle,
															 int m,
															 int n,
															 int k,
															 ref cuFloatComplex alpha,
															 cusparseMatDescr descrA,
															 int nnzA,
															 CUdeviceptr csrSortedRowPtrA,
															 CUdeviceptr csrSortedColIndA,
															 cusparseMatDescr descrB,
															 int nnzB,
															 CUdeviceptr csrSortedRowPtrB,
															 CUdeviceptr csrSortedColIndB,
															 ref cuFloatComplex beta,
															 cusparseMatDescr descrD,
															 int nnzD,
															 CUdeviceptr csrSortedRowPtrD,
															 CUdeviceptr csrSortedColIndD,
															 csrgemm2Info info,
															 ref SizeT pBufferSizeInBytes);
		// External (native) function declaration; returns a cusparseStatus.
		// A is a CSR matrix given by device pointers; B and C are device pointers with leading
		// dimensions ldb / ldc; alpha and beta are passed by reference.
		// NOTE(review): presumably the cuSPARSE sparse-times-dense matrix product for
		// single-precision complex data - confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseCcsrmm2(cusparseContext handle, cusparseOperation transa, cusparseOperation transb, int m, int n, int k, int nnz,
                                            ref cuFloatComplex alpha, cusparseMatDescr descrA, CUdeviceptr csrValA, CUdeviceptr csrRowPtrA, CUdeviceptr csrColIndA, CUdeviceptr B, int ldb, ref cuFloatComplex beta, CUdeviceptr C, int ldc);
		// External (native) function declaration; returns a cusparseStatus.
		// The matrix is given in a block format through device pointers plus blockSize; X and Y
		// are device pointers with leading dimensions ldx / ldy; alpha is passed by reference;
		// info, policy and a caller-supplied device buffer (pBuffer) complete the argument list.
		// NOTE(review): presumably the solve phase of the cuSPARSE block-sparse triangular
		// solver with multiple right-hand sides - confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseCbsrsm2_solve(cusparseContext handle,
												   cusparseDirection dirA,
												   cusparseOperation transA,
												   cusparseOperation transXY,
												   int mb,
												   int n,
												   int nnzb,
												   ref cuFloatComplex alpha,
												   cusparseMatDescr descrA,
												   CUdeviceptr bsrVal,
												   CUdeviceptr bsrRowPtr,
												   CUdeviceptr bsrColInd,
												   int blockSize,
												   bsrsm2Info info,
												   CUdeviceptr X,
												   int ldx,
												   CUdeviceptr Y,
												   int ldy,
												   cusparseSolvePolicy policy,
												   CUdeviceptr pBuffer);
		// External (native) function declaration; returns a cusparseStatus.
		// A is a CSR matrix given by device pointers; x and y are device pointers; alpha is
		// passed by reference; info, policy and a caller-supplied device buffer (pBuffer)
		// complete the argument list.
		// NOTE(review): presumably the solve phase of the cuSPARSE CSR triangular solver -
		// confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseCcsrsv2_solve(cusparseContext handle,
												   cusparseOperation transA,
												   int m,
												   int nnz,
												   ref cuFloatComplex alpha,
												   cusparseMatDescr descra,
												   CUdeviceptr csrValA,
												   CUdeviceptr csrRowPtrA,
												   CUdeviceptr csrColIndA,
												   csrsv2Info info,
												   CUdeviceptr x,
												   CUdeviceptr y,
												   cusparseSolvePolicy policy,
												   CUdeviceptr pBuffer);
Esempio n. 19
0
 // Checks a computed result against a reference.
 //   h_convolved_signal_ref - reference values; its length sets the number of compared elements
 //   h_convolved_signal     - values under test; must be at least as long as the reference
 //   eps                    - exclusive upper bound for the error measure
 // Returns true when sqrt(sum(|ref - test|^2) / count / 2) < eps.
 static bool sdkCompareL2fe(cuFloatComplex[] h_convolved_signal_ref, cuFloatComplex[] h_convolved_signal, float eps)
 {
     int   total      = h_convolved_signal_ref.Length;
     float errorSum   = 0;
     int   position   = 0;

     while (position < total)
     {
         cuFloatComplex gap = h_convolved_signal_ref[position] - h_convolved_signal[position];
         errorSum += gap.real * gap.real + gap.imag * gap.imag;
         position++;
     }

     // Sum and scaling are single precision; the square root is evaluated in double.
     double measure = Math.Sqrt(errorSum / total / 2.0f);
     return measure < eps;
 }
Esempio n. 20
0
        // Produces zero-padded copies of the signal and of the filter kernel, both of the
        // same length.
        //   signal / signal_size               - input samples and how many of them to copy
        //   padded_signal                      - receives a new array: the signal followed by zeros
        //   filter_kernel / filter_kernel_size - filter taps and their count
        //   padded_filter_kernel               - receives a new array: taps from the centre onward
        //                                        at the start, taps before the centre at the end
        // Returns the length shared by both new arrays.
        static int PadData(cuFloatComplex[] signal, ref cuFloatComplex[] padded_signal, int signal_size,
            cuFloatComplex[] filter_kernel, ref cuFloatComplex[] padded_filter_kernel, int filter_kernel_size)
        {
            int headTaps    = filter_kernel_size / 2;
            int tailTaps    = filter_kernel_size - headTaps;
            int totalLength = signal_size + tailTaps;

            // Signal goes to the front; the remaining slots keep their default (zero) value.
            padded_signal = new cuFloatComplex[totalLength];
            Array.Copy(signal, padded_signal, signal_size);

            // Filter: centre-and-after taps first, the leading taps wrapped to the end.
            padded_filter_kernel = new cuFloatComplex[totalLength];
            Array.Copy(filter_kernel, headTaps, padded_filter_kernel, 0, tailTaps);

            int wrapOffset = totalLength - headTaps;
            Array.Copy(filter_kernel, 0, padded_filter_kernel, wrapOffset, headTaps);

            return totalLength;
        }
        // External (native) function declaration; returns a cusparseStatus.
        // Takes an input CSR matrix (inVal / inColInd / inRowPtr, inNnz entries), a per-row
        // non-zero count array and output CSR arrays, all as device pointers; tol is passed
        // by value.
        // NOTE(review): presumably the cuSPARSE CSR compression routine that drops entries
        // below the tolerance - confirm against the cuSPARSE documentation.
        public static extern cusparseStatus cusparseCcsr2csr_compress(cusparseContext handle,
                                                        int m, //number of rows
                                                        int n,
                                                        cusparseMatDescr descra,
                                                        CUdeviceptr inVal, //csr values array - the elements which are below a certain tolerance will be removed
                                                        CUdeviceptr inColInd,
														CUdeviceptr inRowPtr,  //corresponding input noncompressed row pointer
                                                        int inNnz,
                                                        CUdeviceptr nnzPerRow, //output: returns number of nonzeros per row 
														CUdeviceptr outVal,
                                                        CUdeviceptr outColInd,
                                                        CUdeviceptr outRowPtr,
                                                        cuFloatComplex tol);
Esempio n. 22
0
    // Entry point of the incompressible-Schroedinger-flow demo.
    // Sets up the grid, seeds both wave-function components with a plane wave for the
    // background velocity, adds two circular phase defects to the first component, spawns
    // particles, advances the simulation for a fixed number of steps and prints the
    // positions of the first particles.
    static void Main(string[] args)
    {
        //PARAMETERS
        int[] vol_size = { 10, 5, 5 };     // box size
        int[] vol_res  = { 64, 32, 32 };   // volume resolution
        float hbar     = (float)0.1;       // Planck constant
        float dt       = 1 / (float)24;    // time step

        // Number of simulation steps. The loop has always run a fixed 100 steps; the
        // step count derived from a maximum time was computed but never used, so it
        // has been removed.
        int iterations = 100;

        // Upper bound on how many particle positions are printed at the end.
        int printCount = 20;

        float[] background_vel = { (float)-0.2, 0, 0 };

        float r1 = (float)1.5;
        float r2 = (float)0.9;

        float[] n1 = { -1, 0, 0 };
        float[] n2 = { -1, 0, 0 };

        float[] cen1 = { vol_size[0] / 2f, vol_size[1] / 2f, vol_size[2] / 2f };
        float[] cen2 = { vol_size[0] / 2f, vol_size[1] / 2f, vol_size[2] / 2f };

        int n_particles = 10000;

        //INITIALISATION
        ISF.Init(vol_size, vol_res, hbar, dt);
        Particles.init(n_particles);

        //init psi: plane wave exp(i * k . x) with k = background velocity / hbar;
        //the second component is the same wave scaled by 0.01
        float[] kvec = { background_vel[0] / hbar, background_vel[1] / hbar, background_vel[2] / hbar };
        float   phase;
        var     tmp1 = new cuFloatComplex[ISF.properties.resx, ISF.properties.resy, ISF.properties.resz];
        var     tmp2 = new cuFloatComplex[ISF.properties.resx, ISF.properties.resy, ISF.properties.resz];
        Complex tmp;

        for (int i = 0; i < vol_res[0]; i++)
        {
            for (int j = 0; j < vol_res[1]; j++)
            {
                for (int k = 0; k < vol_res[2]; k++)
                {
                    phase = kvec[0] * ISF.properties.px[i, j, k] +
                            kvec[1] * ISF.properties.py[i, j, k] +
                            kvec[2] * ISF.properties.pz[i, j, k];
                    tmp           = Complex.Exp(Complex.ImaginaryOne * phase);
                    tmp1[i, j, k] = new cuFloatComplex((float)tmp.Real, (float)tmp.Imaginary);
                    tmp2[i, j, k] = new cuFloatComplex((float)(tmp.Real * 0.01), (float)(tmp.Imaginary * 0.01));
                }
            }
        }

        // Thickness of the phase-defect slabs: five cells along x.
        float d = ISF.properties.dx * 5;

        ISF.add_circle(tmp1, cen1, n1, r1, d);
        ISF.add_circle(tmp1, cen2, n2, r2, d);

        ISF.psi1.CopyToDevice(tmp1);
        ISF.psi2.CopyToDevice(tmp2);

        ISF.Normalize();
        ISF.PressureProject();

        //init particles: x fixed at 5, y and z uniformly random in [0.5, 4.5)
        var    x   = new float[n_particles];
        var    y   = new float[n_particles];
        var    z   = new float[n_particles];
        Random rnd = new Random();

        for (int i = 0; i < n_particles; i++)
        {
            y[i] = (float)(rnd.NextDouble() * 4 + 0.5);
            z[i] = (float)(rnd.NextDouble() * 4 + 0.5);
            x[i] = 5;
        }

        Particles.add_particles(x, y, z, n_particles);

        Velocity vel = new Velocity(ISF.properties.resx, ISF.properties.resy, ISF.properties.resz);

        //MAIN ITERATION
        Console.Out.WriteLine("Start");

        for (int i = 0; i < iterations; i++)
        {
            //incompressible Schroedinger flow
            ISF.update_space();

            //particle update
            ISF.update_velocities(vel);

            Particles.calculate_movement(vel);
        }

        float[] xx = Particles.x;
        float[] yy = Particles.y;
        float[] zz = Particles.z;

        // Never read past the end of the position arrays when fewer particles exist.
        int shown = Math.Min(printCount, Math.Min(xx.Length, Math.Min(yy.Length, zz.Length)));

        for (int i = 0; i < shown; i++)
        {
            Console.Out.WriteLine(xx[i] + " " + yy[i] + " " + zz[i]);
        }
    }
		// External (native) function declaration; returns a cusparseStatus.
		// alpha is passed by reference; x and y are device pointers; the matrix is supplied
		// through descra and hybA together with an analysis info object.
		// NOTE(review): presumably the solve phase of the cuSPARSE HYB-format triangular
		// solver - confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseChybsv_solve(cusparseContext handle, cusparseOperation trans, ref cuFloatComplex alpha, cusparseMatDescr descra, cusparseHybMat hybA, cusparseSolveAnalysisInfo info, CUdeviceptr x, CUdeviceptr y);
Esempio n. 24
0
        // Entry point of the simpleCUFFT sample: convolves a random signal with a random
        // filter on the GPU (forward FFT, point-wise multiply and scale, inverse FFT) and
        // checks the result against the host implementation in Convolve.
        // Prints "Test Passed" or "Test Failed".
        // Throws InvalidOperationException when the embedded PTX resource is missing.
        static void Main(string[] args)
        {
            int SIGNAL_SIZE = 50;
            int FILTER_KERNEL_SIZE = 11;

            Console.WriteLine("[simpleCUFFT] is starting...");

            var assembly = Assembly.GetExecutingAssembly();
            var resourceName = "simpleCUFFT.simpleCUFFTKernel.ptx";

            bool bTestResult;

            // GPU resources are wrapped in using blocks so they are released even when a
            // CUDA call throws; disposal order matches the original manual cleanup
            // (plan, filter buffer, signal buffer, context).
            using (CudaContext ctx = new CudaContext(0))
            {
                CudaKernel ComplexPointwiseMulAndScale;
                using (Stream stream = assembly.GetManifestResourceStream(resourceName))
                {
                    // GetManifestResourceStream returns null for an unknown resource name.
                    if (stream == null)
                    {
                        throw new InvalidOperationException(
                            "Embedded resource '" + resourceName + "' not found. Available resources: " +
                            string.Join(", ", assembly.GetManifestResourceNames()));
                    }

                    ComplexPointwiseMulAndScale = ctx.LoadKernelPTX(stream, "ComplexPointwiseMulAndScale");
                }

                // Allocate host memory for the signal
                cuFloatComplex[] h_signal = new cuFloatComplex[SIGNAL_SIZE]; // cuFloatComplex gives us complex multiplication in the host reference code

                Random rand = new Random(0);
                // Initialize the memory for the signal
                for (int i = 0; i < SIGNAL_SIZE; ++i)
                {
                    h_signal[i].real = (float)rand.NextDouble();
                    h_signal[i].imag = 0;
                }

                // Allocate host memory for the filter
                cuFloatComplex[] h_filter_kernel = new cuFloatComplex[FILTER_KERNEL_SIZE];

                // Initialize the memory for the filter
                for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
                {
                    h_filter_kernel[i].real = (float)rand.NextDouble();
                    h_filter_kernel[i].imag = 0;
                }

                // Pad signal and filter kernel
                cuFloatComplex[] h_padded_signal = null;
                cuFloatComplex[] h_padded_filter_kernel = null;
                int new_size = PadData(h_signal, ref h_padded_signal, SIGNAL_SIZE,
                                       h_filter_kernel, ref h_padded_filter_kernel, FILTER_KERNEL_SIZE);

                // Device buffers for signal and filter kernel plus the 1D complex-to-complex plan
                using (CudaDeviceVariable<cuFloatComplex> d_signal = new CudaDeviceVariable<cuFloatComplex>(new_size))
                using (CudaDeviceVariable<cuFloatComplex> d_filter_kernel = new CudaDeviceVariable<cuFloatComplex>(new_size))
                using (CudaFFTPlan1D plan = new CudaFFTPlan1D(new_size, cufftType.C2C, 1))
                {
                    // Copy host memory to device
                    d_signal.CopyToDevice(h_padded_signal);
                    d_filter_kernel.CopyToDevice(h_padded_filter_kernel);

                    // Transform signal and kernel
                    Console.WriteLine("Transforming signal cufftExecC2C");
                    plan.Exec(d_signal.DevicePointer, TransformDirection.Forward);
                    plan.Exec(d_filter_kernel.DevicePointer, TransformDirection.Forward);

                    // Multiply the coefficients together and normalize the result
                    Console.WriteLine("Launching ComplexPointwiseMulAndScale<<< >>>");
                    ComplexPointwiseMulAndScale.BlockDimensions = 256;
                    ComplexPointwiseMulAndScale.GridDimensions = 32;
                    ComplexPointwiseMulAndScale.Run(d_signal.DevicePointer, d_filter_kernel.DevicePointer, new_size, 1.0f / new_size);

                    // Transform signal back
                    Console.WriteLine("Transforming signal back cufftExecC2C");
                    plan.Exec(d_signal.DevicePointer, TransformDirection.Inverse);

                    // Copy device memory to host (conversion to a host array performs the copy)
                    cuFloatComplex[] h_convolved_signal = d_signal;

                    // Allocate host memory for the convolution result
                    cuFloatComplex[] h_convolved_signal_ref = new cuFloatComplex[SIGNAL_SIZE];

                    // Convolve on the host
                    Convolve(h_signal, SIGNAL_SIZE,
                             h_filter_kernel, FILTER_KERNEL_SIZE,
                             h_convolved_signal_ref);

                    // check result
                    bTestResult = sdkCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 1e-5f);
                }
            }

            if (bTestResult)
            {
                Console.WriteLine("Test Passed");
            }
            else
            {
                Console.WriteLine("Test Failed");
            }
        }
		// External (native) function declaration; returns a cusparseStatus.
		// A is given in a block format through device pointers plus blockSize; B and C are
		// device pointers with leading dimensions ldb / ldc; alpha and beta are passed by
		// reference.
		// NOTE(review): presumably the cuSPARSE block-sparse times dense matrix product for
		// single-precision complex data - confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseCbsrmm(cusparseContext handle,
											cusparseDirection dirA,
											cusparseOperation transA,
											cusparseOperation transB,
											int mb,
											int n,
											int kb,
											int nnzb,
											ref cuFloatComplex alpha,
											cusparseMatDescr descrA,
											CUdeviceptr bsrValA,
											CUdeviceptr bsrRowPtrA,
											CUdeviceptr bsrColIndA,
											int blockSize,
											CUdeviceptr B,
											int ldb,
											ref cuFloatComplex beta,
											CUdeviceptr C,
											int ldc);
Esempio n. 26
0
        // External (native) function declaration; returns a CublasStatus.
        // All four values (a, b, c, s) are passed by reference, so the callee may read and
        // overwrite them; c is a real float, the others are complex.
        // NOTE(review): presumably the cuBLAS Givens-rotation setup routine - confirm against
        // the cuBLAS documentation. The inline notes say "host or device pointer", but a
        // `ref` to a managed value can only supply a host address.
        public static extern CublasStatus cublasCrotg_v2(CudaBlasHandle handle,
										ref cuFloatComplex a,  // host or device pointer
										ref cuFloatComplex b,  // host or device pointer
										ref float c,      // host or device pointer
										ref cuFloatComplex s);
		// External (native) function declaration; returns a cusparseStatus.
		// enable_boost is an integer flag; tol (double) and boost_val (complex) are passed by
		// reference.
		// NOTE(review): presumably configures numeric boosting for the cuSPARSE block ILU(0)
		// factorization - confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseCbsrilu02_numericBoost(cusparseContext handle,
															bsrilu02Info info,
															int enable_boost,
															ref double tol,
															ref cuFloatComplex boost_val);
Esempio n. 28
0
        // External (native) function declaration; returns a CublasStatus.
        // A and C are device pointers, each accompanied by a cudaDataType tag and a leading
        // dimension; alpha and beta are passed by reference.
        // NOTE(review): presumably the cuBLAS extended symmetric rank-k update for complex
        // data - confirm against the cuBLAS documentation.
        public static extern CublasStatus cublasCsyrkEx( CudaBlasHandle handle,
															  FillMode uplo,
															  Operation trans,
															  int n,
															  int k,
															  ref cuFloatComplex alpha, /* host or device pointer */  
															  CUdeviceptr A, 
															  cudaDataType Atype, 
															  int lda,
															  ref cuFloatComplex beta, /* host or device pointer */  
															  CUdeviceptr C, 
															  cudaDataType Ctype, 
															  int ldc);
		// External (native) function declaration; returns a cusparseStatus.
		// xVal, xInd and y are device pointers; the result is written through the by-reference
		// parameter resultDevHostPtr; idxBase selects the index base.
		// NOTE(review): presumably the cuSPARSE conjugated sparse dot product for
		// single-precision complex data - confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseCdotci(cusparseContext handle, int nnz, CUdeviceptr xVal, CUdeviceptr xInd, CUdeviceptr y, ref cuFloatComplex resultDevHostPtr, cusparseIndexBase idxBase);
Esempio n. 30
0
 // External (native) function declaration; returns a CublasStatus.
 // A and B are device pointers with leading dimensions lda / ldb; alpha is passed by
 // reference; batchCount gives the number of problems.
 // NOTE(review): presumably the cuBLAS batched triangular solve for single-precision complex
 // data, where A and B point to arrays of per-matrix device pointers - confirm against the
 // cuBLAS documentation.
 public static extern CublasStatus cublasCtrsmBatched( CudaBlasHandle    handle, 
                                                   SideMode  side, 
                                                   FillMode  uplo,
                                                   Operation trans, 
                                                   DiagType  diag,
                                                   int m, 
                                                   int n, 
                                                   ref cuFloatComplex alpha,       /*Host or Device Pointer*/
                                                   CUdeviceptr A, 
                                                   int lda,
                                                   CUdeviceptr B, 
                                                   int ldb,
                                                   int batchCount);
		/// <summary>
		/// Extern declaration of the native cuSPARSE entry point <c>cusparseCcsrgemm2</c>.
		/// The binding attribute is not visible in this chunk.
		/// </summary>
		/// <remarks>
		/// NOTE(review): per the cuSPARSE documentation this computes the sparse product-sum
		/// C = alpha * A * B + beta * D on CSR matrices, using a caller-provided work buffer
		/// (pBuffer) and the csrgemm2 info object; the routine is believed to have been removed
		/// from newer cuSPARSE releases - confirm against the toolkit in use.
		/// The argument list is grouped per matrix: a descriptor, a non-zero count (A, B and D only)
		/// and three device pointers (values, row pointers, column indices) for A, B, D and C in turn.
		/// alpha and beta are declared <c>ref</c>, so the address of a managed value is handed to
		/// the native side. NOTE(review): that presumably only suits host pointer mode - confirm.
		/// </remarks>
		/// <returns>The <c>cusparseStatus</c> value produced by the native call.</returns>
		public static extern cusparseStatus cusparseCcsrgemm2(cusparseContext handle,
											   int m,
											   int n,
											   int k,
											   ref cuFloatComplex alpha,
											   cusparseMatDescr descrA,
											   int nnzA,
											   CUdeviceptr csrSortedValA,
											   CUdeviceptr csrSortedRowPtrA,
											   CUdeviceptr csrSortedColIndA,
											   cusparseMatDescr descrB,
											   int nnzB,
											   CUdeviceptr csrSortedValB,
											   CUdeviceptr csrSortedRowPtrB,
											   CUdeviceptr csrSortedColIndB,
											   ref cuFloatComplex beta,
											   cusparseMatDescr descrD,
											   int nnzD,
											   CUdeviceptr csrSortedValD,
											   CUdeviceptr csrSortedRowPtrD,
											   CUdeviceptr csrSortedColIndD,
											   cusparseMatDescr descrC,
											   CUdeviceptr csrSortedValC,
											   CUdeviceptr csrSortedRowPtrC,
											   CUdeviceptr csrSortedColIndC,
											   csrgemm2Info info,
											   CUdeviceptr pBuffer);
Esempio n. 32
0
        /// <summary>
        /// Extern declaration of the native cuBLAS entry point <c>cublasCdotu_v2</c>.
        /// The binding attribute is not visible in this chunk.
        /// NOTE(review): per the cuBLAS documentation this is the unconjugated dot product of
        /// two single-precision complex vectors - confirm.
        /// </summary>
        /// <param name="handle">cuBLAS handle passed through to the native call.</param>
        /// <param name="n">Element count; presumably the number of elements read from x and y - verify.</param>
        /// <param name="x">Device pointer to the first vector (marked <c>[In]</c>).</param>
        /// <param name="incx">Stride between consecutive elements of x - presumably in elements, verify.</param>
        /// <param name="y">Device pointer to the second vector (marked <c>[In]</c>).</param>
        /// <param name="incy">Stride between consecutive elements of y - presumably in elements, verify.</param>
        /// <param name="result">Result slot, passed by reference. NOTE(review): a <c>ref</c> argument
        /// presumably only suits host pointer mode - confirm.</param>
        /// <returns>The <c>CublasStatus</c> value produced by the native call.</returns>
        public static extern CublasStatus cublasCdotu_v2(CudaBlasHandle handle,
										 int n,
										 [In] CUdeviceptr x, 
										 int incx,
										 [In] CUdeviceptr y, 
										 int incy,
										 ref cuFloatComplex result);
		/// <summary>
		/// Extern declaration of the native cuSPARSE entry point <c>cusparseCgemvi</c>.
		/// The binding attribute is not visible in this chunk.
		/// </summary>
		/// <remarks>
		/// NOTE(review): per the cuSPARSE documentation this computes y = alpha * op(A) * x + beta * y
		/// with a dense matrix A (leading dimension lda), a sparse vector x given as xVal / xInd
		/// with nnz entries, and a caller-provided work buffer pBuffer - confirm against the
		/// toolkit in use.
		/// alpha and beta are declared <c>ref</c>, so the address of a managed value is handed to
		/// the native side. NOTE(review): despite the "host or device pointer" remarks, this
		/// signature presumably only suits host pointer mode - confirm.
		/// </remarks>
		/// <returns>The <c>cusparseStatus</c> value produced by the native call.</returns>
		public static extern cusparseStatus cusparseCgemvi(cusparseContext handle,
                                    cusparseOperation transA,
                                    int m,
                                    int n,
                                    ref cuFloatComplex alpha, /* host or device pointer */
                                    CUdeviceptr A,
                                    int lda,
                                    int nnz,
                                    CUdeviceptr xVal,
                                    CUdeviceptr xInd,
                                    ref cuFloatComplex beta, /* host or device pointer */
                                    CUdeviceptr y,
                                    cusparseIndexBase   idxBase,
                                    CUdeviceptr pBuffer);
Esempio n. 34
0
 /// <summary>
 /// Extern declaration of the native cuBLAS entry point <c>cublasCgemm3m</c>.
 /// The binding attribute is not visible in this chunk.
 /// </summary>
 /// <remarks>
 /// NOTE(review): per the cuBLAS documentation this is a complex matrix-matrix multiply,
 /// C = alpha * op(A) * op(B) + beta * C, implemented with the "3M" reduced-multiplication
 /// algorithm - confirm against the toolkit in use.
 /// alpha and beta are declared <c>ref</c>, so the address of a managed value is handed to
 /// the native side. NOTE(review): despite the "host or device pointer" remarks, this
 /// signature presumably only suits host pointer mode - confirm.
 /// A, B and C are passed as <c>CUdeviceptr</c> values with leading dimensions lda / ldb / ldc.
 /// </remarks>
 /// <returns>The <c>CublasStatus</c> value produced by the native call.</returns>
 public static extern CublasStatus cublasCgemm3m(CudaBlasHandle handle, 
                                               Operation transa,
                                               Operation transb, 
                                               int m,
                                               int n,
                                               int k,
                                               ref cuFloatComplex alpha, /* host or device pointer */  
                                               CUdeviceptr A, 
                                               int lda,
                                               CUdeviceptr B,
                                               int ldb, 
                                               ref cuFloatComplex beta, /* host or device pointer */  
                                               CUdeviceptr C,
                                               int ldc);
		/// <summary>
		/// Extern declaration of the native cuSPARSE entry point <c>cusparseCcsrmv</c>.
		/// The binding attribute is not visible in this chunk.
		/// </summary>
		/// <remarks>
		/// NOTE(review): per the cuSPARSE documentation this computes y = alpha * op(A) * x + beta * y
		/// for a CSR matrix A (csrValA / csrRowPtrA / csrColIndA, nnz stored entries) and dense
		/// vectors x, y; the routine is believed to have been removed from newer cuSPARSE
		/// releases - confirm against the toolkit in use.
		/// alpha and beta are declared <c>ref</c>, so the address of a managed value is handed to
		/// the native side. NOTE(review): that presumably only suits host pointer mode - confirm.
		/// </remarks>
		/// <returns>The <c>cusparseStatus</c> value produced by the native call.</returns>
		public static extern cusparseStatus cusparseCcsrmv(cusparseContext handle, cusparseOperation transA, int m, int n, int nnz, ref cuFloatComplex alpha, cusparseMatDescr descrA, CUdeviceptr csrValA, CUdeviceptr csrRowPtrA, CUdeviceptr csrColIndA, CUdeviceptr x, ref cuFloatComplex beta, CUdeviceptr y);
Esempio n. 36
0
 /// <summary>
 /// Extern declaration of the native cuBLAS entry point <c>cublasCgemmBatched</c>.
 /// The binding attribute is not visible in this chunk.
 /// </summary>
 /// <remarks>
 /// NOTE(review): per the cuBLAS documentation this performs batchCount independent
 /// multiplies C[i] = alpha * op(A[i]) * op(B[i]) + beta * C[i], and Aarray / Barray / Carray
 /// are device arrays of per-matrix device pointers rather than the matrices themselves -
 /// confirm against the toolkit in use.
 /// alpha and beta are declared <c>ref</c>, so the address of a managed value is handed to
 /// the native side. NOTE(review): despite the "host or device pointer" remarks, this
 /// signature presumably only suits host pointer mode - confirm.
 /// </remarks>
 /// <returns>The <c>CublasStatus</c> value produced by the native call.</returns>
 public static extern CublasStatus cublasCgemmBatched(CudaBlasHandle handle,
                            Operation transa,
                            Operation transb, 
                            int m,
                            int n,
                            int k,
                            ref cuFloatComplex alpha, /* host or device pointer */ 
                            CUdeviceptr Aarray, 
                            int lda,
                            CUdeviceptr Barray,
                            int ldb, 
                            ref cuFloatComplex beta, /* host or device pointer */ 
                            CUdeviceptr Carray,
                            int ldc,
                            int batchCount);
		/// <summary>
		/// Extern declaration of the native cuSPARSE entry point <c>cusparseCcsrsv_solve</c>.
		/// The binding attribute is not visible in this chunk.
		/// </summary>
		/// <remarks>
		/// NOTE(review): per the cuSPARSE documentation this is the solve phase of a sparse
		/// triangular system with a CSR matrix A, reusing the analysis stored in info; there the
		/// right-hand side and the solution are named f and x, which presumably correspond to this
		/// signature's x and y respectively - verify the mapping. The routine is believed to have
		/// been removed from newer cuSPARSE releases - confirm against the toolkit in use.
		/// alpha is declared <c>ref</c>, so the address of a managed value is handed to the native
		/// side. NOTE(review): that presumably only suits host pointer mode - confirm.
		/// </remarks>
		/// <returns>The <c>cusparseStatus</c> value produced by the native call.</returns>
		public static extern cusparseStatus cusparseCcsrsv_solve(cusparseContext handle, cusparseOperation transA, int m, ref cuFloatComplex alpha, cusparseMatDescr descrA, CUdeviceptr csrValA, CUdeviceptr csrRowPtrA, CUdeviceptr csrColIndA, cusparseSolveAnalysisInfo info, CUdeviceptr x, CUdeviceptr y);
Esempio n. 38
0
 /// <summary>
 /// Extern declaration of the native cuBLAS entry point <c>cublasCgemmEx</c>.
 /// The binding attribute is not visible in this chunk.
 /// </summary>
 /// <remarks>
 /// NOTE(review): per the cuBLAS documentation this is a complex matrix-matrix multiply,
 /// C = alpha * op(A) * op(B) + beta * C, in which Atype / Btype / Ctype select the storage
 /// data type of each matrix - confirm against the toolkit in use.
 /// alpha and beta are declared <c>ref</c>, so the address of a managed value is handed to
 /// the native side. NOTE(review): that presumably only suits host pointer mode - confirm.
 /// A, B and C are passed as <c>CUdeviceptr</c> values with leading dimensions lda / ldb / ldc.
 /// </remarks>
 /// <returns>The <c>CublasStatus</c> value produced by the native call.</returns>
 public static extern CublasStatus cublasCgemmEx(CudaBlasHandle handle, 
                                              Operation transa, Operation transb,  
                                              int m, int n, int k, 
                                              ref cuFloatComplex alpha, 
                                              CUdeviceptr A, 
                                              cudaDataType Atype, 
                                              int lda, 
                                              CUdeviceptr B, 
                                              cudaDataType Btype, 
                                              int ldb,
                                              ref cuFloatComplex beta, 
                                              CUdeviceptr C, 
                                              cudaDataType Ctype, 
                                              int ldc);