/// <summary>
/// Transforms both operands with <c>fft</c>, combines the two spectra with <c>m_kernel</c>, transforms the
/// result back with <c>ifft</c> and writes the rescaled result to <paramref name="destination"/>.
/// </summary>
/// <param name="first">First operand; its transform is stored in <c>Owner.FirstInputFFT</c>.</param>
/// <param name="second">Second operand; its transform is stored in <c>Owner.SecondInputFFT</c>.</param>
/// <param name="temp">Scratch block: holds the involuted second operand (query mode) and, later, the inverse-transform output.</param>
/// <param name="destination">Receives the output of the normalisation kernel.</param>
/// <param name="DoQuery">When true, <paramref name="second"/> is passed through <c>m_involutionKernel</c> before it is transformed.</param>
public void Binding(MyMemoryBlock <float> first, MyMemoryBlock <float> second, MyMemoryBlock <float> temp, MyMemoryBlock <float> destination, bool DoQuery)
            {
                // Spectrum of the first operand.
                fft.Exec(first.GetDevicePtr(Owner), Owner.FirstInputFFT.GetDevicePtr(Owner));

                // Spectrum of the second operand; in query mode the involuted copy in temp is transformed instead.
                MyMemoryBlock <float> secondSource = second;
                if (DoQuery)
                {
                    m_involutionKernel.Run(second, temp, second.Count);
                    secondSource = temp;
                }
                fft.Exec(secondSource.GetDevicePtr(Owner), Owner.SecondInputFFT.GetDevicePtr(Owner));

                // Combine both spectra; the result overwrites the second spectrum.
                m_kernel.Run(Owner.FirstInputFFT, Owner.SecondInputFFT, Owner.SecondInputFFT, Owner.InputSize + 1);

                // Back-transform into the scratch block.
                ifft.Exec(Owner.SecondInputFFT.GetDevicePtr(Owner), temp.GetDevicePtr(Owner));

                float scale = 1.0f / Owner.Transform.Count;

                // NOTE(review): when the scale is exactly 1 nothing is written to destination and the
                // result stays in temp -- confirm that callers expect this.
                if (scale == 1.0f)
                {
                    return;
                }

                m_normalKernel.Run(0, 0, scale, 0, temp, destination, Owner.InputSize);
            }
        /// <summary>
        /// Filters <paramref name="x"/> with the impulse response <paramref name="h"/> on the GPU: both are
        /// zero-padded to <c>N + h.Count - 1</c> samples, transformed (D2Z), combined by the
        /// <c>ComplexMultCUDA</c> kernel and transformed back (Z2D).
        /// </summary>
        /// <param name="x">Input samples; must not be longer than <c>N + h.Count - 1</c>.</param>
        /// <param name="h">Impulse response.</param>
        /// <param name="N">Block length used to size the transform.</param>
        /// <returns>
        /// <c>x.Count</c> output samples starting at offset 500 of the result, or <c>null</c> when the
        /// impulse-response transform fails.
        /// </returns>
        public List <float> hypotesis(List <double> x, List <double> h, int N)
        {
            int length = N + h.Count - 1;

            CudaContext ctx = null;
            CudaDeviceVariable <double>  d_x = null;
            CudaDeviceVariable <double2> d_X = null;
            CudaDeviceVariable <double>  d_h = null;
            CudaDeviceVariable <double2> d_H = null;
            CudaDeviceVariable <double>  d_y = null;
            CudaFFTPlan1D planForward = null;
            CudaFFTPlan1D planInverse = null;

            try
            {
                string path = Path.GetDirectoryName(mv.plugins[0].filename);
                ctx = new CudaContext();
                CudaKernel kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");

                kernel.GridDimensions  = (int)Math.Ceiling((double)length / 1024);
                kernel.BlockDimensions = 1024;

                // Zero-padded host copies of the impulse response and the input.
                double[] temp_h = new double[length];
                double[] temp_x = new double[length];
                h.CopyTo(temp_h, 0);
                x.CopyTo(temp_x, 0);

                d_X = new CudaDeviceVariable <double2>(length);
                d_H = new CudaDeviceVariable <double2>(length);
                d_y = new CudaDeviceVariable <double>(length);

                planForward = new CudaFFTPlan1D(length, cufftType.D2Z, 1);
                planInverse = new CudaFFTPlan1D(length, cufftType.Z2D, 1);

                try
                {
                    // The implicit conversion allocates the device buffer and uploads temp_h,
                    // so no separate allocation is made beforehand (it would be orphaned).
                    d_h = temp_h;
                    planForward.Exec(d_h.DevicePointer, d_H.DevicePointer, TransformDirection.Forward);
                }
                catch (Exception exp)
                {
                    mainView.log(exp, "CUDA error: Impulse response FFT", this);
                    return null;
                }

                try
                {
                    d_x = temp_x;
                    planForward.Exec(d_x.DevicePointer, d_X.DevicePointer);
                    kernel.Run(d_H.DevicePointer, d_X.DevicePointer, length);
                    planInverse.Exec(d_X.DevicePointer, d_y.DevicePointer);
                }
                catch (Exception exp)
                {
                    // Best effort, as before: the failure is logged and whatever d_y holds is still returned.
                    mainView.log(exp, "Cuda error: kernel run cuda error", this);
                }

                // Download the result (implicit device-to-host copy).
                double[] temp_y = d_y;

                // NOTE(review): the fixed offset 500 is presumably half of the expected filter length
                // (sibling methods use h.Count / 2) -- confirm.
                return Array.ConvertAll <double, float>(temp_y, d => (float)d).ToList().GetRange(500, x.Count);
            }
            finally
            {
                // Release every GPU resource on all exit paths; the context goes last.
                if (planInverse != null) { planInverse.Dispose(); }
                if (planForward != null) { planForward.Dispose(); }
                if (d_y != null) { d_y.Dispose(); }
                if (d_x != null) { d_x.Dispose(); }
                if (d_X != null) { d_X.Dispose(); }
                if (d_h != null) { d_h.Dispose(); }
                if (d_H != null) { d_H.Dispose(); }
                if (ctx != null) { ctx.Dispose(); }
            }
        }
        /// <summary>
        /// Filters <paramref name="x"/> with the impulse response <paramref name="h"/> on the GPU using
        /// in-place complex (Z2Z) transforms of length <c>x.Count + h.Count - 1</c> and the
        /// <c>ComplexMultCUDA</c> kernel.
        /// </summary>
        /// <param name="x">Input samples.</param>
        /// <param name="h">Impulse response.</param>
        /// <returns>
        /// <c>x.Count</c> samples of the real part of the result, starting at offset <c>h.Count / 2</c>,
        /// or <c>null</c> when uploading the data to the device fails.
        /// </returns>
        public List <float> CUDA_FIR(List <float> x, List <double> h)
        {
            int length = x.Count + h.Count - 1;

            CudaContext ctx = null;
            CudaDeviceVariable <double2> d_x = null;
            CudaDeviceVariable <double2> d_h = null;
            CudaFFTPlan1D plan1D = null;

            try
            {
                ctx = new CudaContext();

                // Zero-padded complex host buffers; only the real parts are filled.
                double2[] temp_x = new double2[length];
                double2[] temp_h = new double2[length];

                for (int i = 0; i < x.Count; i++)
                {
                    temp_x[i].x = x[i];
                }
                for (int i = 0; i < h.Count; i++)
                {
                    temp_h[i].x = h[i];
                }

                plan1D = new CudaFFTPlan1D(length, cufftType.Z2Z, 1);
                CudaKernel kernel = ctx.LoadKernel("kernel.ptx", "ComplexMultCUDA");

                kernel.GridDimensions  = (int)Math.Ceiling((double)length / 1024);
                kernel.BlockDimensions = 1024;

                try
                {
                    // Implicit conversions allocate the device buffers and upload the host data.
                    d_x = temp_x;
                    d_h = temp_h;
                }
                catch (Exception)
                {
                    // Upload failure is reported to the caller as null (unchanged contract).
                    return null;
                }

                plan1D.Exec(d_x.DevicePointer, TransformDirection.Forward);
                plan1D.Exec(d_h.DevicePointer, TransformDirection.Forward);
                kernel.Run(d_h.DevicePointer, d_x.DevicePointer, length);
                plan1D.Exec(d_x.DevicePointer, TransformDirection.Inverse);

                // Download the result (implicit device-to-host copy).
                double2[] temp_y = d_x;

                return temp_y.Select(data => (float)data.x).ToList().GetRange(h.Count / 2, x.Count);
            }
            finally
            {
                // Release every GPU resource on all exit paths; the context goes last.
                if (d_x != null) { d_x.Dispose(); }
                if (d_h != null) { d_h.Dispose(); }
                if (plan1D != null) { plan1D.Dispose(); }
                if (ctx != null) { ctx.Dispose(); }
            }
        }
Beispiel #4
0
        /// <summary>
        /// Inverse-transforms the spectrum stored at <c>m_secondFFTOffset</c> of <c>m_tempBlock</c> into
        /// <paramref name="output"/> and rescales it in place.
        /// </summary>
        /// <param name="output">Device pointer that receives the inverse transform and the rescaled result.</param>
        private void FinishBinding(CUdeviceptr output)
        {
            m_ifft.Exec(m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset), output);

            float scale;

            if (!NormalizeOutput)
            {
                scale = 1.0f / Denominator;
            }
            else
            {
                scale = 1.0f;

                // Dot product of output with itself; the value is read back from element 0 of m_tempBlock.
                m_dotKernel.Run(m_tempBlock, 0, output, output, m_inputSize);
                m_tempBlock.SafeCopyToHost(0, 1);

                // Values at or below the threshold leave the scale at 1 and so avoid dividing by (almost) zero.
                if (m_tempBlock.Host[0] > 0.000001f)
                {
                    scale = 1.0f / (float)(Math.Sqrt(m_tempBlock.Host[0]));
                }
            }

            // A scale of exactly 1 would not change anything, so the kernel launch is skipped.
            if (scale == 1.0f)
            {
                return;
            }

            m_normalKernel.Run(0, 0, scale, 0, output, output, m_inputSize);
        }
Beispiel #5
0
        /// <summary>
        /// Transforms <paramref name="firstInput"/>, folds the transform of every element of
        /// <paramref name="otherInputs"/> into it with <c>m_mulkernel</c>, and hands the accumulated
        /// spectrum to <c>FinishBinding</c>, which writes to <paramref name="output"/>.
        /// </summary>
        /// <param name="firstInput">Device pointer of the first operand.</param>
        /// <param name="otherInputs">Device pointers of the remaining operands.</param>
        /// <param name="output">Device pointer receiving the final result.</param>
        public override void Bind(CUdeviceptr firstInput, IEnumerable <CUdeviceptr> otherInputs, CUdeviceptr output)
        {
            // The running product lives in the "second" FFT slot of the temp block.
            var accumulator = m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset);
            m_fft.Exec(firstInput, accumulator);

            foreach (CUdeviceptr operand in otherInputs)
            {
                // Each further operand is transformed into the "first" slot and multiplied into the accumulator.
                var operandFft = m_tempBlock.GetDevicePtr(m_owner, m_firstFFTOffset);
                m_fft.Exec(operand, operandFft);
                m_mulkernel.RunAsync(m_stream, operandFft, accumulator, accumulator, m_inputSize + 1);
            }

            FinishBinding(output);
        }
Beispiel #6
0
        /// <summary>
        /// Binds <paramref name="firstInput"/> with the given operands. The last element of
        /// <paramref name="otherInputs"/> is the output pointer; all preceding elements are operands.
        /// When only one element is given it serves as both operand and output, and when none is given
        /// <paramref name="firstInput"/> takes that role.
        /// </summary>
        /// <param name="firstInput">Device pointer of the first operand.</param>
        /// <param name="otherInputs">Further operands followed by the output pointer; may be null or empty.</param>
        public override void Bind(CUdeviceptr firstInput, params CUdeviceptr[] otherInputs)
        {
            // A params call without extra arguments passes an EMPTY array rather than null; both cases
            // must fall back to firstInput, otherwise the output lookup below indexes element -1.
            if (otherInputs == null || otherInputs.Length == 0)
            {
                otherInputs = new CUdeviceptr[] { firstInput };
            }
            m_fft.Exec(firstInput, m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset));

            int lastIndex = otherInputs.Length - 1;

            // With a single element it is used as an operand as well; otherwise the last one is output only.
            int count = otherInputs.Length == 1 ? 1 : lastIndex;

            for (int i = 0; i < count; ++i)
            {
                CUdeviceptr start = otherInputs[i];
                m_fft.Exec(start, m_tempBlock.GetDevicePtr(m_owner, m_firstFFTOffset));
                m_mulkernel.Run(
                    m_tempBlock.GetDevicePtr(m_owner, m_firstFFTOffset),
                    m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset),
                    m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset), m_inputSize + 1);
            }

            CUdeviceptr output = otherInputs[lastIndex];

            FinishBinding(output);
        }
        /// <summary>
        /// Builds a <c>Resolution x Resolution</c> test pattern (<c>i + 2*j</c> with its mean removed), runs a
        /// batched 1D C2C forward transform over it on the GPU, passes the result through the
        /// <c>cu_ArrayInversion</c> kernel and stores real and imaginary parts in <c>FFTData2D</c>.
        /// All GPU resources are released even when a step fails.
        /// </summary>
        public void cuFFTreconstruct()
        {
            CudaContext ctx = new CudaContext(0);
            CudaDeviceVariable <float2> devData      = null;
            CudaDeviceVariable <float2> copy_devData = null;
            CudaFFTPlan1D plan1D = null;

            try
            {
                ManagedCuda.BasicTypes.CUmodule cumodule = ctx.LoadModule("kernel.ptx");
                CudaKernel cuKernel = new CudaKernel("cu_ArrayInversion", cumodule, ctx);

                float2[] fData  = new float2[Resolution * Resolution];
                float2[] result = new float2[Resolution * Resolution];
                FFTData2D = new float[Resolution, Resolution, 2];
                devData      = new CudaDeviceVariable <float2>(Resolution * Resolution);
                copy_devData = new CudaDeviceVariable <float2>(Resolution * Resolution);

                int    i, j;
                double avrg = 0.0;

                // Fill the real part with the test pattern and accumulate its sum.
                for (i = 0; i < Resolution; i++)
                {
                    for (j = 0; j < Resolution; j++)
                    {
                        fData[i * Resolution + j].x = i + j * 2;
                        avrg += fData[i * Resolution + j].x;
                        fData[i * Resolution + j].y = 0.0f;
                    }
                }

                avrg = avrg / (double)(Resolution * Resolution);

                // Remove the mean from the real part.
                for (i = 0; i < Resolution; i++)
                {
                    for (j = 0; j < Resolution; j++)
                    {
                        fData[(i * Resolution + j)].x = fData[(i * Resolution + j)].x - (float)avrg;
                    }
                }

                devData.CopyToDevice(fData);

                // Resolution transforms of length Resolution, executed in place.
                plan1D = new CudaFFTPlan1D(Resolution, cufftType.C2C, Resolution);

                plan1D.Exec(devData.DevicePointer, TransformDirection.Forward);

                // NOTE(review): the grid width uses integer division, so Resolution is presumably a
                // multiple of cuda_blockNum -- confirm.
                cuKernel.GridDimensions  = new ManagedCuda.VectorTypes.dim3(Resolution / cuda_blockNum, Resolution, 1);
                cuKernel.BlockDimensions = new ManagedCuda.VectorTypes.dim3(cuda_blockNum, 1, 1);

                cuKernel.Run(devData.DevicePointer, copy_devData.DevicePointer, Resolution);

                copy_devData.CopyToHost(result);

                for (i = 0; i < Resolution; i++)
                {
                    for (j = 0; j < Resolution; j++)
                    {
                        FFTData2D[i, j, 0] = result[i * Resolution + j].x;
                        FFTData2D[i, j, 1] = result[i * Resolution + j].y;
                    }
                }
            }
            finally
            {
                // Clean up in the original order; the context is disposed even if ProfilerStop throws.
                if (devData != null) { devData.Dispose(); }
                if (copy_devData != null) { copy_devData.Dispose(); }
                if (plan1D != null) { plan1D.Dispose(); }
                try
                {
                    CudaContext.ProfilerStop();
                }
                finally
                {
                    ctx.Dispose();
                }
            }
        }
Beispiel #8
0
        /// <summary>
        /// simpleCUFFT sample: convolves a random signal with a random filter kernel on the GPU
        /// (forward C2C transforms, <c>ComplexPointwiseMulAndScale</c>, inverse transform) and compares
        /// the result with a host-side reference convolution.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            int signalSize = 50;
            int filterSize = 11;

            Console.WriteLine("[simpleCUFFT] is starting...");

            var assembly     = Assembly.GetExecutingAssembly();
            var resourceName = "simpleCUFFT.simpleCUFFTKernel.ptx";

            CudaContext ctx = new CudaContext(0);
            CudaKernel  mulAndScale;

            // Not used below; presumably kept to inspect the embedded resource names while debugging.
            string[] resourceNames = assembly.GetManifestResourceNames();

            // The PTX module is embedded in the assembly as a manifest resource.
            using (Stream ptx = assembly.GetManifestResourceStream(resourceName))
            {
                mulAndScale = ctx.LoadKernelPTX(ptx, "ComplexPointwiseMulAndScale");
            }

            // Fixed seed, so every run uses the same signal and filter.
            Random rand = new Random(0);

            // Host signal: random real part, zero imaginary part.
            cuFloatComplex[] h_signal = new cuFloatComplex[signalSize];
            for (int s = 0; s < signalSize; ++s)
            {
                h_signal[s].real = (float)rand.NextDouble();
                h_signal[s].imag = 0;
            }

            // Host filter kernel, initialised the same way.
            cuFloatComplex[] h_filter_kernel = new cuFloatComplex[filterSize];
            for (int f = 0; f < filterSize; ++f)
            {
                h_filter_kernel[f].real = (float)rand.NextDouble();
                h_filter_kernel[f].imag = 0;
            }

            // PadData allocates both padded arrays and returns their common length.
            cuFloatComplex[] h_padded_signal        = null;
            cuFloatComplex[] h_padded_filter_kernel = null;
            int paddedSize = PadData(h_signal, ref h_padded_signal, signalSize,
                                     h_filter_kernel, ref h_padded_filter_kernel, filterSize);

            // Size in bytes of one padded buffer (not used below).
            int paddedBytes = (int)cuFloatComplex.SizeOf * paddedSize;

            // Device copy of the padded signal.
            CudaDeviceVariable <cuFloatComplex> d_signal = new CudaDeviceVariable <cuFloatComplex>(paddedSize);
            d_signal.CopyToDevice(h_padded_signal);

            // Device copy of the padded filter kernel.
            CudaDeviceVariable <cuFloatComplex> d_filter_kernel = new CudaDeviceVariable <cuFloatComplex>(paddedSize);
            d_filter_kernel.CopyToDevice(h_padded_filter_kernel);

            // One C2C plan serves both directions.
            CudaFFTPlan1D plan = new CudaFFTPlan1D(paddedSize, cufftType.C2C, 1);

            // Forward transforms, in place.
            Console.WriteLine("Transforming signal cufftExecC2C");
            plan.Exec(d_signal.DevicePointer, TransformDirection.Forward);
            plan.Exec(d_filter_kernel.DevicePointer, TransformDirection.Forward);

            // Point-wise multiply in the frequency domain; the kernel is also given the 1/size scale factor.
            Console.WriteLine("Launching ComplexPointwiseMulAndScale<<< >>>");
            mulAndScale.BlockDimensions = 256;
            mulAndScale.GridDimensions  = 32;
            mulAndScale.Run(d_signal.DevicePointer, d_filter_kernel.DevicePointer, paddedSize, 1.0f / paddedSize);

            // Inverse transform, in place.
            Console.WriteLine("Transforming signal back cufftExecC2C");
            plan.Exec(d_signal.DevicePointer, TransformDirection.Inverse);

            // Implicit conversion copies the device buffer back to the host.
            cuFloatComplex[] h_convolved_signal = d_signal;

            // Reference result computed on the host.
            cuFloatComplex[] h_convolved_signal_ref = new cuFloatComplex[signalSize];
            Convolve(h_signal, signalSize,
                     h_filter_kernel, filterSize,
                     h_convolved_signal_ref);

            bool passed = sdkCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 1e-5f);

            // Tear down: plan first, then device buffers, then the context.
            plan.Dispose();
            d_filter_kernel.Dispose();
            d_signal.Dispose();
            ctx.Dispose();

            Console.WriteLine(passed ? "Test Passed" : "Test Failed");
        }
        /// <summary>
        /// Filters a long input with the impulse response <paramref name="h"/> on the GPU, one segment of
        /// <c>N + h.Count - 1</c> samples at a time. Consecutive segments start <paramref name="N"/> samples
        /// apart (so they share <c>h.Count - 1</c> samples) and the per-segment results are reassembled by
        /// <c>OverlapSave</c>.
        /// </summary>
        /// <param name="xx">Input samples.</param>
        /// <param name="h">Impulse response.</param>
        /// <param name="N">Number of new input samples consumed per segment.</param>
        /// <returns>
        /// <c>xx.Count</c> output samples starting at offset <c>h.Count / 2</c> of the reassembled result,
        /// or <c>null</c> when the impulse-response transform or the upload of a segment fails.
        /// </returns>
        public List <float> hypotesis_long_save(List <double> xx, List <double> h, int N)
        {
            int overlap = h.Count - 1;
            int segment = N + overlap;

            // The tiny epsilon makes the count round up even when xx.Count is an exact multiple of N.
            int n = (int)Math.Ceiling((double)(xx.Count + 0.000000000001) / N);

            // Input preceded by `overlap` zeros and zero-padded at the end to n * N + overlap samples.
            double[] padded = new double[n * segment - (n - 1) * overlap];
            xx.CopyTo(padded, overlap);

            CudaContext ctx = null;
            CudaDeviceVariable <double>  d_h = null;
            CudaDeviceVariable <double2> d_H = null;
            CudaFFTPlan1D planForward = null;
            CudaFFTPlan1D planInverse = null;

            try
            {
                string path = Path.GetDirectoryName(mv.plugins[0].filename);
                ctx = new CudaContext();
                CudaKernel kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");

                kernel.GridDimensions  = (int)Math.Ceiling((double)segment / 1024);
                kernel.BlockDimensions = 1024;

                double[][] temp_y = new double[n][];
                double[]   temp_h = new double[segment];
                double[]   temp_x = new double[segment];

                h.CopyTo(temp_h, 0);

                d_H = new CudaDeviceVariable <double2>(segment);

                planForward = new CudaFFTPlan1D(segment, cufftType.D2Z, 1);
                planInverse = new CudaFFTPlan1D(segment, cufftType.Z2D, 1);

                try
                {
                    // The implicit conversion allocates the device buffer and uploads temp_h,
                    // so no separate allocation is made beforehand (it would be orphaned).
                    d_h = temp_h;
                    planForward.Exec(d_h.DevicePointer, d_H.DevicePointer, TransformDirection.Forward);
                }
                catch (Exception exp)
                {
                    mainView.log(exp, "CUDA error: Impulse response FFT", this);
                    return null;
                }

                for (int g = 0; g < n; g++)
                {
                    // Segment g starts at g * N in the padded input.
                    Array.Copy(padded, g * N, temp_x, 0, segment);

                    CudaDeviceVariable <double>  d_x = null;
                    CudaDeviceVariable <double2> d_X = null;
                    try
                    {
                        d_X = new CudaDeviceVariable <double2>(segment);

                        try
                        {
                            d_x = temp_x;
                            planForward.Exec(d_x.DevicePointer, d_X.DevicePointer);
                            kernel.Run(d_H.DevicePointer, d_X.DevicePointer, segment);
                            planInverse.Exec(d_X.DevicePointer, d_x.DevicePointer);
                        }
                        catch (Exception exp)
                        {
                            mainView.log(exp, "Cuda error: kernel run cuda error", this);
                        }

                        if (d_x == null)
                        {
                            // The upload itself failed (already logged); there is nothing to download.
                            return null;
                        }

                        // Download this segment's result (implicit device-to-host copy).
                        temp_y[g] = d_x;
                    }
                    finally
                    {
                        // Per-segment buffers are released exactly once, including on failure.
                        if (d_x != null) { d_x.Dispose(); }
                        if (d_X != null) { d_X.Dispose(); }
                    }
                }

                return OverlapSave(temp_y, h.Count, segment).GetRange(h.Count / 2, xx.Count);
            }
            finally
            {
                // Release the shared GPU resources on all exit paths; the context goes last.
                if (planForward != null) { planForward.Dispose(); }
                if (planInverse != null) { planInverse.Dispose(); }
                if (d_h != null) { d_h.Dispose(); }
                if (d_H != null) { d_H.Dispose(); }
                if (ctx != null) { ctx.Dispose(); }
            }
        }
        /// <summary>
        /// Filters a long input with the impulse response <paramref name="h"/> on the GPU. The input is cut
        /// into chunks of <paramref name="N"/> samples, each chunk is zero-padded to <c>N + h.Count - 1</c>,
        /// processed (D2Z, <c>ComplexMultCUDA</c>, Z2D), and the per-chunk results are combined by
        /// <c>OverlapAdd</c>.
        /// </summary>
        /// <param name="x">Input samples.</param>
        /// <param name="h">Impulse response.</param>
        /// <param name="N">Number of input samples per chunk.</param>
        /// <returns>
        /// <c>x.Count</c> output samples starting at offset <c>h.Count / 2</c> of the combined result,
        /// or <c>null</c> when the impulse-response transform or the upload of a chunk fails.
        /// </returns>
        public List <float> hypotesis_long(List <double> x, List <double> h, int N)
        {
            int segment = N + h.Count - 1;

            // Chunks advance by N input samples, so ceil(x.Count / N) chunks are needed to cover the input.
            // (Dividing by the padded length instead under-counts and drops the tail of the input.)
            int blocks = Math.Max(1, (int)Math.Ceiling((double)x.Count / N));

            CudaContext ctx = null;
            CudaDeviceVariable <double2> d_X = null;
            CudaDeviceVariable <double>  d_h = null;
            CudaDeviceVariable <double2> d_H = null;
            CudaFFTPlan1D planForward = null;
            CudaFFTPlan1D planInverse = null;

            try
            {
                string path = Path.GetDirectoryName(mv.plugins[0].filename);
                ctx = new CudaContext();
                CudaKernel kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");

                kernel.GridDimensions  = (int)Math.Ceiling((double)segment / 1024);
                kernel.BlockDimensions = 1024;

                double[][] temp_y = new double[blocks][];
                double[]   temp_h = new double[segment];
                double[]   temp_x = new double[segment];

                h.CopyTo(temp_h, 0);

                d_X = new CudaDeviceVariable <double2>(segment);
                d_H = new CudaDeviceVariable <double2>(segment);

                planForward = new CudaFFTPlan1D(segment, cufftType.D2Z, 1);
                planInverse = new CudaFFTPlan1D(segment, cufftType.Z2D, 1);

                try
                {
                    // The implicit conversion allocates the device buffer and uploads temp_h,
                    // so no separate allocation is made beforehand (it would be orphaned).
                    d_h = temp_h;
                    planForward.Exec(d_h.DevicePointer, d_H.DevicePointer, TransformDirection.Forward);
                }
                catch (Exception exp)
                {
                    mainView.log(exp, "CUDA error: Impulse response FFT", this);
                    return null;
                }

                for (int g = 0; g < blocks; g++)
                {
                    int start = N * g;
                    int P     = Math.Min(N, x.Count - start);

                    x.CopyTo(start, temp_x, 0, P);

                    // temp_x is reused between chunks: wipe what a shorter (last) chunk does not overwrite,
                    // otherwise samples of the previous chunk leak into the zero padding.
                    Array.Clear(temp_x, P, N - P);

                    CudaDeviceVariable <double> d_x = null;
                    try
                    {
                        try
                        {
                            d_x = temp_x;
                            planForward.Exec(d_x.DevicePointer, d_X.DevicePointer);
                            kernel.Run(d_H.DevicePointer, d_X.DevicePointer, segment);
                            planInverse.Exec(d_X.DevicePointer, d_x.DevicePointer);
                        }
                        catch (Exception exp)
                        {
                            mainView.log(exp, "Cuda error: kernel run cuda error", this);
                        }

                        if (d_x == null)
                        {
                            // The upload itself failed (already logged); there is nothing to download.
                            return null;
                        }

                        // Download this chunk's result (implicit device-to-host copy).
                        temp_y[g] = d_x;
                    }
                    finally
                    {
                        // Every chunk allocates its own device buffer; release it before the next one.
                        if (d_x != null) { d_x.Dispose(); }
                    }
                }

                return OverlapAdd(temp_y, h.Count).GetRange(h.Count / 2, x.Count);
            }
            finally
            {
                // Release the shared GPU resources on all exit paths; the context goes last.
                if (planForward != null) { planForward.Dispose(); }
                if (planInverse != null) { planInverse.Dispose(); }
                if (d_X != null) { d_X.Dispose(); }
                if (d_h != null) { d_h.Dispose(); }
                if (d_H != null) { d_H.Dispose(); }
                if (ctx != null) { ctx.Dispose(); }
            }
        }
        /// <summary>
        /// Filters a long input with the impulse response <paramref name="h"/> on the GPU using in-place
        /// complex (Z2Z) transforms. The input is cut into chunks of 2,000,000 samples, each zero-padded to
        /// <c>N + h.Count - 1</c>, and the per-chunk results are combined by <c>OverlapAdd</c>.
        /// </summary>
        /// <param name="x">Input samples.</param>
        /// <param name="h">Impulse response.</param>
        /// <returns>
        /// <c>x.Count</c> output samples starting at offset <c>h.Count / 2</c> of the combined result,
        /// or <c>null</c> when uploading data to the device fails.
        /// </returns>
        public List <float> CUDA_FIR_long(List <float> x, List <double> h)
        {
            int N = 2000000;
            int segment = N + h.Count - 1;

            // Chunks advance by N input samples, so ceil(x.Count / N) chunks are needed to cover the input.
            // (Dividing by the padded length instead under-counts and drops the tail of the input.)
            int blocks = Math.Max(1, (int)Math.Ceiling((double)x.Count / N));

            CudaContext ctx = null;
            CudaDeviceVariable <double2> d_h = null;
            CudaFFTPlan1D plan1D = null;

            try
            {
                ctx = new CudaContext();
                string path = Path.GetDirectoryName(mv.plugins[0].filename);

                // Zero-padded complex host buffers; only the real parts are filled.
                double2[][] temp_x = new double2[blocks][];
                double2[]   temp_h = new double2[segment];
                double2[][] temp_y = new double2[blocks][];

                System.Threading.Tasks.Parallel.For(0, blocks, j => {
                    double2[] chunk  = new double2[segment];
                    int       offset = j * N;
                    int       count  = Math.Min(N, x.Count - offset);
                    for (int i = 0; i < count; i++)
                    {
                        chunk[i].x = x[offset + i];
                    }
                    temp_x[j] = chunk;
                });

                for (int i = 0; i < h.Count; i++)
                {
                    temp_h[i].x = h[i];
                }

                plan1D = new CudaFFTPlan1D(segment, cufftType.Z2Z, 1);

                CudaKernel kernel = ctx.LoadKernel(path + "\\kernel.ptx", "ComplexMultCUDA");

                kernel.GridDimensions  = (int)Math.Ceiling((double)segment / 1024);
                kernel.BlockDimensions = 1024;

                try
                {
                    // Implicit conversion allocates the device buffer and uploads temp_h.
                    d_h = temp_h;
                }
                catch (Exception e)
                {
                    // Previously swallowed silently; log it like the per-chunk upload failure below.
                    mainView.log(e, "cuda alloc data error", this);
                    return null;
                }
                plan1D.Exec(d_h.DevicePointer, TransformDirection.Forward);

                for (int g = 0; g < blocks; g++)
                {
                    CudaDeviceVariable <double2> d_x = null;
                    try
                    {
                        try
                        {
                            d_x = temp_x[g];
                        }
                        catch (Exception e)
                        {
                            mainView.log(e, "cuda alloc data error", this);
                            return null;
                        }

                        try
                        {
                            plan1D.Exec(d_x.DevicePointer, TransformDirection.Forward);
                            kernel.Run(d_h.DevicePointer, d_x.DevicePointer, segment);
                            plan1D.Exec(d_x.DevicePointer, TransformDirection.Inverse);
                        }
                        catch (Exception exp)
                        {
                            // Best effort, as before: log and still download whatever the buffer holds.
                            mainView.log(exp, "kernel run cuda error", this);
                        }

                        // Download this chunk's result (implicit device-to-host copy).
                        temp_y[g] = d_x;
                    }
                    finally
                    {
                        if (d_x != null) { d_x.Dispose(); }
                    }
                }

                return OverlapAdd(temp_y, h.Count).GetRange(h.Count / 2, x.Count);
            }
            finally
            {
                // Release the shared GPU resources on all exit paths; the context goes last.
                if (d_h != null) { d_h.Dispose(); }
                if (plan1D != null) { plan1D.Dispose(); }
                if (ctx != null) { ctx.Dispose(); }
            }
        }