コード例 #1
0
        public MyFourierBinder(MyWorkingNode owner, int inputSize, MyMemoryBlock<float> tempBlock)
            : base(owner, inputSize, tempBlock)
        {
            m_stream = new CudaStream();

            m_fft = new CudaFFTPlan1D(inputSize, cufftType.R2C, 1);
            m_fft.SetStream(m_stream.Stream);
            m_ifft = new CudaFFTPlan1D(inputSize, cufftType.C2R, 1);
            m_ifft.SetStream(m_stream.Stream);

            m_mulkernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Common\CombineVectorsKernel", "MulComplexElementWise");
            m_mulkernel.SetupExecution(inputSize + 1);

            m_involutionKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Common\CombineVectorsKernel", "InvolveVector");
            m_involutionKernel.SetupExecution(inputSize - 1);

            m_inversionKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Transforms\InvertValuesKernel", "InvertLengthComplexKernel");
            m_inversionKernel.SetupExecution(inputSize);

            m_dotKernel = MyKernelFactory.Instance.KernelProduct<float>(owner, owner.GPU, ProductMode.f_DotProduct_f);

            m_normalKernel = MyKernelFactory.Instance.Kernel(owner.GPU, @"Transforms\TransformKernels", "PolynomialFunctionKernel");
            m_normalKernel.SetupExecution(inputSize);

            m_firstFFTOffset = 0;
            m_secondFFTOffset = (inputSize + 1) * 2;
            m_tempOffset = (inputSize + 1) * 4;

            Denominator = inputSize;
        }
コード例 #2
0
ファイル: Program.cs プロジェクト: kunzmi/managedCuda
        static void Main(string[] args)
        {
            int SIGNAL_SIZE = 50;
            int FILTER_KERNEL_SIZE = 11;

            Console.WriteLine("[simpleCUFFT] is starting...");

            var assembly = Assembly.GetExecutingAssembly();
            var resourceName = "simpleCUFFT.simpleCUFFTKernel.ptx";

            CudaContext ctx = new CudaContext(0);
            CudaKernel ComplexPointwiseMulAndScale;
            string[] liste = assembly.GetManifestResourceNames();
            using (Stream stream = assembly.GetManifestResourceStream(resourceName))
            {
                ComplexPointwiseMulAndScale = ctx.LoadKernelPTX(stream, "ComplexPointwiseMulAndScale");
            }

            // Allocate host memory for the signal
            cuFloatComplex[] h_signal = new cuFloatComplex[SIGNAL_SIZE]; //we use cuFloatComplex for complex multiplaction in reference host code...

            Random rand = new Random(0);
            // Initialize the memory for the signal
            for (int i = 0; i < SIGNAL_SIZE; ++i)
            {
                h_signal[i].real = (float)rand.NextDouble();
                h_signal[i].imag = 0;
            }

            // Allocate host memory for the filter
            cuFloatComplex[] h_filter_kernel = new cuFloatComplex[FILTER_KERNEL_SIZE];

            // Initialize the memory for the filter
            for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
            {
                h_filter_kernel[i].real = (float)rand.NextDouble();
                h_filter_kernel[i].imag = 0;
            }

            // Pad signal and filter kernel
            cuFloatComplex[] h_padded_signal = null;
            cuFloatComplex[] h_padded_filter_kernel = null;
            int new_size = PadData(h_signal, ref h_padded_signal, SIGNAL_SIZE,
                                   h_filter_kernel, ref h_padded_filter_kernel, FILTER_KERNEL_SIZE);
            int mem_size = (int)cuFloatComplex.SizeOf * new_size;

            // Allocate device memory for signal
            CudaDeviceVariable<cuFloatComplex> d_signal = new CudaDeviceVariable<cuFloatComplex>(new_size);
            // Copy host memory to device
            d_signal.CopyToDevice(h_padded_signal);

            // Allocate device memory for filter kernel
            CudaDeviceVariable<cuFloatComplex> d_filter_kernel = new CudaDeviceVariable<cuFloatComplex>(new_size);

            // Copy host memory to device
            d_filter_kernel.CopyToDevice(h_padded_filter_kernel);

            // CUFFT plan simple API
            CudaFFTPlan1D plan = new CudaFFTPlan1D(new_size, cufftType.C2C, 1);

            // Transform signal and kernel
            Console.WriteLine("Transforming signal cufftExecC2C");
            plan.Exec(d_signal.DevicePointer, TransformDirection.Forward);
            plan.Exec(d_filter_kernel.DevicePointer, TransformDirection.Forward);

            // Multiply the coefficients together and normalize the result
            Console.WriteLine("Launching ComplexPointwiseMulAndScale<<< >>>");
            ComplexPointwiseMulAndScale.BlockDimensions = 256;
            ComplexPointwiseMulAndScale.GridDimensions = 32;
            ComplexPointwiseMulAndScale.Run(d_signal.DevicePointer, d_filter_kernel.DevicePointer, new_size, 1.0f / new_size);

            // Transform signal back
            Console.WriteLine("Transforming signal back cufftExecC2C");
            plan.Exec(d_signal.DevicePointer, TransformDirection.Inverse);

            // Copy device memory to host
            cuFloatComplex[] h_convolved_signal = d_signal;

            // Allocate host memory for the convolution result
            cuFloatComplex[] h_convolved_signal_ref = new cuFloatComplex[SIGNAL_SIZE];

            // Convolve on the host
            Convolve(h_signal, SIGNAL_SIZE,
                     h_filter_kernel, FILTER_KERNEL_SIZE,
                     h_convolved_signal_ref);

            // check result
            bool bTestResult = sdkCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 1e-5f);

            //Destroy CUFFT context
            plan.Dispose();

            // cleanup memory
            d_filter_kernel.Dispose();
            d_signal.Dispose();
            ctx.Dispose();

            if (bTestResult)
            {
                Console.WriteLine("Test Passed");
            }
            else
            {
                Console.WriteLine("Test Failed");
            }
        }