Exemplo n.º 1
0
        private static bool cutCompareL2fe(Float2[] reference, Float2[] data,
                int len, float epsilon)
        {
            float error = 0;
            float reff = 0;

            for (int i = 0; i < len / 2; ++i)
            {
                float diff = reference[i].x - data[i].x;
                error += diff * diff;
                reff += reference[i].x * reference[i].x;

                diff = reference[i].y - data[i].y;
                error += diff * diff;
                reff += reference[i].y * reference[i].y;
            }

            float normRef = (float)Math.Sqrt(reff);
            if (Math.Abs(reff) < 1e-7)
            {
                Console.WriteLine("ERROR, reference l2-norm is 0");

                return false;
            }
            float normError = (float)Math.Sqrt(error);
            error = normError / normRef;
            bool result = error < epsilon;
            if (!result)
            {
                Console.WriteLine("ERROR, l2-norm error {0} is greater than epsilon {1}", error, epsilon);
            }

            return result ? true : false;
        }
Exemplo n.º 2
0
        static void Main(string[] args)
        {
            // Init and select 1st device.
            CUDA cuda = new CUDA(0, true);

            // load module
            //cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, "simpleCUFFT.ptx"));
            CUfunction func = new CUfunction();// cuda.GetModuleFunction("ComplexPointwiseMulAndScale");

            // The filter size is assumed to be a number smaller than the signal size
            const int SIGNAL_SIZE = 50;
            const int FILTER_KERNEL_SIZE = 11;

            // Allocate host memory for the signal
            Float2[] h_signal = new Float2[SIGNAL_SIZE];
            // Initalize the memory for the signal
            Random r = new Random();
            for (int i = 0; i < SIGNAL_SIZE; ++i)
            {
                h_signal[i].x = r.Next() / (float)int.MaxValue;
                h_signal[i].y = 0;
            }

            // Allocate host memory for the filter
            Float2[] h_filter_kernel = new Float2[FILTER_KERNEL_SIZE];
            // Initalize the memory for the filter
            for (int i = 0; i < FILTER_KERNEL_SIZE; ++i)
            {
                h_filter_kernel[i].x = r.Next() / (float)int.MaxValue;
                h_filter_kernel[i].y = 0;
            }

            // Pad signal and filter kernel
            Float2[] h_padded_signal;
            Float2[] h_padded_filter_kernel;
            int new_size = PadData(h_signal, out h_padded_signal, SIGNAL_SIZE,
                                   h_filter_kernel, out h_padded_filter_kernel, FILTER_KERNEL_SIZE);

            // Allocate device memory for signal
            // Copy host memory to device
            CUdeviceptr d_signal = cuda.CopyHostToDevice<Float2>(h_padded_signal);

            // Allocate device memory for filter kernel
            // Copy host memory to device
            CUdeviceptr d_filter_kernel = cuda.CopyHostToDevice<Float2>(h_padded_filter_kernel);

            // CUFFT plan
            CUFFT fft = new CUFFT(cuda);
            cufftHandle handle = new cufftHandle();
            CUFFTResult fftres = CUFFTDriver.cufftPlan1d(ref handle, new_size, CUFFTType.C2C, 1);
            //fft.Plan1D(new_size, CUFFTType.C2C, 1);


            return;

            // Transform signal and kernel
            fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Forward);
            fft.ExecuteComplexToComplex(d_filter_kernel, d_filter_kernel, CUFFTDirection.Forward);

            // Multiply the coefficients together and normalize the result
            // ComplexPointwiseMulAndScale<<<32, 256>>>(d_signal, d_filter_kernel, new_size, 1.0f / new_size);
            cuda.SetFunctionBlockShape(func, 256, 1, 1);
            cuda.SetParameter(func, 0, (uint)d_signal.Pointer);
            cuda.SetParameter(func, IntPtr.Size, (uint)d_filter_kernel.Pointer);
            cuda.SetParameter(func, IntPtr.Size * 2, (uint)new_size);
            cuda.SetParameter(func, IntPtr.Size * 2 + 4, 1.0f / new_size);
            cuda.SetParameterSize(func, (uint)(IntPtr.Size * 2 + 8));
            cuda.Launch(func, 32, 1);

            // Transform signal back
            fft.ExecuteComplexToComplex(d_signal, d_signal, CUFFTDirection.Inverse);

            // Copy device memory to host
            Float2[] h_convolved_signal = h_padded_signal;
            cuda.CopyDeviceToHost<Float2>(d_signal, h_convolved_signal);

            // Allocate host memory for the convolution result
            Float2[] h_convolved_signal_ref = new Float2[SIGNAL_SIZE];

            // Convolve on the host
            Convolve(h_signal, SIGNAL_SIZE,
                     h_filter_kernel, FILTER_KERNEL_SIZE,
                     h_convolved_signal_ref);

            // check result
            bool res = cutCompareL2fe(h_convolved_signal_ref, h_convolved_signal, 2 * SIGNAL_SIZE, 1e-5f);
            Console.WriteLine("Test {0}", (true == res) ? "PASSED" : "FAILED");

            //Destroy CUFFT context
            fft.Destroy();

            // cleanup memory
            cuda.Free(d_signal);
            cuda.Free(d_filter_kernel);
        }
Exemplo n.º 3
0
        private static Float2 ComplexMul(Float2 a, Float2 b)
        {
            Float2 res = new Float2();
            res.x = a.x * b.x - a.y * b.y;
            res.y = a.x * b.y + a.y * b.x;

            return res;
        }
Exemplo n.º 4
0
        private static Float2 ComplexAdd(Float2 n1, Float2 n2)
        {
            Float2 res = new Float2();
            res.x = n1.x + n2.x;
            res.y = n1.y + n2.y;

            return res;
        }
Exemplo n.º 5
0
        ////////////////////////////////////////////////////////////////////////////////
        // Filtering operations
        ////////////////////////////////////////////////////////////////////////////////

        // Computes convolution on the host
        private static void Convolve(Float2[] signal, int signal_size,
                      Float2[] filter_kernel, int filter_kernel_size,
                      Float2[] filtered_signal)
        {
            int minRadius = filter_kernel_size / 2;
            int maxRadius = filter_kernel_size - minRadius;
            // Loop over output element indices
            for (int i = 0; i < signal_size; ++i)
            {
                filtered_signal[i].x = filtered_signal[i].y = 0;
                // Loop over convolution indices
                for (int j = -maxRadius + 1; j <= minRadius; ++j)
                {
                    int k = i + j;
                    if (k >= 0 && k < signal_size)
                        filtered_signal[i] = ComplexAdd(filtered_signal[i], ComplexMul(signal[k], filter_kernel[minRadius - j]));
                }
            }
        }
Exemplo n.º 6
0
        // Pad data
        private static int PadData(Float2[] signal, out Float2[] padded_signal, int signal_size,
                    Float2[] filter_kernel, out Float2[] padded_filter_kernel, int filter_kernel_size)
        {
            int minRadius = filter_kernel_size / 2;
            int maxRadius = filter_kernel_size - minRadius;
            int new_size = signal_size + maxRadius;

            // Pad signal
            Float2[] new_data = new Float2[new_size];
            Array.Copy(signal, new_data, signal_size);
            for (int i = 0; i < (new_size - signal_size)/2; i++)
            {
                new_data[signal_size + i] = new Float2();
            }
            padded_signal = new_data;

            // Pad filter
            new_data = new Float2[new_size];
            Array.Copy(filter_kernel, minRadius, new_data, 0, maxRadius);
            for (int i = 0; i < (new_size - filter_kernel_size)/2; i++)
            {
                new_data[maxRadius/2 + i] = new Float2();
            }
            Array.Copy(filter_kernel, (new_size - minRadius) / Marshal.SizeOf(typeof(Float2)), filter_kernel, 0, minRadius);
            padded_filter_kernel = new_data;

            return new_size;
        }
Exemplo n.º 7
0
 public static extern CUResult cuParamSetv(CUfunction hfunc, int offset, ref Float2 ptr, uint numbytes);