コード例 #1
0
ファイル: Bandwidth.cs プロジェクト: uzbekdev1/NOpenCL
        /// <summary>
        /// Times device-to-host transfers into an ordinary managed byte array and converts the result to MB/s.
        /// </summary>
        /// <param name="context">Context used to create the device buffer.</param>
        /// <param name="commandQueue">Queue on which all transfers are enqueued.</param>
        /// <param name="memSize">Size in bytes of both the host array and the device buffer.</param>
        /// <param name="accessMode">
        /// <c>AccessMode.Direct</c> times repeated <c>EnqueueReadBuffer</c> calls; any other value times
        /// repeated <c>CopyMemory</c> calls from a mapped pointer to the device buffer.
        /// </param>
        /// <returns>The bandwidth figure computed from the elapsed time (includes the 2.0 factor noted below).</returns>
        private unsafe double TestDeviceToHostTransferPaged(Context context, CommandQueue commandQueue, int memSize, AccessMode accessMode)
        {
            // standard host allocation
            byte[] data = new byte[memSize];
            for (int i = 0; i < data.Length; i++)
            {
                data[i] = (byte)i;
            }

            // pin the managed array so its address can be handed to the OpenCL calls below
            fixed (byte* pdata = data)
            {
                // allocate device memory
                using (Buffer deviceData = context.CreateBuffer(MemoryFlags.ReadWrite, memSize))
                {
                    // initialize device memory
                    using (commandQueue.EnqueueWriteBuffer(deviceData, false, 0, memSize, (IntPtr)pdata))
                    {
                    }

                    // sync queue to host
                    commandQueue.Finish();
                    var timer = Stopwatch.StartNew();
                    if (accessMode == AccessMode.Direct)
                    {
                        // DIRECT: API access to device buffer
                        for (int i = 0; i < MemoryCopyIterations; i++)
                        {
                            // the returned event is not needed; dispose it right away
                            using (commandQueue.EnqueueReadBuffer(deviceData, false, 0, memSize, (IntPtr)pdata))
                            {
                            }
                        }

                        commandQueue.Finish();
                    }
                    else
                    {
                        // MAPPED: mapped pointers to device buffer for conventional pointer access
                        IntPtr dm_idata;
                        using (commandQueue.EnqueueMapBuffer(deviceData, true, MapFlags.Read, 0, memSize, out dm_idata))
                        {
                        }

                        for (int i = 0; i < MemoryCopyIterations; i++)
                        {
                            CopyMemory((IntPtr)pdata, dm_idata, (UIntPtr)memSize);
                        }

                        using (commandQueue.EnqueueUnmapMemObject(deviceData, dm_idata))
                        {
                        }
                    }

                    // get the elapsed time in seconds
                    double elapsedTimeInSeconds = timer.Elapsed.TotalSeconds;

                    // Calculate bandwidth in MB/s
                    //      This is for kernels that read and write GMEM simultaneously
                    //      Obtained Throughput for unidirectional block copies will be 1/2 of this #
                    // NOTE(review): this test performs one-directional copies, so the 2.0 factor doubles the
                    // figure as the comment above says - confirm that callers expect this.
                    double bandwidthInMBs = 2.0 * ((double)memSize * (double)MemoryCopyIterations) / (elapsedTimeInSeconds * (double)(1 << 20));

                    return bandwidthInMBs;
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Disposes the held <c>Buffer</c> and clears <c>Buffer</c> and <c>Ptr</c>; calls
        /// <c>Synchronize()</c> first when <c>HasOwnership</c> is set.
        /// </summary>
        /// <param name="disposing">Not consulted by this override.</param>
        protected override void Dispose(bool disposing)
        {
            // nothing to do when no buffer is held
            if (Buffer == null)
            {
                return;
            }

            if (HasOwnership)
            {
                Synchronize();
            }

            Buffer.Dispose();

            // reset state so a repeated call takes the early exit above
            Buffer = null;
            Ptr = IntPtr.Zero;
        }
コード例 #3
0
        /// <summary>
        /// Checks that a buffer's <c>Destroyed</c> event has not fired while the buffer is alive and
        /// has fired once the buffer and its context have been disposed.
        /// </summary>
        public void TestBufferDestroyedEvent()
        {
            bool wasDestroyed = false;

            Platform firstPlatform = Platform.GetPlatforms()[0];

            using (Context context = Context.Create(firstPlatform.GetDevices()))
            using (Buffer buffer = context.CreateBuffer(MemoryFlags.AllocateHostPointer, 1024))
            {
                buffer.Destroyed += (source, args) => wasDestroyed = true;

                // subscribing alone must not raise the event
                Assert.IsFalse(wasDestroyed);
            }

            Assert.IsTrue(wasDestroyed);
        }
コード例 #4
0
        /// <summary>
        /// Launches <paramref name="kernel"/> over the input data (optionally after one warm-up launch) and
        /// reports how long the launch and the read-back of the output took.
        /// </summary>
        /// <param name="context">Context used to create the input and output buffers.</param>
        /// <param name="device">Device queried for the kernel's work-group size, which is printed.</param>
        /// <param name="commandQueue">Queue on which the kernel and the transfers are enqueued.</param>
        /// <param name="kernel">Kernel to run; arguments 0 and 1 are set to the input and output buffers.</param>
        /// <param name="input">Host input data; its length determines the size of both buffers.</param>
        /// <param name="output">Host array backing the output buffer.</param>
        /// <param name="globalWorkSize">Global work size passed to the launch.</param>
        /// <param name="localWorkSize">Local work size passed to the launch unless <paramref name="autoGroupSize"/> is set.</param>
        /// <param name="warming">When true, the kernel is launched once and the queue drained before the timed launch.</param>
        /// <param name="useHostPointer">Selects <c>UseHostPointer</c> instead of <c>CopyHostPointer</c> and map/unmap instead of a blocking read.</param>
        /// <param name="autoGroupSize">When true, <c>null</c> is passed as the local work size.</param>
        /// <param name="enableProfiling">When true, <paramref name="profiledTime"/> is computed from the event's start/end times.</param>
        /// <param name="stopwatchTime">Wall-clock time from enqueueing the kernel until its event has been waited on.</param>
        /// <param name="profiledTime">Event-based execution time, or <c>TimeSpan.Zero</c> when profiling is disabled.</param>
        /// <param name="readTime">Wall-clock time of the output read-back, including the final queue finish.</param>
        private unsafe void ExecuteKernel(
            Context context,
            Device device,
            CommandQueue commandQueue,
            Kernel kernel,
            float[] input,
            float[] output,
            int globalWorkSize,
            int localWorkSize,
            bool warming,
            bool useHostPointer,
            bool autoGroupSize,
            bool enableProfiling,
            out TimeSpan stopwatchTime,
            out TimeSpan profiledTime,
            out TimeSpan readTime)
        {
            // both buffers share the same host-pointer mode and differ only in access rights
            MemoryFlags hostPointerMode = useHostPointer ? MemoryFlags.UseHostPointer : MemoryFlags.CopyHostPointer;
            MemoryFlags inFlags = hostPointerMode | MemoryFlags.ReadOnly;
            MemoryFlags outFlags = hostPointerMode | MemoryFlags.ReadWrite;

            int elementCount = input.Length;
            int byteCount = sizeof(float) * elementCount;

            // pin both arrays for as long as the buffers created from them are alive
            fixed (float* pinput = input, poutput = output)
            {
                using (Buffer inputBuffer = context.CreateBuffer(inFlags, byteCount, (IntPtr)pinput))
                using (Buffer outputBuffer = context.CreateBuffer(outFlags, byteCount, (IntPtr)poutput))
                {
                    kernel.Arguments[0].SetValue(inputBuffer);
                    kernel.Arguments[1].SetValue(outputBuffer);

                    Console.WriteLine("Original global work size {0}", globalWorkSize);
                    Console.WriteLine("Original local work size {0}", localWorkSize);
                    if (autoGroupSize)
                    {
                        Console.WriteLine("Run-time determines optimal workgroup size");
                    }

                    IntPtr maximumWorkGroupSize = kernel.GetWorkGroupSize(device);
                    Console.WriteLine("Maximum workgroup size for this kernel  {0}", maximumWorkGroupSize.ToInt64());

                    if (warming)
                    {
                        Console.Write("Warming up OpenCL execution...");
                        using (commandQueue.EnqueueNDRangeKernel(
                            kernel,
                            new IntPtr[] { new IntPtr(globalWorkSize) },
                            autoGroupSize ? null : new IntPtr[] { new IntPtr(localWorkSize) }))
                        {
                        }

                        commandQueue.Finish();
                        Console.WriteLine("Done");
                    }

                    Console.Write("Executing OpenCL kernel...");
                    Stopwatch watch = Stopwatch.StartNew();

                    // timed launch; a null local size is passed when autoGroupSize is set
                    using (Event launchEvent = commandQueue.EnqueueNDRangeKernel(
                        kernel,
                        new IntPtr[] { new IntPtr(globalWorkSize) },
                        autoGroupSize ? null : new IntPtr[] { new IntPtr(localWorkSize) }))
                    {
                        Event.WaitAll(launchEvent);
                        stopwatchTime = watch.Elapsed;

                        Console.WriteLine("Done");

                        if (!enableProfiling)
                        {
                            profiledTime = TimeSpan.Zero;
                        }
                        else
                        {
                            ulong startTime = launchEvent.CommandStartTime;
                            ulong endTime = launchEvent.CommandEndTime;

                            // a tick is 100ns
                            profiledTime = TimeSpan.FromTicks((long)(endTime - startTime) / 100);
                        }
                    }

                    watch.Restart();
                    if (!useHostPointer)
                    {
                        // blocking read of the results into the pinned output array
                        using (commandQueue.EnqueueReadBuffer(outputBuffer, true, 0, byteCount, (IntPtr)poutput))
                        {
                        }
                    }
                    else
                    {
                        // blocking map for reading; the mapped address must be the pinned output array itself
                        IntPtr mappedPointer;
                        using (commandQueue.EnqueueMapBuffer(outputBuffer, true, MapFlags.Read, 0, byteCount, out mappedPointer))
                        {
                        }

                        Assert.AreEqual((IntPtr)poutput, mappedPointer, "EnqueueMapBuffer failed to return original pointer");
                        using (commandQueue.EnqueueUnmapMemObject(outputBuffer, mappedPointer))
                        {
                        }
                    }

                    commandQueue.Finish();
                    readTime = watch.Elapsed;
                }
            }
        }
コード例 #5
0
ファイル: Bandwidth.cs プロジェクト: uzbekdev1/NOpenCL
        /// <summary>
        /// Times host-to-device transfers whose source is a buffer created with
        /// <c>MemoryFlags.AllocateHostPointer</c>, and converts the result to MB/s.
        /// </summary>
        /// <param name="context">Context used to create the host-side and device-side buffers.</param>
        /// <param name="commandQueue">Queue on which all map, unmap and write commands are enqueued.</param>
        /// <param name="memSize">Size in bytes of both buffers.</param>
        /// <param name="accessMode">
        /// <c>AccessMode.Direct</c> times repeated <c>EnqueueWriteBuffer</c> calls; any other value times
        /// repeated <c>CopyMemory</c> calls into a mapped pointer to the device buffer.
        /// </param>
        /// <returns>The bandwidth figure computed from the elapsed time (includes the 2.0 factor noted below).</returns>
        private double TestHostToDeviceTransferPinned(Context context, CommandQueue commandQueue, int memSize, AccessMode accessMode)
        {
            // Create a host buffer
            using (Buffer pinnedData = context.CreateBuffer(MemoryFlags.ReadWrite | MemoryFlags.AllocateHostPointer, memSize))
            {
                // get a mapped pointer
                IntPtr h_data;
                using (commandQueue.EnqueueMapBuffer(pinnedData, true, MapFlags.Write, 0, memSize, out h_data))
                {
                }

                // initialize
                for (int i = 0; i < memSize; i++)
                {
                    Marshal.WriteByte(h_data, i, (byte)i);
                }

                // unmap and make data in the host buffer valid
                using (commandQueue.EnqueueUnmapMemObject(pinnedData, h_data))
                {
                }

                // allocate device memory
                using (Buffer deviceData = context.CreateBuffer(MemoryFlags.ReadWrite, memSize))
                {
                    // sync queue to host
                    commandQueue.Finish();
                    var timer = Stopwatch.StartNew();
                    if (accessMode == AccessMode.Direct)
                    {
                        // map the host buffer for reading; this mapping is released after the timed region
                        using (commandQueue.EnqueueMapBuffer(pinnedData, true, MapFlags.Read, 0, memSize, out h_data))
                        {
                        }

                        // DIRECT: API access to device buffer
                        for (int i = 0; i < MemoryCopyIterations; i++)
                        {
                            // the returned event is not needed; dispose it right away
                            using (commandQueue.EnqueueWriteBuffer(deviceData, false, 0, memSize, h_data))
                            {
                            }
                        }

                        commandQueue.Finish();
                    }
                    else
                    {
                        // MAPPED: mapped pointers to device buffer for conventional pointer access
                        IntPtr dm_idata;
                        using (commandQueue.EnqueueMapBuffer(deviceData, true, MapFlags.Write, 0, memSize, out dm_idata))
                        {
                        }

                        // map the host buffer for reading; this mapping is released after the timed region
                        using (commandQueue.EnqueueMapBuffer(pinnedData, true, MapFlags.Read, 0, memSize, out h_data))
                        {
                        }

                        for (int i = 0; i < MemoryCopyIterations; i++)
                        {
                            CopyMemory(dm_idata, h_data, (UIntPtr)memSize);
                        }

                        using (commandQueue.EnqueueUnmapMemObject(deviceData, dm_idata))
                        {
                        }
                    }

                    // get the elapsed time in seconds
                    double elapsedTimeInSeconds = timer.Elapsed.TotalSeconds;

                    // Both branches above leave the host buffer mapped for reading; release that mapping
                    // (outside the timed region) before the buffers are disposed.
                    using (commandQueue.EnqueueUnmapMemObject(pinnedData, h_data))
                    {
                    }

                    // Calculate bandwidth in MB/s
                    //      This is for kernels that read and write GMEM simultaneously
                    //      Obtained Throughput for unidirectional block copies will be 1/2 of this #
                    // NOTE(review): this test performs one-directional copies, so the 2.0 factor doubles the
                    // figure as the comment above says - confirm that callers expect this.
                    double bandwidthInMBs = 2.0 * ((double)memSize * (double)MemoryCopyIterations) / (elapsedTimeInSeconds * (double)(1 << 20));

                    return bandwidthInMBs;
                }
            }
        }
コード例 #6
0
ファイル: VectorAdd.cs プロジェクト: uzbekdev1/NOpenCL
        /// <summary>
        /// Adds two float arrays element by element on an OpenCL device and asserts that the result
        /// matches the same addition computed on the host.
        /// </summary>
        public unsafe void TestVectorAdd()
        {
            // kernel source handed to the OpenCL compiler below
            string kernelSource =
                @"// OpenCL Kernel Function for element by element vector addition
__kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c, int iNumElements)
{
    // get index into global data array
    int iGID = get_global_id(0);

    // bound check (equivalent to the limit on a 'for' loop for standard/serial C code
    if (iGID >= iNumElements)
    {   
        return; 
    }

    // add the vector elements
    c[iGID] = a[iGID] + b[iGID];
}
";

            // build options; the disabled alternative in the original was "-cl-fast-relaxed-math -DMAC"
            string buildOptions = "-cl-fast-relaxed-math";

            Console.WriteLine("TestVectorAdd Starting...");
            Console.WriteLine();
            Console.WriteLine("# of float elements per array \t= {0}", NumElements);

            // set global and local work size dimensions
            int localWorkSize = 256;
            int globalWorkSize = RoundUp(localWorkSize, NumElements);
            int bufferBytes = globalWorkSize * sizeof(float);

            Console.WriteLine("Global work size \t\t= {0}", globalWorkSize);
            Console.WriteLine("Local work size \t\t= {0}", localWorkSize);
            int workGroupCount = (globalWorkSize % localWorkSize) + (globalWorkSize / localWorkSize);
            Console.WriteLine("Number of work groups \t\t= {0}", workGroupCount);
            Console.WriteLine();

            // allocate and initialize host arrays
            Console.WriteLine("Allocate and initialize host memory...");
            float[] hostA = new float[globalWorkSize];
            float[] hostB = new float[globalWorkSize];
            float[] hostResult = new float[globalWorkSize];
            float[] expected = new float[NumElements];
            FillArray(hostA, NumElements);
            FillArray(hostB, NumElements);

            // get an OpenCL platform
            Console.WriteLine("Get platform...");
            Platform platform = OclUtils.GetPlatform();

            // get the devices, preferring GPUs and falling back to CPUs
            Console.WriteLine("Get GPU devices...");
            Device[] devices = platform.GetDevices(DeviceType.Gpu);
            if (devices.Length == 0)
            {
                Console.WriteLine("No GPU devices found. Falling back to CPU for test...");
                devices = platform.GetDevices(DeviceType.Cpu);
                Assert.AreNotEqual(0, devices.Length, "There are no devices supporting OpenCL");
            }

            // create the context
            Console.WriteLine("Get context...");
            using (var context = Context.Create(devices))
            {
                // create a command queue on the first device
                Console.WriteLine("Get command queue...");
                using (CommandQueue commandQueue = context.CreateCommandQueue(devices[0], CommandQueueProperties.None))
                {
                    Console.WriteLine("Create buffers...");
                    using (Buffer deviceSrcA = context.CreateBuffer(MemoryFlags.ReadOnly, bufferBytes))
                    using (Buffer deviceSrcB = context.CreateBuffer(MemoryFlags.ReadOnly, bufferBytes))
                    using (Buffer deviceDst = context.CreateBuffer(MemoryFlags.WriteOnly, bufferBytes))
                    {
                        // create the program
                        Console.WriteLine("Create program with source...");
                        using (Program program = context.CreateProgramWithSource(kernelSource))
                        {
                            // build the program
                            program.Build(buildOptions);

                            // create the kernel
                            using (Kernel kernel = program.CreateKernel("VectorAdd"))
                            {
                                kernel.Arguments[0].SetValue(deviceSrcA);
                                kernel.Arguments[1].SetValue(deviceSrcB);
                                kernel.Arguments[2].SetValue(deviceDst);
                                kernel.Arguments[3].SetValue(NumElements);

                                // Start core sequence... copy input data to GPU, compute, copy results back
                                fixed (float* pinnedA = hostA, pinnedB = hostB, pinnedResult = hostResult)
                                {
                                    // asynchronous write of data to GPU device
                                    using (commandQueue.EnqueueWriteBuffer(deviceSrcA, false, 0, bufferBytes, (IntPtr)pinnedA))
                                    {
                                        using (commandQueue.EnqueueWriteBuffer(deviceSrcB, false, 0, bufferBytes, (IntPtr)pinnedB))
                                        {
                                        }
                                    }

                                    // launch kernel
                                    using (commandQueue.EnqueueNDRangeKernel(kernel, (IntPtr)globalWorkSize, (IntPtr)localWorkSize))
                                    {
                                    }

                                    // synchronous/blocking read of results
                                    using (commandQueue.EnqueueReadBuffer(deviceDst, true, 0, bufferBytes, (IntPtr)pinnedResult))
                                    {
                                    }
                                }
                            }
                        }
                    }
                }
            }

            // compute the reference result on the host and compare it with the device output
            Console.WriteLine("Comparing against host computation...");
            Console.WriteLine();
            VectorAddHost(hostA, hostB, expected, NumElements);
            Assert.IsTrue(Comparefet(expected, hostResult, NumElements, 0.0f, 0));
        }