示例#1
0
        public void TestBandwidth()
        {
            // Quick-mode bandwidth test: measures host-to-device, device-to-host and
            // device-to-device copies by delegating to the parameterised TestBandwidth overload.

            // Transfer-size range and reporting/memory settings handed to the overload.
            int        firstSize  = DefaultSize;
            int        lastSize   = DefaultSize;
            int        step       = DefaultIncrement;
            PrintMode  outputMode = PrintMode.UserReadable;
            MemoryMode memory     = MemoryMode.Pageable;
            AccessMode access     = AccessMode.Direct;

            // Platform is whatever OclUtils.GetPlatform() selects.
            Platform selectedPlatform = OclUtils.GetPlatform();

            // Prefer GPU devices; when none are reported, retry with CPU devices and
            // fail the test if that list is empty as well.
            Device[] devices = selectedPlatform.GetDevices(DeviceType.Gpu);
            if (devices.Length == 0)
            {
                Console.WriteLine("No GPU devices found. Falling back to CPU for test...");
                devices = selectedPlatform.GetDevices(DeviceType.Cpu);
                Assert.AreNotEqual(0, devices.Length, "There are no devices supporting OpenCL");
            }

            // Inclusive index range of the devices under test (only the first one).
            int firstDevice = 0;
            int lastDevice  = 0;

            // Log the names of the devices in that range.
            Console.WriteLine("Running on...");
            Console.WriteLine();
            int deviceIndex = firstDevice;
            while (deviceIndex <= lastDevice)
            {
                Console.WriteLine(devices[deviceIndex].Name);
                deviceIndex++;
            }

            Console.WriteLine();

            // Announce and select the test mode.
            Console.WriteLine("Quick Mode");
            Console.WriteLine();
            TestMode mode = TestMode.Quick;

            // Per-direction switches; all three directions are enabled.
            bool hostToDevice   = true;
            bool deviceToHost   = true;
            bool deviceToDevice = true;

            // Range mode is not supported by this test.
            if (mode == TestMode.Range)
            {
                throw new NotImplementedException();
            }

            // Copy directions in execution order, paired with their enable switches.
            MemoryCopyKind[] copyKinds = { MemoryCopyKind.HostToDevice, MemoryCopyKind.DeviceToHost, MemoryCopyKind.DeviceToDevice };
            bool[]           enabled   = { hostToDevice, deviceToHost, deviceToDevice };

            using (var context = Context.Create(devices))
            {
                for (int k = 0; k < copyKinds.Length; k++)
                {
                    if (!enabled[k])
                    {
                        continue;
                    }

                    TestBandwidth(context, devices, firstSize, lastSize, step, mode, copyKinds[k], outputMode, access, memory, firstDevice, lastDevice);
                }
            }
        }
示例#2
0
        public unsafe void TestVectorAdd()
        {
            // Adds two float vectors element by element with an OpenCL kernel, then recomputes
            // the sum on the host with VectorAddHost and asserts that both results match.
            // Runs on the first GPU device of the platform, or on the first CPU device when the
            // platform reports no GPU (same fallback as TestBandwidth).

            Console.WriteLine("TestVectorAdd Starting...");
            Console.WriteLine();
            Console.WriteLine("# of float elements per array \t= {0}", NumElements);

            // set global and local work size dimensions
            // NOTE(review): RoundUp presumably pads NumElements up to a multiple of
            // localWorkSize - confirm against its definition.
            int localWorkSize  = 256;
            int globalWorkSize = RoundUp(localWorkSize, NumElements);

            Console.WriteLine("Global work size \t\t= {0}", globalWorkSize);
            Console.WriteLine("Local work size \t\t= {0}", localWorkSize);
            Console.WriteLine("Number of work groups \t\t= {0}", globalWorkSize % localWorkSize + globalWorkSize / localWorkSize);
            Console.WriteLine();

            // allocate and initialize host arrays
            // The device-facing arrays hold globalWorkSize elements so that whole buffers can be
            // transferred; only the first NumElements entries are filled and compared.
            Console.WriteLine("Allocate and initialize host memory...");
            float[] srcA   = new float[globalWorkSize];
            float[] srcB   = new float[globalWorkSize];
            float[] dst    = new float[globalWorkSize];
            float[] golden = new float[NumElements];
            FillArray(srcA, NumElements);
            FillArray(srcB, NumElements);

            // get an OpenCL platform
            Console.WriteLine("Get platform...");
            Platform platform = OclUtils.GetPlatform();

            // get the devices
            Console.WriteLine("Get GPU devices...");
            Device[] devices = platform.GetDevices(DeviceType.Gpu);
            if (devices.Length == 0)
            {
                // devices[0] is used for the command queue below, so an empty list must be
                // handled here: retry with CPU devices and fail clearly if there are none.
                Console.WriteLine("No GPU devices found. Falling back to CPU for test...");
                devices = platform.GetDevices(DeviceType.Cpu);
                Assert.AreNotEqual(0, devices.Length, "There are no devices supporting OpenCL");
            }

            // create the context
            Console.WriteLine("Get context...");
            using (Context context = Context.Create(devices))
            {
                // create a command queue
                Console.WriteLine("Get command queue...");
                using (CommandQueue commandQueue = context.CreateCommandQueue(devices[0], CommandQueueProperties.None))
                {
                    Console.WriteLine("Create buffers...");
                    using (Mem deviceSrcA = context.CreateBuffer(MemoryFlags.ReadOnly, globalWorkSize * sizeof(float)),
                           deviceSrcB = context.CreateBuffer(MemoryFlags.ReadOnly, globalWorkSize * sizeof(float)),
                           deviceDst = context.CreateBuffer(MemoryFlags.WriteOnly, globalWorkSize * sizeof(float)))
                    {
                        string source =
                            @"// OpenCL Kernel Function for element by element vector addition
__kernel void VectorAdd(__global const float* a, __global const float* b, __global float* c, int iNumElements)
{
    // get index into global data array
    int iGID = get_global_id(0);

    // bound check (equivalent to the limit on a 'for' loop for standard/serial C code
    if (iGID >= iNumElements)
    {   
        return; 
    }

    // add the vector elements
    c[iGID] = a[iGID] + b[iGID];
}
";

                        // create the program
                        Console.WriteLine("Create program with source...");
                        using (Program program = context.CreateProgramWithSource(source))
                        {
                            // build the program
                            string options = "-cl-fast-relaxed-math";

                            program.Build(options);

                            // create the kernel
                            using (Kernel kernel = program.CreateKernel("VectorAdd"))
                            {
                                // Argument order matches the kernel signature: a, b, c, iNumElements.
                                kernel.Arguments[0].SetValue(deviceSrcA);
                                kernel.Arguments[1].SetValue(deviceSrcB);
                                kernel.Arguments[2].SetValue(deviceDst);
                                kernel.Arguments[3].SetValue(NumElements);

                                // Start core sequence... copy input data to GPU, compute, copy results back
                                // The host arrays stay pinned for the whole sequence because raw
                                // pointers to them are handed to the command queue.
                                fixed(float *psrcA = srcA, psrcB = srcB, pdst = dst)
                                {
                                    // asynchronous write of data to GPU device
                                    commandQueue.EnqueueWriteBuffer(deviceSrcA, false, 0, sizeof(float) * globalWorkSize, (IntPtr)psrcA);
                                    commandQueue.EnqueueWriteBuffer(deviceSrcB, false, 0, sizeof(float) * globalWorkSize, (IntPtr)psrcB);

                                    // launch kernel
                                    commandQueue.EnqueueNDRangeKernel(kernel, (IntPtr)globalWorkSize, (IntPtr)localWorkSize);

                                    // synchronous/blocking read of results
                                    // NOTE(review): presumably this wait also covers the two
                                    // non-blocking writes above (queue created with
                                    // CommandQueueProperties.None), so the pinned pointers are no
                                    // longer in use when the fixed block ends - confirm.
                                    commandQueue.EnqueueReadBufferAndWait(deviceDst, (IntPtr)pdst, sizeof(float) * globalWorkSize);
                                }
                            }
                        }
                    }
                }
            }

            // compute and compare results for golden-host and report errors and pass/fail
            Console.WriteLine("Comparing against host computation...");
            Console.WriteLine();
            VectorAddHost(srcA, srcB, golden, NumElements);
            bool match = Comparefet(golden, dst, NumElements, 0.0f, 0);

            Assert.IsTrue(match);
        }