Пример #1
0
        /// <summary>
        /// Runs the "ExternalLoopBody" kernel with a single work-item over a six-element
        /// integer buffer and verifies the contents read back from the device.
        /// </summary>
        /// <param name="program">Program from which the "ExternalLoopBody" kernel is created.</param>
        public void ExternalLoopBody(Cl.Program program)
        {
            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "ExternalLoopBody", out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // allocate host vectors
            int[] hres = { 0, 1, 2, 3, 4, 5 };

            // allocate device vectors (initialized from the host array via CopyHostPtr)
            Cl.Mem dres = Cl.CreateBuffer(context, Cl.MemFlags.ReadWrite | Cl.MemFlags.CopyHostPtr,
                                          (IntPtr)(sizeof(int) * hres.Length), hres, out error);
            clSafeCall(error);

            try
            {
                // setup kernel arguments: the buffer and its element count
                clSafeCall(Cl.SetKernelArg(kernel, 0, dres));
                clSafeCall(Cl.SetKernelArg(kernel, 1, hres.Length));

                // execute kernel (global size 1, so a single work-item processes the whole buffer)
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

                // copy results from device back to host (blocking read)
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dres, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(int) * hres.Length), hres, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));
            }
            finally
            {
                // release the OpenCL objects even when one of the calls above fails
                dres.Dispose();
                cmdQueue.Dispose();
                kernel.Dispose();
            }

            Assert.AreEqual(new[] { 1, 4, 3, 6, 5, 8 }, hres);
        }
Пример #2
0
        /// <summary>
        /// Selects the first GPU device found across all platforms, creates a context for it
        /// and creates the shared <c>dummy</c> buffer handle used by the tests.
        /// </summary>
        public void SetUp()
        {
            // First() throws InvalidOperationException when no platform reports a GPU device.
            device = (from platformid in Cl.GetPlatformIDs(out error)
                      from deviceid in Cl.GetDeviceIDs(platformid, Cl.DeviceType.Gpu, out error)
                      select deviceid).First();

            context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
            // Fail here, with the real error code, rather than later in the first test that uses the context.
            clSafeCall(error);

            // NOTE(review): zero-sized buffer without a host pointer; its error code is deliberately
            // left unchecked here — presumably creation is not expected to succeed and `dummy` only
            // serves as a placeholder handle for unused kernel arguments. Confirm.
            dummy = Cl.CreateBuffer(context, Cl.MemFlags.ReadOnly, IntPtr.Zero, IntPtr.Zero, out error);
        }
Пример #3
0
        /// <summary>
        /// Runs the "ArrayCompare" kernel twice with a single work-item: first with two distinct
        /// device buffers as arguments 0 and 1, then with <c>dummy</c> bound to both, and checks
        /// the four boolean results written to argument 2 after each run.
        /// </summary>
        /// <param name="program">Program from which the "ArrayCompare" kernel is created.</param>
        public void ArrayCompare(Cl.Program program)
        {
            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "ArrayCompare", out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // allocate host vectors (initial values are the opposite of the first expected result)
            bool[] res = { true, false, true, false };

            // allocate device vectors
            Cl.Mem dp1 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, (IntPtr)(sizeof(int)), IntPtr.Zero, out error);
            clSafeCall(error);
            Cl.Mem dp2 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, (IntPtr)(sizeof(int)), IntPtr.Zero, out error);
            clSafeCall(error);
            Cl.Mem dp3 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, (IntPtr)(sizeof(bool) * res.Length), IntPtr.Zero, out error);
            clSafeCall(error);

            try
            {
                // setup kernel arguments: two distinct buffers
                clSafeCall(Cl.SetKernelArg(kernel, 0, dp1));
                clSafeCall(Cl.SetKernelArg(kernel, 1, dp2));
                clSafeCall(Cl.SetKernelArg(kernel, 2, dp3));

                // execute kernel
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

                // copy results from device back to host
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp3, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(bool) * res.Length), res, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));

                Assert.AreEqual(new[] { false, true, false, true }, res);

                // setup kernel arguments: the same (dummy) handle for both inputs
                clSafeCall(Cl.SetKernelArg(kernel, 0, dummy));
                clSafeCall(Cl.SetKernelArg(kernel, 1, dummy));

                // execute kernel
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

                // copy results from device back to host
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp3, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(bool) * res.Length), res, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));

                Assert.AreEqual(new[] { true, false, true, false }, res);
            }
            finally
            {
                // release the OpenCL objects even when a call or an assertion above fails
                dp3.Dispose();
                dp2.Dispose();
                dp1.Dispose();
                cmdQueue.Dispose();
                kernel.Dispose();
            }
        }
Пример #4
0
        /// <summary>
        /// Runs the "ArrayRefOut" kernel with a single work-item on two one-element integer
        /// buffers (plus <c>dummy</c> as the third argument) and checks the values read back.
        /// </summary>
        /// <param name="program">Program from which the "ArrayRefOut" kernel is created.</param>
        public void ArrayRefOut(Cl.Program program)
        {
            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "ArrayRefOut", out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // allocate host vectors
            int[] hp1 = { 1 };
            int[] hp2 = { 2 };

            // allocate device vectors (initialized from the host arrays via CopyHostPtr)
            Cl.Mem dp1 = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
                                         (IntPtr)(sizeof(int) * hp1.Length), hp1, out error);
            clSafeCall(error);
            Cl.Mem dp2 = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
                                         (IntPtr)(sizeof(int) * hp2.Length), hp2, out error);
            clSafeCall(error);

            try
            {
                // setup kernel arguments
                clSafeCall(Cl.SetKernelArg(kernel, 0, dp1));
                clSafeCall(Cl.SetKernelArg(kernel, 1, dp2));
                clSafeCall(Cl.SetKernelArg(kernel, 2, dummy));

                // execute kernel
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

                // copy results from device back to host; each read is sized by its own host array
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp1, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(int) * hp1.Length), hp1, 0, null, out clevent));
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp2, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(int) * hp2.Length), hp2, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));
            }
            finally
            {
                // release the OpenCL objects even when one of the calls above fails
                dp2.Dispose();
                dp1.Dispose();
                cmdQueue.Dispose();
                kernel.Dispose();
            }

            Assert.AreEqual(5, hp1[0]);
            Assert.AreEqual(4, hp2[0]);
        }
Пример #5
0
        /// <summary>
        /// Launches the "PoissonJacobi" kernel N = 15000 times on a 162 x 122 grid, swapping the
        /// input and output buffers between launches, then compares the interior of the result
        /// with <c>u</c> and asserts bounds on the maximal and average relative error.
        /// The first launch is excluded from the timing that is printed to the console.
        /// </summary>
        public void PoissonJacobi()
        {
            if (!prepared)
            {
                Prepare(this.BuildIR().InlineIR());
                prepared = true;
            }

            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "PoissonJacobi", out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // initialize host memory

            uint dimX = 162;
            uint dimY = 122;
            uint N    = 15000;

            float x0 = (float)(-0.25 * Math.PI);
            float y0 = (float)(-0.25 * Math.PI);

            float hx = 2.0f * Math.Abs(x0) / dimX;
            float hy = 2.0f * Math.Abs(y0) / dimY;

            float[] hData = new float[dimX * dimY];

            uint stride = dimX;

            // boundary values: left/right columns first, then top/bottom rows
            // (the four corner cells are not written and stay zero)

            for (uint i = 1; i < dimY - 1; i++)
            {
                uint  y_idx = i * stride;
                float y_val = y0 + i * hy;
                hData[y_idx]            = u(x0, y_val);
                hData[y_idx + dimX - 1] = u(x0 + (dimX - 1) * hx, y_val);
            }

            for (uint j = 1; j < dimX - 1; j++)
            {
                float x_val = x0 + j * hx;
                hData[j] = u(x_val, y0);
                hData[j + (dimY - 1) * stride] = u(x_val, y0 + (dimY - 1) * hy);
            }

            // allocate device vectors; both start from the same host data so that the
            // boundary values are present whichever buffer ends up as the output
            Cl.Mem input = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
                                           (IntPtr)(sizeof(float) * hData.Length), hData, out error);
            clSafeCall(error);
            Cl.Mem output = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
                                            (IntPtr)(sizeof(float) * hData.Length), hData, out error);
            clSafeCall(error);

            Stopwatch stopwatch = new Stopwatch();

            try
            {
                float a1 = 2 * hy / hx;
                float a2 = 2 * hx / hy;
                float a3 = a1;
                float a4 = a2;
                float a  = a1 + a2 + a3 + a4;

                // setup kernel arguments (argument 2 is passed as a size with a null value)
                clSafeCall(Cl.SetKernelArg(kernel, 2, (AREA_SIZE_Y + 2) * (AREA_SIZE_X + 2) * sizeof(float), null));
                clSafeCall(Cl.SetKernelArg(kernel, 3, dimX));
                clSafeCall(Cl.SetKernelArg(kernel, 4, dimY));
                clSafeCall(Cl.SetKernelArg(kernel, 5, stride));
                clSafeCall(Cl.SetKernelArg(kernel, 6, a1));
                clSafeCall(Cl.SetKernelArg(kernel, 7, a2));
                clSafeCall(Cl.SetKernelArg(kernel, 8, a3));
                clSafeCall(Cl.SetKernelArg(kernel, 9, a4));
                clSafeCall(Cl.SetKernelArg(kernel, 10, a));
                clSafeCall(Cl.SetKernelArg(kernel, 11, hx));
                clSafeCall(Cl.SetKernelArg(kernel, 12, hy));
                clSafeCall(Cl.SetKernelArg(kernel, 13, x0));
                clSafeCall(Cl.SetKernelArg(kernel, 14, y0));

                // 16 x 16 work-groups; the global size is the interior extent rounded up to
                // whole AREA_SIZE tiles, with 16 work-items per tile in each dimension
                IntPtr[] lo = { (IntPtr)16, (IntPtr)16 };
                IntPtr[] gl = { (IntPtr)((dimX - 2 + AREA_SIZE_X - 1) / AREA_SIZE_X * 16),
                                (IntPtr)((dimY - 2 + AREA_SIZE_Y - 1) / AREA_SIZE_Y * 16) };

                Cl.Mem curIn  = input;
                Cl.Mem curOut = output;

                // execute kernel once outside the timed region
                clSafeCall(Cl.SetKernelArg(kernel, 0, curIn));
                clSafeCall(Cl.SetKernelArg(kernel, 1, curOut));
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));

                stopwatch.Start();

                for (uint idx = 1; idx < N; idx++)
                {
                    // swap buffers: the previous output becomes the next input
                    Cl.Mem temp = curIn;
                    curIn  = curOut;
                    curOut = temp;

                    // execute kernel
                    clSafeCall(Cl.SetKernelArg(kernel, 0, curIn));
                    clSafeCall(Cl.SetKernelArg(kernel, 1, curOut));
                    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));
                }

                clSafeCall(Cl.Finish(cmdQueue));

                stopwatch.Stop();

                // copy results from device back to host (curOut holds the last launch's output)
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, curOut, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(float) * hData.Length), hData, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));
            }
            finally
            {
                // release the OpenCL objects even when one of the calls above fails
                output.Dispose();
                input.Dispose();
                cmdQueue.Dispose();
                kernel.Dispose();
            }

            float avgerr = 0, maxerr = 0;

            // relative error against u over the interior points
            for (uint i = 1; i < dimY - 1; i++)
            {
                for (uint j = 1; j < dimX - 1; j++)
                {
                    float theory = u(x0 + j * hx, y0 + i * hy);
                    float err    = Math.Abs(theory - hData[j + i * stride]) / Math.Abs(theory);
                    avgerr += err;
                    maxerr  = Math.Max(maxerr, err);
                }
            }
            // NOTE(review): the sum covers interior points only but is divided by the full grid
            // size; left unchanged so the threshold asserted below keeps its current meaning.
            avgerr /= dimX * dimY;

            long   elapsedTime          = stopwatch.ElapsedMilliseconds;
            double dataSizePerIteration = dimX * dimY * 2 * sizeof(float);
            double dataSizeTotal        = dataSizePerIteration * N;
            double elapsedSeconds       = elapsedTime * 0.001;
            double gigabyteFactor       = 1 << 30;
            double bandwidth            = dataSizeTotal / (gigabyteFactor * elapsedSeconds);

            Console.WriteLine("avgerr = {0} maxerr = {1} elapsedTime = {2} ms bandwidth = {3} GB/s",
                              avgerr, maxerr, elapsedTime, bandwidth);

            Assert.That(maxerr, Is.LessThanOrEqualTo(5E-2F));
            Assert.That(avgerr, Is.LessThanOrEqualTo(1E-2F));
        }
Пример #6
0
        /// <summary>
        /// Multiplies two randomly filled matrices with the "MatMul" kernel and checks every
        /// element of the device result against a host-side reference product, allowing a
        /// relative error of 1E-3.
        /// </summary>
        public void MatMul()
        {
            if (!prepared)
            {
                Prepare(this.BuildIR().InlineIR());
                prepared = true;
            }

            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "MatMul", out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // allocate host matrices
            float[] A = new float[WA * HA];
            float[] B = new float[WB * HB];
            float[] C = new float[WC * HC];

            // initialize host memory
            Random rand = new Random();

            for (int i = 0; i < A.Length; i++)
            {
                A[i] = (float)rand.Next() / short.MaxValue;
            }
            for (int i = 0; i < B.Length; i++)
            {
                B[i] = (float)rand.Next() / short.MaxValue;
            }

            // allocate device vectors
            Cl.Mem hDeviceMemA = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                                                 (IntPtr)(sizeof(float) * A.Length), A, out error);
            clSafeCall(error);
            Cl.Mem hDeviceMemB = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                                                 (IntPtr)(sizeof(float) * B.Length), B, out error);
            clSafeCall(error);
            Cl.Mem hDeviceMemC = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly,
                                                 (IntPtr)(sizeof(float) * C.Length), IntPtr.Zero, out error);
            clSafeCall(error);

            try
            {
                // setup kernel arguments (arguments 3 and 4 are passed as a size with a null value)
                clSafeCall(Cl.SetKernelArg(kernel, 0, hDeviceMemA));
                clSafeCall(Cl.SetKernelArg(kernel, 1, hDeviceMemB));
                clSafeCall(Cl.SetKernelArg(kernel, 2, hDeviceMemC));
                clSafeCall(Cl.SetKernelArg(kernel, 3, BLOCK_SIZE * BLOCK_SIZE * sizeof(float), null));
                clSafeCall(Cl.SetKernelArg(kernel, 4, BLOCK_SIZE * BLOCK_SIZE * sizeof(float), null));
                clSafeCall(Cl.SetKernelArg(kernel, 5, WA));
                clSafeCall(Cl.SetKernelArg(kernel, 6, WB));

                // execute kernel: one work-item per element of C, in BLOCK_SIZE x BLOCK_SIZE groups
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, new[] { (IntPtr)WC, (IntPtr)HC },
                                                   new[] { (IntPtr)BLOCK_SIZE, (IntPtr)BLOCK_SIZE }, 0, null, out clevent));

                // copy results from device back to host
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(float) * C.Length), C, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));
            }
            finally
            {
                // release the OpenCL objects even when one of the calls above fails
                hDeviceMemC.Dispose();
                hDeviceMemB.Dispose();
                hDeviceMemA.Dispose();
                cmdQueue.Dispose();
                kernel.Dispose();
            }

            // verify against a straightforward host-side product
            for (int i = 0; i < HA; ++i)
            {
                for (int j = 0; j < WB; ++j)
                {
                    float sum = 0;
                    for (int k = 0; k < WA; ++k)
                    {
                        sum += A[i * WA + k] * B[k * WB + j];
                    }
                    float err = Math.Abs((sum - C[i * WB + j]) / sum);
                    Assert.That(err, Is.LessThanOrEqualTo(1E-3F));
                }
            }
        }
Пример #7
0
        /// <summary>
        /// Adds two randomly filled 1024-element vectors with the "VecAdd" kernel and checks
        /// every element of the device result against the host-side sum, allowing a relative
        /// error of 1E-3.
        /// </summary>
        public void VecAdd()
        {
            if (!prepared)
            {
                Prepare(this.BuildIR().InlineIR());
                prepared = true;
            }

            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "VecAdd", out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            int length = 1 << 10;

            // allocate host vectors
            float[] A = new float[length];
            float[] B = new float[length];
            float[] C = new float[length];

            // initialize host memory
            Random rand = new Random();

            for (int i = 0; i < length; i++)
            {
                A[i] = (float)rand.Next() / short.MaxValue;
                B[i] = (float)rand.Next() / short.MaxValue;
            }

            // allocate device vectors
            Cl.Mem hDeviceMemA = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                                                 (IntPtr)(sizeof(float) * length), A, out error);
            clSafeCall(error);
            Cl.Mem hDeviceMemB = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                                                 (IntPtr)(sizeof(float) * length), B, out error);
            clSafeCall(error);
            Cl.Mem hDeviceMemC = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly,
                                                 (IntPtr)(sizeof(float) * length), IntPtr.Zero, out error);
            clSafeCall(error);

            try
            {
                // setup kernel arguments
                clSafeCall(Cl.SetKernelArg(kernel, 0, hDeviceMemA));
                clSafeCall(Cl.SetKernelArg(kernel, 1, hDeviceMemB));
                clSafeCall(Cl.SetKernelArg(kernel, 2, hDeviceMemC));
                clSafeCall(Cl.SetKernelArg(kernel, 3, length));

                // execute kernel: one work-item per element, in groups of 256
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)length }, new[] { (IntPtr)256 },
                                                   0, null, out clevent));

                // copy results from device back to host
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(float) * length), C, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));
            }
            finally
            {
                // release the OpenCL objects even when one of the calls above fails
                hDeviceMemC.Dispose();
                hDeviceMemB.Dispose();
                hDeviceMemA.Dispose();
                cmdQueue.Dispose();
                kernel.Dispose();
            }

            // verify against the host-side sum
            for (int i = 0; i < length; i++)
            {
                float sum = A[i] + B[i];
                float err = Math.Abs((sum - C[i]) / sum);
                Assert.That(err, Is.LessThanOrEqualTo(1E-3F));
            }
        }
Пример #8
0
        /// <summary>
        /// Emits a static method <c>TestCase(int arg, int[] addr)</c> with Reflection.Emit,
        /// invokes it on the host with <c>arg = 8</c>, then converts the same method to an OpenCL
        /// program via <c>BuildIR().ToGPUClProgram</c>, runs its "TestCase" kernel with the same
        /// argument and asserts that the value read back from the device is 1.
        /// </summary>
        public void BlockMutation()
        {
            AssemblyName    assemblyName    = new AssemblyName("UniGPUTestFixture");
            AssemblyBuilder assemblyBuilder = AppDomain.CurrentDomain.DefineDynamicAssembly(assemblyName, AssemblyBuilderAccess.RunAndSave);
            ModuleBuilder   moduleBuilder   = assemblyBuilder.DefineDynamicModule(assemblyName.Name, assemblyName.Name + ".dll");
            TypeBuilder     typeBuilder     = moduleBuilder.DefineType("CILBBTypeMutation", TypeAttributes.Public);
            MethodBuilder   methodBuilder   = typeBuilder.DefineMethod("TestCase", MethodAttributes.Public | MethodAttributes.Static,
                                                                       typeof(void), new Type[] { typeof(int), typeof(int[]) });

            methodBuilder.DefineParameter(1, ParameterAttributes.None, "arg");
            methodBuilder.DefineParameter(2, ParameterAttributes.None, "addr");

            ILGenerator il = methodBuilder.GetILGenerator();

            // local 0: float scratch slot used by the LOOP_FLT_MUTATOR block
            LocalBuilder lb = il.DeclareLocal(typeof(float));

            Label ZERO             = il.DefineLabel();
            Label LOOP             = il.DefineLabel();
            Label LOOP_FLT_MUTATOR = il.DefineLabel();
            Label LOOP_INT_MUTATOR = il.DefineLabel();

            // push addr and index 0; they stay on the stack until the final Stelem_I4
            il.Emit(OpCodes.Ldarg_1);
            il.Emit(OpCodes.Ldc_I4_0);

            // push arg twice; Brfalse pops one copy and jumps to ZERO when arg is 0
            il.Emit(OpCodes.Ldarg_0);
            il.Emit(OpCodes.Ldarg_0);
            il.Emit(OpCodes.Brfalse, ZERO);

            // LOOP: entered with the current counter on the stack - an int32 from the entry and
            // int paths, a float32 from the float path. The counter is converted with Conv_I2,
            // stored into arg, and (~(arg % 2)) & 1 selects the float path when arg is even.
            il.MarkLabel(LOOP);
            il.Emit(OpCodes.Conv_I2);
            il.Emit(OpCodes.Starg, 0);
            il.Emit(OpCodes.Ldarga, 0);
            il.Emit(OpCodes.Dup);
            il.Emit(OpCodes.Ldind_I4);
            il.Emit(OpCodes.Dup);
            il.Emit(OpCodes.Ldc_I4_2);
            il.Emit(OpCodes.Rem);
            il.Emit(OpCodes.Not);
            il.Emit(OpCodes.Ldc_I4_1);
            il.Emit(OpCodes.And);
            il.Emit(OpCodes.Brtrue, LOOP_FLT_MUTATOR);

            // LOOP_INT_MUTATOR (fall-through for odd arg): entered with &arg and the value on the
            // stack; computes (value + arg) / 2 - 1 as an int32 and loops while the result >= 1
            il.MarkLabel(LOOP_INT_MUTATOR);
            il.Emit(OpCodes.Conv_I4);
            il.Emit(OpCodes.Starg, 0);
            il.Emit(OpCodes.Ldind_I4);
            il.Emit(OpCodes.Ldarg_0);
            il.Emit(OpCodes.Add);
            il.Emit(OpCodes.Ldc_I4_2);
            il.Emit(OpCodes.Div);
            il.Emit(OpCodes.Ldc_I4_M1);
            il.Emit(OpCodes.Neg);
            il.Emit(OpCodes.Sub);
            il.Emit(OpCodes.Dup);
            il.Emit(OpCodes.Ldc_I4_1);
            il.Emit(OpCodes.Bge, LOOP);

            il.Emit(OpCodes.Br, ZERO);

            // LOOP_FLT_MUTATOR: converts the value to float32, keeps it in local 0, drops the
            // leftover &arg, subtracts 1.0f and loops while the result >= 1.0f
            il.MarkLabel(LOOP_FLT_MUTATOR);
            il.Emit(OpCodes.Conv_R4);
            il.Emit(OpCodes.Stloc_0);
            il.Emit(OpCodes.Pop);
            il.Emit(OpCodes.Ldloc_0);
            il.Emit(OpCodes.Ldc_R4, 1.0f);
            il.Emit(OpCodes.Sub);
            il.Emit(OpCodes.Dup);
            il.Emit(OpCodes.Ldc_R4, 1.0f);
            il.Emit(OpCodes.Bge, LOOP);

            // float path exit: convert back to int32 before falling into ZERO
            il.Emit(OpCodes.Conv_I4);

            // ZERO: add 1 to the value on top of the stack and store it into addr[0]
            il.MarkLabel(ZERO);
            il.Emit(OpCodes.Ldc_I4_1);
            il.Emit(OpCodes.Add);
            il.Emit(OpCodes.Stelem_I4);
            il.Emit(OpCodes.Ret);

            MethodInfo method = typeBuilder.CreateType().GetMethod(methodBuilder.Name);

            int[] res = { 0 };

            // run the emitted method on the host first
            method.Invoke(null, new object[] { 8, res });

            // host-side check is disabled; res is overwritten by the device read-back below
            //Assert.AreEqual(1, res[0]);

            // translate the emitted method to an OpenCL program and build it for the device
            Cl.Program program = method.BuildIR().ToGPUClProgram(device, context);
            clSafeCall(Cl.BuildProgram(program, 1, new[] { device }, string.Empty, null, IntPtr.Zero));
            Assert.AreEqual(Cl.BuildStatus.Success, Cl.GetProgramBuildInfo(program, device, Cl.ProgramBuildInfo.Status, out error).
                            CastTo <Cl.BuildStatus>());

            Cl.Kernel kernel = Cl.CreateKernel(program, "TestCase", out error);
            clSafeCall(error);

            // one-int device buffer receiving the kernel's result
            Cl.Mem cl_res = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, (IntPtr)sizeof(int), IntPtr.Zero, out error);
            clSafeCall(error);

            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, (Cl.CommandQueueProperties) 0, out error);
            clSafeCall(error);

            // same arguments as the host invocation: arg = 8, addr = result buffer
            clSafeCall(Cl.SetKernelArg(kernel, 0, 8));
            clSafeCall(Cl.SetKernelArg(kernel, 1, cl_res));

            // single work-item launch
            clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

            // blocking read of the result into res
            clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, cl_res, Cl.Bool.True, IntPtr.Zero, (IntPtr)sizeof(int), res, 0, null, out clevent));

            clSafeCall(Cl.Finish(cmdQueue));

            // NOTE(review): only the buffer and the program are released here; kernel and
            // cmdQueue are not.
            clSafeCall(Cl.ReleaseMemObject(cl_res));

            program.Dispose();

            Assert.AreEqual(1, res[0]);
        }
Пример #9
0
        /// <summary>
        /// Runs the "SmallTypes" kernel twice with byte, sbyte, ushort, short and bool scalar
        /// arguments - once with the bool argument set to true, once to false - and checks the
        /// two short results read back after each run.
        /// </summary>
        /// <param name="program">Program from which the "SmallTypes" kernel is created.</param>
        public void SmallTypes(Cl.Program program)
        {
            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "SmallTypes", out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            // allocate host vectors
            short[] hres1 = { 0 };
            short[] hres2 = { 0 };

            // allocate device vectors
            Cl.Mem dres1 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly,
                                           (IntPtr)(sizeof(short) * hres1.Length), IntPtr.Zero, out error);
            clSafeCall(error);
            Cl.Mem dres2 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly,
                                           (IntPtr)(sizeof(short) * hres2.Length), IntPtr.Zero, out error);
            clSafeCall(error);

            try
            {
                // setup kernel arguments
                clSafeCall(Cl.SetKernelArg(kernel, 0, dres1));
                clSafeCall(Cl.SetKernelArg(kernel, 1, dres2));
                clSafeCall(Cl.SetKernelArg(kernel, 2, (byte)1));
                clSafeCall(Cl.SetKernelArg(kernel, 3, (sbyte)-20));
                clSafeCall(Cl.SetKernelArg(kernel, 4, (ushort)30));
                clSafeCall(Cl.SetKernelArg(kernel, 5, (short)-4));
                clSafeCall(Cl.SetKernelArg(kernel, 6, true));

                // execute kernel
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

                // copy results from device back to host; each read is sized by its own host array
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dres1, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(short) * hres1.Length), hres1, 0, null, out clevent));
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dres2, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(short) * hres2.Length), hres2, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));

                Assert.AreEqual(7, hres1[0]);
                Assert.AreEqual(-7, hres2[0]);

                // setup kernel arguments: flip the bool argument, the others stay bound
                clSafeCall(Cl.SetKernelArg(kernel, 6, false));

                // execute kernel
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

                // copy results from device back to host
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dres1, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(short) * hres1.Length), hres1, 0, null, out clevent));
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dres2, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(short) * hres2.Length), hres2, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));

                Assert.AreEqual(-7, hres1[0]);
                Assert.AreEqual(7, hres2[0]);
            }
            finally
            {
                // release the OpenCL objects even when a call or an assertion above fails
                dres2.Dispose();
                dres1.Dispose();
                cmdQueue.Dispose();
                kernel.Dispose();
            }
        }
Пример #10
0
        /// <summary>
        /// Builds <paramref name="program"/>, optionally saves its binary, and runs
        /// <paramref name="N"/> red/black sweeps of the "PoissonRBSOR" (or "PoissonRBSOR_LMem")
        /// kernel on a <paramref name="dimX"/> x <paramref name="dimY"/> grid. The first sweep is
        /// excluded from timing. Average and maximal relative errors against <c>u</c> are printed
        /// to the console.
        /// </summary>
        /// <param name="device">Device the program is built for and the queue is created on.</param>
        /// <param name="context">Context used for the queue and the buffers.</param>
        /// <param name="program">Program containing the kernel; built inside this method.</param>
        /// <param name="lmem">Selects the "_LMem" kernel variant, which takes an extra argument 10
        /// and uses AREA_SIZE instead of TILE_SIZE for the global size computation.</param>
        /// <param name="x0">Lower bound of the domain in x.</param>
        /// <param name="y0">Lower bound of the domain in y.</param>
        /// <param name="x1">Upper bound of the domain in x.</param>
        /// <param name="y1">Upper bound of the domain in y.</param>
        /// <param name="dimX">Number of grid columns.</param>
        /// <param name="dimY">Number of grid rows.</param>
        /// <param name="N">Total number of red/black sweeps (one untimed plus N - 1 timed).</param>
        /// <param name="omega">Value passed to the kernel as argument 8.</param>
        /// <param name="fileName">When not null, the built program binary is written to this file.</param>
        /// <param name="options">Build options passed to <c>Cl.BuildProgram</c>.</param>
        /// <returns>Elapsed time of the timed sweeps, in milliseconds.</returns>
        /// <exception cref="Exception">The program build status is not Success.</exception>
        private static long PoissonRBSOR(Cl.Device device, Cl.Context context, Cl.Program program, bool lmem,
                                         float x0, float y0, float x1, float y1, int dimX, int dimY, int N, float omega,
                                         string fileName = null, string options = "")
        {
            Cl.ErrorCode error;
            Cl.Event     clevent;

            // build program
            clSafeCall(Cl.BuildProgram(program, 1, new[] { device }, options, null, IntPtr.Zero));
            Cl.BuildStatus status = Cl.GetProgramBuildInfo(program, device, Cl.ProgramBuildInfo.Status, out error).CastTo <Cl.BuildStatus>();
            if (status != Cl.BuildStatus.Success)
            {
                throw new Exception(status.ToString());
            }

            // save binary (only the first device's binary is read)
            if (fileName != null)
            {
                Cl.InfoBuffer binarySizes = Cl.GetProgramInfo(program, Cl.ProgramInfo.BinarySizes, out error);
                clSafeCall(error);
                Cl.InfoBufferArray binaries = new Cl.InfoBufferArray(
                    binarySizes.CastToEnumerable <IntPtr>(Enumerable.Range(0, 1)).Select(sz => new Cl.InfoBuffer(sz)).ToArray());
                IntPtr szRet;
                clSafeCall(Cl.GetProgramInfo(program, Cl.ProgramInfo.Binaries, binaries.Size, binaries, out szRet));
                byte[] binary = binaries[0].CastToArray <byte>(binarySizes.CastTo <IntPtr>(0).ToInt32());
                File.WriteAllBytes(fileName, binary);
            }

            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "PoissonRBSOR" + (lmem ? "_LMem" : ""), out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            float hx = (x1 - x0) / dimX;
            float hy = (y1 - y0) / dimY;

            // boundary values: left/right columns first, then top/bottom rows
            // (the four corner cells are not written and stay zero)

            float[] hgrid = new float[dimX * dimY];

            int gstride = dimX;

            for (int i = 1; i < dimY - 1; i++)
            {
                int   y_idx = i * gstride;
                float y_val = y0 + i * hy;
                hgrid[y_idx]            = u(x0, y_val);
                hgrid[y_idx + dimX - 1] = u(x0 + (dimX - 1) * hx, y_val);
            }

            for (int j = 1; j < dimX - 1; j++)
            {
                float x_val = x0 + j * hx;
                hgrid[j] = u(x_val, y0);
                hgrid[j + (dimY - 1) * gstride] = u(x_val, y0 + (dimY - 1) * hy);
            }

            // laplacian values: J evaluated at every interior point, stored without the boundary frame

            float[] hlaplacian = new float[(dimX - 2) * (dimY - 2)];

            int lstride = dimX - 2;

            for (int i = 1; i < dimY - 1; i++)
            {
                for (int j = 1; j < dimX - 1; j++)
                {
                    hlaplacian[j - 1 + (i - 1) * lstride] = J(x0 + j * hx, y0 + i * hy);
                }
            }

            // allocate device vectors
            Cl.Mem dgrid = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
                                           (IntPtr)(sizeof(float) * hgrid.Length), hgrid, out error);
            clSafeCall(error);
            Cl.Mem dlaplacian = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                                                (IntPtr)(sizeof(float) * hlaplacian.Length), hlaplacian, out error);
            clSafeCall(error);

            Stopwatch stopwatch = new Stopwatch();

            try
            {
                // setup kernel arguments
                clSafeCall(Cl.SetKernelArg(kernel, 0, dgrid));
                clSafeCall(Cl.SetKernelArg(kernel, 1, dlaplacian));
                clSafeCall(Cl.SetKernelArg(kernel, 2, dimX));
                clSafeCall(Cl.SetKernelArg(kernel, 3, dimY));
                clSafeCall(Cl.SetKernelArg(kernel, 4, gstride));
                clSafeCall(Cl.SetKernelArg(kernel, 5, lstride));
                clSafeCall(Cl.SetKernelArg(kernel, 6, hx));
                clSafeCall(Cl.SetKernelArg(kernel, 7, hy));
                clSafeCall(Cl.SetKernelArg(kernel, 8, omega));
                if (lmem)
                {
                    // argument 10 is passed as a size with a null value
                    clSafeCall(Cl.SetKernelArg(kernel, 10, (AREA_SIZE_Y + 2) * (AREA_SIZE_X + 2) * sizeof(float), null));
                }

                // work-group size is one tile; the global size is the interior extent rounded up
                // to whole areas (lmem) or tiles, with TILE_SIZE work-items per area/tile
                IntPtr[] lo = { (IntPtr)TILE_SIZE_X, (IntPtr)TILE_SIZE_Y };
                IntPtr[] gl =
                {
                    (IntPtr)((dimX - 2 + (lmem ? AREA_SIZE_X : TILE_SIZE_X) - 1) /
                             (lmem ? AREA_SIZE_X : TILE_SIZE_X) * TILE_SIZE_X),
                    (IntPtr)((dimY - 2 + (lmem ? AREA_SIZE_Y : TILE_SIZE_Y) - 1) /
                             (lmem ? AREA_SIZE_Y : TILE_SIZE_Y) * TILE_SIZE_Y)
                };

                // first sweep runs outside the timed region

                // execute RED kernel
                clSafeCall(Cl.SetKernelArg(kernel, 9, 1));
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));

                // execute BLACK kernel
                clSafeCall(Cl.SetKernelArg(kernel, 9, 0));
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));

                stopwatch.Start();

                for (int idx = 1; idx < N; idx++)
                {
                    // execute RED kernel
                    clSafeCall(Cl.SetKernelArg(kernel, 9, 1));
                    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));

                    // execute BLACK kernel
                    clSafeCall(Cl.SetKernelArg(kernel, 9, 0));
                    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));
                }

                clSafeCall(Cl.Finish(cmdQueue));

                stopwatch.Stop();

                // copy results from device back to host
                clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dgrid, Cl.Bool.True, IntPtr.Zero,
                                                (IntPtr)(sizeof(float) * hgrid.Length), hgrid, 0, null, out clevent));

                clSafeCall(Cl.Finish(cmdQueue));
            }
            finally
            {
                // release the OpenCL objects even when one of the calls above fails;
                // dlaplacian was previously never released
                cmdQueue.Dispose();
                kernel.Dispose();
                dgrid.Dispose();
                dlaplacian.Dispose();
            }

            float avgerr = 0, maxerr = 0;

            // relative error against u over the interior points
            for (int i = 1; i < dimY - 1; i++)
            {
                for (int j = 1; j < dimX - 1; j++)
                {
                    float theory = u(x0 + j * hx, y0 + i * hy);
                    float err    = Math.Abs(theory - hgrid[j + i * gstride]) / Math.Abs(theory);
                    avgerr += err;
                    maxerr  = Math.Max(maxerr, err);
                }
            }
            // NOTE(review): the sum covers interior points only but is divided by the full grid size.
            avgerr /= dimX * dimY;

            long elapsedTime = stopwatch.ElapsedMilliseconds;

            Console.WriteLine("average error = {0}%\nmaximal error = {1}%\nelapsed time: {2}ms\niterations per second: {3}",
                              avgerr * 100, maxerr * 100, elapsedTime, (double)N / (double)elapsedTime * 1000.0d);

            return(elapsedTime);
        }