Example #1
0
        public void SetUp()
        {
            device = (from platformid in Cl.GetPlatformIDs(out error)
                      from deviceid in Cl.GetDeviceIDs(platformid, Cl.DeviceType.Gpu, out error)
                      select deviceid).First();

            context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
        }
Example #2
0
        public static void Main(string[] args)
        {
            int  dimX = GetParameter("--dimX", 322, args);
            int  dimY = GetParameter("--dimY", 242, args);
            int  N    = GetParameter("--N", 97760, args);
            bool lmem = GetFlag("--lmem", args);

            Console.WriteLine("Poisson equation solver: dimX = {0} dimY = {1} N = {2}, LMem: {3}", dimX, dimY, N, lmem);
            string options = string.Join(" ", args.Where(arg => arg.IndexOf("--") == -1));

            Console.WriteLine("OpenCL program build options: " + options);

            Cl.ErrorCode error;

            Cl.Device device = (from platformid in Cl.GetPlatformIDs(out error)
                                from deviceid in Cl.GetDeviceIDs(platformid, Cl.DeviceType.Gpu, out error)
                                select deviceid).First();
            clSafeCall(error);

            Cl.Context context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
            clSafeCall(error);

            // create program from C# kernel
            IRBuildOptions.AutoInline     = GetFlag("--inline", args);
            IRBuildOptions.WasteRegisters = GetFlag("--rwaste", args);
            Console.WriteLine("IR code build options: AutoInline: {0}; WasteRegisters: {1}",
                              IRBuildOptions.AutoInline, IRBuildOptions.WasteRegisters);
            Cl.Program pcsharp = typeof(MainClass).BuildIR().ToGPUClProgram(device, context);

            // create program from OpenCL kernel
            Cl.Program popencl = Cl.CreateProgramWithSource(context, 1, new[] { PoissonRBSORCl }, null, out error);
            clSafeCall(error);

            // perform bandwidth comparison

            float x0    = (float)(-0.5 * Math.PI);
            float y0    = (float)(-0.5 * Math.PI);
            float x1    = -x0;
            float y1    = -y0;
            float omega = 0.8f;

            Console.WriteLine("C# benchmark:");
            long tcsharp = PoissonRBSOR(device, context, pcsharp, lmem,
                                        x0, y0, x1, y1, dimX, dimY, N, omega, "unigpu.bin", options);

            pcsharp.Dispose();

            Console.WriteLine("OpenCL benchmark:");
            long topencl = PoissonRBSOR(device, context, popencl, lmem,
                                        x0, y0, x1, y1, dimX, dimY, N, omega, "opencl.bin", options);

            popencl.Dispose();

            Console.WriteLine("OpenCL advantage: {0}", (double)tcsharp / (double)topencl);

            context.Dispose();
        }
Example #3
0
        private static byte[] ToNVIDIAClBinary(this Program irprog, Cl.Device device)
        {
            Cl.ErrorCode error;

            int cchi = Cl.GetDeviceInfo(device, Cl.DeviceInfo.ComputeCapabilityMajorNV, out error).CastTo <int>();

            clSafeCall(error);
            int cclo = Cl.GetDeviceInfo(device, Cl.DeviceInfo.ComputeCapabilityMinorNV, out error).CastTo <int>();

            clSafeCall(error);

            return(Encoding.Convert(
                       Encoding.Unicode, Encoding.ASCII,
                       Encoding.Unicode.GetBytes(irprog.ToPTX(string.Format("sm_{0}{1}", cchi, cclo)))));
        }
Example #4
0
        public static byte[] ToGPUClBinary(this Program irprog, Cl.Device device)
        {
            Cl.ErrorCode error;
            Cl.Platform  platform = Cl.GetDeviceInfo(device, Cl.DeviceInfo.Platform, out error).CastTo <Cl.Platform>();
            clSafeCall(error);
            string platformName = Cl.GetPlatformInfo(platform, Cl.PlatformInfo.Name, out error).ToString();

            clSafeCall(error);

            switch (platformName)
            {
            case "NVIDIA CUDA":
                return(irprog.ToNVIDIAClBinary(device));

            case "AMD Accelerated Parallel Processing":
                return(irprog.ToAMDClBinary(device));

            default:
                throw new NotSupportedException(platformName);
            }
        }
Example #5
0
        public static Cl.Program ToGPUClProgram(this Program irprog, Cl.Device device, Cl.Context context)
        {
            byte[] code = irprog.ToGPUClBinary(device);

            Cl.ErrorCode error;

            Cl.ErrorCode[] binariesStatus = { Cl.ErrorCode.InvalidBinary };

            Cl.Program program = Cl.CreateProgramWithBinary(
                context,
                1,
                new[] { device },
                new IntPtr[] { (IntPtr)code.Length },
                new Cl.InfoBufferArray(new Cl.InfoBuffer(code)),
                binariesStatus,
                out error
                );
            clSafeCall(error);
            clSafeCall(binariesStatus[0]);

            return(program);
        }
Example #6
0
        public void Setup()
        {
            Cl.ErrorCode     error;
            Cl.Platform[]    platforms   = Cl.GetPlatformIDs(out error);
            List <Cl.Device> devicesList = new List <Cl.Device>();

            CheckErr(error, "Cl.GetPlatformIDs");

            foreach (Cl.Platform platform in platforms)
            {
                string platformName = Cl.GetPlatformInfo(platform, Cl.PlatformInfo.Name, out error).ToString();
                Console.WriteLine("Platform: " + platformName);
                CheckErr(error, "Cl.GetPlatformInfo");
                //We will be looking only for GPU devices
                foreach (Cl.Device device in Cl.GetDeviceIDs(platform, Cl.DeviceType.Gpu, out error))
                {
                    CheckErr(error, "Cl.GetDeviceIDs");
                    Console.WriteLine("Device: " + device.ToString());
                    devicesList.Add(device);
                }
            }

            if (devicesList.Count <= 0)
            {
                Console.WriteLine("No devices found.");
                return;
            }

            _device = devicesList[0];

            if (Cl.GetDeviceInfo(_device, Cl.DeviceInfo.ImageSupport, out error).CastTo <Cl.Bool>() == Cl.Bool.False)
            {
                Console.WriteLine("No image support.");
                return;
            }
            _context = Cl.CreateContext(null, 1, new[] { _device }, ContextNotify, IntPtr.Zero, out error);    //Second parameter is amount of devices
            CheckErr(error, "Cl.CreateContext");
        }
Example #7
0
        public void DeviceQueries()
        {
            uint platformCount;
            Cl.ErrorCode result = Cl.GetPlatformIDs(0, null, out platformCount);
            Assert.AreEqual(result, Cl.ErrorCode.Success, "Could not get platform count");
            Console.WriteLine("{0} platforms found", platformCount);

            var platformIds = new Cl.Platform[platformCount];
            result = Cl.GetPlatformIDs(platformCount, platformIds, out platformCount);
            Assert.AreEqual(result, Cl.ErrorCode.Success, "Could not get platform ids");

            foreach (Cl.Platform platformId in platformIds)
            {
                IntPtr paramSize;
                result = Cl.GetPlatformInfo(platformId, Cl.PlatformInfo.Name, IntPtr.Zero, Cl.InfoBuffer.Empty, out paramSize);
                Assert.AreEqual(result, Cl.ErrorCode.Success, "Could not get platform name size");

                using (var buffer = new Cl.InfoBuffer(paramSize))
                {
                    result = Cl.GetPlatformInfo(platformIds[0], Cl.PlatformInfo.Name, paramSize, buffer, out paramSize);
                    Assert.AreEqual(result, Cl.ErrorCode.Success, "Could not get platform name string");
                }

                uint deviceCount;
                result = Cl.GetDeviceIDs(platformIds[0], Cl.DeviceType.All, 0, null, out deviceCount);
                Assert.AreEqual(result, Cl.ErrorCode.Success, "Could not get device count");

                var deviceIds = new Cl.Device[deviceCount];
                result = Cl.GetDeviceIDs(platformIds[0], Cl.DeviceType.All, deviceCount, deviceIds, out deviceCount);
                Assert.AreEqual(result, Cl.ErrorCode.Success, "Could not get device ids");

                result = Cl.GetDeviceInfo(deviceIds[0], Cl.DeviceInfo.Vendor, IntPtr.Zero, Cl.InfoBuffer.Empty, out paramSize);
                Assert.AreEqual(result, Cl.ErrorCode.Success, "Could not get device vendor name size");
                using (var buf = new Cl.InfoBuffer(paramSize))
                {
                    result = Cl.GetDeviceInfo(deviceIds[0], Cl.DeviceInfo.Vendor, paramSize, buf, out paramSize);
                    Assert.AreEqual(result, Cl.ErrorCode.Success, "Could not get device vendor name string");
                    var deviceVendor = buf.ToString();
                }
            }
        }
Example #8
0
        public void Setup()
        {
            Cl.ErrorCode error;

            _device = (from device in
                           Cl.GetDeviceIDs(
                               (from platform in Cl.GetPlatformIDs(out error)
                                where Cl.GetPlatformInfo(platform, Cl.PlatformInfo.Name, out error).ToString() == "NVIDIA CUDA"
                                select platform).First(), Cl.DeviceType.Gpu, out error)
                       select device).First();

            _context = Cl.CreateContext(null, 1, new[] { _device }, null, IntPtr.Zero, out error);
        }
Example #9
0
        private static long PoissonRBSOR(Cl.Device device, Cl.Context context, Cl.Program program, bool lmem,
                                         float x0, float y0, float x1, float y1, int dimX, int dimY, int N, float omega,
                                         string fileName = null, string options = "")
        {
            Cl.ErrorCode error;
            Cl.Event     clevent;

            // build program
            clSafeCall(Cl.BuildProgram(program, 1, new[] { device }, options, null, IntPtr.Zero));
            Cl.BuildStatus status = Cl.GetProgramBuildInfo(program, device, Cl.ProgramBuildInfo.Status, out error).CastTo <Cl.BuildStatus>();
            if (status != Cl.BuildStatus.Success)
            {
                throw new Exception(status.ToString());
            }

            // save binary
            if (fileName != null)
            {
                Cl.InfoBuffer binarySizes = Cl.GetProgramInfo(program, Cl.ProgramInfo.BinarySizes, out error);
                clSafeCall(error);
                Cl.InfoBufferArray binaries = new Cl.InfoBufferArray(
                    binarySizes.CastToEnumerable <IntPtr>(Enumerable.Range(0, 1)).Select(sz => new Cl.InfoBuffer(sz)).ToArray());
                IntPtr szRet;
                clSafeCall(Cl.GetProgramInfo(program, Cl.ProgramInfo.Binaries, binaries.Size, binaries, out szRet));
                byte[] binary = binaries[0].CastToArray <byte>(binarySizes.CastTo <IntPtr>(0).ToInt32());
                File.WriteAllBytes(fileName, binary);
            }

            // create kernel
            Cl.Kernel kernel = Cl.CreateKernel(program, "PoissonRBSOR" + (lmem ? "_LMem" : ""), out error);
            clSafeCall(error);

            // create command queue
            Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
            clSafeCall(error);

            float hx = (x1 - x0) / dimX;
            float hy = (y1 - y0) / dimY;

            // boundary values

            float[] hgrid = new float[dimX * dimY];

            int gstride = dimX;

            for (int i = 1; i < dimY - 1; i++)
            {
                int   y_idx = i * gstride;
                float y_val = y0 + i * hy;
                hgrid[y_idx]            = u(x0, y_val);
                hgrid[y_idx + dimX - 1] = u(x0 + (dimX - 1) * hx, y_val);
            }

            for (int j = 1; j < dimX - 1; j++)
            {
                float x_val = x0 + j * hx;
                hgrid[j] = u(x_val, y0);
                hgrid[j + (dimY - 1) * gstride] = u(x_val, y0 + (dimY - 1) * hy);
            }

            // laplacian values

            float[] hlaplacian = new float[(dimX - 2) * (dimY - 2)];

            int lstride = dimX - 2;

            for (int i = 1; i < dimY - 1; i++)
            {
                for (int j = 1; j < dimX - 1; j++)
                {
                    hlaplacian[j - 1 + (i - 1) * lstride] = J(x0 + j * hx, y0 + i * hy);
                }
            }

            // allocate device vectors
            Cl.Mem dgrid = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite,
                                           (IntPtr)(sizeof(float) * hgrid.Length), hgrid, out error);
            clSafeCall(error);
            Cl.Mem dlaplacian = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly,
                                                (IntPtr)(sizeof(float) * hlaplacian.Length), hlaplacian, out error);
            clSafeCall(error);

            // setup kernel arguments
            clSafeCall(Cl.SetKernelArg(kernel, 0, dgrid));
            clSafeCall(Cl.SetKernelArg(kernel, 1, dlaplacian));
            clSafeCall(Cl.SetKernelArg(kernel, 2, dimX));
            clSafeCall(Cl.SetKernelArg(kernel, 3, dimY));
            clSafeCall(Cl.SetKernelArg(kernel, 4, gstride));
            clSafeCall(Cl.SetKernelArg(kernel, 5, lstride));
            clSafeCall(Cl.SetKernelArg(kernel, 6, hx));
            clSafeCall(Cl.SetKernelArg(kernel, 7, hy));
            clSafeCall(Cl.SetKernelArg(kernel, 8, omega));
            if (lmem)
            {
                clSafeCall(Cl.SetKernelArg(kernel, 10, (AREA_SIZE_Y + 2) * (AREA_SIZE_X + 2) * sizeof(float), null));
            }

            IntPtr[] lo = { (IntPtr)TILE_SIZE_X, (IntPtr)TILE_SIZE_Y };
            IntPtr[] gl =
            {
                (IntPtr)((dimX - 2 + (lmem ? AREA_SIZE_X : TILE_SIZE_X) - 1) /
                         (lmem ? AREA_SIZE_X : TILE_SIZE_X) * TILE_SIZE_X),
                (IntPtr)((dimY - 2 + (lmem ? AREA_SIZE_Y : TILE_SIZE_Y) - 1) /
                         (lmem ? AREA_SIZE_Y : TILE_SIZE_Y) * TILE_SIZE_Y)
            };

            // execute RED kernel
            clSafeCall(Cl.SetKernelArg(kernel, 9, 1));
            clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));

            // execute BLACK kernel
            clSafeCall(Cl.SetKernelArg(kernel, 9, 0));
            clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));

            clSafeCall(Cl.Finish(cmdQueue));

            Stopwatch stopwatch = new Stopwatch();

            stopwatch.Start();

            for (int idx = 1; idx < N; idx++)
            {
                // execute RED kernel
                clSafeCall(Cl.SetKernelArg(kernel, 9, 1));
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));

                // execute BLACK kernel
                clSafeCall(Cl.SetKernelArg(kernel, 9, 0));
                clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, gl, lo, 0, null, out clevent));
            }

            clSafeCall(Cl.Finish(cmdQueue));

            stopwatch.Stop();

            // copy results from device back to host
            clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dgrid, Cl.Bool.True, IntPtr.Zero,
                                            (IntPtr)(sizeof(float) * hgrid.Length), hgrid, 0, null, out clevent));

            clSafeCall(Cl.Finish(cmdQueue));

            cmdQueue.Dispose();
            kernel.Dispose();
            dgrid.Dispose();

            float avgerr = 0, maxerr = 0;

            for (int i = 1; i < dimY - 1; i++)
            {
                for (int j = 1; j < dimX - 1; j++)
                {
                    float theory = u(x0 + j * hx, y0 + i * hy);
                    float err    = Math.Abs(theory - hgrid[j + i * gstride]) / Math.Abs(theory);
                    avgerr += err;
                    maxerr  = Math.Max(maxerr, err);
                }
            }
            avgerr /= dimX * dimY;

            long elapsedTime = stopwatch.ElapsedMilliseconds;

            Console.WriteLine("average error = {0}%\nmaximal error = {1}%\nelapsed time: {2}ms\niterations per second: {3}",
                              avgerr * 100, maxerr * 100, elapsedTime, (double)N / (double)elapsedTime * 1000.0d);

            return(elapsedTime);
        }
Example #10
0
        private static byte[] ToAMDClBinary(this Program irprog, Cl.Device device)
        {
            // create OpenCL stub before registers naming

            string clstub = string.Join("\n", irprog.Kernels.Select(kernel => string.Format(
                                                                        "__kernel void {0}({1}) {{ {2} }}",
                                                                        kernel.Name,
                                                                        string.Join(", ", kernel.FormalParameters.Select(
                                                                                        (fp, idx) => {
                switch (fp.StateSpace)
                {
                case StateSpaces.GLOBAL:
                    return(string.Format("__global {0}* p{1}", fp.UnderlyingType.FormatC(), idx));

                case StateSpaces.SHARED:
                    return(string.Format("__local {0}* p{1}", fp.UnderlyingType.FormatC(), idx));

                case StateSpaces.CONSTANT:
                    return(string.Format("__constant {0}* p{1}", fp.UnderlyingType.FormatC(), idx));

                case StateSpaces.REG:
                default:
                    return(string.Format("{0} p{1}", fp.UnderlyingType.FormatC(), idx));
                }
            })),
                                                                        string.Join(" ", kernel.FormalParameters.Where(
                                                                                        fp => fp.StateSpace != StateSpaces.REG && fp.StateSpace != StateSpaces.CONSTANT).Select(
                                                                                        fp => string.Format("*p{0} = 0;", kernel.FormalParameters.IndexOf(fp)))))));

            // create template binary from OpenCL stub

            Cl.ErrorCode error;

            Cl.Context context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
            clSafeCall(error);

            Cl.Program program = Cl.CreateProgramWithSource(context, 1, new[] { clstub }, null, out error);
            clSafeCall(error);

            clSafeCall(Cl.BuildProgram(program, 1, new[] { device },
                                       "-fno-bin-source -fno-bin-llvmir -fno-bin-exe -fbin-amdil", null, IntPtr.Zero));
            Cl.BuildStatus status = Cl.GetProgramBuildInfo(program, device, Cl.ProgramBuildInfo.Status, out error).CastTo <Cl.BuildStatus>();
            if (status != Cl.BuildStatus.Success)
            {
                throw new Exception(status.ToString());
            }

            Cl.InfoBuffer binarySizes = Cl.GetProgramInfo(program, Cl.ProgramInfo.BinarySizes, out error);
            clSafeCall(error);
            Cl.InfoBufferArray binaries = new Cl.InfoBufferArray(
                binarySizes.CastToEnumerable <IntPtr>(Enumerable.Range(0, 1)).Select(sz => new Cl.InfoBuffer(sz)).ToArray());
            IntPtr szRet;

            clSafeCall(Cl.GetProgramInfo(program, Cl.ProgramInfo.Binaries, binaries.Size, binaries, out szRet));

            program.Dispose();
            context.Dispose();

            // inject generated code into the elf binary

            LinkingView   elf    = new LinkingView(binaries[0].CastToArray <byte>(binarySizes.CastTo <IntPtr>(0).ToInt32()));
            SymTabSection symtab = (SymTabSection)elf[".symtab"];
            Section       amdil  = elf[".amdil"];
            Section       rodata = elf[".rodata"];

            MemoryStream amdilcode = new MemoryStream();

            foreach (Kernel kernel in irprog.Kernels)
            {
                SymbolWrapper _metadata = symtab["__OpenCL_" + kernel.Name + "_metadata"];

                string[] str_metadata = Marshal.PtrToStringAnsi(Marshal.UnsafeAddrOfPinnedArrayElement(
                                                                    rodata.Data, (int)_metadata.st_value), (int)_metadata.st_size).Split('\n');

                int setup_id = (from line in str_metadata let prms = line.Split(':')
                                                                     where prms[0] == ";uniqueid" select int.Parse(prms[1])).Single();

                int raw_uav_id = (from line in str_metadata let prms = line.Split(':')
                                                                       where prms[0] == ";uavid" select int.Parse(prms[1])).Single();

                SymbolWrapper _fmetadata = symtab["__OpenCL_" + kernel.Name + "_fmetadata"];

                string[] str_fmetadata = Marshal.PtrToStringAnsi(Marshal.UnsafeAddrOfPinnedArrayElement(
                                                                     rodata.Data, (int)_fmetadata.st_value), (int)_fmetadata.st_size).Split('\n');

                int func_id = (from line in str_fmetadata let prms = line.Split(':')
                                                                     where prms[0] == ";uniqueid" select int.Parse(prms[1])).Single();

                // ugly, i know!!!
                raw_uav_id = Math.Max(raw_uav_id, 11);
                int arena_uav_id = raw_uav_id;

                byte[] code = Encoding.Convert(Encoding.Unicode, Encoding.ASCII, Encoding.Unicode.GetBytes(
                                                   irprog.ToAMDIL(kernel.Name, setup_id, func_id, raw_uav_id, arena_uav_id)));

                SymbolWrapper _amdil = symtab["__OpenCL_" + kernel.Name + "_amdil"];

                _amdil.st_value = (uint)amdilcode.Position;
                _amdil.st_size  = (uint)code.Length;

                foreach (byte b in code)
                {
                    amdilcode.WriteByte(b);
                }
            }

            amdil.Data = amdilcode.ToArray();

            return(elf.BuildBinary());
        }