/// <summary>
/// Runs the "smooth" kernel over the level-0 image of <paramref name="tex"/> and uploads the
/// filtered pixels back into the same texture.
/// </summary>
/// <param name="tex">Texture that is read, filtered and overwritten.</param>
/// <param name="octave">Octave index; the kernel receives a sample period of <c>1 &lt;&lt; octave</c> and its reciprocal.</param>
public void RunSmoothNoise(GameTexture tex, int octave)
{
    CLFilter filter = _kernels["smooth"];

    // Bind the target first so the size query and the read-back address this texture
    // instead of whatever texture happened to be bound by the caller.
    GL.BindTexture(TextureTarget.Texture2D, tex.textureID);
    GL.GetTexLevelParameter(TextureTarget.Texture2D, 0, GetTextureParameter.TextureWidth, out float w);
    GL.GetTexLevelParameter(TextureTarget.Texture2D, 0, GetTextureParameter.TextureHeight, out float h);
    int width = (int)w;
    int height = (int)h;

    // Four bytes per pixel: BGRA with one unsigned byte per channel.
    byte[] buffer = new byte[width * height * 4];
    GL.GetTexImage(TextureTarget.Texture2D, 0, PixelFormat.Bgra, PixelType.UnsignedByte, buffer);

    // The device buffer and the queue wrap native OpenCL handles; release them deterministically.
    using (MemoryBuffer mb = c.CreateBuffer(MemoryFlag.ReadWrite | MemoryFlag.CopyHostPointer, buffer))
    using (CommandQueue cq = CommandQueue.CreateCommandQueue(c, d))
    {
        int samplePeriod = 1 << octave;

        filter.SetArg(0, mb);
        filter.SetArg(1, new int3(width, height, 1));
        filter.SetArg(2, 4);
        filter.SetArg(3, enablechannels);
        filter.SetArg(4, samplePeriod);
        filter.SetArg(5, 1f / samplePeriod);

        // One work item per byte of the image buffer.
        cq.EnqueueNDRangeKernel(filter.kernel, 1, buffer.Length);
        buffer = cq.EnqueueReadBuffer<byte>(mb, buffer.Length);
    }

    GL.BindTexture(TextureTarget.Texture2D, tex.textureID);
    GL.TexSubImage2D(TextureTarget.Texture2D, 0, 0, 0, width, height, PixelFormat.Bgra, PixelType.UnsignedByte, buffer);
}
/// <summary>
/// Runs the "worley" kernel into a fresh <paramref name="width"/> x <paramref name="height"/>
/// four-channel image and wraps the result in a <see cref="GameTexture"/>.
/// </summary>
/// <param name="positions">Flat list of point coordinates handed to the kernel; the point count passed is <c>positions.Count / 3</c>.</param>
/// <param name="width">Width of the generated image in pixels.</param>
/// <param name="height">Height of the generated image in pixels.</param>
/// <returns>The texture produced by <c>GameTexture.Load</c> from the kernel output.</returns>
public GameTexture GetWorley(List<float> positions, int width, int height)
{
    CLFilter filter = _kernels["worley"];
    byte[] buffer = new byte[width * height * 4];

    // All three objects wrap native OpenCL handles; dispose them so repeated calls do not leak.
    using (MemoryBuffer mb = c.CreateBuffer(MemoryFlag.ReadWrite | MemoryFlag.CopyHostPointer, buffer))
    using (MemoryBuffer points = c.CreateBuffer(MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, positions.ToArray()))
    using (CommandQueue cq = CommandQueue.CreateCommandQueue(c, d))
    {
        // NOTE(review): the original author marked the point layout as "Maybe wrong struct";
        // presumably three floats per point (hence Count / 3) - confirm against the kernel source.
        filter.SetArg(0, mb);
        filter.SetArg(1, new int3(width, height, 1));
        filter.SetArg(2, 4);
        filter.SetArg(3, enablechannels);
        filter.SetArg(4, points);
        filter.SetArg(5, positions.Count / 3);
        filter.SetArg(6, 0.3f);

        // One work item per byte of the image buffer.
        cq.EnqueueNDRangeKernel(filter.kernel, 1, buffer.Length);
        buffer = cq.EnqueueReadBuffer<byte>(mb, buffer.Length);
    }

    return GameTexture.Load(buffer, width, height);
}
/// <summary>
/// Emits the <c>test_if</c> kernel as OpenCL source, runs it on the first device of the first
/// platform with two four-element inputs, and checks the four results.
/// </summary>
public void TestIfCl()
{
    int[] lhs = { 7, 14, 6, 10 };
    int[] rhs = { 5, 10, 6, 14 };
    int[] actual = new int[4];
    int[] expected = { 2, 4, 0, 4 };

    // Compile the kernel to OpenCL source.
    var kernelSource = ClCompiler.EmitKernel("opencl-tests", "OpenCl.Tests.TestIf", "test_if");

    // Execute the kernel on the first available device.
    var firstPlatform = Platform.GetPlatformIDs().First();
    var firstDevice = Device.GetDeviceIDs(firstPlatform, DeviceType.All).First();
    using (var context = Context.CreateContext(firstPlatform, firstDevice, null, null))
    using (var queue = CommandQueue.CreateCommandQueue(context, firstDevice))
    using (var program = Program.CreateProgramWithSource(context, firstDevice, kernelSource))
    using (var kernel = Kernel.CreateKernel(program, "test_if"))
    using (var lhsMem = Mem<int>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, lhs))
    using (var rhsMem = Mem<int>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, rhs))
    using (var resultMem = Mem<int>.CreateBuffer(context, MemFlags.ReadWrite, 4 * Marshal.SizeOf<int>()))
    {
        kernel.SetKernelArg(0, (HandleObject)lhsMem);
        kernel.SetKernelArg(1, (HandleObject)rhsMem);
        kernel.SetKernelArg(2, (HandleObject)resultMem);

        queue.EnqueueNDRangeKernel(kernel, null, new int[] { 4 }, null, null);
        queue.Finish();
        queue.EnqueueReadBuffer(resultMem, true, actual);
    }

    // Checked in index order, so the first mismatch is the one reported.
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i]);
    }
}
/// <summary>
/// Services a read on one of the OpenCL "pins". Depending on <paramref name="address"/> this
/// reports presence, returns a platform/device count, or creates a platform, device, context
/// or command queue, stores it in <c>m_Handles</c> and returns its index in that list.
/// </summary>
/// <param name="address">Pin address being read.</param>
/// <returns>
/// <c>1</c> for the presence pin, a count for the count pins, the index of the newly stored
/// handle for the creation pins, and <c>0</c> for any other address.
/// </returns>
public override uint ReadData(uint address)
{
    if (address == PresentPin)
    {
        return 1;
    }

    if (address == GetPlatformCountPin)
    {
        return Platform.GetPlatformCount();
    }

    if (address == GetDeviceCountPin)
    {
        Platform countedPlatform = m_Handles[(int)m_DeviceCountPlatformIdxSelector] as Platform;
        return countedPlatform.GetDeviceCount(DeviceType.All);
    }

    if (address == GetDevicePin)
    {
        Platform owner = m_Handles[(int)m_DevicePlatformIdxSelector] as Platform;

        // NOTE(review): the device is skipped to using m_PlatformIdxSelector, the same selector
        // the platform branch uses - confirm this is not meant to be a device index selector.
        Device device = owner.GetDevices(DeviceType.All).Skip((int)m_PlatformIdxSelector).First();
        m_Handles.Add(device);
        return (uint)m_Handles.Count - 1;
    }

    if (address == GetPlatformPin)
    {
        Platform platform = Platform.GetPlatforms().Skip((int)m_PlatformIdxSelector).First();
        m_Handles.Add(platform);
        return (uint)m_Handles.Count - 1;
    }

    if (address == CreateContextPin)
    {
        Context context = Context.CreateContext(m_Handles[(int)m_CreateContextDeviceIdxSelector] as Device);
        m_Handles.Add(context);
        return (uint)m_Handles.Count - 1;
    }

    if (address == CreateCommandQueuePin)
    {
        m_CreateCommandQueueContextStep = 0;

        Device queueDevice = m_Handles[(int)m_CreateCommandQueueDeviceIdxSelector] as Device;
        Context queueContext = m_Handles[(int)m_CreateCommandQueueContextIdxSelector] as Context;
        CommandQueue queue = CommandQueue.CreateCommandQueue(queueContext, queueDevice);
        m_Handles.Add(queue);
        return (uint)m_Handles.Count - 1;
    }

    return 0;
}
/// <summary>
/// Builds the given IL module, runs its <c>conv</c> kernel over <c>inputSignal</c> and
/// <c>mask</c>, reads the result into <c>outputSignal</c> and prints it.
/// </summary>
/// <param name="module">IL binary handed (as a copy) to <c>Program.CreateProgramWithIL</c>.</param>
private static void RunNative(byte[] module)
{
    Device target = Device.GetDeviceIDs(null, DeviceType.All).First();

    using (var context = Context.CreateContext(null, target, null, null))
    using (var queue = CommandQueue.CreateCommandQueue(context, target))
    using (var program = Program.CreateProgramWithIL(context, module.ToArray()))
    {
        program.BuildProgram(target);

        using (var kernel = Kernel.CreateKernel(program, "conv"))
        using (var signalMem = Mem<uint>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, inputSignal))
        using (var maskMem = Mem<uint>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, mask))
        using (var resultMem = Mem<uint>.CreateBuffer(context, MemFlags.WriteOnly, (int)outputSignalWidth * (int)outputSignalHeight * Marshal.SizeOf<int>()))
        {
            // Kernel arguments: input, mask, output, input width, mask width.
            kernel.SetKernelArg(0, (HandleObject)signalMem);
            kernel.SetKernelArg(1, (HandleObject)maskMem);
            kernel.SetKernelArg(2, (HandleObject)resultMem);
            kernel.SetKernelArg(3, (int)inputSignalWidth);
            kernel.SetKernelArg(4, (int)maskWidth);

            // One work item per output element, with 1x1 work groups.
            uint[] globalSize = new uint[] { outputSignalWidth, outputSignalHeight };
            uint[] localSize = new uint[] { 1, 1 };
            queue.EnqueueNDRangeKernel(kernel, null, globalSize, localSize, null);
            queue.Finish();
            queue.EnqueueReadBuffer(resultMem, true, outputSignal);
        }
    }

    PrintArray(outputSignalWidth, outputSignal);
}
/// <summary>
/// Builds the <c>bitonic_sort</c> kernel from OpenCL source, runs every stage/pass over
/// <c>data</c> on the first CPU device of the first platform, and prints the result.
/// </summary>
/// <param name="source">OpenCL C source containing the <c>bitonic_sort</c> kernel.</param>
/// <param name="ascending">Passed to the kernel as argument 3 (<c>1</c> when true, <c>0</c> otherwise).</param>
/// <remarks>
/// Build failures and an undersized device buffer are reported on the console and end the run.
/// Every OpenCL object is disposed on all exit paths.
/// </remarks>
private static void RunNative(string source, bool ascending)
{
    Platform platform = Platform.GetPlatformIDs()[0];
    Device[] devices = Device.GetDeviceIDs(platform, DeviceType.Cpu);

    using (Context context = Context.CreateContext(platform, devices, null, null))
    using (var program = Program.CreateProgramWithSource(context, new String[] { source }))
    {
        try
        {
            program.BuildProgram(devices, null, null, null);
        }
        catch (OpenClException ex)
        {
            Console.WriteLine("*** Error building kernel 'bitonic_sort'");
            if (ex.ErrorCode == ErrorCode.BuildProgramFailure)
            {
                Console.WriteLine("*** Build log: {0}", program.BuildInfo.GetLog(devices[0]));
                Console.WriteLine("*** Source code:");
                Console.WriteLine(source);
            }
            return;
        }

        using (var kernel = Kernel.CreateKernel(program, "bitonic_sort"))
        using (var dataBuffer = Mem<int>.CreateBuffer(context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, data))
        {
            if (dataBuffer.Size < Marshal.SizeOf<int>() * data.Length)
            {
                Console.WriteLine("*** Invalid MemObject size: expected >= {0}, found {1}.", Marshal.SizeOf<int>() * data.Length, dataBuffer.Size);
                return;
            }

            using (var queue = CommandQueue.CreateCommandQueue(context, devices[0]))
            {
                // Number of halvings needed to bring data.Length down to 2.
                var numStages = 0;
                for (var i = data.Length; i > 2; i >>= 1)
                {
                    numStages++;
                }

                kernel.SetKernelArg(0, (HandleObject)dataBuffer);
                kernel.SetKernelArg(3, ascending ? 1 : 0);

                // Number of quad items in the input array; loop-invariant, so computed once.
                var gsz = data.Length / (2 * 4);

                for (var stage = 0; stage < numStages; stage++)
                {
                    kernel.SetKernelArg(1, stage);
                    for (var pass = stage; pass >= 0; pass--)
                    {
                        kernel.SetKernelArg(2, pass);

                        // The final pass of each stage (pass == 0) runs with twice the work items.
                        var global_work_size = new int[] { pass > 0 ? gsz : gsz << 1 };
                        queue.EnqueueNDRangeKernel(kernel, null, global_work_size, null, null);
                    }
                }

                var result = new int[data.Length];
                queue.EnqueueReadBuffer(dataBuffer, true, result);
                PrintArray(result);
            }
        }
    }
}
/// <summary>
/// Writes the byte sequence 0..254 into a 255-byte device buffer, reads it back and fails the
/// test if any byte differs. The buffer and queue are disposed before the verdict is reported.
/// </summary>
public void ReadBuffer()
{
    CommandQueue queue = CommandQueue.CreateCommandQueue(context, device);
    MemoryBuffer deviceBuffer = context.CreateBuffer<byte>(
        MemoryFlag.AllocateHostPointer | MemoryFlag.ReadWrite,
        255,
        "TestBufferA"
    );

    byte[] written = Enumerable.Range(0, 255).Select(x => (byte)x).ToArray();
    queue.EnqueueWriteBuffer(deviceBuffer, written);
    byte[] readBack = queue.EnqueueReadBuffer<byte>(deviceBuffer, (int)deviceBuffer.Size);

    // Stop scanning at the first mismatching byte.
    bool identical = true;
    for (int i = 0; identical && i < written.Length; i++)
    {
        identical = written[i] == readBack[i];
    }

    deviceBuffer.Dispose();
    queue.Dispose();

    if (!identical)
    {
        Assert.Fail("Buffer read back different data than was written into it.");
    }
}
/// <summary>
/// Reads the whole content of <paramref name="buf"/>, passes it through <c>CreateRandom</c>
/// together with <paramref name="channelEnableState"/>, and writes the result back to the buffer.
/// </summary>
/// <param name="buf">Device buffer that is read and then overwritten.</param>
/// <param name="channelEnableState">Channel enable state forwarded to <c>CreateRandom</c>.</param>
public static void WriteRandomByte(MemoryBuffer buf, byte[] channelEnableState)
{
    // The queue is created per call, so it must also be released per call.
    using (CommandQueue cq = CommandQueue.CreateCommandQueue(GetInstance().c, GetInstance().d))
    {
        byte[] rnd = cq.EnqueueReadBuffer<byte>(buf, (int)buf.Size);
        rnd = CreateRandom(rnd, channelEnableState);
        cq.EnqueueWriteBuffer(buf, rnd);
    }
}
/// <summary>
/// Builds the given IL module, runs its <c>simple_kernel</c> over the arrays <c>a</c> and
/// <c>b</c>, and prints the <c>len</c>-element result.
/// </summary>
/// <param name="module">IL binary passed to <c>Program.CreateProgramWithIL</c>.</param>
/// <remarks>
/// A build failure or an undersized device buffer is reported on the console and ends the run.
/// Every OpenCL object is disposed on all exit paths.
/// </remarks>
private static void RunNative(byte[] module)
{
    var platform = Platform.GetPlatformIDs().First();
    var device = Device.GetDeviceIDs(platform, DeviceType.All).First();

    using (var context = Context.CreateContext(platform, device, null, null))
    using (var program = Program.CreateProgramWithIL(context, module))
    {
        try
        {
            program.BuildProgram(device);
        }
        catch (OpenClException)
        {
            Console.WriteLine("*** Error creating kernel 'simple_kernel'");
            return;
        }

        using (var kernel = Kernel.CreateKernel(program, "simple_kernel"))
        using (var abuf = Mem<int>.CreateBuffer(context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, a))
        {
            if (abuf.Size < Marshal.SizeOf<int>() * a.Length)
            {
                Console.WriteLine("*** Invalid 'abuf' MemObject size: expected >= {0}, found {1}.", Marshal.SizeOf<int>() * a.Length, abuf.Size);
                return;
            }

            using (var bbuf = Mem<int>.CreateBuffer(context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, b))
            {
                if (bbuf.Size < Marshal.SizeOf<int>() * b.Length)
                {
                    Console.WriteLine("*** Invalid 'bbuf' MemObject size: expected >= {0}, found {1}.", Marshal.SizeOf<int>() * b.Length, bbuf.Size);
                    return;
                }

                // The result buffer is rounded up to a multiple of four elements.
                using (var rbuf = Mem<int>.CreateBuffer(context, MemFlags.ReadWrite, Marshal.SizeOf<int>() * ((len + 3) & ~3)))
                {
                    if (rbuf.Size < Marshal.SizeOf<int>() * len)
                    {
                        Console.WriteLine("*** Invalid 'rbuf' MemObject size: expected >= {0}, found {1}.", Marshal.SizeOf<int>() * len, rbuf.Size);
                        return;
                    }

                    using (var queue = CommandQueue.CreateCommandQueue(context, device))
                    {
                        kernel.SetKernelArg(0, (HandleObject)abuf);
                        kernel.SetKernelArg(1, (HandleObject)bbuf);
                        kernel.SetKernelArg(2, (HandleObject)rbuf);

                        // Number of quad items in the input arrays.
                        var global_work_size = new int[] { (len + 3) / 4 };
                        queue.EnqueueNDRangeKernel(kernel, null, global_work_size, null, null);

                        var result = new int[len];
                        queue.EnqueueReadBuffer(rbuf, true, result);
                        PrintArray(result);
                    }
                }
            }
        }
    }
}
/// <summary>
/// Initializes the OpenCL API: collects the default devices of every platform, picks the first
/// one that reports itself available, and creates the context and command queue on it.
/// </summary>
/// <exception cref="OpenClException">
/// Thrown when no available device is found, or when creating the context or command queue fails
/// (the original exception is attached as the inner exception).
/// </exception>
private void InitializeOpenCl()
{
    // Each sequence is walked exactly once; indexing with Count()/ElementAt() would
    // re-enumerate the platform and device queries on every access.
    List<Device> devs = new List<Device>();
    foreach (Platform platform in Platform.GetPlatforms())
    {
        foreach (Device device in platform.GetDevices(DeviceType.Default))
        {
            Logger.Log(LogType.Log, "Adding Device: " + device.Name + "@" + device.Vendor, 1);
            devs.Add(device);
        }
    }

    // Take the first available device and stop querying the rest.
    Device chosenDevice = null;
    foreach (Device candidate in devs)
    {
        if (candidate.IsAvailable)
        {
            Logger.Log(LogType.Log, "Choosing Device: " + candidate.Name + "@" + candidate.Vendor, 1);
            chosenDevice = candidate;
            break;
        }
    }

    if (chosenDevice == null)
    {
        throw new OpenClException("Could not Get Device. Total Devices: " + devs.Count);
    }

    try
    {
        context = Context.CreateContext(chosenDevice);
        commandQueue = CommandQueue.CreateCommandQueue(context, chosenDevice);
    }
    catch (Exception e)
    {
        Logger.Log(LogType.Error, e.ToString(), 1);
        throw new OpenClException(
            "Could not initialize OpenCL with Device: " + chosenDevice.Name + "@" + chosenDevice.Vendor + "\n\t" + e.Message,
            e
        );
    }
}
/// <summary>
/// Initializes the OpenCL API by creating the context and command queue on the first device of
/// the first platform. When compiled with <c>NO_CL</c>, only a warning is logged.
/// </summary>
/// <exception cref="System.InvalidOperationException">
/// Thrown when no platform exists or the first platform exposes no device.
/// </exception>
private void InitializeOpenCL()
{
#if NO_CL
    Logger.Log("Starting in NO_CL Mode", DebugChannel.Warning);
#else
    Device chosenDevice = Platform.GetPlatforms().FirstOrDefault()?.GetDevices(DeviceType.All).FirstOrDefault();

    // Both FirstOrDefault calls can yield null; fail with a clear message instead of
    // passing a null device into the context creation.
    if (chosenDevice == null)
    {
        throw new System.InvalidOperationException("No OpenCL device found on the first available platform.");
    }

    _context = Context.CreateContext(chosenDevice);
    _commandQueue = CommandQueue.CreateCommandQueue(_context, chosenDevice);
#endif
}
/// <summary>
/// Builds the <c>conv</c> kernel from OpenCL source, runs it over <c>inputSignal</c> and
/// <c>mask</c> on the first CPU device of the first platform, reads the result into
/// <c>outputSignal</c> and prints it.
/// </summary>
/// <param name="source">OpenCL C source containing the <c>conv</c> kernel.</param>
/// <exception cref="OpenClException">Rethrown, with its original stack trace, when the build fails.</exception>
/// <exception cref="ApplicationException">Thrown when the input buffer is smaller than the input signal.</exception>
private static void RunConvolution(string source)
{
    Platform platform = Platform.GetPlatformIDs()[0];
    Device[] devices = Device.GetDeviceIDs(platform, DeviceType.Cpu);

    using (Context context = Context.CreateContext(platform, devices, null, null))
    using (var program = Program.CreateProgramWithSource(context, new String[] { source }))
    {
        try
        {
            program.BuildProgram(devices, null, null, null);
        }
        catch (OpenClException ex)
        {
            if (ex.ErrorCode == ErrorCode.BuildProgramFailure)
            {
                Console.WriteLine("*** Error building kernel 'conv'.");
                Console.WriteLine("*** Build log: {0}", program.BuildInfo.GetLog(devices[0]));
                Console.WriteLine("*** Source code:");
                Console.WriteLine(source);
            }

            // Bare rethrow keeps the original stack trace ("throw ex;" would reset it).
            throw;
        }

        using (var kernel = Kernel.CreateKernel(program, "conv"))
        using (var inputSignalBuffer = Mem<uint>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, inputSignal))
        {
            if (inputSignalBuffer.Size < (uint)(Marshal.SizeOf<uint>() * inputSignal.Length))
            {
                throw new ApplicationException(String.Format("Invalid MemObject size: expected >= {0}, found {1}.", Marshal.SizeOf<uint>() * inputSignal.Length, inputSignalBuffer.Size));
            }

            using (var maskBuffer = Mem<uint>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, mask))
            using (var outputSignalBuffer = Mem<uint>.CreateBuffer(context, MemFlags.WriteOnly, sizeof(uint) * outputSignalHeight * outputSignalWidth))
            using (var queue = CommandQueue.CreateCommandQueue(context, devices[0]))
            {
                // Kernel arguments: input, mask, output, input width, mask width.
                kernel.SetKernelArg(0, (HandleObject)inputSignalBuffer);
                kernel.SetKernelArg(1, (HandleObject)maskBuffer);
                kernel.SetKernelArg(2, (HandleObject)outputSignalBuffer);
                kernel.SetKernelArg(3, inputSignalWidth);
                kernel.SetKernelArg(4, maskWidth);

                // One work item per output element, with 1x1 work groups.
                var globalWorkSize = new uint[] { outputSignalWidth, outputSignalHeight };
                var localWorkSize = new uint[] { 1, 1 };
                queue.EnqueueNDRangeKernel(kernel, null, globalWorkSize, localWorkSize, null);
                queue.EnqueueReadBuffer(outputSignalBuffer, true, outputSignal);
            }
        }
    }

    PrintOutputSignal();
}
/// <summary>
/// Creates a 255-byte device buffer, writes a zero-filled array of the same length into it,
/// then disposes the buffer and the queue. Passes as long as nothing throws.
/// </summary>
public void WriteBuffer()
{
    CommandQueue queue = CommandQueue.CreateCommandQueue(context, device);
    MemoryBuffer deviceBuffer = context.CreateBuffer<byte>(
        MemoryFlag.AllocateHostPointer | MemoryFlag.ReadWrite,
        255,
        "TestBufferA"
    );

    byte[] payload = new byte[255];
    queue.EnqueueWriteBuffer(deviceBuffer, payload);

    deviceBuffer.Dispose();
    queue.Dispose();
}
/// <summary>
/// Runs the kernel registered under <paramref name="kernelKey"/> over the pixels of
/// <paramref name="tex"/> and writes the result back into the same bitmap.
/// </summary>
/// <param name="tex">Bitmap that is processed in place (locked as 32bpp ARGB).</param>
/// <param name="kernelKey">Key of the kernel in <c>_kernels</c>.</param>
/// <returns>The same <paramref name="tex"/> instance, now holding the kernel output.</returns>
public Bitmap RunImageKernel(Bitmap tex, string kernelKey)
{
    Kernel k = _kernels[kernelKey];
    int width = tex.Width;
    int height = tex.Height;

    // Four bytes per pixel in Format32bppArgb.
    // NOTE(review): the copies below assume the locked stride equals width * 4 - confirm.
    byte[] buffer = new byte[width * height * 4];

    BitmapData data = tex.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.ReadWrite, PixelFormat.Format32bppArgb);
    try
    {
        Marshal.Copy(data.Scan0, buffer, 0, buffer.Length);

        // The device buffer and the queue wrap native OpenCL handles; release them deterministically.
        using (MemoryBuffer mb = c.CreateBuffer(MemoryFlag.ReadWrite | MemoryFlag.CopyHostPointer, buffer))
        using (CommandQueue cq = CommandQueue.CreateCommandQueue(c, d))
        {
            k.SetKernelArgument(0, mb);

            // One work item per byte of the image buffer.
            cq.EnqueueNDRangeKernel(k, 1, buffer.Length);
            buffer = cq.EnqueueReadBuffer<byte>(mb, buffer.Length);
        }

        Marshal.Copy(buffer, 0, data.Scan0, buffer.Length);
    }
    finally
    {
        // Always unlock, otherwise a failed kernel run leaves the bitmap unusable.
        tex.UnlockBits(data);
    }

    return tex;
}
/// <summary>
/// Emits the <c>test_if</c> kernel as a SPIR-V module, dumps the module to <c>test_if.spv</c>,
/// runs it on the first device of the first platform with two four-element inputs, and checks
/// the four results.
/// </summary>
public void TestIfSpirV()
{
    int[] lhs = { 7, 14, 6, 10 };
    int[] rhs = { 5, 10, 6, 14 };
    int[] actual = new int[4];
    int[] expected = { 2, 4, 0, 4 };

    // Compile the kernel to SPIR-V.
    var module = new MemoryStream();
    SpirCompiler.EmitKernel("opencl-tests", "OpenCl.Tests.TestIf", "test_if", module);

    // Debug aid: keep a copy of the emitted module on disk.
    using (var dump = new FileStream("test_if.spv", FileMode.Create))
    {
        byte[] bytes = module.ToArray();
        dump.Write(bytes, 0, bytes.Length);
    }

    // Execute the SPIR-V kernel on the first available device.
    var firstPlatform = Platform.GetPlatformIDs().First();
    var firstDevice = Device.GetDeviceIDs(firstPlatform, DeviceType.All).First();
    using (var context = Context.CreateContext(null, firstDevice, null, null))
    using (var queue = CommandQueue.CreateCommandQueue(context, firstDevice))
    using (var program = Program.CreateProgramWithIL(context, firstDevice, module.ToArray()))
    using (var kernel = Kernel.CreateKernel(program, "test_if"))
    using (var lhsMem = Mem<int>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, lhs))
    using (var rhsMem = Mem<int>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, rhs))
    using (var resultMem = Mem<int>.CreateBuffer(context, MemFlags.ReadWrite, 4 * Marshal.SizeOf<int>()))
    {
        kernel.SetKernelArg(0, (HandleObject)lhsMem);
        kernel.SetKernelArg(1, (HandleObject)rhsMem);
        kernel.SetKernelArg(2, (HandleObject)resultMem);

        queue.EnqueueNDRangeKernel(kernel, null, new int[] { 4 }, null, null);
        queue.Finish();
        queue.EnqueueReadBuffer(resultMem, true, actual);
    }

    // Checked in index order, so the first mismatch is the one reported.
    for (int i = 0; i < expected.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i]);
    }
}
/// <summary>
/// Runs the "overlay" kernel, combining <paramref name="overlay"/> into <paramref name="tex"/>
/// with the given <paramref name="weight"/>, and uploads the result back into <paramref name="tex"/>.
/// </summary>
/// <param name="tex">Texture that is read, processed and overwritten.</param>
/// <param name="overlay">Texture whose pixels are passed to the kernel as the overlay image.</param>
/// <param name="weight">Weight value forwarded to the kernel.</param>
public void RunOverlayKernel(GameTexture tex, GameTexture overlay, float weight)
{
    CLFilter filter = _kernels["overlay"];

    GL.BindTexture(TextureTarget.Texture2D, tex.textureID);
    GL.GetTexLevelParameter(TextureTarget.Texture2D, 0, GetTextureParameter.TextureWidth, out float w);
    GL.GetTexLevelParameter(TextureTarget.Texture2D, 0, GetTextureParameter.TextureHeight, out float h);

    // Convert to int before multiplying so the byte count is not computed in float arithmetic.
    int width = (int)w;
    int height = (int)h;
    int byteCount = width * height * 4;

    byte[] buffer = new byte[byteCount];
    GL.GetTexImage(TextureTarget.Texture2D, 0, PixelFormat.Bgra, PixelType.UnsignedByte, buffer);

    // NOTE(review): the overlay is read into a buffer sized from tex - this assumes both
    // textures have the same dimensions; confirm with callers.
    byte[] bufferOverlay = new byte[byteCount];
    GL.BindTexture(TextureTarget.Texture2D, overlay.textureID);
    GL.GetTexImage(TextureTarget.Texture2D, 0, PixelFormat.Bgra, PixelType.UnsignedByte, bufferOverlay);

    // The device buffers and the queue wrap native OpenCL handles; release them deterministically.
    using (MemoryBuffer mb = c.CreateBuffer(MemoryFlag.CopyHostPointer, buffer))
    using (MemoryBuffer mbOverlay = c.CreateBuffer(MemoryFlag.CopyHostPointer, bufferOverlay))
    using (CommandQueue cq = CommandQueue.CreateCommandQueue(c, d))
    {
        filter.SetArg(0, mb);
        filter.SetArg(1, new int3(width, height, 1));
        filter.SetArg(2, 4);
        filter.SetArg(3, enablechannels);

        // NOTE(review): argument index 4 is never set here; verify against the kernel signature.
        filter.SetArg(5, mbOverlay);
        filter.SetArg(6, weight);

        // One work item per byte of the image buffer.
        cq.EnqueueNDRangeKernel(filter.kernel, 1, buffer.Length);
        buffer = cq.EnqueueReadBuffer<byte>(mb, buffer.Length);
    }

    GL.BindTexture(TextureTarget.Texture2D, tex.textureID);
    GL.TexSubImage2D(TextureTarget.Texture2D, 0, 0, 0, width, height, PixelFormat.Bgra, PixelType.UnsignedByte, buffer);
}
/// <summary>
/// Runs the "checkerboard" kernel over the level-0 image of <paramref name="tex"/> and uploads
/// the result back into the same texture.
/// </summary>
/// <param name="tex">Texture that is read, processed and overwritten.</param>
/// <param name="length">Length value forwarded to the kernel.</param>
public void RunCheckerBoardKernel(GameTexture tex, float length)
{
    CLFilter filter = _kernels["checkerboard"];

    // Query the texture size.
    GL.BindTexture(TextureTarget.Texture2D, tex.textureID);
    GL.GetTexLevelParameter(TextureTarget.Texture2D, 0, GetTextureParameter.TextureWidth, out float w);
    GL.GetTexLevelParameter(TextureTarget.Texture2D, 0, GetTextureParameter.TextureHeight, out float h);

    // Convert to int before multiplying so the byte count is not computed in float arithmetic.
    int width = (int)w;
    int height = (int)h;

    // Fetch the image from OpenGL: four bytes per pixel (BGRA).
    byte[] buffer = new byte[width * height * 4];
    GL.GetTexImage(TextureTarget.Texture2D, 0, PixelFormat.Bgra, PixelType.UnsignedByte, buffer);

    // The device buffer and the queue wrap native OpenCL handles; release them deterministically.
    using (MemoryBuffer mb = c.CreateBuffer(MemoryFlag.ReadWrite | MemoryFlag.CopyHostPointer, buffer))
    using (CommandQueue cq = CommandQueue.CreateCommandQueue(c, d))
    {
        filter.SetArg(0, mb);
        filter.SetArg(1, new int3(width, height, 1));
        filter.SetArg(2, 4);
        filter.SetArg(3, enablechannels);

        // NOTE(review): argument 3 is set twice, so `length` replaces `enablechannels`.
        // Other filters place their custom arguments at index 4 and up; verify against the
        // kernel signature whether these should be indices 4 and 5.
        filter.SetArg(3, length);
        filter.SetArg(4, width);

        // One work item per byte of the image buffer.
        cq.EnqueueNDRangeKernel(filter.kernel, 1, buffer.Length);
        buffer = cq.EnqueueReadBuffer<byte>(mb, buffer.Length);
    }

    GL.BindTexture(TextureTarget.Texture2D, tex.textureID);
    GL.TexSubImage2D(TextureTarget.Texture2D, 0, 0, 0, width, height, PixelFormat.Bgra, PixelType.UnsignedByte, buffer);
}
/// <summary>
/// Builds the given IL module, runs its <c>test_kernel</c> over the <c>short4</c> arrays
/// <c>a</c> and <c>b</c> with two work items, and prints the two-element result.
/// </summary>
/// <param name="module">IL binary passed to <c>Program.CreateProgramWithIL</c>.</param>
/// <remarks>
/// A build failure is reported on the console and ends the run. Every OpenCL object is disposed
/// on all exit paths.
/// </remarks>
private static void RunNative(byte[] module)
{
    var platform = Platform.GetPlatformIDs().First();
    var device = Device.GetDeviceIDs(platform, DeviceType.All).First();

    using (var context = Context.CreateContext(platform, device, null, null))
    using (var program = Program.CreateProgramWithIL(context, module))
    {
        try
        {
            program.BuildProgram(device);
        }
        catch (OpenClException)
        {
            Console.WriteLine("*** Error creating kernel 'test_kernel'");
            return;
        }

        using (var kernel = Kernel.CreateKernel(program, "test_kernel"))
        using (var abuf = Mem<short4>.CreateBuffer(context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, a))
        using (var bbuf = Mem<short4>.CreateBuffer(context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, b))
        using (var rbuf = Mem<short4>.CreateBuffer(context, MemFlags.ReadWrite, Marshal.SizeOf<short4>() * 2))
        using (var queue = CommandQueue.CreateCommandQueue(context, device))
        {
            kernel.SetKernelArg(0, (HandleObject)abuf);
            kernel.SetKernelArg(1, (HandleObject)bbuf);
            kernel.SetKernelArg(2, (HandleObject)rbuf);

            // Two work items, matching the two-element result buffer.
            var global_work_size = new int[] { 2 };
            queue.EnqueueNDRangeKernel(kernel, null, global_work_size, null, null);

            var result = new short4[2];
            queue.EnqueueReadBuffer(rbuf, true, result);
            PrintArray(result);
        }
    }
}
/// <summary>
/// Builds <c>TEST_KERNEL</c>, runs its <c>set_value</c> kernel over a 255-byte buffer with the
/// value 128, and fails the test if any byte read back differs from 128.
/// </summary>
/// <remarks>
/// The queue, program, kernel and buffer are released even when the assertion fails.
/// </remarks>
public void RunKernel()
{
    // Nested using blocks dispose in reverse order: buffer, kernel, program, queue.
    using (CommandQueue queue = CommandQueue.CreateCommandQueue(context, device))
    using (Program program = context.CreateAndBuildProgramFromString(TEST_KERNEL))
    using (Kernel kernel = program.CreateKernel("set_value"))
    using (MemoryBuffer buffer = context.CreateBuffer(
        MemoryFlag.CopyHostPointer | MemoryFlag.ReadWrite,
        new byte[255],
        "TestBufferA"
    ))
    {
        kernel.SetKernelArgument(0, buffer);
        kernel.SetKernelArgumentGen(1, (byte)128);

        // One work item per byte of the buffer.
        queue.EnqueueNDRangeKernel(kernel, 1, 255);

        byte[] result = queue.EnqueueReadBuffer<byte>(buffer, 255);
        foreach (byte b in result)
        {
            if (b != 128)
            {
                Assert.Fail("Kernel did not execute Correctly.");
            }
        }
    }
}
/// <summary>
/// Runs the "light" kernel over the level-0 image of <paramref name="tex"/> and uploads the
/// result back into the same texture.
/// </summary>
/// <param name="tex">Texture that is read, processed and overwritten.</param>
public void RunLightKernel(GameTexture tex)
{
    CLFilter filter = _kernels["light"];

    GL.BindTexture(TextureTarget.Texture2D, tex.textureID);
    GL.GetTexLevelParameter(TextureTarget.Texture2D, 0, GetTextureParameter.TextureWidth, out float w);
    GL.GetTexLevelParameter(TextureTarget.Texture2D, 0, GetTextureParameter.TextureHeight, out float h);

    // Convert to int before multiplying so the byte count is not computed in float arithmetic.
    int width = (int)w;
    int height = (int)h;

    // Four bytes per pixel: BGRA with one unsigned byte per channel.
    byte[] buffer = new byte[width * height * 4];
    GL.GetTexImage(TextureTarget.Texture2D, 0, PixelFormat.Bgra, PixelType.UnsignedByte, buffer);

    // The device buffer and the queue wrap native OpenCL handles; release them deterministically.
    using (MemoryBuffer mb = c.CreateBuffer(MemoryFlag.ReadWrite | MemoryFlag.CopyHostPointer, buffer))
    using (CommandQueue cq = CommandQueue.CreateCommandQueue(c, d))
    {
        filter.SetArg(0, mb);
        filter.SetArg(1, new int3(width, height, 1));
        filter.SetArg(2, 4);
        filter.SetArg(3, enablechannels);

        // One work item per byte of the image buffer.
        cq.EnqueueNDRangeKernel(filter.kernel, 1, buffer.Length);
        buffer = cq.EnqueueReadBuffer<byte>(mb, buffer.Length);
    }

    GL.BindTexture(TextureTarget.Texture2D, tex.textureID);
    GL.TexSubImage2D(TextureTarget.Texture2D, 0, 0, 0, width, height, PixelFormat.Bgra, PixelType.UnsignedByte, buffer);
}
/// <summary>
/// Fills a <paramref name="width"/> x <paramref name="height"/> four-channel image with random
/// grey values, runs the "perlin" kernel over it and wraps the result in a <see cref="GameTexture"/>.
/// </summary>
/// <param name="width">Width of the generated image in pixels.</param>
/// <param name="height">Height of the generated image in pixels.</param>
/// <param name="octaves">Octave count forwarded to the kernel.</param>
/// <param name="persistence">Persistence value forwarded to the kernel.</param>
/// <returns>The texture produced by <c>GameTexture.Load</c> from the kernel output.</returns>
public GameTexture GetPerlin(int width, int height, int octaves, float persistence)
{
    CLFilter filter = _kernels["perlin"];
    byte[] buffer = new byte[width * height * 4];

    // Seed every pixel with one random grey value and full alpha, stepping a pixel (4 bytes) at a time.
    Random rnd = new Random();
    for (int i = 0; i < buffer.Length; i += 4)
    {
        // The upper bound of Random.Next is exclusive, so 256 is needed to cover 0..255.
        byte val = (byte)rnd.Next(0, 256);
        buffer[i] = buffer[i + 1] = buffer[i + 2] = val;
        buffer[i + 3] = 255; // Alpha
    }

    // The device buffer and the queue wrap native OpenCL handles; release them deterministically.
    using (MemoryBuffer mb = c.CreateBuffer(MemoryFlag.ReadWrite | MemoryFlag.CopyHostPointer, buffer))
    using (CommandQueue cq = CommandQueue.CreateCommandQueue(c, d))
    {
        filter.SetArg(0, mb);
        filter.SetArg(1, new int3(width, height, 1));
        filter.SetArg(2, 4);
        filter.SetArg(3, enablechannels);
        filter.SetArg(4, persistence);
        filter.SetArg(5, octaves);

        // One work item per byte of the image buffer.
        cq.EnqueueNDRangeKernel(filter.kernel, 1, buffer.Length);
        buffer = cq.EnqueueReadBuffer<byte>(mb, buffer.Length);
    }

    return GameTexture.Load(buffer, width, height);
}
static void Main(string[] args) { try { if (args.Length > 0) { deviceID = int.Parse(args[0]); } if (args.Length > 2) { platformID = int.Parse(args[2]); } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Device ID parse error", ex); } try { if (args.Length > 1) { port = int.Parse(args[1]); Comms.ConnectToMaster(port); } else { TEST = true; CGraph.ShowCycles = true; Logger.CopyToConsole = true; } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Master connection error"); } // Gets all available platforms and their corresponding devices, and prints them out in a table List <Platform> platforms = null; try { platforms = Platform.GetPlatforms().ToList(); } catch (Exception ex) { Logger.Log(LogLevel.Error, "Failed to get OpenCL platform list"); return; } if (TEST) { currentJob = nextJob = new Job() { jobID = 0, k0 = 0xf4956dc403730b01L, k1 = 0xe6d45de39c2a5a3eL, k2 = 0xcbf626a8afee35f6L, k3 = 0x4307b94b1a0c9980L, //k0 = 0x10ef16eadd6aa061L, //k1 = 0x563f07e7a3c788b3L, //k2 = 0xe8d7c8db1518f29aL, //k3 = 0xc0ab7d1b4ca1adffL, pre_pow = TestPrePow, timestamp = DateTime.Now }; } else { currentJob = nextJob = new Job() { jobID = 0, k0 = 0xf4956dc403730b01L, k1 = 0xe6d45de39c2a5a3eL, k2 = 0xcbf626a8afee35f6L, k3 = 0x4307b94b1a0c9980L, pre_pow = TestPrePow, timestamp = DateTime.Now }; if (!Comms.IsConnected()) { Console.WriteLine("Master connection failed, aborting"); Logger.Log(LogLevel.Error, "No master connection, exitting!"); Task.Delay(500).Wait(); return; } if (deviceID < 0) { try { //Environment.SetEnvironmentVariable("GPU_FORCE_64BIT_PTR", "1", EnvironmentVariableTarget.Machine); Environment.SetEnvironmentVariable("GPU_MAX_HEAP_SIZE", "100", EnvironmentVariableTarget.User); Environment.SetEnvironmentVariable("GPU_USE_SYNC_OBJECTS", "1", EnvironmentVariableTarget.User); Environment.SetEnvironmentVariable("GPU_MAX_ALLOC_PERCENT", "100", EnvironmentVariableTarget.User); Environment.SetEnvironmentVariable("GPU_SINGLE_ALLOC_PERCENT", "100", 
EnvironmentVariableTarget.User); Environment.SetEnvironmentVariable("GPU_64BIT_ATOMICS", "1", EnvironmentVariableTarget.User); Environment.SetEnvironmentVariable("GPU_MAX_WORKGROUP_SIZE", "1024", EnvironmentVariableTarget.User); //Environment.SetEnvironmentVariable("AMD_OCL_BUILD_OPTIONS_APPEND", "-cl-std=CL2.0", EnvironmentVariableTarget.Machine); GpuDevicesMessage gpum = new GpuDevicesMessage() { devices = new List <GpuDevice>() }; //foreach (Platform platform in platforms) for (int p = 0; p < platforms.Count(); p++) { Platform platform = platforms[p]; var devices = platform.GetDevices(DeviceType.Gpu).ToList(); //foreach (Device device in platform.GetDevices(DeviceType.All)) for (int d = 0; d < devices.Count(); d++) { Device device = devices[d]; string name = device.Name; string pName = platform.Name; //Console.WriteLine(device.Name + " " + platform.Version.VersionString); gpum.devices.Add(new GpuDevice() { deviceID = d, platformID = p, platformName = pName, name = name, memory = device.GlobalMemorySize }); } } Comms.gpuMsg = gpum; Comms.SetEvent(); Task.Delay(1000).Wait(); Comms.Close(); return; } catch (Exception ex) { Logger.Log(LogLevel.Error, "Unable to enumerate OpenCL devices"); Task.Delay(500).Wait(); Comms.Close(); return; } } } try { Device chosenDevice = null; try { chosenDevice = platforms[platformID].GetDevices(DeviceType.Gpu).ToList()[deviceID]; Console.WriteLine($"Using OpenCL device: {chosenDevice.Name} ({chosenDevice.Vendor})"); Console.WriteLine(); } catch (Exception ex) { Logger.Log(LogLevel.Error, $"Unable to select OpenCL device {deviceID} on platform {platformID} "); Task.Delay(500).Wait(); Comms.Close(); return; } var assembly = Assembly.GetEntryAssembly(); var resourceStream = assembly.GetManifestResourceStream("OclSolver.kernel.cl"); using (StreamReader reader = new StreamReader(resourceStream)) { using (Context context = Context.CreateContext(chosenDevice)) { /* * Once the program has been created you can use clGetProgramInfo with 
CL_PROGRAM_BINARY_SIZES and then CL_PROGRAM_BINARIES, storing the resulting binary programs (one for each device of the context) into a buffer you supply. You can then save this binary data to disk for use in later runs. * Not all devices might support binaries, so you will need to check the CL_PROGRAM_BINARY_SIZES result (it returns a zero size for that device if binaries are not supported). */ using (OpenCl.DotNetCore.Programs.Program program = context.CreateAndBuildProgramFromString(reader.ReadToEnd())) { using (CommandQueue commandQueue = CommandQueue.CreateCommandQueue(context, chosenDevice)) { IntPtr clearPattern = IntPtr.Zero; uint[] edgesCount; int[] edgesLeft; int trims = 0; try { clearPattern = Marshal.AllocHGlobal(4); Marshal.Copy(new byte[4] { 0, 0, 0, 0 }, 0, clearPattern, 4); try { bufferA1 = context.CreateBuffer <uint>(MemoryFlag.ReadWrite, BUFFER_SIZE_A1); bufferA2 = context.CreateBuffer <uint>(MemoryFlag.ReadWrite, BUFFER_SIZE_A2); bufferB = context.CreateBuffer <uint>(MemoryFlag.ReadWrite, BUFFER_SIZE_B); bufferI1 = context.CreateBuffer <uint>(MemoryFlag.ReadWrite, INDEX_SIZE); bufferI2 = context.CreateBuffer <uint>(MemoryFlag.ReadWrite, INDEX_SIZE); bufferR = context.CreateBuffer <uint>(MemoryFlag.ReadOnly, 42 * 2); } catch (Exception ex) { Task.Delay(500).Wait(); Logger.Log(LogLevel.Error, "Unable to allocate buffers, out of memory?"); Task.Delay(500).Wait(); Comms.Close(); return; } using (Kernel kernelSeedA = program.CreateKernel("FluffySeed2A")) using (Kernel kernelSeedB1 = program.CreateKernel("FluffySeed2B")) using (Kernel kernelSeedB2 = program.CreateKernel("FluffySeed2B")) using (Kernel kernelRound1 = program.CreateKernel("FluffyRound1")) using (Kernel kernelRoundO = program.CreateKernel("FluffyRoundNO1")) using (Kernel kernelRoundNA = program.CreateKernel("FluffyRoundNON")) using (Kernel kernelRoundNB = program.CreateKernel("FluffyRoundNON")) using (Kernel kernelTail = program.CreateKernel("FluffyTailO")) using (Kernel kernelRecovery = 
program.CreateKernel("FluffyRecovery")) { Stopwatch sw = new Stopwatch(); kernelSeedA.SetKernelArgumentGeneric(0, currentJob.k0); kernelSeedA.SetKernelArgumentGeneric(1, currentJob.k1); kernelSeedA.SetKernelArgumentGeneric(2, currentJob.k2); kernelSeedA.SetKernelArgumentGeneric(3, currentJob.k3); kernelSeedA.SetKernelArgument(4, bufferB); kernelSeedA.SetKernelArgument(5, bufferA1); kernelSeedA.SetKernelArgument(6, bufferI1); kernelSeedB1.SetKernelArgument(0, bufferA1); kernelSeedB1.SetKernelArgument(1, bufferA1); kernelSeedB1.SetKernelArgument(2, bufferA2); kernelSeedB1.SetKernelArgument(3, bufferI1); kernelSeedB1.SetKernelArgument(4, bufferI2); kernelSeedB1.SetKernelArgumentGeneric(5, (uint)32); kernelSeedB2.SetKernelArgument(0, bufferB); kernelSeedB2.SetKernelArgument(1, bufferA1); kernelSeedB2.SetKernelArgument(2, bufferA2); kernelSeedB2.SetKernelArgument(3, bufferI1); kernelSeedB2.SetKernelArgument(4, bufferI2); kernelSeedB2.SetKernelArgumentGeneric(5, (uint)0); kernelRound1.SetKernelArgument(0, bufferA1); kernelRound1.SetKernelArgument(1, bufferA2); kernelRound1.SetKernelArgument(2, bufferB); kernelRound1.SetKernelArgument(3, bufferI2); kernelRound1.SetKernelArgument(4, bufferI1); kernelRound1.SetKernelArgumentGeneric(5, (uint)DUCK_SIZE_A * 1024); kernelRound1.SetKernelArgumentGeneric(6, (uint)DUCK_SIZE_B * 1024); kernelRoundO.SetKernelArgument(0, bufferB); kernelRoundO.SetKernelArgument(1, bufferA1); kernelRoundO.SetKernelArgument(2, bufferI1); kernelRoundO.SetKernelArgument(3, bufferI2); kernelRoundNA.SetKernelArgument(0, bufferB); kernelRoundNA.SetKernelArgument(1, bufferA1); kernelRoundNA.SetKernelArgument(2, bufferI1); kernelRoundNA.SetKernelArgument(3, bufferI2); kernelRoundNB.SetKernelArgument(0, bufferA1); kernelRoundNB.SetKernelArgument(1, bufferB); kernelRoundNB.SetKernelArgument(2, bufferI2); kernelRoundNB.SetKernelArgument(3, bufferI1); kernelTail.SetKernelArgument(0, bufferB); kernelTail.SetKernelArgument(1, bufferA1); 
kernelTail.SetKernelArgument(2, bufferI1); kernelTail.SetKernelArgument(3, bufferI2); kernelRecovery.SetKernelArgumentGeneric(0, currentJob.k0); kernelRecovery.SetKernelArgumentGeneric(1, currentJob.k1); kernelRecovery.SetKernelArgumentGeneric(2, currentJob.k2); kernelRecovery.SetKernelArgumentGeneric(3, currentJob.k3); kernelRecovery.SetKernelArgument(4, bufferR); kernelRecovery.SetKernelArgument(5, bufferI2); int loopCnt = 0; //for (int i = 0; i < runs; i++) while (!Comms.IsTerminated) { try { if (!TEST && (Comms.nextJob.pre_pow == null || Comms.nextJob.pre_pow == "" || Comms.nextJob.pre_pow == TestPrePow)) { Logger.Log(LogLevel.Info, string.Format("Waiting for job....")); Task.Delay(1000).Wait(); continue; } if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin))) { currentJob = Comms.nextJob; currentJob.timestamp = DateTime.Now; } if (!TEST && (currentJob.timestamp.AddMinutes(30) < DateTime.Now) && Comms.lastIncoming.AddMinutes(30) < DateTime.Now) { Logger.Log(LogLevel.Info, string.Format("Job too old...")); Task.Delay(1000).Wait(); continue; } // test runs only once if (TEST && loopCnt++ > 100000) { Comms.IsTerminated = true; } Logger.Log(LogLevel.Debug, string.Format("GPU AMD{4}:Trimming #{4}: {0} {1} {2} {3}", currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, currentJob.jobID, deviceID)); //Stopwatch srw = new Stopwatch(); //srw.Start(); Solution s; while (graphSolutions.TryDequeue(out s)) { kernelRecovery.SetKernelArgumentGeneric(0, s.job.k0); kernelRecovery.SetKernelArgumentGeneric(1, s.job.k1); kernelRecovery.SetKernelArgumentGeneric(2, s.job.k2); kernelRecovery.SetKernelArgumentGeneric(3, s.job.k3); commandQueue.EnqueueWriteBufferEdges(bufferR, s.GetLongEdges()); commandQueue.EnqueueClearBuffer(bufferI2, 64 * 64 * 4, clearPattern); commandQueue.EnqueueNDRangeKernel(kernelRecovery, 1, 2048 * 256, 256, 0); s.nonces = commandQueue.EnqueueReadBuffer <uint>(bufferI2, 42); 
OpenCl.DotNetCore.Interop.CommandQueues.CommandQueuesNativeApi.Finish(commandQueue.Handle); s.nonces = s.nonces.OrderBy(n => n).ToArray(); Comms.graphSolutionsOut.Enqueue(s); Comms.SetEvent(); } //srw.Stop(); //Console.WriteLine("RECOVERY " + srw.ElapsedMilliseconds); currentJob = currentJob.Next(); kernelSeedA.SetKernelArgumentGeneric(0, currentJob.k0); kernelSeedA.SetKernelArgumentGeneric(1, currentJob.k1); kernelSeedA.SetKernelArgumentGeneric(2, currentJob.k2); kernelSeedA.SetKernelArgumentGeneric(3, currentJob.k3); sw.Restart(); commandQueue.EnqueueClearBuffer(bufferI2, 64 * 64 * 4, clearPattern); commandQueue.EnqueueClearBuffer(bufferI1, 64 * 64 * 4, clearPattern); commandQueue.EnqueueNDRangeKernel(kernelSeedA, 1, 2048 * 128, 128, 0); commandQueue.EnqueueNDRangeKernel(kernelSeedB1, 1, 1024 * 128, 128, 0); commandQueue.EnqueueNDRangeKernel(kernelSeedB2, 1, 1024 * 128, 128, 0); commandQueue.EnqueueClearBuffer(bufferI1, 64 * 64 * 4, clearPattern); commandQueue.EnqueueNDRangeKernel(kernelRound1, 1, 4096 * 1024, 1024, 0); commandQueue.EnqueueClearBuffer(bufferI2, 64 * 64 * 4, clearPattern); commandQueue.EnqueueNDRangeKernel(kernelRoundO, 1, 4096 * 1024, 1024, 0); commandQueue.EnqueueClearBuffer(bufferI1, 64 * 64 * 4, clearPattern); commandQueue.EnqueueNDRangeKernel(kernelRoundNB, 1, 4096 * 1024, 1024, 0); for (int r = 0; r < trimRounds; r++) { commandQueue.EnqueueClearBuffer(bufferI2, 64 * 64 * 4, clearPattern); commandQueue.EnqueueNDRangeKernel(kernelRoundNA, 1, 4096 * 1024, 1024, 0); commandQueue.EnqueueClearBuffer(bufferI1, 64 * 64 * 4, clearPattern); commandQueue.EnqueueNDRangeKernel(kernelRoundNB, 1, 4096 * 1024, 1024, 0); } commandQueue.EnqueueClearBuffer(bufferI2, 64 * 64 * 4, clearPattern); commandQueue.EnqueueNDRangeKernel(kernelTail, 1, 4096 * 1024, 1024, 0); edgesCount = commandQueue.EnqueueReadBuffer <uint>(bufferI2, 1); edgesCount[0] = edgesCount[0] > 1000000 ? 
1000000 : edgesCount[0]; edgesLeft = commandQueue.EnqueueReadBuffer(bufferA1, (int)edgesCount[0] * 2); OpenCl.DotNetCore.Interop.CommandQueues.CommandQueuesNativeApi.Flush(commandQueue.Handle); OpenCl.DotNetCore.Interop.CommandQueues.CommandQueuesNativeApi.Finish(commandQueue.Handle); sw.Stop(); currentJob.trimTime = sw.ElapsedMilliseconds; currentJob.solvedAt = DateTime.Now; Logger.Log(LogLevel.Info, string.Format("GPU AMD{2}: Trimmed in {0}ms to {1} edges", sw.ElapsedMilliseconds, edgesCount[0], deviceID)); CGraph cg = new CGraph(); cg.SetEdges(edgesLeft, (int)edgesCount[0]); cg.SetHeader(currentJob); Task.Factory.StartNew(() => { if (edgesCount[0] < 200000) { try { if (findersInFlight++ < 3) { Stopwatch cycleTime = new Stopwatch(); cycleTime.Start(); cg.FindSolutions(graphSolutions); cycleTime.Stop(); AdjustTrims(cycleTime.ElapsedMilliseconds); if (TEST) { Logger.Log(LogLevel.Info, string.Format("Finder completed in {0}ms on {1} edges with {2} solution(s) and {3} dupes", sw.ElapsedMilliseconds, edgesCount[0], graphSolutions.Count, cg.dupes)); if (++trims % 50 == 0) { Console.ForegroundColor = ConsoleColor.Green; Console.WriteLine("SOLS: {0}/{1} - RATE: {2:F1}", solutions, trims, (float)trims / solutions); Console.ResetColor(); } } if (graphSolutions.Count > 0) { solutions++; } } else { Logger.Log(LogLevel.Warning, "CPU overloaded!"); } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Cycle finder crashed " + ex.Message); } finally { findersInFlight--; } } }); } catch (Exception ex) { Logger.Log(LogLevel.Error, "Critical error in main ocl loop " + ex.Message); Task.Delay(5000).Wait(); } } //uint[] resultArray = commandQueue.EnqueueReadBuffer<uint>(bufferI1, 64 * 64); //uint[] resultArray2 = commandQueue.EnqueueReadBuffer<uint>(bufferI2, 64 * 64); //Console.WriteLine("SeedA: " + resultArray.Sum(e => e) + " in " + sw.ElapsedMilliseconds / runs); //Console.WriteLine("SeedB: " + resultArray2.Sum(e => e) + " in " + sw.ElapsedMilliseconds / runs); 
//Task.Delay(1000).Wait(); //Console.WriteLine(""); } } finally { // clear pattern if (clearPattern != IntPtr.Zero) { Marshal.FreeHGlobal(clearPattern); } } } } } } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Critical error in OCL Init " + ex.Message); Console.ForegroundColor = ConsoleColor.Yellow; Console.WriteLine(ex.Message); Console.ResetColor(); Task.Delay(500).Wait(); } finally { Task.Delay(500).Wait(); try { Comms.Close(); bufferA1.Dispose(); bufferA2.Dispose(); bufferB.Dispose(); bufferI1.Dispose(); bufferI2.Dispose(); bufferR.Dispose(); if (OpenCl.DotNetCore.CommandQueues.CommandQueue.resultValuePointer != IntPtr.Zero) { Marshal.FreeHGlobal(OpenCl.DotNetCore.CommandQueues.CommandQueue.resultValuePointer); } } catch { } } //Console.ReadKey(); }
/// <summary>
/// Prepares an allowed AMD device in three stages: context/program/command queue,
/// then kernels, then buffers. A failing stage is logged and aborts the setup;
/// <c>IsInitialized</c> is set only after every stage has succeeded.
/// </summary>
/// <param name="device">The device whose OpenCL members are populated.</param>
private void InitializeDevice(Device.AMD device)
{
    // Devices that are not allowed are left untouched.
    if (!device.Allow)
    {
        return;
    }

    Log(Level.Debug, $"Initializing device ID [{device.DeviceID}]");

    // Stage 1: context, program built from sKernel, and the command queue.
    try
    {
        device.Context = Context.CreateContext(device.Info);
        device.Program = device.Context.CreateAndBuildProgramFromString(sKernel);
        device.CommandQueue = CommandQueue.CreateCommandQueue(device.Context, device.Info);
    }
    catch (Exception setupError)
    {
        Log(setupError);
        Log(Level.Error, $"Failed loading kernel to device ID [{device.DeviceID}]");
        return;
    }

    // Stage 2: one kernel object per pipeline step. "FluffySeed2B" and "FluffyRoundNON"
    // are each instantiated twice and stored in separate members.
    try
    {
        var program = device.Program;

        device.KernelSeedA = program.CreateKernel("FluffySeed2A");
        device.KernelSeedB1 = program.CreateKernel("FluffySeed2B");
        device.KernelSeedB2 = program.CreateKernel("FluffySeed2B");
        device.KernelRound1 = program.CreateKernel("FluffyRound1");
        device.KernelRoundO = program.CreateKernel("FluffyRoundNO1");
        device.KernelRoundNA = program.CreateKernel("FluffyRoundNON");
        device.KernelRoundNB = program.CreateKernel("FluffyRoundNON");
        device.KernelTail = program.CreateKernel("FluffyTailO");
        device.KernelRecovery = program.CreateKernel("FluffyRecovery");
    }
    catch (Exception kernelError)
    {
        Log(kernelError);
        Log(Level.Error, $"Failed loading kernel to device ID [{device.DeviceID}]");
        return;
    }

    // Reported memory divided by 2^30 and rounded to one decimal, for the log line below.
    double bytesPerGb = Math.Pow(2, 30);
    var availableGb = Math.Round(device.AvailableMemory / bytesPerGb, 1);

    // Stage 3: device buffers.
    try
    {
        Log(Level.Debug, $"Allocating video memory on device ID [{device.DeviceID}]");
        Log(Level.Info, $"Available video memory on device ID [{device.DeviceID}]: {availableGb}GB");

        var context = device.Context;

        device.BufferA1 = context.CreateBuffer<uint>(MemoryFlag.ReadWrite, Device.AMD.BUFFER_SIZE_A1);
        device.BufferA2 = context.CreateBuffer<uint>(MemoryFlag.ReadWrite, Device.AMD.BUFFER_SIZE_A2);
        device.BufferB = context.CreateBuffer<uint>(MemoryFlag.ReadWrite, Device.AMD.BUFFER_SIZE_B);
        device.BufferI1 = context.CreateBuffer<uint>(MemoryFlag.ReadWrite, Device.AMD.INDEX_SIZE);
        device.BufferI2 = context.CreateBuffer<uint>(MemoryFlag.ReadWrite, Device.AMD.INDEX_SIZE);
        device.BufferNonce = context.CreateBuffer<byte>(MemoryFlag.ReadOnly, 32);
        device.BufferR = context.CreateBuffer<uint>(MemoryFlag.ReadOnly, CycleFinder.CUCKOO_42 * 2);
    }
    catch (Exception allocationError)
    {
        Log(allocationError);
        Log(Level.Error, $"Out of video memory at device ID [{device.DeviceID}], required >{Device.AMD.RequiredGPUMemoryGB()}GB");
        return;
    }

    device.IsInitialized = true;
}
/// <summary>
/// Builds <paramref name="source"/>, runs its <c>gemm_nn_f64</c> kernel on the CPU devices of the
/// first OpenCL platform (the queue uses the first device), and compares every element of the
/// result with the managed <c>Gemm</c> reference, printing each mismatch to the console.
/// </summary>
/// <param name="source">OpenCL C source text that defines the <c>gemm_nn_f64</c> kernel.</param>
/// <remarks>
/// OpenCL errors are reported on the console and end the run without a comparison.
/// </remarks>
private static void RunNative(string source)
{
    var A = CreateRandomMatrix(M, K);
    var B = CreateRandomMatrix(K, N);
    var C = CreateRandomMatrix(M, N);

    // Z starts as a copy of C so the managed reference begins from the same values
    // that are uploaded to the kernel's C buffer.
    var Z = CreateZeroMatrix(M, N);
    Array.Copy(C, Z, M * N);

    var error = ErrorCode.Success;
    var platform = Platform.GetPlatformIDs()[0];
    var devices = Device.GetDeviceIDs(platform, DeviceType.Cpu);

    using (var context = Context.CreateContext(platform, devices, null, null))
    using (var queue = CommandQueue.CreateCommandQueue(context, devices[0]))
    {
        // Declared outside the try block: the catch handler needs `program` for the build log
        // and the finally block releases whatever was created before a failure.
        Program program = null;
        Kernel kernel = null;
        Mem<double> Abuf = null;
        Mem<double> Bbuf = null;
        Mem<double> Cbuf = null;

        try
        {
            program = Program.CreateProgramWithSource(context, new String[] { source });
            program.BuildProgram(devices, null, null, null);
            kernel = Kernel.CreateKernel(program, "gemm_nn_f64");

            Abuf = Mem<double>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, A);
            Bbuf = Mem<double>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, B);
            Cbuf = Mem<double>.CreateBuffer(context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, C);

            kernel.SetKernelArg(0, (HandleObject)Abuf);
            kernel.SetKernelArg(1, K);
            kernel.SetKernelArg(2, (HandleObject)Bbuf);
            kernel.SetKernelArg(3, N);
            kernel.SetKernelArg(4, (HandleObject)Cbuf);
            kernel.SetKernelArg(5, N);
            kernel.SetKernelArg(6, K);
            kernel.SetKernelArg(7, 0.5);
            kernel.SetKernelArg(8, 0.5);

            queue.EnqueueNDRangeKernel(kernel, null, new int[] { M / TM, N / TN }, new int[] { GM, GN }, null);
            queue.EnqueueReadBuffer(Cbuf, true, C);
            queue.Finish();
        }
        catch (OpenClException ex)
        {
            error = ex.ErrorCode;
            switch (error)
            {
                case ErrorCode.BuildProgramFailure:
                    Console.WriteLine("*** Error building kernel 'gemm_nn_f64'");
                    Console.WriteLine("*** Build log: {0}", program.BuildInfo.GetLog(devices[0]));
                    Console.WriteLine("*** Source code:");
                    Console.WriteLine(source);
                    break;

                case ErrorCode.InvalidWorkGroupSize:
                    Console.WriteLine(
                        "*** Invalid workgroup size (max. workgroup size: {0}; max. work item sizes: {1},{2},{3}).",
                        devices[0].MaxWorkGroupSize,
                        devices[0].MaxWorkItemSizes[0],
                        devices[0].MaxWorkItemSizes[1],
                        devices[0].MaxWorkItemSizes[2]);
                    break;

                default:
                    Console.WriteLine("*** OpenCL error {0}: {1}", (int)error, error);
                    break;
            }
            Console.WriteLine("*** ");
        }
        finally
        {
            // Release in reverse order of creation; anything not yet created is still null.
            Cbuf?.Dispose();
            Bbuf?.Dispose();
            Abuf?.Dispose();
            kernel?.Dispose();
            program?.Dispose();
        }

        // Nothing meaningful to compare when the kernel did not run to completion.
        if (error != ErrorCode.Success)
        {
            return;
        }

        Gemm('N', 'N', M, N, K, 0.5, A, K, B, N, 0.5, Z, N);

        for (var i = 0; i < M; i++)
        {
            for (var j = 0; j < N; j++)
            {
                var expected = Z[i * N + j];
                var found = C[i * N + j];

                // Written as "not within tolerance" rather than "difference > tolerance":
                // any comparison involving NaN is false, so the latter form would let a
                // NaN result pass unnoticed. The equality test keeps identical infinities matching.
                var matches = found == expected || Math.Abs(found - expected) <= 1e-10;
                if (!matches)
                {
                    Console.WriteLine("*** Error in elemen ({0},{1}): expected {2}, found {3}.", i, j, expected, found);
                }
            }
        }
    }
}
/// <summary>
/// This is the asynchronous entrypoint to the application. It prints a table of all OpenCL
/// platforms and devices, selects the first GPU of the first platform whose name contains
/// "nvidia", builds the <c>matvec_mult</c> kernel for it and prints the content of the result buffer.
/// </summary>
/// <param name="args">The command line arguments that have been passed to the program.</param>
private static async Task MainAsync(string[] args)
{
    // Gets all available platforms and their corresponding devices, and prints them out in a table.
    // The sequence is materialized once because it is walked twice (table and device selection).
    IEnumerable<Platform> platforms = Platform.GetPlatforms().ToList();
    ConsoleTable consoleTable = new ConsoleTable("Platform", "OpenCL Version", "Vendor", "Device", "Driver Version", "Bits", "Memory", "Clock Speed", "Available");
    foreach (Platform platform in platforms)
    {
        foreach (Device device in platform.GetDevices(DeviceType.All))
        {
            consoleTable.AddRow(
                platform.Name,
                $"{platform.Version.MajorVersion}.{platform.Version.MinorVersion}",
                platform.Vendor,
                device.Name,
                device.DriverVersion,
                $"{device.AddressBits} Bit",
                $"{Math.Round(device.GlobalMemorySize / 1024.0f / 1024.0f / 1024.0f, 2)} GiB",
                $"{device.MaximumClockFrequency} MHz",
                device.IsAvailable ? "✔" : "✖");
        }
    }
    Console.WriteLine("Supported Platforms & Devices:");
    consoleTable.Write(Format.Alternative);

    // Selects the first GPU offered by the first NVIDIA platform. Both lookups can come back
    // empty (no such platform, or a platform without a GPU), so the result is checked before use
    // instead of failing with a NullReferenceException.
    Platform nvidiaPlatform = platforms.FirstOrDefault(
        p => p.Name != null && p.Name.IndexOf("nvidia", StringComparison.OrdinalIgnoreCase) >= 0);
    Device chosenDevice = nvidiaPlatform?.GetDevices(DeviceType.Gpu).FirstOrDefault();
    if (chosenDevice == null)
    {
        Console.WriteLine("No NVIDIA OpenCL GPU device was found.");
        return;
    }
    Console.WriteLine($"Using: {chosenDevice.Name} ({chosenDevice.Vendor})");
    Console.WriteLine();

    // Creates a new context for the selected device
    using (Context context = Context.CreateContext(chosenDevice))
    {
        // Creates the kernel code, which multiplies a matrix with a vector
        string code = @"
            __kernel void matvec_mult(__global float4* matrix, __global float4* vector, __global float* result) {
                int i = get_global_id(0);
                result[i] = dot(matrix[i], vector[0]);
            }";

        // Creates a program and then the kernel from it
        using (Program program = await context.CreateAndBuildProgramFromStringAsync(code))
        using (Kernel kernel = program.CreateKernel("matvec_mult"))
        // Creates the memory objects for the kernel arguments; the using statements release them
        // on every exit path, including exceptions that the catch below does not handle.
        using (MemoryBuffer matrixBuffer = context.CreateBuffer(MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new float[]
        {
            0f, 2f, 4f, 6f,
            8f, 10f, 12f, 14f,
            16f, 18f, 20f, 22f,
            24f, 26f, 28f, 30f
        }))
        using (MemoryBuffer vectorBuffer = context.CreateBuffer(MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new float[] { 0f, 3f, 6f, 9f }))
        using (MemoryBuffer resultBuffer = context.CreateBuffer<float>(MemoryFlag.WriteOnly, 4))
        {
            // Tries to execute the kernel
            try
            {
                // Sets the arguments of the kernel
                kernel.SetKernelArgument(0, matrixBuffer);
                kernel.SetKernelArgument(1, vectorBuffer);
                kernel.SetKernelArgument(2, resultBuffer);

                // Creates a command queue and retrieves the result
                using (CommandQueue commandQueue = CommandQueue.CreateCommandQueue(context, chosenDevice))
                {
                    // NOTE(review): both kernel launches below are disabled, so resultBuffer is read
                    // back without matvec_mult having been enqueued - confirm whether this is intended.
                    //await commandQueue.EnqueueNDRangeKernelAsync(kernel, 1, 4);
                    //commandQueue.EnqueueNDRangeKernel(kernel, 1, 4);
                    float[] resultArray = commandQueue.EnqueueReadBuffer<float>(resultBuffer, 4);
                    //float[] resultArray = await commandQueue.EnqueueReadBufferAsync<float>(resultBuffer, 4);
                    Console.WriteLine($"Result: ({string.Join(", ", resultArray)})");
                }
            }
            catch (OpenClException exception)
            {
                Console.WriteLine(exception.Message);
            }
        }
    }
}
/// <summary>
/// Runs <c>test_components1</c> first through <c>Cl.RunKernel</c> and then as a compiled OpenCL
/// kernel on the CPU devices of the first platform, asserting the same output values both times.
/// </summary>
public void TestComponentAccessors1()
{
    int nr = 2;
    int nw = 2;
    float[] r = new float[nr];
    float2[] w = new float2[nw];

    // test managed
    Array.Clear(r, 0, nr);
    Array.Clear(w, 0, nw);
    Cl.RunKernel(
        new int[] { 1 },
        new int[] { 1 },
        (Action<float[], float2[]>)test_components1,
        r,
        w
    );
    Assert.AreEqual((float)1, r[0]);
    Assert.AreEqual((float)1, w[0].s0);
    Assert.AreEqual((float)0, w[0].s1);
    Assert.AreEqual((float)2, r[1]);
    Assert.AreEqual((float)1, w[1].s1);
    Assert.AreEqual((float)0, w[1].s0);

    // compile kernel
    var source = ClCompiler.EmitKernel("opencl-tests.dll", "OpenCl.Tests.TestFloat2", "test_components1");

    // test native
    Platform platform = Platform.GetPlatformIDs()[0];
    Device[] devices = Device.GetDeviceIDs(platform, DeviceType.Cpu);
    using (var context = Context.CreateContext(platform, devices, null, null))
    using (var queue = CommandQueue.CreateCommandQueue(context, devices[0]))
    {
        // Declared outside the try block so the finally block can release whatever was created.
        Program program = null;
        Kernel kernel = null;
        Mem<float> mr = null;
        Mem<float2> mw = null;
        try
        {
            program = Program.CreateProgramWithSource(context, new String[] { source });
            try
            {
                program.BuildProgram(devices, null, null, null);
            }
            catch (OpenClException)
            {
                // Dump the generated source to help diagnose the build failure, then rethrow
                // with `throw;` so the original stack trace is preserved.
                Console.WriteLine(source);
                throw;
            }
            kernel = Kernel.CreateKernel(program, "test_components1");
            mr = Mem<float>.CreateBuffer(context, MemFlags.WriteOnly, nr * Marshal.SizeOf<float>());
            mw = Mem<float2>.CreateBuffer(context, MemFlags.WriteOnly, nw * Marshal.SizeOf<float2>());
            kernel.SetKernelArg(0, (HandleObject)mr);
            kernel.SetKernelArg(1, (HandleObject)mw);
            queue.EnqueueNDRangeKernel(kernel, null, new int[] { 1 }, null, null);
            queue.Finish();

            // The host arrays are cleared so the assertions below can only pass on values
            // read back from the device. The reads pass `false` (presumably the blocking
            // flag - confirm), and Finish() is called before the arrays are inspected.
            Array.Clear(r, 0, nr);
            queue.EnqueueReadBuffer(mr, false, r);
            Array.Clear(w, 0, nw);
            queue.EnqueueReadBuffer(mw, false, w);
            queue.Finish();
        }
        finally
        {
            mr?.Dispose();
            mw?.Dispose();
            kernel?.Dispose();
            program?.Dispose();
        }
    }
    Assert.AreEqual((float)1, r[0]);
    Assert.AreEqual((float)1, w[0].s0);
    Assert.AreEqual((float)0, w[0].s1);
    Assert.AreEqual((float)2, r[1]);
    Assert.AreEqual((float)1, w[1].s1);
    Assert.AreEqual((float)0, w[1].s0);
}
/// <summary>
/// Runs <c>test_float2_ge</c> on two <c>float2</c> input arrays, first through <c>Cl.RunKernel</c>
/// and then as a compiled OpenCL kernel on the CPU devices of the first platform, asserting the
/// same <c>int2</c> results both times.
/// </summary>
public void TestGe()
{
    float2[] a = new float2[] { new float2((float)2, (float)1), new float2((float)0, (float)1) };
    float2[] b = new float2[] { new float2((float)0, (float)1), new float2((float)2, (float)1) };
    int2[] r = new int2[2];

    // test managed
    Array.Clear(r, 0, 2);
    Cl.RunKernel(
        new int[] { 2 },
        new int[] { 1 },
        (Action<float2[], float2[], int2[]>)test_float2_ge,
        a,
        b,
        r
    );
    Assert.AreEqual(-1, r[0].s0);
    Assert.AreEqual(-1, r[0].s1);
    Assert.AreEqual(0, r[1].s0);
    Assert.AreEqual(-1, r[1].s1);

    // compile kernel
    var source = ClCompiler.EmitKernel("opencl-tests.dll", "OpenCl.Tests.TestFloat2", "test_float2_ge");

    // test native
    Platform platform = Platform.GetPlatformIDs()[0];
    Device[] devices = Device.GetDeviceIDs(platform, DeviceType.Cpu);
    using (var context = Context.CreateContext(platform, devices, null, null))
    using (var queue = CommandQueue.CreateCommandQueue(context, devices[0]))
    {
        // Declared outside the try block so the finally block can release whatever was created.
        Program program = null;
        Kernel kernel = null;
        Mem<float2> ma = null;
        Mem<float2> mb = null;
        Mem<int2> mr = null;
        try
        {
            program = Program.CreateProgramWithSource(context, new String[] { source });
            try
            {
                program.BuildProgram(devices, null, null, null);
            }
            catch (OpenClException)
            {
                // Dump the generated source to help diagnose the build failure, then rethrow
                // with `throw;` so the original stack trace is preserved.
                Console.WriteLine(source);
                throw;
            }
            kernel = Kernel.CreateKernel(program, "test_float2_ge");
            ma = Mem<float2>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, a);
            mb = Mem<float2>.CreateBuffer(context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, b);
            mr = Mem<int2>.CreateBuffer(context, MemFlags.WriteOnly, 2 * Marshal.SizeOf<int2>());
            kernel.SetKernelArg(0, (HandleObject)ma);
            kernel.SetKernelArg(1, (HandleObject)mb);
            kernel.SetKernelArg(2, (HandleObject)mr);
            queue.EnqueueNDRangeKernel(kernel, null, new int[] { 2 }, null, null);
            queue.Finish();

            // Clear the managed results so the assertions below can only pass on values
            // read back from the device.
            Array.Clear(r, 0, 2);
            queue.EnqueueReadBuffer(mr, true, r);
        }
        finally
        {
            mr?.Dispose();
            mb?.Dispose();
            ma?.Dispose();
            kernel?.Dispose();
            program?.Dispose();
        }
    }
    Assert.AreEqual(-1, r[0].s0);
    Assert.AreEqual(-1, r[0].s1);
    Assert.AreEqual(0, r[1].s0);
    Assert.AreEqual(-1, r[1].s1);
}
/// <summary>
/// Reads <paramref name="buf"/> back to the host through a temporary command queue created
/// from the shared instance's context and device.
/// </summary>
/// <typeparam name="T">Element type of the returned array.</typeparam>
/// <param name="buf">The buffer to read.</param>
/// <returns>The array returned by <c>EnqueueReadBuffer</c>.</returns>
public static T[] ReadBuffer<T>(MemoryBuffer buf) where T : struct
{
    // The queue exists only for this one read; dispose it so its native handle is
    // released instead of leaking on every call.
    using (CommandQueue cq = CommandQueue.CreateCommandQueue(GetInstance().c, GetInstance().d))
    {
        // NOTE(review): buf.Size is passed unchanged as the requested length. If Size is in bytes
        // and the length is an element count, this over-reads for any T larger than one byte - confirm.
        return cq.EnqueueReadBuffer<T>(buf, (int)buf.Size);
    }
}
/// <summary>
/// Creates a new command queue from the <c>c</c> and <c>d</c> members of the shared instance.
/// </summary>
/// <returns>The newly created command queue.</returns>
public static CommandQueue CreateCommandQueue()
{
    var context = GetInstance().c;
    var device = GetInstance().d;

    return CommandQueue.CreateCommandQueue(context, device);
}
/// <summary>
/// Writes <paramref name="values"/> into <paramref name="buf"/> through a temporary command queue
/// created from the shared instance's context and device.
/// </summary>
/// <typeparam name="T">Element type of the data being written.</typeparam>
/// <param name="buf">The destination buffer.</param>
/// <param name="values">The host data passed to <c>EnqueueWriteBuffer</c>.</param>
public static void WriteToBuffer<T>(MemoryBuffer buf, T[] values) where T : struct
{
    // The queue exists only for this one write; dispose it so its native handle is
    // released instead of leaking on every call.
    using (CommandQueue cq = CommandQueue.CreateCommandQueue(GetInstance().c, GetInstance().d))
    {
        cq.EnqueueWriteBuffer<T>(buf, values);
    }
}