/// <summary>
/// Creates a read-only buffer initialised from a host array of random floats and
/// checks the memory-object type and size that OpenCL reports for it.
/// </summary>
public void MemBufferTests()
{
    const int bufferSize = 100;
    ErrorCode error;

    Random random = new Random();
    float[] values = (from value in Enumerable.Range(0, bufferSize)
                      select (float)random.NextDouble()).ToArray();

    IMem buffer = Cl.CreateBuffer(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, (IntPtr)(sizeof(float) * bufferSize), values, out error);

    // Assert arguments are (expected, actual); the creation must have succeeded
    // before the handle is queried or released.
    Assert.AreEqual(ErrorCode.Success, error);

    try
    {
        MemObjectType reportedType = Cl.GetMemObjectInfo(buffer, MemInfo.Type, out error).CastTo<MemObjectType>();
        Assert.AreEqual(ErrorCode.Success, error);
        Assert.AreEqual(MemObjectType.Buffer, reportedType);

        uint reportedSize = Cl.GetMemObjectInfo(buffer, MemInfo.Size, out error).CastTo<uint>();
        Assert.AreEqual(ErrorCode.Success, error);
        // Compare as uint on both sides so the check does not depend on mixed-type equality.
        Assert.AreEqual((uint)(values.Length * sizeof(float)), reportedSize);

        // TODO: Verify the buffer contents against `values`.
    }
    finally
    {
        // Release the buffer even when one of the assertions above fails.
        buffer.Dispose();
    }
}
/// <summary>
/// Runs the "ExternalLoopBody" kernel with a single work-item over a six-element
/// int buffer and asserts on the values read back from the device.
/// </summary>
/// <param name="program">Program from which the "ExternalLoopBody" kernel is created.</param>
public void ExternalLoopBody(Cl.Program program)
{
    // Kernel and command queue.
    Cl.Kernel kernel = Cl.CreateKernel(program, "ExternalLoopBody", out error);
    clSafeCall(error);
    Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // Host data and its device copy.
    int[] hostValues = { 0, 1, 2, 3, 4, 5 };
    IntPtr byteCount = (IntPtr)(sizeof(int) * hostValues.Length);
    Cl.Mem deviceValues = Cl.CreateBuffer(context, Cl.MemFlags.ReadWrite | Cl.MemFlags.CopyHostPtr, byteCount, hostValues, out error);
    clSafeCall(error);

    // Arguments: the buffer and its element count.
    clSafeCall(Cl.SetKernelArg(kernel, 0, deviceValues));
    clSafeCall(Cl.SetKernelArg(kernel, 1, hostValues.Length));

    // Launch one work-item.
    IntPtr[] globalWorkSize = { (IntPtr)1 };
    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, globalWorkSize, null, 0, null, out clevent));

    // Blocking read of the results back into the host array.
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, deviceValues, Cl.Bool.True, IntPtr.Zero, byteCount, hostValues, 0, null, out clevent));
    clSafeCall(Cl.Finish(cmdQueue));

    int[] expected = { 1, 4, 3, 6, 5, 8 };
    Assert.AreEqual(expected, hostValues);
}
/// <summary>
/// Allocates a device buffer for kernel argument <paramref name="argumentIndex"/>, binds it
/// to the kernel and, when <paramref name="item"/> is non-null, uploads the host data to it.
/// </summary>
/// <param name="argumentIndex">Index of the kernel argument and of the slot in <c>_gpuBuffers</c>.</param>
/// <param name="item">Host data pointer, or null when there is nothing to upload.</param>
/// <param name="flags">Memory flags passed to <c>Cl.CreateBuffer</c>.</param>
/// <param name="byteCount">Size of the buffer in bytes.</param>
/// <exception cref="InvalidOperationException">An OpenCL call reported an error.</exception>
public void SetArgumentValue(int argumentIndex, void* item, MemFlags flags, long byteCount)
{
    fixed (GPUBuffer* gpuBufferPtr = _gpuBuffers)
    {
        ErrorCode err;

        // Work on the slot in place: copying the struct into a local would silently
        // discard the new handle and length when the method returns.
        GPUBuffer* slot = gpuBufferPtr + argumentIndex;

        // Release the previous allocation (if there is one) before replacing it.
        if (slot->MemoryObject != IntPtr.Zero)
        {
            slot->MemoryObject.Dispose();
        }

        slot->Length = byteCount;
        slot->MemoryObject = Cl.CreateBuffer(Context, flags, byteCount, (IntPtr)item, out err);
        if (err != ErrorCode.Success)
        {
            throw new InvalidOperationException("Cl.CreateBuffer failed for argument " + argumentIndex + ": " + err);
        }

        err = Cl.SetKernelArg(Kernel, (uint)argumentIndex, IntPtr.Size, slot->MemoryObject);
        if (err != ErrorCode.Success)
        {
            throw new InvalidOperationException("Cl.SetKernelArg failed for argument " + argumentIndex + ": " + err);
        }

        if ((IntPtr)item != IntPtr.Zero)
        {
            err = Cl.EnqueueWriteBuffer(CommandQueue, slot->MemoryObject, 1, IntPtr.Zero, byteCount, (IntPtr)item, 0, null, out Event @event);
            if (err != ErrorCode.Success)
            {
                throw new InvalidOperationException("Cl.EnqueueWriteBuffer failed for argument " + argumentIndex + ": " + err);
            }

            @event.WaitForComplete();
        }
    }
}
/// <summary>
/// Stores the mini-batch size and allocates per-example storage for activations and deltas:
/// zeroed OpenCL buffers when OPENCL_ENABLED is defined, managed arrays otherwise.
/// </summary>
/// <param name="MiniBatchSize">Number of examples per mini-batch.</param>
public void SetupBuffers(int MiniBatchSize)
{
    this.miniBatchSize = MiniBatchSize;

#if OPENCL_ENABLED
    this.activationsGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite | MemFlags.AllocHostPtr, (IntPtr)(sizeof(float) * NumberOfUnits * MiniBatchSize), out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer Neurons.activationsGPU");
    OpenCLSpace.WipeBuffer(activationsGPU, NumberOfUnits * MiniBatchSize, typeof(float));

    this.deltaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite | MemFlags.AllocHostPtr, (IntPtr)(sizeof(float) * NumberOfUnits * MiniBatchSize), out OpenCLSpace.ClError);
    // Check the creation result first, then wipe the buffer that was just created
    // (the previous code wiped activationsGPU a second time and left deltaGPU untouched).
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer Neurons.deltaGPU");
    OpenCLSpace.WipeBuffer(deltaGPU, NumberOfUnits * MiniBatchSize, typeof(float));
#else
    for (int m = 0; m < MiniBatchSize; m++)
    {
        this.activations.Add(new double[nUnits]);
        this.delta.Add(new double[nUnits]);
    }
#endif
}
/// <summary>
/// Uploads <paramref name="mem"/> to the device buffer of kernel argument
/// <paramref name="argIndex"/>. The buffer is (re)created and bound to the kernel when the
/// slot is empty or its recorded size is smaller than the data.
/// </summary>
/// <param name="argIndex">Kernel argument index; also indexes <c>_memObj</c> and <c>_memObjSize</c>.</param>
/// <param name="mem">Host data to upload.</param>
/// <param name="flags">Memory flags used when a buffer has to be created.</param>
private void WriteBuffer<T>(int argIndex, T[] mem, MemFlags flags) where T : unmanaged
{
    int size = sizeof(T) * mem.Length;

    fixed (IntPtr* memObjPtr = _memObj)
    fixed (T* itemsPtr = mem)
    {
        ErrorCode err;
        IntPtr* slot = memObjPtr + argIndex;
        bool slotEmpty = *slot == IntPtr.Zero;

        if (slotEmpty || _memObjSize[argIndex] < size)
        {
            // An existing but too-small buffer is released before it is replaced.
            if (!slotEmpty)
            {
                ((Mem*)slot)->Dispose();
            }

            _memObjSize[argIndex] = size;
            *slot = Cl.CreateBuffer(Context, flags, size, (IntPtr)itemsPtr, out err);
            err = Cl.SetKernelArg(Kernel, (uint)argIndex, IntPtr.Size, (IntPtr)slot);
        }

        // The data is always written, whether or not the buffer was just created.
        err = Cl.EnqueueWriteBuffer(_commandQueue, *slot, true.ToInt(), IntPtr.Zero, size, (IntPtr)itemsPtr, 0, null, out _);
    }
}
// Images cannot be read_write, so this keeps using plain buffers.
// TODO: implement this in a way that allows imgfAccu to be uploaded only once.
// TODO: test for image size consistency.
/// <summary>
/// Runs the "accumulate" kernel on <paramref name="imgfAccu"/> and <paramref name="imgfSrc"/>
/// with the scalar <paramref name="k"/>, then reads the accumulator buffer back into
/// <paramref name="imgfAccu"/>.
/// </summary>
/// <param name="imgfAccu">Accumulator map; uploaded, processed and overwritten with the result.</param>
/// <param name="imgfSrc">Source map; uploaded only.</param>
/// <param name="k">Scalar passed as kernel argument 4.</param>
public void Accumulate(FloatMap imgfAccu, FloatMap imgfSrc, float k)
{
    var kernel = _kernels["accumulate"];

    IMem<float> accuMapBuffer = null;
    IMem<float> srcMapBuffer = null;
    try
    {
        // Creation of on-device memory objects.
        // NOTE: MemFlags.CopyHostPtr reportedly did not work here, hence the explicit writes below.
        accuMapBuffer = Cl.CreateBuffer<float>(_context, MemFlags.ReadWrite, imgfAccu.Size, out err);
        assert(err, "accu buf creation");

        // The source map is uploaded by the host and never read back, so the kernel is
        // presumed only to read it: it must not be declared write-only for the kernel.
        srcMapBuffer = Cl.CreateBuffer<float>(_context, MemFlags.ReadOnly, imgfSrc.Size, out err);
        assert(err, "src buf creation");

        // Set memory objects and scalars as kernel parameters.
        err = Cl.SetKernelArg(kernel, 0, intPtrSize, accuMapBuffer);
        assert(err, "accu map setKernelArg");
        err = Cl.SetKernelArg(kernel, 1, intPtrSize, srcMapBuffer);
        assert(err, "src map setKernelArg");
        err = Cl.SetKernelArg(kernel, 2, intSize, imgfAccu.Stride);
        assert(err, "in stride setKernelArg");
        err = Cl.SetKernelArg(kernel, 3, intSize, imgfSrc.Stride);
        assert(err, "out stride setKernelArg");
        err = Cl.SetKernelArg(kernel, 4, floatSize, k);
        assert(err, "k setKernelArg");

        // Write the actual data into the memory objects (blocking). Each result is
        // checked before its event is released.
        Event clevent;
        err = Cl.EnqueueWriteBuffer<float>(_commandsQueue, accuMapBuffer, Bool.True, 0, imgfAccu.Size, imgfAccu._buf, 0, null, out clevent);
        assert(err, "write accu buffer");
        clevent.Dispose();

        err = Cl.EnqueueWriteBuffer<float>(_commandsQueue, srcMapBuffer, Bool.True, 0, imgfSrc.Size, imgfSrc._buf, 0, null, out clevent);
        assert(err, "write src buffer");
        clevent.Dispose();

        // Execute over a W x H range.
        err = Cl.EnqueueNDRangeKernel(_commandsQueue, kernel, 2,
                                      new[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 },                          // offset
                                      new[] { new IntPtr(imgfAccu.W), new IntPtr(imgfAccu.H), (IntPtr)1 }, // range
                                      null, 0, null, out clevent);
        assert(err, "Cl.EnqueueNDRangeKernel");
        clevent.Dispose();

        // Sync.
        Cl.Finish(_commandsQueue);

        // Read from the output memory object into the actual buffer.
        err = Cl.EnqueueReadBuffer<float>(_commandsQueue, accuMapBuffer, Bool.True, imgfAccu._buf, 0, null, out clevent);
        assert(err, "read output buffer");
        clevent.Dispose();
    }
    finally
    {
        // Release the device buffers on every path.
        // (The accumulator could be returned without disposing, but that would affect
        // the non-OpenCL implementation.)
        if (srcMapBuffer != null)
        {
            Cl.ReleaseMemObject(srcMapBuffer);
        }

        if (accuMapBuffer != null)
        {
            Cl.ReleaseMemObject(accuMapBuffer);
        }
    }
}
/// <summary>
/// Copies the input dimensions to the output, creates the output neurons and allocates
/// the OpenCL buffers: mean, variance and cumulative mean (zeroed, one float per input
/// depth slice), cumulative variance (initialised to one) and the normalized input
/// (zeroed, one float per input unit and mini-batch item).
/// </summary>
public override void SetupOutput()
{
    // Output geometry mirrors the input.
    this.outputWidth = inputWidth;
    this.outputHeight = inputHeight;
    this.outputDepth = inputDepth;

    this.inputArea = inputHeight * inputWidth;
    this.nOutputUnits = nInputUnits;
    this.outputNeurons = new Neurons(nOutputUnits);

    // Size in bytes of every per-depth statistics buffer.
    IntPtr statsBytes = (IntPtr)(sizeof(float) * inputDepth);

    // 1. Mean, variance and their cumulative averages.
    this.meanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, statsBytes, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(meanGPU, inputDepth, typeof(float));

    this.varianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, statsBytes, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(varianceGPU, inputDepth, typeof(float));

    // Cumulative means start at zero...
    this.cumulativeMeanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, statsBytes, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(cumulativeMeanGPU, inputDepth, typeof(float));

    // ...and cumulative variances start at one, copied from a host array.
    float[] ones = new float[inputDepth];
    for (int slice = 0; slice < ones.Length; slice++)
    {
        ones[slice] = 1.0f;
    }

    this.cumulativeVarianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, statsBytes, ones, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");

    // 2. Normalized input values (needed for backprop).
    this.normalizedInputGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)(sizeof(float) * nInputUnits * inputNeurons.MiniBatchSize), out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(normalizedInputGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(float));
}
/// <summary>
/// Runs the crack-high kernel on a 16-element sequence and the given low value and
/// returns the single seed value read back from the device.
/// </summary>
/// <param name="sequence">Input sequence; must contain exactly 16 values.</param>
/// <param name="low">Value passed to the kernel as argument 2.</param>
/// <returns>
/// The seed read back from the device, or -1 when the program or kernel is not available,
/// the class is not initialized, or <paramref name="sequence"/> is null or not 16 long.
/// </returns>
public static long CrackHigh(int[] sequence, long low)
{
    // A null sequence is rejected like any other unusable input instead of
    // throwing a NullReferenceException.
    if (!crackHighProgram.HasValue || !crackHighKernel.HasValue || !initialized || sequence == null || sequence.Length != 16)
    {
        return -1;
    }

    ErrorCode error;

    IMem<int> sequence_dev = Cl.CreateBuffer<int>(context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, sequence, out error);
    ErrorCheck(error, "CrackHigh(): Cl.CreateBuffer");

    long[] seeds = new long[1];
    IMem<long> seed_dev = Cl.CreateBuffer<long>(context, MemFlags.CopyHostPtr | MemFlags.WriteOnly, seeds, out error);
    // This label used to name a different method ("InitializeParameters()").
    ErrorCheck(error, "CrackHigh(): Cl.CreateBuffer");

    error = Cl.SetKernelArg(crackHighKernel.Value, 0, sequence_dev);
    ErrorCheck(error, "Cl.SetKernelArg");
    error = Cl.SetKernelArg(crackHighKernel.Value, 1, seed_dev);
    ErrorCheck(error, "Cl.SetKernelArg");
    error = Cl.SetKernelArg(crackHighKernel.Value, 2, (IntPtr)sizeof(long), low);
    ErrorCheck(error, "Cl.SetKernelArg");

    CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, 0, out error);
    ErrorCheck(error, "Cl.CreateCommandQueue");

    int maxGroupWorkSize = Cl.GetKernelWorkGroupInfo(crackHighKernel.Value, device, KernelWorkGroupInfo.WorkGroupSize, out error).CastTo<int>();
    ErrorCheck(error, "Cl.GetKernelWorkGroupInfo");

    // Launch twelve times the kernel's work-group size as the global work size.
    Event e;
    int threads = maxGroupWorkSize * 12;
    IntPtr[] workSize = new IntPtr[] { (IntPtr)threads };
    error = Cl.EnqueueNDRangeKernel(cmdQueue, crackHighKernel.Value, 1, null, workSize, null, 0, null, out e);
    ErrorCheck(error, "Cl.EnqueueNDRangeKernel");

    error = Cl.Finish(cmdQueue);
    ErrorCheck(error, "Cl.Finish");
    // The kernel event is no longer needed; release it before the variable is reused.
    e.Dispose();

    long[] seed_host = new long[1];
    error = Cl.EnqueueReadBuffer(cmdQueue, seed_dev, Bool.True, (IntPtr)0, (IntPtr)(sizeof(long) * 1), seed_host, 0, null, out e);
    ErrorCheck(error, "CL.EnqueueReadBuffer");
    e.Dispose();

    // Release the queue and the device buffers.
    error = Cl.ReleaseCommandQueue(cmdQueue);
    ErrorCheck(error, "CL.ReleaseCommandQueue");
    error = Cl.ReleaseMemObject(sequence_dev);
    ErrorCheck(error, "CL.ReleaseMemObject");
    error = Cl.ReleaseMemObject(seed_dev);
    ErrorCheck(error, "CL.ReleaseMemObject");

    return seed_host[0];
}
/// <summary>
/// Creates a buffer in <paramref name="context"/> from <paramref name="data"/>.
/// <c>MemFlags.CopyHostPtr</c> is always added to <paramref name="flags"/>.
/// </summary>
/// <param name="context">Context in which the buffer is created.</param>
/// <param name="data">Host array passed to <c>Cl.CreateBuffer</c>.</param>
/// <param name="flags">Additional memory flags.</param>
/// <returns>The created buffer; the error code is passed to <c>Check()</c> first.</returns>
public static IMem<T> CreateBuffer<T>(this Context context, T[] data, MemFlags flags = MemFlags.None) where T : struct
{
    MemFlags creationFlags = flags | MemFlags.CopyHostPtr;

    ErrorCode status;
    var buffer = Cl.CreateBuffer<T>(context, creationFlags, data, out status);
    status.Check();

    return buffer;
}
/// <summary>
/// Selects the first GPU device found across all platforms, creates a context for it
/// and creates the zero-sized <c>dummy</c> buffer.
/// </summary>
public void SetUp()
{
    // First GPU device across all platforms; First() throws when there is none.
    device = Cl.GetPlatformIDs(out error)
               .SelectMany(platformid => Cl.GetDeviceIDs(platformid, Cl.DeviceType.Gpu, out error))
               .First();

    var contextDevices = new[] { device };
    context = Cl.CreateContext(null, 1, contextDevices, null, IntPtr.Zero, out error);

    // Zero size, no host pointer.
    dummy = Cl.CreateBuffer(context, Cl.MemFlags.ReadOnly, IntPtr.Zero, IntPtr.Zero, out error);
}
/// <summary>
/// Creates an OpenCL buffer of <paramref name="size"/> in the shared context and wraps it
/// in a <c>MemoryBuffer</c>.
/// </summary>
/// <param name="size">Size passed to <c>Cl.CreateBuffer</c>.</param>
/// <param name="write">Whether write access is requested.</param>
/// <param name="read">Whether read access is requested.</param>
/// <returns>A <c>MemoryBuffer</c> whose <c>mem</c> is the created buffer.</returns>
/// <exception cref="System.InvalidOperationException">Buffer creation reported an error.</exception>
public static MemoryBuffer CreateMemory(int size, bool write, bool read)
{
    // Map the two booleans onto a single access flag; no flag at all when neither is set.
    MemFlags access;
    if (write && read)
    {
        access = MemFlags.ReadWrite;
    }
    else if (write)
    {
        access = MemFlags.WriteOnly;
    }
    else if (read)
    {
        access = MemFlags.ReadOnly;
    }
    else
    {
        access = 0;
    }

    var r = Cl.CreateBuffer(_context, access, size, out ErrorCode code);

    // Do not hand out a wrapper around a buffer that failed to allocate.
    if (code != ErrorCode.Success)
    {
        throw new System.InvalidOperationException("Cl.CreateBuffer failed: " + code);
    }

    return new MemoryBuffer() { mem = r };
}
/// <summary>
/// Builds and runs a kernel that combines several single-precision math functions of each
/// input value (cos, sin, floor, sqrt, exp, powr, fabs, log) and returns the per-element results.
/// </summary>
/// <param name="input">Input values; one work-item is launched per element.</param>
/// <returns>One float per input element; an empty array when <paramref name="input"/> is empty.</returns>
public float[] MathFunctionsSingleTest(int[] input)
{
    if (input.Length == 0)
    {
        return new float[0];
    }

    var source = @"#pragma OPENCL EXTENSION cl_khr_fp64 : enable

__kernel void kernelCode(__global int* ___input___, __global float* ___result___)
{
    int n0;
    float ___final___10;
    int ___flag___11;
    int ___id___ = get_global_id(0);
    n0 = ___input___[___id___];
    float pi = 3.14159274f;
    float c = cos(((float) n0));
    float s = sin(((float) n0));
    float f = floor(pi);
    float sq = sqrt(((float) (n0 * n0)));
    float ex = exp(pi);
    float p = powr(pi, 2.0f);
    float a = fabs(c);
    float l = log(((float) n0));
    ___final___10 = ((((((((f * pi) * c) * s) * sq) * ex) * p) * a) * l);
    ___result___[___id___] = ___final___10;
}
";

    var output = new float[input.Length];
    ErrorCode error;

    // NOTE(review): UseHostPtr hands the driver a pointer into a managed array; confirm the
    // binding keeps the arrays pinned for the lifetime of these buffers.
    var a = Cl.CreateBuffer(env.Context, MemFlags.ReadOnly | MemFlags.UseHostPtr, (IntPtr)(input.Length * sizeof(int)), input, out error);
    var b = Cl.CreateBuffer(env.Context, MemFlags.WriteOnly | MemFlags.UseHostPtr, (IntPtr)(input.Length * sizeof(float)), output, out error);

    OpenCL.Net.Program program = Cl.CreateProgramWithSource(env.Context, 1u, new string[] { source }, null, out error);
    error = Cl.BuildProgram(program, (uint)env.Devices.Length, env.Devices, " -cl-fast-relaxed-math -cl-mad-enable ", null, IntPtr.Zero);

    OpenCL.Net.Kernel kernel = Cl.CreateKernel(program, "kernelCode", out error);
    error = Cl.SetKernelArg(kernel, 0, a);
    error = Cl.SetKernelArg(kernel, 1, b);

    // One work-item per input element, local size 1.
    Event eventID;
    error = Cl.EnqueueNDRangeKernel(env.CommandQueues[0], kernel, (uint)1, null, new IntPtr[] { (IntPtr)input.Length }, new IntPtr[] { (IntPtr)1 }, (uint)0, null, out eventID);

    env.CommandQueues[0].ReadFromBuffer(b, output);

    // Release everything created by this call; the event, kernel and program
    // handles were previously leaked on every invocation.
    eventID.Dispose();
    Cl.ReleaseKernel(kernel);
    program.Dispose();
    a.Dispose();
    b.Dispose();

    return output;
}
/// <summary>
/// Runs the "ArrayCompare" kernel twice — first with two distinct int buffers, then with
/// the shared <c>dummy</c> buffer for both arguments — and asserts on the four booleans
/// read back after each run.
/// </summary>
/// <param name="program">Program from which the "ArrayCompare" kernel is created.</param>
public void ArrayCompare(Cl.Program program)
{
    // Kernel and command queue.
    Cl.Kernel kernel = Cl.CreateKernel(program, "ArrayCompare", out error);
    clSafeCall(error);
    Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // Host result vector and the sizes used below.
    bool[] res = { true, false, true, false };
    IntPtr intBytes = (IntPtr)(sizeof(int));
    IntPtr resultBytes = (IntPtr)(sizeof(bool) * res.Length);
    IntPtr[] singleWorkItem = { (IntPtr)1 };

    // Device buffers: two int-sized inputs and the boolean result vector.
    Cl.Mem dp1 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, intBytes, IntPtr.Zero, out error);
    clSafeCall(error);
    Cl.Mem dp2 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, intBytes, IntPtr.Zero, out error);
    clSafeCall(error);
    Cl.Mem dp3 = Cl.CreateBuffer(context, Cl.MemFlags.WriteOnly, resultBytes, IntPtr.Zero, out error);
    clSafeCall(error);

    // First run: two distinct buffers.
    clSafeCall(Cl.SetKernelArg(kernel, 0, dp1));
    clSafeCall(Cl.SetKernelArg(kernel, 1, dp2));
    clSafeCall(Cl.SetKernelArg(kernel, 2, dp3));

    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, singleWorkItem, null, 0, null, out clevent));
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp3, Cl.Bool.True, IntPtr.Zero, resultBytes, res, 0, null, out clevent));
    clSafeCall(Cl.Finish(cmdQueue));

    bool[] expectedDistinct = { false, true, false, true };
    Assert.AreEqual(expectedDistinct, res);

    // Second run: the dummy buffer for both compared arguments.
    clSafeCall(Cl.SetKernelArg(kernel, 0, dummy));
    clSafeCall(Cl.SetKernelArg(kernel, 1, dummy));

    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, singleWorkItem, null, 0, null, out clevent));
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp3, Cl.Bool.True, IntPtr.Zero, resultBytes, res, 0, null, out clevent));
    clSafeCall(Cl.Finish(cmdQueue));

    bool[] expectedSame = { true, false, true, false };
    Assert.AreEqual(expectedSame, res);
}
/// <summary>
/// Converts <paramref name="input"/> into a planar array (all red values, then all green,
/// then all blue), sets <c>DataDimension</c> to three times the pixel count and appends the
/// data together with <paramref name="label"/> to <c>DataContainer</c>. With OPENCL_ENABLED
/// the data is stored as a float device buffer, otherwise as a double array.
/// </summary>
/// <param name="input">Image to read.</param>
/// <param name="label">Label stored with the data item.</param>
public void ReadImage(Image input, int label)
{
    unsafe
    {
        using (Bitmap bmp = new Bitmap(input))
        {
            // Number of pixels, which is also the length of one colour plane.
            int planeSize = bmp.Width * bmp.Height;
            DataDimension = planeSize * 3;

#if OPENCL_ENABLED
            float[] dataPoint = new float[DataDimension];
#else
            double[] dataPoint = new double[DataDimension];
#endif

            // Copy the colour values directly from the locked bitmap memory.
            Rectangle wholeImage = new Rectangle(0, 0, bmp.Width, bmp.Height);
            BitmapData bitmapData = bmp.LockBits(wholeImage, ImageLockMode.ReadOnly, bmp.PixelFormat);

            int bytesPerPixel = Image.GetPixelFormatSize(bmp.PixelFormat) / 8;
            int rowCount = bitmapData.Height;
            int rowBytes = bitmapData.Width * bytesPerPixel;
            byte* firstRow = (byte*)bitmapData.Scan0;

            int pixel = 0;
            for (int row = 0; row < rowCount; row++)
            {
                byte* rowPtr = firstRow + (row * bitmapData.Stride);
                for (int x = 0; x < rowBytes; x += bytesPerPixel, pixel++)
                {
                    // Bytes x, x + 1 and x + 2 are taken as blue, green and red respectively.
                    dataPoint[pixel] = rowPtr[x + 2];                         // Red
                    dataPoint[pixel + planeSize] = rowPtr[x + 1];             // Green
                    dataPoint[pixel + planeSize + planeSize] = rowPtr[x];     // Blue
                }
            }

            bmp.UnlockBits(bitmapData);

#if OPENCL_ENABLED
            int datumBytesSize = sizeof(float) * dataPoint.Length;
            Mem tmpBuffer = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadOnly | MemFlags.CopyHostPtr | MemFlags.AllocHostPtr, (IntPtr)datumBytesSize, dataPoint, out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "DataSet(): Cl.CreateBuffer tmpBuffer");
            DataContainer.Add(new DataItem(tmpBuffer, label));
#else
            DataContainer.Add(new DataItem(dataPoint, label));
#endif
        }
    }
}
/// <summary>
/// Allocates a host array of <paramref name="size"/> elements and creates an OpenCL buffer
/// over it with <c>MemFlags.UseHostPtr</c>.
/// </summary>
/// <param name="context">Context in which the buffer is created.</param>
/// <param name="size">Number of elements in the host array.</param>
/// <param name="error">Error code reported by <c>Cl.CreateBuffer</c>.</param>
public ClBuffer(Context context, int size, out ErrorCode error)
{
    ready = false;

    this.Size = size;
    buffer = new T[size];
    clMem = Cl.CreateBuffer<T>(context, MemFlags.UseHostPtr, buffer, out error);

    // The instance is ready only when the buffer was created successfully.
    ready = error == ErrorCode.Success;
}
/// <summary>
/// Stores the simulation parameters, loads the kernel source from "Kernels.cl", creates the
/// OpenCL environment and allocates the host output array and the three device buffers.
/// </summary>
/// <param name="stockData">Source of the raw data, market values and dimensions.</param>
/// <param name="holding">Subtracted from <c>QuotesPerStock</c> when sizing the outputs.</param>
/// <param name="ann">Stored in <c>_ann</c>.</param>
/// <exception cref="System.InvalidOperationException">A device buffer could not be created.</exception>
public GpuSimiulation(IStockData stockData, int holding = 10, float ann = 5.0990f)
{
    _stockData = stockData;
    _holding = holding;
    _ann = ann;

    _programSource = File.ReadAllText("Kernels.cl");
    _env = "*AMD*".CreateCLEnvironment(DeviceType.Gpu, CommandQueueProperties.ProfilingEnable);

    _h_output = new float[(stockData.QuotesPerStock - holding)];

    // Every creation result is checked; previously the error codes were overwritten
    // and never inspected, so a failed allocation went unnoticed.
    _d_stocksAndPrices = Cl.CreateBuffer<float>(_env.Context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, _stockData.RawData, out ErrorCode err);
    if (err != ErrorCode.Success)
    {
        throw new System.InvalidOperationException("Cl.CreateBuffer failed for the stock/price data: " + err);
    }

    _d_portfolioStockMv = Cl.CreateBuffer<float>(_env.Context, MemFlags.ReadOnly | MemFlags.CopyHostPtr, _stockData.MarketValues, out err);
    if (err != ErrorCode.Success)
    {
        throw new System.InvalidOperationException("Cl.CreateBuffer failed for the market values: " + err);
    }

    _d_output = Cl.CreateBuffer<float>(_env.Context, MemFlags.ReadWrite, stockData.StocksCount * (stockData.QuotesPerStock - holding), out err);
    if (err != ErrorCode.Success)
    {
        throw new System.InvalidOperationException("Cl.CreateBuffer failed for the output buffer: " + err);
    }
}
/// <summary>
/// Copies the input dimensions to the output, creates the output neurons and allocates
/// zeroed OpenCL buffers for the mean, variance, cumulative mean and cumulative variance
/// (one float per input unit) and for the normalized input (one float per input unit and
/// mini-batch item).
/// </summary>
public override void SetupOutput()
{
    // Output geometry mirrors the input.
    this.outputWidth = inputWidth;
    this.outputHeight = inputHeight;
    this.outputDepth = inputDepth;

    this.nOutputUnits = nInputUnits;
    this.outputNeurons = new Neurons(nOutputUnits);

    // Size in bytes of every per-unit statistics buffer.
    IntPtr statsBytes = (IntPtr)(sizeof(float) * nInputUnits);

    this.meanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, statsBytes, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(meanGPU, nInputUnits, typeof(float));

    this.varianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, statsBytes, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(varianceGPU, nInputUnits, typeof(float));

    this.cumulativeMeanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, statsBytes, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(cumulativeMeanGPU, nInputUnits, typeof(float));

    // NOTE(review): the cumulative variance starts at zero here — confirm that is intended.
    this.cumulativeVarianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, statsBytes, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(cumulativeVarianceGPU, nInputUnits, typeof(float));

    // Normalized input activations.
    this.normalizedInputGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)(sizeof(float) * nInputUnits * inputNeurons.MiniBatchSize), out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(normalizedInputGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(float));
}
/// <summary>
/// Creates a buffer of <paramref name="size"/> bytes in <paramref name="context"/> without
/// a host pointer, after validating the flag combination.
/// </summary>
/// <param name="context">Context that owns the buffer; must not be <c>Context.Null</c>.</param>
/// <param name="flags">Memory flags; host-pointer flags and contradictory access flags are rejected.</param>
/// <param name="size">Buffer size; must not be zero.</param>
/// <exception cref="ArgumentNullException"><paramref name="context"/> is <c>Context.Null</c>.</exception>
/// <exception cref="ArgumentException">The flags are contradictory or include UseHostPtr/CopyHostPtr.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="size"/> is zero.</exception>
public Buffer(Context context, MemoryFlags flags, ulong size)
    : this()
{
    if (context == Context.Null)
    {
        throw new ArgumentNullException("context");
    }

    // Device-side access flags.
    if (flags.HasFlag(MemoryFlags.WriteOnly) && flags.HasFlag(MemoryFlags.ReadOnly))
    {
        throw new ArgumentException("MemoryFlags.WriteOnly and MemoryFlags.ReadOnly are mutually exclusive.");
    }

    // Host-side access flags: at most one of the three may be set.
    bool hostWriteOnly = flags.HasFlag(MemoryFlags.HostWriteOnly);
    bool hostReadOnly = flags.HasFlag(MemoryFlags.HostReadOnly);
    bool hostNoAccess = flags.HasFlag(MemoryFlags.HostNoAccess);

    if (hostWriteOnly && hostReadOnly)
    {
        throw new ArgumentException("MemoryFlags.HostWriteOnly and MemoryFlags.HostReadOnly are mutually exclusive.");
    }

    if (hostWriteOnly && hostNoAccess)
    {
        throw new ArgumentException("MemoryFlags.HostWriteOnly and MemoryFlags.HostNoAccess are mutually exclusive.");
    }

    if (hostReadOnly && hostNoAccess)
    {
        throw new ArgumentException("MemoryFlags.HostReadOnly and MemoryFlags.HostNoAccess are mutually exclusive.");
    }

    // This overload passes no host pointer, so host-pointer flags are rejected.
    if (flags.HasFlag(MemoryFlags.UseHostPtr))
    {
        throw new ArgumentException("MemoryFlags.UseHostPtr is not valid.");
    }

    if (flags.HasFlag(MemoryFlags.CopyHostPtr))
    {
        throw new ArgumentException("MemoryFlags.CopyHostPtr is not valid.");
    }

    if (size == 0)
    {
        throw new ArgumentOutOfRangeException("size", size, "size is 0.");
    }

    unsafe
    {
        int error;
        Handle = Cl.CreateBuffer(context.Handle, (ulong)flags, new UIntPtr(size), null, &error);
        ClHelper.GetError(error);
    }
}
/// <summary>
/// Sets the output to a 1 x 1 x <c>nOutputUnits</c> shape, creates the output neurons and,
/// with OPENCL_ENABLED, allocates and wipes the dropout mask buffer (one bool per output
/// unit and mini-batch item).
/// </summary>
public override void SetupOutput()
{
    this.outputDepth = nOutputUnits;
    this.outputHeight = 1;
    this.outputWidth = 1;

    this.outputNeurons = new Neurons(this.nOutputUnits);

#if OPENCL_ENABLED
    IntPtr maskBytes = (IntPtr)(sizeof(bool) * nOutputUnits * inputNeurons.MiniBatchSize);

    this.dropoutMaskGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, maskBytes, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(dropoutMaskGPU, nOutputUnits * inputNeurons.MiniBatchSize, typeof(bool));
#endif
}
/// <summary>
/// Reads tab-separated numeric rows from <paramref name="dataPath"/> and one label per line
/// from <paramref name="labelsPath"/>, and appends one <c>DataItem</c> per row to
/// <c>DataContainer</c>. With OPENCL_ENABLED each row is stored as a float device buffer,
/// otherwise as a double array. <c>DataDimension</c> is set to each row's column count.
/// </summary>
/// <param name="dataPath">Path of the data file.</param>
/// <param name="labelsPath">Path of the labels file.</param>
/// <exception cref="Exception">The two files do not have the same number of lines.</exception>
public void ReadData(string dataPath, string labelsPath)
{
    string[] dataLines = File.ReadAllLines(dataPath);
    string[] labelLines = File.ReadAllLines(labelsPath);

    if (dataLines.Length != labelLines.Length)
    {
        throw new Exception("The amount of data does not match the amount of labels");
    }

    // One data item per row, paired with the label on the same line number.
    for (int row = 0; row < dataLines.Length; row++)
    {
        string[] columns = dataLines[row].Split('\t');
        DataDimension = columns.Length;

#if OPENCL_ENABLED
        float[] dataPoint = Array.ConvertAll(columns, text => float.Parse(text, CultureInfo.InvariantCulture.NumberFormat));

        int datumBytesSize = sizeof(float) * dataPoint.Length;
        Mem tmpBuffer = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadOnly | MemFlags.CopyHostPtr | MemFlags.AllocHostPtr, (IntPtr)datumBytesSize, dataPoint, out OpenCLSpace.ClError);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "DataSet(): Cl.CreateBuffer tmpBuffer");
#else
        double[] tmpBuffer = Array.ConvertAll(columns, text => double.Parse(text, CultureInfo.InvariantCulture.NumberFormat));
#endif

        int label = Convert.ToInt32(labelLines[row]);
        DataContainer.Add(new DataItem(tmpBuffer, label));
    }
}
/// <summary>
/// Runs the "ArrayRefOut" kernel with a single work-item on two one-element int buffers
/// (plus the shared <c>dummy</c> buffer) and asserts on the values read back.
/// </summary>
/// <param name="program">Program from which the "ArrayRefOut" kernel is created.</param>
public void ArrayRefOut(Cl.Program program)
{
    // create kernel
    Cl.Kernel kernel = Cl.CreateKernel(program, "ArrayRefOut", out error);
    clSafeCall(error);

    // create command queue
    Cl.CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, Cl.CommandQueueProperties.None, out error);
    clSafeCall(error);

    // allocate host vectors
    int[] hp1 = { 1 };
    int[] hp2 = { 2 };
    IntPtr hp1Bytes = (IntPtr)(sizeof(int) * hp1.Length);
    IntPtr hp2Bytes = (IntPtr)(sizeof(int) * hp2.Length);

    // allocate device vectors
    Cl.Mem dp1 = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, hp1Bytes, hp1, out error);
    clSafeCall(error);
    Cl.Mem dp2 = Cl.CreateBuffer(context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, hp2Bytes, hp2, out error);
    clSafeCall(error);

    // setup kernel arguments
    clSafeCall(Cl.SetKernelArg(kernel, 0, dp1));
    clSafeCall(Cl.SetKernelArg(kernel, 1, dp2));
    clSafeCall(Cl.SetKernelArg(kernel, 2, dummy));

    // execute kernel
    clSafeCall(Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new[] { (IntPtr)1 }, null, 0, null, out clevent));

    // copy results from device back to host; each read is sized from its own array
    // (the second read used hp1.Length before, which only worked because both lengths are 1)
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp1, Cl.Bool.True, IntPtr.Zero, hp1Bytes, hp1, 0, null, out clevent));
    clSafeCall(Cl.EnqueueReadBuffer(cmdQueue, dp2, Cl.Bool.True, IntPtr.Zero, hp2Bytes, hp2, 0, null, out clevent));

    clSafeCall(Cl.Finish(cmdQueue));

    Assert.AreEqual(5, hp1[0]);
    Assert.AreEqual(4, hp2[0]);
}
/// <summary>
/// Ensures kernel argument <paramref name="argIndex"/> is bound to a write-only device
/// buffer of at least <paramref name="length"/> bytes. Nothing happens when the slot already
/// holds a buffer whose recorded size is large enough.
/// </summary>
/// <param name="argIndex">Kernel argument index; also indexes <c>_memObj</c> and <c>_memObjSize</c>.</param>
/// <param name="length">Required buffer size passed to <c>Cl.CreateBuffer</c>.</param>
public void WriteOutBuffer(int argIndex, int length)
{
    fixed (IntPtr* memObjPtr = _memObj)
    {
        ErrorCode err;
        IntPtr* slot = memObjPtr + argIndex;
        bool slotEmpty = *slot == IntPtr.Zero;

        // Existing buffer that is already big enough: keep it.
        if (!slotEmpty && !(_memObjSize[argIndex] < length))
        {
            return;
        }

        // Existing but too small: release it before allocating the replacement.
        if (!slotEmpty)
        {
            ((Mem*)slot)->Dispose();
        }

        _memObjSize[argIndex] = length;
        IntPtr lmem = (*slot = Cl.CreateBuffer(Context, MemFlags.WriteOnly, length, IntPtr.Zero, out err));
        err = Cl.SetKernelArg(Kernel, (uint)argIndex, IntPtr.Size, lmem);
    }
}
/// <summary>
/// Runs the "convolveImg" kernel over <paramref name="inMap"/> using the mask produced by
/// <c>createBlurMask(sigma)</c> and returns the result as a new map of the same size.
/// </summary>
/// <param name="inMap">Input map.</param>
/// <param name="sigma">Value passed to <c>createBlurMask</c>.</param>
/// <returns>A new <c>FloatMap</c> with the dimensions of <paramref name="inMap"/>.</returns>
public FloatMap GaussianBlur(FloatMap inMap, float sigma)
{
    var k = _kernels["convolveImg"];

    FloatMap outMap = new FloatMap(inMap.W, inMap.H);
    FloatMap mask = createBlurMask(sigma);

    IMem<float> maskBuf = Cl.CreateBuffer<float>(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, mask._buf, out err);
    assert(err, "gaussian blur mask, mem object creation");

    try
    {
        err = Cl.SetKernelArg(k, 2, intPtrSize, maskBuf);
        assert(err, "gaussian blur mask, setKernelArg");

        // Half-size of the mask, derived from its width.
        err = Cl.SetKernelArg(k, 3, intSize, (mask.W - 1) / 2);
        assert(err, "gaussian blur mask, setKernelArg");

        singlePass(k, inMap, outMap);
    }
    finally
    {
        // The mask buffer is only needed for this pass; it was never released before.
        Cl.ReleaseMemObject(maskBuf);
    }

    return outMap;
}
/// <summary>
/// Runs the "capHolesImg" kernel over <paramref name="inMap"/> using the mask produced by
/// <c>getDistanceWeightMap(filterHalfSize)</c> and returns the result as a new map.
/// </summary>
/// <param name="inMap">Input map.</param>
/// <param name="filterHalfSize">Passed to <c>getDistanceWeightMap</c> and to the kernel as argument 3.</param>
/// <returns>A new <c>FloatMap</c> with the dimensions of <paramref name="inMap"/>.</returns>
private FloatMap capHoles(FloatMap inMap, int filterHalfSize)
{
    var kernel = _kernels["capHolesImg"];
    FloatMap result = new FloatMap(inMap.W, inMap.H);

    // Upload the weight mask as a read-only buffer.
    FloatMap weights = getDistanceWeightMap(filterHalfSize);
    IMem<float> weightsBuffer = Cl.CreateBuffer<float>(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, weights._buf, out err);
    assert(err, "capholes mask, mem object creation");

    // Arguments 2 and 3: the mask and its half size.
    err = Cl.SetKernelArg(kernel, 2, intPtrSize, weightsBuffer);
    assert(err, "capholes mask, setKernelArg");
    err = Cl.SetKernelArg(kernel, 3, intSize, filterHalfSize);
    assert(err, "capholes mask, setKernelArg");

    singlePass(kernel, inMap, result);

    Cl.ReleaseMemObject(weightsBuffer);
    return result;
}
/// <summary>
/// Ensures kernel argument <paramref name="argIndex"/> is bound to a write-only device
/// buffer large enough for <paramref name="localBuffer"/>. Only the array's length is used;
/// no data is uploaded.
/// </summary>
/// <param name="argIndex">Kernel argument index; also indexes <c>_memObj</c> and <c>_memObjSize</c>.</param>
/// <param name="localBuffer">Host array whose byte size determines the required buffer size.</param>
public void WriteOutBuffer<T>(int argIndex, T[] localBuffer) where T : unmanaged
{
    int size = sizeof(T) * localBuffer.Length;

    fixed (IntPtr* memObjPtr = _memObj)
    {
        ErrorCode err;
        IntPtr* slot = memObjPtr + argIndex;
        bool slotEmpty = *slot == IntPtr.Zero;

        if (slotEmpty || _memObjSize[argIndex] < size)
        {
            // An existing but too-small buffer is released before it is replaced.
            if (!slotEmpty)
            {
                ((Mem*)slot)->Dispose();
            }

            _memObjSize[argIndex] = size;
            *slot = Cl.CreateBuffer(Context, MemFlags.WriteOnly, size, IntPtr.Zero, out err);
            // The kernel argument is given the address of the slot that holds the handle.
            err = Cl.SetKernelArg(Kernel, (uint)argIndex, IntPtr.Size, (IntPtr)slot);
        }
    }
}
/// <summary>
/// Allocates a device buffer of floats, rounding <paramref name="len"/> up to a multiple of
/// four elements, and optionally fills it with zero.
/// </summary>
/// <param name="len">Requested number of float elements.</param>
/// <param name="flags">Memory flags, cast to the binding's <c>MemFlags</c>.</param>
/// <param name="zero">When true, the buffer is filled with 0 after creation.</param>
/// <returns>A <c>Memory</c> wrapping the created buffer.</returns>
/// <exception cref="Exception">Buffer creation reported an error.</exception>
public Memory AllocateMemory(int len, MemoryFlags flags, bool zero)
{
    // Round the element count up to the next multiple of four.
    len = (len + 3) & ~3;

    Memory m = new Memory
    {
        buf = Cl.CreateBuffer(env.Context, (MemFlags)(int)flags, (IntPtr)(len * sizeof(float)), out var errCode)
    };

    // Check the creation result before the buffer is used; previously the fill ran
    // first, on a buffer that might not exist.
    if (errCode != ErrorCode.Success)
    {
        throw new Exception(errCode.ToString());
    }

    if (zero)
    {
        Fill(m, 0, len * sizeof(float), 0);
    }

    return m;
}
/// <summary>
/// Creates a buffer of <paramref name="length"/> elements in <paramref name="context"/>.
/// When <paramref name="zero"/> is true the buffer is initialised from a freshly allocated
/// (default-valued) host array via <c>MemFlags.CopyHostPtr</c>.
/// </summary>
/// <param name="context">Context in which the buffer is created.</param>
/// <param name="length">Number of elements.</param>
/// <param name="flags">Memory flags.</param>
/// <param name="zero">Whether to initialise the buffer from a new host array.</param>
/// <returns>The created buffer; the error code is passed to <c>Check()</c> first.</returns>
public static IMem<T> CreateBuffer<T>(this Context context, int length, MemFlags flags = MemFlags.None, bool zero = false) where T : struct
{
    ErrorCode err;
    IMem<T> buffer;

    if (!zero)
    {
        buffer = Cl.CreateBuffer<T>(context, flags, length, out err);
    }
    else
    {
        // A new managed array holds default values, so copying it zero-fills the buffer.
        buffer = Cl.CreateBuffer<T>(context, flags | MemFlags.CopyHostPtr, new T[length], out err);
    }

    err.Check();
    return buffer;
}
/// <summary>
/// Sets the current argument to a device buffer for <paramref name="data"/>, reusing the
/// buffer cached in <c>AutoconfiguredBuffers</c> when there is one and otherwise creating,
/// pinning and caching a new one.
/// </summary>
/// <param name="data">Host array used as cache key and as the initial buffer contents.</param>
/// <exception cref="CLException">Buffer creation reported an error.</exception>
private void ArrayToMem<T>(T[] data)
{
    curArgSize = _intPtrSize;

    if (kernel.Provider.AutoconfiguredBuffers.ContainsKey(data))
    {
        curArgVal = kernel.Provider.AutoconfiguredBuffers[data];
        return;
    }

    // New buffers are always read-write device memory initialised by copying the host array.
    MemFlags flags = (MemFlags)Operations.ReadWrite | (MemFlags)Memory.Device | MemFlags.CopyHostPtr;
    int elementSize = Marshal.SizeOf(typeof(T));

    ErrorCode error;
    var mem = Cl.CreateBuffer(kernel.Provider.Context, flags, (IntPtr)(elementSize * data.Length), data, out error);

    // Fail before the buffer is published: previously a failed buffer was pinned and
    // cached first, so later calls silently reused it.
    if (error != ErrorCode.Success)
    {
        throw new CLException(error);
    }

    curArgVal = mem;
    mem.Pin();
    kernel.Provider.AutoconfiguredBuffers.Add(data, (Mem)mem);
}
/// <summary>
/// Creates the context, builds the program from "kernels.cl", takes its first kernel,
/// creates the command queue and output buffer, and sets kernel arguments 2 to 4.
/// </summary>
/// <exception cref="Exception">The program build status is not <c>BuildStatus.Success</c>.</exception>
private void ready()
{
    ErrorCode error;

    context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
    allGood(error);

    string source = System.IO.File.ReadAllText("kernels.cl");
    program = Cl.CreateProgramWithSource(context, 1, new[] { source }, null, out error);
    allGood(error);

    // Keep the build result separately: the status query below overwrites `error`.
    ErrorCode buildError = Cl.BuildProgram(program, 1, new[] { device }, string.Empty, null, IntPtr.Zero);

    InfoBuffer buildStatus = Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Status, out error);
    if (buildStatus.CastTo<BuildStatus>() != BuildStatus.Success)
    {
        throw new Exception($"OpenCL could not build the kernel successfully: {buildStatus.CastTo<BuildStatus>()}");
    }

    allGood(error);
    allGood(buildError);

    Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
    kernel = kernels[0];
    allGood(error);

    queue = Cl.CreateCommandQueue(context, device, CommandQueueProperties.None, out error);
    allGood(error);

    dataOut = Cl.CreateBuffer(context, MemFlags.WriteOnly, (IntPtr)(globalSize * sizeof(int)), out error);
    allGood(error);

    // Arguments 2-4: output buffer, world seed and global size. The error codes are
    // OR-ed together so that any failure leaves a non-success value to check.
    var intSizePtr = new IntPtr(Marshal.SizeOf(typeof(int)));
    error |= Cl.SetKernelArg(kernel, 2, new IntPtr(Marshal.SizeOf(typeof(IntPtr))), dataOut);
    error |= Cl.SetKernelArg(kernel, 3, intSizePtr, new IntPtr(worldSeed));
    error |= Cl.SetKernelArg(kernel, 4, intSizePtr, new IntPtr(globalSize));
    allGood(error);
}
/// <summary>
/// Demo entry point: lists the OpenCL platforms, picks the first device of the first
/// platform, builds three small kernels, runs "double_everything" and "add_array" on
/// 1000-element float arrays and prints the result, then lists the assembly's manifest
/// resource names.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    Console.WriteLine("Hello World!");

    uint platformCount;
    ErrorCode result = Cl.GetPlatformIDs(0, null, out platformCount);
    Console.WriteLine("{0} platforms found", platformCount);
    if (platformCount == 0)
    {
        // Nothing to run on; indexing platformIds[0] below would throw.
        Console.Error.WriteLine("No OpenCL platform available.");
        return;
    }

    var platformIds = new Platform[platformCount];
    result = Cl.GetPlatformIDs(platformCount, platformIds, out platformCount);

    var platformCounter = 0;
    foreach (var platformId in platformIds)
    {
        IntPtr paramSize;
        result = Cl.GetPlatformInfo(platformId, PlatformInfo.Name, IntPtr.Zero, InfoBuffer.Empty, out paramSize);
        using (var buffer = new InfoBuffer(paramSize))
        {
            // Query the platform being iterated; this used to query platformIds[0]
            // every time and so printed the first platform's name for all of them.
            result = Cl.GetPlatformInfo(platformId, PlatformInfo.Name, paramSize, buffer, out paramSize);
            Console.WriteLine($"Platform {platformCounter}: {buffer}");
        }

        platformCounter++;
    }

    Console.WriteLine("Using first platform...");

    uint deviceCount;
    result = Cl.GetDeviceIDs(platformIds[0], DeviceType.All, 0, null, out deviceCount);
    Console.WriteLine("{0} devices found", deviceCount);
    if (deviceCount == 0)
    {
        Console.Error.WriteLine("No OpenCL device available on the first platform.");
        return;
    }

    var deviceIds = new Device[deviceCount];
    result = Cl.GetDeviceIDs(platformIds[0], DeviceType.All, deviceCount, deviceIds, out var numberDevices);
    var selectedDevice = deviceIds[0];

    var context = Cl.CreateContext(null, 1, new[] { selectedDevice }, null, IntPtr.Zero, out var error);

    const string kernelSrc = @"
// Simple test; c[i] = a[i] + b[i]
__kernel void add_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] + b[xid] - 1500;
}

__kernel void sub_array(__global float *a, __global float *b, __global float *c)
{
    int xid = get_global_id(0);
    c[xid] = a[xid] - b[xid] - 2000;
}

__kernel void double_everything(__global float *a)
{
    int xid = get_global_id(0);
    a[xid] = a[xid] * 2;
}
";

    var src = kernelSrc;
    Console.WriteLine("=== src ===");
    Console.WriteLine(src);
    Console.WriteLine("============");

    var program = Cl.CreateProgramWithSource(context, 1, new[] { src }, null, out var error2);
    error2 = Cl.BuildProgram(program, 1, new[] { selectedDevice }, string.Empty, null, IntPtr.Zero);
    if (error2 == ErrorCode.BuildProgramFailure)
    {
        Console.Error.WriteLine(Cl.GetProgramBuildInfo(program, selectedDevice, ProgramBuildInfo.Log, out error));
    }

    Console.WriteLine(error2);

    // Get the kernels.
    // NOTE(review): indices 0 and 2 assume the kernels come back in source order — confirm.
    var kernels = Cl.CreateKernelsInProgram(program, out error);
    Console.WriteLine($"Program contains {kernels.Length} kernels.");
    var kernelAdd = kernels[0];
    var kernelDouble = kernels[2];

    float[] A = new float[1000];
    float[] B = new float[1000];
    float[] C = new float[1000];
    for (var i = 0; i < 1000; i++)
    {
        A[i] = i;
        B[i] = i;
    }

    // Access flags follow what the kernels do with each buffer: A is doubled in place
    // (read and written), B is only read, C is only written. A and C were created
    // ReadOnly before, which makes kernel writes to them undefined.
    IMem<float> hDeviceMemA = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.ReadWrite, A, out error);
    IMem<float> hDeviceMemB = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, B, out error);
    IMem<float> hDeviceMemC = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.WriteOnly, C, out error);

    // Create a command queue.
    var cmdQueue = Cl.CreateCommandQueue(context, selectedDevice, CommandQueueProperties.None, out error);

    int intPtrSize = Marshal.SizeOf(typeof(IntPtr));
    error = Cl.SetKernelArg(kernelDouble, 0, new IntPtr(intPtrSize), hDeviceMemA);
    error = Cl.SetKernelArg(kernelAdd, 0, new IntPtr(intPtrSize), hDeviceMemA);
    error = Cl.SetKernelArg(kernelAdd, 1, new IntPtr(intPtrSize), hDeviceMemB);
    error = Cl.SetKernelArg(kernelAdd, 2, new IntPtr(intPtrSize), hDeviceMemC);

    // Write data from host to device; each command waits on the previous one's event.
    Event clevent;
    error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemA, Bool.True, IntPtr.Zero, new IntPtr(1000 * sizeof(float)), A, 0, null, out clevent);
    error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemB, Bool.True, IntPtr.Zero, new IntPtr(1000 * sizeof(float)), B, 1, new[] { clevent }, out clevent);

    // Execute the kernels: double A, then C = A + B - 1500.
    error = Cl.EnqueueNDRangeKernel(cmdQueue, kernelDouble, 1, null, new IntPtr[] { new IntPtr(1000) }, null, 1, new[] { clevent }, out clevent);
    error = Cl.EnqueueNDRangeKernel(cmdQueue, kernelAdd, 1, null, new IntPtr[] { new IntPtr(1000) }, null, 1, new[] { clevent }, out clevent);
    Console.WriteLine($"Run result: {error}");

    // Non-blocking read of C, then wait for it to complete.
    error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Bool.False, 0, C.Length, C, 1, new[] { clevent }, out clevent);
    Cl.WaitForEvents(1, new[] { clevent });

    for (var i = 0; i < 1000; i++)
    {
        Console.WriteLine($"[{i}]: {C[i]}");
    }

    program.Dispose();

    foreach (var res in typeof(SourceLoader).Assembly.GetManifestResourceNames())
    {
        Console.WriteLine(res);
    }
}