/// <summary>
/// Creates a wrapper around an existing kernel and remembers the inputs that were passed with it.
/// </summary>
/// <param name="context">Compute context stored in <c>_context</c>.</param>
/// <param name="kernel">Kernel stored in <c>_kernel</c>.</param>
/// <param name="sourcecodes">Source strings stored in <c>_sourcecodes</c>.</param>
/// <param name="defines">Name/value defines stored in <c>_defines</c>.</param>
private RenderKernel(
    ComputeContext context,
    ComputeKernel kernel,
    string[] sourcecodes,
    Dictionary<string, string> defines)
{
    _defines = defines;
    _sourcecodes = sourcecodes;
    _kernel = kernel;
    _context = context;
}
/// <summary>
/// Binds a compiled kernel to the program it belongs to.
/// </summary>
/// <param name="program">Owning program; must not be null.</param>
/// <param name="kernel">Underlying compute kernel; must not be null.</param>
internal OpenCLKernel(OpenCLProgram program, ComputeKernel kernel)
{
    // Preconditions are declared first, before any state is touched.
    Contract.Requires(program != null);
    Contract.Requires(kernel != null);

    Program = program;
    ComputeKernel = kernel;
}
/// <summary>
/// Writes the position, look-at and up vectors into three consecutive kernel arguments.
/// </summary>
/// <param name="kernel">Kernel receiving the arguments.</param>
/// <param name="useDouble">True to pass <c>Vector4d</c> values, false to pass <c>Vector4</c> values.</param>
/// <param name="startIndex">Index of the first argument to set; incremented once per argument written.</param>
public override void ApplyToKernel(ComputeKernel kernel, bool useDouble, ref int startIndex)
{
    if (!useDouble)
    {
        // Single-precision path: each vector is cast to Vector3 and then wrapped in a Vector4.
        kernel.SetValueArgument(startIndex++, new Vector4((Vector3)_position));
        kernel.SetValueArgument(startIndex++, new Vector4((Vector3)_lookat));
        kernel.SetValueArgument(startIndex++, new Vector4((Vector3)_up));
        return;
    }

    // Double-precision path.
    kernel.SetValueArgument(startIndex++, new Vector4d(_position));
    kernel.SetValueArgument(startIndex++, new Vector4d(_lookat));
    kernel.SetValueArgument(startIndex++, new Vector4d(_up));
}
/// <summary>
/// Writes <c>_x</c>, <c>_y</c> and <c>_zoom</c> into three consecutive kernel arguments.
/// </summary>
/// <param name="kernel">Kernel receiving the arguments.</param>
/// <param name="isDouble">True to pass the values unchanged, false to cast each one to <c>float</c> first.</param>
/// <param name="startIndex">Index of the first argument to set; incremented once per argument written.</param>
public override void ApplyToKernel(ComputeKernel kernel, bool isDouble, ref int startIndex)
{
    if (!isDouble)
    {
        // Single-precision path: narrow every value before handing it to the kernel.
        kernel.SetValueArgument(startIndex++, (float)_x);
        kernel.SetValueArgument(startIndex++, (float)_y);
        kernel.SetValueArgument(startIndex++, (float)_zoom);
        return;
    }

    kernel.SetValueArgument(startIndex++, _x);
    kernel.SetValueArgument(startIndex++, _y);
    kernel.SetValueArgument(startIndex++, _zoom);
}
/// <summary>
/// Builds the program for <c>device</c>, creates the requested kernel(s) and the command queue.
/// </summary>
/// <param name="kernelNames">
/// Names of the kernels to use. With exactly one name only that kernel is created; with more than
/// one, every kernel in the program is created and indexed by function name.
/// </param>
/// <exception cref="ArgumentException">No kernel name was supplied.</exception>
public void SetupDevice(params string[] kernelNames)
{
    // Fail early with a clear message instead of an index error after the build has already run.
    if (kernelNames == null || kernelNames.Length == 0)
    {
        throw new ArgumentException("At least one kernel name is required.", "kernelNames");
    }

    try
    {
        this.program.Build(new[] { device }, string.Empty, null, IntPtr.Zero);
    }
    catch (Exception)
    {
        // Trace the log of the device that was actually built, not of an unrelated first device.
        Tracer.TraceLine(this.program.GetBuildLog(device));
        throw;
    }

    if (kernelNames.Length > 1)
    {
        kernels = program.CreateAllKernels().ToDictionary(item => item.FunctionName);
    }
    else
    {
        kernel = program.CreateKernel(kernelNames[0]);
    }

    // NOTE(review): the queue targets context.Devices[0] while the build targets `device` — confirm they are the same device.
    commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
}
/// <summary>
/// Runs the queen search in batches on the second device of <paramref name="context"/> and prints
/// the elapsed time and the total number of solutions.
/// </summary>
/// <param name="context">Compute context used to create the command queue and the task buffers.</param>
/// <param name="kernel">Kernel whose argument 0 receives the task buffer of each batch.</param>
private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
{
    var todos = GetQueenTaskPartition(NumQueens, 4);
    var done = new List<QueenTask>();
    ComputeEventList eventList = new ComputeEventList();

    // The queue wraps a native handle; dispose it once the search is over.
    using (var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None))
    {
        Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

        QueenTask[] inProgress = GetNextAssignment(new QueenTask[] {}, todos, done);

        var sw = new Stopwatch();
        sw.Start();

        while (inProgress.Any())
        {
            // A fresh buffer is created per batch, so it must also be released per batch.
            using (var taskBuffer = new ComputeBuffer<QueenTask>(
                context,
                ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                inProgress))
            {
                kernel.SetMemoryArgument(0, taskBuffer);
                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                for (int i = 0; i < 12; i++)
                {
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);
                }

                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);

                // Everything queued above is non-blocking; wait before the buffer is released.
                commands.Finish();
            }

            inProgress = GetNextAssignment(inProgress, todos, done);
        }

        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);
    }

    ulong sum = done.Select(state => state.solutions)
                    .Aggregate((total, next) => total + next);
    Console.WriteLine("Q({0})={1}", NumQueens, sum);
}
/// <summary>
/// Selects the first OpenCL platform and its first context device, compiles the MD5 source file
/// and creates the <c>crackMD5</c> kernel. Exits the process when no platform is available.
/// </summary>
public OpenCLPasswordMatcher ()
{
    if (ComputePlatform.Platforms.Count == 0) {
        Console.WriteLine ("Cound not find any OpenCL platforms");
        Environment.Exit (1);
    }

    var platform = ComputePlatform.Platforms [0];

    logger.Info ("Found {0} computing devices:", platform.Devices.Count);
    foreach (var d in platform.Devices) {
        logger.Info ("* {0}", d.Name);
    }

    Context = new ComputeContext (ComputeDeviceTypes.All, new ComputeContextPropertyList (platform), null, IntPtr.Zero);
    Device = Context.Devices [0];
    logger.Info ("Using first device.");

    // load opencl source; the reader is disposed even when reading fails
    string clSource;
    using (StreamReader streamReader = new StreamReader (MD5_OPENCL_FILE)) {
        clSource = streamReader.ReadToEnd ();
    }

    // create program with opencl source
    ComputeProgram program = new ComputeProgram (Context, clSource);

    // compile opencl source
    try {
        program.Build (null, null, null, IntPtr.Zero);
    } catch (Exception) {
        logger.Error ("Build log: " + program.GetBuildLog (Device));
        // plain rethrow keeps the original stack trace
        throw;
    }

    // load chosen kernel from program
    Kernel = program.CreateKernel ("crackMD5");
}
/// <summary>
/// Uploads <paramref name="frame"/> into a device buffer and sets all six kernel arguments.
/// </summary>
/// <param name="frame">Source image; its bytes are copied into a pinned managed array.</param>
/// <param name="ctx">Compute context used to create the buffers.</param>
/// <param name="k">Kernel whose arguments 0-5 are set.</param>
/// <param name="WindowValue">Value passed as kernel argument 5.</param>
public static void UpdateArguments(Mat frame, ComputeContext ctx, ComputeKernel k, int WindowValue)
{
    if (frame.Width * frame.Height != Length)
    {
        // Pixel count changed: release the previous result buffers before allocating new ones.
        if (minBufferCB != null)
        {
            minBufferCB.Dispose();
        }
        if (maxBufferCB != null)
        {
            maxBufferCB.Dispose();
        }

        // Allocate memory.
        maxBuffer = new int[frame.Cols];
        minBuffer = new int[frame.Cols];
        minBufferCB = new ComputeBuffer<int>(ctx, ComputeMemoryFlags.WriteOnly, minBuffer.Length);
        maxBufferCB = new ComputeBuffer<int>(ctx, ComputeMemoryFlags.WriteOnly, maxBuffer.Length);
        Length = frame.Width * frame.Height;
    }

    // Release the device buffer first: it was created over the pinned host memory
    // (UseHostPointer), so the pin must outlive the buffer, not the other way round.
    if (frameBuffer != null)
    {
        frameBuffer.Dispose();
    }
    if (frameDataHandler.IsAllocated)
    {
        frameDataHandler.Free();
    }

    frameData = new byte[frame.Width * frame.Height];
    frameDataHandler = GCHandle.Alloc(frameData, GCHandleType.Pinned);

    // Set the memory allocation parameters.
    frameBuffer = new ComputeBuffer<byte>(
        ctx,
        ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer,
        frame.Width * frame.Height,
        frameDataHandler.AddrOfPinnedObject());
    frame.CopyTo(frameData);

    k.SetMemoryArgument(0, frameBuffer);
    k.SetValueArgument<int>(1, frame.Rows);
    k.SetValueArgument<int>(2, frame.Cols);
    k.SetMemoryArgument(3, maxBufferCB);
    k.SetMemoryArgument(4, minBufferCB);
    k.SetValueArgument<int>(5, WindowValue);
}
/// <summary>
/// Compile the kernel with a map of preprocessor defines, a collection of
/// name-value pairs. Any previously compiled program and kernel are disposed first.
/// </summary>
///
/// <param name="options">A map of preprocessor defines; null or empty builds without options.</param>
public void Compile(IDictionary<String, String> options)
{
    // Clear out any old kernel and program. Each is checked on its own because a failed
    // earlier build can leave a program without a kernel.
    if (this.kernel != null)
    {
        this.kernel.Dispose();
    }
    if (this.program != null)
    {
        this.program.Dispose();
    }

    // Create the program from the source code
    this.program = new ComputeProgram(this.context, this.cl);

    // Turn every entry into a "-D key=value" switch, separated by single spaces.
    string buildOptions = null;
    if (options != null && options.Count > 0)
    {
        StringBuilder builder = new StringBuilder();
        foreach (KeyValuePair<String, String> obj in options)
        {
            if (builder.Length > 0)
            {
                builder.Append(" ");
            }
            builder.Append("-D ");
            builder.Append(obj.Key);
            builder.Append("=");
            builder.Append(obj.Value);
        }
        buildOptions = builder.ToString();
    }

    program.Build(null, buildOptions, null, IntPtr.Zero);

    // Create the kernel
    this.kernel = Program.CreateKernel(this.kernelName);
}
/// <summary>
/// Looks up the Ethash program and its two kernels for this device index and local work size,
/// building and caching them when they are not cached yet.
/// </summary>
private void BuildEthashProgram()
{
    // Best effort, as before: a failed or timed-out wait does not stop the build.
    // Track whether the mutex is actually held so it is only released when owned.
    bool lockTaken = false;
    try
    {
        lockTaken = mProgramArrayMutex.WaitOne(5000);
    }
    catch (System.Threading.AbandonedMutexException)
    {
        // An abandoned mutex is still acquired by the waiting thread.
        lockTaken = true;
    }
    catch (Exception)
    {
    }

    try
    {
        // NOTE(review): the lookup only hits if the dictionaries compare long[] keys by content — confirm their comparer.
        long[] key = new long[] { DeviceIndex, mEthashLocalWorkSizeArray[0] };

        if (mEthashProgramArray.ContainsKey(key))
        {
            mEthashProgram = mEthashProgramArray[key];
            mEthashDAGKernel = mEthashDAGKernelArray[key];
            mEthashSearchKernel = mEthashSearchKernelArray[key];
        }
        else
        {
            mEthashProgram = BuildProgram("ethash_lbry", mEthashLocalWorkSizeArray[0], "-O1", "", "");
            mEthashProgramArray[key] = mEthashProgram;
            mEthashDAGKernelArray[key] = mEthashDAGKernel = mEthashProgram.CreateKernel("GenerateDAG");
            mEthashSearchKernelArray[key] = mEthashSearchKernel = mEthashProgram.CreateKernel("search");
        }
    }
    finally
    {
        // Release even when the build throws, so other threads are not blocked.
        if (lockTaken)
        {
            try
            {
                mProgramArrayMutex.ReleaseMutex();
            }
            catch (Exception)
            {
            }
        }
    }
}
/// <summary>
/// Looks up the Ethash program and its two kernels for this device index and local work size,
/// compiling <c>Kernels\ethash_lbry.cl</c> and caching the result when they are not cached yet.
/// </summary>
private void BuildEthashProgram()
{
    ComputeDevice computeDevice = OpenCLDevice.GetComputeDevice();

    // Best effort, as before: a failed or timed-out wait does not stop the build.
    // Track whether the mutex is actually held so it is only released when owned.
    bool lockTaken = false;
    try
    {
        lockTaken = mProgramArrayMutex.WaitOne(5000);
    }
    catch (System.Threading.AbandonedMutexException)
    {
        // An abandoned mutex is still acquired by the waiting thread.
        lockTaken = true;
    }
    catch (Exception)
    {
    }

    try
    {
        // NOTE(review): the lookup only hits if the dictionaries compare long[] keys by content — confirm their comparer.
        long[] key = new long[] { DeviceIndex, mEthashLocalWorkSizeArray[0] };

        if (mEthashProgramArray.ContainsKey(key))
        {
            mEthashProgram = mEthashProgramArray[key];
            mEthashDAGKernel = mEthashDAGKernelArray[key];
            mEthashSearchKernel = mEthashSearchKernelArray[key];
        }
        else
        {
            String source = System.IO.File.ReadAllText(@"Kernels\ethash_lbry.cl");
            mEthashProgram = new ComputeProgram(Context, source);
            MainForm.Logger(@"Loaded Kernels\ethash_lbry.cl for Device #" + DeviceIndex + ".");

            // Only AMD gets a vendor-specific prefix. The NVIDIA candidate
            // "-cl-nv-opt-level=1 -cl-nv-maxrregcount=256 " is intentionally left disabled.
            String buildOptions = (OpenCLDevice.GetVendor() == "AMD" ? "-O1 " : "")
                                  + " -IKernels -DWORKSIZE=" + mEthashLocalWorkSizeArray[0];
            try
            {
                mEthashProgram.Build(OpenCLDevice.DeviceList, buildOptions, null, IntPtr.Zero);
            }
            catch (Exception)
            {
                MainForm.Logger(mEthashProgram.GetBuildLog(computeDevice));
                throw;
            }
            MainForm.Logger("Built Ethash program for Device #" + DeviceIndex + ".");
            MainForm.Logger("Build options: " + buildOptions);

            mEthashProgramArray[key] = mEthashProgram;
            mEthashDAGKernelArray[key] = mEthashDAGKernel = mEthashProgram.CreateKernel("GenerateDAG");
            mEthashSearchKernelArray[key] = mEthashSearchKernel = mEthashProgram.CreateKernel("search");
        }
    }
    finally
    {
        // Release even when the build throws, so other threads are not blocked.
        if (lockTaken)
        {
            try
            {
                mProgramArrayMutex.ReleaseMutex();
            }
            catch (Exception)
            {
            }
        }
    }
}
// --- GrassComponent ---
/// <summary>
/// Creates the tree material, binds its buffers and textures, and uploads three quads
/// (front, side, top) per entry of <c>TreeTextures</c>.
/// </summary>
private void InitializeMaterial()
{
    string quadKernelName = "FindQuad";

    _treeMaterial = new Material(TreeShader);

    // ********************** Material bindings **********************
    _treeMaterial.SetBuffer(ShaderID.treeBuffer, _megaBuffer);
    TestMat.SetBuffer("_Buffer", _closeMegaBuffer);
    _treeMaterial.SetTexture(ShaderID.treeTexture, _treeTextures);
    _treeMaterial.SetTexture(ShaderID.perlinNoise, PerlinNoise);
    _treeMaterial.SetTexture(ShaderID.colorVariance, PerlinNoise);
    _treeMaterial.SetVectorArray(ShaderID.minMaxWidthHeight, _minMaxWidthHeight);

    List<Vector4> quadList = new List<Vector4>();
    ComputeBuffer quadResult = new ComputeBuffer(1, sizeof(float) * 4, ComputeBufferType.Append);
    ComputeKernel quadFinder = new ComputeKernel(quadKernelName, ComputeShader);

    foreach (TerrainTextures entry in TreeTextures)
    {
        // All three billboard textures are fetched before any of them is measured.
        var frontTexture = GetBillboardTexture(Sides.Front, entry.FeatureTexture);
        var sideTexture = GetBillboardTexture(Sides.Side, entry.FeatureTexture);
        var topTexture = GetBillboardTexture(Sides.Top, entry.FeatureTexture);

        var frontQuad = CalcSide(frontTexture, quadFinder, quadResult);
        var sideQuad = CalcSide(sideTexture, quadFinder, quadResult);
        var topQuad = CalcSide(topTexture, quadFinder, quadResult);

        // Each quad is divided by the width of the texture it was computed from.
        quadList.Add(frontQuad / frontTexture.width);
        quadList.Add(sideQuad / sideTexture.width);
        quadList.Add(topQuad / topTexture.width);
    }

    quadResult.SafeRelease();
    _treeMaterial.SetVectorArray(ShaderID.quads, quadList.ToArray());
    //Debug.LogFormat("Generated Mesh");
}
/// <summary>
/// Builds the <c>Calc</c> kernel on the first GPU of the first platform, runs it once over
/// two four-element rows and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    int[] r1 = new int[] {1, 2, 3, 4};
    int[] r2 = new int[] {4, 3, 2, 1};
    int rowSize = r1.Length;

    // First platform, a context over all of its GPU devices, and a queue on the first GPU.
    ComputePlatform platform = ComputePlatform.Platforms[0];
    ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);
    ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, properties, null, IntPtr.Zero);
    ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

    // Compile the source and pick the kernel.
    ComputeProgram program = new ComputeProgram(context, CalculateKernel);
    program.Build(null, null, null, IntPtr.Zero);
    ComputeKernel kernel = program.CreateKernel("Calc");

    // One read-only buffer per input row, both created over the host arrays.
    ComputeMemoryFlags inputFlags = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;
    ComputeBuffer<int> row1Buffer = new ComputeBuffer<int>(context, inputFlags, r1);
    ComputeBuffer<int> row2Buffer = new ComputeBuffer<int>(context, inputFlags, r2);

    kernel.SetMemoryArgument(0, row1Buffer); // first integer array
    kernel.SetMemoryArgument(1, row2Buffer); // second integer array
    kernel.SetValueArgument(2, rowSize);     // array size

    // Run the kernel as a single task and wait for completion.
    queue.ExecuteTask(kernel, null);
    queue.Finish();

    Console.WriteLine("Finished");
    Console.ReadKey();
}
/// <summary>
/// Builds the six compute programs with the "-g" option, creates all kernels and prints how long
/// the two phases took.
/// </summary>
public void InitializeComponents()
{
    //ceb = new ComputeEventList();
    time3 = new Stopwatch();
    time3.Start();

    // ---- Phase 1: create and build every program ----
    p_dataInitialization = new ComputeProgram(context, s_dataInitialization);
    p_dataInitialization.Build(devices, "-g", null, IntPtr.Zero);

    p_reorder = new ComputeProgram(context, s_reorder);
    p_reorder.Build(devices, "-g", null, IntPtr.Zero);

    p_elementCount = new ComputeProgram(context, s_elementCount);
    p_elementCount.Build(devices, "-g", null, IntPtr.Zero);

    p_prefixSum = new ComputeProgram(context, s_prefixSum);
    p_prefixSum.Build(devices, "-g", null, IntPtr.Zero);

    p_ccArrayCreation = new ComputeProgram(context, s_ccArrayCreation);
    p_ccArrayCreation.Build(devices, "-g", null, IntPtr.Zero);

    p_radixSort = new ComputeProgram(context, s_radixSort);
    p_radixSort.Build(devices, "-g", null, IntPtr.Zero);

    // Elapsed time so far is the build phase; the same clock then marks the start of phase 2.
    p_count = time3.ElapsedMilliseconds;
    k_start = time3.ElapsedMilliseconds;

    // ---- Phase 2: create the kernels ----
    k_dataInitialization = p_dataInitialization.CreateKernel("dataInitialization");
    k_reorder = p_reorder.CreateKernel("reorder");
    k_elementCount = p_elementCount.CreateKernel("elementCount");
    k_prefixSum = p_prefixSum.CreateKernel("prefixSum");
    k_ccArrayCreation = p_ccArrayCreation.CreateKernel("ccArrayCreation");

    // The radix sort program provides four kernels.
    kernel_block_sort = p_radixSort.CreateKernel("clBlockSort");
    kernel_block_scan = p_radixSort.CreateKernel("clBlockScan");
    kernel_block_prefix = p_radixSort.CreateKernel("clBlockPrefix");
    kernel_reorder = p_radixSort.CreateKernel("clReorder");

    k_count = time3.ElapsedMilliseconds - k_start;

    Console.WriteLine("TIME SPENT INITIALIZING AND BUILDING PROGRAMS: " + p_count + "ms");
    Console.WriteLine("TIME SPENT CREATING KERNELS: " + k_count + "ms");
}
/// <summary>
/// Executes the compiled kernel function with the specified name.
/// </summary>
/// <typeparam name="TSource">Element type of the array arguments.</typeparam>
/// <param name="functionName">Name of the kernel function to run.</param>
/// <param name="args">
/// Kernel arguments. The first argument of type <c>TSource[]</c> determines the global work size
/// (1 when there is none). Every argument for which <c>BuildKernelArguments</c> returned a buffer
/// is read back into its array after execution.
/// </param>
/// <exception cref="ExecutionException">
/// The function was not found, or preparing, running or reading back the kernel failed.
/// </exception>
public override void Execute<TSource>(string functionName, params object[] args)
{
    ComputeKernel kernel = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));

    // Check before creating the queue so a missing kernel does not leak a command queue.
    if (kernel == null)
    {
        throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
    }

    ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None);

    try
    {
        var ndobject = (TSource[])args.FirstOrDefault(x => (x.GetType() == typeof(TSource[])));
        long length = ndobject != null ? ndobject.Length : 1;

        var buffers = BuildKernelArguments<TSource>(args, kernel, length);
        try
        {
            commands.Execute(kernel, null, new long[] { length }, null, null);

            foreach (var item in buffers)
            {
                // Blocking read straight into the caller's array at the same argument position.
                TSource[] r = (TSource[])args[item.Key];
                commands.ReadFromBuffer(item.Value, ref r, true, null);
                //args[item.Key] = r;
            }

            commands.Finish();
        }
        finally
        {
            // Release every device buffer, also when execution or read-back failed.
            foreach (var item in buffers)
            {
                item.Value.Dispose();
            }
        }
    }
    catch (Exception ex)
    {
        throw new ExecutionException(ex.Message);
    }
    finally
    {
        commands.Dispose();
    }
}
/// <summary>
/// Builds the Ethash program for one device, creates its DAG and search kernels and starts
/// the background miner thread.
/// </summary>
/// <param name="aDevice">Device forwarded to the base constructor.</param>
/// <param name="aDeviceIndex">Device index forwarded to the base constructor and used in log messages.</param>
/// <param name="aStratum">Stratum connection stored in <c>mStratum</c>.</param>
public OpenCLEthashMiner(ComputeDevice aDevice, int aDeviceIndex, EthashStratum aStratum)
    : base(aDevice, aDeviceIndex, "Ethash")
{
    mStratum = aStratum;
    mGlobalWorkSize = 4096 * mLocalWorkSize * Device.MaxComputeUnits;

    // Load the program from source.
    string source = System.IO.File.ReadAllText(@"Kernels\ethash.cl");
    mProgram = new ComputeProgram(this.Context, source);
    //mProgram = new ComputeProgram(this.Context, new List<byte[]> { System.IO.File.ReadAllBytes(@"BinaryKernels\ethash-newEllesmeregw192l8.bin") }, new List<ComputeDevice> { Device });
    MainForm.Logger("Loaded ethash program for Device #" + aDeviceIndex + ".");

    // Build for this device only, passing the local work size as a define.
    List<ComputeDevice> deviceList = new List<ComputeDevice> { Device };
    mProgram.Build(deviceList, "-DWORKSIZE=" + mLocalWorkSize, null, IntPtr.Zero);
    MainForm.Logger("Built ethash program for Device #" + aDeviceIndex + ".");

    mDAGKernel = mProgram.CreateKernel("GenerateDAG");
    MainForm.Logger("Created DAG kernel for Device #" + aDeviceIndex + ".");

    mSearchKernel = mProgram.CreateKernel("search");
    MainForm.Logger("Created search kernel for Device #" + aDeviceIndex + ".");

    // The worker is a background thread and is started immediately.
    mMinerThread = new Thread(new ThreadStart(MinerThread));
    mMinerThread.IsBackground = true;
    mMinerThread.Start();
}
/// <summary>
/// Sets one kernel argument from a boxed value: arrays become a read/write buffer created over
/// the array, anything else is passed through the generic <c>SetValueArgument</c> via reflection.
/// </summary>
/// <param name="kernel">Kernel receiving the argument.</param>
/// <param name="index">Argument position.</param>
/// <param name="arg">Argument value; must not be null.</param>
/// <exception cref="ArgumentException"><paramref name="arg"/> is null.</exception>
void Setargument(ComputeKernel kernel, int index, object arg)
{
    if (arg == null)
    {
        throw new ArgumentException("Argument " + index + " is null");
    }

    Type valueType = arg.GetType();

    if (!valueType.IsArray)
    {
        // Scalar: close SetValueArgument<T> over the runtime type and invoke it.
        var setter = typeof(ComputeKernel).GetMethod("SetValueArgument").MakeGenericMethod(valueType);
        setter.Invoke(kernel, new object[] { index, arg });
        return;
    }

    // Array: construct ComputeBuffer<element type> over the array itself.
    Type bufferType = typeof(ComputeBuffer<>).MakeGenericType(valueType.GetElementType());
    object[] constructorArguments = new object[]
    {
        context,
        ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer,
        arg
    };
    ComputeMemory arrayBuffer = (ComputeMemory)Activator.CreateInstance(bufferType, constructorArguments);
    kernel.SetMemoryArgument(index, arrayBuffer); // set the array
}
// 26 ms 4096x4096@512 iter with 1024 cores
/// <summary>
/// Initializes the static OpenCL state: platform, GPU context, queue, the compiled <c>mandel</c>
/// kernel and the message and gradient buffers.
/// </summary>
/// <param name="sourceFile">Path of the OpenCL source file to compile.</param>
static void SetupCUDA(string sourceFile)
{
    //var watch = System.Diagnostics.Stopwatch.StartNew();

    // pick first platform
    platform = ComputePlatform.Platforms[0];

    // create context with all gpu devices
    context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero); // LEAK

    // create a command queue with first gpu found
    queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

    // load opencl source; the reader is only needed for this one read
    string clSource;
    using (var sourceReader = new StreamReader(sourceFile))
    {
        clSource = sourceReader.ReadToEnd();
    }

    // create and compile the program, then pick the kernel
    program = new ComputeProgram(context, clSource);
    program.Build(null, null, null, IntPtr.Zero);
    kernel = program.CreateKernel("mandel");

    // buffers over the host arrays: message is written by the kernel, gradient is read by it
    messageBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, message);
    gradientBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, gradient);
}
/// <summary>
/// Disposes and clears every OpenCL object held by this instance, then marks the OpenCL state dirty.
/// </summary>
void DestroyClBuffers()
{
    // Memory objects first.
    if (clImage != null)
    {
        clImage.Dispose();
        clImage = null;
    }

    if (result != null)
    {
        result.Dispose();
        result = null;
    }

    if (cmap != null)
    {
        cmap.Dispose();
        cmap = null;
    }

    // Then the queue, the kernel and finally the program.
    if (clCommands != null)
    {
        clCommands.Dispose();
        clCommands = null;
    }

    if (clKernel != null)
    {
        clKernel.Dispose();
        clKernel = null;
    }

    if (clProgram != null)
    {
        clProgram.Dispose();
        clProgram = null;
    }

    clDirty = true;
}
/// <summary>
/// Compiles the kernel named after the runtime type of <paramref name="kernelInstance"/> and binds
/// all registered int and float buffers as its leading memory arguments.
/// </summary>
/// <param name="kernelInstance">Object whose type name selects the kernel to load.</param>
/// <returns>The created kernel, or null when hardware acceleration is disabled.</returns>
public ComputeKernel CreateKernel(object kernelInstance)
{
    string kernelName = kernelInstance.GetType().Name;

    if (!HardwareAccelerationEnabled)
    {
        return null;
    }

    IKernel definition = KernelManager.LoadKernel(kernelName);

    // Create and build the opencl program.
    var builtProgram = new ComputeProgram(_context, definition.Code);
    builtProgram.Build(null, null, null, IntPtr.Zero);

    // Create the kernel function and set its arguments.
    ComputeKernel created = builtProgram.CreateKernel("Run");

    // Int buffers occupy the first argument slots, float buffers follow directly after.
    int slot = 0;
    foreach (string bufferName in _intComputeBuffers.Keys)
    {
        created.SetMemoryArgument(slot++, _intComputeBuffers[bufferName]);
    }

    foreach (string bufferName in _floatComputeBuffers.Keys)
    {
        created.SetMemoryArgument(slot++, _floatComputeBuffers[bufferName]);
    }

    return created;
}
/// <summary>
/// Compiles a new kernel from the stored sources and defines, disposes the current one and
/// refreshes <c>_useDouble</c> from the "UseDouble" define.
/// </summary>
public void Recompile()
{
    lock (_kernelLock)
    {
        // Compile first: the current kernel is only disposed once compilation has returned.
        var replacement = Compile(_context, _sourcecodes, _defines);
        Dispose();

        // Double precision is on only when the define exists, parses as an int and is non-zero.
        string rawFlag;
        int parsedFlag;
        _useDouble = _defines.TryGetValue("UseDouble", out rawFlag)
                     && int.TryParse(rawFlag, out parsedFlag)
                     && parsedFlag != 0;

        _kernel = replacement;
    }
}
/// <summary>
/// Release this kernel and its program. Safe to call when either one was never created.
/// </summary>
///
public virtual void Release()
{
    // Checked independently: a program can exist without a kernel, and dereferencing
    // the missing kernel would throw.
    if (this.kernel != null)
    {
        this.kernel.Dispose();
        this.kernel = null;
    }

    if (this.program != null)
    {
        this.program.Dispose();
        this.program = null;
    }
}
/// <summary>
/// Lets the base class write its arguments, then appends <c>Fov</c>, three times <c>MoveSpeed</c>
/// and <c>Frame</c> as the next three kernel arguments.
/// </summary>
/// <param name="kernel">Kernel receiving the arguments.</param>
/// <param name="isDouble">False to cast <c>Fov</c> and <c>MoveSpeed</c> to <c>float</c> first.</param>
/// <param name="param">Next free argument index; incremented once per argument written here.</param>
public override void ApplyToKernel(ComputeKernel kernel, bool isDouble, ref int param)
{
    base.ApplyToKernel(kernel, isDouble, ref param);

    if (!isDouble)
    {
        // The cast applies to MoveSpeed alone; the multiplication then happens in float.
        kernel.SetValueArgument(param++, (float)Fov);
        kernel.SetValueArgument(param++, (float)MoveSpeed * 3);
    }
    else
    {
        kernel.SetValueArgument(param++, Fov);
        kernel.SetValueArgument(param++, MoveSpeed * 3);
    }

    // Frame is passed unchanged in both precisions.
    kernel.SetValueArgument(param++, Frame);
}
///// <summary>Number of arguments</summary>
//private int nArgs = 0;

///// <summary>Gets how many arguments this kernel has</summary>
//public int NumberOfArguments { get { return nArgs; } }

/// <summary>Creates a new kernel by asking <c>Prog</c> for the function with the given name.</summary>
/// <param name="KernelName">Name of the kernel function passed to <c>Prog.CreateKernel</c>.</param>
public Kernel(string KernelName)
{
    kernel = Prog.CreateKernel(KernelName);
}
/// <summary>
/// Enqueues <paramref name="kernel"/> on <c>Queue</c> over a two-dimensional range with no global offset.
/// </summary>
/// <param name="kernel">Kernel to enqueue.</param>
/// <param name="global1">Global work size of the first dimension.</param>
/// <param name="global2">Global work size of the second dimension.</param>
/// <param name="local1">Local work size of the first dimension.</param>
/// <param name="local2">Local work size of the second dimension.</param>
/// <param name="events">Event collection forwarded to <c>Queue.Execute</c>.</param>
public void Exec2D(ComputeKernel kernel, long global1, long global2, long local1, long local2, ICollection<ComputeEventBase> events)
{
    long[] globalWorkSize = { global1, global2 };
    long[] localWorkSize = { local1, local2 };

    Queue.Execute(kernel, null, globalWorkSize, localWorkSize, events);
}
/// <summary>
/// Enqueues <paramref name="kernel"/> on <c>Queue</c> over a one-dimensional range with no global
/// offset and no event list.
/// </summary>
/// <param name="kernel">Kernel to enqueue.</param>
/// <param name="global">Global work size.</param>
/// <param name="local">Local work size.</param>
public void Exec1D(ComputeKernel kernel, long global, long local)
{
    long[] globalWorkSize = { global };
    long[] localWorkSize = { local };

    Queue.Execute(kernel, null, globalWorkSize, localWorkSize, null);
}
/// <summary>
/// Prepares everything OpenCL-related: queue, the four kernels and the three buffers.
/// </summary>
/// <param name="result">Host array backing the write-only result buffer.</param>
/// <param name="left">Host array backing the read-only left operand buffer.</param>
/// <param name="right">Host array backing the read-only right operand buffer.</param>
static void InitializeOpenCL(Real[] result, Real[] left, Real[] right)
{
    // Take the first platform.
    var platform = ComputePlatform.Platforms[0];
    Console.WriteLine("プラットフォーム:{0} ({1})", platform.Name, platform.Version);

    // Create a context over the platform's GPU devices.
    var context = new ComputeContext(Cloo.ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

    // Devices available in the context.
    var devices = context.Devices;
    Console.WriteLine("デバイス数:{0}", devices.Count);

    // The queue runs on the first device.
    queue = new ComputeCommandQueue(context, devices[0], ComputeCommandQueueFlags.None);

    // Print information about every available device.
    foreach (var device in devices)
    {
        Console.WriteLine("* {0} ({1})", device.Name, device.Vendor);
    }

    // Create the program.
    var program = new ComputeProgram(context, Properties.Resources.HeavyWorkItem);

    // Try to build it ...
    try
    {
        string realString = ((typeof(Real) == typeof(Double)) ? "double" : "float");
        string buildOptions = string.Format(
            " -D REAL={0} -D REALV={0}{1} -D VLOADN=vload{1} -D VSTOREN=vstore{1} -D COUNT_PER_WORKITEM={2} -Werror",
            realString,
            VECTOR_COUNT,
            COUNT_PER_WORKITEM);

        program.Build(devices, buildOptions, null, IntPtr.Zero);
    }
    // ... and on failure rethrow with the build log of the first device attached.
    catch (BuildProgramFailureComputeException ex)
    {
        throw new ApplicationException(string.Format("{0}\n{1}", ex.Message, program.GetBuildLog(devices[0])), ex);
    }

    // Create the kernels.
    addOneElement = program.CreateKernel("AddOneElement");
    addOneVector = program.CreateKernel("AddOneVector");
    addMoreElement = program.CreateKernel("AddMoreElement");
    addMoreVector = program.CreateKernel("AddMoreVector");

    // Create the buffers over the host arrays.
    bufferLeft = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, left);
    bufferRight = new ComputeBuffer<Real>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, right);
    bufferResult = new ComputeBuffer<Real>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, result);
}
/// <summary>
/// Attempts to initialize OpenCL for the selected GPU. Does nothing when a kernel already exists.
/// </summary>
/// <exception cref="InvalidOperationException">No platform exposes the configured device handle.</exception>
/// <exception cref="Exception">The build failed; the message is the build log and the inner
/// exception is the original compute error.</exception>
private void InitializeOpenCL()
{
    // only initialize once
    if (clKernel != null)
        return;

    // select the device we've been instructed to use
    clDevice = ComputePlatform.Platforms
        .SelectMany(i => i.Devices)
        .SingleOrDefault(i => i.Handle.Value == Gpu.CLDeviceHandle.Value);

    // fail with a clear message instead of a null dereference on clDevice.Platform
    if (clDevice == null)
        throw new InvalidOperationException("The configured OpenCL device was not found on any platform.");

    // context we'll be working underneath
    clContext = new ComputeContext(new ComputeDevice[] { clDevice }, new ComputeContextPropertyList(clDevice.Platform), null, IntPtr.Zero);

    // queue to control device
    clQueue = new ComputeCommandQueue(clContext, clDevice, ComputeCommandQueueFlags.None);

    // buffers to store kernel output
    // NOTE(review): created ReadOnly although described as kernel output — confirm the flag is intended.
    clBuffer0 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);
    clBuffer1 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);

    // kernel code
    string kernelCode;
    using (var rdr = new StreamReader(GetType().Assembly.GetManifestResourceStream("BitMaker.Miner.Gpu.DiabloMiner.cl")))
        kernelCode = rdr.ReadToEnd();

    clProgram = new ComputeProgram(clContext, kernelCode);

    try
    {
        // build kernel for device
        clProgram.Build(new ComputeDevice[] { clDevice }, "-D WORKSIZE=" + clDevice.MaxWorkGroupSize, null, IntPtr.Zero);
    }
    catch (ComputeException e)
    {
        // keep the original error as inner exception; the message stays the build log
        throw new Exception(clProgram.GetBuildLog(clDevice), e);
    }

    clKernel = clProgram.CreateKernel("search");
}
/// <summary>
/// Entry point for a standard work thread: initializes OpenCL, processes work until cancellation
/// is requested and always releases the OpenCL objects afterwards.
/// </summary>
private void WorkThread()
{
    try
    {
        InitializeOpenCL();

        // continue working until canceled
        while (!cts.IsCancellationRequested)
            Work(Context.GetWork(this, GetType().Name));
    }
    catch (OperationCanceledException)
    {
        // ignore
    }
    finally
    {
        // Runs for every exit path. Each object is checked because initialization
        // may have failed part-way through.
        if (clQueue != null)
            clQueue.Finish();

        if (clKernel != null)
        {
            clKernel.Dispose();
            clKernel = null;
        }

        if (clBuffer0 != null)
        {
            clBuffer0.Dispose();
            clBuffer0 = null;
        }

        if (clBuffer1 != null)
        {
            clBuffer1.Dispose();
            clBuffer1 = null;
        }

        if (clQueue != null)
        {
            clQueue.Dispose();
            clQueue = null;
        }

        clDevice = null;

        if (clProgram != null)
        {
            clProgram.Dispose();
            clProgram = null;
        }

        if (clContext != null)
        {
            clContext.Dispose();
            clContext = null;
        }
    }
}
/// <summary>
/// Implemented by derived types; presumably writes the implementer's values into
/// <paramref name="kernel"/> as kernel arguments — confirm against the implementations.
/// </summary>
/// <param name="kernel">Kernel handed to the implementation.</param>
/// <param name="useDouble">Presumably selects double- versus single-precision argument values — TODO confirm.</param>
/// <param name="startIndex">Passed by reference; presumably the first argument index to use, advanced by the implementation — TODO confirm.</param>
public abstract void ApplyToKernel(ComputeKernel kernel, bool useDouble, ref int startIndex);
/// <summary>
/// Attempts to initialize OpenCL for the selected GPU. Does nothing when a kernel already exists.
/// </summary>
/// <exception cref="Exception">The build failed; the message is the build log and the inner
/// exception is the original compute error.</exception>
internal void InitializeOpenCL()
{
    // only initialize once
    if (clKernel != null)
        return;

    // unused memory so Cloo doesn't break with a null ptr
    // NOTE(review): this block is freed below while the context it was registered with lives on — confirm nothing reads it later.
    var userDataPtr = Marshal.AllocCoTaskMem(512);

    try
    {
        clDevice = Gpu.CLDevice;

        // context we'll be working underneath
        clContext = new ComputeContext(
            new[] { clDevice },
            new ComputeContextPropertyList(clDevice.Platform),
            (p1, p2, p3, p4) => { },
            userDataPtr);

        // queue to control device
        clQueue = new ComputeCommandQueue(clContext, clDevice, ComputeCommandQueueFlags.None);

        // buffers to store kernel output
        clBuffer0 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);
        clBuffer1 = new ComputeBuffer<uint>(clContext, ComputeMemoryFlags.ReadOnly, 16);

        // obtain the program
        clProgram = new ComputeProgram(clContext, Gpu.GetSource());

        // each enabled capability becomes one define on the build command line
        var b = new StringBuilder();
        if (Gpu.WorkSize > 0)
            b.Append(" -D WORKSIZE=").Append(Gpu.WorkSize);
        if (Gpu.HasBitAlign)
            b.Append(" -D BITALIGN");
        if (Gpu.HasBfiInt)
            b.Append(" -D BFIINT");

        try
        {
            // build kernel for device
            clProgram.Build(new[] { clDevice }, b.ToString(), (p1, p2) => { }, userDataPtr);
        }
        catch (ComputeException e)
        {
            // keep the original error as inner exception; the message stays the build log
            throw new Exception(clProgram.GetBuildLog(clDevice), e);
        }

        clKernel = clProgram.CreateKernel("search");
    }
    finally
    {
        Marshal.FreeCoTaskMem(userDataPtr);
    }
}
/// <summary>Sets this variable as an argument for a kernel</summary>
/// <param name="ArgIndex">Index of kernel argument</param>
/// <param name="Kernel">Kernel to receive argument</param>
/// <exception cref="Exception">This is a <c>Variable</c> created from an OpenGL buffer that has
/// not been acquired in OpenCL.</exception>
public void SetAsArgument(int ArgIndex, ComputeKernel Kernel)
{
    // Only buffer objects can originate from OpenGL; anything else skips the check.
    Variable bufferVariable = this as Variable;
    bool isUnacquiredGLBuffer = bufferVariable != null
                                && bufferVariable.CreatedFromGLBuffer
                                && !bufferVariable.AcquiredInOpenCL;

    if (isUnacquiredGLBuffer)
    {
        throw new Exception("Attempting to use a variable created from OpenGL buffer without acquiring. Should use CLGLInteropFunctions to properly acquire and release these variables");
    }

    Kernel.SetMemoryArgument(ArgIndex, VarPointer);
}
bool useGPU = true; // GPU code enabled (from commandline)
#endregion Fields

#region Methods
/// <summary>
/// Initializes the renderer from the command line parameters passed by the template code:
/// accumulator, scene, camera, random table and, when the GPU is enabled, all OpenCL objects.
/// </summary>
/// <param name="rt">Running time, stored in <c>runningTime</c>.</param>
/// <param name="gpu">Whether GPU code is enabled; reset to false here when an OpenCL error occurs.</param>
/// <param name="platformIdx">Platform index, stored in <c>gpuPlatform</c>.</param>
public void Init( int rt, bool gpu, int platformIdx )
{
    // pass command line parameters
    runningTime = rt;
    useGPU = gpu;
    gpuPlatform = platformIdx;

    // initialize accumulator
    accumulator = new Vector3[screen.width * screen.height];
    ClearAccumulator();

    // setup scene
    scene = new Scene();

    // setup camera
    camera = new Camera( screen.width, screen.height );

    // Generate randoms
    Console.Write("Generating randoms....\t");
    randoms = new float[1000];
    Random r = RTTools.GetRNG();
    for (int i = 0; i < 1000; i++)
        randoms[i] = (float)r.NextDouble();
    // Result is not used; the call is kept so the generator advances exactly as before.
    r.Next();
    Console.WriteLine("Done!");

    // initialize required opencl things if gpu is used
    if (!useGPU)
        return;

    // the reader is disposed even when reading the source fails
    string clSource;
    using (StreamReader streamReader = new StreamReader("../../kernel.cl"))
    {
        clSource = streamReader.ReadToEnd();
    }

    platform = ComputePlatform.Platforms[0];
    context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);
    queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
    program = new ComputeProgram(context, clSource);

    try
    {
        program.Build(null, null, null, IntPtr.Zero);
        kernel = program.CreateKernel("Main");

        sceneBuffer = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, scene.toCL());
        rndBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, randoms);
        cameraBuffer = new ComputeBuffer<Vector3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, camera.toCL());
        outputBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, screen.pixels);
        skydome = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, scene.Skydome);

        kernel.SetMemoryArgument(0, outputBuffer);
        kernel.SetValueArgument(1, screen.width);
        kernel.SetValueArgument(2, screen.height);
        kernel.SetMemoryArgument(3, sceneBuffer);
        kernel.SetValueArgument(4, scene.toCL().Length);
        kernel.SetMemoryArgument(5, skydome);
        kernel.SetMemoryArgument(6, cameraBuffer);
        kernel.SetMemoryArgument(7, rndBuffer);
    }
    catch (ComputeException)
    {
        // Show the build log, wait for the user, then continue without the GPU.
        Console.WriteLine("Error in kernel code: {0}", program.GetBuildLog(context.Devices[0]));
        Console.ReadLine();
        useGPU = false;
    }
}
/// <summary>
/// Sets all registered int, float and double value arguments after the buffer arguments, then
/// executes the kernel and waits for the queue to finish.
/// </summary>
/// <param name="kernel">Kernel to configure and run.</param>
/// <param name="count">Value forwarded to <c>_commands.Execute</c>.</param>
public void RunKernel(ComputeKernel kernel, int count)
{
    // Value arguments start right after the slots taken by the int and float buffers.
    int slot = _intComputeBuffers.Count + _floatComputeBuffers.Count;

    foreach (string name in _intArguments.Keys)
    {
        kernel.SetValueArgument(slot++, _intArguments[name]);
    }

    foreach (string name in _floatArguments.Keys)
    {
        kernel.SetValueArgument(slot++, _floatArguments[name]);
    }

    foreach (string name in _doubleArguments.Keys)
    {
        kernel.SetValueArgument(slot++, _doubleArguments[name]);
    }

    _commands.Execute(kernel, count);
    _commands.Finish();
}
/// <summary>
/// Disposes the current kernel together with its program, if there is one, under the kernel lock.
/// </summary>
public void Dispose()
{
    lock (_kernelLock)
    {
        if (_kernel == null)
        {
            return;
        }

        // The program is reached through the kernel, so it is disposed before the kernel itself.
        _kernel.Program.Dispose();
        _kernel.Dispose();
        _kernel = null;
    }
}
// 26 ms 4096x4096@512 iter with 1024 cores
/// <summary>
/// Compiles <c>Mandel3.cl</c>, runs the <c>mandel</c> kernel tile by tile on the first GPU, reads
/// the result into <paramref name="message"/> and returns the elapsed time.
/// </summary>
/// <param name="ymin">Value passed as kernel argument 2.</param>
/// <param name="xmin">Value passed as kernel argument 3.</param>
/// <param name="width">Value passed as kernel argument 4.</param>
/// <param name="message">Host array backing the output buffer; receives the kernel output.</param>
/// <returns>Milliseconds spent executing the kernel and reading the data back.</returns>
static long Method05(float ymin, float xmin, float width, int[] message)
{
    // load opencl source; the reader is disposed even when reading fails
    string clSource;
    using (StreamReader streamReader = new StreamReader("Mandel3.cl"))
    {
        clSource = streamReader.ReadToEnd();
    }

    // pick first platform
    ComputePlatform platform = ComputePlatform.Platforms[0];

    // Every OpenCL object below wraps a native handle. Disposing them here keeps repeated
    // calls from accumulating contexts, queues, programs, kernels and buffers.
    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero))
    using (ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (ComputeProgram program = new ComputeProgram(context, clSource))
    {
        // compile opencl source
        program.Build(null, null, null, IntPtr.Zero);

        int messageSize = message.Length;

        // load chosen kernel from program and allocate a memory buffer with the message
        using (ComputeKernel kernel = program.CreateKernel("mandel"))
        using (ComputeBuffer<int> messageBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, message))
        {
            kernel.SetMemoryArgument(0, messageBuffer);
            kernel.SetValueArgument(1, N);
            kernel.SetValueArgument(2, ymin);
            kernel.SetValueArgument(3, xmin);
            kernel.SetValueArgument(4, width);
            kernel.SetValueArgument(5, MaxIter);

            var watch = System.Diagnostics.Stopwatch.StartNew();

            // Execute kernel: one 32x32 launch per tile, offset to the tile's origin
            //queue.ExecuteTask(kernel, null);
            //queue.Execute(kernel, new long[] { 0, 0, 0, 0 }, new long[] { 8, 8 }, new long[] { 8, 8 }, null);
            for (var i = 0; i < N / 32; ++i)
            {
                for (var j = 0; j < N / 32; ++j)
                {
                    queue.Execute(kernel, new long[] { i * 32, j * 32 }, new long[] { 32, 32 }, null, null);
                }
            }

            // Read data back; the array stays pinned until the non-blocking read has finished
            unsafe
            {
                fixed (int* retPtr = message)
                {
                    queue.Read(messageBuffer, false, 0, messageSize, new IntPtr(retPtr), null);
                    queue.Finish();
                }
            }

            watch.Stop();
            return watch.ElapsedMilliseconds;
        }
    }
}
/// <summary>
/// Enqueues <paramref name="kernel"/> on <c>Queue</c> over a two-dimensional range.
/// </summary>
/// <param name="kernel">Kernel to execute.</param>
/// <param name="global1">Global work size, first dimension.</param>
/// <param name="global2">Global work size, second dimension.</param>
/// <param name="local1">Local work size, first dimension.</param>
/// <param name="local2">Local work size, second dimension.</param>
public void Exec2D(ComputeKernel kernel, long global1, long global2, long local1, long local2)
{
    long[] globalWorkSize = { global1, global2 };
    long[] localWorkSize = { local1, local2 };

    // No global offset and no event list are supplied.
    Queue.Execute(kernel, null, globalWorkSize, localWorkSize, null);
}
/// <summary>
/// Runs the "ComputePoints" kernel from kernel.cl on the first GPU of the Intel OpenCL platform
/// <paramref name="numberOfIterations"/> times and returns the average wall-clock time per iteration.
/// </summary>
/// <param name="depthPixels">Depth samples; uploaded as kernel argument 0 on every iteration.</param>
/// <param name="width">Passed to the kernel as argument 6.</param>
/// <param name="inverseRotatedFx">Passed to the kernel as argument 7.</param>
/// <param name="rotatedCx">Passed to the kernel as argument 8.</param>
/// <param name="inverseRotatedFy">Passed to the kernel as argument 9.</param>
/// <param name="rotatedCy">Passed to the kernel as argument 10.</param>
/// <param name="bedTransformationM">3x3 part of the bed affine transform.</param>
/// <param name="bedTransformationb">Translation column of the bed affine transform.</param>
/// <param name="floorTransformationM">3x3 part of the floor affine transform.</param>
/// <param name="floorTransformationb">Translation column of the floor affine transform.</param>
/// <param name="numberOfIterations">Number of timed kernel runs.</param>
/// <returns>Average milliseconds per iteration (buffer upload, kernel run and read-back).</returns>
static float ComputeAverageGPUTime(
    ushort[] depthPixels,
    int width,
    float inverseRotatedFx,
    float rotatedCx,
    float inverseRotatedFy,
    float rotatedCy,
    Matrix bedTransformationM,
    Matrix bedTransformationb,
    Matrix floorTransformationM,
    Matrix floorTransformationb,
    int numberOfIterations)
{
    // pick the device platform
    ComputePlatform intelGPU = ComputePlatform.Platforms.First(n => n.Name.Contains("Intel"));

    // All OpenCL objects wrap native handles; the using blocks release them even on failure.
    using (ComputeContext context = new ComputeContext(
        ComputeDeviceTypes.Gpu,                     // use the gpu
        new ComputeContextPropertyList(intelGPU),   // use the intel openCL platform
        null,
        IntPtr.Zero))
    // the command queue carries the commands sent to the device (GPU)
    using (ComputeCommandQueue commandQueue = new ComputeCommandQueue(
        context,                        // the compute context
        context.Devices[0],             // first device matching the context specifications
        ComputeCommandQueueFlags.None)) // no special flags
    {
        string kernelSource = null;
        using (StreamReader sr = new StreamReader("kernel.cl"))
        {
            kernelSource = sr.ReadToEnd();
        }

        // create the "program"
        using (ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource }))
        {
            // compile.
            program.Build(null, null, null, IntPtr.Zero);

            Point3D[] outProjectivePoints = new Point3D[depthPixels.Length];
            Point3D[] outRealPoints = new Point3D[depthPixels.Length];
            Point3D[] outBedPoints = new Point3D[depthPixels.Length];
            Point3D[] outFloorPoints = new Point3D[depthPixels.Length];

            // Two 3x4 affine transforms (rows of M followed by the matching entry of b), bed first.
            float[] affines = new float[24];

            // do bed affines first because that's what assembly code expects
            int z = 0;
            for (int row = 0; row < 3; row++)
            {
                for (int col = 0; col < 3; col++)
                {
                    affines[z++] = bedTransformationM[row, col];
                }
                affines[z++] = bedTransformationb[row, 0];
            }

            // do floor affines next because that's what assembly code expects
            for (int row = 0; row < 3; row++)
            {
                for (int col = 0; col < 3; col++)
                {
                    affines[z++] = floorTransformationM[row, col];
                }
                affines[z++] = floorTransformationb[row, 0];
            }

            using (ComputeKernel kernel = program.CreateKernel("ComputePoints"))
            using (ComputeBuffer<float> affinesBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, affines))
            // Each output buffer is backed by the host array of the same name. Previously the
            // real/bed/floor buffers were backed by one another's arrays.
            using (ComputeBuffer<Point3D> projectivePointsBuffer = new ComputeBuffer<Point3D>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, outProjectivePoints))
            using (ComputeBuffer<Point3D> realPointsBuffer = new ComputeBuffer<Point3D>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, outRealPoints))
            using (ComputeBuffer<Point3D> bedPointsBuffer = new ComputeBuffer<Point3D>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, outBedPoints))
            using (ComputeBuffer<Point3D> floorPointsBuffer = new ComputeBuffer<Point3D>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, outFloorPoints))
            {
                // NOTE(review): the kernel signature is not visible here; the order
                // projective/real/bed/floor for arguments 2..5 is taken from the order in which
                // the buffers are declared and read back — confirm against kernel.cl.
                // Arguments 4 and 5 used to re-bind the projective and real buffers, leaving the
                // bed and floor buffers unbound.
                kernel.SetMemoryArgument(1, affinesBuffer);
                kernel.SetMemoryArgument(2, projectivePointsBuffer);
                kernel.SetMemoryArgument(3, realPointsBuffer);
                kernel.SetMemoryArgument(4, bedPointsBuffer);
                kernel.SetMemoryArgument(5, floorPointsBuffer);
                kernel.SetValueArgument<int>(6, width);
                kernel.SetValueArgument<float>(7, inverseRotatedFx);
                kernel.SetValueArgument<float>(8, rotatedCx);
                kernel.SetValueArgument<float>(9, inverseRotatedFy);
                kernel.SetValueArgument<float>(10, rotatedCy);

                Stopwatch sw = new Stopwatch();
                sw.Start();
                for (int iteration = 0; iteration < numberOfIterations; iteration++)
                {
                    // The depth buffer is re-created per iteration (so the upload is part of the
                    // measured time) and released at the end of each iteration instead of leaking.
                    using (ComputeBuffer<ushort> depthPointsBuffer = new ComputeBuffer<ushort>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, depthPixels))
                    {
                        kernel.SetMemoryArgument(0, depthPointsBuffer);
                        commandQueue.Execute(kernel, new long[] { 0 }, new long[] { depthPixels.Length }, null, null);

                        unsafe
                        {
                            fixed (Point3D* projectivePointsPtr = outProjectivePoints,
                                            realPointsPtr = outRealPoints,
                                            bedPointsPtr = outBedPoints,
                                            floorPointsPtr = outFloorPoints)
                            {
                                // Reads are non-blocking; Finish() must complete while the arrays are pinned.
                                commandQueue.Read(projectivePointsBuffer, false, 0, outProjectivePoints.Length, new IntPtr(projectivePointsPtr), null);
                                commandQueue.Read(realPointsBuffer, false, 0, outRealPoints.Length, new IntPtr(realPointsPtr), null);
                                commandQueue.Read(bedPointsBuffer, false, 0, outBedPoints.Length, new IntPtr(bedPointsPtr), null);
                                commandQueue.Read(floorPointsBuffer, false, 0, outFloorPoints.Length, new IntPtr(floorPointsPtr), null);
                                commandQueue.Finish();
                            }
                        }
                    }
                }
                sw.Stop();

                return sw.ElapsedMilliseconds / (numberOfIterations * 1.0f);
            }
        }
    }
}
/// <summary>
/// Prepare OpenCL program, data buffers, etc.
/// Rebuilds everything only when the state is marked dirty; tears the OpenCL
/// objects down when there is no texture or OpenCL is switched off.
/// </summary>
/// <param name="dirty">When true, forces the OpenCL objects to be rebuilt.</param>
public void PrepareClBuffers(bool dirty = true)
{
    clDirty |= dirty;

    bool openClUsable = texName != 0 && checkOpenCL.Checked;
    if (!openClUsable)
    {
        DestroyClBuffers();
        return;
    }

    if (!clDirty)
    {
        return;
    }

    DestroyClBuffers();

    if (clContext == null)
    {
        SetupClContext();
    }

    // The context can still be missing after the setup attempt.
    if (clContext == null)
    {
        Util.Log("OpenCL error");
        clImage = null;
        clDirty = true;
        return;
    }

    GL.BindTexture(TextureTarget.Texture2D, 0);
    GL.Finish();

    try
    {
        // OpenCL C source:
        string sourceFile = CanUseDouble ? "mandel.cl" : "mandelSingle.cl";
        string source = ClInfo.ReadSourceFile(sourceFile, "090opencl");
        if (string.IsNullOrEmpty(source))
        {
            return;
        }

        // program & kernel:
        clProgram = new ComputeProgram(clContext, source);
        clProgram.Build(clContext.Devices, null, null, IntPtr.Zero);

        string kernelName = (checkDouble.Checked && CanUseDouble) ? "mandelDouble" : "mandelSingle";
        clKernel = clProgram.CreateKernel(kernelName);
        clCommands = new ComputeCommandQueue(clContext, clContext.Devices[0], ComputeCommandQueueFlags.None);

        // Round the texture dimensions up with the mask -groupSize
        // (a multiple of groupSize when groupSize is a power of two).
        globalWidth = (texWidth + groupSize - 1) & -groupSize;
        globalHeight = (texHeight + groupSize - 1) & -groupSize;

        // buffers:
        // 1. colormap array
        cmap = new ComputeBuffer<byte>(clContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, colormap);

        bool useInterop = checkInterop.Checked;
        if (useInterop)
        {
            // 2. CL image for OpenGL interop
            clImage = ComputeImage2D.CreateFromGLTexture2D(clContext, ComputeMemoryFlags.ReadWrite, (int)TextureTarget.Texture2D, 0, texName);
            if (clImage == null)
            {
                Util.Log("OpenCL cannot reference OpenGL texture!");
                useInterop = false;
            }
        }

        // 3. CL output array
        result = new ComputeBuffer<byte>(clContext, ComputeMemoryFlags.ReadWrite, texWidth * texHeight * 4);

        // synced..
        clDirty = false;
    }
    catch (Exception ex)
    {
        Util.LogFormat("OpenCL build error: {0}", ex.Message);
        clImage = null;
        clDirty = true;
    }
}
/// <summary>
/// Builds the kernel arguments: array inputs are uploaded as device memory objects,
/// every other input is passed by value. Argument indices follow the order of <paramref name="inputs"/>.
/// </summary>
/// <param name="inputs">The inputs.</param>
/// <param name="kernel">The kernel whose arguments are set.</param>
/// <param name="length">The length (not used by this overload).</param>
/// <param name="returnInputVariable">The return result (not used by this overload).</param>
/// <returns>The memory objects created for array inputs, keyed by argument index.</returns>
private Dictionary<int, GenericArrayMemory> BuildKernelArguments(object[] inputs, ComputeKernel kernel, long length, int? returnInputVariable = null)
{
    var result = new Dictionary<int, GenericArrayMemory>();

    for (int i = 0; i < inputs.Length; i++)
    {
        object item = inputs[i];

        if (item.GetType().IsArray)
        {
            // Keep the array pinned only while the buffer is created: CopyHostPointer copies
            // the host data at creation time, so the pin can be released right afterwards.
            // Previously the handle was never freed, pinning the array for the process lifetime.
            GCHandle pin = GCHandle.Alloc(item, GCHandleType.Pinned);
            try
            {
                GenericArrayMemory mem = new GenericArrayMemory(_context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, item);
                kernel.SetMemoryArgument(i, mem);
                result.Add(i, mem);
            }
            finally
            {
                pin.Free();
            }
        }
        else
        {
            int size = Marshal.SizeOf(item);

            // OpenCL copies the argument bytes during the set-argument call, so the boxed
            // value only needs to stay pinned for the duration of that call.
            GCHandle pin = GCHandle.Alloc(item, GCHandleType.Pinned);
            try
            {
                kernel.SetArgument(i, new IntPtr(size), pin.AddrOfPinnedObject());
            }
            finally
            {
                pin.Free();
            }
        }
    }

    return result;
}
/// <summary>
/// Initializes local fields and the underlying compute context.
/// When no context exists yet, one is created on the first device (across all platforms)
/// that reports the <c>cl_khr_fp64</c> extension. Then builds Mandelbrot.cl, creates both
/// kernels, the two device buffers and the command queue.
/// </summary>
public void Initialize()
{
    if (this.context == null)
    {
        var fp64Devices = ComputePlatform.Platforms
            .SelectMany(platform => platform.Devices)
            .Where(device => device.Extensions.Contains("cl_khr_fp64"))
            .Take(1)
            .ToArray();

        var properties = new ComputeContextPropertyList(fp64Devices[0].Platform);
        this.context = new ComputeContext(fp64Devices, properties, null, IntPtr.Zero);
    }

    string source = File.ReadAllText("Mandelbrot.cl");
    this.program = new ComputeProgram(this.context, source);
    this.program.Build(null, null, null, IntPtr.Zero);

    this.mandelbrot = this.program.CreateKernel("Mandelbrot");
    this.toBitmap = this.program.CreateKernel("ToBitmap");

    // One int per pixel for the raw result, four bytes per pixel for the bitmap.
    var pixelCount = this.ImageWidth * this.ImageHeight;
    this.resultBuffer = new ComputeBuffer<int>(this.context, ComputeMemoryFlags.ReadWrite, pixelCount);
    this.bitmapBuffer = new ComputeBuffer<byte>(this.context, ComputeMemoryFlags.ReadWrite, pixelCount * 4);

    // The result buffer is shared by both kernels.
    this.mandelbrot.SetMemoryArgument(7, this.resultBuffer);
    this.toBitmap.SetMemoryArgument(1, this.resultBuffer);
    this.toBitmap.SetMemoryArgument(2, this.bitmapBuffer);

    var queueDevice = this.context.Devices
        .OrderBy(device => device.Type)
        .First(device => device.Extensions.Contains("cl_khr_fp64"));
    this.commandQueue = new ComputeCommandQueue(this.context, queueDevice, ComputeCommandQueueFlags.None);
}
// Use this for initialization
/// <summary>
/// Creates the OpenCL context, queue and program (CPU device of the first platform),
/// builds the three kernels, allocates the grid buffers and binds the arguments
/// that stay constant between frames.
/// </summary>
void Awake()
{
    var platform = ComputePlatform.Platforms[0];
    _context = new ComputeContext(ComputeDeviceTypes.Cpu, new ComputeContextPropertyList(platform), null, System.IntPtr.Zero);
    _queue = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None);

    string clSource = System.IO.File.ReadAllText(clProgramPath);
    _program = new ComputeProgram(_context, clSource);
    try
    {
        _program.Build(null, null, null, System.IntPtr.Zero);
    }
    catch (BuildProgramFailureComputeException)
    {
        // Surface the compiler output before propagating the failure.
        Debug.Log(_program.GetBuildLog(_context.Devices[0]));
        throw;
    }

    _events = new ComputeEventList();
    _updateGridKernel = _program.CreateKernel(clUpdateGridKernelName);
    _updateBoidsKernel = _program.CreateKernel(clUpdateBoidsKernelName);
    _boundaryKernel = _program.CreateKernel(clBoundaryKernelName);

    // One counter per grid cell and maxIndices index slots per cell.
    _pointCounters = new int[nGridPartitions * nGridPartitions * nGridPartitions];
    _pointIndices = new int[_pointCounters.Length * maxIndices];

    // These buffers are bound to both the grid-update and the boid-update kernel below.
    // A buffer created WriteOnly must never be read inside a kernel (undefined behaviour
    // in OpenCL), so they are created ReadWrite.
    // NOTE(review): kernel sources are not visible here — confirm which kernel reads them.
    _pointCountersBuffer = new Cloo.ComputeBuffer<int>(
        _context,
        ComputeMemoryFlags.ReadWrite,
        _pointCounters.Length);
    _pointIndicesBuffer = new Cloo.ComputeBuffer<int>(
        _context,
        ComputeMemoryFlags.ReadWrite,
        _pointIndices.Length);

    _gridInfo = new GridInfo()
    {
        worldOrigin = gridbounds.min,
        worldSize = gridbounds.size,
        cellSize = gridbounds.size * (1f / nGridPartitions),
        nGridPartitions = nGridPartitions,
        maxIndices = maxIndices
    };

    _boundaryKernel.SetValueArgument(1, _gridInfo);

    _updateGridKernel.SetMemoryArgument(1, _pointCountersBuffer);
    _updateGridKernel.SetMemoryArgument(2, _pointIndicesBuffer);
    _updateGridKernel.SetValueArgument(3, _gridInfo);

    _updateBoidsKernel.SetMemoryArgument(2, _pointCountersBuffer);
    _updateBoidsKernel.SetMemoryArgument(3, _pointIndicesBuffer);
    _updateBoidsKernel.SetValueArgument(4, _gridInfo);
}
// initialize renderer: takes in command line parameters passed by template code
/// <summary>
/// Stores the command line parameters, sets up tiles, accumulator, scene and camera,
/// then creates the OpenCL context, program, kernel, buffers and command queue and binds
/// all kernel arguments.
/// </summary>
/// <param name="rt">Running time, stored in <c>runningTime</c>.</param>
/// <param name="gpu">Stored in <c>useGPU</c>.</param>
/// <param name="platformIdx">Index into <c>ComputePlatform.Platforms</c>.</param>
public void Init(int rt, bool gpu, int platformIdx)
{
    // pass command line parameters
    runningTime = rt;
    useGPU = gpu;
    gpuPlatform = platformIdx;

    //Determine tile width and height
    tileCount = GreatestDiv(screen.width, screen.height);
    tileWidth = screen.width / tileCount;
    tileHeight = screen.height / tileCount;

    // initialize accumulator
    accumulator = new Vector3[screen.width * screen.height];
    ClearAccumulator();

    // setup scene
    scene = new Scene();

    // setup camera
    camera = new Camera(screen.width, screen.height);

    //Init OpenCL
    ComputePlatform platform = ComputePlatform.Platforms[gpuPlatform];
    context = new ComputeContext(
        ComputeDeviceTypes.Gpu,
        new ComputeContextPropertyList(platform),
        null,
        IntPtr.Zero
        );

    // The using block closes the file even when reading throws.
    string clSource;
    using (var streamReader = new StreamReader("../../program.cl"))
    {
        clSource = streamReader.ReadToEnd();
    }

    ComputeProgram program = new ComputeProgram(context, clSource);

    //try to compile
    try
    {
        program.Build(null, null, null, IntPtr.Zero);
    }
    catch
    {
        // The build log is printed and execution continues; a failed build will
        // surface again when the kernel is created below.
        Console.Write("error in kernel code:\n");
        Console.Write(program.GetBuildLog(context.Devices[0]) + "\n");
    }
    kernel = program.CreateKernel("device_function");

    //setup RNG: one seed per pixel
    rngSeed = new int[screen.width * screen.height];
    Random r = RTTools.GetRNG();
    for (int i = 0; i < rngSeed.Length; i++)
    {
        rngSeed[i] = r.Next();
    }

    //import buffers etc to GPU
    Vector3[] sphereOrigins = Scene.GetOrigins;
    float[] sphereRadii = Scene.GetRadii;

    var FlagRW = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer;
    var FlagR = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;

    rngBuffer = new ComputeBuffer<int>(context, FlagRW, rngSeed);
    screenPixels = new ComputeBuffer<int>(context, FlagRW, screen.pixels);
    skyBox = new ComputeBuffer<float>(context, FlagR, scene.skybox);
    originBuffer = new ComputeBuffer<Vector3>(context, FlagR, sphereOrigins);
    radiusBuffer = new ComputeBuffer<float>(context, FlagR, sphereRadii);
    accBuffer = new ComputeBuffer<Vector3>(context, FlagRW, accumulator);

    kernel.SetValueArgument(0, camera.p1);
    kernel.SetValueArgument(1, camera.p2);
    kernel.SetValueArgument(2, camera.p3);
    kernel.SetValueArgument(3, camera.up);
    kernel.SetValueArgument(4, camera.right);
    kernel.SetValueArgument(5, camera.pos);
    kernel.SetValueArgument(6, camera.lensSize);
    kernel.SetValueArgument(7, (float)screen.width);
    kernel.SetValueArgument(8, (float)screen.height);
    kernel.SetMemoryArgument(9, rngBuffer);
    kernel.SetMemoryArgument(10, screenPixels);
    kernel.SetMemoryArgument(11, skyBox);
    kernel.SetMemoryArgument(12, originBuffer);
    kernel.SetMemoryArgument(13, radiusBuffer);
    kernel.SetMemoryArgument(14, accBuffer);

    queue = new ComputeCommandQueue(context, context.Devices[0], 0);

    long[] tempWorkSize = { screen.width * screen.height }; //For some reason, doing this directly produces a build error.
    workSize = tempWorkSize;                                //Luckily, this works.
}
/// <summary>
/// Applies the base class arguments, then passes <c>Frame</c> in the next free argument slot.
/// </summary>
/// <param name="kernel">Kernel receiving the arguments.</param>
/// <param name="isDouble">Forwarded unchanged to the base implementation.</param>
/// <param name="startIndex">Next free argument index; advanced past every argument that is set.</param>
public override void ApplyToKernel(ComputeKernel kernel, bool isDouble, ref int startIndex)
{
    base.ApplyToKernel(kernel, isDouble, ref startIndex);

    // Reserve the slot first so the index is advanced before the argument is set.
    int frameSlot = startIndex;
    startIndex = frameSlot + 1;
    kernel.SetValueArgument(frameSlot, Frame);
}
/// <summary>
/// Runs Test1, then builds "Foom/CL/testProg.txt" on the first GPU of the first platform,
/// executes its "vector_add" kernel over 1024 items, prints the first ten results and the
/// time spent waiting for completion, and finally keeps the process alive.
/// </summary>
/// <param name="args">Command line arguments (unused).</param>
static void Main(string[] args)
{
    //Test2();
    Test1();

    ComputePlatform plat = ComputePlatform.Platforms[0];
    Console.WriteLine("Plat:" + plat.Name);

    // OpenCL objects wrap native handles; the using blocks release them deterministically.
    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(plat), null, IntPtr.Zero))
    using (ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    {
        string clSrc;
        using (StreamReader rs = new StreamReader("Foom/CL/testProg.txt"))
        {
            clSrc = rs.ReadToEnd();
        }

        using (ComputeProgram prog = new ComputeProgram(context, clSrc))
        {
            prog.Build(null, null, null, IntPtr.Zero);
            Console.WriteLine("BS:" + prog.GetBuildStatus(context.Devices[0]).ToString());
            Console.WriteLine("Info:" + prog.GetBuildLog(context.Devices[0]));

            int[] data = new int[1024];
            for (int i = 0; i < 1024; i++)
            {
                data[i] = 100;
            }

            using (ComputeKernel kern = prog.CreateKernel("vector_add"))
            using (ComputeBuffer<int> b1 = new ComputeBuffer<int>(context, ComputeMemoryFlags.CopyHostPointer, data))
            using (ComputeBuffer<int> b2 = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, 1024))
            {
                kern.SetMemoryArgument(0, b1);
                kern.SetMemoryArgument(1, b2);

                long[] wo = { 0 };    // global work offset
                long[] ws = { 1024 }; // global work size
                long[] tc = { 16 };   // local work size

                queue.Execute(kern, wo, ws, tc, null);

                // Only the wait for completion is timed.
                int c = Environment.TickCount;
                queue.Finish();
                c = Environment.TickCount - c;

                queue.ReadFromBuffer<int>(b2, ref data, true, null);
                for (int i = 0; i < 10; i++)
                {
                    Console.WriteLine("C:" + data[i]);
                }
                Console.WriteLine("Done:" + c);
            }
        }
    }

    // Keep the process alive as before, but block instead of spinning a CPU core at 100%.
    System.Threading.Thread.Sleep(System.Threading.Timeout.Infinite);
}
/// <summary>
/// Executes the specified kernel function name.
/// The global work size is taken from the first plain array argument, or, when there is none,
/// from the XArray arguments (an output XArray if one has a non-zero count, otherwise the first XArray).
/// Arguments whose IO mode is Out or InOut are read back after execution.
/// </summary>
/// <param name="functionName">Name of the function.</param>
/// <param name="args">Kernel arguments, in the kernel's parameter order.</param>
/// <exception cref="ExecutionException">
/// The kernel function was not found, or any error occurred while executing it.
/// </exception>
public override void Execute(string functionName, params object[] args)
{
    ValidateArgs(functionName, args);

    ComputeKernel kernel = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));
    if (kernel == null)
    {
        throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
    }

    // Created only after the kernel lookup succeeded, so a missing kernel no longer
    // leaks a command queue.
    ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None);

    try
    {
        Array ndobject = (Array)args.FirstOrDefault(x => (x.GetType().IsArray));
        List<long> length = new List<long>();
        long totalLength = 0;

        if (ndobject == null)
        {
            var xarrayList = args.Where(x => (x.GetType().Name == "XArray" || x.GetType().BaseType.Name == "XArray")).ToList();
            foreach (var item in xarrayList)
            {
                var xarrayobj = (XArray)item;
                if (xarrayobj.Direction == Direction.Output)
                {
                    totalLength = xarrayobj.Count;
                    if (!xarrayobj.IsElementWise)
                    {
                        length = xarrayobj.Sizes.ToList();
                    }
                    else
                    {
                        length.Add(totalLength);
                    }
                }
            }

            // No output XArray determined a size: fall back to the first XArray.
            if (totalLength == 0)
            {
                var xarrayobj = (XArray)xarrayList[0];
                totalLength = xarrayobj.Count;
                if (!xarrayobj.IsElementWise)
                {
                    length = xarrayobj.Sizes.ToList();
                }
                else
                {
                    length.Add(totalLength);
                }
            }
        }
        else
        {
            // One work dimension per array dimension.
            totalLength = ndobject.Length;
            for (int i = 0; i < ndobject.Rank; i++)
            {
                length.Add(ndobject.GetLength(i));
            }
        }

        var method = KernelFunctions.FirstOrDefault(x => (x.Name == functionName));
        var buffers = BuildKernelArguments(method, args, kernel, totalLength);
        commands.Execute(kernel, null, length.ToArray(), null, null);

        for (int i = 0; i < args.Length; i++)
        {
            if (args[i].GetType().IsArray)
            {
                var ioMode = method.Parameters.ElementAt(i).Value.IOMode;
                if (ioMode == IOMode.InOut || ioMode == IOMode.Out)
                {
                    Array r = (Array)args[i];
                    commands.ReadFromMemory(buffers[i], ref r, true, 0, null);
                }
                buffers[i].Dispose();
            }
            else if (args[i].GetType().Name == "XArray" || args[i].GetType().BaseType.Name == "XArray")
            {
                var ioMode = method.Parameters.ElementAt(i).Value.IOMode;
                if (ioMode == IOMode.InOut || ioMode == IOMode.Out)
                {
                    XArray r = (XArray)args[i];
                    commands.ReadFromMemory(buffers[i], ref r, true, 0, null);
                }
                buffers[i].Dispose();
            }
        }
    }
    catch (Exception ex)
    {
        throw new ExecutionException(ex.Message);
    }
    finally
    {
        commands.Finish();
        commands.Dispose();
    }
}
/// <summary>
/// Creates a new <c>ClearKernel</c> and asks <paramref name="clProxy"/> to create
/// the matching compute kernel from it.
/// </summary>
/// <param name="clProxy">Proxy used to create the compute kernel.</param>
public void Load(OpenCLProxy clProxy)
{
    this._kernel = new ClearKernel();

    // The kernel description just stored is what the proxy is given.
    this._computeKernel = clProxy.CreateKernel(this._kernel);
}
/// <summary>
/// Uploads <c>Output</c> to the computing device and creates the kernel named
/// <c>KernelName</c> from the application's OpenCL program.
/// </summary>
private void PrepareResources()
{
    Output.UploadToComputingDevice();

    var program = OpenClApplication.Program;
    _kernel = program.CreateKernel(KernelName);
}
// initialize renderer: takes in command line parameters passed by template code
/// <summary>
/// Stores the command line parameters, sets up accumulator, scene, camera and tile counts,
/// then creates the OpenCL context, program, "Test" kernel, event list and command queue.
/// </summary>
/// <param name="rt">Running time, stored in <c>runningTime</c>.</param>
/// <param name="gpu">Stored in <c>useGPU</c>.</param>
/// <param name="platformIdx">Index into <c>ComputePlatform.Platforms</c>.</param>
public void Init(int rt, bool gpu, int platformIdx)
{
    // pass command line parameters
    runningTime = rt;
    useGPU = gpu;
    gpuPlatform = platformIdx;

    // initialize accumulator
    accumulator = new Vector3[screen.width * screen.height];
    ClearAccumulator();

    // setup scene
    scene = new Scene();

    // setup camera
    camera = new Camera(screen.width, screen.height);

    rngQueue = new ConcurrentQueue<Random>();

    // Number of tiles needed to cover the screen, rounding partial tiles up.
    xtiles = (int)Math.Ceiling((float)screen.width / TILESIZE);
    ytiles = (int)Math.Ceiling((float)screen.height / TILESIZE);

#if DEBUG
    // Factorial table: factorials[i] = i!
    RTTools.factorials[0] = Vector<float>.One;
    for (int i = 1; i < RTTools.TERMS * 2; i++)
    {
        RTTools.factorials[i] = RTTools.factorials[i - 1] * i;
    }
    //for (int i = 0; i < RTTools.TERMS; i++)
    //    RTTools.atanStuff[i] = (new Vector<float>((float)Math.Pow(2, 2 * i)) * (RTTools.factorials[i] * RTTools.factorials[i])) / RTTools.factorials[2 * i + 1];
#endif

    #region OpenCL related things

    randNums = new float[screen.width * screen.height + 25];

    // The using block closes the file even when reading throws.
    string clSource;
    using (var streamReader = new StreamReader("../../assets/GPUCode.cl"))
    {
        clSource = streamReader.ReadToEnd();
    }

    platform = ComputePlatform.Platforms[gpuPlatform];
    context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

    program = new ComputeProgram(context, clSource);
    try
    {
        program.Build(null, null, null, IntPtr.Zero);
        kernel = program.CreateKernel("Test");
    }
    catch
    {
        // Print the compiler output and stop in the debugger when one is attached;
        // execution then continues without a kernel.
        Console.Write("error in kernel code:\n");
        Console.Write(program.GetBuildLog(context.Devices[0]) + "\n");
        Debugger.Break();
    }

    eventList = new ComputeEventList();
    commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

    #endregion
}
/// <summary>
/// Creates the "MaxPoolingForward" kernel from the source registered under
/// <c>FUNCTION_NAME</c> and stores it in <c>ForwardKernel</c>.
/// </summary>
public void CreateKernel()
{
    var source = Weaver.GetKernelSource(FUNCTION_NAME);
    var program = Weaver.CreateProgram(source);
    ForwardKernel = program.CreateKernel("MaxPoolingForward");
}
/// <summary>
/// Compile the kernel with a map of preprocessor defines, a collection of
/// name-value pairs. Any previously compiled program and kernel are released first.
/// </summary>
///
/// <param name="options">A map of preprocessor defines; each entry becomes
/// <c>-D key=value</c>. A null or empty map builds without options.</param>
public void Compile(IDictionary<String, String> options)
{
    // clear out any old program.
    // The kernel is checked on its own: after a failed build the program is set while
    // the kernel is still null, and the old unconditional Dispose() call threw.
    // References are cleared so a later failure cannot dispose the same object twice.
    if (this.kernel != null)
    {
        this.kernel.Dispose();
        this.kernel = null;
    }
    if (this.program != null)
    {
        this.program.Dispose();
        this.program = null;
    }

    // Create the program from the source code
    this.program = new ComputeProgram(this.context, this.cl);

    if (options != null && options.Count > 0)
    {
        // Turn the map into a space separated list of "-D name=value" options.
        StringBuilder builder = new StringBuilder();
        foreach (KeyValuePair<String, String> obj in options)
        {
            if (builder.Length > 0)
            {
                builder.Append(" ");
            }
            builder.Append("-D ");
            builder.Append(obj.Key);
            builder.Append("=");
            builder.Append(obj.Value);
        }

        program.Build(null, builder.ToString(), null, IntPtr.Zero);
    }
    else
    {
        program.Build(null, null, null, IntPtr.Zero);
    }

    // Create the kernel
    this.kernel = Program.CreateKernel(this.kernelName);
}
// initialize renderer: takes in command line parameters passed by template code
/// <summary>
/// Stores the command line parameters, sets up accumulator, scene, camera and a table of
/// 1000 random floats. When the GPU is requested, also creates the OpenCL context, queue,
/// program, "Main" kernel and its buffers; on an OpenCL error it falls back to
/// <c>useGPU = false</c>.
/// </summary>
/// <param name="rt">Running time, stored in <c>runningTime</c>.</param>
/// <param name="gpu">Whether the OpenCL path should be initialized.</param>
/// <param name="platformIdx">Stored in <c>gpuPlatform</c>.</param>
public void Init(int rt, bool gpu, int platformIdx)
{
    // pass command line parameters
    runningTime = rt;
    useGPU = gpu;
    gpuPlatform = platformIdx;

    // initialize accumulator
    accumulator = new Vector3[screen.width * screen.height];
    ClearAccumulator();

    // setup scene
    scene = new Scene();

    // setup camera
    camera = new Camera(screen.width, screen.height);

    // Generate randoms
    Console.Write("Generating randoms....\t");
    randoms = new float[1000];
    Random r = RTTools.GetRNG();
    for (int i = 0; i < 1000; i++)
    {
        randoms[i] = (float)r.NextDouble();
    }
    // Result is unused; the call is kept because it advances the generator's state.
    int variable = r.Next();
    Console.WriteLine("Done!");

    // initialize required opencl things if gpu is used
    if (!useGPU)
    {
        return;
    }

    // The using block closes the file even when reading throws.
    string clSource;
    using (StreamReader streamReader = new StreamReader("../../kernel.cl"))
    {
        clSource = streamReader.ReadToEnd();
    }

    // NOTE(review): the first platform is always used here; gpuPlatform is stored above
    // but not consulted — confirm whether that is intended.
    platform = ComputePlatform.Platforms[0];
    context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);
    queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

    program = new ComputeProgram(context, clSource);
    try
    {
        program.Build(null, null, null, IntPtr.Zero);
        kernel = program.CreateKernel("Main");

        sceneBuffer = new ComputeBuffer<Vector4>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, scene.toCL());
        rndBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, randoms);
        cameraBuffer = new ComputeBuffer<Vector3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, camera.toCL());
        outputBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.UseHostPointer, screen.pixels);
        skydome = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, scene.Skydome);

        kernel.SetMemoryArgument(0, outputBuffer);
        kernel.SetValueArgument(1, screen.width);
        kernel.SetValueArgument(2, screen.height);
        kernel.SetMemoryArgument(3, sceneBuffer);
        kernel.SetValueArgument(4, scene.toCL().Length);
        kernel.SetMemoryArgument(5, skydome);
        kernel.SetMemoryArgument(6, cameraBuffer);
        kernel.SetMemoryArgument(7, rndBuffer);
    }
    catch (ComputeException)
    {
        // Show the compiler output, wait for the user, then fall back to the non-GPU path.
        Console.WriteLine("Error in kernel code: {0}", program.GetBuildLog(context.Devices[0]));
        Console.ReadLine();
        useGPU = false;
    }
}
/// <summary>
/// Pre-compiles the compute kernels for every layer of <paramref name="model"/> for the given
/// input shapes and caches them in <c>m_CompiledLayers</c>. Nothing happens when the hash of
/// model and input shapes equals the cached one; otherwise tensors held by previously compiled
/// instructions are disposed and all layers are compiled again.
/// </summary>
/// <param name="model">Model whose layers are compiled.</param>
/// <param name="inputShapes">Shapes of the model inputs, by input name.</param>
public virtual void PrepareModel(Model model, IDictionary<string, TensorShape> inputShapes)
{
    var modelHash = CalcModelWithInputsHashCode(model, inputShapes);
    if (modelHash == m_CachedModelHash)
    {
        return;
    }

    m_CachedModelHash = modelHash;

    // Release tensors owned by the previous compilation.
    foreach (var l in m_CompiledLayers)
    {
        foreach (var i in l.Value.instructions)
        {
            // Most instructions are created without tensors (and one sets them to null
            // explicitly), so the array must be null-checked before it is dereferenced.
            if (i.tensors == null || i.tensors.Length == 0)
            {
                continue;
            }
            foreach (var t in i.tensors)
            {
                t.Dispose();
            }
        }
    }
    m_CompiledLayers.Clear();

    IDictionary<string, TensorShape?> shapesByName;
    ModelAnalyzer.ListTemporaryTensorShapes(model, inputShapes, out shapesByName);

    foreach (var l in model.layers)
    {
        if (m_CompiledLayers.ContainsKey(l))
        {
            continue; // already compiled
        }
        if (l.inputs.Length == 0)
        {
            continue; // don't need to compile layers without inputs, so far all of them are CPU only
        }
        if (shapesByName[l.inputs[0]] == null || shapesByName[l.name] == null)
        {
            continue; // shape of the first input or of the output is unknown
        }

        var X = shapesByName[l.inputs[0]].Value; // shape of the first input
        var O = shapesByName[l.name].Value;      // shape of the layer output

        ComputeKernel kernel = new ComputeKernel();
        if (l.type == Layer.Type.Dense)
        {
            var instructions = new List<CompiledInstruction>();
            var itemSize = 4; // @TODO: itemSizeInBytes == 2 | float16
            kernel = BestKernel(ComputeKernelLibrary.Dense(X, l.datasets[0].shape, O, itemSize >> 2));
            instructions.Add(new CompiledInstruction { kernel = kernel, shape = O });

            if (ShouldFlattenInputForDenseLayer(X))
            {
                var flattenedShape = X.Flatten();
                var flattenKernel = BestKernel(ComputeKernelLibrary.ReshapeFromNHWCModel(flattenedShape));
                instructions.Add(new CompiledInstruction { kernel = flattenKernel, shape = flattenedShape });
            }

            // FusedActivation
            var fusedActivation = (Layer.FusedActivation)l.activation;
            if (!IsFusedActivationSupported(fusedActivation))
            {
                var activationKernel = BestKernel(ComputeKernelLibrary.Activation(X, O, fusedActivation.ToString()));
                instructions.Add(new CompiledInstruction { kernel = activationKernel, shape = O });
            }

            m_CompiledLayers.Add(l, new CompiledLayer { instructions = instructions.ToArray(), shape = O });
            continue;
        }
        else if (
            l.type == Layer.Type.Conv2D)
        {
            Assert.IsNotNull(l.stride);
            Assert.IsNotNull(l.pad);
            var instructions = new List<CompiledInstruction>();

            // Conv2D
            kernel = BestKernel(ComputeKernelLibrary.Conv2D(X, l.datasets[0].shape, O, l.stride, l.pad));
            if (kernel.func.kernelName.StartsWith("Conv2DWinograd_2x2_3x3"))
            {
                instructions.Add(new CompiledInstruction { kernel = kernel, shape = O, tensors = PrepareConv2dWinograd(model, l) });
            }
            else
            {
                instructions.Add(new CompiledInstruction { kernel = kernel, shape = O });
            }

            // FusedActivation
            var fusedActivation = (Layer.FusedActivation)l.activation;
            if (!IsFusedActivationSupported(fusedActivation))
            {
                var activationKernel = BestKernel(ComputeKernelLibrary.Activation(X, O, fusedActivation.ToString()));
                instructions.Add(new CompiledInstruction { kernel = activationKernel, shape = O });
            }

            m_CompiledLayers.Add(l, new CompiledLayer { instructions = instructions.ToArray(), shape = O });
            continue;
        }
        else if (
            l.type == Layer.Type.DepthwiseConv2D)
        {
            kernel = BestKernel(
                ComputeKernelLibrary.DepthwiseConv2D(X, l.datasets[0].shape, O));
        }
        else if (
            l.type == Layer.Type.Conv2DTrans)
        {
            var outputAdjustment = l.pool;
            var stride = l.stride;

            var K = l.datasets[0].shape;
            var B = l.datasets[1].shape;
            var pad = new int[]
            {
                K.kernelWidth - l.pad[0] - 1, K.kernelHeight - l.pad[1] - 1,
                K.kernelWidth - l.pad[2] - 1, K.kernelHeight - l.pad[3] - 1
            };

            var XpaddedShape = new TensorShape(X.batch, stride[0] * (X.height - 1) + 1 + outputAdjustment[0], stride[0] * (X.width - 1) + 1 + outputAdjustment[1], X.channels);

            var kernelFill = CompileKernel(new ComputeKernelLibrary.Entry("Conv2DTransPadFill", (X.channels, X.width, X.height), 1.0f, 0));

            var kernelConv = BestKernel(
                ComputeKernelLibrary.Conv2D(XpaddedShape, K, O, new int[] { 1, 1 }, pad));
            bool isConvWinograd = (kernelConv.func.kernelName.StartsWith("Conv2DWinograd_2x2_3x3"));

            // Three instructions: pad-fill, a kernel-less instruction carrying the prepared
            // tensors, and the convolution over the padded shape.
            m_CompiledLayers.Add(l, new CompiledLayer
            {
                instructions = new CompiledInstruction[]
                {
                    new CompiledInstruction { kernel = kernelFill, shape = XpaddedShape },
                    new CompiledInstruction { shape = K, tensors = PrepareConv2DTrans(model, l) },
                    new CompiledInstruction { kernel = kernelConv, shape = O, tensors = isConvWinograd ? PrepareConv2dWinograd(model, l) : null }
                },
                shape = O
            });

            continue;
        }
        else if (
            l.type == Layer.Type.Upsample2D)
        {
            // axis is treated as upsample point/bilinear flag
            var bilinear = l.axis > 0;
            kernel = BestKernel(
                ComputeKernelLibrary.Upsample2D(X, O, l.pool, bilinear));
        }
        else if (
            l.type == Layer.Type.MaxPool2D ||
            l.type == Layer.Type.AvgPool2D)
        {
            var kernelName = l.type.ToString();

            Assert.IsNotNull(l.pool);
            Assert.IsNotNull(l.stride);
            Assert.IsNotNull(l.pad);
            kernel = BestKernel(
                ComputeKernelLibrary.Pool2D(X, O, kernelName));
        }
        else if (
            l.type == Layer.Type.GlobalMaxPool2D ||
            l.type == Layer.Type.GlobalAvgPool2D)
        {
            // Strip the leading "Global" from the layer type name to get the reduce kernel name.
            var poolKernelName = l.type.ToString().Substring(6) + "Reduce";
            var globalKernelName = l.type.ToString();

            var instructions = new List<CompiledInstruction>();
            var Xr = X;
            // Reduce repeatedly while the spatial size is at least 64, then finish with the global kernel.
            while (Xr.height * Xr.width >= 64)
            {
                var lastLength = Xr.length;
                var pool = new[] { 8, 8 };
                var stride = pool;
                var pad = new[] { 0, 0, 0, 0 };

                var Oshape = Xr.ApplyPool(pool, stride, pad, ceilMode: true);
                var Or = new TensorShape(Oshape.batch, IDivC(Oshape.height, 2), IDivC(Oshape.width, 2), Oshape.channels);

                var poolKernel = BestKernel(
                    ComputeKernelLibrary.Pool2DReduce(Xr, Or, poolKernelName));

                instructions.Add(new CompiledInstruction { kernel = poolKernel, shape = Or });

                Xr = Or;
                Assert.IsTrue(Xr.length < lastLength);
            }

            var globalKernel = BestKernel(
                ComputeKernelLibrary.GlobalPool2D(Xr, O, globalKernelName));

            instructions.Add(new CompiledInstruction { kernel = globalKernel, shape = O });

            m_CompiledLayers.Add(l, new CompiledLayer { instructions = instructions.ToArray(), shape = O });

            continue;
        }
        else if (
            l.type == Layer.Type.ScaleBias)
        {
            kernel = BestKernel(
                ComputeKernelLibrary.ScaleBias(X, O));
        }
        else if (
            l.type == Layer.Type.Normalization)
        {
            // GlobalAvgVariancePool2D
            var poolKernelName = "AvgVariancePool2DReduce";
            var globalKernelName = "GlobalAvgVariancePool2D";

            var instructions = new List<CompiledInstruction>();
            var Xr = X;
            while (Xr.height * Xr.width >= 64)
            {
                var lastLength = Xr.length;
                var pool = new[] { 8, 8 };
                var stride = pool;
                var pad = new[] { 0, 0, 0, 0 };

                var Oshape = Xr.ApplyPool(pool, stride, pad, ceilMode: true);
                var Or = new TensorShape(Oshape.batch, IDivC(Oshape.height, 2), IDivC(Oshape.width, 2), Oshape.channels);

                var poolKernel = BestKernel(
                    ComputeKernelLibrary.PoolAvgVar2D(Xr, Or, poolKernelName));

                instructions.Add(new CompiledInstruction { kernel = poolKernel, shape = Or });

                Xr = Or;
                Assert.IsTrue(Xr.length < lastLength);
            }

            var meanVariance = new TensorShape(Xr.batch, 2, 1, Xr.channels);
            var globalKernel = BestKernel(
                ComputeKernelLibrary.GlobalPool2D(Xr, meanVariance, globalKernelName));
            instructions.Add(new CompiledInstruction { kernel = globalKernel, shape = meanVariance });

            // ScaleBias
            var S = l.datasets[0].shape;
            var B = l.datasets[1].shape;
            Assert.AreEqual(X.channels, B.channels);
            Assert.AreEqual(X.channels, S.channels);
            Assert.AreEqual(B.length, B.channels);
            Assert.AreEqual(S.length, S.channels);
            var normlizationKernel = BestKernel(ComputeKernelLibrary.NormalizationTail(X, O));
            instructions.Add(new CompiledInstruction { kernel = normlizationKernel, shape = O });

            // FusedActivation
            var fusedActivation = (Layer.FusedActivation)l.activation;
            if (!IsFusedActivationSupported(fusedActivation))
            {
                var activationKernel = BestKernel(ComputeKernelLibrary.Activation(X, O, fusedActivation.ToString()));
                instructions.Add(new CompiledInstruction { kernel = activationKernel, shape = O });
            }
            else
            {
                // A kernel-less instruction is appended so the instruction count is the same in both cases.
                instructions.Add(new CompiledInstruction { shape = O });
            }

            m_CompiledLayers.Add(l, new CompiledLayer { instructions = instructions.ToArray(), shape = O });
            continue;
        }
        else if (
            l.type == Layer.Type.Add ||
            l.type == Layer.Type.Sub ||
            l.type == Layer.Type.Mul ||
            l.type == Layer.Type.Div ||
            l.type == Layer.Type.Pow ||
            l.type == Layer.Type.Min ||
            l.type == Layer.Type.Max ||
            l.type == Layer.Type.Mean
            )
        {
            var kernelName = "Broadcast" + l.type;
            kernel = BestKernel(
                ComputeKernelLibrary.Broadcast(X, O, kernelName));
        }
        else if (
            l.type == Layer.Type.Concat)
        {
            var instructions = new List<CompiledInstruction>();

            // One instruction per input; inputs without a known shape get an empty instruction.
            foreach (var input in l.inputs)
            {
                var I = shapesByName[input];

                if (I == null)
                {
                    instructions.Add(new CompiledInstruction { });
                    continue;
                }
                var kernelI = BestKernel(ComputeKernelLibrary.Copy(I.Value, O));

                instructions.Add(new CompiledInstruction { kernel = kernelI, shape = I.Value });
            }

            m_CompiledLayers.Add(l, new CompiledLayer { instructions = instructions.ToArray(), shape = O });
            continue;
        }
        // Activations
        else if (l.type == Layer.Type.Activation)
        {
            if (l.activation == Layer.Activation.Softmax)
            {
                kernel = BestKernel(
                    ComputeKernelLibrary.Softmax(X, O));
            }
            else if (l.activation == Layer.Activation.LogSoftmax)
            {
                kernel = BestKernel(
                    ComputeKernelLibrary.LogSoftmax(X, O));
            }
            else if (l.activation == Layer.Activation.PRelu)
            {
                kernel = BestKernel(
                    ComputeKernelLibrary.PRelu(X, O));
            }
            else if (l.activation != Layer.Activation.None)
            {
                var kernelName = l.activation.ToString();
                kernel = BestKernel(
                    ComputeKernelLibrary.Activation(X, O, kernelName));
            }
        }

        // Single-kernel layers (and layer types not handled above, which keep the default kernel).
        m_CompiledLayers.Add(l, new CompiledLayer { instructions = new CompiledInstruction[] { new CompiledInstruction { kernel = kernel, shape = O } }, shape = O });
    }
}
/// <summary>
/// Body of the ethash miner thread for one OpenCL device.
/// </summary>
/// <remarks>
/// Builds the ethash OpenCL program (or reuses the entry stored in <c>mProgramArray</c>), then
/// loops until <c>Stopped</c> is set: waits up to five seconds for a stratum job, regenerates
/// the DAG buffer whenever the job's epoch changes, and runs the "search" kernel over
/// successive nonce ranges, submitting every nonce the kernel reports. An exception raised
/// inside one pass of the loop is logged and the pass is restarted.
/// </remarks>
override unsafe protected void MinerThread()
{
    Random r = new Random();
    UInt32[] output = new UInt32[256];
    ComputeDevice computeDevice = Device.GetComputeDevice();

    MarkAsAlive();
    MainForm.Logger("Miner thread for Device #" + DeviceIndex + " started.");

    ComputeProgram program;

    // NOTE(review): arrays compare by reference as dictionary keys, so this lookup can only hit
    // if mProgramArray was created with a custom comparer -- confirm at its declaration.
    long[] programKey = new long[] { DeviceIndex, mLocalWorkSize };

    // Acquisition failures are ignored (best effort), exactly as before.
    try { mProgramArrayMutex.WaitOne(); } catch (Exception) { }
    try
    {
        if (mProgramArray.ContainsKey(programKey))
        {
            program = mProgramArray[programKey];
        }
        else
        {
            String source = System.IO.File.ReadAllText(@"Kernels\ethash.cl");
            program = new ComputeProgram(Context, source);
            MainForm.Logger("Loaded ethash program for Device #" + DeviceIndex + ".");
            String buildOptions = (Device.Vendor == "AMD"    ? "-O1 " :
                                   Device.Vendor == "NVIDIA" ? "" : // "-cl-nv-opt-level=1 -cl-nv-maxrregcount=256 " :
                                                               "")
                                  + " -IKernels -DWORKSIZE=" + mLocalWorkSize;
            try
            {
                program.Build(Device.DeviceList, buildOptions, null, IntPtr.Zero);
            }
            catch (Exception)
            {
                MainForm.Logger(program.GetBuildLog(computeDevice));
                throw;
            }
            MainForm.Logger("Built ethash program for Device #" + DeviceIndex + ".");
            MainForm.Logger("Built options: " + buildOptions);
            mProgramArray[programKey] = program;
        }
    }
    finally
    {
        // Release even when reading or building the kernel source throws; otherwise the other
        // miner threads would block on the mutex forever.
        try { mProgramArrayMutex.ReleaseMutex(); } catch (Exception) { }
    }

    while (!Stopped)
    {
        MarkAsAlive();

        try
        {
            // Wait for the first job to arrive.
            int elapsedTime = 0;
            while ((mStratum == null || mStratum.GetJob() == null) && elapsedTime < 5000)
            {
                Thread.Sleep(10);
                elapsedTime += 10;
            }
            if (mStratum == null || mStratum.GetJob() == null)
            {
                MainForm.Logger("Stratum server failed to send a new job.");
                //throw new TimeoutException("Stratum server failed to send a new job.");
                // NOTE(review): this path leaves the thread without calling MarkAsDone().
                return;
            }

            int epoch = -1;
            long DAGSize = 0;
            ComputeBuffer<byte> DAGBuffer = null;

            try
            {
                using (ComputeKernel DAGKernel = program.CreateKernel("GenerateDAG"))
                using (ComputeKernel searchKernel = program.CreateKernel("search"))
                using (ComputeBuffer<UInt32> outputBuffer = new ComputeBuffer<UInt32>(Context, ComputeMemoryFlags.ReadWrite, 256))
                using (ComputeBuffer<byte> headerBuffer = new ComputeBuffer<byte>(Context, ComputeMemoryFlags.ReadOnly, 32))
                {
                    MarkAsAlive();

                    System.Diagnostics.Stopwatch consoleUpdateStopwatch = new System.Diagnostics.Stopwatch();
                    EthashStratum.Work work;

                    while (!Stopped && (work = mStratum.GetWork()) != null)
                    {
                        String poolExtranonce = mStratum.PoolExtranonce;
                        byte[] extranonceByteArray = Utilities.StringToByteArray(poolExtranonce);
                        byte localExtranonce = work.LocalExtranonce;

                        // Bits of the 64-bit nonce taken by the pool extranonce plus the one-byte
                        // local extranonce; the remaining low bits are this thread's search range.
                        int extranonceBits = extranonceByteArray.Length * 8 + 8;
                        UInt64 nonceMask = 0xfffffffffffffffful >> extranonceBits;

                        UInt64 startNonce = (UInt64)localExtranonce << (8 * (7 - extranonceByteArray.Length));
                        for (int i = 0; i < extranonceByteArray.Length; ++i)
                        {
                            startNonce |= (UInt64)extranonceByteArray[i] << (8 * (7 - i));
                        }
                        startNonce += (ulong)r.Next(0, int.MaxValue) & nonceMask;

                        String jobID = work.GetJob().ID;
                        String headerhash = work.GetJob().Headerhash;
                        String seedhash = work.GetJob().Seedhash;
                        double difficulty = mStratum.Difficulty;

                        fixed (byte* p = Utilities.StringToByteArray(headerhash))
                        {
                            Queue.Write<byte>(headerBuffer, true, 0, 32, (IntPtr)p, null);
                        }

                        if (epoch != work.GetJob().Epoch)
                        {
                            if (DAGBuffer != null)
                            {
                                DAGBuffer.Dispose();
                                DAGBuffer = null;
                            }
                            epoch = work.GetJob().Epoch;
                            DAGCache cache = new DAGCache(epoch, work.GetJob().Seedhash);
                            DAGSize = Utilities.GetDAGSize(epoch);

                            System.Diagnostics.Stopwatch dagStopwatch = new System.Diagnostics.Stopwatch();
                            dagStopwatch.Start();
                            fixed (byte* p = cache.GetData())
                            {
                                // One work item per 8 DAG entries of 64 bytes, rounded up to a
                                // multiple of the local work size.
                                long globalWorkSize = DAGSize / 64;
                                globalWorkSize /= 8;
                                if (globalWorkSize % mLocalWorkSize > 0)
                                {
                                    globalWorkSize += mLocalWorkSize - globalWorkSize % mLocalWorkSize;
                                }

                                // 'using' releases the cache buffer even if DAG generation throws.
                                using (ComputeBuffer<byte> DAGCacheBuffer = new ComputeBuffer<byte>(Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, cache.GetData().Length, (IntPtr)p))
                                {
                                    DAGBuffer = new ComputeBuffer<byte>(Context, ComputeMemoryFlags.ReadWrite, globalWorkSize * 8 * 64 /* DAGSize */); // With this, we can remove a conditional statement in the DAG kernel.

                                    DAGKernel.SetValueArgument<UInt32>(0, 0);
                                    DAGKernel.SetMemoryArgument(1, DAGCacheBuffer);
                                    DAGKernel.SetMemoryArgument(2, DAGBuffer);
                                    DAGKernel.SetValueArgument<UInt32>(3, (UInt32)cache.GetData().Length / 64);
                                    DAGKernel.SetValueArgument<UInt32>(4, 0xffffffffu);

                                    for (long start = 0; start < DAGSize / 64; start += globalWorkSize)
                                    {
                                        Queue.Execute(DAGKernel, new long[] { start }, new long[] { globalWorkSize }, new long[] { mLocalWorkSize }, null);
                                        Queue.Finish();
                                        if (Stopped || !mStratum.GetJob().ID.Equals(jobID))
                                        {
                                            break;
                                        }
                                    }
                                }

                                // Abandon this work item if the job changed during generation.
                                if (Stopped || !mStratum.GetJob().ID.Equals(jobID))
                                {
                                    break;
                                }
                            }
                            dagStopwatch.Stop();
                            MainForm.Logger("Generated DAG for Epoch #" + epoch + " (" + (long)dagStopwatch.Elapsed.TotalMilliseconds + "ms).");
                        }

                        consoleUpdateStopwatch.Start();

                        while (!Stopped && mStratum.GetJob().ID.Equals(jobID) && mStratum.PoolExtranonce.Equals(poolExtranonce))
                        {
                            MarkAsAlive();

                            // Get a new local extranonce if necessary.
                            // '+' binds tighter than '&' in C#, so the nonce must be masked in its
                            // own parentheses before the work size is added; the previous
                            // expression evaluated startNonce & (mask + workSize) instead.
                            if (((startNonce & nonceMask) + (ulong)mGlobalWorkSize) >= ((ulong)0x1 << (64 - extranonceBits)))
                            {
                                break;
                            }

                            UInt64 target = (UInt64)((double)0xffff0000U / difficulty);
                            searchKernel.SetMemoryArgument(0, outputBuffer); // g_output
                            searchKernel.SetMemoryArgument(1, headerBuffer); // g_header
                            searchKernel.SetMemoryArgument(2, DAGBuffer); // _g_dag
                            searchKernel.SetValueArgument<UInt32>(3, (UInt32)(DAGSize / 128)); // DAG_SIZE
                            searchKernel.SetValueArgument<UInt64>(4, startNonce); // start_nonce
                            searchKernel.SetValueArgument<UInt64>(5, target); // target
                            searchKernel.SetValueArgument<UInt32>(6, 0xffffffffu); // isolate

                            System.Diagnostics.Stopwatch searchStopwatch = new System.Diagnostics.Stopwatch();
                            searchStopwatch.Start();
                            fixed (UInt32* p = output)
                            {
                                output[255] = 0; // output[255] is used as an atomic counter.
                                Queue.Write<UInt32>(outputBuffer, true, 0, 256, (IntPtr)p, null);
                                Queue.Execute(searchKernel, new long[] { 0 }, new long[] { mGlobalWorkSize }, new long[] { mLocalWorkSize }, null);
                                Queue.Read<UInt32>(outputBuffer, true, 0, 256, (IntPtr)p, null);
                            }
                            searchStopwatch.Stop();

                            mSpeed = ((double)mGlobalWorkSize) / searchStopwatch.Elapsed.TotalSeconds;
                            if (consoleUpdateStopwatch.ElapsedMilliseconds >= 10 * 1000)
                            {
                                MainForm.Logger("Device #" + DeviceIndex + ": " + String.Format("{0:N2} Mh/s", mSpeed / (1000000)));
                                consoleUpdateStopwatch.Restart();
                            }

                            // Only submit results that still belong to the current job.
                            if (mStratum.GetJob().ID.Equals(jobID))
                            {
                                for (int i = 0; i < output[255]; ++i)
                                {
                                    mStratum.Submit(GatelessGateDevice, work.GetJob(), startNonce + (UInt64)output[i]);
                                }
                            }
                            startNonce += (UInt64)mGlobalWorkSize;
                        }
                    }
                }
            }
            finally
            {
                // Runs on both the normal and the exception path, so the (large) DAG buffer is
                // not leaked when a pass is aborted and restarted.
                if (DAGBuffer != null)
                {
                    DAGBuffer.Dispose();
                    DAGBuffer = null;
                }
            }
        }
        catch (Exception ex)
        {
            MainForm.Logger("Exception in miner thread: " + ex.Message + ex.StackTrace);
            MainForm.Logger("Restarting miner thread...");
        }
    }

    MarkAsDone();
}
/// <summary>
/// Binds each entry of <paramref name="inputs"/> to the kernel argument at the same position.
/// </summary>
/// <typeparam name="TSource">Element type of array inputs; scalar inputs are unboxed to this type.</typeparam>
/// <param name="inputs">Values to bind, in kernel-argument order.</param>
/// <param name="kernel">The kernel whose arguments are set.</param>
/// <param name="length">Not read by this method.</param>
/// <param name="returnInputVariable">Not read by this method.</param>
/// <returns>The device buffers created for the array inputs, keyed by argument index.</returns>
private static Dictionary<int, ComputeBuffer<TSource>> BuildKernelArguments<TSource>(object[] inputs, ComputeKernel kernel, long length, int? returnInputVariable = null) where TSource : struct
{
    var buffersByIndex = new Dictionary<int, ComputeBuffer<TSource>>();
    Type sourceArrayType = typeof(TSource[]);

    for (int argIndex = 0; argIndex < inputs.Length; argIndex++)
    {
        object argument = inputs[argIndex];
        Type argumentType = argument.GetType();

        if (argumentType == sourceArrayType)
        {
            // Exact TSource[] match: copy the host array into a new read/write buffer.
            var deviceBuffer = new ComputeBuffer<TSource>(
                _context,
                ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer,
                (TSource[])argument);
            kernel.SetMemoryArgument(argIndex, deviceBuffer);
            buffersByIndex.Add(argIndex, deviceBuffer);
            continue;
        }

        if (argumentType.IsPrimitive)
        {
            // Unboxing cast: the boxed value must already be of type TSource.
            kernel.SetValueArgument(argIndex, (TSource)argument);
        }

        // Any other input is skipped; its argument slot is left untouched.
    }

    return buffersByIndex;
}
/// <summary>
/// Runs <paramref name="forwardKernel"/> over <paramref name="input"/> and passes the index
/// buffer it produces to <paramref name="getForwardResult"/>.
/// </summary>
/// <param name="input">Input array; <c>Shape[0..2]</c> and <c>BatchCount</c> size the launch.</param>
/// <param name="kernelWidth">Pooling window width.</param>
/// <param name="kernelHeight">Pooling window height.</param>
/// <param name="strideX">Horizontal stride.</param>
/// <param name="strideY">Vertical stride.</param>
/// <param name="padX">Horizontal padding.</param>
/// <param name="padY">Vertical padding.</param>
/// <param name="coverAll">When true, <c>stride - 1</c> is added before dividing by the stride when sizing the output.</param>
/// <param name="outputIndicesList">Forwarded unchanged to <paramref name="getForwardResult"/>.</param>
/// <param name="getForwardResult">Callback that turns the index buffer into the result array.</param>
/// <param name="forwardKernel">OpenCL kernel to execute.</param>
/// <param name="maxPooling2d">Forwarded unchanged to <paramref name="getForwardResult"/>.</param>
/// <returns>The value returned by <paramref name="getForwardResult"/>.</returns>
public static NdArray<Real> SingleInputForward(NdArray<Real> input, int kernelWidth, int kernelHeight, int strideX, int strideY, int padX, int padY, bool coverAll, List<int[]> outputIndicesList, Func<NdArray<Real>, int[], int, int, List<int[]>, IFunction<Real>, NdArray<Real>> getForwardResult, ComputeKernel forwardKernel, IFunction<Real> maxPooling2d)
{
    int outputHeight;
    int outputWidth;

    if (coverAll)
    {
        outputHeight = (int)Math.Floor((input.Shape[1] - kernelHeight + padY * 2.0f + strideY - 1.0f) / strideY) + 1;
        outputWidth = (int)Math.Floor((input.Shape[2] - kernelWidth + padX * 2.0f + strideX - 1.0f) / strideX) + 1;
    }
    else
    {
        outputHeight = (int)Math.Floor((input.Shape[1] - kernelHeight + padY * 2.0f) / strideY) + 1;
        outputWidth = (int)Math.Floor((input.Shape[2] - kernelWidth + padX * 2.0f) / strideX) + 1;
    }

    // One index per channel, output cell and batch entry.
    int[] outputIndices = new int[input.Shape[0] * outputHeight * outputWidth * input.BatchCount];

    using (ComputeBuffer<Real> gpuX = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, input.Data))
    using (ComputeBuffer<int> gpuYIndex = new ComputeBuffer<int>(OpenCL.Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, outputIndices.Length))
    {
        // Arguments are bound in kernel-parameter order; 'slot' ends at 13.
        int slot = 0;
        forwardKernel.SetMemoryArgument(slot++, gpuX);
        forwardKernel.SetMemoryArgument(slot++, gpuYIndex);
        forwardKernel.SetValueArgument(slot++, outputHeight);
        forwardKernel.SetValueArgument(slot++, outputWidth);
        forwardKernel.SetValueArgument(slot++, input.Shape[0]);
        forwardKernel.SetValueArgument(slot++, input.Shape[1]);
        forwardKernel.SetValueArgument(slot++, input.Shape[2]);
        forwardKernel.SetValueArgument(slot++, kernelHeight);
        forwardKernel.SetValueArgument(slot++, kernelWidth);
        forwardKernel.SetValueArgument(slot++, strideX);
        forwardKernel.SetValueArgument(slot++, strideY);
        forwardKernel.SetValueArgument(slot++, padY);
        forwardKernel.SetValueArgument(slot++, padX);

        long[] globalWorkSize = new long[] { input.BatchCount * input.Shape[0], outputHeight, outputWidth };
        OpenCL.CommandQueue.Execute(forwardKernel, null, globalWorkSize, null, null);
        OpenCL.CommandQueue.Finish();

        // Blocking read of the kernel's output back into the host array.
        OpenCL.CommandQueue.ReadFromBuffer(gpuYIndex, ref outputIndices, true, null);
    }

    return getForwardResult(input, outputIndices, outputWidth, outputHeight, outputIndicesList, maxPooling2d);
}
/// <summary>
/// Creates the OpenCL context and command queue, builds the generator, quadtree and
/// vertex-winder programs, allocates their device buffers and binds the kernel arguments
/// that do not change between invocations.
/// </summary>
public TerrainGen()
{
    // CPU_DEBUG builds take the second platform; all other builds take the first.
#if CPU_DEBUG
    var platform = ComputePlatform.Platforms[1];
#else
    var platform = ComputePlatform.Platforms[0];
#endif
    _devices = new List<ComputeDevice>();
    _devices.Add(platform.Devices[0]);
    _properties = new ComputeContextPropertyList(platform);
    _context = new ComputeContext(_devices, _properties, null, IntPtr.Zero);
    _cmdQueue = new ComputeCommandQueue(_context, _devices[0], ComputeCommandQueueFlags.None);

    #region setup generator kernel

    bool loadFromSource = Gbl.HasRawHashChanged[Gbl.RawDir.Scripts];
    // NOTE(review): the hash check above is immediately overridden, so programs are always
    // built from source -- confirm whether this is a debugging leftover.
    loadFromSource = true;

    _chunkWidthInBlocks = Gbl.LoadContent<int>("TGen_ChunkWidthInBlocks");
    _chunkWidthInVerts = _chunkWidthInBlocks + 1;
    _blockWidth = Gbl.LoadContent<int>("TGen_BlockWidthInMeters");

    float lacunarity = Gbl.LoadContent<float>("TGen_Lacunarity");
    float gain = Gbl.LoadContent<float>("TGen_Gain");
    int octaves = Gbl.LoadContent<int>("TGen_Octaves");
    float offset = Gbl.LoadContent<float>("TGen_Offset");
    float hScale = Gbl.LoadContent<float>("TGen_HScale");
    float vScale = Gbl.LoadContent<float>("TGen_VScale");

    // The eight generator constants are uploaded as floats in this fixed order.
    _genConstants = new ComputeBuffer<float>(_context, ComputeMemoryFlags.ReadOnly, 8);
    float[] generatorConstants =
    {
        lacunarity, gain, offset, octaves, hScale, vScale, _blockWidth, _chunkWidthInBlocks
    };
    _cmdQueue.WriteToBuffer(generatorConstants, _genConstants, false, null);

    //use option -I + scriptDir for header search
    _generationPrgm = LoadOrBuildProgram(loadFromSource, "TGen_Generator", @"-g -s D:\Projects\Gondola\Scripts\GenTerrain.cl");
    //loadFromSource = false;

    _terrainGenKernel = _generationPrgm.CreateKernel("GenTerrain");
    _normalGenKernel = _generationPrgm.CreateKernel("GenNormals");

    var vertCount = _chunkWidthInVerts * _chunkWidthInVerts;

    //despite the script using float3 for these fields, we need to consider it to be float4 because the
    //implementation is basically a float4 wrapper that uses zero for the last variable
    _geometry = new ComputeBuffer<float>(_context, ComputeMemoryFlags.None, vertCount * 4);
    _normals = new ComputeBuffer<ushort>(_context, ComputeMemoryFlags.None, vertCount * 4);
    _binormals = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, vertCount * 4);
    _tangents = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, vertCount * 4);
    _uvCoords = new ComputeBuffer<float>(_context, ComputeMemoryFlags.None, vertCount * 2);

    // Arguments 1 and 2 of both generator kernels are not bound here.
    _terrainGenKernel.SetMemoryArgument(0, _genConstants);
    _terrainGenKernel.SetMemoryArgument(3, _geometry);
    _terrainGenKernel.SetMemoryArgument(4, _uvCoords);

    _normalGenKernel.SetMemoryArgument(0, _genConstants);
    _normalGenKernel.SetMemoryArgument(3, _geometry);
    _normalGenKernel.SetMemoryArgument(4, _normals);
    _normalGenKernel.SetMemoryArgument(5, _binormals);
    _normalGenKernel.SetMemoryArgument(6, _tangents);

    #endregion

    #region setup quadtree kernel

    _qTreePrgm = LoadOrBuildProgram(loadFromSource, "TGen_QTree", @"-g -s D:\Projects\Gondola\Scripts\Quadtree.cl");

    _qTreeKernel = _qTreePrgm.CreateKernel("QuadTree");
    _crossCullKernel = _qTreePrgm.CreateKernel("CrossCull");

    _activeVerts = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, vertCount);
    _dummy = new ComputeBuffer<int>(_context, ComputeMemoryFlags.None, 50);

    // Normals start zeroed; every vertex starts flagged with 1.
    var rawNormals = new ushort[vertCount * 4];
    _emptyVerts = new byte[vertCount];
    for (int vert = 0; vert < _emptyVerts.Length; vert++)
    {
        _emptyVerts[vert] = 1;
    }
    _cmdQueue.WriteToBuffer(rawNormals, _normals, true, null);
    _cmdQueue.WriteToBuffer(_emptyVerts, _activeVerts, true, null);

    // Argument 0 of both quadtree kernels is not bound here.
    _qTreeKernel.SetValueArgument(1, _chunkWidthInBlocks);
    _qTreeKernel.SetMemoryArgument(2, _normals);
    _qTreeKernel.SetMemoryArgument(3, _activeVerts);
    _qTreeKernel.SetMemoryArgument(4, _dummy);

    _crossCullKernel.SetValueArgument(1, _chunkWidthInBlocks);
    _crossCullKernel.SetMemoryArgument(2, _normals);
    _crossCullKernel.SetMemoryArgument(3, _activeVerts);
    _crossCullKernel.SetMemoryArgument(4, _dummy);

    #endregion

    #region setup winding kernel

    _winderPrgm = LoadOrBuildProgram(loadFromSource, "TGen_VertexWinder", @"-g -s D:\Projects\Gondola\Scripts\VertexWinder.cl");

    _winderKernel = _winderPrgm.CreateKernel("VertexWinder");

    var indexCount = _chunkWidthInBlocks * _chunkWidthInBlocks * 8;
    _indicies = new ComputeBuffer<int>(_context, ComputeMemoryFlags.None, indexCount);

    _winderKernel.SetMemoryArgument(0, _activeVerts);
    _winderKernel.SetMemoryArgument(1, _indicies);

    // A freshly allocated int[] is already all zeros, so no explicit fill is needed.
    _emptyIndices = new int[indexCount];
    _cmdQueue.WriteToBuffer(_emptyIndices, _indicies, true, null);

    #endregion

    if (loadFromSource)
    {
        Gbl.AllowMD5Refresh[Gbl.RawDir.Scripts] = true;
    }

    _cmdQueue.Finish();
}

/// <summary>
/// Creates and builds one OpenCL program, either from its script source (saving the
/// resulting binaries under the same name) or from previously saved binaries.
/// </summary>
/// <param name="loadFromSource">True to compile the script; false to load the saved binary.</param>
/// <param name="contentName">Name used for the script, and for saving/loading its binary.</param>
/// <param name="debugBuildOptions">Build options used for source builds when CPU_DEBUG is defined.</param>
/// <returns>The built program.</returns>
private ComputeProgram LoadOrBuildProgram(bool loadFromSource, string contentName, string debugBuildOptions)
{
    ComputeProgram program;

    if (loadFromSource)
    {
        program = new ComputeProgram(_context, Gbl.LoadScript(contentName));
#if CPU_DEBUG
        program.Build(null, debugBuildOptions, null, IntPtr.Zero);
#else
        program.Build(null, "", null, IntPtr.Zero);
#endif
        Gbl.SaveBinary(program.Binaries, contentName);
    }
    else
    {
        var binary = Gbl.LoadBinary(contentName);
        program = new ComputeProgram(_context, binary, _devices);
        program.Build(null, "", null, IntPtr.Zero);
    }

    return program;
}
/// <summary>
/// Reads every file listed in <c>CLSourcePaths</c>, builds them into a single OpenCL program
/// and creates the "render" kernel from it.
/// </summary>
/// <remarks>
/// When <c>CLSourcePaths</c> is null only a trace message is written. A missing source file,
/// or a build failure reported through one of the three caught exception types, terminates
/// the process with exit code -1.
/// </remarks>
protected virtual void buildOpenCLProgram()
{
    if (CLSourcePaths == null)
    {
        System.Diagnostics.Trace.Write("No CL source defined.\n");
        return;
    }

    String[] sourceArray = new String[CLSourcePaths.Length];

    try
    {
        for (int i = 0; i < CLSourcePaths.Length; i++)
        {
            // Dispose the reader so the file handle is released as soon as the text is read;
            // previously every reader stayed open until finalization.
            using (StreamReader sourceReader = new StreamReader(CLSourcePaths[i]))
            {
                sourceArray[i] = sourceReader.ReadToEnd();
            }
        }
    }
    catch (FileNotFoundException e)
    {
        System.Diagnostics.Trace.Write("Can't find: " + e.FileName + "\n");
        Environment.Exit(-1);
    }

    // Build and compile the OpenCL program
    _renderKernel = null;
    _renderProgram = new ComputeProgram(_commandQueue.Context, sourceArray);

    try
    {
        // build the program
        _renderProgram.Build(null, "-cl-nv-verbose", null, IntPtr.Zero);

        // create a reference a kernel function
        _renderKernel = _renderProgram.CreateKernel("render");
    }
    catch (BuildProgramFailureComputeException)
    {
        printBuildLog();
        Environment.Exit(-1);
    }
    catch (InvalidBuildOptionsComputeException)
    {
        printBuildLog();
        Environment.Exit(-1);
    }
    catch (InvalidBinaryComputeException)
    {
        printBuildLog();
        Environment.Exit(-1);
    }
}
/// <summary>
/// Binds each entry of <paramref name="inputs"/> to the kernel argument at the same position.
/// </summary>
/// <remarks>
/// Arrays and <c>XArray</c> values are wrapped in a <c>GenericArrayMemory</c> and bound as
/// memory arguments; every other value is pinned and passed by address as a raw argument.
/// </remarks>
/// <param name="method">Kernel description; the IO mode of parameter <c>i</c> selects the buffer flags.</param>
/// <param name="inputs">Values to bind, in kernel-argument order.</param>
/// <param name="kernel">The kernel whose arguments are set.</param>
/// <param name="length">Not read by this method.</param>
/// <param name="returnInputVariable">Not read by this method.</param>
/// <returns>The memory objects created for array-like inputs, keyed by argument index.</returns>
private Dictionary<int, GenericArrayMemory> BuildKernelArguments(KernelFunction method, object[] inputs, ComputeKernel kernel, long length, int? returnInputVariable = null)
{
    int i = 0;
    Dictionary<int, GenericArrayMemory> result = new Dictionary<int, GenericArrayMemory>();

    foreach (var item in inputs)
    {
        Type itemType = item.GetType();
        bool isArray = itemType.IsArray;
        // XArray is recognised by type name, either directly or on the immediate base type.
        bool isXArray = !isArray && (itemType.Name == "XArray" || itemType.BaseType.Name == "XArray");

        if (isArray || isXArray)
        {
            var mode = method.Parameters.ElementAt(i).Value.IOMode;

            // Output-only parameters get a freshly allocated buffer; all others are
            // initialised from the host data.
            var flag = ComputeMemoryFlags.ReadWrite;
            flag |= mode == IOMode.Out
                ? ComputeMemoryFlags.AllocateHostPointer
                : ComputeMemoryFlags.CopyHostPointer;

            GenericArrayMemory mem = isArray
                ? new GenericArrayMemory(_context, flag, (Array)item)
                : new GenericArrayMemory(_context, flag, (XArray)item);

            kernel.SetMemoryArgument(i, mem);
            result.Add(i, mem);
        }
        else
        {
            int size = Marshal.SizeOf(item);
            GCHandle pinned = GCHandle.Alloc(item, GCHandleType.Pinned);
            try
            {
                kernel.SetArgument(i, new IntPtr(size), pinned.AddrOfPinnedObject());
            }
            finally
            {
                // clSetKernelArg copies the argument data before returning, so the pin is no
                // longer needed; previously the handle was never freed and leaked per call.
                pinned.Free();
            }
        }

        i++;
    }

    return result;
}
/// <summary>
/// Creates a calculator that keeps references to the supplied OpenCL objects.
/// </summary>
/// <param name="context">Compute context stored in <c>_context</c>.</param>
/// <param name="prg">Compute program stored in <c>_prg</c>.</param>
/// <param name="krnl">Compute kernel stored in <c>_krnl</c>.</param>
public OpenCLCalculator(ComputeContext context, ComputeProgram prg, ComputeKernel krnl)
{
    this._krnl = krnl;
    this._prg = prg;
    this._context = context;
}