/// <summary>
/// Enqueues the kernel named <paramref name="Method"/> on <c>queue</c> and asynchronously waits on a
/// per-invocation <c>AsyncManualResetEvent</c> registered in <c>CompletionLocks</c>.
/// </summary>
/// <param name="Method">Name of the kernel; passed to <c>CreateKernel</c> and to the abort handler.</param>
/// <param name="worksize">One-dimensional global work size.</param>
/// <param name="Args">Kernel arguments forwarded to <c>CreateKernel</c>.</param>
/// <returns>A task that completes once the wait handle registered for this invocation is set.</returns>
public async Task InvokeAsync(string Method, long worksize, params object[] Args)
{
    ComputeKernel kernel = CreateKernel(Method, Args);
    ComputeEventList eventList = new ComputeEventList();

    InvokeStarted?.Invoke(this, EventArgs.Empty);

    // Unique job id under which this invocation's wait handle is registered.
    string jid = Guid.NewGuid().ToString();
    AsyncManualResetEvent evt = new AsyncManualResetEvent(false);
    lock (CompletionLocks)
    {
        CompletionLocks.Add(jid, evt);
    }

    try
    {
        queue.Execute(kernel, null, new long[] { worksize }, null, eventList);

        // NOTE(review): the handlers are attached after the command is enqueued; presumably
        // EasyCL_Completed sets the handle registered under jid - confirm against that method.
        eventList[0].Completed += (sender, e) => EasyCL_Completed(sender, jid);
        eventList[0].Aborted += (sender, e) => EasyCL_Aborted(sender, Method);

        await evt.WaitAsync();
    }
    finally
    {
        // Always unregister the handle, even when Execute or the wait throws, so that
        // failed invocations do not accumulate entries in CompletionLocks.
        lock (CompletionLocks)
        {
            CompletionLocks.Remove(jid);
        }
    }
}
/// <summary>
/// Runs the shared OpenCL <c>kernel</c> over <paramref name="a"/> and <paramref name="b"/> and reads the
/// device output back into <paramref name="result"/>.
/// </summary>
/// <param name="result">Destination matrix; overwritten with the kernel output.</param>
/// <param name="a">First input matrix, copied to the device.</param>
/// <param name="b">Second input matrix, copied to the device.</param>
public unsafe static void MatrixMulti_OpenCL(double[,] result, double[,] a, double[,] b)
{
    InitCloo();

    // Lengths of dimension 0 and dimension 1 of the result (variable names kept from the original).
    var ncols = result.GetUpperBound(0) + 1;
    var nrows = result.GetUpperBound(1) + 1;

    // All three arrays stay pinned until the queue has finished.
    fixed (double* rp = result, ap = a, bp = b)
    {
        ComputeEventList events = new ComputeEventList();

        // Device buffers and the queue are created per call, so they are released per call as well.
        using (ComputeBuffer<double> aBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, a.Length, (IntPtr)ap))
        using (ComputeBuffer<double> bBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, b.Length, (IntPtr)bp))
        using (ComputeBuffer<double> rBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.WriteOnly, result.Length))
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            try
            {
                kernel.SetMemoryArgument(0, aBuffer);
                kernel.SetMemoryArgument(1, bBuffer);
                kernel.SetValueArgument(2, ncols);
                kernel.SetMemoryArgument(3, rBuffer);

                // One work item per element of the result.
                commands.Execute(kernel, null, new long[] { result.Length }, null, events);
                commands.ReadFromBuffer(rBuffer, ref result, false, new SysIntX2(), new SysIntX2(), new SysIntX2(ncols, nrows), events);

                // The read above is non-blocking; wait here before the arrays are unpinned.
                commands.Finish();
            }
            finally
            {
                // Events hold native OpenCL handles too.
                foreach (var pending in events)
                {
                    pending.Dispose();
                }
                events.Clear();
            }
        }
    }
}
/// <summary>
/// Creates a <see cref="Julia"/> instance that starts in sequential CPU mode and sets up the
/// OpenCL platform, context, program, "julia" kernel, command queue and event list.
/// </summary>
public Julia()
    : base()
{
    Mode = ConcurrencyMode.SequentialCPU;

    // Parallel loops may use every logical processor.
    options = new ParallelOptions { MaxDegreeOfParallelism = Environment.ProcessorCount };

    // OpenCL context over all devices of the first platform.
    platform = ComputePlatform.Platforms[0];
    properties = new ComputeContextPropertyList(platform);
    context = new ComputeContext(platform.Devices, properties, null, IntPtr.Zero);

    // Compile the kernel source and look up the entry point.
    program = new ComputeProgram(context, new[] { kernelSource });
    program.Build(null, "-cl-mad-enable", null, IntPtr.Zero);
    kernel = program.CreateKernel("julia");

    // Objects needed to launch the kernel later: a queue on the first device and an event list.
    commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
    events = new ComputeEventList();
}
/// <summary>
/// Loads the OpenCL source file for <paramref name="name"/>, builds it for the devices of
/// <c>Example.context</c> and creates one kernel per entry of <paramref name="kernel_names"/>.
/// </summary>
/// <param name="name">File name without directory prefix and extension (<c>_paths</c> and <c>_exts</c> are added).</param>
/// <param name="compile_options">Options passed to the OpenCL compiler.</param>
/// <param name="kernel_names">Names of the kernel functions to create, in order.</param>
public Kernel(string name, string compile_options, string [] kernel_names)
{
    _path = _paths + name + _exts;

    // The using block closes the reader even when reading fails.
    string source;
    using (StreamReader streamReader = new StreamReader(_path))
    {
        source = streamReader.ReadToEnd();
    }

    // Create and build the OpenCL program.
    _program = new ComputeProgram(Example.context, source);
    _program.Build(Example.context.Devices, compile_options, null, IntPtr.Zero);

    // Create the kernel functions in the order requested.
    foreach (string kernelName in kernel_names)
    {
        _kernels.Add(_program.CreateKernel(kernelName));
    }

    _eventList = new ComputeEventList();

    // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
    _commands = new ComputeCommandQueue(Example.context, Example.context.Devices[0], ComputeCommandQueueFlags.None);

    Console.WriteLine(_path);
}
/// <summary>
/// Creates the kernel <paramref name="Method"/> with <paramref name="Args"/>, enqueues it and
/// blocks until the queue has finished.
/// </summary>
/// <param name="Method">Kernel name; also passed to the abort handler.</param>
/// <param name="Offset">Global work offset (one dimension).</param>
/// <param name="Worksize">Global work size (one dimension).</param>
/// <param name="Args">Kernel arguments forwarded to <c>CreateKernel</c>.</param>
public void Invoke(string Method, long Offset, long Worksize, params object[] Args)
{
    ComputeKernel compiled = CreateKernel(Method, Args);
    var pending = new ComputeEventList();
    long[] globalOffset = { Offset };
    long[] globalSize = { Worksize };

    InvokeStarted?.Invoke(this, EventArgs.Empty);

    queue.Execute(compiled, globalOffset, globalSize, null, pending);

    // Hook completion/abort notifications onto the event produced by Execute.
    var kernelEvent = pending[0];
    kernelEvent.Completed += (sender, e) => EasyCL_Completed(sender, null);
    kernelEvent.Aborted += (sender, e) => EasyCL_Aborted(sender, Method);

    queue.Finish();
}
/// <summary>
/// Works through the queen-task partition in batches on the second device of
/// <paramref name="context"/>, then prints the elapsed seconds and the summed solution count.
/// </summary>
/// <param name="context">OpenCL context; <c>context.Devices[1]</c> is used for the queue.</param>
/// <param name="kernel">Kernel whose argument 0 receives the task buffer of each batch.</param>
private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
{
    var todos = GetQueenTaskPartition(NumQueens, 4);
    var done = new List<QueenTask>();
    ComputeEventList eventList = new ComputeEventList();

    using (var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None))
    {
        Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

        QueenTask[] inProgress = GetNextAssignment(new QueenTask[] {}, todos, done);

        var sw = new Stopwatch();
        sw.Start();

        while (inProgress.Any())
        {
            // A fresh device buffer is needed per batch; release it once the batch has been read
            // back so that buffers do not pile up across iterations.
            using (var taskBuffer = new ComputeBuffer<QueenTask>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, inProgress))
            {
                kernel.SetMemoryArgument(0, taskBuffer);
                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                // The kernel is enqueued 12 times per batch before the results are read back.
                for (int i = 0; i < 12; i++)
                {
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);
                }

                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();
            }

            // Everything in the list has completed after Finish(); drop the events so the list
            // (which is also passed as the wait list) does not grow with every batch.
            foreach (var finished in eventList)
            {
                finished.Dispose();
            }
            eventList.Clear();

            inProgress = GetNextAssignment(inProgress, todos, done);
        }

        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);
    }

    ulong sum = done.Select(state => state.solutions)
                .Aggregate((total, next) => total + next);
    Console.WriteLine("Q({0})={1}", NumQueens, sum);
}
/// <summary>
/// Re-runs <c>LastKernel</c> without creating a kernel or passing arguments again, and
/// blocks until the queue has finished.
/// </summary>
/// <param name="Method">Kernel name; only passed to the abort handler here.</param>
/// <param name="Offset">Global work offset (one dimension).</param>
/// <param name="Worksize">Global work size (one dimension).</param>
/// <exception cref="InvalidOperationException">Thrown when <c>LastKernel</c> is null.</exception>
public void Invoke(string Method, long Offset, long Worksize)
{
    if (LastKernel == null)
    {
        throw new InvalidOperationException("You need to call Invoke with arguments before. All Arguments are saved");
    }

    var pending = new ComputeEventList();
    long[] globalOffset = { Offset };
    long[] globalSize = { Worksize };

    InvokeStarted?.Invoke(this, EventArgs.Empty);

    queue.Execute(LastKernel, globalOffset, globalSize, null, pending);

    // Hook completion/abort notifications onto the event produced by Execute.
    var kernelEvent = pending[0];
    kernelEvent.Completed += (sender, e) => EasyCL_Completed(sender, null);
    kernelEvent.Aborted += (sender, e) => EasyCL_Aborted(sender, Method);

    queue.Finish();
}
/// <summary>
/// Processes the queen-task partition batch by batch on the second device of
/// <paramref name="context"/> and prints the elapsed time and the total number of solutions.
/// </summary>
/// <param name="context">OpenCL context; the queue is created on <c>context.Devices[1]</c>.</param>
/// <param name="kernel">Kernel to run; argument 0 is bound to the task buffer of the current batch.</param>
private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
{
    var todos = GetQueenTaskPartition(NumQueens, 4);
    var done = new List<QueenTask>();
    ComputeEventList eventList = new ComputeEventList();
    var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None);

    try
    {
        Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

        QueenTask[] inProgress = GetNextAssignment(new QueenTask[] {}, todos, done);

        var sw = new Stopwatch();
        sw.Start();

        while (inProgress.Any())
        {
            var taskBuffer = new ComputeBuffer<QueenTask>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, inProgress);
            try
            {
                kernel.SetMemoryArgument(0, taskBuffer);
                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                // The kernel is enqueued 12 times per batch before the results are read back.
                for (int i = 0; i < 12; i++)
                {
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);
                }

                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();
            }
            finally
            {
                // The per-batch buffer and its events are no longer needed once the queue is idle;
                // releasing them here keeps native resources from accumulating across batches.
                taskBuffer.Dispose();
                foreach (var finished in eventList)
                {
                    finished.Dispose();
                }
                eventList.Clear();
            }

            inProgress = GetNextAssignment(inProgress, todos, done);
        }

        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);
    }
    finally
    {
        commands.Dispose();
    }

    ulong sum = done.Select(state => state.solutions)
                .Aggregate((total, next) => total + next);
    Console.WriteLine("Q({0})={1}", NumQueens, sum);
}
/// <summary>
/// Enables OpenCL in EmguCV, creates the Cloo GPU context, builds the program found at
/// <c>LASER_CL_PATH</c>, creates its four kernels and a command queue on the first device of the context.
/// Failures are reported through message boxes instead of being rethrown.
/// </summary>
private void InitClooApi()
{
    try
    {
        CvInvoke.UseOpenCL = true;

        // NOTE(review): index 1 is the SECOND entry of ComputePlatform.Platforms (the original
        // comment said "first platform") - confirm this is the intended platform.
        ComputePlatform platform = ComputePlatform.Platforms[1];

        // create context with all gpu devices
        clooCtx = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

        // load opencl source; the using block closes the file even when reading fails
        string clSource;
        using (StreamReader streamReader = new StreamReader(LASER_CL_PATH))
        {
            clSource = streamReader.ReadToEnd();
        }

        // create and compile the program
        ctxLaserCL = new ComputeProgram(clooCtx, clSource);
        ctxLaserCL.Build(null, null, null, IntPtr.Zero);

        // load the kernels from the program
        ctxMinMaxKernel = ctxLaserCL.CreateKernel("minMaxValues");
        ctxMaskImageKernel = ctxLaserCL.CreateKernel("maskImage");
        ctxCenterMassKernel = ctxLaserCL.CreateKernel("centerMass");
        ctxTransform3DKernel = ctxLaserCL.CreateKernel("transformPixelsTo3D");

        // create a command queue with first gpu found
        queue = new ComputeCommandQueue(clooCtx, clooCtx.Devices[0], ComputeCommandQueueFlags.None);

        events = new ComputeEventList();
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message);

        // The build log only exists once both the context and the program have been created;
        // earlier failures (missing platform, unreadable file, ...) must not crash the handler.
        if (ctxLaserCL != null && clooCtx != null)
        {
            MessageBox.Show(ctxLaserCL.GetBuildLog(clooCtx.Devices[0]));
        }
    }
}
/// <summary>
/// Uploads the trade data, binds all kernel arguments, runs <paramref name="kernel"/> over
/// <paramref name="dimensions"/> and reads the fit-function buffer back into <c>FitFunction</c>.
/// </summary>
/// <param name="context">OpenCL context in which the per-call buffers are created.</param>
/// <param name="kernel">Kernel to execute; arguments 0-6 are set here.</param>
/// <param name="commands">Queue used for execution and read-back.</param>
/// <param name="dimensions">Global work size.</param>
public override void RunKernel(ComputeContext context, ComputeKernel kernel, ComputeCommandQueue commands, long[] dimensions)
{
    // These three buffers are created on every call, so they are released on every call.
    // NOTE(review): assumes the kernel is not enqueued elsewhere without arguments 3-5 being
    // set again first - confirm against callers.
    using (var tradeprofitsBuffer = new ComputeBuffer<short>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, TradeProfits))
    using (var tradearbitrageBuffer = new ComputeBuffer<byte>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, TradeArbitrage))
    using (var fit_functionBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, FitFunction))
    {
        kernel.SetMemoryArgument(0, allvarsBuffer);
        kernel.SetMemoryArgument(1, loboundsBuffer);
        kernel.SetMemoryArgument(2, upboundsBuffer);
        kernel.SetMemoryArgument(3, tradeprofitsBuffer);
        kernel.SetMemoryArgument(4, tradearbitrageBuffer);
        kernel.SetMemoryArgument(5, fit_functionBuffer);
        kernel.SetValueArgument<int>(6, VariablesCount);

        var eventList = new ComputeEventList();
        try
        {
            commands.Execute(kernel, null, dimensions, null, eventList);

            // Blocking read: FitFunction holds the results when this call returns.
            commands.ReadFromBuffer(fit_functionBuffer, ref FitFunction, true, null);
            commands.Finish();
        }
        finally
        {
            // Release the event created by Execute.
            foreach (var finished in eventList)
            {
                finished.Dispose();
            }
            eventList.Clear();
        }
    }
}
/// <summary>
/// Uploads <c>input</c> and <c>weightIDs</c>, runs <c>kernel</c> with <paramref name="nofKernels"/>
/// work items on <c>commandQueue1</c> and reads <c>CB_output</c> back into <c>output</c>.
/// Exceptions are written to the console instead of being rethrown.
/// </summary>
/// <param name="nofKernels">Global work size (number of work items).</param>
public static void RunKernels(int nofKernels)
{
    // Events (like everything else) consume OpenCL resources, so the ones collected here are
    // released again in the finally block.
    ComputeEventList eventList = new ComputeEventList();

    try
    {
        // Non-blocking uploads; each call appends its event to eventList.
        commandQueue1.WriteToBuffer(input, CB_input, false, eventList);
        commandQueue1.WriteToBuffer(weightIDs, CB_networkIndex, false, eventList);

        // Execute the kernel with "nofKernels" work items. After this call returns, eventList
        // also contains the event associated with this command.
        commandQueue1.Execute(kernel, null, new long[] { nofKernels }, null, eventList);

        // Read back the results without blocking. By default the command queue executes commands
        // in the order they were issued from the host.
        commandQueue1.ReadFromBuffer(CB_output, ref output, false, eventList);

        // Because the read is non-blocking, an explicit wait is required before "output" can be
        // used: wait for the events in the list, then drain the queue.
        eventList.Wait();
        commandQueue1.Finish();
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }
    finally
    {
        foreach (var finished in eventList)
        {
            finished.Dispose();
        }
        eventList.Clear();
    }
}
/// <summary>
/// Sets up the OpenCL objects, the four random seeds, the rendered region, the iteration and
/// colour limits, and a pinned host output buffer of <c>width * height</c> pixels.
/// </summary>
public BuddhaCloo()
{
    // OpenCL objects: first platform, all of its devices, queue on the first device.
    clPlatform = ComputePlatform.Platforms[0];
    clProperties = new ComputeContextPropertyList(clPlatform);
    clContext = new ComputeContext(clPlatform.Devices, clProperties, null, IntPtr.Zero);
    clCommands = new ComputeCommandQueue(clContext, clContext.Devices[0], ComputeCommandQueueFlags.None);
    clEvents = new ComputeEventList();
    clProgram = new ComputeProgram(clContext, new[] { kernelSource });

    // Four seeds drawn from a freshly created generator.
    R = new Random();
    seed1 = (uint)R.Next();
    seed2 = (uint)R.Next();
    seed3 = (uint)R.Next();
    seed4 = (uint)R.Next();

    // Alternative presets that were previously tried (kept for reference):
    //   full view:  real [-1.5, 0.75], imaginary [-1.5, 1.5]
    //   deep zoom:  real [-1.05, -0.9], imaginary [-0.3, -0.225],
    //               minIter 20000, maxIter 200000, escapeOrbit 4.0,
    //               R 20000..60000, G 60000..100000, B 100000..200000

    // Region of the complex plane that is rendered.
    realMin = -1.22f;
    realMax = -1.0f;
    imaginaryMin = 0.16f;
    imaginaryMax = 0.32f;

    // Iteration limits and escape threshold.
    minIter = 20;
    maxIter = 1600;
    escapeOrbit = 4.0f;

    // Per-channel limits.
    minColor.R = 20;
    maxColor.R = 400;
    minColor.G = 400;
    maxColor.G = 800;
    minColor.B = 800;
    maxColor.B = 1600;

    // Output image size and the host buffer that receives it; the buffer is pinned.
    width = 1000;
    height = 700;
    h_outputBuffer = new ColorVectorRGBA[width * height];
    gc_outputBuffer = GCHandle.Alloc(h_outputBuffer, GCHandleType.Pinned);
}
// Initialization: creates the OpenCL context, queue, program and kernels, allocates the grid
// counter/index arrays and device buffers, and binds the grid buffers and grid description
// to the kernels.
void Awake()
{
    ComputePlatform firstPlatform = ComputePlatform.Platforms[0];
    var contextProperties = new ComputeContextPropertyList(firstPlatform);
    _context = new ComputeContext(ComputeDeviceTypes.Cpu, contextProperties, null, System.IntPtr.Zero);
    _queue = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None);

    _program = new ComputeProgram(_context, System.IO.File.ReadAllText(clProgramPath));
    try
    {
        _program.Build(null, null, null, System.IntPtr.Zero);
    }
    catch (BuildProgramFailureComputeException)
    {
        // Log the compiler output, then let the failure propagate.
        Debug.Log(_program.GetBuildLog(_context.Devices[0]));
        throw;
    }

    _events = new ComputeEventList();

    _updateGridKernel = _program.CreateKernel(clUpdateGridKernelName);
    _updateBoidsKernel = _program.CreateKernel(clUpdateBoidsKernelName);
    _boundaryKernel = _program.CreateKernel(clBoundaryKernelName);

    // One counter per grid cell, and maxIndices index slots per cell.
    _pointCounters = new int[nGridPartitions * nGridPartitions * nGridPartitions];
    _pointIndices = new int[_pointCounters.Length * maxIndices];

    _pointCountersBuffer = new Cloo.ComputeBuffer<int>(_context, ComputeMemoryFlags.WriteOnly, _pointCounters.Length);
    _pointIndicesBuffer = new Cloo.ComputeBuffer<int>(_context, ComputeMemoryFlags.WriteOnly, _pointIndices.Length);

    // Grid description handed to every kernel.
    GridInfo grid = new GridInfo();
    grid.worldOrigin = gridbounds.min;
    grid.worldSize = gridbounds.size;
    grid.cellSize = gridbounds.size * (1f / nGridPartitions);
    grid.nGridPartitions = nGridPartitions;
    grid.maxIndices = maxIndices;
    _gridInfo = grid;

    // Boundary kernel: argument 1 is the grid description.
    _boundaryKernel.SetValueArgument(1, _gridInfo);

    // Grid-update kernel: arguments 1-3.
    _updateGridKernel.SetMemoryArgument(1, _pointCountersBuffer);
    _updateGridKernel.SetMemoryArgument(2, _pointIndicesBuffer);
    _updateGridKernel.SetValueArgument(3, _gridInfo);

    // Boid-update kernel: arguments 2-4.
    _updateBoidsKernel.SetMemoryArgument(2, _pointCountersBuffer);
    _updateBoidsKernel.SetMemoryArgument(3, _pointIndicesBuffer);
    _updateBoidsKernel.SetValueArgument(4, _gridInfo);
}
/// <summary>
/// Runs the "test" kernel of <c>program</c> once on a temporary queue and reads its 16-element
/// output buffer back into a local array.
/// </summary>
/// <param name="programHandle">Not used by this method.</param>
/// <param name="userDataPtr">Not used by this method.</param>
private unsafe void notify(CLProgramHandle programHandle, IntPtr userDataPtr)
{
    uint[] dst = new uint[16];
    fixed (uint* dstPtr = dst)
    {
        // The queue, buffer and kernel are all temporary; release every one of them
        // (previously only the queue was disposed).
        using (var queue = new ComputeCommandQueue(ccontext, device, ComputeCommandQueueFlags.None))
        using (var buf = new ComputeBuffer<uint>(ccontext, ComputeMemoryFlags.WriteOnly, 16))
        using (var kernel = program.CreateKernel("test"))
        {
            kernel.SetValueArgument(0, 1443351125U);
            kernel.SetMemoryArgument(1, buf);

            // Three-dimensional global work size; no wait list and no event collection.
            queue.Execute(kernel, null, new long[] { 16L, 256L, 1048576L }, null, null);
            queue.Finish();

            // Blocking read: dst is filled when this call returns, so no further Finish() is needed.
            queue.Read<uint>(buf, true, 0, 16, (IntPtr)dstPtr, null);
        }
    }
}
/// <summary>
/// Vector-addition example: adds two random 10-element arrays on the first device of
/// <paramref name="context"/> and writes the results to <paramref name="log"/>.
/// Exceptions are written to <paramref name="log"/> instead of being rethrown.
/// </summary>
/// <param name="context">OpenCL context used for buffers, program and queue.</param>
/// <param name="log">Receives one line per element, or the exception text on failure.</param>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        ComputeBuffer<float> a = null;
        ComputeBuffer<float> b = null;
        ComputeBuffer<float> c = null;
        ComputeProgram builtProgram = null;
        ComputeKernel kernel = null;
        ComputeCommandQueue commands = null;

        // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
        // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
        // For this reason their use should be avoided if possible.
        ComputeEventList eventList = new ComputeEventList();

        try
        {
            // Create the arrays and fill them with random data.
            int count = 10;
            float[] arrA = new float[count];
            float[] arrB = new float[count];
            float[] arrC = new float[count];

            Random rand = new Random();
            for (int i = 0; i < count; i++)
            {
                arrA[i] = (float)(rand.NextDouble() * 100);
                arrB[i] = (float)(rand.NextDouble() * 100);
            }

            // Create the input buffers and fill them with data from the arrays.
            // Access modifiers should match those in a kernel.
            // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
            a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
            b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);

            // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
            c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

            // Create and build the opencl program. The field is still assigned as before; the local
            // copy guarantees that only the program created by this call is disposed below.
            program = builtProgram = new ComputeProgram(context, clProgramSource);
            builtProgram.Build(null, null, null, IntPtr.Zero);

            // Create the kernel function and set its arguments.
            kernel = builtProgram.CreateKernel("VectorAdd");
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);

            // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
            commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
            // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
            commands.Execute(kernel, null, new long[] { count }, null, eventList);

            // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer
            // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete
            // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
            // eventList will contain two events after this method returns.
            commands.ReadFromBuffer(c, ref arrC, false, eventList);

            // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
            // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands
            // to finish has to be issued before "arrC" can be used.
            // This explicit synchronization can be achieved in two ways:
            // 1) Wait for the events in the list to finish (eventList.Wait()),
            // 2) Or simply use
            commands.Finish();

            // Print the results to a log/console.
            for (int i = 0; i < count; i++)
            {
                log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
            }
        }
        finally
        {
            // Cleanup runs on success AND on failure, so that an exception half-way through
            // does not leak the native objects created so far. Same order as before.
            if (commands != null)
            {
                commands.Dispose();
            }

            foreach (ComputeEventBase eventBase in eventList)
            {
                eventBase.Dispose();
            }
            eventList.Clear();

            if (kernel != null)
            {
                kernel.Dispose();
            }
            if (builtProgram != null)
            {
                builtProgram.Dispose();
            }
            if (a != null)
            {
                a.Dispose();
            }
            if (b != null)
            {
                b.Dispose();
            }
            if (c != null)
            {
                c.Dispose();
            }
        }
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
}
/// <summary>
/// Vector-addition example: adds two pseudo-random 1024-element arrays (fixed seed 1) on the
/// first device of <paramref name="context"/> and writes the host arrays to "exBefore.txt" and
/// "exAfter.txt" below <c>path</c>.
/// </summary>
/// <param name="context">OpenCL context used for buffers, program and queue.</param>
/// <param name="log">Not used by this method.</param>
public void Run(ComputeContext context, TextWriter log)
{
    // Create host part data
    int count = 1024;
    float[] h_A = new float[count];
    float[] h_B = new float[count];
    float[] h_C = new float[count];

    // Init data
    System.Random rand = new System.Random(1);
    for (int i = 0; i < count; ++i)
    {
        h_A[i] = (float)(rand.NextDouble() * 100);
        h_B[i] = (float)(rand.NextDouble() * 100);
    }

    path = Environment.CurrentDirectory + "/Assets/Scripts/CLVecAdd/res";

    // Write the initial state to file. The using block flushes and closes the file, so the
    // handle is not left open (which would make a second call fail to reopen it).
    using (StreamWriter swBefore = new StreamWriter(path + "/exBefore.txt"))
    {
        for (int i = 0; i < count; ++i)
        {
            swBefore.WriteLine("{0} - {1} - {2}", h_A[i], h_B[i], h_C[i]);
        }
    }

    // Create input buffers and the output buffer; released when the computation is done.
    using (ComputeBuffer<float> d_A = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, h_A))
    using (ComputeBuffer<float> d_B = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, h_B))
    using (ComputeBuffer<float> d_C = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, h_C.Length))
    {
        // Load program source
        using (StreamReader srProgram = new StreamReader(Environment.CurrentDirectory + "/Assets/Scripts/CLVecAdd/kernel.cl"))
        {
            clProgramSource = srProgram.ReadToEnd();
        }

        // Create & build the opencl program. "program" is a field and is left alive on purpose,
        // because other members may still use it.
        program = new ComputeProgram(context, clProgramSource);
        program.Build(null, null, null, IntPtr.Zero);

        // Create the kernel and the command queue
        using (ComputeKernel kernel = program.CreateKernel("VectorAdd"))
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            kernel.SetMemoryArgument(0, d_A);
            kernel.SetMemoryArgument(1, d_B);
            kernel.SetMemoryArgument(2, d_C);

            // Create the event wait list
            ComputeEventList eventList = new ComputeEventList();
            try
            {
                // Execute the kernel
                commands.Execute(kernel, null, new long[] { count }, null, eventList);

                // Read back the result (non-blocking)
                commands.ReadFromBuffer(d_C, ref h_C, false, eventList);

                // Wait until finish
                commands.Finish();
            }
            finally
            {
                foreach (var finished in eventList)
                {
                    finished.Dispose();
                }
                eventList.Clear();
            }
        }
    }

    // Write results to file
    using (StreamWriter swAfter = new StreamWriter(path + "/exAfter.txt"))
    {
        for (int i = 0; i < count; ++i)
        {
            swAfter.WriteLine("{0} + {1} -> {2}", h_A[i], h_B[i], h_C[i]);
        }
    }
}
/// <summary>
/// Vector-addition example: adds two random 10-element arrays on the first device of
/// <c>context</c>, reads the result into a pinned array and prints one line per element.
/// </summary>
protected override void RunInternal()
{
    int count = 10;
    float[] arrA = new float[count];
    float[] arrB = new float[count];
    float[] arrC = new float[count];

    Random rand = new Random();
    for (int i = 0; i < count; i++)
    {
        arrA[i] = (float)(rand.NextDouble() * 100);
        arrB[i] = (float)(rand.NextDouble() * 100);
    }

    // Every OpenCL object below is created for this run only and released when the run ends,
    // whether it succeeds or throws.
    using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
    using (ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB))
    using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
    using (ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource }))
    {
        program.Build(null, null, null, IntPtr.Zero);

        using (ComputeKernel kernel = program.CreateKernel("VectorAdd"))
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);

            ComputeEventList events = new ComputeEventList();

            // Pin the destination so the runtime can write into it through a raw pointer.
            GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
            try
            {
                commands.Execute(kernel, null, new long[] { count }, null, events);

                // Blocking read: the copy has finished when the call returns, so the pin can be
                // released safely in the finally block even if a later call fails.
                commands.Read(c, true, 0, count, arrCHandle.AddrOfPinnedObject(), events);
                commands.Finish();
            }
            finally
            {
                arrCHandle.Free();
                foreach (var finished in events)
                {
                    finished.Dispose();
                }
                events.Clear();
            }
        }
    }

    for (int i = 0; i < count; i++)
    {
        Console.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
    }
}
/// <summary>
/// Evaluates <c>_kernel</c> on a two-dimensional grid covering <paramref name="interval"/> with
/// spacing <c>Density</c> and returns the vertices written by the kernel.
/// </summary>
/// <param name="function">Function passed to <c>BasisMatrix</c> to obtain the basis uploaded to the device.</param>
/// <param name="interval">Plot interval; X0/X1 and Y0/Y1 bound the grid.</param>
/// <returns>Array of <c>xCount * yCount</c> vertices read back from the result buffer.</returns>
public Float4[] CreateMesh(AproximationFunction function, PlotInterval interval)
{
    var basis = BasisMatrix(function, interval);

    var u0 = interval.X0;
    var u1 = interval.X1;
    var v0 = interval.Y0;
    var v1 = interval.Y1;

    // Number of grid points per axis: ceil(distance / Density) segments plus one end point.
    var uKnotsDistance = Math.Abs(u1 - u0);
    var xCount = Math.Ceiling(uKnotsDistance / Density) + 1;
    var yKnotDistance = Math.Abs(v1 - v0);
    var yCount = Math.Ceiling(yKnotDistance / Density) + 1;

    var verticesCount = (int)(xCount * yCount);
    var result = new Float4[verticesCount];

    var eventList = new ComputeEventList();

    // Create the input buffers and fill them with data from the arrays.
    // Access modifiers should match those in a kernel.
    // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
    // All buffers and the queue are per-call objects and are released before returning.
    using (var knotsBuffer = new ComputeBuffer<float>(Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, new[] { u0, v0, u1, v1 }))
    using (var densityBuffer = new ComputeBuffer<float>(Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, new[] { Density }))
    using (var basisBuffer = new ComputeBuffer<float>(Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, basis))
    using (var resultBuffer = new ComputeBuffer<Float4>(Context, ComputeMemoryFlags.WriteOnly, verticesCount))
    using (var commands = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None))
    {
        _kernel.SetMemoryArgument(0, resultBuffer);
        _kernel.SetMemoryArgument(1, knotsBuffer);
        _kernel.SetMemoryArgument(2, densityBuffer);
        _kernel.SetMemoryArgument(3, basisBuffer);

        // One work item per grid point: xCount * yCount.
        var globalWorkSize = new[] { (long)xCount, (long)yCount };

        try
        {
            commands.Execute(_kernel, null, globalWorkSize, null, eventList);
            commands.ReadFromBuffer(resultBuffer, ref result, false, eventList);

            // The read is non-blocking; wait for the queue before the result is returned.
            commands.Finish();
        }
        finally
        {
            foreach (var finished in eventList)
            {
                finished.Dispose();
            }
            eventList.Clear();
        }
    }

    return result;
}
/// <summary>
/// Searches the nonce space for the given work on the OpenCL device. Every non-zero value the
/// kernel leaves in an output buffer is written into the work header and passed to
/// <c>Context.SubmitWork</c>. The method returns nothing; it ends when the work becomes stale,
/// cancellation is requested, or the nonce counter wraps around.
/// </summary>
/// <param name="work">Work to hash; its header is updated in place for each reported nonce.</param>
private unsafe void Work(Work work)
{
    // invoked periodically to report hashes and check status
    var check = (Func<uint, bool>)(i =>
    {
        // report hashes to context
        Context.ReportHashes(this, i);

        // abort if we are working on stale work, or if instructed to
        return work.Pool.CurrentBlockNumber == work.BlockNumber && !CancellationToken.IsCancellationRequested;
    });

    // allocate buffers to hold hashing work
    byte[] round1Blocks, round2Blocks;
    uint[] round1State, round1State2Pre, round2State;

    // allocate buffers and create partial hash
    PrepareWork(work, out round1Blocks, out round1State, out round2Blocks, out round2State);

    // static values for work
    uint W16, W17, W18, W19, W31, W32, PreVal4, T1, PreVal4_plus_state0, PreVal4_plus_T1;

    // build message schedule without nonce
    uint* W = stackalloc uint[64];
    fixed (byte* round1BlocksPtr = round1Blocks)
        Sha256.Schedule(round1BlocksPtr + Sha256.SHA256_BLOCK_SIZE, W);

    // complete first three rounds of block 2
    round1State2Pre = Sha256.AllocateStateBuffer();
    Array.Copy(round1State, round1State2Pre, Sha256.SHA256_STATE_SIZE);
    Sha256.Round(ref round1State2Pre[0], ref round1State2Pre[1], ref round1State2Pre[2], ref round1State2Pre[3], ref round1State2Pre[4], ref round1State2Pre[5], ref round1State2Pre[6], ref round1State2Pre[7], W, 0);
    Sha256.Round(ref round1State2Pre[0], ref round1State2Pre[1], ref round1State2Pre[2], ref round1State2Pre[3], ref round1State2Pre[4], ref round1State2Pre[5], ref round1State2Pre[6], ref round1State2Pre[7], W, 1);
    Sha256.Round(ref round1State2Pre[0], ref round1State2Pre[1], ref round1State2Pre[2], ref round1State2Pre[3], ref round1State2Pre[4], ref round1State2Pre[5], ref round1State2Pre[6], ref round1State2Pre[7], W, 2);

    // precalculated pieces that are independent of nonce
    W16 = W[16];
    W17 = W[17];
    W18 = W[18];
    W19 = W[19];
    W31 = W[31];
    W32 = W[32];
    PreVal4 = round1State[4] + Sha256.Sigma1(round1State2Pre[4]) + Sha256.Ch(round1State2Pre[4], round1State2Pre[5], round1State2Pre[6]) + Sha256.K[3];
    T1 = Sha256.Sigma0(round1State2Pre[0]) + Sha256.Maj(round1State2Pre[0], round1State2Pre[1], round1State2Pre[2]);
    PreVal4_plus_state0 = PreVal4 + round1State[0];
    PreVal4_plus_T1 = PreVal4 + T1;

    // clear BOTH output buffers, in case they've already been used. (Previously clBuffer0 was
    // cleared twice and clBuffer1 never, so leftover values in clBuffer1 would have been
    // submitted as nonces on the second loop pass.)
    uint[] outputZero = new uint[16];
    clQueue.WriteToBuffer(outputZero, clBuffer0, true, null);
    clQueue.WriteToBuffer(outputZero, clBuffer1, true, null);

    // to hold output buffer
    uint[] output = new uint[16];

    // swaps between true and false to allow a kernel to execute while testing output of last run
    bool outputAlt = true;

    // size of local work groups
    long localWorkSize = clDevice.MaxWorkGroupSize;

    // number of items to dispatch to GPU at a time
    long globalWorkSize = localWorkSize * localWorkSize * 8;

    // begin working at 0
    uint nonce = 0;

    // continue dispatching work to the GPU
    while (true)
    {
        // list of output events
        var events = new ComputeEventList();

        // read output into current output buffer then reset buffer
        clQueue.ReadFromBuffer(outputAlt ? clBuffer0 : clBuffer1, ref output, true, events);

        // scan output buffer for produced nonce values
        bool outputDirty = false;
        for (int j = 0; j < 16; j++)
        {
            if (output[j] != 0)
            {
                outputDirty = true;

                // replace header data on work
                fixed (byte* headerPtr = work.Header)
                    ((uint*)headerPtr)[19] = output[j];

                // submit work for validation
                Context.SubmitWork(this, work, GetType().Name);
            }
        }

        // clear output buffer
        if (outputDirty)
            clQueue.WriteToBuffer(outputZero, outputAlt ? clBuffer0 : clBuffer1, true, events);

        // execute kernel with computed values
        clQueue.Finish();
        clKernel.SetValueArgument(0, round1State[0]);
        clKernel.SetValueArgument(1, round1State[1]);
        clKernel.SetValueArgument(2, round1State[2]);
        clKernel.SetValueArgument(3, round1State[3]);
        clKernel.SetValueArgument(4, round1State[4]);
        clKernel.SetValueArgument(5, round1State[5]);
        clKernel.SetValueArgument(6, round1State[6]);
        clKernel.SetValueArgument(7, round1State[7]);
        clKernel.SetValueArgument(8, round1State2Pre[4]);
        clKernel.SetValueArgument(9, round1State2Pre[5]);
        clKernel.SetValueArgument(10, round1State2Pre[6]);
        clKernel.SetValueArgument(11, round1State2Pre[0]);
        clKernel.SetValueArgument(12, round1State2Pre[1]);
        clKernel.SetValueArgument(13, round1State2Pre[2]);
        clKernel.SetValueArgument(14, nonce);
        clKernel.SetValueArgument(15, W16);
        clKernel.SetValueArgument(16, W17);
        clKernel.SetValueArgument(17, W18);
        clKernel.SetValueArgument(18, W19);
        clKernel.SetValueArgument(19, W31);
        clKernel.SetValueArgument(20, W32);
        clKernel.SetValueArgument(21, PreVal4_plus_state0);
        clKernel.SetValueArgument(22, PreVal4_plus_T1);
        clKernel.SetMemoryArgument(23, outputAlt ? clBuffer0 : clBuffer1);
        clQueue.Execute(clKernel, null, new long[] { globalWorkSize }, new long[] { localWorkSize }, events);

        // dispose of all events floating around in the list
        foreach (var e in events)
            e.Dispose();

        // report that we just hashed the work size number of hashes
        if (!check((uint)globalWorkSize))
            break;

        // update nonce and check whether it is now less than the work size, which indicates it overflowed
        if ((nonce += (uint)globalWorkSize) < (uint)globalWorkSize)
            break;

        // next loop deals with other output buffer
        outputAlt = !outputAlt;
    }
}
/// <summary>
/// Uploads the visible (<c>bmp</c>) and infrared (<c>irBmp</c>) bitmaps to the device, enqueues the
/// curve, 2D-LUT and reprojection kernels, reads the output image back into a new bitmap and saves
/// it as "teste.png" (the visible bitmap is saved as "original.png").
/// </summary>
/// <param name="context">OpenCL context used to create images, buffers and the command queue;
/// commands are enqueued on its first device.</param>
/// <remarks>
/// Every OpenCL object and bitmap created here is released before returning, including when a
/// kernel launch or read-back throws.
/// </remarks>
public static void CLApply2DLUT(ComputeContext context)
{
    ComputeImageFormat format = new ComputeImageFormat(ComputeImageChannelOrder.Bgra, ComputeImageChannelType.UnsignedInt8);
    var startTime = LLTools.TimestampMS();

    ComputeEventList eventList = new ComputeEventList();

    // Visible / temporary source, infrared source, output image, queue and reprojection buffers.
    using (ComputeImage2D source0 = CreateImageFromBitmap(context, format, bmp, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer))
    using (ComputeImage2D source1 = CreateImageFromBitmap(context, format, irBmp, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer))
    using (ComputeImage2D output = new ComputeImage2D(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.AllocateHostPointer, format, bmp.Width, bmp.Height, 0, IntPtr.Zero))
    using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (var latRangeBuff = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, latRange))
    using (var lonRangeBuff = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, lonRange))
    using (var coverageBuff = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, coverage))
    using (var trimBuff = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, trim))
    using (var sizeBuff = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, size))
    {
        try
        {
            // Apply Curve
            applyCurveKernel.SetMemoryArgument(0, source0);
            applyCurveKernel.SetMemoryArgument(1, output);
            applyCurveKernel.SetMemoryArgument(2, curveLutBuffer);

            // Apply LUT 2D
            apply2DLUTKernel.SetMemoryArgument(0, source1);
            apply2DLUTKernel.SetMemoryArgument(1, output);
            apply2DLUTKernel.SetMemoryArgument(2, source0);
            apply2DLUTKernel.SetMemoryArgument(3, lut2DBuffer);

            // Reprojection
            reprojectKernel.SetMemoryArgument(0, source0);
            reprojectKernel.SetMemoryArgument(1, output);
            reprojectKernel.SetValueArgument(2, satelliteLongitude);
            reprojectKernel.SetValueArgument(3, coff);
            reprojectKernel.SetValueArgument(4, cfac);
            reprojectKernel.SetValueArgument(5, loff);
            reprojectKernel.SetValueArgument(6, lfac);
            reprojectKernel.SetValueArgument(7, (uint)(fixAspect ? 1 : 0));
            reprojectKernel.SetValueArgument(8, aspectRatio);
            reprojectKernel.SetMemoryArgument(9, latRangeBuff);
            reprojectKernel.SetMemoryArgument(10, lonRangeBuff);
            reprojectKernel.SetMemoryArgument(11, coverageBuff);
            reprojectKernel.SetMemoryArgument(12, trimBuff);
            reprojectKernel.SetMemoryArgument(13, sizeBuff);

            // Run pipeline: one work item per pixel of the visible bitmap for each stage.
            UIConsole.Log("Executing curve kernel");
            commands.Execute(applyCurveKernel, null, new long[] { bmp.Width, bmp.Height }, null, eventList);
            UIConsole.Log("Executing LUT2D kernel");
            commands.Execute(apply2DLUTKernel, null, new long[] { bmp.Width, bmp.Height }, null, eventList);
            UIConsole.Log("Executing kernel");
            commands.Execute(reprojectKernel, null, new long[] { bmp.Width, bmp.Height }, null, eventList);

            // Dump bitmap
            UIConsole.Log("Dumping bitmap");
            using (Bitmap obmp = new Bitmap(bmp.Width, bmp.Height, bmp.PixelFormat))
            {
                BitmapData bmpData = obmp.LockBits(new Rectangle(0, 0, obmp.Width, obmp.Height), ImageLockMode.ReadWrite, obmp.PixelFormat);
                try
                {
                    // Blocking read: returns only once the image data is in the locked bitmap memory.
                    commands.ReadFromImage(output, bmpData.Scan0, true, null);
                }
                finally
                {
                    obmp.UnlockBits(bmpData);
                }

                var delta = LLTools.TimestampMS() - startTime;
                UIConsole.Log($"Took {delta} ms to Apply Curve -> Apply Lut2D (FalseColor) -> Reproject");
                UIConsole.Log("Saving bitmap");
                obmp.Save("teste.png");
                UIConsole.Log("Done");
                bmp.Save("original.png");
            }
        }
        finally
        {
            // Each Execute call appended an event to the list; release them explicitly.
            foreach (ComputeEventBase pendingEvent in eventList)
            {
                pendingEvent.Dispose();
            }
            eventList.Clear();
        }
    }
}

/// <summary>
/// Locks <paramref name="bitmap"/> for reading, creates a 2D OpenCL image initialised from its
/// pixel memory, and unlocks the bitmap again even if image creation fails.
/// </summary>
/// <remarks>Callers pass <c>CopyHostPointer</c> in <paramref name="flags"/>, which is why the bitmap
/// can be unlocked immediately after the image is created.</remarks>
private static ComputeImage2D CreateImageFromBitmap(ComputeContext context, ComputeImageFormat format, Bitmap bitmap, ComputeMemoryFlags flags)
{
    BitmapData bitmapData = bitmap.LockBits(new Rectangle(0, 0, bitmap.Width, bitmap.Height), ImageLockMode.ReadOnly, bitmap.PixelFormat);
    try
    {
        return new ComputeImage2D(context, flags, format, bitmap.Width, bitmap.Height, bitmapData.Stride, bitmapData.Scan0);
    }
    finally
    {
        bitmap.UnlockBits(bitmapData);
    }
}
/// <summary>
/// Initializes the renderer: stores the command line parameters passed by the template code,
/// allocates the accumulator, scene, camera and tile counts, and sets up the OpenCL platform,
/// context, program, kernel and command queue.
/// </summary>
/// <param name="rt">Stored in <c>runningTime</c>.</param>
/// <param name="gpu">Stored in <c>useGPU</c>.</param>
/// <param name="platformIdx">Stored in <c>gpuPlatform</c> and used as the index into
/// <c>ComputePlatform.Platforms</c>.</param>
public void Init(int rt, bool gpu, int platformIdx)
{
    // pass command line parameters
    runningTime = rt;
    useGPU = gpu;
    gpuPlatform = platformIdx;

    // initialize accumulator
    accumulator = new Vector3[screen.width * screen.height];
    ClearAccumulator();

    // setup scene
    scene = new Scene();

    // setup camera
    camera = new Camera(screen.width, screen.height);

    rngQueue = new ConcurrentQueue<Random>();

    // number of tiles needed to cover the screen, rounding partial tiles up
    xtiles = (int)Math.Ceiling((float)screen.width / TILESIZE);
    ytiles = (int)Math.Ceiling((float)screen.height / TILESIZE);

#if DEBUG
    RTTools.factorials[0] = Vector<float>.One;
    for (int i = 1; i < RTTools.TERMS * 2; i++)
        RTTools.factorials[i] = RTTools.factorials[i - 1] * i;
    //for (int i = 0; i < RTTools.TERMS; i++)
    //    RTTools.atanStuff[i] = (new Vector<float>((float)Math.Pow(2, 2 * i)) * (RTTools.factorials[i] * RTTools.factorials[i])) / RTTools.factorials[2 * i + 1];
#endif

    #region OpenCL related things

    // NOTE(review): one float per pixel plus 25 extra entries; the reason for the padding is not visible here.
    randNums = new float[screen.width * screen.height + 25];

    // Read the kernel source; 'using' closes the file even if reading throws.
    string clSource;
    using (var streamReader = new StreamReader("../../assets/GPUCode.cl"))
    {
        clSource = streamReader.ReadToEnd();
    }

    platform = ComputePlatform.Platforms[gpuPlatform];
    context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero);

    program = new ComputeProgram(context, clSource);
    try
    {
        program.Build(null, null, null, IntPtr.Zero);
        kernel = program.CreateKernel("Test");
    }
    catch
    {
        // Deliberate best effort: print the build log and break into the debugger.
        // Execution continues afterwards with 'kernel' left unassigned by this method.
        Console.Write("error in kernel code:\n");
        Console.Write(program.GetBuildLog(context.Devices[0]) + "\n");
        Debugger.Break();
    }

    eventList = new ComputeEventList();
    commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

    #endregion
}
/// <summary>
/// SIMULATE.
/// Takes the pattern in array 'second', and applies the rules of Game of Life to produce the next
/// state in array 'pattern'. At the end, the result is copied back to 'second' for the next
/// generation. Runs on the CPU when <c>GPU</c> is false, otherwise launches an OpenCL kernel.
/// </summary>
void Simulate()
{
    // Run on GPU or CPU
    if (!GPU)
    {
        // Code for CPU
        // clear destination pattern
        for (int i = 0; i < pw * ph; i++)
        {
            pattern[i] = 0;
        }

        // process all pixels, skipping one pixel boundary
        uint w = pw * 32, h = ph;
        for (uint y = 1; y < h - 1; y++)
        {
            for (uint x = 1; x < w - 1; x++)
            {
                // count active neighbors
                uint n = GetBit(x - 1, y - 1) + GetBit(x, y - 1) + GetBit(x + 1, y - 1)
                       + GetBit(x - 1, y) + GetBit(x + 1, y)
                       + GetBit(x - 1, y + 1) + GetBit(x, y + 1) + GetBit(x + 1, y + 1);

                // a live cell with two neighbors survives; any cell with three neighbors is alive
                if ((GetBit(x, y) == 1 && n == 2) || n == 3)
                {
                    BitSet(x, y);
                }
            }
        }

        // swap buffers
        for (int i = 0; i < pw * ph; i++)
        {
            second[i] = pattern[i];
        }
    }
    else
    {
        // Code for GPU
        // NOTE(review): both buffers are created ReadOnly although the result is read back from
        // pattern_d; confirm against the kernel source whether it writes to argument 0.
        var flags = ComputeMemoryFlags.UseHostPointer | ComputeMemoryFlags.ReadOnly;
        ComputeEventList eventList = new ComputeEventList();

        // The buffers are only needed for this generation; dispose them here instead of leaving
        // a fresh pair per call to the garbage collector.
        using (var pattern_d = new ComputeBuffer<uint>(context, flags, pattern))
        using (var second_d = new ComputeBuffer<uint>(context, flags, second))
        {
            try
            {
                // "SimulateWrap" when Wrap is on, "Simulate" when it is off.
                // NOTE(review): a new kernel object is created on every call and stored in the
                // 'kernel' field; the previous one is not released here because other code may
                // still reference that field.
                kernel = program.CreateKernel(Wrap ? "SimulateWrap" : "Simulate");

                kernel.SetMemoryArgument(0, pattern_d);
                kernel.SetMemoryArgument(1, second_d);
                kernel.SetValueArgument<uint>(2, pw);
                kernel.SetValueArgument<uint>(3, ph);

                // one work item per bit of the pattern, in work groups of 32
                long[] globalWorkSize = { pw * 32 * ph };
                long[] localWorkSize = { 32 };

                queue.Execute(kernel, null, globalWorkSize, localWorkSize, eventList);
                queue.ReadFromBuffer<uint>(pattern_d, ref second, false, eventList);

                // the read is non-blocking, so wait before 'second' is used
                eventList.Wait();
            }
            finally
            {
                foreach (ComputeEventBase pendingEvent in eventList)
                {
                    pendingEvent.Dispose();
                }
                eventList.Clear();
            }
        }
    }
}
/// <summary>
/// Vector-add example: fills two 10-element arrays with random values, adds them on the first
/// device of <paramref name="context"/> with the "VectorAdd" kernel and writes each sum to
/// <paramref name="log"/>.
/// </summary>
/// <param name="context">OpenCL context used for buffers, program and command queue.</param>
/// <param name="log">Receives the results, or the text of any exception that occurs.</param>
/// <remarks>
/// Exceptions are not propagated; they are written to <paramref name="log"/>. All OpenCL objects
/// created here are released whether or not an exception occurs.
/// </remarks>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        ComputeBuffer<float> a = null;
        ComputeBuffer<float> b = null;
        ComputeBuffer<float> c = null;
        ComputeKernel kernel = null;
        ComputeCommandQueue commands = null;
        ComputeEventList eventList = new ComputeEventList();

        // Tracks whether THIS call assigned the 'program' field, so a failure before that point
        // does not dispose a program left over from an earlier run.
        bool programCreated = false;

        try
        {
            // Create the arrays and fill them with random data.
            int count = 10;
            float[] arrA = new float[count];
            float[] arrB = new float[count];
            float[] arrC = new float[count];

            Random rand = new Random();
            for (int i = 0; i < count; i++)
            {
                arrA[i] = (float)(rand.NextDouble() * 100);
                arrB[i] = (float)(rand.NextDouble() * 100);
            }

            // Create the input buffers and fill them with data from the arrays.
            // Access modifiers should match those in a kernel.
            // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
            a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
            b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);

            // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
            c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

            // Create and build the opencl program.
            program = new ComputeProgram(context, clProgramSource);
            programCreated = true;
            program.Build(null, null, null, IntPtr.Zero);

            // Create the kernel function and set its arguments.
            kernel = program.CreateKernel("VectorAdd");
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);

            // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
            commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

            // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
            // An event list is not really needed for this example but it is important to see how it works.
            commands.Execute(kernel, null, new long[] { count }, null, eventList);

            // Read back the results (non-blocking). eventList will contain two events after this call returns.
            commands.ReadFromBuffer(c, ref arrC, false, eventList);

            // The read above is non-blocking, so synchronize explicitly before "arrC" is used.
            // Waiting on the events (eventList.Wait()) would work as well.
            commands.Finish();

            // Print the results to a log/console.
            for (int i = 0; i < count; i++)
            {
                log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
            }
        }
        finally
        {
            // cleanup commands
            if (commands != null)
            {
                commands.Dispose();
            }

            // cleanup events
            foreach (ComputeEventBase eventBase in eventList)
            {
                eventBase.Dispose();
            }
            eventList.Clear();

            // cleanup kernel
            if (kernel != null)
            {
                kernel.Dispose();
            }

            // cleanup program
            if (programCreated)
            {
                program.Dispose();
            }

            // cleanup buffers
            if (a != null)
            {
                a.Dispose();
            }
            if (b != null)
            {
                b.Dispose();
            }
            if (c != null)
            {
                c.Dispose();
            }
        }
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
}
/// <summary>
/// Render loop: creates the OpenCL context, builds the embedded "DIRT.kernel.c" program, creates the
/// "prepTris" and "ray" kernels and the command queue, then renders frames forever, handing each
/// finished frame to <c>Screen</c>.
/// </summary>
/// <remarks>This method never returns.</remarks>
public static void render()
{
    // Use the second platform when more than one is present, otherwise the first.
    int platformIndex = ComputePlatform.Platforms.Count > 1 ? 1 : 0;
    context = new ComputeContext(
        ComputeDeviceTypes.All,
        new ComputeContextPropertyList(ComputePlatform.Platforms[platformIndex]),
        null,
        IntPtr.Zero);

    // Load the kernel source from the assembly's embedded resources.
    string kernelText;
    using (Stream resource = Assembly.GetExecutingAssembly().GetManifestResourceStream("DIRT.kernel.c"))
    using (StreamReader reader = new StreamReader(resource))
    {
        kernelText = reader.ReadToEnd();
    }

    var clProgram = new ComputeProgram(context, kernelText);
    clProgram.Build(null, null, null, IntPtr.Zero);
    prepTrisKernel = clProgram.CreateKernel("prepTris");
    renderKernel = clProgram.CreateKernel("ray");

    eventList = new ComputeEventList();
    commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
    sw = new Stopwatch();

    for (;;)
    {
        lock (renderLock)
        {
            eye = ConsoleSettings.camera;

            // The previous frame has not been consumed yet: try again on the next iteration.
            if (Screen.frameReady)
            {
                continue;
            }

            frame = new float[(int)ConsoleSettings.screenWidth, (int)ConsoleSettings.screenHeight, 3];

            // Time a single raycast pass.
            sw.Restart();
            raycast();
            sw.Stop();

            long elapsedMs = sw.ElapsedMilliseconds;
            if (elapsedMs > 0)
            {
                fps = (int)(1000 / elapsedMs);
            }

            // Publish the finished frame.
            lock (Screen.nextFrameLock)
            {
                Screen.nextFrame = frame;
                Screen.frameReady = true;
            }

            if (gTris != null)
            {
                lastTriCount = (int)gTris.Count;
            }
        }

        // Release the events accumulated while rendering this frame.
        foreach (ComputeEvent finished in eventList)
        {
            finished.Dispose();
        }
        eventList.Clear();
    }
}
/// <summary>
/// Runs the "PrimeNumber" kernel once on every device of the first OpenCL platform (last device
/// first), prints every result other than 0 and 1 to the console and appends it to a text file
/// under C:\primenumber, and reports the elapsed time per device.
/// </summary>
/// <param name="args">Command line arguments (unused).</param>
static void Main(string[] args)
{
    const string programName = "Prime Number";
    // Once the main output file reaches 1 MiB, results go to the numbered file instead.
    const long maxOutputFileBytes = 1024 * 1024;

    Stopwatch stopWatch = new Stopwatch();
    string clProgramSource = KernelProgram();

    Console.WriteLine("Environment OS:");
    Console.WriteLine("-----------------------------------------");
    Console.WriteLine(Environment.OSVersion);

    if (ComputePlatform.Platforms.Count == 0)
    {
        Console.WriteLine("No OpenCL Platforms are availble!");
        return;
    }

    // step 1 choose the first available platform
    ComputePlatform platform = ComputePlatform.Platforms[0];

    // output the basic info
    BasicInfo(platform);
    Console.WriteLine("Program: " + programName);
    Console.WriteLine("-----------------------------------------");

    // Cpu 10 seconds Gpu 28 seconds
    int count = 64;
    int total = count * count * count;
    int[] output_Z = new int[total];
    int[] input_X = new int[total];
    for (int x = 0; x < total; x++)
    {
        input_X[x] = x;
    }

    // step 2 create context for that platform and all devices
    ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);
    ComputeEventList eventList = new ComputeEventList();

    using (ComputeContext context = new ComputeContext(platform.Devices, properties, null, IntPtr.Zero))
    // step 3 create and build program
    using (ComputeProgram program = new ComputeProgram(context, clProgramSource))
    {
        program.Build(platform.Devices, null, null, IntPtr.Zero);

        // step 4 create memory objects
        using (ComputeBuffer<int> a = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input_X))
        using (ComputeBuffer<int> z = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, output_Z.Length))
        // step 5 create kernel object
        using (ComputeKernel kernel = program.CreateKernel("PrimeNumber"))
        {
            // step 6 set kernel arguments
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, z);

            // query available devices n,...,1,0
            for (int j = context.Devices.Count - 1; j > -1; j--)
            {
                // Restart (not Start) so each device's time excludes the devices measured before it.
                stopWatch.Restart();

                // step 7 create command queue on that context on that device
                using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[j], ComputeCommandQueueFlags.None))
                {
                    try
                    {
                        // step 8 run the kernel program
                        commands.Execute(kernel, null, new long[] { count, count, count }, null, eventList);

                        // step 9 read results (non-blocking), then wait for the queue to drain
                        commands.ReadFromBuffer(z, ref output_Z, false, eventList);
                        commands.Finish();
                    }
                    finally
                    {
                        foreach (ComputeEventBase pendingEvent in eventList)
                        {
                            pendingEvent.Dispose();
                        }
                        eventList.Clear();
                    }
                }

                // Pick the output file: switch to the numbered file once the main one is full.
                string fileName = "C:\\primenumber\\PrimeNumberGPU.txt";
                FileInfo info = new FileInfo(fileName);
                if (info.Exists && info.Length >= maxOutputFileBytes)
                {
                    int index = 1;
                    fileName = "C:\\primenumber\\PrimeNumberGPU" + index.ToString() + ".txt";
                }

                // Append the results; 'using' closes the file even if a write fails.
                using (StreamWriter file = new StreamWriter(fileName, true))
                {
                    foreach (int value in output_Z)
                    {
                        if (value != 0 && value != 1)
                        {
                            Console.WriteLine(value);
                            file.Write(value);
                            file.Write("x");
                        }
                    }
                }

                stopWatch.Stop();

                // Milliseconds range over 0-999, so they need three digits to be unambiguous.
                TimeSpan ts = stopWatch.Elapsed;
                string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:000}", ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds);
                Console.WriteLine(context.Devices[j].Name.Trim() + " Elapsed Time " + elapsedTime);
                Console.WriteLine("-----------------------------------------");
            }
        }
    }

    Console.ReadLine();
}
/// <summary>
/// Evaluates the "Barycentric" kernel for every entry of <paramref name="calculations"/> on the
/// first device of the shared context. Each entry's <c>Coords</c> is set from the kernel output,
/// and for entries the kernel flags as valid (1) a <c>DrawPoint</c> is added to the entry's triangle.
/// </summary>
/// <param name="calculations">Work items; each supplies the points P, A, B and C.</param>
/// <remarks>
/// An empty list returns immediately: OpenCL rejects zero-sized buffers, so there is nothing to
/// launch. Timing checkpoints are reported through <c>mark</c>.
/// </remarks>
public static void Calculate(List<Calculation> calculations)
{
    Stopwatch s = Stopwatch.StartNew();

    int count = calculations.Count;
    if (count == 0)
    {
        // Nothing to compute; creating zero-length buffers would throw.
        return;
    }

    // Host-side staging arrays, one slot per calculation.
    IntVec2[] p_p = new IntVec2[count];
    IntVec2[] p_a = new IntVec2[count];
    IntVec2[] p_b = new IntVec2[count];
    IntVec2[] p_c = new IntVec2[count];
    FloatVec3[] c = new FloatVec3[count];
    int[] c_valid = new int[count];

    Parallel.For(0, count, i =>
    {
        var calc = calculations[i];
        p_p[i] = new IntVec2(calc.P);
        p_a[i] = new IntVec2(calc.A);
        p_b[i] = new IntVec2(calc.B);
        p_c[i] = new IntVec2(calc.C);
    });
    mark(s, "memory init");

    const ComputeMemoryFlags inputFlags = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;

    // 'using' releases every device object even if a launch or read-back throws.
    using (var bufP = new ComputeBuffer<IntVec2>(context, inputFlags, p_p))
    using (var bufA = new ComputeBuffer<IntVec2>(context, inputFlags, p_a))
    using (var bufB = new ComputeBuffer<IntVec2>(context, inputFlags, p_b))
    using (var bufC = new ComputeBuffer<IntVec2>(context, inputFlags, p_c))
    using (var bufCoords = new ComputeBuffer<FloatVec3>(context, ComputeMemoryFlags.WriteOnly, c.Length))
    using (var bufValid = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, c_valid.Length))
    {
        mark(s, "memory buffer init");

        using (ComputeKernel kernel = program.CreateKernel("Barycentric"))
        {
            kernel.SetMemoryArgument(0, bufP);
            kernel.SetMemoryArgument(1, bufA);
            kernel.SetMemoryArgument(2, bufB);
            kernel.SetMemoryArgument(3, bufC);
            kernel.SetMemoryArgument(4, bufCoords);
            kernel.SetMemoryArgument(5, bufValid);
            mark(s, "memory init 2");

            ComputeEventList eventList = new ComputeEventList();
            using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
            {
                try
                {
                    commands.Execute(kernel, null, new long[] { count }, null, eventList);
                    mark(s, "execute");

                    // Non-blocking reads; Finish() below guarantees the arrays are filled.
                    commands.ReadFromBuffer(bufCoords, ref c, false, eventList);
                    commands.ReadFromBuffer(bufValid, ref c_valid, false, eventList);
                    commands.Finish();
                    mark(s, "read 1");

                    Parallel.For(0, count, i =>
                    {
                        var calc = calculations[i];
                        calc.Coords = new BarycentricCoordinates(c[i].U, c[i].V, c[i].W);
                        if (c_valid[i] == 1)
                        {
                            // Several calculations may share a triangle, so serialize the additions.
                            lock (calc.Tri)
                            {
                                calc.Tri.Points.Add(new DrawPoint(calc.Coords, calc.P));
                            }
                        }
                    });
                    mark(s, "read 2");
                }
                finally
                {
                    // cleanup events
                    foreach (ComputeEventBase eventBase in eventList)
                    {
                        eventBase.Dispose();
                    }
                    eventList.Clear();
                }
            }
        }
    }

    mark(s, "dispose");
}