/// <summary>
/// Launches the kernel once over all inputs and copies both result buffers back to the host.
/// </summary>
public override void Proccess()
{
    // One-dimensional launch sized by the number of inputs.
    long[] globalWorkSize = { DataGenerator.InputCount };

    queue.Execute(kernel, null, globalWorkSize, null, null);
    queue.Finish();

    // Blocking reads: the host arrays are populated by the time each call returns.
    queue.ReadFromBuffer(result_dev, ref resultsBytes, true, null);
    queue.ReadFromBuffer(resultCalc_dev, ref calculatables, true, null);
}
/// <summary>
/// Returns the host-side int array registered under <paramref name="key"/>. When hardware
/// acceleration is enabled, the first <paramref name="length"/> elements are refreshed from
/// the matching device buffer before returning.
/// </summary>
/// <param name="key">Name of the buffer in both the host and device dictionaries.</param>
/// <param name="length">Number of elements to copy back from the device.</param>
/// <returns>The host array stored under <paramref name="key"/>.</returns>
public int[] ReadIntBuffer(string key, int length)
{
    int[] hostBuffer = _intBuffers[key];

    if (!HardwareAccelerationEnabled)
    {
        return hostBuffer;
    }

    // Blocking read from offset 0 of the device buffer into offset 0 of the host array.
    _commands.ReadFromBuffer(_intComputeBuffers[key], ref hostBuffer, true, 0, 0, length, null);
    _commands.Finish();

    return hostBuffer;
}
/// <summary>
/// Adds two 100000-element float vectors on the first GPU device of the first OpenCL platform
/// and returns every "a + b = c" line joined with <c>&lt;br&gt;</c>.
/// </summary>
/// <returns>The formatted sums, one entry per element.</returns>
public string vectorSum()
{
    string vecSum = @"
__kernel void vectorSum(__global float *v1, __global float *v2, __global float *v3)
{
    int i = get_global_id(0);
    v3[i] = v1[i] + v2[i];
}
";
    const int size = 100000;
    float[] v1_ = new float[size];
    float[] v2_ = new float[size];
    float[] v3_ = new float[size];
    for (var i = 0; i < size; i++)
    {
        v1_[i] = (float)i;
        v2_[i] = .5f;
    }

    var platform_ = ComputePlatform.Platforms[0];
    ComputeContextPropertyList properties = new ComputeContextPropertyList(platform_);

    // All OpenCL objects wrap native handles; release them deterministically.
    using (ComputeContext ctx = new ComputeContext(ComputeDeviceTypes.Gpu, properties, null, IntPtr.Zero))
    using (ComputeCommandQueue commands = new ComputeCommandQueue(ctx, ctx.Devices[0], ComputeCommandQueueFlags.None))
    using (ComputeProgram program = new ComputeProgram(ctx, vecSum))
    {
        try
        {
            program.Build(null, null, null, IntPtr.Zero);
            Console.WriteLine("program build completed");
        }
        catch
        {
            // Surface the compiler output instead of discarding it, then let the failure propagate:
            // a program that failed to build cannot produce a kernel anyway.
            Console.WriteLine(program.GetBuildLog(ctx.Devices[0]));
            throw;
        }

        using (var v1 = new ComputeBuffer<float>(ctx, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, v1_))
        using (var v2 = new ComputeBuffer<float>(ctx, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, v2_))
        using (var v3 = new ComputeBuffer<float>(ctx, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, v3_))
        using (ComputeKernel sumKernal = program.CreateKernel("vectorSum"))
        {
            long[] worker = { size };
            commands.WriteToBuffer(v1_, v1, false, null);
            commands.WriteToBuffer(v2_, v2, false, null);
            Console.WriteLine("kernal created");

            sumKernal.SetMemoryArgument(0, v1);
            sumKernal.SetMemoryArgument(1, v2);
            sumKernal.SetMemoryArgument(2, v3);
            commands.Execute(sumKernal, null, worker, null, null);
            Console.WriteLine("Executed");

            // Blocking read: v3_ is consumed immediately below, so the copy must have completed.
            commands.ReadFromBuffer<float>(v3, ref v3_, true, null);
        }
    }

    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < size; i++)
    {
        sb.AppendFormat("{0} + {1} = {2}<br>", v1_[i].ToString(), v2_[i].ToString(), v3_[i].ToString());
    }

    var sum_expression_result = sb.ToString();
    return sum_expression_result;
}
/// <summary>
/// Renders the provided potential force field or gradient map.
/// </summary>
/// <param name="queue">The queue the colour kernel is executed on.</param>
/// <param name="pointsToRender">The points to render; used on the host to find the value range.</param>
/// <param name="computeBuffer">The device buffer passed to the colour kernel as its input.</param>
/// <returns>A bitmap representing <paramref name="pointsToRender"/></returns>
private Bitmap RenderPoints(ComputeCommandQueue queue, float[] pointsToRender, ComputeBuffer<float> computeBuffer)
{
    // The value range is computed on the host rather than on the GPU.
    var max = pointsToRender.AsParallel().Max();
    var min = pointsToRender.AsParallel().Min();

    colourKernel.SetValueArgument(0, max);
    colourKernel.SetValueArgument(1, min);
    colourKernel.SetMemoryArgument(2, computeBuffer);
    colourKernel.SetMemoryArgument(3, outPix);

    // Two-dimensional launch. The global offset must either be null or have one entry per
    // dimension; a single-element array would be read past its end by the native call.
    // null means "no offset", i.e. (0, 0).
    queue.Execute(
        colourKernel,
        null,
        new long[] { gridWidth, gridHeight },
        new long[] { WorkGroupSize, WorkGroupSize },
        null);

    // Blocking read of the coloured pixels.
    queue.ReadFromBuffer(outPix, ref pixels, true, null);

    var bitmap = new Bitmap(gridWidth, gridHeight, PixelFormat.Format24bppRgb);
    var info = bitmap.LockBits(new Rectangle(0, 0, gridWidth, gridHeight), ImageLockMode.ReadWrite, PixelFormat.Format24bppRgb);
    try
    {
        // NOTE(review): a single flat copy assumes the pixel array layout matches the bitmap's
        // row stride (rows are padded to 4 bytes) — confirm for widths where gridWidth * 3 % 4 != 0.
        Marshal.Copy(pixels, 0, info.Scan0, pixels.Length);
    }
    finally
    {
        bitmap.UnlockBits(info);
    }

    // Flip vertically so the image axes match those used by the simulator.
    bitmap.RotateFlip(RotateFlipType.RotateNoneFlipY);
    return bitmap;
}
/// <summary>
/// Runs the kernel over <paramref name="Worksize"/> work items and returns the contents of the
/// copy-back argument(s) as a new array.
/// </summary>
/// <typeparam name="T">Element type of the copy-back buffer.</typeparam>
/// <param name="Worksize">Global work size (one dimension).</param>
/// <param name="WorkOffset">Global work offset; 0 launches without an offset.</param>
/// <param name="OutSize">Length of the returned array.</param>
/// <returns>The array the copy-back buffers were read into.</returns>
public T[] ExecuteReturn<T>(long Worksize, long WorkOffset, int OutSize) where T : struct
{
    T[] output = new T[OutSize];
    SetArgs();

    // A zero offset is passed as null rather than as an explicit { 0 } array.
    long[] globalOffset = WorkOffset != 0 ? new long[] { WorkOffset } : null;
    queue.Execute(kernel, globalOffset, new long[] { Worksize }, null, null);

    foreach (var argument in MethodInfo.Arguments)
    {
        if (!argument.CopyBack)
        {
            continue;
        }

        // Non-blocking read; completion is awaited by Finish() below.
        queue.ReadFromBuffer((ComputeBuffer<T>)argument.ComputeMemory, ref output, false, null);
    }

    queue.Finish();
    return output;
}
/// <summary>
/// Runs the proof-of-work kernel over <paramref name="iterations"/> work items and returns the
/// single value the kernel wrote to its output buffer.
/// </summary>
/// <param name="header">Bytes copied to kernel argument 0.</param>
/// <param name="bits">Bytes copied to kernel argument 1.</param>
/// <param name="nonceStart">Value passed as kernel argument 2.</param>
/// <param name="iterations">Global work size of the launch.</param>
/// <param name="elapsedMilliseconds">Stopwatch reading taken after the queue has finished.</param>
/// <returns>The first (only) element of the output buffer.</returns>
public uint FindProofOfWork(byte[] header, byte[] bits, uint nonceStart, uint iterations, out long elapsedMilliseconds)
{
    this.stopwatch.Restart();

    var inputFlags = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;
    using var headerBuffer = new ComputeBuffer<byte>(this.computeContext, inputFlags, header);
    using var bitsBuffer = new ComputeBuffer<byte>(this.computeContext, inputFlags, bits);
    using var powBuffer = new ComputeBuffer<uint>(this.computeContext, ComputeMemoryFlags.WriteOnly, 1);

    this.computeKernel.SetMemoryArgument(0, headerBuffer);
    this.computeKernel.SetMemoryArgument(1, bitsBuffer);
    this.computeKernel.SetValueArgument(2, nonceStart);
    this.computeKernel.SetMemoryArgument(3, powBuffer);

    using var commands = new ComputeCommandQueue(this.computeContext, this.computeDevice, ComputeCommandQueueFlags.None);
    long[] globalWorkSize = { iterations };
    commands.Execute(this.computeKernel, null, globalWorkSize, null, null);

    // Blocking read of the one-element result.
    var result = new uint[1];
    commands.ReadFromBuffer(powBuffer, ref result, true, null);
    commands.Finish();

    elapsedMilliseconds = this.stopwatch.ElapsedMilliseconds;
    return result[0];
}
/// <summary>
/// Computes the field used when the robot possesses the ball.
/// </summary>
/// <param name="queue">The queue to execute the kernel on.</param>
/// <param name="field">Receives the calculated field points.</param>
private void ComputePosessionField(ComputeCommandQueue queue, float[] field)
{
    // All positions are shifted so the field's left/bottom corner becomes the origin.
    var ball = new Vector2(
        (float)currentEnvironment.CurrentBall.Position.X - currentEnvironment.FieldBounds.Left,
        (float)currentEnvironment.CurrentBall.Position.Y - currentEnvironment.FieldBounds.Bottom);

    var goalTarget = new Vector2(
        (97.3632f) - currentEnvironment.FieldBounds.Left,
        (33.932f + 49.6801f) / 2.0f - currentEnvironment.FieldBounds.Bottom);

    // Collect together all the points that will repel the robot: opponents first, then home robots.
    var repulsers = currentEnvironment.Opponents.Select(o => o.Position)
        .Concat(currentEnvironment.Home.Select(h => h.Position))
        .Select(p => new Vector2(
            (float)p.X - currentEnvironment.FieldBounds.Left,
            (float)p.Y - currentEnvironment.FieldBounds.Bottom))
        .ToArray();

    using (var inRepulsers = new ComputeBuffer<Vector2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, repulsers))
    {
        possessionKernel.SetValueArgument(0, ball);
        possessionKernel.SetValueArgument(1, goalTarget);
        possessionKernel.SetValueArgument(2, GridResolution);
        possessionKernel.SetMemoryArgument(3, inRepulsers);
        possessionKernel.SetMemoryArgument(4, outCl);

        // Two-dimensional launch. The global offset must be null or contain one entry per
        // dimension; the former single-element array would be over-read by the native call.
        // null is equivalent to an offset of (0, 0).
        queue.Execute(
            possessionKernel,
            null,
            new long[] { gridWidth, gridHeight },
            new long[] { WorkGroupSize, WorkGroupSize },
            null);

        // Blocking read, so the input buffer can be released as soon as the block exits.
        queue.ReadFromBuffer(outCl, ref field, true, null);
    }
}
/// <summary>
/// Multiplies <paramref name="a"/> by <paramref name="b"/> on the OpenCL device and stores the
/// product in <paramref name="result"/>.
/// </summary>
/// <param name="result">Pre-allocated output matrix; one work item is launched per element.</param>
/// <param name="a">Left operand, copied to kernel argument 0.</param>
/// <param name="b">Right operand, copied to kernel argument 1.</param>
public unsafe static void MatrixMulti_OpenCL(double[,] result, double[,] a, double[,] b)
{
    InitCloo();

    // Extents of dimension 0 and dimension 1 of the result.
    // NOTE(review): the names suggest columns/rows, but dimension 0 is conventionally the row
    // index in C# — confirm against the kernel's expectations.
    var ncols = result.GetUpperBound(0) + 1;
    var nrows = result.GetUpperBound(1) + 1;

    // The arrays stay pinned for the whole block so the raw pointers remain valid.
    fixed (double* rp = result, ap = a, bp = b)
    {
        // Buffers and the queue wrap native handles; dispose them so repeated calls do not leak.
        using (var aBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, a.Length, (IntPtr)ap))
        using (var bBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, b.Length, (IntPtr)bp))
        using (var rBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.WriteOnly, result.Length))
        using (var commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            kernel.SetMemoryArgument(0, aBuffer);
            kernel.SetMemoryArgument(1, bBuffer);
            kernel.SetValueArgument(2, ncols);
            kernel.SetMemoryArgument(3, rBuffer);

            ComputeEventList events = new ComputeEventList();
            commands.Execute(kernel, null, new long[] { result.Length }, null, events);

            // Non-blocking 2-D read; Finish() below waits for it before the buffers are released.
            commands.ReadFromBuffer(rBuffer, ref result, false, new SysIntX2(), new SysIntX2(), new SysIntX2(ncols, nrows), events);
            commands.Finish();
        }
    }
}
/// <summary>
/// Finds the nonce for a block header hash that meets the given target.
/// </summary>
/// <param name="header">serialized block header</param>
/// <param name="bits">the target</param>
/// <param name="nonceStart">the first nonce value to try</param>
/// <param name="iterations">the number of iterations</param>
/// <returns>The single value the kernel wrote to its output buffer.</returns>
/// <exception cref="InvalidOperationException">No compute device is available.</exception>
public uint FindPow(byte[] header, byte[] bits, uint nonceStart, uint iterations)
{
    if (this.computeDevice == null)
    {
        throw new InvalidOperationException("GPU not found");
    }

    this.ConstructOpenCLResources();
    try
    {
        // The using-declared buffers and queue are scoped to this try block, so they are
        // released before DisposeOpenCLResources() runs in the finally clause.
        using var headerBuffer = new ComputeBuffer<byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, header);
        using var bitsBuffer = new ComputeBuffer<byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, bits);
        using var powBuffer = new ComputeBuffer<uint>(this.computeContext, ComputeMemoryFlags.WriteOnly, 1);

        this.computeKernel.SetMemoryArgument(0, headerBuffer);
        this.computeKernel.SetMemoryArgument(1, bitsBuffer);
        this.computeKernel.SetValueArgument(2, nonceStart);
        this.computeKernel.SetMemoryArgument(3, powBuffer);

        using var commands = new ComputeCommandQueue(this.computeContext, this.computeDevice, ComputeCommandQueueFlags.None);
        commands.Execute(this.computeKernel, null, new long[] { iterations }, null, null);

        var nonceOut = new uint[1];
        commands.ReadFromBuffer(powBuffer, ref nonceOut, true, null);
        commands.Finish();

        return nonceOut[0];
    }
    finally
    {
        // Runs on both success and failure so the resources built above are never left behind.
        this.DisposeOpenCLResources();
    }
}
/// <summary>
/// Multiplies <paramref name="L"/> by <paramref name="R"/> on the OpenCL device.
/// </summary>
/// <param name="L">Left operand.</param>
/// <param name="R">Right operand.</param>
/// <returns>The product as a flat array of <c>L.M * R.N</c> elements.</returns>
private double[] MatrixMultiply(Matrix<double> L, Matrix<double> R)
{
    var L_array = L.To1D();
    var R_array = R.To1D();
    var O_array = new double[L.M * R.N];

    // using blocks release the device buffers even when a kernel call throws.
    using (var a = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, L_array))
    using (var b = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, R_array))
    using (var c = new ComputeBuffer<double>(context, ComputeMemoryFlags.WriteOnly, O_array.Length))
    {
        kernel.SetMemoryArgument(0, a);
        kernel.SetMemoryArgument(1, b);
        kernel.SetMemoryArgument(2, c);
        kernel.SetValueArgument(3, L.N);
        kernel.SetValueArgument(4, L.M);
        kernel.SetValueArgument(5, R.N);
        kernel.SetValueArgument(6, R.M);

        // Two-dimensional launch: R.N by L.M work items, matching the output element count.
        commands.Execute(kernel, null, new long[] { R.N, L.M }, null, null);
        commands.ReadFromBuffer(c, ref O_array, true, null);
        commands.Finish();
    }

    return O_array;
}
/// <summary>
/// Runs the gradient kernel over the field and copies the result to the host.
/// </summary>
/// <param name="queue">The queue to execute the kernel on.</param>
/// <param name="gradPoints">Receives the calculated gradient points.</param>
private void ComputeGradient(ComputeCommandQueue queue, float[] gradPoints)
{
    gradientKernel.SetMemoryArgument(0, outCl);
    gradientKernel.SetMemoryArgument(1, outGradient);

    // Offset (1, 1) with a size two smaller per axis: the launch covers interior cells only
    // and skips the outermost ring of the grid.
    long[] interiorOffset = { 1, 1 };
    long[] interiorSize = { gridWidth - 2, gridHeight - 2 };
    queue.Execute(gradientKernel, interiorOffset, interiorSize, null, null);

    // Blocking read of the gradient buffer.
    queue.ReadFromBuffer(outGradient, ref gradPoints, true, null);
}
/// <summary>
/// Demo entry point: seeds an 11x11 grid with a single 1 at (5, 5), runs the "Test" kernel
/// over it and prints the resulting grid.
/// </summary>
/// <param name="args">Unused.</param>
static void Main(string[] args)
{
    int w = 11, h = 11, sx = 5, sy = 5, iters = 2;

    var properties = new ComputeContextPropertyList(ComputePlatform.Platforms[0]);
    using (var context = new ComputeContext(ComputeDeviceTypes.All, properties, null, IntPtr.Zero))
    using (var quene = new ComputeCommandQueue(context, ComputePlatform.Platforms[0].Devices[0], ComputeCommandQueueFlags.None))
    using (var prog = new ComputeProgram(context, Source))
    {
        // Compile the program.
        try
        {
            prog.Build(context.Devices, "", null, IntPtr.Zero);
        }
        catch
        {
            // Show the compiler output, then propagate: no kernel can be created from a failed build.
            Console.WriteLine(prog.GetBuildLog(context.Devices[0]));
            throw;
        }

        var mat = new float[w * h];
        for (int i = 0; i < w * h; i++)
        {
            mat[i] = (i == w * sy + sx) ? 1f : 0f;
        }

        // CopyHostPointer instead of UseHostPointer: a managed array is not pinned for the life
        // of the buffer, so the device must not keep referring to its address. The result is
        // read back explicitly below, so the observable outcome is unchanged.
        using (var kernel = prog.CreateKernel("Test"))
        using (var mat1 = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, mat))
        using (var mat2 = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadWrite, w * h))
        {
            kernel.SetMemoryArgument(0, mat1);
            kernel.SetMemoryArgument(1, mat2);
            kernel.SetValueArgument(2, iters);
            kernel.SetValueArgument(3, w);
            kernel.SetValueArgument(4, h);

            quene.Execute(kernel, null, new long[] { (long)h, (long)w }, null, null);
            quene.ReadFromBuffer(mat1, ref mat, true, null);
        }

        for (int i = 0; i < h; i++)
        {
            for (int j = 0; j < w; j++)
            {
                Console.Write($"{mat[i*w+j]:.00} ");
            }

            Console.WriteLine();
        }
    }

    Console.ReadKey();
}
/// <summary>
/// Runs the kernel over the X and Y components of <paramref name="positions"/> and returns the
/// per-element results recombined into vectors.
/// </summary>
/// <param name="positions">Input vectors; one work item is launched per element.</param>
/// <returns>
/// One vector per input, built from the kernel's X and Y outputs. An empty input yields an
/// empty list without touching the device.
/// </returns>
public List<Vector2> CalculateSquareDistance(List<Vector2> positions)
{
    // Nothing to compute, and a zero-sized device buffer cannot be created.
    if (positions.Count == 0)
    {
        return new List<Vector2>();
    }

    // Create data
    var xResultData = new float[positions.Count];
    var yResultData = new float[positions.Count];
    var xData = positions.Select(x => x.X).ToArray();
    var yData = positions.Select(x => x.Y).ToArray();

    // Put data on buffers; using blocks release the native handles on every path.
    using (var xResultBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, xResultData))
    using (var yResultBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, yResultData))
    using (var xBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, xData))
    using (var yBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, yData))
    using (var queue = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None))
    {
        // Set memory arguments to kernel
        kernel.SetMemoryArgument(0, xResultBuffer);
        kernel.SetMemoryArgument(1, yResultBuffer);
        kernel.SetMemoryArgument(2, xBuffer);
        kernel.SetMemoryArgument(3, yBuffer);

        queue.Execute(kernel, null, new long[] { positions.Count }, null, null);

        // Read data (blocking)
        queue.ReadFromBuffer(xResultBuffer, ref xResultData, true, null);
        queue.ReadFromBuffer(yResultBuffer, ref yResultData, true, null);
    }

    var result = new List<Vector2>(xResultData.Length);
    for (int i = 0; i < xResultData.Length; i++)
    {
        // The Y component comes from the Y result array (previously X was used for both).
        result.Add(new Vector2(xResultData[i], yResultData[i]));
    }

    return result;
}
/// <summary>
/// Compiles <paramref name="source"/>, runs <paramref name="function"/> on the device selected
/// by <paramref name="loaderParams"/> and returns the contents of the result buffer.
/// </summary>
/// <typeparam name="T">Element type of the result buffer.</typeparam>
/// <param name="source">OpenCL source; <c>#define OpenCL</c> is prepended before compilation.</param>
/// <param name="function">Name of the kernel function to run.</param>
/// <param name="bufferSize">Number of elements in the result buffer (kernel argument 0).</param>
/// <param name="loaderParams">Supplies the device and the two-dimensional global work size.</param>
/// <param name="kernelParams">Values wrapped as device variables and bound as arguments 1..n.</param>
/// <returns>The result buffer copied back to the host.</returns>
protected T[] InternalExecuteOpencl<T>(
    String source,
    String function,
    int bufferSize,
    ParallelTaskParams loaderParams,
    params Object[] kernelParams)
    where T : struct
{
    TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointStart);

    ComputeCommandQueue queue = QueueWithDevice(loaderParams.OpenCLDevice);
    TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformInit);

    String updatedSource = "#define OpenCL\r\n" + source;

    // The program, kernel and result buffer are created here on every call, so they are
    // released here as well; otherwise each invocation leaks native OpenCL handles.
    using (ComputeProgram program = new ComputeProgram(queue.Context, updatedSource))
    {
        program.Build(new ComputeDevice[] { queue.Device }, null, null, IntPtr.Zero);
        TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelBuild);

        T[] resultBuffer = new T[bufferSize];
        using (ComputeBuffer<T> resultBufferVar = new ComputeBuffer<T>(queue.Context, ComputeMemoryFlags.WriteOnly, bufferSize))
        {
            // NOTE(review): the wrapped variables are left undisposed because their ownership
            // is decided by WrapDeviceVariables, which is not visible here — confirm.
            List<ComputeMemory> vars = new List<ComputeMemory>();
            vars.Add(resultBufferVar);
            vars.AddRange(WrapDeviceVariables(kernelParams, queue.Context));

            using (ComputeKernel kernel = program.CreateKernel(function))
            {
                for (int i = 0; i < vars.Count; i++)
                {
                    kernel.SetMemoryArgument(i, vars[i]);
                }

                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceWrite);

                long[] workersGlobal = new long[2] { loaderParams.GlobalWorkers.Width, loaderParams.GlobalWorkers.Height };
                queue.Execute(kernel, null, workersGlobal, null, null);
                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelExecute);

                // Non-blocking read; Finish() below waits for it before anything is released.
                queue.ReadFromBuffer<T>(resultBufferVar, ref resultBuffer, false, null);
                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceRead);

                queue.Finish();
                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformDeinit);

                return resultBuffer;
            }
        }
    }
}
/// <summary>
/// Sets up the OpenCL platform, device, context, queue, program and "Test" kernel, then runs
/// the kernel once over 100 elements and prints every result value.
/// </summary>
public PrOpenClCalculation()
{
    m_Platform = ComputePlatform.Platforms.First();
    m_Device = m_Platform.Devices.First();
    m_ComputeContext = new ComputeContext(new[] { m_Device }, new ComputeContextPropertyList(m_Platform), null, IntPtr.Zero);
    m_CommandQueue = new ComputeCommandQueue(m_ComputeContext, m_Device, ComputeCommandQueueFlags.None);
    m_Program = new ComputeProgram(m_ComputeContext, ProgramSource);
    m_Program.Build(new[] { m_Device }, "", null, IntPtr.Zero);
    m_Kernel = m_Program.CreateKernel("Test");

    long count = 100;
    var result = new int[count];
    var a = new int[count];
    for (int i = 0; i < count; i++)
    {
        a[i] = i;
    }

    // CopyHostPointer instead of UseHostPointer: managed arrays are not pinned for the life of
    // the buffer, and two UseHostPointer buffers over the same array would alias one region.
    // Each buffer now gets its own device-side copy; the result is read back explicitly.
    var bufferFlags = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer;
    using (var resultDev = new ComputeBuffer<int>(m_ComputeContext, bufferFlags, result))
    using (var aDev = new ComputeBuffer<int>(m_ComputeContext, bufferFlags, a))
    using (var bDev = new ComputeBuffer<int>(m_ComputeContext, bufferFlags, a))
    {
        // Bind the buffers to the kernel.
        m_Kernel.SetMemoryArgument(0, resultDev);
        m_Kernel.SetMemoryArgument(1, aDev);
        m_Kernel.SetMemoryArgument(2, bDev);

        // Launch the kernel with one work item per element.
        m_CommandQueue.Execute(m_Kernel, null, new[] { count }, null, null);

        // Read the result back (blocking).
        m_CommandQueue.ReadFromBuffer(resultDev, ref result, true, null);
    }

    // Print the result.
    foreach (var i in result)
    {
        Console.WriteLine(i);
    }
}
/// <summary>
/// Renders the scene off-screen at the requested height and returns it as a bitmap.
/// </summary>
/// <param name="camera">Camera position, orientation, lens settings and frame count.</param>
/// <param name="screenshotHeight">Height in pixels; the width follows from <c>ScreenshotAspectRatio</c>.</param>
/// <param name="slowRender">Number of render passes in the first loop and multiplier for the second.</param>
/// <returns>A bitmap built from the rendered pixels; NaN pixels are left at 0.</returns>
public Bitmap GetScreenshot(CameraConfig camera, int screenshotHeight, int slowRender)
{
    var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
    var pixels = new Vector4[screenshotWidth * screenshotHeight];

    // using blocks release the buffer and queue even when a render pass throws.
    using (var computeBuffer = new ComputeBuffer<Vector4>(_program.Context, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight))
    using (var queue = new ComputeCommandQueue(_program.Context, _program.Context.Devices[0], ComputeCommandQueueFlags.None))
    {
        var globalSize = GlobalLaunchsizeFor(screenshotWidth, screenshotHeight);

        for (var i = 0; i < slowRender; i++)
        {
            CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
        }

        for (var i = 0; i < camera.Frame * slowRender; i++)
        {
            CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
        }

        queue.ReadFromBuffer(computeBuffer, ref pixels, true, null);
        queue.Finish();
    }

    var bmp = new Bitmap(screenshotWidth, screenshotHeight);
    var destBuffer = new int[screenshotWidth * screenshotHeight];
    for (var y = 0; y < screenshotHeight; y++)
    {
        for (var x = 0; x < screenshotWidth; x++)
        {
            var pixel = pixels[x + y * screenshotWidth];
            if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
            {
                // The message is a single-line literal; a raw line break inside a regular
                // string literal does not compile.
                Console.WriteLine("Warning! Caught NAN pixel while taking screenshot!");
                continue;
            }

            // Pack X, Y, Z into bits 16-23, 8-15 and 0-7 respectively.
            destBuffer[y * screenshotWidth + x] = (byte)(pixel.X * 255) << 16 | (byte)(pixel.Y * 255) << 8 | (byte)(pixel.Z * 255);
        }
    }

    var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
    try
    {
        Marshal.Copy(destBuffer, 0, bmpData.Scan0, destBuffer.Length);
    }
    finally
    {
        bmp.UnlockBits(bmpData);
    }

    return bmp;
}
/// <summary>
/// Works through all queen tasks in batches on the second device of <paramref name="context"/>,
/// then prints the elapsed seconds and the total number of solutions.
/// </summary>
/// <param name="context">Compute context; its device at index 1 runs the kernel.</param>
/// <param name="kernel">Kernel taking the task buffer as argument 0.</param>
private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
{
    var todos = GetQueenTaskPartition(NumQueens, 4);
    var done = new List<QueenTask>();
    ComputeEventList eventList = new ComputeEventList();

    // The queue wraps a native handle; release it when the search ends.
    using (var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None))
    {
        Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

        QueenTask[] inProgress = GetNextAssignment(new QueenTask[] { }, todos, done);

        var sw = new Stopwatch();
        sw.Start();

        while (inProgress.Any())
        {
            // A fresh device buffer is needed per batch; dispose it before the next one is
            // created so buffers do not pile up for the duration of the search.
            using (var taskBuffer = new ComputeBuffer<QueenTask>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, inProgress))
            {
                kernel.SetMemoryArgument(0, taskBuffer);
                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                // Each batch gets twelve kernel passes before results are collected.
                for (int i = 0; i < 12; i++)
                {
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);
                }

                // Non-blocking read; Finish() waits for it before the buffer is released.
                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();
            }

            inProgress = GetNextAssignment(inProgress, todos, done);
        }

        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);
    }

    // Seeded aggregate: an empty task list yields 0 instead of throwing.
    ulong sum = done.Select(state => state.solutions)
                    .Aggregate(0UL, (total, next) => total + next);
    Console.WriteLine("Q({0})={1}", NumQueens, sum);
}
/// <summary>
/// Runs the kernel over <paramref name="Worksize"/> work items and copies every copy-back
/// argument into <paramref name="Returned"/>.
/// </summary>
/// <typeparam name="T">Element type of the copy-back buffer.</typeparam>
/// <param name="Worksize">Global work size (one dimension).</param>
/// <param name="Returned">Caller-supplied array that receives the device data.</param>
public void ExecuteReturn<T>(int Worksize, T[] Returned) where T : struct
{
    SetArgs();

    long[] globalWorkSize = { Worksize };
    queue.Execute(kernel, null, globalWorkSize, null, null);

    foreach (var argument in MethodInfo.Arguments)
    {
        if (!argument.CopyBack)
        {
            continue;
        }

        // Non-blocking read; completion is awaited by Finish() below.
        queue.ReadFromBuffer<T>((Cloo.ComputeBuffer<T>)argument.ComputeMemory, ref Returned, false, null);
    }

    queue.Finish();
}
/// <summary>
/// Works through all queen tasks in batches on the second device of <paramref name="context"/>,
/// then prints the elapsed seconds and the total number of solutions.
/// </summary>
/// <param name="context">Compute context; its device at index 1 runs the kernel.</param>
/// <param name="kernel">Kernel taking the task buffer as argument 0.</param>
private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
{
    var todos = GetQueenTaskPartition(NumQueens, 4);
    var done = new List<QueenTask>();
    ComputeEventList eventList = new ComputeEventList();

    // The queue wraps a native handle; release it when the search ends.
    using (var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None))
    {
        Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

        QueenTask[] inProgress = GetNextAssignment(new QueenTask[] { }, todos, done);

        var sw = new Stopwatch();
        sw.Start();

        while (inProgress.Any())
        {
            // A fresh device buffer is needed per batch; dispose it before the next one is
            // created so buffers do not pile up for the duration of the search.
            using (var taskBuffer = new ComputeBuffer<QueenTask>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, inProgress))
            {
                kernel.SetMemoryArgument(0, taskBuffer);
                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                // Each batch gets twelve kernel passes before results are collected.
                for (int i = 0; i < 12; i++)
                {
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);
                }

                // Non-blocking read; Finish() waits for it before the buffer is released.
                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();
            }

            inProgress = GetNextAssignment(inProgress, todos, done);
        }

        sw.Stop();
        Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);
    }

    // Seeded aggregate: an empty task list yields 0 instead of throwing.
    ulong sum = done.Select(state => state.solutions)
                    .Aggregate(0UL, (total, next) => total + next);
    Console.WriteLine("Q({0})={1}", NumQueens, sum);
}
/// <summary>
/// Binds the trade data and fit-function buffers to <paramref name="kernel"/>, runs it and
/// copies the fit-function values back into <c>FitFunction</c>.
/// </summary>
/// <param name="context">Context the per-call buffers are created in.</param>
/// <param name="kernel">Kernel to configure and execute.</param>
/// <param name="commands">Queue the kernel and the read-back are enqueued on.</param>
/// <param name="dimensions">Global work size passed to the launch.</param>
public override void RunKernel(ComputeContext context, ComputeKernel kernel, ComputeCommandQueue commands, long[] dimensions)
{
    // These three buffers are created on every call, so they are released on every call;
    // the remaining buffers bound below are owned elsewhere and left untouched.
    using (var tradeprofitsBuffer = new ComputeBuffer<short>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, TradeProfits))
    using (var tradearbitrageBuffer = new ComputeBuffer<byte>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, TradeArbitrage))
    using (var fit_functionBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, FitFunction))
    {
        kernel.SetMemoryArgument(0, allvarsBuffer);
        kernel.SetMemoryArgument(1, loboundsBuffer);
        kernel.SetMemoryArgument(2, upboundsBuffer);
        kernel.SetMemoryArgument(3, tradeprofitsBuffer);
        kernel.SetMemoryArgument(4, tradearbitrageBuffer);
        kernel.SetMemoryArgument(5, fit_functionBuffer);
        kernel.SetValueArgument<int>(6, VariablesCount);

        var eventList = new ComputeEventList();
        commands.Execute(kernel, null, dimensions, null, eventList);

        // Blocking read, followed by Finish() so nothing is pending when the buffers are released.
        commands.ReadFromBuffer(fit_functionBuffer, ref FitFunction, true, null);
        commands.Finish();
    }
}
/// <summary>
/// Runs the kernel and returns the contents of the first copy-back argument.
/// </summary>
/// <typeparam name="T">Element type of the copy-back buffer.</typeparam>
/// <param name="Worksize">Global work size (one dimension).</param>
/// <param name="LocalWorksize">Local work size, or null to pass no local size to the launch.</param>
/// <param name="WorkOffset">Global work offset.</param>
/// <param name="OutSize">Length of the returned array.</param>
/// <returns>A new array filled from the first argument flagged for copy-back.</returns>
public T[] ExecuteReturn<T>(long Worksize, long? LocalWorksize, long WorkOffset, int OutSize) where T : struct
{
    SetArgs();

    // Only the local size differs between the two launch variants.
    long[] localSize = LocalWorksize.HasValue ? new long[] { LocalWorksize.Value } : null;
    queue.Execute(kernel, new long[] { WorkOffset }, new long[] { Worksize }, localSize, null);

    T[] output = new T[OutSize];
    var source = MethodInfo.Arguments.First(x => x.CopyBack);

    // Blocking read.
    queue.ReadFromBuffer((ComputeBuffer<T>)source.ComputeMemory, ref output, true, null);
    queue.Finish();

    return output;
}
/// <summary>
/// Uploads the inputs, runs the kernel over <paramref name="nofKernels"/> work items and reads
/// the output back. Any exception is written to the console instead of being propagated.
/// </summary>
/// <param name="nofKernels">Global work size of the launch.</param>
public static void RunKernels(int nofKernels)
{
    try
    {
        // Every enqueue below appends its event to this list. The list is not strictly
        // required here, but it shows how events are collected and waited on.
        ComputeEventList pendingEvents = new ComputeEventList();

        commandQueue1.WriteToBuffer(input, CB_input, false, pendingEvents);
        commandQueue1.WriteToBuffer(weightIDs, CB_networkIndex, false, pendingEvents);

        long[] globalWorkSize = { nofKernels };
        commandQueue1.Execute(kernel, null, globalWorkSize, null, pendingEvents);

        // Non-blocking read: the host array must not be used until the synchronisation below.
        commandQueue1.ReadFromBuffer(CB_output, ref output, false, pendingEvents);

        // Explicit synchronisation: wait on the collected events, then drain the queue.
        pendingEvents.Wait();
        commandQueue1.Finish();
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.ToString());
    }
}
/// <summary>
/// Uploads the three input arrays, runs the kernel and copies its output into
/// <paramref name="result"/>.
/// </summary>
/// <param name="info">Copied to kernel argument 0.</param>
/// <param name="points">Copied to kernel argument 1.</param>
/// <param name="envs">Copied to kernel argument 3.</param>
/// <param name="result">Receives the output; half its length is used as the global work size.</param>
private void Run(OpenCLInfo[] info, OpenCLPointInfo[] points, OpenCLNote[] envs, float[] result)
{
    var inputFlags = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;

    // using blocks release the buffers even when a kernel call throws.
    using (var infoBuffer = new ComputeBuffer<OpenCLInfo>(FContext, inputFlags, info))
    using (var pointsBuffer = new ComputeBuffer<OpenCLPointInfo>(FContext, inputFlags, points))
    using (var envsBuffer = new ComputeBuffer<OpenCLNote>(FContext, inputFlags, envs))
    using (var resultBuffer = new ComputeBuffer<float>(FContext, ComputeMemoryFlags.WriteOnly, result.Length))
    {
        kernel.SetMemoryArgument(0, infoBuffer);
        kernel.SetMemoryArgument(1, pointsBuffer);
        // Argument 2 is not set here; the original comment names it FWaveformBuffer.
        kernel.SetMemoryArgument(3, envsBuffer);
        kernel.SetMemoryArgument(4, resultBuffer);

        // One work item per pair of output floats.
        commands.Execute(kernel, null, new long[] { result.Length / 2 }, null, null);

        // Blocking read, followed by Finish() so nothing is pending when the buffers are released.
        commands.ReadFromBuffer(resultBuffer, ref result, true, null);
        commands.Finish();
    }
}
/// <summary>
/// Run kernel against all elements.
/// </summary>
/// <typeparam name="TSource">Struct type that corresponds to kernel function type</typeparam>
/// <param name="array">Array of elements to process; overwritten with the kernel's output</param>
/// <param name="kernelCode">The code of kernel function</param>
/// <param name="kernelSelector">Method that selects kernel by function name; if null uses first</param>
/// <param name="deviceSelector">Method that selects device by index, description, OpenCL version; if null uses first</param>
public static void ClooForEach<TSource>(this TSource[] array, string kernelCode, Func<string, bool> kernelSelector = null, Func<int, string, Version, bool> deviceSelector = null) where TSource : struct
{
    kernelSelector = kernelSelector ?? ((k) => true);
    deviceSelector = deviceSelector ?? ((i, d, v) => true);

    var device = ComputePlatform.Platforms
        .SelectMany(p => p.Devices)
        .Where((d, i) => deviceSelector(i, $"{d.Name} {d.DriverVersion}", d.Version))
        .First();
    var properties = new ComputeContextPropertyList(device.Platform);

    using (var context = new ComputeContext(new[] { device }, properties, null, IntPtr.Zero))
    using (var program = new ComputeProgram(context, kernelCode))
    {
        program.Build(new[] { device }, null, null, IntPtr.Zero);
        var kernels = program.CreateAllKernels().ToList();
        try
        {
            var kernel = kernels.First((k) => kernelSelector(k.FunctionName));

            // CopyHostPointer instead of UseHostPointer: a managed array is not pinned for the
            // life of the buffer, so the device must not keep referring to its address. The
            // data is copied back explicitly below, so callers see the same result.
            using (var primesBuffer = new ComputeBuffer<TSource>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, array))
            {
                kernel.SetMemoryArgument(0, primesBuffer);
                using (var queue = new ComputeCommandQueue(context, context.Devices[0], 0))
                {
                    // One work item per array element.
                    queue.Execute(kernel, null, new long[] { primesBuffer.Count }, null, null);
                    queue.Finish();
                    queue.ReadFromBuffer(primesBuffer, ref array, true, null);
                }
            }
        }
        finally
        {
            kernels.ForEach(k => k.Dispose());
        }
    }
}
/// <summary>
/// Executes the compiled kernel with the given function name and copies every device buffer
/// built for the arguments back into the corresponding entry of <paramref name="args"/>.
/// </summary>
/// <typeparam name="TSource">The element type of the array arguments.</typeparam>
/// <param name="functionName">Name of the kernel function.</param>
/// <param name="args">
/// Kernel arguments. The length of the first <c>TSource[]</c> among them is used as the global
/// work size; when there is none the kernel runs with a work size of 1.
/// </param>
/// <exception cref="ExecutionException">
/// The kernel was not found, or any step of argument setup, execution or read-back failed.
/// </exception>
public override void Execute<TSource>(string functionName, params object[] args)
{
    ComputeKernel kernel = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));

    // Validate before allocating the queue so a missing kernel cannot leak a native handle.
    if (kernel == null)
    {
        throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
    }

    ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None);
    try
    {
        var ndobject = (TSource[])args.FirstOrDefault(x => (x.GetType() == typeof(TSource[])));
        long length = ndobject != null ? ndobject.Length : 1;

        var buffers = BuildKernelArguments<TSource>(args, kernel, length);
        commands.Execute(kernel, null, new long[] { length }, null, null);

        foreach (var item in buffers)
        {
            // item.Key indexes into args; the read fills that array in place (blocking).
            TSource[] r = (TSource[])args[item.Key];
            commands.ReadFromBuffer(item.Value, ref r, true, null);
            item.Value.Dispose();
        }

        commands.Finish();
    }
    catch (Exception ex)
    {
        throw new ExecutionException(ex.Message);
    }
    finally
    {
        commands.Dispose();
    }
}
/// <summary>
/// Copies the device buffer into the host array, blocking until the copy has completed.
/// </summary>
public void CopyFromDevice() =>
    _queue.ReadFromBuffer(_gpubuffer, ref _cpubuffer, true, null);
/// <summary>
/// Runs the "PrimeNumber" kernel on every device of the first OpenCL platform (last device first),
/// prints every result other than 0 and 1 to the console, appends it to a text file and reports the
/// elapsed time per device.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const string programName = "Prime Number";

    // Size at which the current output file is considered full (1 MiB).
    const long maxLogFileBytes = 1024 * 1024;

    Stopwatch stopWatch = new Stopwatch();
    string clProgramSource = KernelProgram();

    Console.WriteLine("Environment OS:");
    Console.WriteLine("-----------------------------------------");
    Console.WriteLine(Environment.OSVersion);

    if (ComputePlatform.Platforms.Count == 0)
    {
        Console.WriteLine("No OpenCL Platforms are availble!");
        return;
    }

    // step 1 choose the first available platform
    ComputePlatform platform = ComputePlatform.Platforms[0];

    // output the basic info
    BasicInfo(platform);
    Console.WriteLine("Program: " + programName);
    Console.WriteLine("-----------------------------------------");

    //Cpu 10 seconds Gpu 28 seconds
    int count = 64;
    int total = count * count * count;
    int[] output_Z = new int[total];
    int[] input_X = new int[total];
    for (int x = 0; x < total; x++)
    {
        input_X[x] = x;
    }

    // step 2 create context for that platform and all devices
    ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);
    using (ComputeContext context = new ComputeContext(platform.Devices, properties, null, IntPtr.Zero))
    // step 3 create and build program
    using (ComputeProgram program = new ComputeProgram(context, clProgramSource))
    {
        program.Build(platform.Devices, null, null, IntPtr.Zero);

        // step 4 create memory objects
        using (ComputeBuffer<int> a = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input_X))
        using (ComputeBuffer<int> z = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, output_Z.Length))
        // step 5 create kernel object with the kernel function name
        using (ComputeKernel kernel = program.CreateKernel("PrimeNumber"))
        {
            // step 6 set kernel arguments
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, z);

            ComputeEventList eventList = new ComputeEventList();
            try
            {
                // query available devices n,...,1,0. cpu first then gpu
                for (int j = context.Devices.Count - 1; j > -1; j--)
                {
                    // Start from zero for every device; otherwise the reported time would
                    // also contain the time spent on the devices processed before this one.
                    stopWatch.Reset();
                    stopWatch.Start();

                    // step 7 create command queue on that context on that device
                    using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[j], ComputeCommandQueueFlags.None))
                    {
                        // step 8 run the kernel program
                        commands.Execute(kernel, null, new long[] { count, count, count }, null, eventList);

                        // step 9 read results (non-blocking, so wait explicitly before using output_Z)
                        commands.ReadFromBuffer(z, ref output_Z, false, eventList);
                        commands.Finish();
                    }

                    // Pick the output file: once a file has reached the size limit, move on to the
                    // next numbered file. The size is checked before opening so only one writer exists.
                    string fileName = "C:\\primenumber\\PrimeNumberGPU.txt";
                    int index = 1;
                    FileInfo info = new FileInfo(fileName);
                    while (info.Exists && info.Length >= maxLogFileBytes)
                    {
                        fileName = "C:\\primenumber\\PrimeNumberGPU" + index.ToString() + ".txt";
                        info = new FileInfo(fileName);
                        index++;
                    }

                    using (StreamWriter file = new StreamWriter(fileName, true))
                    {
                        for (int xx = 0; xx < total; xx++)
                        {
                            if (output_Z[xx] != 0 && output_Z[xx] != 1)
                            {
                                Console.WriteLine(output_Z[xx]);
                                file.Write(output_Z[xx]);
                                file.Write("x");
                            }
                        }
                    }

                    stopWatch.Stop();

                    TimeSpan ts = stopWatch.Elapsed;
                    // Milliseconds need three digits; two digits would make 5 ms read as ".05".
                    string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:000}", ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds);
                    Console.WriteLine(context.Devices[j].Name.Trim() + " Elapsed Time " + elapsedTime);
                    Console.WriteLine("-----------------------------------------");
                }
            }
            finally
            {
                // Events consume OpenCL resources and must be released explicitly.
                foreach (ComputeEventBase eventBase in eventList)
                {
                    eventBase.Dispose();
                }
                eventList.Clear();
            }
        }
    }

    Console.ReadLine();
}
/// <summary>
/// Adds two vectors of random floats with the "VectorAdd" kernel on the first device of
/// <paramref name="context"/> and writes one "a + b = c" line per element to <paramref name="log"/>.
/// </summary>
/// <param name="context">OpenCL context used for the buffers, the program and the command queue.</param>
/// <param name="log">Receives the results, or the exception text when anything fails.</param>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        // Create the arrays and fill them with random data.
        int count = 10;
        float[] arrA = new float[count];
        float[] arrB = new float[count];
        float[] arrC = new float[count];

        Random rand = new Random();
        for (int i = 0; i < count; i++)
        {
            arrA[i] = (float)(rand.NextDouble() * 100);
            arrB[i] = (float)(rand.NextDouble() * 100);
        }

        // Create the input buffers and fill them with data from the arrays.
        // Access modifiers should match those in a kernel.
        // CopyHostPointer means the buffer should be filled with the data provided in the last argument.
        // The output buffer doesn't need any data from the host. Only its size is specified (arrC.Length).
        // All OpenCL resources are released through using/finally so that a failure does not leak them.
        using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
        using (ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB))
        using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
        {
            // Create and build the opencl program.
            program = new ComputeProgram(context, clProgramSource);
            try
            {
                program.Build(null, null, null, IntPtr.Zero);

                // Create the kernel function and set its arguments.
                using (ComputeKernel kernel = program.CreateKernel("VectorAdd"))
                {
                    kernel.SetMemoryArgument(0, a);
                    kernel.SetMemoryArgument(1, b);
                    kernel.SetMemoryArgument(2, c);

                    // Create the event wait list. An event list is not really needed for this example but it is important to see how it works.
                    // Note that events (like everything else) consume OpenCL resources and creating a lot of them may slow down execution.
                    // For this reason their use should be avoided if possible.
                    ComputeEventList eventList = new ComputeEventList();
                    try
                    {
                        // Create the command queue. This is used to control kernel execution and manage read/write/copy operations.
                        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
                        {
                            // Execute the kernel "count" times. After this call returns, "eventList" will contain an event associated with this command.
                            // If eventList == null or typeof(eventList) == ReadOnlyCollection<ComputeEventBase>, a new event will not be created.
                            commands.Execute(kernel, null, new long[] { count }, null, eventList);

                            // Read back the results. If the command-queue has out-of-order execution enabled (default is off), ReadFromBuffer
                            // will not execute until any previous events in eventList (in our case only eventList[0]) are marked as complete
                            // by OpenCL. By default the command-queue will execute the commands in the same order as they are issued from the host.
                            // eventList will contain two events after this method returns.
                            commands.ReadFromBuffer(c, ref arrC, false, eventList);

                            // A blocking "ReadFromBuffer" (if 3rd argument is true) will wait for itself and any previous commands
                            // in the command queue or eventList to finish execution. Otherwise an explicit wait for all the opencl commands
                            // to finish has to be issued before "arrC" can be used.
                            // This explicit synchronization can be achieved in two ways:
                            // 1) Wait for the events in the list to finish,
                            //eventList.Wait();
                            // 2) Or simply use
                            commands.Finish();

                            // Print the results to a log/console.
                            for (int i = 0; i < count; i++)
                            {
                                log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
                            }
                        }
                    }
                    finally
                    {
                        // cleanup events
                        foreach (ComputeEventBase eventBase in eventList)
                        {
                            eventBase.Dispose();
                        }
                        eventList.Clear();
                    }
                }
            }
            finally
            {
                // cleanup program
                program.Dispose();
            }
        }
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
}
/// <summary>
/// Builds the broad-phase collision cell array on the OpenCL device.
/// Always: marshals the bodies of <c>Program.window</c> into a byte buffer, uploads it and runs the
/// data-initialization kernel. When compiled with <c>ALL</c>: radix-sorts the cell IDs, reorders the
/// object IDs, counts elements, runs the prefix sum and populates the collision cell array, then
/// releases buffers, components, queue and context. Sections compiled with <c>PRINT</c> read the
/// intermediate buffers back and dump them to log files under <c>Application.StartupPath</c>.
/// Timings are written to the console.
/// </summary>
public unsafe void CreateCollisionCellArray()
{
    time.Reset();
    int num_of_bodies = Program.window.number_of_bodies;
    // Eight cell-ID slots per body (matches the cellIDs array size below).
    uint num_of_elements = (uint)num_of_bodies * 8;
    float ge = Program.window.grid_edge;
    InitializeQueueAndContext();
    InitializeComponents();
    time.Start();

    uint[] sortedCellIDArray = new uint[num_of_elements];
    uint[] indexArrayIn = new uint[num_of_elements];
    for (int j = 0; j < num_of_elements; j++)
    {
        indexArrayIn[j] = (uint)j;
    }
    uint[] indexArrayOut = new uint[num_of_elements];

    // Pinned handles supply stable host pointers for the CopyHostPointer buffers created below.
    GCHandle gch_iai = GCHandle.Alloc(indexArrayIn, GCHandleType.Pinned);
    IntPtr ptr_i = gch_iai.AddrOfPinnedObject();
    GCHandle gch_ge = GCHandle.Alloc(ge, GCHandleType.Pinned);
    IntPtr ptr_ge = gch_ge.AddrOfPinnedObject();

    array = new BodyData[num_of_bodies];
    for (int i = 0; i < num_of_bodies; i++)
    {
        Body body = Program.window.bodies.ElementAt(i);
        array[i].pos = new float[3];
        array[i].cellIDs = new uint[8];
        //array[i].ctrl_bits = 0;
        array[i].pos[0] = body.getPos().X;
        array[i].pos[1] = body.getPos().Y;
        array[i].pos[2] = body.getPos().Z;
        array[i].ID = (uint)i;
        array[i].radius = body.getBSphere().radius;
    }
    structSize = Marshal.SizeOf(array[0]);

    #region INITIALIZATION AND POPULATION OF DEVICE ARRAYS
    // Marshal every BodyData into unmanaged memory, then copy the raw bytes into a managed
    // array that backs the device buffer.
    IntPtr ptr = Marshal.AllocHGlobal(structSize * num_of_bodies);
    for (int i = 0; i < num_of_bodies; i++)
    {
        Marshal.StructureToPtr(array[i], ptr + i * structSize, false);
    }
    byte[] input = new byte[structSize * num_of_bodies];
    Marshal.Copy(ptr, input, 0, structSize * num_of_bodies);

    b_start = time.ElapsedMilliseconds;
    b_objectData = new ComputeBuffer<byte>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, input);
    b_objectIDArray = new ComputeBuffer<ulong>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);
    b_cellIDArray = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);
    b_gridEdge = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, 1, ptr_ge);
    b_count += time.ElapsedMilliseconds - b_start;

    k_dataInitialization.SetMemoryArgument(0, b_objectData);
    k_dataInitialization.SetMemoryArgument(1, b_gridEdge);
    k_dataInitialization.SetMemoryArgument(2, b_cellIDArray);
    k_dataInitialization.SetMemoryArgument(3, b_objectIDArray);

    dataInit_start = time.ElapsedMilliseconds;
    queue.Execute(k_dataInitialization, null, new long[] { num_of_bodies }, null, null);
    queue.Finish();
    Console.WriteLine("TIME SPENT EXECUTIND DATA INITIALIZATION KERNEL: " + (time.ElapsedMilliseconds - dataInit_start) + "ms");
    #endregion

#if PRINT
    uint[] rs_array = new uint[num_of_bodies * 8];
    queue.ReadFromBuffer<uint>(b_cellIDArray, ref rs_array, true, null);
    queue.Finish();

    #region READ DATA INITIALIZED IN DEVICE
    array = new BodyData[num_of_bodies];
    byte[] result = new byte[structSize * num_of_bodies];
    IntPtr intPtr = Marshal.AllocHGlobal(structSize * num_of_bodies);
    queue.ReadFromBuffer<byte>(b_objectData, ref result, true, null);
    queue.Finish();
    #endregion

    #region CHECK CORRECTNESS
    Marshal.Copy(result, 0, intPtr, structSize * num_of_bodies);
    for (int i = 0; i < num_of_bodies; i++)
    {
        array[i] = (BodyData)Marshal.PtrToStructure(intPtr + i * structSize, typeof(BodyData));
    }
    checkCorrectness();
    Marshal.FreeHGlobal(intPtr);
    #endregion

    #region CHECK OBJECT ID AND CELL ID ARRAYS
    uint[] unsortedCellIDArray = new uint[num_of_elements];
    ulong[] unsortedObjectIDArray = new ulong[num_of_elements];
    queue.ReadFromBuffer<uint>(b_cellIDArray, ref unsortedCellIDArray, true, null);
    queue.Finish();
    string cellIDArrayLog = "";
    for (int i = 0; i < num_of_elements; i++)
    {
        cellIDArrayLog += "[" + i + "]" + unsortedCellIDArray[i] + "\n";
    }
    File.WriteAllText(Application.StartupPath + @"\unsortedCellIDArrayLog.txt", cellIDArrayLog);
    string objectIDArrayLog = "";
    queue.ReadFromBuffer<ulong>(b_objectIDArray, ref unsortedObjectIDArray, true, null);
    queue.Finish();
    for (int i = 0; i < num_of_elements; i++)
    {
        objectIDArrayLog += "[" + i + "]" + (uint)unsortedObjectIDArray[i] + "\n";
    }
    File.WriteAllText(Application.StartupPath + @"\unsortedObjectIDArrayLog.txt", objectIDArrayLog);
    #endregion
#endif

#if ALL
    #region RADIX SORT
    #region INITIALIZING RADIX SORT MEMBERS
    uint block_count = (uint)Math.Ceiling((float)num_of_elements / BLOCK_SIZE);
    uint lScanCount = block_count * (1 << DIGITS_PER_ROUND) / 4;
    // Scan size rounded up to a whole number of blocks.
    uint lScanSize = (uint)Math.Ceiling(((float)lScanCount / BLOCK_SIZE)) * BLOCK_SIZE;
    uint globalSize = block_count * BLOCK_SIZE;
    #endregion

    #region POPULATING RADIX SORT BUFFERS
    b_start = time.ElapsedMilliseconds;
    b_blockScan = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, block_count * (1 << DIGITS_PER_ROUND));
    b_blockOffset = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, block_count * (1 << DIGITS_PER_ROUND));
    b_blockSum = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, BLOCK_SIZE);
    b_temp = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);
    b_iArrayIn = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, num_of_elements, ptr_i);
    b_iArrayOut = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);
    b_count += time.ElapsedMilliseconds - b_start;
    #endregion

    GCHandle gch_sc = GCHandle.Alloc(lScanCount, GCHandleType.Pinned);
    IntPtr ptr_sc = gch_sc.AddrOfPinnedObject();
    GCHandle gch_ec = GCHandle.Alloc(num_of_elements, GCHandleType.Pinned);
    IntPtr ptr_ec = gch_ec.AddrOfPinnedObject();

    b_start = time.ElapsedMilliseconds;
    b_scanCount = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, 1, ptr_sc);
    b_numberOfElems = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, 1, ptr_ec);
    b_count += time.ElapsedMilliseconds - b_start;

    #region libCL RADIX SORT ITERATION
    // One pass per group of DIGITS_PER_ROUND bits of the 32-bit key.
    for (uint j = 0; j < 32; j += DIGITS_PER_ROUND)
    {
        // The current bit offset is handed to the kernels through a one-element buffer.
        GCHandle gch_iter = GCHandle.Alloc(j, GCHandleType.Pinned);
        IntPtr ptr_j = gch_iter.AddrOfPinnedObject();

        b_start = time.ElapsedMilliseconds;
        ComputeBuffer<uint> b_iteration = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, 1, ptr_j);
        b_count += time.ElapsedMilliseconds - b_start;

        #region BLOCK SORT
        kernel_block_sort.SetMemoryArgument(0, b_cellIDArray);
        kernel_block_sort.SetMemoryArgument(1, b_temp);
        kernel_block_sort.SetMemoryArgument(2, b_iArrayIn);
        kernel_block_sort.SetMemoryArgument(3, b_iArrayOut);
        kernel_block_sort.SetMemoryArgument(4, b_iteration);
        kernel_block_sort.SetMemoryArgument(5, b_blockScan);
        kernel_block_sort.SetMemoryArgument(6, b_blockOffset);
        kernel_block_sort.SetMemoryArgument(7, b_numberOfElems);

        radixSort_start = time.ElapsedMilliseconds;
        queue.Execute(kernel_block_sort, null, new long[] { globalSize }, new long[] { BLOCK_SIZE }, null);
        queue.Finish();
        radixSort_count += time.ElapsedMilliseconds - radixSort_start;
        #endregion

        #region BLOCK SCAN
        kernel_block_scan.SetMemoryArgument(0, b_blockScan);
        kernel_block_scan.SetMemoryArgument(1, b_blockSum);
        kernel_block_scan.SetMemoryArgument(2, b_scanCount);

        radixSort_start = time.ElapsedMilliseconds;
        queue.Execute(kernel_block_scan, null, new long[] { lScanSize }, new long[] { BLOCK_SIZE }, null);
        queue.Finish();
        radixSort_count += time.ElapsedMilliseconds - radixSort_start;
        #endregion

        #region BLOCK PREFIX
        kernel_block_prefix.SetMemoryArgument(0, b_blockScan);
        kernel_block_prefix.SetMemoryArgument(1, b_blockSum);
        kernel_block_prefix.SetMemoryArgument(2, b_scanCount);

        radixSort_start = time.ElapsedMilliseconds;
        queue.Execute(kernel_block_prefix, null, new long[] { lScanSize }, new long[] { BLOCK_SIZE }, null);
        queue.Finish();
        radixSort_count += time.ElapsedMilliseconds - radixSort_start;
        #endregion

        #region REORDER
        kernel_reorder.SetMemoryArgument(0, b_temp);
        kernel_reorder.SetMemoryArgument(1, b_cellIDArray);
        kernel_reorder.SetMemoryArgument(2, b_iArrayOut);
        kernel_reorder.SetMemoryArgument(3, b_iArrayIn);
        kernel_reorder.SetMemoryArgument(4, b_blockScan);
        kernel_reorder.SetMemoryArgument(5, b_blockOffset);
        kernel_reorder.SetMemoryArgument(6, b_iteration);
        kernel_reorder.SetMemoryArgument(7, b_numberOfElems);

        radixSort_start = time.ElapsedMilliseconds;
        queue.Execute(kernel_reorder, null, new long[] { globalSize }, new long[] { BLOCK_SIZE }, null);
        queue.Finish();
        radixSort_count += time.ElapsedMilliseconds - radixSort_start;
        #endregion

        b_iteration.Dispose();
        gch_iter.Free();
    }
    #endregion

    Console.WriteLine("TIME SPENT EXECUTING RADIX SORT: " + radixSort_count + " ms");
    #endregion

#if PRINT
    #region CHECK CELL ID ARRAY ORDER
    queue.ReadFromBuffer<uint>(b_cellIDArray, ref sortedCellIDArray, true, null);
    queue.Finish();
    string orderedCellIDArrayLog = "";
    for (int i = 0; i < num_of_elements; i++)
    {
        orderedCellIDArrayLog += "[" + i + "]" + sortedCellIDArray[i] + "\n";
    }
    File.WriteAllText(Application.StartupPath + @"\radixSortLog.txt", orderedCellIDArrayLog);
    #endregion
#endif

    #region REORDER OBJECT ID ARRAY
    ulong[] o_array = new ulong[num_of_elements];

    b_start = time.ElapsedMilliseconds;
    b_reorder = new ComputeBuffer<ulong>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);
    b_count += time.ElapsedMilliseconds - b_start;

    k_reorder.SetMemoryArgument(0, b_iArrayIn);
    k_reorder.SetMemoryArgument(1, b_objectIDArray);
    k_reorder.SetMemoryArgument(2, b_reorder);

    reorder_start = time.ElapsedMilliseconds;
    queue.Execute(k_reorder, null, new long[] { num_of_elements }, null, null);
    queue.Finish();
    Console.WriteLine("TIME SPENT REORDERING OBJECT ID ARRAY: " + (time.ElapsedMilliseconds - reorder_start) + " ms");
    #endregion

#if PRINT
    #region CHECK OBJECT ID ORDER
    indexArrayOut = new uint[num_of_elements];
    queue.ReadFromBuffer<uint>(b_iArrayIn, ref indexArrayOut, true, null);
    queue.Finish();
    queue.ReadFromBuffer<ulong>(b_reorder, ref o_array, true, null);
    queue.Finish();
    string orderedObjIDArray = "";
    for (int i = 0; i < num_of_elements; i++)
    {
        orderedObjIDArray += "[" + i + "]" + (uint)o_array[i] + "\n";
    }
    string indexOut = "";
    for (int i = 0; i < num_of_elements; i++)
    {
        indexOut += "[" + i + "]" + indexArrayOut[i] + "\n";
    }
    File.WriteAllText(Application.StartupPath + @"\reorderLog.txt", orderedObjIDArray);
    File.WriteAllText(Application.StartupPath + @"\indexLog.txt", indexOut);
    #endregion
#endif

    #region ELEMENT COUNT
    //511 -> max value a cell hash can be
    //uint[] occurrences = new uint[512];
    uint[] n_occurrences = new uint[512];
    uint[] temp_array = new uint[num_of_elements];
    uint[] temp_array2 = new uint[num_of_elements];
    uint[] nocc = new uint[1];

    b_start = time.ElapsedMilliseconds;
    b_occPerRad = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, 512);
    b_temp2 = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);
    b_flags = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, num_of_elements);
    b_numOfCC = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, 1);
    // Accumulate like every other buffer-creation site; plain assignment here discarded
    // the time already collected for the earlier buffers.
    b_count += time.ElapsedMilliseconds - b_start;

    k_elementCount.SetMemoryArgument(0, b_reorder);
    k_elementCount.SetMemoryArgument(1, b_temp2);
    k_elementCount.SetMemoryArgument(2, b_numOfCC);
    k_elementCount.SetMemoryArgument(3, b_occPerRad);
    k_elementCount.SetMemoryArgument(4, b_flags);

    elemCount_start = time.ElapsedMilliseconds;
    try
    {
        queue.Execute(k_elementCount, null, new long[] { num_of_elements }, null, null);
    }
    catch (Exception e)
    {
        File.WriteAllText(Application.StartupPath + @"\exeLog.txt", e.Message);
    }
    queue.Finish();
    Console.WriteLine("TIME SPENT EXECUTING ELEMENT COUNT: " + (time.ElapsedMilliseconds - elemCount_start) + " ms");

    queue.ReadFromBuffer<uint>(b_temp2, ref temp_array, true, null);
    // nocc[0] sizes the collision cell array and its index array further down.
    queue.ReadFromBuffer<uint>(b_numOfCC, ref nocc, true, null);
    queue.Finish();
    #endregion

#if PRINT
    #region CHECK ELEMENT COUNT
    queue.ReadFromBuffer<uint>(b_occPerRad, ref n_occurrences, true, null);
    queue.Finish();
    string hs = "";
    for (int h = 0; h < 512; h++)
    {
        hs += "[" + h + "] " +
            //temp_array[h] + "\n\t" +
            n_occurrences[h] + "\n";
    }
    File.WriteAllText(Application.StartupPath + @"\elementCountLog_nOcc.txt", hs);
    Array.Sort<uint>(rs_array); //sort by .NET
    string s = "";
    for (int h = 0; h < num_of_elements; h++)
    {
        s += "[" + h + "] " +
            //s += sortedCellIDArray[h] + "\n"; //OCL
            //s += rs_array[h] + "\n"; //CPU
            //s += unchecked((uint)o_array[h]) + "\n"; //OCL reordered array cell ID
            //s += ((o_array[h] & ((ulong)281470681743360)) >> 32) + "\n"; //OCL reordered array obj ID
            //s += (indexArrayOut[h] / 8) + "\n"; // cell index divided by 8 --> should be equal to the line above;
            temp_array[h] + "\n";
    }
    File.WriteAllText(Application.StartupPath + @"\elementCountLog_temp.txt", s);
    #endregion
#endif

    #region PREFIX SUM
    int maxIter = (int)Math.Log(num_of_elements, 2);
    for (uint d = 0; d < maxIter; d++)
    {
        // The iteration number is handed to the kernel through a one-element buffer.
        GCHandle gch_iteration = GCHandle.Alloc(d, GCHandleType.Pinned);
        IntPtr ptr_iteration = gch_iteration.AddrOfPinnedObject();

        b_start = time.ElapsedMilliseconds;
        ComputeBuffer<uint> b_iteration2 = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, 1, ptr_iteration);
        b_count += time.ElapsedMilliseconds - b_start;

        k_prefixSum.SetMemoryArgument(0, b_temp2);
        k_prefixSum.SetMemoryArgument(1, b_iteration2);

        prefixSum_start = time.ElapsedMilliseconds;
        try
        {
            // NOTE(review): the local work size equals the global size, i.e. a single work-group;
            // presumably this can exceed the device's work-group limit for large inputs - confirm.
            queue.Execute(k_prefixSum, null, new long[] { num_of_elements }, new long[] { num_of_elements }, null);
        }
        catch (Exception e)
        {
            File.WriteAllText(Application.StartupPath + @"\exeLog.txt", e.Message);
        }
        queue.Finish();
        prefixSum_count += time.ElapsedMilliseconds - prefixSum_start;

        b_iteration2.Dispose();
        gch_iteration.Free();
    }
    Console.WriteLine("TIME SPENT EXECUTING PREFIX SUM: " + prefixSum_count + " ms");
    queue.ReadFromBuffer<uint>(b_temp2, ref temp_array2, true, null);
    #endregion

#if PRINT
    #region CHECK SUM
    string sscan = "";
    for (int h = 0; h < num_of_elements; h++)
    {
        //TO BE FIXED! -- fixed?!
        sscan += temp_array2[h] + "\n";
    }
    File.WriteAllText(Application.StartupPath + @"\scanLog.txt", sscan);
    uint sum = 0;
    for (int p = 0; p < num_of_elements; p++) //TODO CHANGE TO nocc[0] -- but why??
    {
        sum += temp_array[p];
    }
    Console.WriteLine(".NET sum: " + sum);
    Console.WriteLine("OCL sum: " + nocc[0]);
    #endregion
#endif

    #region COLLISION CELL ARRAY CREATION
    //uint[] outArray = new uint[nocc[0]];
    ulong[] out_array = new ulong[nocc[0]];
    uint[] indexes = new uint[nocc[0]];
    b_ccArray = new ComputeBuffer<ulong>(context, ComputeMemoryFlags.ReadWrite, nocc[0]);//TODO !!!!! CHANGE TO nocc[0]
    GCHandle gch_ta = GCHandle.Alloc(temp_array, GCHandleType.Pinned);
    IntPtr ptr_ta = gch_ta.AddrOfPinnedObject();

    b_start = time.ElapsedMilliseconds;
    // NOTE(review): temp_array is uint[num_of_elements], but this buffer holds num_of_elements
    // ulong values copied from its pinned address, i.e. twice the array's byte size. Confirm the
    // element type expected by the kernel and make the host array and the buffer agree.
    b_temp3 = new ComputeBuffer<ulong>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, num_of_elements, ptr_ta);
    b_ccIndexes = new ComputeBuffer<uint>(context, ComputeMemoryFlags.ReadWrite, nocc[0]);//TODO!!! CHANGE TO nocc[0]
    b_count += time.ElapsedMilliseconds - b_start;

    k_ccArrayCreation.SetMemoryArgument(0, b_reorder);
    k_ccArrayCreation.SetMemoryArgument(1, b_ccArray);
    k_ccArrayCreation.SetMemoryArgument(2, b_temp2);
    k_ccArrayCreation.SetMemoryArgument(3, b_occPerRad);
    k_ccArrayCreation.SetMemoryArgument(4, b_temp3);
    k_ccArrayCreation.SetMemoryArgument(5, b_ccIndexes);
    k_ccArrayCreation.SetMemoryArgument(6, b_flags);

    ccArrayC_start = time.ElapsedMilliseconds;
    try
    {
        queue.Execute(k_ccArrayCreation, null, new long[] { num_of_elements }, null, null);
    }
    catch (Exception e)
    {
        File.WriteAllText(Application.StartupPath + @"\exeLog.txt", e.Message);
    }
    queue.Finish();
    Console.WriteLine("TIME SPENT POPULATING COLLISION CELL ARRAY: " + (time.ElapsedMilliseconds - ccArrayC_start) + " ms");
    time.Stop();
    #endregion

    Console.WriteLine("TIME SPENT EXECUTING BROAD-PHASE COLLISION DETECTION: " + time.ElapsedMilliseconds + " ms");
    Console.WriteLine("TIME SPENT CREATING DEVICE BUFFERS: " + b_count + " ms");

#if PRINT
    #region CHECK RESULT
    queue.ReadFromBuffer(b_ccArray, ref out_array, true, null);
    queue.ReadFromBuffer(b_ccIndexes, ref indexes, true, null);
    queue.Finish();
    string output = "";
    for (int t = 0; t < nocc[0]; t++)//CHANGE TO nocc[0]
    {
        output += "---INDEX--- " + t + "\n\t";
        // Entries with the top bit (bit 63) set are tagged "[H]" in the dump.
        if ((out_array[t] & ((ulong)1 << 63)) != (ulong)0)
        {
            output += "[H] ";
        }
        output += (uint)out_array[t] + "\n\t";
        output += indexes[t] + "\n";
    }
    File.WriteAllText(Application.StartupPath + @"\outputLog.txt", output);
    #endregion
#endif

    #region POINTERS DISPOSAL
    Marshal.FreeHGlobal(ptr);
    // The index array was pinned at the top of the method and was previously released only
    // in the non-ALL branch, which left the handle (and the pinned array) leaked here.
    gch_iai.Free();
    gch_ta.Free();
    gch_ec.Free();
    gch_sc.Free();
    gch_ge.Free();
    #endregion

    try
    {
        DisposeBuffers();
        DisposeComponents();
        DisposeQueueAndContext();
    }
    catch (Exception e)
    {
        Console.WriteLine("Error encountered while releasing buffers - " + e.Message);
        File.WriteAllText(Application.StartupPath + @"\exeLog.txt", e.Message);
    }
#else
    Marshal.FreeHGlobal(ptr);
    gch_iai.Free();
    gch_ge.Free();
    b_objectData.Dispose();
    b_objectIDArray.Dispose();
    b_cellIDArray.Dispose();
    b_gridEdge.Dispose();
#endif
}
/// <summary>
/// Computes barycentric coordinates for all <paramref name="calculations"/> on the OpenCL device with the
/// "Barycentric" kernel, stores the result in each calculation's <c>Coords</c> and, for entries the kernel
/// flags as valid, adds a draw point to the calculation's triangle. Timing marks are emitted via <c>mark</c>.
/// </summary>
/// <param name="calculations">Work items providing the point P and triangle corners A, B and C.</param>
public static void Calculate(List<Calculation> calculations)
{
    Stopwatch s = new Stopwatch();
    s.Start();

    int count = calculations.Count;
    if (count == 0)
    {
        // Nothing to compute; zero-sized device buffers cannot be created.
        return;
    }

    IntVec2[] p_p = new IntVec2[count];
    IntVec2[] p_a = new IntVec2[count];
    IntVec2[] p_b = new IntVec2[count];
    IntVec2[] p_c = new IntVec2[count];
    FloatVec3[] c = new FloatVec3[count];
    int[] c_valid = new int[count];

    // Split the calculations into one flat array per kernel input.
    Parallel.For(0, count, i =>
    {
        var calc = calculations[i];
        p_p[i] = new IntVec2(calc.P);
        p_a[i] = new IntVec2(calc.A);
        p_b[i] = new IntVec2(calc.B);
        p_c[i] = new IntVec2(calc.C);
    });
    mark(s, "memory init");

    ComputeEventList eventList = new ComputeEventList();
    try
    {
        // using blocks release the device resources even when a step below throws.
        using (ComputeBuffer<IntVec2> _p_p = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_p))
        using (ComputeBuffer<IntVec2> _p_a = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_a))
        using (ComputeBuffer<IntVec2> _p_b = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_b))
        using (ComputeBuffer<IntVec2> _p_c = new ComputeBuffer<IntVec2>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, p_c))
        using (ComputeBuffer<FloatVec3> _c = new ComputeBuffer<FloatVec3>(context, ComputeMemoryFlags.WriteOnly, c.Length))
        using (ComputeBuffer<int> _c_valid = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, c_valid.Length))
        {
            mark(s, "memory buffer init");

            using (ComputeKernel kernel = program.CreateKernel("Barycentric"))
            {
                kernel.SetMemoryArgument(0, _p_p);
                kernel.SetMemoryArgument(1, _p_a);
                kernel.SetMemoryArgument(2, _p_b);
                kernel.SetMemoryArgument(3, _p_c);
                kernel.SetMemoryArgument(4, _c);
                kernel.SetMemoryArgument(5, _c_valid);
                mark(s, "memory init 2");

                using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
                {
                    commands.Execute(kernel, null, new long[] { count }, null, eventList);
                    mark(s, "execute");

                    // Both reads are non-blocking; Finish() guarantees the arrays are filled before use.
                    commands.ReadFromBuffer(_c, ref c, false, eventList);
                    commands.ReadFromBuffer(_c_valid, ref c_valid, false, eventList);
                    commands.Finish();
                    mark(s, "read 1");

                    Parallel.For(0, count, i =>
                    {
                        var calc = calculations[i];
                        calc.Coords = new BarycentricCoordinates(c[i].U, c[i].V, c[i].W);
                        if (c_valid[i] == 1)
                        {
                            // Several calculations may share a triangle, so serialize the list update.
                            lock (calc.Tri)
                            {
                                calc.Tri.Points.Add(new DrawPoint(calc.Coords, calc.P));
                            }
                        }
                    });
                    mark(s, "read 2");
                }
            }
        }
    }
    finally
    {
        // cleanup events
        foreach (ComputeEventBase eventBase in eventList)
        {
            eventBase.Dispose();
        }
        eventList.Clear();
    }

    mark(s, "dispose");
}
/// <summary>
/// Runs the two OpenCL kernels from <c>convolution.cl</c> (<c>transform_fluorophores</c>
/// followed by <c>convolve_fluorophores</c>), then writes the resulting image to
/// <c>_saveFilename</c> and to <c>c:\temp.bmp</c>. Progress and timings go to the console.
/// </summary>
/// <param name="computeContext">OpenCL context used to build the program and create all buffers.</param>
/// <exception cref="SyntaxErrorException">A shift field does not contain a number in invariant ("." decimal) format.</exception>
/// <exception cref="FormatException">The pixel-size field does not contain a number in invariant format.</exception>
private void CalculateConvolution(ComputeContext computeContext)
{
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    // NOTE(review): dy and dz are parsed from textBoxShiftX as well, so all three shifts are
    // always identical. This looks like a copy/paste slip - confirm which controls hold Y and Z.
    float dx;
    if (!float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dx))
    {
        throw new SyntaxErrorException(", needs to be .");
    }

    float dy;
    if (!float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dy))
    {
        throw new SyntaxErrorException(", needs to be .");
    }

    float dz;
    if (!float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dz))
    {
        throw new SyntaxErrorException(", needs to be .");
    }

    int pixelCount = _imageDimensionX * _imageDimensionY * _imageDimensionZ;

    Console.WriteLine("Computing...");
    Console.WriteLine("Reading kernel...");
    String kernelPath = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName;
    String kernelString = File.ReadAllText(Path.Combine(kernelPath, "convolution.cl"));
    Console.WriteLine("Reading kernel... done");

    // Parsed once, with the same invariant-culture rules as the shift fields above, so a
    // "." decimal separator means the same thing in every input box.
    float inversePixelSize = 1.0f / float.Parse(textBoxPixelSize.Text, NumberStyles.Float, CultureInfo.InvariantCulture);
    float[] selectedTransformation = Transformations.GetTransformation(
        (TransformationType)comboBoxTransform.SelectedItem,
        inversePixelSize, inversePixelSize, inversePixelSize,
        dx, dy, dz);

    float[] resultImageData;

    // Every OpenCL object below is disposed deterministically, also when a step throws.
    using (ComputeProgram computeProgram = new ComputeProgram(computeContext, kernelString))
    {
        computeProgram.Build(computeContext.Devices, null, null, IntPtr.Zero);

        ComputeProgramBuildStatus computeProgramBuildStatus = computeProgram.GetBuildStatus(_selectedComputeDevice);
        Console.WriteLine("computeProgramBuildStatus\n\t" + computeProgramBuildStatus);

        String buildLog = computeProgram.GetBuildLog(_selectedComputeDevice);
        Console.WriteLine("buildLog");
        if (buildLog.Equals("\n"))
        {
            Console.WriteLine("\tbuildLog is empty...");
        }
        else
        {
            Console.WriteLine("\t" + buildLog);
        }

        float[] fluorophores = CsvData.ReadFluorophores(_sourceFilename);

        using (ComputeCommandQueue computeCommandQueue = new ComputeCommandQueue(computeContext, _selectedComputeDevice, ComputeCommandQueueFlags.None))
        using (ComputeBuffer<float> fluorophoresCoords = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadWrite, fluorophores.LongLength))
        using (ComputeBuffer<float> transformationMatrix = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadOnly, selectedTransformation.LongLength))
        using (ComputeKernel transformFluorophoresKernel = computeProgram.CreateKernel("transform_fluorophores"))
        {
            ////////////////////////////////////////////////////////////////
            // Stage 1: transform the fluorophore coordinates in place
            ////////////////////////////////////////////////////////////////
            transformFluorophoresKernel.SetMemoryArgument(0, fluorophoresCoords);
            transformFluorophoresKernel.SetMemoryArgument(1, transformationMatrix);

            // One work item per group of four floats in the input array.
            long[] transformGlobalWorkSize = new long[] { fluorophores.Length / 4 };

            computeCommandQueue.WriteToBuffer(fluorophores, fluorophoresCoords, true, null);
            computeCommandQueue.WriteToBuffer(selectedTransformation, transformationMatrix, true, null);
            computeCommandQueue.Execute(transformFluorophoresKernel, null, transformGlobalWorkSize, null, null);

            // The host copy is not used afterwards; the blocking read is kept so the timing
            // below still covers kernel completion plus the read-back.
            float[] transformedFluorophores = new float[fluorophores.Length];
            computeCommandQueue.ReadFromBuffer(fluorophoresCoords, ref transformedFluorophores, true, null);
            computeCommandQueue.Finish();

            stopwatch.Stop();
            Console.WriteLine("Transform fluophores duration:\n\t" + stopwatch.Elapsed);
            stopwatch.Reset();
            stopwatch.Start();

            ////////////////////////////////////////////////////////////////
            // Stage 2: convolve the transformed fluorophores into the image
            ////////////////////////////////////////////////////////////////
            const int convolveLocalWorkGroupSize = 16;

            // Round pixelCount up to a multiple of the work-group size. Integer arithmetic
            // avoids the precision loss of the float division for very large images.
            int totalBuffer = (pixelCount + convolveLocalWorkGroupSize - 1) / convolveLocalWorkGroupSize * convolveLocalWorkGroupSize;

            using (ComputeBuffer<float> resultImage = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.WriteOnly, totalBuffer))
            using (ComputeKernel convolveFluorophoresKernel = computeProgram.CreateKernel("convolve_fluorophores"))
            {
                convolveFluorophoresKernel.SetMemoryArgument(0, resultImage);
                convolveFluorophoresKernel.SetValueArgument(1, _imageDimensionX);
                convolveFluorophoresKernel.SetValueArgument(2, _imageDimensionY);
                convolveFluorophoresKernel.SetMemoryArgument(3, fluorophoresCoords);
                convolveFluorophoresKernel.SetLocalArgument(4, convolveLocalWorkGroupSize);
                convolveFluorophoresKernel.SetValueArgument(5, fluorophores.Length / 4);

                // NOTE(review): the global size is pixelCount, not the padded totalBuffer, so it
                // is not guaranteed to be a multiple of the local size - confirm against the
                // kernel and the targeted OpenCL version.
                long[] convolveGlobalWorkSize = new long[] { pixelCount };
                long[] convolveLocalWorkSize = new long[] { convolveLocalWorkGroupSize };
                computeCommandQueue.Execute(convolveFluorophoresKernel, null, convolveGlobalWorkSize, convolveLocalWorkSize, null);

                resultImageData = new float[totalBuffer];
                computeCommandQueue.ReadFromBuffer(resultImage, ref resultImageData, true, null);
                computeCommandQueue.Finish();
            }
        }
    }

    for (int i = 0; i < pixelCount; i++)
    {
        Console.WriteLine(resultImageData[i]);
    }

    Console.WriteLine("Writing data to file...");
    TiffData.WriteToDisk(resultImageData, _saveFilename, _imageDimensionX, _imageDimensionY);

    ushort[] ushortdata = new ushort[resultImageData.Length];
    for (int i = 0; i < resultImageData.Length; i++)
    {
        ushortdata[i] = (ushort)resultImageData[i];
    }

    uint[] convertGray16ToRgb = ConvertGray16ToRGB(ushortdata, 16);

    // Unpack every 32-bit pixel into four consecutive bytes. The first byte of pixel `index`
    // belongs at 4 * index; writing it to bytes[index] overwrote bytes of earlier pixels and
    // left every fourth byte at zero.
    byte[] bytes = new byte[convertGray16ToRgb.Length * 4];
    for (int index = 0; index < convertGray16ToRgb.Length; index++)
    {
        byte[] pixelBytes = BitConverter.GetBytes(convertGray16ToRgb[index]);
        bytes[4 * index] = pixelBytes[0];
        bytes[4 * index + 1] = pixelBytes[1];
        bytes[4 * index + 2] = pixelBytes[2];
        bytes[4 * index + 3] = pixelBytes[3];
    }

    // NOTE(review): `bytes` holds bare pixel values without any image file header, while
    // FromStream decodes an encoded image - confirm this call succeeds on real data.
    using (MemoryStream ms = new MemoryStream(bytes))
    using (Image image = Bitmap.FromStream(ms))
    {
        image.Save("c:\\temp.bmp");
    }

    Console.WriteLine("Writing data to file... done");
    stopwatch.Stop();
    Console.WriteLine("Convolve fluophores duration:\n\t" + stopwatch.Elapsed);
    Console.WriteLine("Computing... done");
}
/// <summary>
/// Round-trips float data through one device buffer using the linear, rectangular and
/// cubic write/read overloads and compares what comes back with what was sent.
/// Progress is written to <paramref name="log"/>; any exception is logged instead of propagated.
/// </summary>
/// <param name="context">Compute context; its first device runs the command queue.</param>
/// <param name="log">Receives progress messages and, on failure, the exception text.</param>
public void Run(IComputeContext context, TextWriter log)
{
    try
    {
        log.Write("Creating command queue... ");
        var queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
        log.WriteLine("done.");

        log.Write("Generating data... ");

        int linearSize = 24;
        SysIntX2 rectSize = new SysIntX2(4, 6);
        SysIntX3 cubicSize = new SysIntX3(2, 3, 4);

        int rectWidth = (int)rectSize.X;
        int rectHeight = (int)rectSize.Y;
        int cubeWidth = (int)cubicSize.X;
        int cubeHeight = (int)cubicSize.Y;
        int cubeDepth = (int)cubicSize.Z;

        float[] linearIn = new float[linearSize];
        float[] linearOut = new float[linearSize];
        float[,] rectIn = new float[rectHeight, rectWidth];
        float[,] rectOut = new float[rectHeight, rectWidth];
        float[,,] cubicIn = new float[cubeDepth, cubeHeight, cubeWidth];
        float[,,] cubicOut = new float[cubeDepth, cubeHeight, cubeWidth];

        // Each input element holds its own flattened (row-major) index.
        for (int index = 0; index < linearSize; index++)
        {
            linearIn[index] = index;
        }

        for (int row = 0; row < rectHeight; row++)
        {
            for (int col = 0; col < rectWidth; col++)
            {
                rectIn[row, col] = (float)(rectWidth * row + col);
            }
        }

        for (int slice = 0; slice < cubeDepth; slice++)
        {
            for (int row = 0; row < cubeHeight; row++)
            {
                for (int col = 0; col < cubeWidth; col++)
                {
                    cubicIn[slice, row, col] = (float)(slice * cubeHeight * cubeWidth + cubeWidth * row + col);
                }
            }
        }

        log.WriteLine("done.");

        log.Write("Creating buffer... ");
        var deviceBuffer = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadWrite, linearSize);
        log.WriteLine("done.");

        // The GC.Collect() calls are interleaved with the non-blocking transfers on purpose;
        // their positions are part of what this routine exercises.
        GC.Collect();

        // --- linear ---
        log.Write("Writing to buffer (linear)... ");
        queue.WriteToBuffer(linearIn, deviceBuffer, false, null);
        log.WriteLine("done.");

        log.Write("Reading from buffer (linear)... ");
        queue.ReadFromBuffer(deviceBuffer, ref linearOut, false, null);
        log.WriteLine("done.");

        GC.Collect();
        queue.Finish();

        log.Write("Comparing data... ");
        Compare(linearIn, linearOut);
        log.WriteLine("passed.");

        GC.Collect();

        // --- rectangular ---
        log.Write("Writing to buffer (rectangular)... ");
        queue.WriteToBuffer(rectIn, deviceBuffer, false, new SysIntX2(), new SysIntX2(), rectSize, null);
        log.WriteLine("done.");

        GC.Collect();

        log.Write("Reading from buffer (rectangular)... ");
        queue.ReadFromBuffer(deviceBuffer, ref rectOut, false, new SysIntX2(), new SysIntX2(), rectSize, null);
        log.WriteLine("done.");

        GC.Collect();
        queue.Finish();

        log.Write("Comparing data... ");
        Compare(rectIn, rectOut);
        log.WriteLine("passed.");

        GC.Collect();

        // --- cubic ---
        log.Write("Writing to buffer (cubic)... ");
        queue.WriteToBuffer(cubicIn, deviceBuffer, false, new SysIntX3(), new SysIntX3(), cubicSize, null);
        log.WriteLine("done.");

        GC.Collect();

        log.Write("Reading from buffer (cubic)... ");
        queue.ReadFromBuffer(deviceBuffer, ref cubicOut, false, new SysIntX3(), new SysIntX3(), cubicSize, null);
        log.WriteLine("done.");

        GC.Collect();
        queue.Finish();

        log.Write("Comparing data... ");
        Compare(cubicIn, cubicOut);
        log.WriteLine("passed.");
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
}
// tick: renders one frame
public void Tick()
{
    // the timer starts with the very first frame
    if (firstFrame)
    {
        timer.Reset();
        timer.Start();
        firstFrame = false;
    }

    // keys are only handled when the running time is -1 (infinite);
    // a camera move restarts the accumulation
    if (runningTime == -1 && camera.HandleInput())
    {
        ClearAccumulator();
    }

    // render
    if (false) // if (useGPU)
    {
        // add your CPU + OpenCL path here
        // mind the gpuPlatform parameter! This allows us to specify the platform on our
        // test system.
        // note: it is possible that the automated test tool provides you with a different
        // platform number than required by your hardware. In that case, you can hardcode
        // the platform during testing (ignoring gpuPlatform); do not forget to put back
        // gpuPlatform before submitting!
        long[] workSize = { screen.width, screen.height };
        long[] localSize = { 16, 2 };
        queue.Execute(kernel, null, workSize, null, null);
        queue.Finish();
        queue.ReadFromBuffer(outputBuffer, ref screen.pixels, true, null);
        Console.WriteLine(screen.pixels[0]);

        Random r = RTTools.GetRNG();
        for (int i = 0; i < 1000; i++)
        {
            randoms[i] = (float)r.NextDouble();
        }

        rndBuffer = new ComputeBuffer <float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, randoms);
        cameraBuffer = new ComputeBuffer <Vector3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, camera.toCL());
        kernel.SetMemoryArgument(6, cameraBuffer);
        kernel.SetMemoryArgument(7, rndBuffer);
    }
    else
    {
        // CPU only path: add one more sample per pixel and plot the running average
        float scale = 1.0f / (float)++spp;
        Parallel.For(0, screen.height, y =>
        {
            int rowStart = y * screen.width;
            for (int x = 0; x < screen.width; x++)
            {
                // primary ray for this pixel
                Ray ray = camera.Generate(RTTools.GetRNG(), x, y);

                // accumulate the traced sample
                int pixelIdx = x + rowStart;
                accumulator[pixelIdx] += Sample(ray, 0, x, y);

                // final color = accumulated value scaled by 1 / sample count
                screen.pixels[pixelIdx] = RTTools.Vector3ToIntegerRGB(scale * accumulator[pixelIdx]);
            }
        });
    }

    // report once the maximum render time has elapsed (never in infinite mode)
    int elapsedSeconds = (int)(timer.ElapsedMilliseconds / 1000);
    if (runningTime != -1 && elapsedSeconds >= runningTime)
    {
        OpenTKApp.Report((int)timer.ElapsedMilliseconds, spp, screen);
    }
}
/// <summary>
/// Runs the search kernel over the given key space and returns the password the kernel
/// reports for <paramref name="hash"/>, or <c>null</c> when it reports none.
/// </summary>
/// <param name="hash">Hash bytes handed to the kernel as argument 0.</param>
/// <param name="type">Hash algorithm; only <c>HashType.MD5</c> is accepted.</param>
/// <param name="maxLength">Maximum password length; values above 6 are rejected.</param>
/// <param name="keySpace">Candidate characters; every entry must be at most one character long.</param>
/// <returns>The ASCII-decoded password, or <c>null</c> if the kernel reported length 0.</returns>
/// <exception cref="NotImplementedException">Unsupported hash type, maximum length or key-space entry.</exception>
/// <exception cref="ArgumentNullException"><paramref name="hash"/> or <paramref name="keySpace"/> is null.</exception>
public String SearchPassword (byte[] hash, HashType type, int maxLength, String[] keySpace)
{
    if (type != HashType.MD5) {
        throw new NotImplementedException ("sums other than MD5 not supported");
    }
    if (maxLength > 6) {
        // The message used to say 7 although the check rejects everything above 6.
        throw new NotImplementedException ("doesn't support longer passwords than 6");
    }
    if (hash == null) {
        throw new ArgumentNullException ("hash");
    }
    if (keySpace == null) {
        throw new ArgumentNullException ("keySpace");
    }

    var joinedKeySpace = new List<byte> ();
    foreach (var k in keySpace) {
        if (k.Length > 1) {
            throw new NotImplementedException ("doesn't support longer keyspaces than 1");
        }
        joinedKeySpace.AddRange (Encoding.ASCII.GetBytes (k));
    }

    byte[] resultData = new byte[20];
    byte[] passLen = new byte[1];
    int[] flag = new int[1];
    byte[] keyspaceJoined = joinedKeySpace.ToArray ();

    // passLenBuffer and flagBuffer are now seeded from zeroed host arrays. They used to be
    // allocated without initial contents, so when the kernel wrote nothing the length read
    // back below was whatever happened to be in device memory.
    // All device objects are disposed when the block exits; previously every call leaked
    // five buffers and a command queue.
    using (var resultBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, resultData))
    using (var hashBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, hash))
    using (var keyspaceBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, keyspaceJoined))
    using (var passLenBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, passLen))
    using (var flagBuffer = new ComputeBuffer<int> (Context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, flag))
    using (var queue = new ComputeCommandQueue (Context, Device, ComputeCommandQueueFlags.None)) {
        Kernel.SetMemoryArgument (0, hashBuffer);
        Kernel.SetMemoryArgument (1, keyspaceBuffer);
        Kernel.SetMemoryArgument (2, resultBuffer);
        Kernel.SetMemoryArgument (3, passLenBuffer);
        Kernel.SetMemoryArgument (4, flagBuffer);

        // execute kernel: first dimension spans the key space, the other two are fixed at 57 * 57
        long firstDim = keyspaceJoined.Length;
        var globalWorksize = new long[] { firstDim, 57 * 57, 57 * 57 };
        queue.Execute (Kernel, new long[] { 0, 0, 0 }, globalWorksize, null, null);

        queue.ReadFromBuffer (resultBuffer, ref resultData, true, null);
        queue.ReadFromBuffer (passLenBuffer, ref passLen, true, null);
        queue.Finish ();
    }

    String password = null;
    if (passLen [0] > 0) {
        logger.Info ("pass len {0}", passLen [0]);
        // Never decode past the end of the result array, whatever length the kernel reported.
        int length = Math.Min (passLen [0], resultData.Length);
        password = Encoding.ASCII.GetString (resultData, 0, length);
        logger.Info ("Found password: \"{0}\"", password);
    } else {
        logger.Info ("Password not found.");
    }
    return password;
}
/// <summary>
/// Uploads the queued points, runs the kernel once per point, reads every layer's results
/// back and rebuilds <c>output</c> with one entry per point.
/// </summary>
/// <remarks>
/// For each point, Item3 is copied into <c>inx</c> and Item4 into <c>inc</c> (real part to
/// <c>x</c>, imaginary part to <c>y</c>). Each output entry carries the point's Item1 and
/// Item2 plus one cloned <c>ProcessLayer</c> per entry of <c>_ld</c>, filled from the values
/// read back from the device.
/// </remarks>
public unsafe void EndSend()
{
    for (int i = 0; i < points.Count; i++)
    {
        inx[i].x = (float)points[i].Item3.Real;
        inx[i].y = (float)points[i].Item3.Imaginary;
        inc[i].x = (float)points[i].Item4.Real;
        inc[i].y = (float)points[i].Item4.Imaginary;
    }

    // Arguments 0 and 1 are the two input buffers; one output buffer per layer follows.
    _krnl.SetMemoryArgument(0, x);
    _krnl.SetMemoryArgument(1, c);
    for (int i = 0; i < _ld.Count; i++)
    {
        _krnl.SetMemoryArgument(2 + i, outp[i]);
    }

    // The queue only lives for this call; dispose it so repeated calls do not accumulate
    // native command queues (it was previously never released).
    using (ComputeCommandQueue command = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None))
    {
        command.WriteToBuffer(inx, x, false, null);
        command.WriteToBuffer(inc, c, false, null);
        command.Execute(_krnl, null, new long[] { points.Count }, null, null);
        for (int i = 0; i < _ld.Count; i++)
        {
            command.ReadFromBuffer(outp[i], ref opl[i], false, null);
        }

        // All transfers above are non-blocking; wait here before touching opl.
        command.Finish();
    }

    output = new Queue<Tuple<int, int, List<ProcessLayer>>>();
    for (int i = 0; i < points.Count; i++)
    {
        List<ProcessLayer> pl = new List<ProcessLayer>();
        for (int ii = 0; ii < _ld.Count; ii++)
        {
            // Read the device result for (layer ii, point i) once instead of re-indexing per field.
            var raw = opl[ii][i];

            ProcessLayer p = _ld[ii].Clone();
            p.c_active = raw.c_active != 0;
            p.c_calc = raw.c_calc;
            p.c_cmean = raw.c_cmean;
            p.c_cvariance = raw.c_cvariance;
            p.c_cvarsx = raw.c_cvarsx;
            p.c_isin = raw.c_isin != 0;
            p.c_n = raw.c_n;
            p.c_old2x = new Complex(raw.c_old2x.x, raw.c_old2x.y);
            p.c_oldx = new Complex(raw.c_oldx.x, raw.c_oldx.y);
            p.c_resn = raw.c_resn;
            p.c_resx = new Complex(raw.c_resx.x, raw.c_resx.y);
            p.c_x = new Complex(raw.c_x.x, raw.c_x.y);
            pl.Add(p);
        }
        output.Enqueue(Tuple.Create(points[i].Item1, points[i].Item2, pl));
    }
}
/// <summary>
/// Reads <paramref name="width"/> x <paramref name="height"/> pixels from the device buffer
/// and returns them as a 32bpp RGB bitmap.
/// </summary>
/// <param name="queue">Command queue used for the blocking read.</param>
/// <param name="buffer">Device buffer holding one Vector4 per pixel.</param>
/// <param name="width">Bitmap width in pixels.</param>
/// <param name="height">Bitmap height in pixels.</param>
/// <returns>A new bitmap; X, Y and Z of each pixel become the red, green and blue bytes.</returns>
private Bitmap Download(ComputeCommandQueue queue, ComputeBuffer<Vector4> buffer, int width, int height)
{
    var devicePixels = new Vector4[width * height];
    queue.ReadFromBuffer(buffer, ref devicePixels, true, 0, 0, width * height, null);
    queue.Finish();

    // Clamp every channel to [0, 1], then pack X/Y/Z into 0x00RRGGBB.
    var packed = new int[devicePixels.Length];
    for (int i = 0; i < devicePixels.Length; i++)
    {
        Vector4 color = Vector4.Clamp(devicePixels[i], new Vector4(0), new Vector4(1));
        int red = (byte)(color.X * 255);
        int green = (byte)(color.Y * 255);
        int blue = (byte)(color.Z * 255);
        packed[i] = red << 16 | green << 8 | blue;
    }

    var bitmap = new Bitmap(width, height, PixelFormat.Format32bppRgb);
    var wholeImage = new Rectangle(0, 0, bitmap.Width, bitmap.Height);
    var locked = bitmap.LockBits(wholeImage, ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
    Marshal.Copy(packed, 0, locked.Scan0, packed.Length);
    bitmap.UnlockBits(locked);
    return bitmap;
}
/// <summary>
/// Reads <c>gpuBuffer</c> into <c>cpuBuffer</c> through <c>queue</c> using a blocking read.
/// </summary>
public void CopyFromDevice()
{
    const bool blocking = true;
    queue.ReadFromBuffer(gpuBuffer, ref cpuBuffer, blocking, null);
}
/// <summary>
/// Runs <c>openCLKernel</c> as a single work item on the given board data and copies the
/// kernel's results back to the caller. The elapsed time in milliseconds is written to the console.
/// </summary>
/// <param name="libertyGroups">2D input, flattened with <c>twoDtoOneD</c> (kernel argument 0).</param>
/// <param name="groupNumbers">2D input, flattened with <c>twoDtoOneD</c> (kernel argument 1).</param>
/// <param name="x">Kernel argument 2.</param>
/// <param name="y">Kernel argument 3.</param>
/// <param name="surroundingLibs">Kernel argument 4; overwritten with the buffer contents after the run.</param>
/// <param name="emptySurroundings">Receives the single int the kernel leaves in argument 5. The incoming value is not sent to the device.</param>
/// <param name="ourSgn">Kernel argument 7.</param>
/// <param name="duplicateGroups">Receives the single int the kernel leaves in argument 6. The incoming value is not sent to the device.</param>
public static void CallOpenCL(int[,] libertyGroups, int[,] groupNumbers, int x, int y, int[] surroundingLibs, ref int emptySurroundings, int ourSgn, ref int duplicateGroups)
{
    // The 2D inputs have to be mapped to 1 dimension before they can back a buffer.
    int[] libertyGroupsFlat = twoDtoOneD(libertyGroups);
    int[] groupNumbersFlat = twoDtoOneD(groupNumbers);
    int[] emptySurroundRef = new int[1];
    int[] duplicateGroupsRef = new int[1];
    double elapsedMilliseconds;

    // CopyHostPointer instead of UseHostPointer: with UseHostPointer the device keeps using
    // the host address after the buffer is created, but these are ordinary managed arrays
    // that this method does not keep pinned. Copying at creation time is safe, and every
    // result is read back explicitly below anyway.
    const ComputeMemoryFlags bufferFlags = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer;

    // Buffers and queue are released deterministically instead of relying on GC.Collect().
    using (ComputeBuffer<int> libertyGroupsIn = new ComputeBuffer<int>(openCLContext, bufferFlags, libertyGroupsFlat))
    using (ComputeBuffer<int> groupNumbersIn = new ComputeBuffer<int>(openCLContext, bufferFlags, groupNumbersFlat))
    using (ComputeBuffer<int> surroundingLibsIn = new ComputeBuffer<int>(openCLContext, bufferFlags, surroundingLibs))
    using (ComputeBuffer<int> emptySurroundRefIn = new ComputeBuffer<int>(openCLContext, bufferFlags, emptySurroundRef))
    using (ComputeBuffer<int> duplicateGroupsRefIn = new ComputeBuffer<int>(openCLContext, bufferFlags, duplicateGroupsRef))
    using (ComputeCommandQueue commands = new ComputeCommandQueue(openCLContext, openCLContext.Devices[0], ComputeCommandQueueFlags.None))
    {
        //Set arguments
        openCLKernel.SetMemoryArgument(0, libertyGroupsIn);
        openCLKernel.SetMemoryArgument(1, groupNumbersIn);
        openCLKernel.SetValueArgument<int>(2, x);
        openCLKernel.SetValueArgument<int>(3, y);
        openCLKernel.SetMemoryArgument(4, surroundingLibsIn);
        openCLKernel.SetMemoryArgument(5, emptySurroundRefIn);
        openCLKernel.SetMemoryArgument(6, duplicateGroupsRefIn);
        openCLKernel.SetValueArgument<int>(7, ourSgn);

        System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();

        //Execute kernel: one global work item, in a work group of one
        commands.Execute(openCLKernel, null, new long[] { 1 }, new long[] { 1 }, null);
        commands.Finish();

        //Read output data (blocking reads)
        commands.ReadFromBuffer(surroundingLibsIn, ref surroundingLibs, true, null);
        commands.ReadFromBuffer(emptySurroundRefIn, ref emptySurroundRef, true, null);
        commands.ReadFromBuffer(duplicateGroupsRefIn, ref duplicateGroupsRef, true, null);
        commands.Finish();

        stopwatch.Stop();
        elapsedMilliseconds = stopwatch.Elapsed.TotalMilliseconds;
    }

    emptySurroundings = emptySurroundRef[0];
    duplicateGroups = duplicateGroupsRef[0];

    //Tests are done by our caller now
    Console.WriteLine(elapsedMilliseconds);
}
/// <summary>
/// Builds <c>clProgramSource</c>, runs its <c>CompareGPUCPU</c> kernel 100 times over
/// 640*480 random floats and logs the average time per run plus the first input/output element.
/// Any exception is written to <paramref name="log"/> instead of being propagated.
/// </summary>
/// <param name="context">Compute context; its first device runs the command queue.</param>
/// <param name="log">Receives the timing line, the sample values and, on failure, the exception text.</param>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        // Create the input array and fill it with random data.
        int count = 640 * 480;
        float[] arrA = new float[count];
        float[] arrC = new float[count];

        Random rand = new Random();
        for (int i = 0; i < count; i++)
        {
            arrA[i] = (float)(rand.NextDouble() * 100);
        }

        // Create and build the opencl program. It is stored in the `program` field, so it is
        // not disposed here.
        program = new ComputeProgram(context, clProgramSource);
        program.Build(null, null, null, IntPtr.Zero);

        const int repeatTimes = 100;
        System.Diagnostics.Stopwatch stopwatch;

        // Access modifiers should match those in the kernel. CopyHostPointer fills the input
        // buffer from arrA; the output buffer only needs its size. Everything created for
        // this run is released when the block exits, including on failure.
        using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
        using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
        using (ComputeKernel kernel = program.CreateKernel("CompareGPUCPU"))
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            // The arguments never change between iterations, so set them once.
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, c);

            // Stopwatch is monotonic and finer-grained than subtracting DateTime.Now values.
            stopwatch = System.Diagnostics.Stopwatch.StartNew();
            for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
            {
                // One work item per element.
                commands.Execute(kernel, null, new long[] { count }, null, null);

                // Non-blocking read: arrC must not be used before Finish() returns.
                commands.ReadFromBuffer(c, ref arrC, false, null);
                commands.Finish();
            }
            stopwatch.Stop();
        }

        double perTaskTime = stopwatch.Elapsed.TotalMilliseconds / repeatTimes;
        log.WriteLine("Use {0} ms using GPU", perTaskTime);
        log.WriteLine("arrA[0]:{0}, arrC[0]:{1}", arrA[0], arrC[0]);
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
}
/// <summary>
/// Renders the scene for <paramref name="camera"/> into an off-screen buffer and returns
/// the result as a bitmap.
/// </summary>
/// <param name="camera">Camera settings passed to every render pass.</param>
/// <param name="screenshotHeight">Bitmap height in pixels; the width is derived via <c>ScreenshotAspectRatio</c>.</param>
/// <param name="slowRender">Passed to every <c>CoreRender</c> call and used as the pass-count factor.</param>
/// <returns>The rendered bitmap. Pixels with a NaN channel are left at their default value and reported on the console.</returns>
public Bitmap GetScreenshot(CameraConfig camera, int screenshotHeight, int slowRender)
{
    var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
    var pixels = new Vector4[screenshotWidth * screenshotHeight];

    // `using` releases the queue and the buffer even when a render pass or the read throws.
    using (var queue = new ComputeCommandQueue(_program.Context, _program.Context.Devices[0], ComputeCommandQueueFlags.None))
    using (var computeBuffer = new ComputeBuffer<Vector4>(_program.Context, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight))
    {
        var globalSize = GlobalLaunchsizeFor(screenshotWidth, screenshotHeight);

        // Two rounds of passes: slowRender passes first, then camera.Frame * slowRender more,
        // each round counting its frame index from 0.
        for (var i = 0; i < slowRender; i++)
        {
            CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
        }
        for (var i = 0; i < camera.Frame * slowRender; i++)
        {
            CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
        }

        queue.ReadFromBuffer(computeBuffer, ref pixels, true, null);
        queue.Finish();
    }

    // Clamp to [0, 1] before narrowing: an unclamped cast wraps around, so a channel slightly
    // above 1 (or below 0) would come out as an unrelated value instead of saturating.
    Func<float, int> toChannel = value => (byte)(Math.Min(1f, Math.Max(0f, value)) * 255);

    var bmp = new Bitmap(screenshotWidth, screenshotHeight);
    var destBuffer = new int[screenshotWidth * screenshotHeight];
    for (var y = 0; y < screenshotHeight; y++)
    {
        for (var x = 0; x < screenshotWidth; x++)
        {
            var pixel = pixels[x + y * screenshotWidth];
            if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
            {
                Console.WriteLine("Warning! Caught NAN pixel while taking screenshot!");
                continue;
            }

            // X/Y/Z become the red/green/blue bytes of a 0x00RRGGBB value.
            destBuffer[y * screenshotWidth + x] = toChannel(pixel.X) << 16 | toChannel(pixel.Y) << 8 | toChannel(pixel.Z);
        }
    }

    var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
    Marshal.Copy(destBuffer, 0, bmpData.Scan0, destBuffer.Length);
    bmp.UnlockBits(bmpData);
    return bmp;
}
/// <summary>
/// Console entry point: runs <c>Test1</c>, then builds <c>Foom/CL/testProg.txt</c>, executes
/// its <c>vector_add</c> kernel over 1024 ints, prints the first ten results and how long
/// the queue took to finish, and finally keeps the process alive.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    //Test2();
    Test1();

    ComputePlatform plat = ComputePlatform.Platforms[0];
    Console.WriteLine("Plat:" + plat.Name);

    int[] data = new int[1024];
    for (int i = 0; i < 1024; i++)
    {
        data[i] = 100;
    }

    int finishTicks;

    // All OpenCL objects are released when the blocks exit, including on failure.
    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(plat), null, IntPtr.Zero))
    using (ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    {
        // ReadAllText closes the file even if reading fails.
        string clSrc = File.ReadAllText("Foom/CL/testProg.txt");

        using (ComputeProgram prog = new ComputeProgram(context, clSrc))
        {
            prog.Build(null, null, null, IntPtr.Zero);
            Console.WriteLine("BS:" + prog.GetBuildStatus(context.Devices[0]).ToString());
            Console.WriteLine("Info:" + prog.GetBuildLog(context.Devices[0]));

            using (ComputeKernel kern = prog.CreateKernel("vector_add"))
            using (ComputeBuffer <int> b1 = new ComputeBuffer <int>(context, ComputeMemoryFlags.CopyHostPointer, data))
            using (ComputeBuffer <int> b2 = new ComputeBuffer <int>(context, ComputeMemoryFlags.WriteOnly, 1024))
            {
                kern.SetMemoryArgument(0, b1);
                kern.SetMemoryArgument(1, b2);

                long[] wo = new long[] { 0 };     // global work offset
                long[] ws = new long[] { 1024 };  // global work size
                long[] tc = new long[] { 16 };    // local work size
                queue.Execute(kern, wo, ws, tc, null);

                // Only the wait for the queue to drain is timed.
                finishTicks = Environment.TickCount;
                queue.Finish();
                finishTicks = Environment.TickCount - finishTicks;

                queue.ReadFromBuffer <int>(b2, ref data, true, null);
            }
        }
    }

    for (int i = 0; i < 10; i++)
    {
        Console.WriteLine("C:" + data[i]);
    }
    Console.WriteLine("Done:" + finishTicks);

    // Keep the process (and its console output) alive without spinning a CPU core;
    // like the former `while (true) { }`, this never returns.
    System.Threading.Thread.Sleep(System.Threading.Timeout.Infinite);
}
/// <summary>
/// Renders a screenshot tile by tile into a 24bpp bitmap, reporting progress through
/// <paramref name="displayInformation"/>.
/// </summary>
/// <param name="screenshotHeight">Bitmap height in pixels; the width is derived via <c>ScreenshotAspectRatio</c>.</param>
/// <param name="slowRenderPower">Multiplier applied to the kernel's thread size to obtain the tile size; also passed to the kernel.</param>
/// <param name="displayInformation">Callback that receives progress text.</param>
/// <returns>The rendered bitmap, or <c>null</c> when a bitmap of the requested size cannot be created.</returns>
public Bitmap Screenshot(int screenshotHeight, int slowRenderPower, Action<string> displayInformation)
{
    displayInformation("Rendering screenshot");
    var ccontext = _kernel.ComputeContext;

    _kernelInUse++;
    try
    {
        var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
        Bitmap bmp;
        try
        {
            bmp = new Bitmap(screenshotWidth, screenshotHeight, PixelFormat.Format24bppRgb);
        }
        catch (ArgumentException)
        {
            // The outer finally still decrements _kernelInUse; this path used to leave the
            // counter incremented.
            MessageBox.Show("Image size too big", "Error");
            return null;
        }

        var nancount = 0;
        var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format24bppRgb);
        try
        {
            var scan0 = bmpData.Scan0.ToInt64();

            using (var queue = new ComputeCommandQueue(ccontext, ccontext.Devices[0], ComputeCommandQueueFlags.None))
            {
                // Tile size = kernel thread size scaled by slowRenderPower.
                var localSize = _kernel.Threadsize(queue);
                for (var i = 0; i < localSize.Length; i++)
                {
                    localSize[i] *= slowRenderPower;
                }
                var tileWidth = localSize[0];
                var tileHeight = localSize[1];

                using (var computeBuffer = new ComputeBuffer<Vector4>(ccontext, ComputeMemoryFlags.ReadWrite, tileWidth * tileHeight))
                {
                    const int numFrames = 200;
                    var frameDependantControls = _parameters as IFrameDependantControl;
                    var framesToRender = frameDependantControls == null ? 1 : numFrames;

                    // Number of tiles per axis, rounded up so partial tiles at the edges are covered.
                    var totalYs = (screenshotHeight + tileHeight - 1) / tileHeight;
                    var totalXs = (screenshotWidth + tileWidth - 1) / tileWidth;

                    // The staging array is fully overwritten by every read, so one instance is reused.
                    var pixels = new Vector4[tileWidth * tileHeight];
                    var stopwatch = new Stopwatch();

                    for (var y = 0; y < totalYs; y++)
                    {
                        for (var x = 0; x < totalXs; x++)
                        {
                            stopwatch.Restart();
                            for (var frame = 0; frame < framesToRender; frame++)
                            {
                                if (frameDependantControls != null)
                                {
                                    frameDependantControls.Frame = frame;
                                }
                                displayInformation(string.Format("Screenshot {0}% done", 100 * (y * totalXs * framesToRender + x * framesToRender + frame) / (totalXs * totalYs * framesToRender)));
                                _kernel.Render(computeBuffer, queue, _parameters, new Size(screenshotWidth, screenshotHeight), slowRenderPower, new Size(x, y), (int)tileWidth);
                            }

                            queue.ReadFromBuffer(computeBuffer, ref pixels, true, 0, 0, tileWidth * tileHeight, null);
                            queue.Finish();
                            stopwatch.Stop();

                            // Running average weighted 4:1 towards the previous value.
                            var elapsed = stopwatch.Elapsed.TotalMilliseconds / framesToRender;
                            _kernel.AverageKernelTime = (elapsed + _kernel.AverageKernelTime * 4) / 5;

                            // Edge tiles are cropped to the part that lies inside the bitmap.
                            var blockWidth = Math.Min(tileWidth, screenshotWidth - x * tileWidth);
                            var blockHeight = Math.Min(tileHeight, screenshotHeight - y * tileHeight);

                            // Freshly zeroed per tile: channels that are NaN stay 0.
                            var intPixels = new byte[blockWidth * blockHeight * 3];
                            for (var py = 0; py < blockHeight; py++)
                            {
                                for (var px = 0; px < blockWidth; px++)
                                {
                                    // Rows in the tile buffer are tileWidth pixels apart (the same
                                    // value handed to Render above). The previous index used the
                                    // tile height, which only matches for square tiles.
                                    var pixel = pixels[py * tileWidth + px];
                                    if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
                                    {
                                        nancount++;
                                    }

                                    // BGR
                                    var offset = (py * blockWidth + px) * 3;
                                    if (float.IsNaN(pixel.Z) == false)
                                    {
                                        intPixels[offset + 0] = (byte)(pixel.Z * 255);
                                    }
                                    if (float.IsNaN(pixel.Y) == false)
                                    {
                                        intPixels[offset + 1] = (byte)(pixel.Y * 255);
                                    }
                                    if (float.IsNaN(pixel.X) == false)
                                    {
                                        intPixels[offset + 2] = (byte)(pixel.X * 255);
                                    }
                                }
                            }

                            // Copy the tile into the locked bitmap one scan line at a time.
                            for (var line = 0; line < blockHeight; line++)
                            {
                                Marshal.Copy(intPixels, line * (int)blockWidth * 3, new IntPtr(scan0 + ((y * tileHeight + line) * bmpData.Stride) + x * tileWidth * 3), (int)blockWidth * 3);
                            }
                        }
                    }
                }
            }
        }
        finally
        {
            // Never leave the bitmap locked, even when rendering fails.
            bmp.UnlockBits(bmpData);
        }

        if (nancount != 0)
        {
            MessageBox.Show(string.Format("Caught {0} NAN pixels while taking screenshot", nancount), "Warning");
        }
        return bmp;
    }
    finally
    {
        _kernelInUse--;
    }
}
/// <summary>
/// Manual smoke test for the "MonteCarloSimulate" OpenCL kernel.
/// Reads <c>MonteCarloSimulate.cl</c> from the working directory, builds it for the
/// first OpenCL platform, runs the kernel over three <c>sideSize * sideSize</c> int
/// buffers, reads the output buffer back, spot-checks one randomly chosen output
/// cell against a host-side sum of products, and prints the check result followed
/// by the elapsed time in milliseconds.
/// </summary>
/// <remarks>
/// Every OpenCL object created here is released through <c>using</c>, including
/// when building the program or running the kernel throws.
/// </remarks>
public static void Test()
{
    string source = File.ReadAllText("MonteCarloSimulate.cl");

    // Choose the platform; the device itself is taken from the context further
    // down so that work sizing and the command queue refer to the same device.
    ComputePlatform platform = ComputePlatform.Platforms[0];
    ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);

    // Host-side arguments.
    int sideSize = 4096;
    int[] inMatrixA = new int[sideSize * sideSize];
    int[] inMatrixB = new int[sideSize * sideSize];
    int[] outMatrixC = new int[sideSize * sideSize];
    Random random = new Random((int)DateTime.Now.Ticks);

    // Matrices are only dumped to the console when they are small enough to read.
    bool printMatrices = sideSize <= 32;

    // NOTE(review): the inputs are only randomized for small matrices; with the
    // current sideSize both inputs stay all-zero, so the final check compares
    // against 0. Confirm whether that is intended before relying on the result.
    if (printMatrices)
    {
        for (int y = 0; y < sideSize; y++)
        {
            for (int x = 0; x < sideSize; x++)
            {
                inMatrixA[y * sideSize + x] = random.Next(3);
                inMatrixB[y * sideSize + x] = random.Next(3);
                outMatrixC[y * sideSize + x] = 0;
            }
        }
    }

    // Stopwatch is monotonic and high resolution, unlike wall-clock DateTime ticks.
    Stopwatch stopwatch = new Stopwatch();

    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.All, properties, null, IntPtr.Zero))
    using (ComputeProgram program = new ComputeProgram(context, source))
    {
        // The same device is used for sizing the work group and for the queue.
        ComputeDevice device = context.Devices[0];

        // Build the program, which gets us the kernel. A notify callback can be
        // passed as the 3rd argument to make the build non-blocking.
        program.Build(null, null, null, IntPtr.Zero);

        using (ComputeKernel kernel = program.CreateKernel("MonteCarloSimulate"))
        using (ComputeBuffer<int> bufferMatrixA = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, inMatrixA))
        using (ComputeBuffer<int> bufferMatrixB = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, inMatrixB))
        using (ComputeBuffer<int> bufferMatrixC = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, outMatrixC))
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None))
        {
            long localWorkSize = Math.Min(device.MaxComputeUnits, sideSize);

            // Set kernel arguments.
            kernel.SetMemoryArgument(0, bufferMatrixA);
            kernel.SetMemoryArgument(1, bufferMatrixB);
            kernel.SetMemoryArgument(2, bufferMatrixC);
            // NOTE(review): presumably the size of the kernel's local scratch
            // argument; confirm the expected unit against the kernel source.
            kernel.SetLocalArgument(3, sideSize * 2);
            kernel.SetValueArgument<int>(4, sideSize);

            if (printMatrices)
            {
                // Left padding written before each row of matrix A; only built
                // when it is actually printed.
                string offset = new string(' ', sideSize + 1);
                for (int y = 0; y < sideSize; y++)
                {
                    Console.Write(offset);
                    for (int x = 0; x < sideSize; x++)
                    {
                        Console.Write(inMatrixA[y * sideSize + x] + " ");
                    }
                    Console.WriteLine();
                }
            }

            // Timed section: kernel launch through the blocking read-back.
            stopwatch.Start();

            // NOTE(review): OpenCL 1.x requires each global work size to be a
            // multiple of the matching local work size, and the local size to
            // stay within the kernel's work-group limit. localWorkSize is derived
            // from MaxComputeUnits here, which does not guarantee either; consider
            // kernel.GetWorkGroupSize(device) or
            // kernel.GetPreferredWorkGroupSizeMultiple(device) instead.
            commands.Execute(
                kernel,
                null,
                new long[] { Math.Min(sideSize, 16), Math.Min(sideSize, 16) },
                new long[] { localWorkSize, 1 },
                null);
            commands.Finish();

            commands.ReadFromBuffer(bufferMatrixC, ref outMatrixC, true, null);
            commands.Finish();

            stopwatch.Stop();
        }
    }

    Console.WriteLine();
    if (printMatrices)
    {
        for (int y = 0; y < sideSize; y++)
        {
            for (int x = 0; x < sideSize; x++)
            {
                Console.Write(inMatrixB[y * sideSize + x] + " ");
            }
            Console.Write(" ");
            for (int x = 0; x < sideSize; x++)
            {
                Console.Write(outMatrixC[y * sideSize + x] + " ");
            }
            Console.WriteLine();
        }
    }

    // Spot-check one random output cell against the host-side sum of products.
    int testY = random.Next(sideSize);
    int testX = random.Next(sideSize);
    int sum = 0;
    for (int q = 0; q < sideSize; q++)
    {
        sum += inMatrixA[q * sideSize + testX] * inMatrixB[testY * sideSize + q];
    }

    Console.WriteLine(sum == outMatrixC[testY * sideSize + testX]);
    Console.WriteLine(stopwatch.Elapsed.TotalMilliseconds);
}