/// <summary>
/// Computes min and max values on the OpenCL device and reads them back,
/// one entry per column of <paramref name="frame"/>.
/// </summary>
/// <param name="frame">Input frame; its column count sizes the work range and both result arrays.</param>
/// <param name="maxValues">Receives the max values, or null if the computation failed before they were read.</param>
/// <param name="minValues">Receives the min values, or null if the computation failed before they were read.</param>
/// <param name="windowValue">Window value forwarded to <c>MinMaxCL.UpdateArguments</c>.</param>
public void GetMinMaxValuesCL(Mat frame, out int[] maxValues, out int[] minValues, int windowValue)
{
    maxValues = null;
    minValues = null;
    try
    {
        MinMaxCL.UpdateArguments(frame, clooCtx, ctxMinMaxKernel, windowValue);

        // Execute the kernel over frame.Cols work-items.
        queue.Execute(ctxMinMaxKernel, null, new long[] { frame.Cols }, null, null);

        // Max values. The array is pinned only for the duration of the blocking read;
        // the handle must be freed, otherwise the array stays pinned forever.
        maxValues = new int[frame.Cols];
        GCHandle maxHandle = GCHandle.Alloc(maxValues, GCHandleType.Pinned);
        try
        {
            queue.Read(MinMaxCL.maxBufferCB, true, 0, maxValues.Length, maxHandle.AddrOfPinnedObject(), null);
        }
        finally
        {
            maxHandle.Free();
        }

        // Min values.
        minValues = new int[frame.Cols];
        GCHandle minHandle = GCHandle.Alloc(minValues, GCHandleType.Pinned);
        try
        {
            queue.Read(MinMaxCL.minBufferCB, true, 0, minValues.Length, minHandle.AddrOfPinnedObject(), null);
        }
        finally
        {
            minHandle.Free();
        }

        // End of the OpenCL computation.
        queue.Finish();
    }
    catch (Exception ex)
    {
        // Failures are reported to the user instead of being propagated.
        MessageBox.Show(ex.Message);
    }
}
/// <summary>
/// Gets the values for an explicit input
/// </summary>
/// <param name="input">The explicit input</param>
/// <param name="output">
/// The output values. If the array is null or shorter than <paramref name="input"/>,
/// it is replaced by a new array of the required length.
/// </param>
/// <exception cref="Exception">Thrown when the context or kernel has not been set up yet.</exception>
public void GetValues(Single3[] input, ref float[] output)
{
    if (context == null || kernelExplicit == null)
    {
        throw new Exception("Compile first!");
    }

    int inputLength = input.Length;

    // The device writes inputLength floats straight into the pinned array, so a
    // missing or too-short array would corrupt memory. Allocate a fitting one instead.
    if (output == null || output.Length < inputLength)
    {
        output = new float[inputLength];
    }

    // IO length changed
    if (lastLength != inputLength)
    {
        lastLength = inputLength;
        // NOTE(review): the previous outputBuffer is not disposed here because its
        // ownership is not visible from this method — confirm and release it if owned.
        outputBuffer = new Cloo.ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, inputLength);
        kernelExplicit.SetMemoryArgument(2, outputBuffer);
    }

    // The input buffer is created per call, so it must also be released per call.
    using (ComputeBuffer<Single3> bufIn = new Cloo.ComputeBuffer<Single3>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input))
    {
        // Arrange params
        kernelExplicit.SetMemoryArgument(0, bufIn);

        // Exec and read
        queue.Execute(kernelExplicit, null, new long[] { inputLength }, null, null);

        GCHandle outHandle = GCHandle.Alloc(output, GCHandleType.Pinned);
        try
        {
            queue.Read<float>(outputBuffer, true, 0, inputLength, outHandle.AddrOfPinnedObject(), null);
        }
        finally
        {
            // Always unpin, even when the read throws.
            outHandle.Free();
        }

        queue.Finish();
    }
}
/// <summary>
/// Waits for the queue to finish and then enqueues the release of <paramref name="image"/>
/// as a GL object.
/// </summary>
/// <param name="image">The image to release.</param>
public void UnlockOpenGLObject(ComputeImage2D image)
{
    // Let every previously enqueued command complete first.
    queue.Finish();

    var sharedObjects = new List<ComputeMemory>(1);
    sharedObjects.Add(image);

    queue.ReleaseGLObjects(sharedObjects, null);
}
/// <summary>
/// Returns the host-side int array registered under <paramref name="key"/>. When hardware
/// acceleration is enabled, the matching compute buffer is first read into that array.
/// </summary>
/// <param name="key">Name of the buffer.</param>
/// <param name="length">Number of elements to read from the compute buffer.</param>
/// <returns>The host-side array stored for <paramref name="key"/>.</returns>
public int[] ReadIntBuffer(string key, int length)
{
    int[] hostCopy = _intBuffers[key];

    if (!HardwareAccelerationEnabled)
    {
        return hostCopy;
    }

    // Blocking read from offset 0 into offset 0, followed by a full queue flush.
    _commands.ReadFromBuffer(_intComputeBuffers[key], ref hostCopy, true, 0, 0, length, null);
    _commands.Finish();

    return hostCopy;
}
/// <summary>
/// Fills a buffer with random 64-bit values, maps it into host memory and prints both the
/// original and the mapped content to the console.
/// </summary>
protected override void RunInternal()
{
    // The queue and the buffer wrap native OpenCL handles; release them deterministically.
    using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.Profiling))
    {
        Console.WriteLine("Original content:");

        Random rand = new Random();
        int count = 6;
        long[] bufferContent = new long[count];
        for (int i = 0; i < count; i++)
        {
            bufferContent[i] = (long)(rand.NextDouble() * long.MaxValue);
            Console.WriteLine("\t" + bufferContent[i]);
        }

        using (ComputeBuffer<long> buffer = new ComputeBuffer<long>(context, ComputeMemoryFlags.CopyHostPointer, bufferContent))
        {
            // Non-blocking map: the pointer is only valid to read after Finish().
            IntPtr mappedPtr = commands.Map(buffer, false, ComputeMemoryMappingFlags.Read, 0, bufferContent.Length, null);
            commands.Finish();

            Console.WriteLine("Mapped content:");

            for (int i = 0; i < bufferContent.Length; i++)
            {
                IntPtr ptr = new IntPtr(mappedPtr.ToInt64() + i * sizeof(long));
                Console.WriteLine("\t" + Marshal.ReadInt64(ptr));
            }

            commands.Unmap(buffer, ref mappedPtr, null);

            // Wait for the unmap to complete before the buffer is released.
            commands.Finish();
        }
    }
}
/// <summary>
/// Runs the "Calc" kernel on two 4-element int rows on the first GPU of the first platform
/// and prints the result row.
/// </summary>
static void Main(string[] args)
{
    int[] r1 = new int[] { 8, 2, 3, 4 };
    int[] r2 = new int[] { 4, 3, 2, 5 };
    int[] r3 = new int[4];
    int rowSize = r1.Length;

    // pick first platform
    ComputePlatform platform = ComputePlatform.Platforms[0];

    // create context with all gpu devices, a command queue on the first gpu found,
    // and the program from the opencl source
    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero))
    using (ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
    {
        // compile opencl source
        program.Build(null, null, null, IntPtr.Zero);

        // Inputs are copied to the device (CopyHostPointer): the managed arrays are not
        // pinned for the buffer's lifetime, so UseHostPointer would leave the device with
        // a pointer the GC is free to move.
        // The result buffer is written by the kernel, so it must not be ReadOnly.
        using (ComputeKernel kernel = program.CreateKernel("Calc"))
        using (ComputeBuffer<int> row1Buffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r1))
        using (ComputeBuffer<int> row2Buffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, r2))
        using (ComputeBuffer<int> resultBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, r3.Length))
        {
            kernel.SetMemoryArgument(0, row1Buffer);   // first input row
            kernel.SetMemoryArgument(1, row2Buffer);   // second input row
            kernel.SetValueArgument(2, rowSize);       // the array size
            kernel.SetMemoryArgument(3, resultBuffer); // result row

            // execute kernel as a single task and wait for completion
            queue.ExecuteTask(kernel, null);
            queue.Finish();

            GCHandle arrCHandle = GCHandle.Alloc(r3, GCHandleType.Pinned);
            try
            {
                queue.Read<int>(resultBuffer, true, 0, r3.Length, arrCHandle.AddrOfPinnedObject(), null);
            }
            finally
            {
                // Unpin even if the read fails.
                arrCHandle.Free();
            }

            Console.WriteLine("display result from gpu buffer:");
            for (int i = 0; i < r3.Length; i++)
            {
                Console.WriteLine(r3[i]);
            }
        }
    }

    Console.WriteLine("Finished");
    Console.ReadKey();
}
/// <summary>
/// Finds the nonce for a block header hash that meets the given target.
/// </summary>
/// <param name="header">serialized block header</param>
/// <param name="bits">the target</param>
/// <param name="nonceStart">the first nonce value to try</param>
/// <param name="iterations">the number of iterations (used as the global work size)</param>
/// <returns>The single value read back from the kernel's output buffer.</returns>
/// <exception cref="InvalidOperationException">Thrown when no compute device is available.</exception>
public uint FindPow(byte[] header, byte[] bits, uint nonceStart, uint iterations)
{
    if (this.computeDevice == null)
    {
        throw new InvalidOperationException("GPU not found");
    }

    this.ConstructOpenCLResources();
    try
    {
        // The using declarations are scoped to this try block, so buffers and queue are
        // released before DisposeOpenCLResources() runs in the finally below.
        using var headerBuffer = new ComputeBuffer<byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, header);
        using var bitsBuffer = new ComputeBuffer<byte>(this.computeContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, bits);
        using var powBuffer = new ComputeBuffer<uint>(this.computeContext, ComputeMemoryFlags.WriteOnly, 1);

        this.computeKernel.SetMemoryArgument(0, headerBuffer);
        this.computeKernel.SetMemoryArgument(1, bitsBuffer);
        this.computeKernel.SetValueArgument(2, nonceStart);
        this.computeKernel.SetMemoryArgument(3, powBuffer);

        using var commands = new ComputeCommandQueue(this.computeContext, this.computeDevice, ComputeCommandQueueFlags.None);
        commands.Execute(this.computeKernel, null, new long[] { iterations }, null, null);

        var nonceOut = new uint[1];
        commands.ReadFromBuffer(powBuffer, ref nonceOut, true, null);
        commands.Finish();

        return nonceOut[0];
    }
    finally
    {
        // Runs on success and on failure, so the resources built above never leak.
        this.DisposeOpenCLResources();
    }
}
/// <summary>
/// Runs the proof-of-work kernel and reports how long the call took.
/// </summary>
/// <param name="header">Header bytes copied to the device as kernel argument 0.</param>
/// <param name="bits">Target bytes copied to the device as kernel argument 1.</param>
/// <param name="nonceStart">Passed to the kernel as argument 2.</param>
/// <param name="iterations">Global work size of the kernel launch.</param>
/// <param name="elapsedMilliseconds">Milliseconds measured from method entry until the queue finished.</param>
/// <returns>The single value read back from the kernel's output buffer.</returns>
public uint FindProofOfWork(byte[] header, byte[] bits, uint nonceStart, uint iterations, out long elapsedMilliseconds)
{
    stopwatch.Restart();

    ComputeMemoryFlags inputFlags = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;

    using (ComputeBuffer<byte> headerInput = new ComputeBuffer<byte>(computeContext, inputFlags, header))
    using (ComputeBuffer<byte> bitsInput = new ComputeBuffer<byte>(computeContext, inputFlags, bits))
    using (ComputeBuffer<uint> nonceOutput = new ComputeBuffer<uint>(computeContext, ComputeMemoryFlags.WriteOnly, 1))
    {
        computeKernel.SetMemoryArgument(0, headerInput);
        computeKernel.SetMemoryArgument(1, bitsInput);
        computeKernel.SetValueArgument(2, nonceStart);
        computeKernel.SetMemoryArgument(3, nonceOutput);

        using (ComputeCommandQueue commandQueue = new ComputeCommandQueue(computeContext, computeDevice, ComputeCommandQueueFlags.None))
        {
            long[] globalWorkSize = { iterations };
            commandQueue.Execute(computeKernel, null, globalWorkSize, null, null);

            uint[] found = new uint[1];
            commandQueue.ReadFromBuffer(nonceOutput, ref found, true, null);
            commandQueue.Finish();

            // Timing stops before the device objects are released.
            elapsedMilliseconds = stopwatch.ElapsedMilliseconds;
            return found[0];
        }
    }
}
/// <summary>
/// Runs the matrix kernel on <paramref name="a"/> and <paramref name="b"/> and reads the
/// device result into <paramref name="result"/>.
/// </summary>
/// <param name="result">Receives the result; its dimensions define the work size and the read region.</param>
/// <param name="a">First input matrix, copied to the device.</param>
/// <param name="b">Second input matrix, copied to the device.</param>
public unsafe static void MatrixMulti_OpenCL(double[,] result, double[,] a, double[,] b)
{
    InitCloo();

    // NOTE(review): dimension 0 is stored in "ncols" and dimension 1 in "nrows";
    // the naming is kept as in the original — confirm against the kernel.
    var ncols = result.GetUpperBound(0) + 1;
    var nrows = result.GetUpperBound(1) + 1;

    fixed (double* rp = result, ap = a, bp = b)
    {
        // Buffers and queue are created per call, so they are released per call as well.
        using (ComputeBuffer<double> aBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, a.Length, (IntPtr)ap))
        using (ComputeBuffer<double> bBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, b.Length, (IntPtr)bp))
        using (ComputeBuffer<double> rBuffer = new ComputeBuffer<double>(context, ComputeMemoryFlags.WriteOnly, result.Length))
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            kernel.SetMemoryArgument(0, aBuffer);
            kernel.SetMemoryArgument(1, bBuffer);
            kernel.SetValueArgument(2, ncols);
            kernel.SetMemoryArgument(3, rBuffer);

            ComputeEventList events = new ComputeEventList();

            commands.Execute(kernel, null, new long[] { result.Length }, null, events);

            // Non-blocking read; Finish() below waits for it while "result" is still pinned.
            commands.ReadFromBuffer(rBuffer, ref result, false, new SysIntX2(), new SysIntX2(), new SysIntX2(ncols, nrows), events);
            commands.Finish();
        }
    }
}
/// <summary>
/// Executes the kernel over <c>DataGenerator.InputCount</c> work-items, waits for it to
/// finish and reads both result buffers back into their host arrays.
/// </summary>
public override void Proccess()
{
    long[] globalWorkSize = { DataGenerator.InputCount };

    // Launch the kernel and wait for completion.
    queue.Execute(kernel, null, globalWorkSize, null, null);
    queue.Finish();

    // Blocking reads of the two device result buffers.
    queue.ReadFromBuffer(result_dev, ref resultsBytes, true, null);
    queue.ReadFromBuffer(resultCalc_dev, ref calculatables, true, null);
}
/// <summary>
/// Multiplies <paramref name="L"/> by <paramref name="R"/> on the device.
/// </summary>
/// <param name="L">Left operand; flattened with <c>To1D()</c>.</param>
/// <param name="R">Right operand; flattened with <c>To1D()</c>.</param>
/// <returns>A flat array of <c>L.M * R.N</c> elements read back from the device.</returns>
private double[] MatrixMultiply(Matrix<double> L, Matrix<double> R)
{
    var L_array = L.To1D();
    var R_array = R.To1D();
    var O_array = new double[L.M * R.N];

    // "using" guarantees the device buffers are released even when a call below throws.
    using (ComputeBuffer<double> a = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, L_array))
    using (ComputeBuffer<double> b = new ComputeBuffer<double>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, R_array))
    using (ComputeBuffer<double> c = new ComputeBuffer<double>(context, ComputeMemoryFlags.WriteOnly, O_array.Length))
    {
        kernel.SetMemoryArgument(0, a);
        kernel.SetMemoryArgument(1, b);
        kernel.SetMemoryArgument(2, c);
        kernel.SetValueArgument(3, L.N);
        kernel.SetValueArgument(4, L.M);
        kernel.SetValueArgument(5, R.N);
        kernel.SetValueArgument(6, R.M);

        // Two-dimensional launch: R.N by L.M work-items.
        commands.Execute(kernel, null, new long[] { R.N, L.M }, null, null);

        commands.ReadFromBuffer(c, ref O_array, true, null);
        commands.Finish();
    }

    return O_array;
}
/// <summary>
/// Multiplies two integer matrices on the GPU, one kernel task per result cell.
/// </summary>
/// <param name="matrixOne">Left matrix as a list of rows.</param>
/// <param name="matrixTwo">Right matrix as a list of rows.</param>
/// <returns>The product matrix and the time spent enqueuing and executing the kernel tasks.</returns>
/// <exception cref="ArgumentException">Thrown when a matrix is not regular or the shapes are incompatible.</exception>
/// <exception cref="PlatformNotSupportedException">Thrown when <c>GetGPU()</c> returns no platform.</exception>
public static Tuple<List<List<int>>, TimeSpan> MultiplyParallel(List<List<int>> matrixOne, List<List<int>> matrixTwo)
{
    if (!isRegularMatrix(matrixOne) || !isRegularMatrix(matrixTwo))
    {
        throw new ArgumentException("Non regular matrix detected. Rows size mismatch detected.");
    }

    if (matrixOne[0].Count != matrixTwo.Count)
    {
        throw new ArgumentException("Matrixes is not compatible. Columns count of first matrix is not equal to rows count of second matrix.");
    }

    List<List<int>> result = new List<List<int>>();

    ComputePlatform platform = GetGPU();
    if (platform is null)
    {
        throw new PlatformNotSupportedException("Platform doesn't have a dedicated GPU. Run is impossible.");
    }

    Stopwatch stopwatch = new Stopwatch();

    // Every device buffer created below is collected here so it can be released at the end.
    List<ComputeBuffer<int>> ownedBuffers = new List<ComputeBuffer<int>>();

    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero))
    using (ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
    {
        program.Build(null, null, null, IntPtr.Zero);

        using (ComputeKernel kernel = program.CreateKernel("Multiply"))
        {
            try
            {
                List<ComputeBuffer<int>> rowsMatrixOne = matrixOne.TransformMatrixToComputerBuffersOfRows(context);
                ownedBuffers.AddRange(rowsMatrixOne);

                List<ComputeBuffer<int>> columnsMatrixTwo = matrixTwo.TransformMatrixToComputerBuffersOfColumns(context);
                ownedBuffers.AddRange(columnsMatrixTwo);

                List<ComputeBuffer<int>> resultRowsMatrix = TwoDToOneDResult(matrixOne.Count, matrixTwo[0].Count, context);
                ownedBuffers.AddRange(resultRowsMatrix);

                stopwatch.Start();

                // One task per result cell (i, j): row i of the first matrix times column j of the second.
                for (int i = 0; i < resultRowsMatrix.Count; ++i)
                {
                    for (int j = 0; j < resultRowsMatrix[i].Count; ++j)
                    {
                        kernel.SetMemoryArgument(0, rowsMatrixOne[i]);
                        kernel.SetMemoryArgument(1, columnsMatrixTwo[j]);
                        kernel.SetMemoryArgument(2, resultRowsMatrix[i]);
                        kernel.SetValueArgument(3, matrixTwo.Count);
                        kernel.SetValueArgument(4, j);

                        queue.ExecuteTask(kernel, null);
                    }
                }

                queue.Finish();
                stopwatch.Stop();

                for (int i = 0; i < resultRowsMatrix.Count; ++i)
                {
                    int[] res = new int[resultRowsMatrix[i].Count];
                    GCHandle gCHandle = GCHandle.Alloc(res, GCHandleType.Pinned);
                    try
                    {
                        queue.Read<int>(resultRowsMatrix[i], true, 0, res.Length, gCHandle.AddrOfPinnedObject(), null);
                    }
                    finally
                    {
                        // Without Free() every result row would stay pinned forever.
                        gCHandle.Free();
                    }

                    result.Add(new List<int>(res));
                }
            }
            finally
            {
                foreach (ComputeBuffer<int> buffer in ownedBuffers)
                {
                    buffer.Dispose();
                }
            }
        }
    }

    return new Tuple<List<List<int>>, TimeSpan>(result, stopwatch.Elapsed);
}
/// <summary>
/// Runs the image kernel over <paramref name="inImage"/> and returns the result as a new
/// 32bpp ARGB bitmap of the same size.
/// </summary>
/// <param name="inImage">Source bitmap; locked read-only for the duration of the call.</param>
/// <returns>A new bitmap holding the kernel output. The caller owns it.</returns>
public Bitmap ProcessImage(Bitmap inImage)
{
    ComputeImage2D oclInImage = null;
    ComputeImage2D oclOutImage = null;
    Bitmap outImage = null;
    BitmapData outImageData = null;
    bool succeeded = false;

    BitmapData inImageData = inImage.LockBits(new Rectangle(0, 0, inImage.Width, inImage.Height), ImageLockMode.ReadOnly, PixelFormat.Format32bppArgb);
    try
    {
        // Created inside the try so a failure here still unlocks the input bitmap.
        outImage = new Bitmap(inImage.Width, inImage.Height, PixelFormat.Format32bppArgb);
        outImageData = outImage.LockBits(new Rectangle(0, 0, outImage.Width, outImage.Height), ImageLockMode.WriteOnly, PixelFormat.Format32bppArgb);

        oclInImage = new ComputeImage2D(oclContext, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, new ComputeImageFormat(ComputeImageChannelOrder.Bgra, ComputeImageChannelType.UNormInt8), inImage.Width, inImage.Height, 0, inImageData.Scan0);
        oclOutImage = new ComputeImage2D(oclContext, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, new ComputeImageFormat(ComputeImageChannelOrder.Bgra, ComputeImageChannelType.UNormInt8), outImage.Width, outImage.Height, 0, outImageData.Scan0);

        oclKernel.SetMemoryArgument(0, oclInImage);
        oclKernel.SetMemoryArgument(1, oclOutImage);

        // One work-item per pixel.
        oclCommandQueue.Execute(oclKernel, new long[] { 0, 0 }, new long[] { oclInImage.Width, oclInImage.Height }, null, null);
        oclCommandQueue.Finish();

        // Blocking read of the device image straight into the locked output bitmap.
        oclCommandQueue.ReadFromImage(oclOutImage, outImageData.Scan0, true, null);

        succeeded = true;
    }
    finally
    {
        inImage.UnlockBits(inImageData);

        if (outImageData != null)
        {
            outImage.UnlockBits(outImageData);
        }

        if (oclInImage != null)
        {
            oclInImage.Dispose();
        }

        if (oclOutImage != null)
        {
            oclOutImage.Dispose();
        }

        // On failure nobody receives the output bitmap, so release it here.
        if (!succeeded && outImage != null)
        {
            outImage.Dispose();
        }
    }

    return outImage;
}
/// <summary>
/// Re-runs the kernel saved by a previous Invoke call that supplied arguments.
/// </summary>
/// <param name="Method">Method name forwarded to the abort handler.</param>
/// <param name="Offset">Global work offset.</param>
/// <param name="Worksize">Global work size.</param>
/// <exception cref="InvalidOperationException">Thrown when no kernel has been saved yet.</exception>
public void Invoke(string Method, long Offset, long Worksize)
{
    if (LastKernel == null)
    {
        throw new InvalidOperationException("You need to call Invoke with arguments before. All Arguments are saved");
    }

    var kernelEvents = new ComputeEventList();

    InvokeStarted?.Invoke(this, EventArgs.Empty);

    long[] globalOffset = { Offset };
    long[] globalSize = { Worksize };
    queue.Execute(LastKernel, globalOffset, globalSize, null, kernelEvents);

    // Hook the notifications onto the event produced by the launch above.
    var launchEvent = kernelEvents[0];
    launchEvent.Completed += (sender, e) => EasyCL_Completed(sender, null);
    launchEvent.Aborted += (sender, e) => EasyCL_Aborted(sender, Method);

    queue.Finish();
}
/// <summary>
/// Multiplies two row-flattened float matrices on the device.
/// </summary>
/// <param name="matrix1">First matrix as a flat array.</param>
/// <param name="matrix2">Second matrix as a flat array.</param>
/// <param name="matrix1Height">Row count of the first matrix.</param>
/// <param name="matrix1WidthMatrix2Height">Shared dimension (columns of the first, rows of the second).</param>
/// <param name="matrix2Width">Column count of the second matrix.</param>
/// <returns>A flat array of <c>matrix1Height * matrix2Width</c> elements.</returns>
public float[] MultiplyMatrices(float[] matrix1, float[] matrix2, int matrix1Height, int matrix1WidthMatrix2Height, int matrix2Width)
{
    if (!_initialized)
    {
        Initialize();
        _initialized = true;
    }

    float[] ret = new float[matrix1Height * matrix2Width];

    // "using" releases the per-call device buffers even when a call below throws.
    using (ComputeBuffer<float> matrix1Buffer = new ComputeBuffer<float>(_context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, matrix1))
    using (ComputeBuffer<float> matrix2Buffer = new ComputeBuffer<float>(_context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, matrix2))
    using (ComputeBuffer<float> retBuffer = new ComputeBuffer<float>(_context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, ret))
    {
        _kernel.SetMemoryArgument(0, matrix1Buffer);
        _kernel.SetMemoryArgument(1, matrix2Buffer);
        _kernel.SetMemoryArgument(2, retBuffer);
        _kernel.SetValueArgument<int>(3, matrix1WidthMatrix2Height);
        _kernel.SetValueArgument<int>(4, matrix2Width);

        // The work range is two-dimensional, so a one-element offset array would be read
        // past its end by the native call. A null offset means "start at the origin".
        _commandQueue.Execute(_kernel, null, new long[] { matrix2Width, matrix1Height }, null, null);

        unsafe
        {
            fixed (float* retPtr = ret)
            {
                // Non-blocking read; Finish() completes it while "ret" is still pinned.
                _commandQueue.Read(retBuffer, false, 0, ret.Length, new IntPtr(retPtr), null);
                _commandQueue.Finish();
            }
        }
    }

    return ret;
}
/// <summary>
/// Runs the "copy" kernel on sample00.png, dumps source and target pixels to text files,
/// saves the target as copy.png and opens the produced files.
/// </summary>
public void Can_copy()
{
    var platform = ComputePlatform.Platforms.First();
    var device = platform.Devices.First();

    // The context wraps a native handle like the other compute objects below; dispose it too.
    using var context = new ComputeContext(new[] { device }, new ComputeContextPropertyList(platform), null, IntPtr.Zero);
    using var program = new ComputeProgram(context, Copy_kernel);

    try
    {
        program.Build(new[] { device }, "-cl-std=CL1.2", null, IntPtr.Zero);
    }
    catch (Exception)
    {
        // Build failed: hand the program to the build callback and stop.
        OnProgramBuilt(program, device);
        return;
    }

    using var queue = new ComputeCommandQueue(context, device, ComputeCommandQueueFlags.None);
    using var kernel = program.CreateKernel("copy");

    var sourcePath = Path.GetFullPath(Path.Combine(Environment.CurrentDirectory, "sample00.png"));
    using var sourceImage = (Bitmap)Image.FromFile(sourcePath);

    var w = sourceImage.Width;
    var h = sourceImage.Height;

    var source = sourceImage.ToBytes();
    source.SaveFormatted("source.txt", w, h, channels: 4);

    using var sourceBuffer = context.CreateImage2D(source, w, h);

    var target = new byte[h * w * 4];
    using var targetBuffer = context.CreateBuffer(target);

    kernel.SetMemoryArgument(0, sourceBuffer);
    kernel.SetMemoryArgument(1, targetBuffer);

    // One work-item per pixel.
    queue.Execute(kernel, null, new long[] { w, h }, null, null);
    queue.Finish();

    // NOTE(review): "target" is never explicitly read back from targetBuffer here;
    // this presumably relies on how CreateBuffer binds the array — confirm.
    target.SaveFormatted("target.txt", w, h, channels: 4);

    var result = target.ToBitmap(w, h, 4);
    var resultPath = Path.GetFullPath(Path.Combine(Environment.CurrentDirectory, "copy.png"));
    result.Save(resultPath);

    Run("source.txt");
    Run("target.txt");
    Run(resultPath);
}
/// <summary>
/// Builds <paramref name="source"/> as an OpenCL program, runs <paramref name="function"/>
/// over the configured 2D work range and returns the contents of the result buffer.
/// </summary>
/// <typeparam name="T">Element type of the result buffer.</typeparam>
/// <param name="source">OpenCL source; "#define OpenCL" is prepended before building.</param>
/// <param name="function">Name of the kernel function to run.</param>
/// <param name="bufferSize">Number of elements in the result buffer (kernel argument 0).</param>
/// <param name="loaderParams">Supplies the device and the global work size.</param>
/// <param name="kernelParams">Further kernel arguments, wrapped by <c>WrapDeviceVariables</c>.</param>
/// <returns>The result buffer read back from the device.</returns>
protected T[] InternalExecuteOpencl<T>(
    String source,
    String function,
    int bufferSize,
    ParallelTaskParams loaderParams,
    params Object[] kernelParams)
    where T : struct
{
    TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointStart);

    ComputeCommandQueue queue = QueueWithDevice(loaderParams.OpenCLDevice);

    TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformInit);

    String updatedSource = "#define OpenCL\r\n" + source;

    // Program, result buffer and kernel are created per call and owned by this method,
    // so they are released here. The queue comes from QueueWithDevice and is left alone.
    using (ComputeProgram program = new ComputeProgram(queue.Context, updatedSource))
    {
        program.Build(new ComputeDevice[] { queue.Device }, null, null, IntPtr.Zero);

        TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelBuild);

        T[] resultBuffer = new T[bufferSize];

        using (ComputeBuffer<T> resultBufferVar = new ComputeBuffer<T>(queue.Context, ComputeMemoryFlags.WriteOnly, bufferSize))
        {
            // NOTE(review): the objects returned by WrapDeviceVariables are not disposed
            // because their ownership is not visible from here — confirm.
            List<ComputeMemory> vars = new List<ComputeMemory>();
            vars.Add(resultBufferVar);
            vars.AddRange(WrapDeviceVariables(kernelParams, queue.Context));

            using (ComputeKernel kernel = program.CreateKernel(function))
            {
                for (int i = 0; i < vars.Count; i++)
                {
                    kernel.SetMemoryArgument(i, vars[i]);
                }

                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceWrite);

                long[] workersGlobal = new long[2] { loaderParams.GlobalWorkers.Width, loaderParams.GlobalWorkers.Height };
                queue.Execute(kernel, null, workersGlobal, null, null);

                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointKernelExecute);

                // Non-blocking read; Finish() below waits for it to complete.
                queue.ReadFromBuffer<T>(resultBufferVar, ref resultBuffer, false, null);

                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointDeviceRead);

                queue.Finish();

                TriggerCheckpoint(ParallelExecutionCheckpointType.CheckpointPlatformDeinit);

                return resultBuffer;
            }
        }
    }
}
/// <summary>
/// Applies the given parameters (if any), sets the kernel arguments, runs the kernel and
/// waits for it to finish. Ignores parameters previously set by OpenCL.SetArgument and is
/// slower than Execute.
/// </summary>
/// <param name="Offset">Global work offset.</param>
/// <param name="Worksize">Global work size.</param>
/// <param name="Localsize">Local work size, or -1 to pass no local size.</param>
/// <param name="parameter">Optional parameters handed to <c>SetParameter</c>.</param>
public void Execute(long Offset, long Worksize, long Localsize, params object[] parameter)
{
    bool hasParameters = parameter != null && parameter.Length != 0;
    if (hasParameters)
    {
        SetParameter(parameter);
    }

    SetArgs();

    // -1 is the sentinel for "no explicit local work size".
    long[] localWorkSize = Localsize == -1 ? null : new long[] { Localsize };

    queue.Execute(kernel, new long[] { Offset }, new long[] { Worksize }, localWorkSize, null);
    queue.Finish();
}
/// <summary>
/// Renders the scene into an off-screen buffer and converts it to a bitmap.
/// </summary>
/// <param name="camera">Camera settings forwarded to the renderer.</param>
/// <param name="screenshotHeight">Height in pixels; the width follows from <c>ScreenshotAspectRatio</c>.</param>
/// <param name="slowRender">Render pass count factor forwarded to the renderer.</param>
/// <returns>A 32bpp RGB bitmap of the rendered image.</returns>
public Bitmap GetScreenshot(CameraConfig camera, int screenshotHeight, int slowRender)
{
    var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
    var pixels = new Vector4[screenshotWidth * screenshotHeight];

    // "using" releases the device buffer and the queue even when rendering throws.
    using (var computeBuffer = new ComputeBuffer<Vector4>(_program.Context, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight))
    using (var queue = new ComputeCommandQueue(_program.Context, _program.Context.Devices[0], ComputeCommandQueueFlags.None))
    {
        var globalSize = GlobalLaunchsizeFor(screenshotWidth, screenshotHeight);

        for (var i = 0; i < slowRender; i++)
        {
            CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
        }

        for (var i = 0; i < camera.Frame * slowRender; i++)
        {
            CoreRender(computeBuffer, queue, _kernels, new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up), i, camera.Fov, slowRender, camera.FocalDistance, screenshotWidth, screenshotHeight, globalSize, _localSize);
        }

        queue.ReadFromBuffer(computeBuffer, ref pixels, true, null);
        queue.Finish();
    }

    var bmp = new Bitmap(screenshotWidth, screenshotHeight);
    var destBuffer = new int[screenshotWidth * screenshotHeight];

    for (var y = 0; y < screenshotHeight; y++)
    {
        for (var x = 0; x < screenshotWidth; x++)
        {
            var pixel = pixels[x + y * screenshotWidth];
            if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
            {
                // NaN pixels are reported and left at 0 in the output.
                Console.WriteLine("Warning! Caught NAN pixel while taking screenshot!");
                continue;
            }

            // Clamp to [0, 255] before narrowing: converting an out-of-range float to
            // byte gives an unspecified value. In-range values are unaffected.
            int red = (byte)Math.Max(0f, Math.Min(255f, pixel.X * 255));
            int green = (byte)Math.Max(0f, Math.Min(255f, pixel.Y * 255));
            int blue = (byte)Math.Max(0f, Math.Min(255f, pixel.Z * 255));

            destBuffer[y * screenshotWidth + x] = red << 16 | green << 8 | blue;
        }
    }

    var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
    Marshal.Copy(destBuffer, 0, bmpData.Scan0, destBuffer.Length);
    bmp.UnlockBits(bmpData);

    return bmp;
}
/// <summary>
/// Executes the specified kernel function name.
/// </summary>
/// <param name="functionName">Name of the function.</param>
/// <param name="args">
/// Kernel arguments. The length of the first array argument is used as the global work
/// size (1 if there is none); array arguments marked Out or InOut are read back after the run.
/// </param>
/// <exception cref="ExecutionException">
/// Thrown when the kernel function is not found or when execution fails.
/// </exception>
public override void Execute(string functionName, params object[] args)
{
    ValidateArgs(functionName, args);

    ComputeKernel kernel = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));

    // Check before creating the queue, so no queue is leaked on this path.
    if (kernel == null)
    {
        throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
    }

    ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None);

    try
    {
        var ndobject = (Array)args.FirstOrDefault(x => (x.GetType().IsArray));
        long length = ndobject != null ? ndobject.Length : 1;

        var method = KernelFunctions.FirstOrDefault(x => (x.Name == functionName));

        var buffers = BuildKernelArguments(method, args, kernel, length);
        commands.Execute(kernel, null, new long[] { length }, null, null);

        for (int i = 0; i < args.Length; i++)
        {
            if (!args[i].GetType().IsArray)
            {
                continue;
            }

            var ioMode = method.Parameters.ElementAt(i).Value.IOMode;
            if (ioMode == IOMode.InOut || ioMode == IOMode.Out)
            {
                // Copy device results back into the caller's array.
                Array r = (Array)args[i];
                commands.ReadFromMemory(buffers[i], ref r, true, 0, null);
            }

            buffers[i].Dispose();
        }
    }
    catch (Exception ex)
    {
        throw new ExecutionException(ex.Message);
    }
    finally
    {
        try
        {
            commands.Finish();
        }
        finally
        {
            // Release the queue even when Finish() itself fails.
            commands.Dispose();
        }
    }
}
/// <summary>
/// Processes all queen tasks in batches on the second device of <paramref name="context"/>
/// and prints the elapsed time and the summed solution count.
/// </summary>
/// <param name="context">Compute context; its device at index 1 is used.</param>
/// <param name="kernel">Kernel that advances a batch of tasks (argument 0 is the task buffer).</param>
private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
{
    var todos = GetQueenTaskPartition(NumQueens, 4);
    var done = new List<QueenTask>();

    ComputeEventList eventList = new ComputeEventList();
    var sw = new Stopwatch();

    // NOTE(review): Devices[1] is hard-coded and throws on single-device contexts — confirm intent.
    using (var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None))
    {
        Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

        QueenTask[] inProgress = GetNextAssignment(new QueenTask[] { }, todos, done);

        sw.Start();

        while (inProgress.Any())
        {
            // A fresh device buffer is created per batch; release it once the batch is done,
            // otherwise one buffer leaks per loop iteration.
            using (var taskBuffer = new ComputeBuffer<QueenTask>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, inProgress))
            {
                kernel.SetMemoryArgument(0, taskBuffer);

                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                for (int i = 0; i < 12; i++)
                {
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);
                }

                // Non-blocking read; Finish() waits for it before the buffer is disposed.
                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();
            }

            inProgress = GetNextAssignment(inProgress, todos, done);
        }

        sw.Stop();
    }

    Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);

    // Seeded aggregate: yields 0 instead of throwing when no task was completed.
    ulong sum = done.Select(state => state.solutions)
                    .Aggregate(0UL, (total, next) => total + next);

    Console.WriteLine("Q({0})={1}", NumQueens, sum);
}
/// <summary>
/// Copies <paramref name="lenght"/> bytes from <paramref name="origin"/> to
/// <paramref name="dest"/> by running the copy kernel on the device.
/// </summary>
/// <param name="origin">Source bytes.</param>
/// <param name="originOffset">Start offset in <paramref name="origin"/>, passed to the kernel.</param>
/// <param name="dest">Destination bytes; updated with the device result.</param>
/// <param name="destOffset">Start offset in <paramref name="dest"/>, passed to the kernel.</param>
/// <param name="lenght">Number of work-items to launch.</param>
public void GPUCopy(byte[] origin, int originOffset, byte[] dest, int destOffset, int lenght)
{
    // The arrays are not pinned for the buffers' lifetime, so UseHostPointer is unsafe;
    // the data is copied to the device instead. The destination must be writable by the
    // kernel, hence ReadWrite rather than ReadOnly.
    using (ComputeBuffer<byte> originBuffer = new ComputeBuffer<byte>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, origin))
    using (ComputeBuffer<byte> destBuffer = new ComputeBuffer<byte>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, dest))
    {
        kernel.SetMemoryArgument(0, originBuffer);
        kernel.SetValueArgument(1, originOffset);
        kernel.SetMemoryArgument(2, destBuffer);
        kernel.SetValueArgument(3, destOffset);

        queue.Execute(kernel, new long[] { 0 }, new long[] { lenght }, null, null);

        // Bring the result back explicitly. The device buffer started as a copy of "dest",
        // so bytes outside the copied range keep their previous values.
        queue.ReadFromBuffer(destBuffer, ref dest, true, null);
        queue.Finish();
    }
}
/// <summary>
/// Runs the point-search kernel over a <paramref name="PointsinX"/> by
/// <paramref name="PointsinY"/> grid and writes the device results back into
/// <paramref name="X"/> and <paramref name="Y"/>.
/// </summary>
/// <param name="baseImage">First image, copied to the device as kernel argument 0.</param>
/// <param name="nextImage">Second image, copied to the device as kernel argument 1.</param>
/// <param name="X">Point X values; copied to the device and overwritten with the result.</param>
/// <param name="Y">Point Y values; copied to the device and overwritten with the result.</param>
/// <param name="searchDelta">Passed to the kernel as argument 4.</param>
/// <param name="subsetDelta">Passed to the kernel as argument 5.</param>
/// <param name="BitmapWidth">Not used by this method.</param>
/// <param name="BitmapHeight">Passed to the kernel as argument 6.</param>
/// <param name="PointsinX">First dimension of the work range; also kernel argument 7.</param>
/// <param name="PointsinY">Second dimension of the work range.</param>
public void FindPoints(byte[] baseImage, byte[] nextImage, int[] X, int[] Y, int searchDelta, int subsetDelta, int BitmapWidth, int BitmapHeight, int PointsinX, int PointsinY)
{
    using var commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

    // The images are kernel inputs, so they are ReadOnly from the kernel's point of view
    // (WriteOnly would make reading them in the kernel undefined).
    using ComputeBuffer<byte> baseImageBuffer = new ComputeBuffer<byte>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, baseImage);
    using ComputeBuffer<byte> nextImageBuffer = new ComputeBuffer<byte>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, nextImage);

    // X and Y are not pinned for the buffers' lifetime, so UseHostPointer is unsafe here;
    // copy them in and read the results back explicitly below.
    using ComputeBuffer<int> XBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, X);
    using ComputeBuffer<int> YBuffer = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, Y);

    kernel.SetMemoryArgument(0, baseImageBuffer);
    kernel.SetMemoryArgument(1, nextImageBuffer);
    kernel.SetMemoryArgument(2, XBuffer);
    kernel.SetMemoryArgument(3, YBuffer);
    kernel.SetValueArgument(4, searchDelta);
    kernel.SetValueArgument(5, subsetDelta);
    kernel.SetValueArgument(6, BitmapHeight);
    kernel.SetValueArgument(7, PointsinX);

    commands.Execute(kernel, null, new long[] { PointsinX, PointsinY }, null, null);

    // Blocking reads into the caller's arrays.
    commands.ReadFromBuffer(XBuffer, ref X, true, null);
    commands.ReadFromBuffer(YBuffer, ref Y, true, null);
    commands.Finish();
}
/// <summary>
/// Processes all queen tasks in batches on the second device of <paramref name="context"/>
/// and prints the elapsed time and the summed solution count.
/// </summary>
/// <param name="context">Compute context; its device at index 1 is used.</param>
/// <param name="kernel">Kernel that advances a batch of tasks (argument 0 is the task buffer).</param>
private static void ConductSearch(ComputeContext context, ComputeKernel kernel)
{
    var todos = GetQueenTaskPartition(NumQueens, 4);
    var done = new List<QueenTask>();

    ComputeEventList eventList = new ComputeEventList();
    var sw = new Stopwatch();

    // NOTE(review): Devices[1] is hard-coded and throws on single-device contexts — confirm intent.
    using (var commands = new ComputeCommandQueue(context, context.Devices[1], ComputeCommandQueueFlags.None))
    {
        Console.WriteLine("Starting {0} tasks, and working {1} at a time.", todos.Count, Spread);

        QueenTask[] inProgress = GetNextAssignment(new QueenTask[] { }, todos, done);

        sw.Start();

        while (inProgress.Any())
        {
            // One device buffer per batch; dispose it when the batch completes so the
            // loop does not leak a buffer on every iteration.
            using (var taskBuffer = new ComputeBuffer<QueenTask>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, inProgress))
            {
                kernel.SetMemoryArgument(0, taskBuffer);

                commands.WriteToBuffer(inProgress, taskBuffer, false, null);

                for (int i = 0; i < 12; i++)
                {
                    commands.Execute(kernel, null, new long[] { inProgress.Length }, null, eventList);
                }

                // Non-blocking read; Finish() waits for it before the buffer is disposed.
                commands.ReadFromBuffer(taskBuffer, ref inProgress, false, eventList);
                commands.Finish();
            }

            inProgress = GetNextAssignment(inProgress, todos, done);
        }

        sw.Stop();
    }

    Console.WriteLine(sw.ElapsedMilliseconds / 1000.0);

    // Seeded aggregate: yields 0 instead of throwing when no task was completed.
    ulong sum = done.Select(state => state.solutions)
                    .Aggregate(0UL, (total, next) => total + next);

    Console.WriteLine("Q({0})={1}", NumQueens, sum);
}
/// <summary>
/// Fills a buffer with random 64-bit values, maps it for reading and logs both the
/// original and the mapped content. Any exception is written to <paramref name="log"/>.
/// </summary>
/// <param name="context">Compute context; its first device is used.</param>
/// <param name="log">Receives all output.</param>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        var commandQueue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

        log.WriteLine("Original content:");

        var generator = new Random();
        var hostData = new long[6];
        for (int index = 0; index < hostData.Length; index++)
        {
            long sample = (long)(generator.NextDouble() * long.MaxValue);
            hostData[index] = sample;
            log.WriteLine("\t" + sample);
        }

        var deviceBuffer = new ComputeBuffer<long>(context, ComputeMemoryFlags.CopyHostPointer, hostData);

        // Blocking map of the whole buffer for reading.
        IntPtr mapped = commandQueue.Map(deviceBuffer, true, ComputeMemoryMappingFlags.Read, 0, hostData.Length, null);

        log.WriteLine("Mapped content:");

        for (int index = 0; index < hostData.Length; index++)
        {
            // Read the element at its byte offset from the mapped base address.
            log.WriteLine("\t" + Marshal.ReadInt64(mapped, index * sizeof(long)));
        }

        commandQueue.Unmap(deviceBuffer, ref mapped, null);

        // Wait for the unmap to complete, then release buffer and queue.
        commandQueue.Finish();
        deviceBuffer.Dispose();
        commandQueue.Dispose();
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
}
/// <summary>
/// Writes random 64-bit values into a device buffer, maps the buffer for reading and logs
/// the values before and after the round trip. Exceptions are logged, not rethrown.
/// </summary>
/// <param name="context">Compute context; its first device is used.</param>
/// <param name="log">Receives all output.</param>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

        log.WriteLine("Original content:");

        Random random = new Random();
        const int valueCount = 6;
        long[] values = new long[valueCount];
        for (int n = 0; n < valueCount; n++)
        {
            values[n] = (long)(random.NextDouble() * long.MaxValue);
            log.WriteLine("\t" + values[n]);
        }

        ComputeBuffer<long> valuesBuffer = new ComputeBuffer<long>(context, ComputeMemoryFlags.CopyHostPointer, values);

        // Blocking map: the returned pointer is readable immediately.
        IntPtr basePtr = queue.Map(valuesBuffer, true, ComputeMemoryMappingFlags.Read, 0, values.Length, null);

        log.WriteLine("Mapped content:");

        for (int n = 0; n < values.Length; n++)
        {
            IntPtr elementPtr = IntPtr.Add(basePtr, n * sizeof(long));
            long mappedValue = Marshal.ReadInt64(elementPtr);
            log.WriteLine("\t" + mappedValue);
        }

        queue.Unmap(valuesBuffer, ref basePtr, null);

        // The unmap must complete before the buffer and the queue are released.
        queue.Finish();
        valuesBuffer.Dispose();
        queue.Dispose();
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
}
/// <summary>
/// Uploads the trade data, runs <paramref name="kernel"/> over <paramref name="dimensions"/>
/// and reads the fit-function values back into <c>FitFunction</c>.
/// </summary>
/// <param name="context">Compute context used to create the per-call buffers.</param>
/// <param name="kernel">Kernel to run; arguments 0-6 are set here.</param>
/// <param name="commands">Queue the kernel and the read are enqueued on.</param>
/// <param name="dimensions">Global work size.</param>
public override void RunKernel(ComputeContext context, ComputeKernel kernel, ComputeCommandQueue commands, long[] dimensions)
{
    // These three buffers are created on every call, so they must be released on every
    // call; "using" also covers the exception paths.
    using (var tradeprofitsBuffer = new ComputeBuffer<short>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, TradeProfits))
    using (var tradearbitrageBuffer = new ComputeBuffer<byte>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, TradeArbitrage))
    using (var fit_functionBuffer = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, FitFunction))
    {
        kernel.SetMemoryArgument(0, allvarsBuffer);
        kernel.SetMemoryArgument(1, loboundsBuffer);
        kernel.SetMemoryArgument(2, upboundsBuffer);
        kernel.SetMemoryArgument(3, tradeprofitsBuffer);
        kernel.SetMemoryArgument(4, tradearbitrageBuffer);
        kernel.SetMemoryArgument(5, fit_functionBuffer);
        kernel.SetValueArgument<int>(6, VariablesCount);

        // No event list: nothing waits on the launch event, the blocking read and
        // Finish() below provide the synchronization.
        commands.Execute(kernel, null, dimensions, null, null);

        commands.ReadFromBuffer(fit_functionBuffer, ref FitFunction, true, null);
        commands.Finish();
    }
}
/// <summary>
/// "Dummy test": copies six random 64-bit values into an OpenCL buffer, maps the buffer
/// without blocking, waits for the queue and logs the original and the mapped content.
/// </summary>
/// <param name="log">Receives the test output and, on failure, the exception text.</param>
/// <param name="context">Context used to create the command queue and the buffer.</param>
public static void Run(TextWriter log, ComputeContext context)
{
    StartTest(log, "Dummy test");

    try
    {
        // "using" releases the queue and the buffer, which were previously never disposed.
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            log.WriteLine("Original content:");

            Random rand = new Random();
            int count = 6;
            long[] bufferContent = new long[count];
            for (int i = 0; i < count; i++)
            {
                bufferContent[i] = (long)(rand.NextDouble() * long.MaxValue);
                log.WriteLine("\t" + bufferContent[i]);
            }

            using (ComputeBuffer<long> buffer = new ComputeBuffer<long>(context, ComputeMemoryFlags.CopyHostPointer, bufferContent))
            {
                // Non-blocking map (second argument is false); Finish() below waits for it.
                IntPtr mappedPtr = commands.Map(buffer, false, ComputeMemoryMappingFlags.Read, 0, bufferContent.Length, null);
                try
                {
                    commands.Finish();

                    log.WriteLine("Mapped content:");
                    for (int i = 0; i < bufferContent.Length; i++)
                    {
                        log.WriteLine("\t" + Marshal.ReadInt64(mappedPtr, i * sizeof(long)));
                    }
                }
                finally
                {
                    commands.Unmap(buffer, ref mappedPtr, null);

                    // Wait for the unmap before the buffer and the queue are released.
                    commands.Finish();
                }
            }
        }
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }

    EndTest(log, "Dummy test");
}
/// <summary>
/// Lets <paramref name="renderer"/> fill the shared OpenCL buffer, then uploads the result
/// into the texture and draws it on a unit quad.
/// </summary>
/// <param name="renderer">Callback that receives the shared buffer and the command queue.</param>
public void Draw(Action<ComputeBuffer<Vector4>, ComputeCommandQueue> renderer)
{
    GL.Clear(ClearBufferMask.ColorBufferBit | ClearBufferMask.DepthBufferBit);

    // Finish pending GL work before OpenCL acquires the shared object.
    GL.Finish();

    var sharedObjects = new[] { _openCl };
    _queue.AcquireGLObjects(sharedObjects, null);
    try
    {
        renderer(_openCl, _queue);
    }
    finally
    {
        // Hand the object back to GL even when the renderer throws, so it is not left acquired.
        _queue.ReleaseGLObjects(sharedObjects, null);
        _queue.Finish();
    }

    // With a pixel-unpack buffer bound, TexSubImage2D is given IntPtr.Zero as its data argument.
    GL.BindBuffer(BufferTarget.PixelUnpackBuffer, _pub);
    GL.BindTexture(TextureTarget.Texture2D, _texture);
    GL.TexSubImage2D(TextureTarget.Texture2D, 0, 0, 0, _width, _height, PixelFormat.Rgba, PixelType.Float, IntPtr.Zero);

    // Textured quad covering (0,0)-(1,1); texture coordinates are flipped vertically.
    GL.Begin(BeginMode.Quads);
    GL.TexCoord2(0f, 1f);
    GL.Vertex3(0f, 0f, 0f);
    GL.TexCoord2(0f, 0f);
    GL.Vertex3(0f, 1f, 0f);
    GL.TexCoord2(1f, 0f);
    GL.Vertex3(1f, 1f, 0f);
    GL.TexCoord2(1f, 1f);
    GL.Vertex3(1f, 0f, 0f);
    GL.End();
}
/// <summary>
/// Uploads <c>input</c> and <c>weightIDs</c>, launches <c>kernel</c> with
/// <paramref name="nofKernels"/> work-items and reads the result into <c>output</c>.
/// Exceptions are written to the console instead of being propagated.
/// </summary>
/// <param name="nofKernels">Global work size of the kernel launch.</param>
public static void RunKernels(int nofKernels)
{
    try
    {
        // Every command below appends its event to this list. Events consume OpenCL
        // resources, so they are disposed once the queue has finished.
        ComputeEventList eventList = new ComputeEventList();
        try
        {
            // Non-blocking uploads (third argument is false).
            commandQueue1.WriteToBuffer(input, CB_input, false, eventList);
            commandQueue1.WriteToBuffer(weightIDs, CB_networkIndex, false, eventList);

            commandQueue1.Execute(kernel, null, new long[] { nofKernels }, null, eventList);

            // Non-blocking read: "output" must not be used before Finish() returns.
            commandQueue1.ReadFromBuffer(CB_output, ref output, false, eventList);

            // Wait for every enqueued command to complete.
            commandQueue1.Finish();
        }
        finally
        {
            foreach (var computeEvent in eventList)
            {
                computeEvent.Dispose();
            }
            eventList.Clear();
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e.ToString());
    }
}
/// <summary>
/// Executes the compiled kernel with the specified function name.
/// </summary>
/// <typeparam name="TSource">Element type of the array arguments.</typeparam>
/// <param name="functionName">Name of the kernel function.</param>
/// <param name="args">
/// Kernel arguments. The length of the first <c>TSource[]</c> entry is used as the global
/// work size (1 when there is none). Every buffer returned by <c>BuildKernelArguments</c>
/// is read back into the array stored at the same index of <paramref name="args"/>.
/// </param>
/// <exception cref="ExecutionException">
/// The kernel function was not found, or preparing/executing it failed.
/// </exception>
public override void Execute<TSource>(string functionName, params object[] args)
{
    ComputeKernel kernel = _compiledKernels.FirstOrDefault(x => (x.FunctionName == functionName));
    if (kernel == null)
    {
        throw new ExecutionException(string.Format("Kernal function {0} not found", functionName));
    }

    // Created only after the lookup succeeded, so no queue is leaked when the kernel is missing.
    using (ComputeCommandQueue commands = new ComputeCommandQueue(_context, _defaultDevice, ComputeCommandQueueFlags.None))
    {
        try
        {
            // Null entries are skipped instead of failing on GetType().
            var ndobject = (TSource[])args.FirstOrDefault(x => x != null && x.GetType() == typeof(TSource[]));
            long length = ndobject != null ? ndobject.Length : 1;

            var buffers = BuildKernelArguments<TSource>(args, kernel, length);
            commands.Execute(kernel, null, new long[] { length }, null, null);

            foreach (var item in buffers)
            {
                try
                {
                    // Blocking read into the caller's array instance.
                    TSource[] target = (TSource[])args[item.Key];
                    commands.ReadFromBuffer(item.Value, ref target, true, null);
                }
                finally
                {
                    item.Value.Dispose();
                }
            }

            commands.Finish();
        }
        catch (Exception ex)
        {
            throw new ExecutionException(ex.Message);
        }
    }
}
/// <summary>
/// Uploads the inputs, runs <c>kernel</c> with <c>result.Length / 2</c> work-items and
/// reads the output buffer into <paramref name="result"/>.
/// </summary>
/// <param name="info">Data for kernel argument 0.</param>
/// <param name="points">Data for kernel argument 1.</param>
/// <param name="envs">Data for kernel argument 3.</param>
/// <param name="result">Receives the content of the output buffer (kernel argument 4).</param>
private void Run(OpenCLInfo[] info, OpenCLPointInfo[] points, OpenCLNote[] envs, float[] result)
{
    const ComputeMemoryFlags inputFlags = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;

    // "using" releases the per-call buffers even when Execute or the read-back throws.
    using (var infoBuffer = new ComputeBuffer<OpenCLInfo>(FContext, inputFlags, info))
    using (var pointsBuffer = new ComputeBuffer<OpenCLPointInfo>(FContext, inputFlags, points))
    using (var envsBuffer = new ComputeBuffer<OpenCLNote>(FContext, inputFlags, envs))
    using (var resultBuffer = new ComputeBuffer<float>(FContext, ComputeMemoryFlags.WriteOnly, result.Length))
    {
        kernel.SetMemoryArgument(0, infoBuffer);
        kernel.SetMemoryArgument(1, pointsBuffer);
        // Argument 2 is not set here (original note: "2 - FWaveformBuffer").
        kernel.SetMemoryArgument(3, envsBuffer);
        kernel.SetMemoryArgument(4, resultBuffer);

        commands.Execute(kernel, null, new long[] { result.Length / 2 }, null, null);

        // Blocking read (third argument is true).
        commands.ReadFromBuffer(resultBuffer, ref result, true, null);
        commands.Finish();
    }
}
/// <summary>
/// Builds the <c>Calc</c> kernel for the first GPU of the first platform, runs it once as a
/// task on two four-element rows and waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    int[] r1 = new int[] { 1, 2, 3, 4 };
    int[] r2 = new int[] { 4, 3, 2, 1 };
    int rowSize = r1.Length;

    if (ComputePlatform.Platforms.Count == 0)
    {
        Console.WriteLine("No OpenCL platform found");
        return;
    }

    // Pick the first platform.
    ComputePlatform platform = ComputePlatform.Platforms[0];

    // Context with all GPU devices, a queue on the first one and the program built from source.
    using (ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, new ComputeContextPropertyList(platform), null, IntPtr.Zero))
    using (ComputeCommandQueue queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
    using (ComputeProgram program = new ComputeProgram(context, CalculateKernel))
    {
        program.Build(null, null, null, IntPtr.Zero);

        // CopyHostPointer instead of UseHostPointer: the buffers are read-only inputs, and a
        // buffer that keeps using the host pointer would need the managed arrays to stay
        // pinned for the buffer's whole lifetime.
        const ComputeMemoryFlags inputFlags = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;

        using (ComputeKernel kernel = program.CreateKernel("Calc"))
        using (ComputeBuffer<int> row1Buffer = new ComputeBuffer<int>(context, inputFlags, r1))
        using (ComputeBuffer<int> row2Buffer = new ComputeBuffer<int>(context, inputFlags, r2))
        {
            kernel.SetMemoryArgument(0, row1Buffer); // first row
            kernel.SetMemoryArgument(1, row2Buffer); // second row
            kernel.SetValueArgument(2, rowSize);     // number of elements per row

            // Execute the kernel as a single task and wait for completion.
            queue.ExecuteTask(kernel, null);
            queue.Finish();
        }
    }

    Console.WriteLine("Finished");
    Console.ReadKey();
}
/// <summary>
/// Run kernel against all elements.
/// </summary>
/// <remarks>
/// The array is copied into a device buffer, the selected kernel runs with one work-item per
/// element, and the buffer is read back into <paramref name="array"/>.
/// </remarks>
/// <typeparam name="TSource">Struct type that corresponds to kernel function type</typeparam>
/// <param name="array">Array of elements to process</param>
/// <param name="kernelCode">The code of kernel function</param>
/// <param name="kernelSelector">Method that selects kernel by function name; if null uses first</param>
/// <param name="deviceSelector">Method that selects device by index, description, OpenCL version; if null uses first</param>
public static void ClooForEach<TSource>(this TSource[] array, string kernelCode, Func<string, bool> kernelSelector = null, Func<int, string, Version, bool> deviceSelector = null) where TSource : struct
{
    kernelSelector = kernelSelector ?? ((k) => true);
    deviceSelector = deviceSelector ?? ((i, d, v) => true);

    var device = ComputePlatform.Platforms
        .SelectMany(p => p.Devices)
        .Where((d, i) => deviceSelector(i, $"{d.Name} {d.DriverVersion}", d.Version))
        .First();
    var properties = new ComputeContextPropertyList(device.Platform);

    using (var context = new ComputeContext(new[] { device }, properties, null, IntPtr.Zero))
    using (var program = new ComputeProgram(context, kernelCode))
    {
        program.Build(new[] { device }, null, null, IntPtr.Zero);

        var kernels = program.CreateAllKernels().ToList();
        try
        {
            var kernel = kernels.First((k) => kernelSelector(k.FunctionName));

            // CopyHostPointer instead of UseHostPointer: the result is read back explicitly
            // below, so the buffer does not need to alias the managed array, which would have
            // to stay pinned for the buffer's lifetime.
            using (var elementsBuffer = new ComputeBuffer<TSource>(context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, array))
            {
                kernel.SetMemoryArgument(0, elementsBuffer);

                using (var queue = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
                {
                    // One work-item per element.
                    queue.Execute(kernel, null, new long[] { elementsBuffer.Count }, null, null);
                    queue.Finish();

                    // Blocking read of the processed elements into the caller's array.
                    queue.ReadFromBuffer(elementsBuffer, ref array, true, null);
                }
            }
        }
        finally
        {
            kernels.ForEach(k => k.Dispose());
        }
    }
}
/// <summary>
/// Creates the computation program that runs on OpenCL.
/// </summary>
/// <param name="maxDt">Initial time step</param>
/// <param name="a">Amplitude</param>
/// <param name="omega">Angular velocity</param>
public ComputerCL(double maxDt, double a, double omega)
    : base(maxDt, a, omega)
{
    // Use the first platform and all of its devices.
    this.Platform = ComputePlatform.Platforms[0];
    this.Devices = this.Platform.Devices;

    // Context over all devices; the command queue runs on the first device.
    var clContext = new ComputeContext(this.Devices, new ComputeContextPropertyList(this.Platform), null, IntPtr.Zero);
    this.queue = new ComputeCommandQueue(clContext, this.Devices[0], ComputeCommandQueueFlags.None);

    // Build the program; a build failure is reported with the source and the build log.
    var clProgram = new ComputeProgram(clContext, Properties.Resources.SinAcceleration);
    try
    {
        clProgram.Build(this.Devices, null, null, IntPtr.Zero);
    }
    catch (BuildProgramFailureComputeException)
    {
        throw new BuildCLException(clProgram.Source[0], clProgram.GetBuildLog(this.Devices[0]));
    }

    this.sinAccelerationKernel = clProgram.CreateKernel("SinAcceleration");

    // Until a particle is added, preparing does nothing.
    this.prepare = () => { };

    // Adding a particle installs the real preparation step.
    base.ParticleAdded += (sender, e) =>
    {
        this.prepare = () =>
        {
            // Take the particle count from the pending input.
            this.particleCount = this.inputParticles.Count;

            // Device buffers, one element per particle.
            this.bufferX = new ComputeBuffer<Vector4>(clContext, ComputeMemoryFlags.ReadWrite, this.particleCount);
            this.bufferU = new ComputeBuffer<Vector4>(clContext, ComputeMemoryFlags.ReadWrite, this.particleCount);
            this.bufferA = new ComputeBuffer<Vector4>(clContext, ComputeMemoryFlags.ReadWrite, this.particleCount);
            this.bufferD = new ComputeBuffer<float>(clContext, ComputeMemoryFlags.ReadOnly, this.particleCount);

            // Host-side staging arrays.
            var hostX = new Vector4[this.particleCount];
            var hostU = new Vector4[this.particleCount];
            var hostA = new Vector4[this.particleCount];
            this.particlesD = new float[this.particleCount];
            this.particlesMaterial = new Material[this.particleCount];
            this.particlesType = new ParticleType[this.particleCount];

            // Copy every pending particle into the staging arrays.
            int index = 0;
            foreach (var particle in this.inputParticles)
            {
                hostX[index] = new Vector4((Vector3)particle.X, 0);
                hostU[index] = new Vector4((Vector3)particle.U, 0);
                hostA[index] = new Vector4((Vector3)particle.A, 0);
                this.particlesD[index] = (float)particle.D;
                this.particlesMaterial[index] = particle.Material;
                this.particlesType[index] = particle.Type;

                index++;
            }

            // Non-blocking transfers to the device buffers.
            this.queue.WriteToBuffer(hostX, this.bufferX, false, null);
            this.queue.WriteToBuffer(hostU, this.bufferU, false, null);
            this.queue.WriteToBuffer(hostA, this.bufferA, false, null);
            this.queue.WriteToBuffer(this.particlesD, this.bufferD, false, null);

            // The pending input has been consumed.
            this.inputParticles.Clear();

            // Preparing becomes a no-op again.
            this.prepare = () => { };

            // Wait until the transfers above have completed.
            queue.Finish();
        };
    };
}
/// <summary>
/// Uploads the collected points, runs the kernel once per point, reads every layer's result
/// back and rebuilds <c>output</c> with one entry per point.
/// </summary>
public unsafe void EndSend()
{
    // Split the two complex values of each point into the float input arrays.
    for (int i = 0; i < points.Count; i++)
    {
        inx[i].x = (float)points[i].Item3.Real;
        inx[i].y = (float)points[i].Item3.Imaginary;
        inc[i].x = (float)points[i].Item4.Real;
        inc[i].y = (float)points[i].Item4.Imaginary;
    }

    _krnl.SetMemoryArgument(0, x);
    _krnl.SetMemoryArgument(1, c);
    for (int i = 0; i < _ld.Count; i++)
    {
        // One output buffer per layer, starting at argument index 2.
        _krnl.SetMemoryArgument(2 + i, outp[i]);
    }

    // The queue is created for this call only, so it is released when the call is done.
    using (ComputeCommandQueue command = new ComputeCommandQueue(_context, _context.Devices[0], ComputeCommandQueueFlags.None))
    {
        command.WriteToBuffer(inx, x, false, null);
        command.WriteToBuffer(inc, c, false, null);
        command.Execute(_krnl, null, new long[] { points.Count }, null, null);

        for (int i = 0; i < _ld.Count; i++)
        {
            command.ReadFromBuffer(outp[i], ref opl[i], false, null);
        }

        // All transfers above are non-blocking; wait before touching "opl".
        command.Finish();
    }

    output = new Queue<Tuple<int, int, List<ProcessLayer>>>();
    for (int i = 0; i < points.Count; i++)
    {
        List<ProcessLayer> pl = new List<ProcessLayer>();
        for (int ii = 0; ii < _ld.Count; ii++)
        {
            var computed = opl[ii][i];

            // Start from a clone of the layer definition and overwrite the computed fields.
            ProcessLayer p = _ld[ii].Clone();
            p.c_active = computed.c_active != 0;
            p.c_calc = computed.c_calc;
            p.c_cmean = computed.c_cmean;
            p.c_cvariance = computed.c_cvariance;
            p.c_cvarsx = computed.c_cvarsx;
            p.c_isin = computed.c_isin != 0;
            p.c_n = computed.c_n;
            p.c_old2x = new Complex(computed.c_old2x.x, computed.c_old2x.y);
            p.c_oldx = new Complex(computed.c_oldx.x, computed.c_oldx.y);
            p.c_resn = computed.c_resn;
            p.c_resx = new Complex(computed.c_resx.x, computed.c_resx.y);
            p.c_x = new Complex(computed.c_x.x, computed.c_x.y);
            pl.Add(p);
        }

        output.Enqueue(Tuple.Create(points[i].Item1, points[i].Item2, pl));
    }
}
/// <summary>
/// Times 100 launches of the <c>CompareGPUCPU</c> kernel over 640*480 random floats and logs
/// the average time per launch together with the first input and output element.
/// </summary>
/// <param name="context">Context used to build the program and create the buffers.</param>
/// <param name="log">Receives the timing result and, on failure, the exception text.</param>
public void Run(ComputeContext context, TextWriter log)
{
    try
    {
        // Input filled with random data; output filled by the kernel.
        int count = 640 * 480;
        float[] arrA = new float[count];
        float[] arrC = new float[count];

        Random rand = new Random();
        for (int i = 0; i < count; i++)
        {
            arrA[i] = (float)(rand.NextDouble() * 100);
        }

        // Create and build the OpenCL program.
        program = new ComputeProgram(context, clProgramSource);
        program.Build(null, null, null, IntPtr.Zero);

        // CopyHostPointer fills the input buffer from arrA; the output buffer only needs a size.
        // "using" releases the buffers, the kernel and the queue, also on failure.
        using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
        using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
        using (ComputeKernel kernel = program.CreateKernel("CompareGPUCPU"))
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            int repeatTimes = 100;

            // Stopwatch is a monotonic timer, unlike the wall clock read through DateTime.Now.
            var timer = System.Diagnostics.Stopwatch.StartNew();
            for (int repeatCounter = 0; repeatCounter < repeatTimes; repeatCounter++)
            {
                kernel.SetMemoryArgument(0, a);
                kernel.SetMemoryArgument(1, c);

                // No event list is passed, so no events are created for these commands.
                commands.Execute(kernel, null, new long[] { count }, null, null);

                // Non-blocking read: arrC must not be used before Finish() returns.
                commands.ReadFromBuffer(c, ref arrC, false, null);
                commands.Finish();
            }
            timer.Stop();

            double perTaskTime = timer.Elapsed.TotalMilliseconds / repeatTimes;
            log.WriteLine("Use {0} ms using GPU", perTaskTime);
            log.WriteLine("arrA[0]:{0}, arrC[0]:{1}", arrA[0], arrC[0]);
        }
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
}
/// <summary>
/// Runs the <c>test</c> kernel once and reads its 16-element output buffer into a local array.
/// </summary>
/// <param name="programHandle">Not used by this method.</param>
/// <param name="userDataPtr">Not used by this method.</param>
private unsafe void notify(CLProgramHandle programHandle, IntPtr userDataPtr)
{
    uint[] dst = new uint[16];
    fixed (uint* dstPtr = dst)
    {
        // The queue, the buffer and the kernel live for this call only and are released here.
        using (var queue = new ComputeCommandQueue(ccontext, device, ComputeCommandQueueFlags.None))
        using (var buf = new ComputeBuffer<uint>(ccontext, ComputeMemoryFlags.WriteOnly, dst.Length))
        using (var kernel = program.CreateKernel("test"))
        {
            kernel.SetValueArgument(0, 1443351125U);
            kernel.SetMemoryArgument(1, buf);

            // Three-dimensional launch.
            queue.Execute(kernel, null, new long[] { 16L, 256L, 1048576L }, null, null);
            queue.Finish();

            // Blocking read straight into the pinned destination array.
            queue.Read<uint>(buf, true, 0, dst.Length, (IntPtr)dstPtr, null);
        }
    }
}
/// <summary>
/// Renders an animated gif: one pass renders every frame without storing it, a second pass
/// renders each frame again and adds it to the encoder.
/// </summary>
/// <param name="control">Control that is configured for each frame of the sequence.</param>
/// <param name="displayInformation">Receives progress messages.</param>
/// <exception cref="Exception">A frame could not be added to the gif.</exception>
public void TakeGif(IGifableControl control, Action<string> displayInformation)
{
    _kernelInUse++;
    try
    {
        var frameCount = StaticSettings.Fetch.GifFramecount;

        var encoder = new AnimatedGifEncoder();
        encoder.Start(Ext.UniqueFilename("sequence", "gif"));
        encoder.SetDelay(1000 / StaticSettings.Fetch.GifFramerate);
        encoder.SetRepeat(0);

        var endIgnoreControl = control.StartIgnoreControl();
        try
        {
            var ccontext = _kernel.ComputeContext;
            var screenshotHeight = StaticSettings.Fetch.GifHeight;
            var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
            var fdc = control as IFrameDependantControl;

            // The queue and the frame buffer are released even when rendering fails.
            using (var queue = new ComputeCommandQueue(ccontext, ccontext.Devices[0], ComputeCommandQueueFlags.None))
            using (var computeBuffer = new ComputeBuffer<Vector4>(ccontext, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight))
            {
                // First pass: render every frame without storing it.
                for (var i = 0; i < frameCount + 1; i++)
                {
                    if (fdc != null)
                    {
                        fdc.Frame = i;
                    }

                    var teardown = control.SetupGif((double)i / frameCount);
                    _kernel.Render(computeBuffer, queue, _parameters, new Size(screenshotWidth, screenshotHeight));
                    queue.Finish();
                    teardown();
                }

                // Second pass: render each frame again and hand it to the encoder.
                for (var i = 1; i < frameCount + 1; i++)
                {
                    if (fdc != null)
                    {
                        fdc.Frame = i;
                    }

                    displayInformation(string.Format("{0}% done with gif", (int)(100.0 * (i - 1) / frameCount)));
                    var teardown = control.SetupGif((double)(i - 1) / frameCount);
                    _kernel.Render(computeBuffer, queue, _parameters, new Size(screenshotWidth, screenshotHeight));
                    if (encoder.AddFrame(Download(queue, computeBuffer, screenshotWidth, screenshotHeight)) == false)
                    {
                        throw new Exception("Could not add frame to gif");
                    }
                    teardown();
                }
            }
        }
        finally
        {
            // Always give control back and finish the encoder, also on failure.
            endIgnoreControl();
            encoder.Finish();
        }

        displayInformation("Done with gif");
    }
    finally
    {
        // The usage counter must not stay incremented when an exception escapes.
        _kernelInUse--;
    }
}
/// <summary>
/// Runs the <c>Barycentric</c> kernel once per calculation, stores the resulting coordinates
/// on each calculation and adds a draw point to its triangle when the kernel marks the
/// result as valid.
/// </summary>
/// <param name="calculations">Calculations to evaluate; an empty list is a no-op.</param>
public static void Calculate(List<Calculation> calculations)
{
    Stopwatch s = new Stopwatch();
    s.Start();

    int count = calculations.Count;
    if (count == 0)
    {
        // Nothing to compute; avoids creating zero-length device buffers.
        return;
    }

    IntVec2[] p_p = new IntVec2[count];
    IntVec2[] p_a = new IntVec2[count];
    IntVec2[] p_b = new IntVec2[count];
    IntVec2[] p_c = new IntVec2[count];
    FloatVec3[] c = new FloatVec3[count];
    int[] c_valid = new int[count];

    Parallel.For(0, count, i =>
    {
        var calc = calculations[i];
        p_p[i] = new IntVec2(calc.P);
        p_a[i] = new IntVec2(calc.A);
        p_b[i] = new IntVec2(calc.B);
        p_c[i] = new IntVec2(calc.C);
    });
    mark(s, "memory init");

    const ComputeMemoryFlags inputFlags = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;

    // "using" releases every OpenCL object even when a later step throws.
    using (var _p_p = new ComputeBuffer<IntVec2>(context, inputFlags, p_p))
    using (var _p_a = new ComputeBuffer<IntVec2>(context, inputFlags, p_a))
    using (var _p_b = new ComputeBuffer<IntVec2>(context, inputFlags, p_b))
    using (var _p_c = new ComputeBuffer<IntVec2>(context, inputFlags, p_c))
    using (var _c = new ComputeBuffer<FloatVec3>(context, ComputeMemoryFlags.WriteOnly, c.Length))
    using (var _c_valid = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, c_valid.Length))
    {
        mark(s, "memory buffer init");

        using (ComputeKernel kernel = program.CreateKernel("Barycentric"))
        {
            kernel.SetMemoryArgument(0, _p_p);
            kernel.SetMemoryArgument(1, _p_a);
            kernel.SetMemoryArgument(2, _p_b);
            kernel.SetMemoryArgument(3, _p_c);
            kernel.SetMemoryArgument(4, _c);
            kernel.SetMemoryArgument(5, _c_valid);
            mark(s, "memory init 2");

            ComputeEventList eventList = new ComputeEventList();
            using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
            {
                try
                {
                    commands.Execute(kernel, null, new long[] { count }, null, eventList);
                    mark(s, "execute");

                    // Non-blocking reads; Finish() waits for both.
                    commands.ReadFromBuffer(_c, ref c, false, eventList);
                    commands.ReadFromBuffer(_c_valid, ref c_valid, false, eventList);
                    commands.Finish();
                    mark(s, "read 1");

                    Parallel.For(0, count, i =>
                    {
                        var calc = calculations[i];
                        calc.Coords = new BarycentricCoordinates(c[i].U, c[i].V, c[i].W);
                        if (c_valid[i] == 1)
                        {
                            // Several calculations can target the same triangle concurrently.
                            lock (calc.Tri)
                            {
                                calc.Tri.Points.Add(new DrawPoint(calc.Coords, calc.P));
                            }
                        }
                    });
                    mark(s, "read 2");
                }
                finally
                {
                    // Release the events collected by the commands above.
                    foreach (var computeEvent in eventList)
                    {
                        computeEvent.Dispose();
                    }
                    eventList.Clear();
                }
            }
        }
    }

    mark(s, "dispose");
}
/// <summary>
/// Renders the scene into an off-screen buffer and converts the result to a bitmap.
/// </summary>
/// <param name="camera">Camera configuration passed to every render pass.</param>
/// <param name="screenshotHeight">Height in pixels; the width follows from <c>ScreenshotAspectRatio</c>.</param>
/// <param name="slowRender">Number of passes of the first render loop; also multiplies the pass count of the second loop.</param>
/// <returns>A 32-bit RGB bitmap; pixels with a NaN component are left black.</returns>
public Bitmap GetScreenshot(CameraConfig camera, int screenshotHeight, int slowRender)
{
    var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
    var pixels = new Vector4[screenshotWidth * screenshotHeight];

    // The buffer and the queue are released even when a render pass or the read-back throws.
    using (var computeBuffer = new ComputeBuffer<Vector4>(_program.Context, ComputeMemoryFlags.ReadWrite, screenshotWidth * screenshotHeight))
    using (var queue = new ComputeCommandQueue(_program.Context, _program.Context.Devices[0], ComputeCommandQueueFlags.None))
    {
        var globalSize = GlobalLaunchsizeFor(screenshotWidth, screenshotHeight);

        // Both loops issue the same render call; only the pass index differs.
        Action<int> renderPass = pass => CoreRender(
            computeBuffer, queue, _kernels,
            new Vector4((Vector3)camera.Position), new Vector4((Vector3)camera.Lookat), new Vector4((Vector3)camera.Up),
            pass, camera.Fov, slowRender, camera.FocalDistance,
            screenshotWidth, screenshotHeight, globalSize, _localSize);

        for (var i = 0; i < slowRender; i++)
        {
            renderPass(i);
        }
        for (var i = 0; i < camera.Frame * slowRender; i++)
        {
            renderPass(i);
        }

        // Blocking read of the rendered pixels.
        queue.ReadFromBuffer(computeBuffer, ref pixels, true, null);
        queue.Finish();
    }

    var bmp = new Bitmap(screenshotWidth, screenshotHeight);
    var destBuffer = new int[screenshotWidth * screenshotHeight];

    // Pixels are stored row by row, so source and destination share the same linear index.
    for (var index = 0; index < pixels.Length; index++)
    {
        var pixel = pixels[index];
        if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
        {
            Console.WriteLine("Warning! Caught NAN pixel while taking screenshot!");
            continue;
        }

        // Pack the three channels as 0x00RRGGBB.
        destBuffer[index] = (byte)(pixel.X * 255) << 16 | (byte)(pixel.Y * 255) << 8 | (byte)(pixel.Z * 255);
    }

    var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
    try
    {
        Marshal.Copy(destBuffer, 0, bmpData.Scan0, destBuffer.Length);
    }
    finally
    {
        bmp.UnlockBits(bmpData);
    }

    return bmp;
}
/// <summary>
/// Transforms the fluorophore coordinates on the device, convolves them into an image,
/// prints the pixel values and writes the result to disk.
/// </summary>
/// <param name="computeContext">Context used to build the program and create all OpenCL objects.</param>
/// <exception cref="SyntaxErrorException">A shift value could not be parsed with '.' as decimal separator.</exception>
private void CalculateConvolution(ComputeContext computeContext)
{
    Stopwatch stopwatch = new Stopwatch();
    stopwatch.Start();

    // NOTE(review): dx, dy and dz are all parsed from textBoxShiftX; dy and dz look like
    // copy-paste leftovers that should read their own text boxes. Confirm against the form.
    float dx;
    if (!float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dx))
    {
        throw new SyntaxErrorException(", needs to be .");
    }

    float dy;
    if (!float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dy))
    {
        throw new SyntaxErrorException(", needs to be .");
    }

    float dz;
    if (!float.TryParse(textBoxShiftX.Text, NumberStyles.Float, CultureInfo.InvariantCulture.NumberFormat, out dz))
    {
        throw new SyntaxErrorException(", needs to be .");
    }

    int pixelCount = _imageDimensionX * _imageDimensionY * _imageDimensionZ;

    Console.WriteLine("Computing...");
    Console.WriteLine("Reading kernel...");

    String kernelPath = Directory.GetParent(Directory.GetCurrentDirectory()).Parent.Parent.FullName;
    String kernelString;
    using (var sr = new StreamReader(kernelPath + "\\convolution.cl"))
    {
        kernelString = sr.ReadToEnd();
    }
    Console.WriteLine("Reading kernel... done");

    // Parsed once, with the same invariant-culture rule as the shift values above.
    float inversePixelSize = 1.0f / float.Parse(textBoxPixelSize.Text, NumberStyles.Float, CultureInfo.InvariantCulture);
    float[] selectedTransformation = Transformations.GetTransformation(
        (TransformationType)comboBoxTransform.SelectedItem,
        inversePixelSize, inversePixelSize, inversePixelSize,
        dx, dy, dz);

    const int convolve_kernel_lwgs = 16;

    // Round the pixel count up to a multiple of the work-group size. Integer arithmetic stays
    // exact where a float division would lose precision for large images.
    int totalBuffer = (pixelCount + convolve_kernel_lwgs - 1) / convolve_kernel_lwgs * convolve_kernel_lwgs;
    float[] resultImageData = new float[totalBuffer];

    // Every OpenCL object created below is released when its "using" block ends.
    using (ComputeProgram computeProgram = new ComputeProgram(computeContext, kernelString))
    {
        computeProgram.Build(computeContext.Devices, null, null, IntPtr.Zero);

        ComputeProgramBuildStatus computeProgramBuildStatus = computeProgram.GetBuildStatus(_selectedComputeDevice);
        Console.WriteLine("computeProgramBuildStatus\n\t" + computeProgramBuildStatus);

        String buildLog = computeProgram.GetBuildLog(_selectedComputeDevice);
        Console.WriteLine("buildLog");
        if (buildLog.Equals("\n"))
        {
            Console.WriteLine("\tbuildLog is empty...");
        }
        else
        {
            Console.WriteLine("\t" + buildLog);
        }

        float[] fluorophores = CsvData.ReadFluorophores(_sourceFilename);

        using (ComputeCommandQueue computeCommandQueue = new ComputeCommandQueue(computeContext, _selectedComputeDevice, ComputeCommandQueueFlags.None))
        using (ComputeBuffer<float> fluorophoresCoords = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadWrite, fluorophores.LongLength))
        using (ComputeBuffer<float> transformationMatrix = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.ReadOnly, selectedTransformation.LongLength))
        {
            // Step 1: transform the fluorophore coordinates in place on the device.
            using (ComputeKernel transformFluorophoresKernel = computeProgram.CreateKernel("transform_fluorophores"))
            {
                transformFluorophoresKernel.SetMemoryArgument(0, fluorophoresCoords);
                transformFluorophoresKernel.SetMemoryArgument(1, transformationMatrix);

                // One work-item per group of four floats.
                long[] transformGlobalWorkSize = new long[] { fluorophores.Length / 4 };

                computeCommandQueue.WriteToBuffer(fluorophores, fluorophoresCoords, true, null);
                computeCommandQueue.WriteToBuffer(selectedTransformation, transformationMatrix, true, null);
                computeCommandQueue.Execute(transformFluorophoresKernel, null, transformGlobalWorkSize, null, null);

                // Blocking read-back of the transformed coordinates; the values are not used further here.
                float[] transformedFluorophores = new float[fluorophores.Length];
                computeCommandQueue.ReadFromBuffer(fluorophoresCoords, ref transformedFluorophores, true, null);
                computeCommandQueue.Finish();
            }

            stopwatch.Stop();
            Console.WriteLine("Transform fluophores duration:\n\t" + stopwatch.Elapsed);
            stopwatch.Reset();
            stopwatch.Start();

            // Step 2: convolve the transformed fluorophores into the result image.
            using (ComputeBuffer<float> resultImage = new ComputeBuffer<float>(computeContext, ComputeMemoryFlags.WriteOnly, totalBuffer))
            using (ComputeKernel convolveFluorophoresKernel = computeProgram.CreateKernel("convolve_fluorophores"))
            {
                convolveFluorophoresKernel.SetMemoryArgument(0, resultImage);
                convolveFluorophoresKernel.SetValueArgument(1, _imageDimensionX);
                convolveFluorophoresKernel.SetValueArgument(2, _imageDimensionY);
                convolveFluorophoresKernel.SetMemoryArgument(3, fluorophoresCoords);
                convolveFluorophoresKernel.SetLocalArgument(4, convolve_kernel_lwgs);
                convolveFluorophoresKernel.SetValueArgument(5, fluorophores.Length / 4);

                // NOTE(review): the global size is pixelCount while the local size is 16. Confirm
                // that pixelCount is always a multiple of 16, or launch totalBuffer work-items.
                long[] convolveGlobalWorkSize = new long[] { pixelCount };
                long[] convolveLocalWorkSize = new long[] { convolve_kernel_lwgs };

                computeCommandQueue.Execute(convolveFluorophoresKernel, null, convolveGlobalWorkSize, convolveLocalWorkSize, null);
                computeCommandQueue.ReadFromBuffer(resultImage, ref resultImageData, true, null);
                computeCommandQueue.Finish();
            }
        }
    }

    for (int i = 0; i < pixelCount; i++)
    {
        Console.WriteLine(resultImageData[i]);
    }

    Console.WriteLine("Writing data to file...");
    TiffData.WriteToDisk(resultImageData, _saveFilename, _imageDimensionX, _imageDimensionY);

    // Truncate each value to 16 bits and convert to 32-bit RGB.
    ushort[] ushortdata = new ushort[resultImageData.Length];
    for (int i = 0; i < resultImageData.Length; i++)
    {
        ushortdata[i] = (ushort)resultImageData[i];
    }

    uint[] convertGray16ToRgb = ConvertGray16ToRGB(ushortdata, 16);

    // Four bytes per pixel. The previous per-pixel copy wrote the first byte of every pixel to
    // bytes[index] instead of bytes[4 * index]; a block copy moves all bytes to the right offsets.
    byte[] bytes = new byte[convertGray16ToRgb.Length * 4];
    Buffer.BlockCopy(convertGray16ToRgb, 0, bytes, 0, bytes.Length);

    // NOTE(review): "bytes" holds raw pixel values, while FromStream is normally given an
    // encoded image file; confirm whether this save step can succeed as written.
    using (MemoryStream ms = new MemoryStream(bytes))
    using (Image image = Bitmap.FromStream(ms))
    {
        image.Save("c:\\temp.bmp");
    }

    Console.WriteLine("Writing data to file... done");
    stopwatch.Stop();
    Console.WriteLine("Convolve fluophores duration:\n\t" + stopwatch.Elapsed);
    Console.WriteLine("Computing... done");
}
/// <summary>
/// Runs the "VectorAdd" sample: fills two 10-element arrays with random values,
/// adds them on the first device of <paramref name="context"/> and writes each
/// sum to <paramref name="log"/>.
/// </summary>
/// <param name="context">The OpenCL context used for buffers, program and command queue.</param>
/// <param name="log">Receives the results, or the text of any exception that was thrown.</param>
public void Run(ComputeContext context, TextWriter log)
{
    ComputeBuffer<float> a = null;
    ComputeBuffer<float> b = null;
    ComputeBuffer<float> c = null;
    ComputeProgram builtProgram = null;
    ComputeKernel kernel = null;
    ComputeCommandQueue commands = null;

    // Collects the events produced by the enqueued commands so they can be released afterwards.
    ComputeEventList eventList = new ComputeEventList();

    try
    {
        // Create the arrays and fill the inputs with random data.
        int count = 10;
        float[] arrA = new float[count];
        float[] arrB = new float[count];
        float[] arrC = new float[count];

        Random rand = new Random();
        for (int i = 0; i < count; i++)
        {
            arrA[i] = (float)(rand.NextDouble() * 100);
            arrB[i] = (float)(rand.NextDouble() * 100);
        }

        // Input buffers are initialised from the host arrays (CopyHostPointer).
        a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA);
        b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB);

        // The output buffer needs no host data; only its element count is given.
        c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length);

        // Create and build the OpenCL program. The field keeps referencing it, as before.
        builtProgram = new ComputeProgram(context, clProgramSource);
        program = builtProgram;
        builtProgram.Build(null, null, null, IntPtr.Zero);

        // Create the kernel and bind its arguments.
        kernel = builtProgram.CreateKernel("VectorAdd");
        kernel.SetMemoryArgument(0, a);
        kernel.SetMemoryArgument(1, b);
        kernel.SetMemoryArgument(2, c);

        // The command queue controls kernel execution and read/write/copy operations.
        commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

        // Enqueue "count" work-items; an event for this command is appended to eventList.
        commands.Execute(kernel, null, new long[] { count }, null, eventList);

        // Non-blocking read (3rd argument is false): arrC must not be used until
        // the queue has been synchronised below.
        commands.ReadFromBuffer(c, ref arrC, false, eventList);

        // Explicit synchronisation; eventList.Wait() would be the alternative.
        commands.Finish();

        // Print the results to the log/console.
        for (int i = 0; i < count; i++)
        {
            log.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
        }
    }
    catch (Exception e)
    {
        log.WriteLine(e.ToString());
    }
    finally
    {
        // Release everything that was created, on the failure path as well,
        // in the same order the success path always used.
        if (commands != null) commands.Dispose();

        foreach (ComputeEventBase eventBase in eventList)
        {
            eventBase.Dispose();
        }
        eventList.Clear();

        if (kernel != null) kernel.Dispose();
        if (builtProgram != null) builtProgram.Dispose();
        if (a != null) a.Dispose();
        if (b != null) b.Dispose();
        if (c != null) c.Dispose();
    }
}
/// <summary>
/// Executes <c>openCLKernel</c> once (a single work-item) with the given board data
/// and copies the kernel's results back to the caller. The elapsed time in
/// milliseconds is written to the console.
/// </summary>
/// <param name="libertyGroups">2-D input, flattened with <c>twoDtoOneD</c> for kernel argument 0.</param>
/// <param name="groupNumbers">2-D input, flattened with <c>twoDtoOneD</c> for kernel argument 1.</param>
/// <param name="x">Kernel argument 2.</param>
/// <param name="y">Kernel argument 3.</param>
/// <param name="surroundingLibs">Kernel argument 4; overwritten with the buffer contents after execution.</param>
/// <param name="emptySurroundings">Receives element 0 of the buffer bound to kernel argument 5.</param>
/// <param name="ourSgn">Kernel argument 7.</param>
/// <param name="duplicateGroups">Receives element 0 of the buffer bound to kernel argument 6.</param>
public static void CallOpenCL(int[,] libertyGroups, int[,] groupNumbers, int x, int y, int[] surroundingLibs, ref int emptySurroundings, int ourSgn, ref int duplicateGroups)
{
    // Single-element arrays act as host-side storage for the two "ref int" outputs.
    int[] emptySurroundRef = new int[1];
    int[] duplicateGroupsRef = new int[1];

    // NOTE(review): UseHostPointer requires the host memory to stay valid for the
    // buffer's lifetime; confirm that the Cloo version in use keeps these managed
    // arrays pinned, otherwise CopyHostPointer would be the safer flag.
    //
    // Every buffer and the queue are disposed deterministically; previously they
    // were left to the finalizer on each call.
    using (ComputeBuffer<int> libertyGroupsIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, twoDtoOneD(libertyGroups)))
    using (ComputeBuffer<int> groupNumbersIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, twoDtoOneD(groupNumbers)))
    using (ComputeBuffer<int> surroundingLibsIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, surroundingLibs))
    using (ComputeBuffer<int> emptySurroundRefIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, emptySurroundRef))
    using (ComputeBuffer<int> duplicateGroupsRefIn = new ComputeBuffer<int>(openCLContext, ComputeMemoryFlags.UseHostPointer, duplicateGroupsRef))
    using (ComputeCommandQueue commands = new ComputeCommandQueue(openCLContext, openCLContext.Devices[0], ComputeCommandQueueFlags.None))
    {
        // Bind the kernel arguments (the two 2-D inputs were mapped to 1-D above).
        openCLKernel.SetMemoryArgument(0, libertyGroupsIn);
        openCLKernel.SetMemoryArgument(1, groupNumbersIn);
        openCLKernel.SetValueArgument<int>(2, x);
        openCLKernel.SetValueArgument<int>(3, y);
        openCLKernel.SetMemoryArgument(4, surroundingLibsIn);
        openCLKernel.SetMemoryArgument(5, emptySurroundRefIn);
        openCLKernel.SetMemoryArgument(6, duplicateGroupsRefIn);
        openCLKernel.SetValueArgument<int>(7, ourSgn);

        long executionTime = DateTime.Now.Ticks;

        // Execute the kernel with a global and local work size of 1.
        commands.Execute(openCLKernel, null, new long[] { 1 }, new long[] { 1 }, null);
        commands.Finish();

        // Read the output data back with blocking reads.
        commands.ReadFromBuffer(surroundingLibsIn, ref surroundingLibs, true, null);

        commands.ReadFromBuffer(emptySurroundRefIn, ref emptySurroundRef, true, null);
        emptySurroundings = emptySurroundRef[0];

        commands.ReadFromBuffer(duplicateGroupsRefIn, ref duplicateGroupsRef, true, null);
        duplicateGroups = duplicateGroupsRef[0];

        commands.Finish();

        executionTime = DateTime.Now.Ticks - executionTime;

        // Ticks are 100 ns, so dividing by 10000 yields milliseconds.
        // Result checks are done by the caller.
        Console.WriteLine(executionTime / 10000.0);
    }
}
/// <summary>
/// Reads <paramref name="width"/> x <paramref name="height"/> pixels from
/// <paramref name="buffer"/> and converts them into a 32bpp RGB bitmap.
/// </summary>
/// <param name="queue">Command queue used for the blocking read.</param>
/// <param name="buffer">Device buffer holding one <c>Vector4</c> per pixel.</param>
/// <param name="width">Bitmap width in pixels.</param>
/// <param name="height">Bitmap height in pixels.</param>
/// <returns>A new bitmap whose pixels are the clamped X/Y/Z components scaled to 0..255.</returns>
private Bitmap Download(ComputeCommandQueue queue, ComputeBuffer<Vector4> buffer, int width, int height)
{
    var raw = new Vector4[width * height];
    queue.ReadFromBuffer(buffer, ref raw, true, 0, 0, width * height, null);
    queue.Finish();

    // Pack each pixel as 0x00RRGGBB after clamping every component to [0, 1].
    var packed = new int[raw.Length];
    for (var index = 0; index < raw.Length; index++)
    {
        var clamped = Vector4.Clamp(raw[index], new Vector4(0), new Vector4(1));
        var red = (byte)(clamped.X * 255) << 16;
        var green = (byte)(clamped.Y * 255) << 8;
        var blue = (byte)(clamped.Z * 255);
        packed[index] = red | green | blue;
    }

    var result = new Bitmap(width, height, PixelFormat.Format32bppRgb);
    var area = new Rectangle(0, 0, result.Width, result.Height);
    var locked = result.LockBits(area, ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
    Marshal.Copy(packed, 0, locked.Scan0, packed.Length);
    result.UnlockBits(locked);

    return result;
}
/// <summary>
/// Runs the password-search kernel for the given MD5 hash over a key space of
/// single-character entries and returns the password reported by the kernel.
/// </summary>
/// <param name="hash">Hash bytes uploaded as kernel argument 0.</param>
/// <param name="type">Hash algorithm; only <c>HashType.MD5</c> is accepted.</param>
/// <param name="maxLength">Maximum password length; values above 6 are rejected.</param>
/// <param name="keySpace">Candidate characters; every entry must be at most one character long.</param>
/// <returns>The password decoded as ASCII, or <c>null</c> when the kernel reports length 0.</returns>
/// <exception cref="NotImplementedException">
/// Thrown for non-MD5 hashes, <paramref name="maxLength"/> above 6, or key-space entries longer than one character.
/// </exception>
public String SearchPassword (byte[] hash, HashType type, int maxLength, String[] keySpace)
{
	if (type != HashType.MD5) {
		throw new NotImplementedException ("sums other than MD5 not supported");
	}

	if (maxLength > 6) {
		// The message now states the limit that the check actually enforces.
		throw new NotImplementedException ("doesn't support longer passwords than 6");
	}

	var joinedKeySpace = new List<byte> ();
	foreach (var k in keySpace) {
		if (k.Length > 1) {
			throw new NotImplementedException ("doesn't support longer keyspaces than 1");
		}

		joinedKeySpace.AddRange (Encoding.ASCII.GetBytes (k));
	}

	byte[] resultData = new byte[20];
	byte[] passLen = new byte[1];
	int[] flag = new int[1];
	byte[] keyspaceJoined = joinedKeySpace.ToArray ();

	// passLenBuffer and flagBuffer are created from zeroed host arrays so that, should
	// the kernel leave them untouched, the host reads 0 rather than indeterminate
	// device memory. All buffers and the queue are released when the block exits.
	using (var resultBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, resultData))
	using (var hashBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, hash))
	using (var keyspaceBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, keyspaceJoined))
	using (var passLenBuffer = new ComputeBuffer<byte> (Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.CopyHostPointer, passLen))
	using (var flagBuffer = new ComputeBuffer<int> (Context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, flag))
	using (var queue = new ComputeCommandQueue (Context, Device, ComputeCommandQueueFlags.None)) {
		Kernel.SetMemoryArgument (0, hashBuffer);
		Kernel.SetMemoryArgument (1, keyspaceBuffer);
		Kernel.SetMemoryArgument (2, resultBuffer);
		Kernel.SetMemoryArgument (3, passLenBuffer);
		Kernel.SetMemoryArgument (4, flagBuffer);

		// Execute the kernel over a 3-D range: key-space size x 57*57 x 57*57.
		long firstDim = joinedKeySpace.Count;
		var globalWorksize = new long[] { firstDim, 57 * 57, 57 * 57 };
		queue.Execute (Kernel, new long[] { 0, 0, 0 }, globalWorksize, null, null);

		// Blocking reads: both arrays are valid as soon as the calls return.
		queue.ReadFromBuffer (resultBuffer, ref resultData, true, null);
		queue.ReadFromBuffer (passLenBuffer, ref passLen, true, null);

		String password = null;
		if (passLen [0] > 0) {
			logger.Info ("pass len {0}", passLen [0]);
			password = Encoding.ASCII.GetString (resultData, 0, passLen [0]);
			logger.Info ("Found password: \"{0}\"", password);
		} else {
			logger.Info ("Password not found.");
		}

		queue.Finish ();
		return password;
	}
}
/// <summary>
/// Adds two random 10-element vectors with the "VectorAdd" kernel on the first
/// device of <c>context</c> and prints each sum to the console.
/// </summary>
protected override void RunInternal()
{
    int count = 10;
    float[] arrA = new float[count];
    float[] arrB = new float[count];
    float[] arrC = new float[count];

    Random rand = new Random();
    for (int i = 0; i < count; i++)
    {
        arrA[i] = (float)(rand.NextDouble() * 100);
        arrB[i] = (float)(rand.NextDouble() * 100);
    }

    ComputeEventList events = new ComputeEventList();

    // All OpenCL objects are disposed when the block exits, including on failure.
    using (ComputeBuffer<float> a = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrA))
    using (ComputeBuffer<float> b = new ComputeBuffer<float>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, arrB))
    using (ComputeBuffer<float> c = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, arrC.Length))
    using (ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource }))
    {
        program.Build(null, null, null, IntPtr.Zero);

        using (ComputeKernel kernel = program.CreateKernel("VectorAdd"))
        using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None))
        {
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, b);
            kernel.SetMemoryArgument(2, c);

            try
            {
                commands.Execute(kernel, null, new long[] { count }, null, events);

                // Pin the destination so its address stays fixed while the
                // non-blocking read is outstanding.
                GCHandle arrCHandle = GCHandle.Alloc(arrC, GCHandleType.Pinned);
                try
                {
                    commands.Read(c, false, 0, count, arrCHandle.AddrOfPinnedObject(), events);

                    // The read was enqueued non-blocking; wait for it to complete.
                    commands.Finish();
                }
                finally
                {
                    arrCHandle.Free();
                }
            }
            finally
            {
                // Release the events that Execute/Read appended to the list.
                foreach (ComputeEventBase eventBase in events)
                {
                    eventBase.Dispose();
                }
                events.Clear();
            }
        }
    }

    for (int i = 0; i < count; i++)
    {
        Console.WriteLine("{0} + {1} = {2}", arrA[i], arrB[i], arrC[i]);
    }
}
/// <summary>
/// Builds the program in "MonteCarloSimulate.cl", runs its "MonteCarloSimulate"
/// kernel on three sideSize x sideSize integer matrices, spot-checks one output
/// element against a host-side sum of products and prints the elapsed
/// milliseconds. Matrices are only randomised and printed when sideSize &lt;= 32.
/// </summary>
public static void Test()
{
    string source = File.ReadAllText("MonteCarloSimulate.cl");

    //Choose Device
    ComputePlatform platform = ComputePlatform.Platforms[0];
    ComputeDevice device = platform.QueryDevices()[0];
    ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);

    //Create arguments
    int sideSize = 4096;
    int[] inMatrixA = new int[sideSize * sideSize];
    int[] inMatrixB = new int[sideSize * sideSize];
    int[] outMatrixC = new int[sideSize * sideSize];
    Random random = new Random((int)DateTime.Now.Ticks);

    if (sideSize <= 32)
    {
        for (int y = 0; y < sideSize; y++)
        {
            for (int x = 0; x < sideSize; x++)
            {
                inMatrixA[y * sideSize + x] = random.Next(3);
                inMatrixB[y * sideSize + x] = random.Next(3);
                outMatrixC[y * sideSize + x] = 0;
            }
        }
    }

    long executionTime;

    ComputeContext context = null;
    ComputeProgram program = null;
    ComputeKernel kernel = null;
    ComputeBuffer<int> bufferMatrixA = null;
    ComputeBuffer<int> bufferMatrixB = null;
    ComputeBuffer<int> bufferMatrixC = null;
    ComputeCommandQueue commands = null;

    try
    {
        //Setup of stuff on our side
        context = new ComputeContext(ComputeDeviceTypes.All, properties, null, IntPtr.Zero);

        //Build the program, which gets us the kernel
        program = new ComputeProgram(context, source);
        program.Build(null, null, null, IntPtr.Zero);
        kernel = program.CreateKernel("MonteCarloSimulate");

        bufferMatrixA = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, inMatrixA);
        bufferMatrixB = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, inMatrixB);
        bufferMatrixC = new ComputeBuffer<int>(context, ComputeMemoryFlags.UseHostPointer, outMatrixC);

        // NOTE(review): OpenCL requires each global size to be a multiple of the local
        // size; confirm MaxComputeUnits divides the global size of 16 on target devices.
        long localWorkSize = Math.Min(device.MaxComputeUnits, sideSize);

        //Sets arguments
        kernel.SetMemoryArgument(0, bufferMatrixA);
        kernel.SetMemoryArgument(1, bufferMatrixB);
        kernel.SetMemoryArgument(2, bufferMatrixC);
        kernel.SetLocalArgument(3, sideSize * 2);
        kernel.SetValueArgument<int>(4, sideSize);

        if (sideSize <= 32)
        {
            // The padding is only needed when matrix A is printed, so it is built
            // here rather than unconditionally.
            string offset = " ";
            for (int x = 0; x < sideSize; x++)
                offset += " ";

            for (int y = 0; y < sideSize; y++)
            {
                Console.Write(offset);
                for (int x = 0; x < sideSize; x++)
                    Console.Write(inMatrixA[y * sideSize + x] + " ");
                Console.WriteLine();
            }
        }

        //Runs commands
        commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);

        executionTime = DateTime.Now.Ticks;

        //Execute kernel over a 2-D range of at most 16 x 16 work-items
        commands.Execute(kernel, null,
            new long[] { Math.Min(sideSize, 16), Math.Min(sideSize, 16) },
            new long[] { localWorkSize, 1 }, null);

        commands.Finish();

        commands.ReadFromBuffer(bufferMatrixC, ref outMatrixC, true, null);

        commands.Finish();
        executionTime = DateTime.Now.Ticks - executionTime;
    }
    finally
    {
        // Release every OpenCL object deterministically. Previously only the program
        // was disposed and the rest was left to a forced garbage collection.
        if (commands != null) commands.Dispose();
        if (bufferMatrixC != null) bufferMatrixC.Dispose();
        if (bufferMatrixB != null) bufferMatrixB.Dispose();
        if (bufferMatrixA != null) bufferMatrixA.Dispose();
        if (kernel != null) kernel.Dispose();
        if (program != null) program.Dispose();
        if (context != null) context.Dispose();
    }

    Console.WriteLine();
    if (sideSize <= 32)
    {
        for (int y = 0; y < sideSize; y++)
        {
            for (int x = 0; x < sideSize; x++)
                Console.Write(inMatrixB[y * sideSize + x] + " ");
            Console.Write(" ");
            for (int x = 0; x < sideSize; x++)
                Console.Write(outMatrixC[y * sideSize + x] + " ");

            Console.WriteLine();
        }
    }

    // Spot check: compare one randomly chosen output element with the host-side sum.
    int testY = random.Next(sideSize);
    int testX = random.Next(sideSize);
    int sum = 0;
    for (int q = 0; q < sideSize; q++)
        sum += inMatrixA[q * sideSize + testX] * inMatrixB[testY * sideSize + q];

    Console.WriteLine(sum == outMatrixC[testY * sideSize + testX]);

    // Ticks are 100 ns, so dividing by 10000 yields milliseconds.
    Console.WriteLine(executionTime / 10000.0);
}
/// <summary>
/// Renders a screenshot tile by tile with the current kernel and assembles the
/// tiles into a 24bpp RGB bitmap.
/// </summary>
/// <param name="screenshotHeight">Height of the bitmap in pixels; the width is derived from <c>ScreenshotAspectRatio</c>.</param>
/// <param name="slowRenderPower">Multiplier applied to each tile dimension and forwarded to <c>_kernel.Render</c>.</param>
/// <param name="displayInformation">Callback that receives progress messages.</param>
/// <returns>The rendered bitmap, or <c>null</c> when a bitmap of the requested size cannot be created.</returns>
public Bitmap Screenshot(int screenshotHeight, int slowRenderPower, Action<string> displayInformation)
{
    displayInformation("Rendering screenshot");
    var ccontext = _kernel.ComputeContext;

    _kernelInUse++;
    try
    {
        var screenshotWidth = (int)(screenshotHeight * ScreenshotAspectRatio);
        Bitmap bmp;
        try
        {
            bmp = new Bitmap(screenshotWidth, screenshotHeight, PixelFormat.Format24bppRgb);
        }
        catch (ArgumentException)
        {
            MessageBox.Show("Image size too big", "Error");
            // The outer finally releases the in-use count on this path as well.
            return null;
        }

        var nancount = 0;
        var bmpData = bmp.LockBits(new Rectangle(0, 0, screenshotWidth, screenshotHeight), ImageLockMode.ReadWrite, PixelFormat.Format24bppRgb);
        try
        {
            var scan0 = bmpData.Scan0.ToInt64();

            using (var queue = new ComputeCommandQueue(ccontext, ccontext.Devices[0], ComputeCommandQueueFlags.None))
            {
                var localSize = _kernel.Threadsize(queue);
                for (var i = 0; i < localSize.Length; i++)
                    localSize[i] *= slowRenderPower;

                using (var computeBuffer = new ComputeBuffer<Vector4>(ccontext, ComputeMemoryFlags.ReadWrite, localSize[0] * localSize[1]))
                {
                    const int numFrames = 200;
                    var frameDependantControls = _parameters as IFrameDependantControl;
                    // Frame-dependent parameters are rendered numFrames times per tile.
                    var framesToRender = frameDependantControls == null ? 1 : numFrames;

                    // Number of tiles per axis, rounded up so partial edge tiles are covered.
                    var totalYs = (screenshotHeight + localSize[1] - 1) / localSize[1];
                    var totalXs = (screenshotWidth + localSize[0] - 1) / localSize[0];
                    var stopwatch = new Stopwatch();

                    for (var y = 0; y < totalYs; y++)
                    {
                        for (var x = 0; x < totalXs; x++)
                        {
                            stopwatch.Restart();
                            for (var frame = 0; frame < framesToRender; frame++)
                            {
                                if (frameDependantControls != null)
                                    frameDependantControls.Frame = frame;
                                displayInformation(string.Format("Screenshot {0}% done",
                                    100 * (y * totalXs * framesToRender + x * framesToRender + frame) / (totalXs * totalYs * framesToRender)));
                                _kernel.Render(computeBuffer, queue, _parameters, new Size(screenshotWidth, screenshotHeight), slowRenderPower, new Size(x, y), (int)localSize[0]);
                            }

                            var pixels = new Vector4[localSize[0] * localSize[1]];
                            queue.ReadFromBuffer(computeBuffer, ref pixels, true, 0, 0, localSize[0] * localSize[1], null);
                            queue.Finish();

                            stopwatch.Stop();
                            var elapsed = stopwatch.Elapsed.TotalMilliseconds / framesToRender;
                            // Moving average weighted 4:1 towards the previous value.
                            _kernel.AverageKernelTime = (elapsed + _kernel.AverageKernelTime * 4) / 5;

                            // Edge tiles are cropped to the part that lies inside the bitmap.
                            var blockWidth = Math.Min(localSize[0], screenshotWidth - x * localSize[0]);
                            var blockHeight = Math.Min(localSize[1], screenshotHeight - y * localSize[1]);
                            var intPixels = new byte[blockWidth * blockHeight * 3];
                            for (var py = 0; py < blockHeight; py++)
                            {
                                for (var px = 0; px < blockWidth; px++)
                                {
                                    // NOTE(review): the row stride used here is localSize[1]; if tiles can be
                                    // non-square this presumably should be localSize[0] - confirm against Render.
                                    var pixel = pixels[py * localSize[1] + px];
                                    if (float.IsNaN(pixel.X) || float.IsNaN(pixel.Y) || float.IsNaN(pixel.Z))
                                        nancount++;

                                    // BGR byte order; NaN components are left at 0.
                                    if (float.IsNaN(pixel.Z) == false)
                                        intPixels[(py * blockWidth + px) * 3 + 0] = (byte)(pixel.Z * 255);
                                    if (float.IsNaN(pixel.Y) == false)
                                        intPixels[(py * blockWidth + px) * 3 + 1] = (byte)(pixel.Y * 255);
                                    if (float.IsNaN(pixel.X) == false)
                                        intPixels[(py * blockWidth + px) * 3 + 2] = (byte)(pixel.X * 255);
                                }
                            }

                            // Copy the tile into the locked bitmap one scan line at a time.
                            for (var line = 0; line < blockHeight; line++)
                                Marshal.Copy(intPixels, line * (int)blockWidth * 3,
                                    new IntPtr(scan0 + ((y * localSize[1] + line) * bmpData.Stride) + x * localSize[0] * 3),
                                    (int)blockWidth * 3);
                        }
                    }
                }
            }
        }
        finally
        {
            // Always release the bitmap lock, even when rendering fails.
            bmp.UnlockBits(bmpData);
        }

        if (nancount != 0)
            MessageBox.Show(string.Format("Caught {0} NAN pixels while taking screenshot", nancount), "Warning");

        return bmp;
    }
    finally
    {
        // Balances the increment above on every exit path.
        _kernelInUse--;
    }
}
/// <summary>
/// Creates the OpenCL context and command queue, builds the terrain generator,
/// quadtree and vertex-winder programs, allocates their device buffers and binds
/// the kernel arguments that are set up here.
/// </summary>
public TerrainGen(){
#if CPU_DEBUG
    var platform = ComputePlatform.Platforms[1];
#else
    var platform = ComputePlatform.Platforms[0];
#endif
    _devices = new List<ComputeDevice>();
    _devices.Add(platform.Devices[0]);
    _properties = new ComputeContextPropertyList(platform);
    _context = new ComputeContext(_devices, _properties, null, IntPtr.Zero);
    _cmdQueue = new ComputeCommandQueue(_context, _devices[0], ComputeCommandQueueFlags.None);

    #region setup generator kernel

    bool loadFromSource = Gbl.HasRawHashChanged[Gbl.RawDir.Scripts];
    // The hash check is overridden: programs are always built from source, so the
    // cached-binary path in BuildProgram is currently never taken.
    loadFromSource = true;

    _chunkWidthInBlocks = Gbl.LoadContent<int>("TGen_ChunkWidthInBlocks");
    _chunkWidthInVerts = _chunkWidthInBlocks + 1;
    _blockWidth = Gbl.LoadContent<int>("TGen_BlockWidthInMeters");
    float lacunarity = Gbl.LoadContent<float>("TGen_Lacunarity");
    float gain = Gbl.LoadContent<float>("TGen_Gain");
    int octaves = Gbl.LoadContent<int>("TGen_Octaves");
    float offset = Gbl.LoadContent<float>("TGen_Offset");
    float hScale = Gbl.LoadContent<float>("TGen_HScale");
    float vScale = Gbl.LoadContent<float>("TGen_VScale");

    // Generator constants are uploaded as eight floats in this fixed order.
    _genConstants = new ComputeBuffer<float>(_context, ComputeMemoryFlags.ReadOnly, 8);
    var genArr = new[]{
        lacunarity,
        gain,
        offset,
        octaves,
        hScale,
        vScale,
        _blockWidth,
        _chunkWidthInBlocks
    };
    _cmdQueue.WriteToBuffer(genArr, _genConstants, false, null);

    _generationPrgm = BuildProgram("TGen_Generator", @"-g -s D:\Projects\Gondola\Scripts\GenTerrain.cl", loadFromSource);

    _terrainGenKernel = _generationPrgm.CreateKernel("GenTerrain");
    _normalGenKernel = _generationPrgm.CreateKernel("GenNormals");

    //despite the script using float3 for these fields, we need to consider it to be float4 because the
    //implementation is basically a float4 wrapper that uses zero for the last variable
    _geometry = new ComputeBuffer<float>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*4);
    _normals = new ComputeBuffer<ushort>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*4);
    _binormals = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*4);
    _tangents = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*4);
    _uvCoords = new ComputeBuffer<float>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts*2);

    _terrainGenKernel.SetMemoryArgument(0, _genConstants);
    _terrainGenKernel.SetMemoryArgument(3, _geometry);
    _terrainGenKernel.SetMemoryArgument(4, _uvCoords);

    _normalGenKernel.SetMemoryArgument(0, _genConstants);
    _normalGenKernel.SetMemoryArgument(3, _geometry);
    _normalGenKernel.SetMemoryArgument(4, _normals);
    _normalGenKernel.SetMemoryArgument(5, _binormals);
    _normalGenKernel.SetMemoryArgument(6, _tangents);

    #endregion

    #region setup quadtree kernel

    _qTreePrgm = BuildProgram("TGen_QTree", @"-g -s D:\Projects\Gondola\Scripts\Quadtree.cl", loadFromSource);

    _qTreeKernel = _qTreePrgm.CreateKernel("QuadTree");
    _crossCullKernel = _qTreePrgm.CreateKernel("CrossCull");

    _activeVerts = new ComputeBuffer<byte>(_context, ComputeMemoryFlags.None, _chunkWidthInVerts*_chunkWidthInVerts);
    _dummy = new ComputeBuffer<int>(_context, ComputeMemoryFlags.None, 50);

    // rawNormals stays all-zero; it only clears the normals buffer.
    var rawNormals = new ushort[_chunkWidthInVerts*_chunkWidthInVerts*4];
    _emptyVerts = new byte[_chunkWidthInVerts*_chunkWidthInVerts];
    for (int i = 0; i < _emptyVerts.Length; i++){
        _emptyVerts[i] = 1;
    }
    _cmdQueue.WriteToBuffer(rawNormals, _normals, true, null);
    _cmdQueue.WriteToBuffer(_emptyVerts, _activeVerts, true, null);

    _qTreeKernel.SetValueArgument(1, _chunkWidthInBlocks);
    _qTreeKernel.SetMemoryArgument(2, _normals);
    _qTreeKernel.SetMemoryArgument(3, _activeVerts);
    _qTreeKernel.SetMemoryArgument(4, _dummy);

    _crossCullKernel.SetValueArgument(1, _chunkWidthInBlocks);
    _crossCullKernel.SetMemoryArgument(2, _normals);
    _crossCullKernel.SetMemoryArgument(3, _activeVerts);
    _crossCullKernel.SetMemoryArgument(4, _dummy);

    #endregion

    #region setup winding kernel

    _winderPrgm = BuildProgram("TGen_VertexWinder", @"-g -s D:\Projects\Gondola\Scripts\VertexWinder.cl", loadFromSource);

    _winderKernel = _winderPrgm.CreateKernel("VertexWinder");
    _indicies = new ComputeBuffer<int>(_context, ComputeMemoryFlags.None, (_chunkWidthInBlocks)*(_chunkWidthInBlocks)*8);

    _winderKernel.SetMemoryArgument(0, _activeVerts);
    _winderKernel.SetMemoryArgument(1, _indicies);

    // A freshly allocated int[] is already all zeros, so no explicit fill is needed.
    _emptyIndices = new int[(_chunkWidthInBlocks)*(_chunkWidthInBlocks)*8];
    _cmdQueue.WriteToBuffer(_emptyIndices, _indicies, true, null);

    #endregion

    if (loadFromSource){
        Gbl.AllowMD5Refresh[Gbl.RawDir.Scripts] = true;
    }

    // Wait for the non-blocking constants upload before the constructor returns.
    _cmdQueue.Finish();
}

/// <summary>
/// Creates and builds one OpenCL program, either from its script source (saving
/// the resulting binaries under the same name) or from previously saved binaries.
/// </summary>
/// <param name="scriptName">Name passed to <c>Gbl.LoadScript</c>, <c>Gbl.SaveBinary</c> and <c>Gbl.LoadBinary</c>.</param>
/// <param name="debugBuildOptions">Build options used for source builds when CPU_DEBUG is defined.</param>
/// <param name="loadFromSource">True to build from source; false to load the saved binary.</param>
/// <returns>The built program.</returns>
private ComputeProgram BuildProgram(string scriptName, string debugBuildOptions, bool loadFromSource){
    ComputeProgram program;
    if (loadFromSource){
        program = new ComputeProgram(_context, Gbl.LoadScript(scriptName));
#if CPU_DEBUG
        program.Build(null, debugBuildOptions, null, IntPtr.Zero); //use option -I + scriptDir for header search
#else
        program.Build(null, "", null, IntPtr.Zero); //use option -I + scriptDir for header search
#endif
        Gbl.SaveBinary(program.Binaries, scriptName);
    }
    else{
        var binary = Gbl.LoadBinary(scriptName);
        program = new ComputeProgram(_context, binary, _devices);
        program.Build(null, "", null, IntPtr.Zero);
    }
    return program;
}
/// <summary>
/// Runs the "PrimeNumber" kernel over the values 0 .. 64^3-1 on every device of
/// the first OpenCL platform (last device first), appends every output value
/// other than 0 and 1 to a text file and prints the time taken per device.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const string programName = "Prime Number";
    Stopwatch stopWatch = new Stopwatch();
    string clProgramSource = KernelProgram();

    Console.WriteLine("Environment OS:");
    Console.WriteLine("-----------------------------------------");
    Console.WriteLine(Environment.OSVersion);

    if (ComputePlatform.Platforms.Count == 0)
    {
        Console.WriteLine("No OpenCL Platforms are availble!");
        return;
    }

    // step 1 choose the first available platform
    ComputePlatform platform = ComputePlatform.Platforms[0];

    // output the basic info
    BasicInfo(platform);
    Console.WriteLine("Program: " + programName);
    Console.WriteLine("-----------------------------------------");

    int count = 64;
    int total = count * count * count;
    int[] output_Z = new int[total];
    int[] input_X = new int[total];
    for (int x = 0; x < total; x++)
    {
        input_X[x] = x;
    }

    // step 2 create context for that platform and all devices
    ComputeContextPropertyList properties = new ComputeContextPropertyList(platform);

    ComputeEventList eventList = new ComputeEventList();

    using (ComputeContext context = new ComputeContext(platform.Devices, properties, null, IntPtr.Zero))
    // step 3 create and build program
    using (ComputeProgram program = new ComputeProgram(context, clProgramSource))
    {
        program.Build(platform.Devices, null, null, IntPtr.Zero);

        // step 4 create memory objects
        using (ComputeBuffer<int> a = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input_X))
        using (ComputeBuffer<int> z = new ComputeBuffer<int>(context, ComputeMemoryFlags.WriteOnly, output_Z.Length))
        // step 5 create kernel object
        using (ComputeKernel kernel = program.CreateKernel("PrimeNumber"))
        {
            // step 6 set kernel arguments
            kernel.SetMemoryArgument(0, a);
            kernel.SetMemoryArgument(1, z);

            try
            {
                // devices are visited n,...,1,0
                for (int j = context.Devices.Count - 1; j > -1; j--)
                {
                    // Reset first so each device reports its own time rather than
                    // the running total of all devices so far.
                    stopWatch.Reset();
                    stopWatch.Start();

                    // step 7 create command queue on that context on that device
                    using (ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[j], ComputeCommandQueueFlags.None))
                    {
                        // step 8 run the kernel program
                        commands.Execute(kernel, null, new long[] { count, count, count }, null, eventList);

                        // step 9 read results (non-blocking), then wait for completion
                        commands.ReadFromBuffer(z, ref output_Z, false, eventList);
                        commands.Finish();
                    }

                    // Switch to the overflow file once the primary file has reached
                    // roughly 1 MB. The previous exact-equality test could practically
                    // never match, and it leaked the first writer when it did.
                    string fileName = "C:\\primenumber\\PrimeNumberGPU.txt";
                    FileInfo info = new FileInfo(fileName);
                    long fs = info.Exists ? info.Length : 0;
                    int index = 1;
                    if (fs >= 1.049e+6)
                    {
                        fileName = "C:\\primenumber\\PrimeNumberGPU" + index.ToString() + ".txt";
                    }

                    using (StreamWriter file = new StreamWriter(fileName, true))
                    {
                        for (int i = 0; i < output_Z.Length; i++)
                        {
                            if (output_Z[i] != 0 && output_Z[i] != 1)
                            {
                                Console.WriteLine(output_Z[i]);
                                file.Write(output_Z[i]);
                                file.Write("x");
                            }
                        }
                    }

                    stopWatch.Stop();

                    TimeSpan ts = stopWatch.Elapsed;
                    string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", ts.Hours, ts.Minutes, ts.Seconds, ts.Milliseconds);
                    Console.WriteLine(context.Devices[j].Name.Trim() + " Elapsed Time " + elapsedTime);
                    Console.WriteLine("-----------------------------------------");
                }
            }
            finally
            {
                // Release the events appended by Execute/ReadFromBuffer.
                foreach (ComputeEventBase eventBase in eventList)
                {
                    eventBase.Dispose();
                }
                eventList.Clear();
            }
        }
    }

    Console.ReadLine();
}