/// <summary>
/// Invokes an error kernel with the activated and expected ("should") buffers of a layer.
/// </summary>
/// <remarks>
/// The kernel delegate has a 3D slot that this overload does not supply; <c>dummyBuffer3D</c> is passed in that position.
/// </remarks>
/// <param name="action">Kernel delegate to invoke.</param>
/// <param name="extent">Launch extent handed to the kernel.</param>
/// <param name="error">Passed as the first 2D argument.</param>
/// <param name="activated">Passed as the second 2D argument.</param>
/// <param name="should">Passed as the last 2D argument.</param>
/// <param name="derived">Passed as the 2D argument following the 3D slot.</param>
/// <param name="variable">Passed as the trailing 1D argument.</param>
internal static void SumError(
    Action<AcceleratorStream, Index2, ArrayView2D<float>, ArrayView2D<float>, ArrayView3D<float>, ArrayView2D<float>, ArrayView2D<float>, ArrayView<float>> action,
    Index2 extent,
    MemoryBuffer2D<float> error,
    MemoryBuffer2D<float> activated,
    MemoryBuffer2D<float> should,
    MemoryBuffer2D<float> derived,
    MemoryBuffer<float> variable)
{
    AcceleratorStream stream = accelerator.DefaultStream;
    action(stream, extent, error, activated, dummyBuffer3D, derived, should, variable);
}
/// <summary>
/// Invokes a weight-adjustment kernel on the accelerator's default stream.
/// </summary>
/// <param name="action">Kernel delegate to invoke.</param>
/// <param name="extent">Launch extent handed to the kernel.</param>
/// <param name="weights">Forwarded as the 3D argument.</param>
/// <param name="error">Forwarded as the first 2D argument.</param>
/// <param name="activatedPreviousLayer">Forwarded as the second 2D argument.</param>
/// <param name="bias">Forwarded as the third 2D argument.</param>
/// <param name="variables">Forwarded as the trailing 1D argument.</param>
internal static void WheightAdjustment(
    Action<AcceleratorStream, Index2, ArrayView3D<float>, ArrayView2D<float>, ArrayView2D<float>, ArrayView2D<float>, ArrayView<float>> action,
    Index2 extent,
    MemoryBuffer3D<float> weights,
    MemoryBuffer2D<float> error,
    MemoryBuffer2D<float> activatedPreviousLayer,
    MemoryBuffer2D<float> bias,
    MemoryBuffer<float> variables)
{
    AcceleratorStream stream = accelerator.DefaultStream;
    action(stream, extent, weights, error, activatedPreviousLayer, bias, variables);
}
/// <summary>
/// Invokes an error kernel with the error and weight buffers of the next layer.
/// </summary>
/// <remarks>
/// The kernel delegate has a trailing 2D slot that this overload does not supply; <c>dummyBuffer2D</c> is passed in that position.
/// </remarks>
/// <param name="action">Kernel delegate to invoke.</param>
/// <param name="extent">Launch extent handed to the kernel.</param>
/// <param name="error">Passed as the first 2D argument.</param>
/// <param name="errorNextLayer">Passed as the second 2D argument.</param>
/// <param name="weightNextLayer">Passed as the 3D argument.</param>
/// <param name="derived">Passed as the 2D argument following the 3D slot.</param>
/// <param name="variable">Passed as the trailing 1D argument.</param>
internal static void SumError(
    Action<AcceleratorStream, Index2, ArrayView2D<float>, ArrayView2D<float>, ArrayView3D<float>, ArrayView2D<float>, ArrayView2D<float>, ArrayView<float>> action,
    Index2 extent,
    MemoryBuffer2D<float> error,
    MemoryBuffer2D<float> errorNextLayer,
    MemoryBuffer3D<float> weightNextLayer,
    MemoryBuffer2D<float> derived,
    MemoryBuffer<float> variable)
{
    AcceleratorStream stream = accelerator.DefaultStream;
    action(stream, extent, error, errorNextLayer, weightNextLayer, derived, dummyBuffer2D, variable);
}
/// <summary>
/// Invokes a sum-calculation kernel on the accelerator's default stream.
/// </summary>
/// <param name="action">Kernel delegate to invoke.</param>
/// <param name="extent">Launch extent handed to the kernel.</param>
/// <param name="weights">Forwarded as the 3D argument.</param>
/// <param name="outputPreviousLayerActivated">Forwarded as the first 2D argument.</param>
/// <param name="sumInput">Forwarded as the second 2D argument.</param>
/// <param name="bias">Forwarded as the third 2D argument.</param>
/// <param name="variables">Forwarded as the trailing 1D argument.</param>
internal static void SumCalculate(
    Action<AcceleratorStream, Index2, ArrayView3D<float>, ArrayView2D<float>, ArrayView2D<float>, ArrayView2D<float>, ArrayView<float>> action,
    Index2 extent,
    MemoryBuffer3D<float> weights,
    MemoryBuffer2D<float> outputPreviousLayerActivated,
    MemoryBuffer2D<float> sumInput,
    MemoryBuffer2D<float> bias,
    MemoryBuffer<float> variables)
{
    AcceleratorStream stream = accelerator.DefaultStream;
    action(stream, extent, weights, outputPreviousLayerActivated, sumInput, bias, variables);
}
/// <summary>
/// Invokes a derivative kernel on the accelerator's default stream.
/// </summary>
/// <param name="action">Kernel delegate to invoke.</param>
/// <param name="extent">Launch extent handed to the kernel.</param>
/// <param name="sumInput">Forwarded as the first 2D argument.</param>
/// <param name="derived">Forwarded as the second 2D argument.</param>
/// <param name="variable">Forwarded as the trailing 1D argument.</param>
internal static void DerivativeFunction(
    Action<AcceleratorStream, Index2, ArrayView2D<float>, ArrayView2D<float>, ArrayView<float>> action,
    Index2 extent,
    MemoryBuffer2D<float> sumInput,
    MemoryBuffer2D<float> derived,
    MemoryBuffer<float> variable)
{
    AcceleratorStream stream = accelerator.DefaultStream;
    action(stream, extent, sumInput, derived, variable);
}
/// <summary>
/// Invokes a normalization kernel on the accelerator's default stream.
/// </summary>
/// <param name="action">Kernel delegate to invoke.</param>
/// <param name="extent">Launch extent handed to the kernel.</param>
/// <param name="outputPreviousLayerActivated">Forwarded as the first 2D argument.</param>
/// <param name="outputActivated">Forwarded as the second 2D argument.</param>
/// <param name="variable">Forwarded as the trailing 1D argument.</param>
internal static void NormalizationFunction(
    Action<AcceleratorStream, Index2, ArrayView2D<float>, ArrayView2D<float>, ArrayView<float>> action,
    Index2 extent,
    MemoryBuffer2D<float> outputPreviousLayerActivated,
    MemoryBuffer2D<float> outputActivated,
    MemoryBuffer<float> variable)
{
    AcceleratorStream stream = accelerator.DefaultStream;
    action(stream, extent, outputPreviousLayerActivated, outputActivated, variable);
}
/// <summary>
/// Queues an input buffer in <c>reusableInputBuffer</c>; a null buffer is ignored.
/// </summary>
/// <param name="inputBuffer">Buffer to enqueue, or null.</param>
internal static void ScrapInputBuffer(MemoryBuffer2D<float> inputBuffer)
{
    if (inputBuffer == null)
    {
        return;
    }

    reusableInputBuffer.Enqueue(inputBuffer);
}
// Finds the non-blank tiles of `bitmap` and passes them to CheckRepeatedKernel.Execute.
//
// Steps:
//   1. The whole bitmap is uploaded to a GPU buffer sized GetLength(0) x GetLength(1).
//   2. w and h are integer-divided by the tile size, so a partial tile at the right/bottom edge is not visited.
//   3. Every tile index in [0, w*h) is tested in parallel with TileIsBlank.Execute; tiles for which it
//      returns false are stored in remainingVals, keyed by TileKey(i1, j1, w), with the linear tile index as value.
//   4. The result of CheckRepeatedKernel.Execute(remainingVals, bpbuffer, tilewidth, tileheight) is returned.
//
// The GPU buffer is disposed when the using block ends, before the method returns.
// NOTE(review): the local queue `ts` is created but never used in this method.
// NOTE(review): TileIsBlank.Execute is called from several threads at once — confirm it is safe to call concurrently.
GetUniqueTilesPositions(int[,] bitmap, int tilewidth, int tileheight) { int w = bitmap.GetLength(0); int h = bitmap.GetLength(1); int l; Index2 extent = new Index2(tilewidth, tileheight); ConcurrentQueue <TileKey> ts = new ConcurrentQueue <TileKey>(); ConcurrentDictionary <TileKey, int> remainingVals = new ConcurrentDictionary <TileKey, int>(); Tuple <ConcurrentDictionary <TileKey, int>, ConcurrentDictionary <TileKey, TileKey> > ret = null; using (MemoryBuffer2D <int> bpbuffer = HardwareAcceleratorManager.GPUAccelerator.Allocate <int>(w, h)) { bpbuffer.CopyFrom(bitmap, Index2.Zero, Index2.Zero, bpbuffer.Extent); w /= tilewidth; h /= tileheight; l = w * h; Parallel.For(0, l, ind => { int i1 = ind % w; int j1 = ind / w; TileKey k1 = new TileKey(i1, j1, w); if (!TileIsBlank.Execute(extent, bpbuffer, new Index2(i1 * tilewidth, j1 * tileheight))) { remainingVals.TryAdd(k1, ind); } }); ret = CheckRepeatedKernel.Execute(remainingVals, bpbuffer, extent.X, extent.Y); } return(ret); }
/// <summary>
/// Returns the GPU buffer describing all neurons, building and caching it in <c>_neurons</c> on first use.
/// </summary>
/// <remarks>
/// Each neuron occupies one row (the Y index). Column 0 holds the activation, column 1 is 1 when the
/// neuron is a bias (else 0), column 2 is 1 when the bias is connected (else 0), and the columns from 3
/// onwards hold the weights. The number of weight columns is taken from the first neuron.
/// </remarks>
/// <returns>The cached or newly created buffer.</returns>
public MemoryBuffer2D<double> GetNeurons()
{
    if (_neurons == null)
    {
        // Activation, bias flag and bias-connected flag precede the weights.
        const int headerColumns = 3;
        int weightCount = Neurons[0].Weights.Count;
        int neuronCount = Neurons.Count;

        var table = GPU.Instance.Accelerator.Allocate<double>(headerColumns + weightCount, neuronCount);
        for (int row = 0; row < neuronCount; ++row)
        {
            var neuron = Neurons[row];
            table.CopyFrom(neuron.Activation, new Index2(0, row));
            table.CopyFrom(neuron.IsBias ? 1 : 0, new Index2(1, row));
            table.CopyFrom(neuron.IsBiasConnected ? 1 : 0, new Index2(2, row));

            for (int column = 0; column < weightCount; ++column)
            {
                table.CopyFrom(neuron.Weights[column].Weight, new Index2(headerColumns + column, row));
            }
        }

        // Cache only once the table has been filled completely.
        _neurons = table;
    }

    return _neurons;
}
/// <summary>
/// Links this layer to <paramref name="prevLayer"/> and creates the per-slice buffer arrays.
/// </summary>
/// <remarks>
/// Every slice gets its own <c>variable</c> buffer holding <c>{ Config.learningRate, 0f }</c>; all other
/// per-slice buffers start out as the shared dummy buffers of <c>GPUHelper</c>.
/// </remarks>
/// <param name="prevLayer">Preceding layer, or null; when not null this layer is registered as its next layer.</param>
/// <param name="activationFunction">Stored in <c>function</c>.</param>
/// <param name="sliceCount">Number of slices, i.e. the length of every per-slice array.</param>
protected Layer2D(Layer2D prevLayer, string activationFunction, int sliceCount)
{
    SetPrevious(prevLayer);
    if (prevLayer != null)
    {
        prevLayer.SetNext(this);
    }

    function = activationFunction;

    variable = new MemoryBuffer<float>[sliceCount];
    activated = new MemoryBuffer2D<float>[sliceCount];
    sumInput = new MemoryBuffer2D<float>[sliceCount];
    error = new MemoryBuffer2D<float>[sliceCount];
    weight = new MemoryBuffer3D<float>[sliceCount];
    bias = new MemoryBuffer2D<float>[sliceCount];
    derived = new MemoryBuffer2D<float>[sliceCount];

    for (int slice = 0; slice < sliceCount; slice++)
    {
        float[] initialValues = { Config.learningRate, 0f };
        variable[slice] = GPUHelper.CreateBuffer(initialValues, initialValues.Length);

        // Placeholders until real buffers are assigned.
        activated[slice] = GPUHelper.dummyBuffer2D;
        sumInput[slice] = GPUHelper.dummyBuffer2D;
        error[slice] = GPUHelper.dummyBuffer2D;
        bias[slice] = GPUHelper.dummyBuffer2D;
        derived[slice] = GPUHelper.dummyBuffer2D;
        weight[slice] = GPUHelper.dummyBuffer3D;
    }
}
/// <summary>
/// Creates the accelerator identified by <paramref name="acceleratorId"/>, loads the evaluation and
/// result-processing kernels, and uploads the independent and dependent data to the accelerator.
/// </summary>
/// <param name="context">ILGPU context used to create the accelerator.</param>
/// <param name="acceleratorId">Identifies the accelerator; its type selects the CPU, OpenCL or Cuda factory.</param>
/// <param name="independents">Table uploaded to <c>Independents</c>; its two dimensions define <c>IndependentsTableSize</c>.</param>
/// <param name="dependants">Values uploaded to <c>Dependants</c>.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="independents"/> or <paramref name="dependants"/> is null.</exception>
/// <exception cref="System.NotSupportedException">The accelerator type is not CPU, OpenCL or Cuda.</exception>
public void Initialize(Context context, AcceleratorId acceleratorId, double[,] independents, double[] dependants)
{
    // Validate before any accelerator is created so a bad call leaves no device resources behind.
    if (independents == null)
    {
        throw new System.ArgumentNullException(nameof(independents));
    }

    if (dependants == null)
    {
        throw new System.ArgumentNullException(nameof(dependants));
    }

    AcceleratorId = acceleratorId;
    switch (AcceleratorId.AcceleratorType)
    {
        case AcceleratorType.CPU:
            Accelerator = Accelerator.Create(context, AcceleratorId);
            break;
        case AcceleratorType.OpenCL:
            Accelerator = CLAccelerator.Create(context, AcceleratorId);
            break;
        case AcceleratorType.Cuda:
            Accelerator = CudaAccelerator.Create(context, AcceleratorId);
            break;
        default:
            // Previously this case fell through and failed later with a NullReferenceException.
            throw new System.NotSupportedException("Unsupported accelerator type: " + AcceleratorId.AcceleratorType);
    }

    EvaluationKernel = Accelerator.LoadAutoGroupedStreamKernel<Index2, ArrayView2D<double>, ArrayView<double>, ArrayView<NodeGPU>, ArrayView<int>, ArrayView2D<double>>(EvaluationKernelFunction);
    ProcessResultsKernel = Accelerator.LoadAutoGroupedStreamKernel<Index1, ArrayView2D<double>, ArrayView<double>>(ProcessResultsKernelFunction);

    IndependentsTableSize = new Index2(independents.GetLength(0), independents.GetLength(1));
    Independents = Accelerator.Allocate<double>(IndependentsTableSize);
    Independents.CopyFrom(independents, new Index2(), new Index2(), IndependentsTableSize);

    Dependants = Accelerator.Allocate<double>(dependants.Length);
    Dependants.CopyFrom(dependants, 0, 0, dependants.Length);
}
/// <summary>
/// Queues an output buffer in <c>reusableOutputBuffer</c>; a null buffer is ignored.
/// </summary>
/// <param name="outputBuffer">Buffer to enqueue, or null.</param>
internal static void ScrapOutputBuffer(MemoryBuffer2D<float> outputBuffer)
{
    if (outputBuffer == null)
    {
        return;
    }

    reusableOutputBuffer.Enqueue(outputBuffer);
}
/// <summary>
/// Allocates the GPU buffers used by this instance and uploads the host data into them.
/// </summary>
/// <remarks>
/// Buffer extents are built as (GetLength(1), GetLength(0)) of the corresponding host array.
/// <c>aMapGPU</c> is allocated with the extent of <paramref name="xImage"/> and filled from the
/// <c>aMap</c> field; <c>psf2GPU</c> is filled from the <c>psf2</c> field. <c>maxPixelGPU</c> is
/// allocated with a single element and left uninitialised here.
/// NOTE(review): the previous documentation stated that <paramref name="bMap"/> is modified by this
/// method ("output: bMap/aMap"); the code in this method only reads it — confirm where the division happens.
/// </remarks>
/// <param name="xImage">Host image uploaded to <c>xImageGPU</c>.</param>
/// <param name="bMap">Host map uploaded to <c>bMapGPU</c>.</param>
/// <param name="lambda">Stored at index 0 of <c>lambdaAlpha</c>.</param>
/// <param name="alpha">Stored at index 1 of <c>lambdaAlpha</c>.</param>
private void AllocateGPU(float[,] xImage, float[,] bMap, float lambda, float alpha)
{
    var origin = new Index2(0, 0);

    var imageExtent = new Index2(xImage.GetLength(1), xImage.GetLength(0));
    xImageGPU = accelerator.Allocate<float>(imageExtent);
    xImageGPU.CopyFrom(xImage, origin, origin, imageExtent);

    var bMapExtent = new Index2(bMap.GetLength(1), bMap.GetLength(0));
    bMapGPU = accelerator.Allocate<float>(bMapExtent);
    bMapGPU.CopyFrom(bMap, origin, origin, bMapExtent);

    // aMap shares the image extent.
    aMapGPU = accelerator.Allocate<float>(imageExtent);
    aMapGPU.CopyFrom(aMap, origin, origin, imageExtent);

    var psfExtent = new Index2(psf2.GetLength(1), psf2.GetLength(0));
    psf2GPU = accelerator.Allocate<float>(psfExtent);
    psf2GPU.CopyFrom(psf2, origin, origin, psfExtent);

    lambdaAlpha = accelerator.Allocate<float>(2);
    lambdaAlpha.CopyFrom(lambda, new ILGPU.Index(0));
    lambdaAlpha.CopyFrom(alpha, new ILGPU.Index(1));

    maxPixelGPU = accelerator.Allocate<Pixel>(1);
}
/// <summary>
/// Allocates a new 2D buffer with the width and height of <paramref name="obj"/> and copies its contents into it.
/// </summary>
/// <typeparam name="T">Element type of the buffer.</typeparam>
/// <param name="obj">Buffer to copy.</param>
/// <returns>The newly allocated copy.</returns>
public MemoryBuffer2D<T> CloneBuffer<T>(MemoryBuffer2D<T> obj) where T : struct
{
    MemoryBuffer2D<T> copy = _accelerator.Allocate<T>(obj.Width, obj.Height);
    Index2 targetOffset = new Index2(0, 0);
    obj.CopyTo(copy, targetOffset);
    return copy;
}
/// <summary>
/// Launches the <c>"_M_2_columns_V"</c> kernel to write a 2D buffer into a newly created 1D buffer.
/// </summary>
/// <typeparam name="T">Element type of the buffers.</typeparam>
/// <param name="size">Length of the output buffer and launch size of the kernel.</param>
/// <param name="m">Source 2D buffer.</param>
/// <returns>The output buffer, returned after the executor has been waited on.</returns>
private static MemoryBuffer<T> _joinColumns<T>(Index size, MemoryBuffer2D<T> m) where T : struct
{
    var executor = ProcessingDevice.ArrayDevice.Executor;

    var result = executor.CreateBuffer<T>(size);
    executor["_M_2_columns_V"].Launch(size, result.View, m.View);
    executor.Wait();

    return result;
}
/// <summary>
/// Wraps a 2D byte array: allocates it through <c>ILGPUMethods.Allocate</c> and sets up the axis ranges.
/// </summary>
/// <remarks>
/// When <paramref name="data"/> is null nothing is allocated and the axes are left unset.
/// </remarks>
/// <param name="data">Source data, or null.</param>
public GPU_ByteData2D(byte[,] data)
{
    if (data != null)
    {
        _view = ILGPUMethods.Allocate(data);

        int lengthX = data.GetLength(0);
        int lengthY = data.GetLength(1);
        AxesX = Enumerable.Range(0, lengthX);
        AxesY = Enumerable.Range(0, lengthY);
    }
}
/// <summary>
/// Clones a 2D buffer through the executor of <c>ProcessingDevice.FloatArrayDevice</c>.
/// </summary>
/// <typeparam name="T">Element type of the buffer.</typeparam>
/// <param name="data">Buffer to clone.</param>
/// <returns>The buffer returned by the executor's <c>CloneBuffer</c>.</returns>
public static MemoryBuffer2D<T> Clone<T>(MemoryBuffer2D<T> data) where T : struct
    => ProcessingDevice.FloatArrayDevice.Executor.CloneBuffer(data);
/// <summary>
/// Re-initializes the instance with a fresh, zero-filled GPU byte buffer.
/// </summary>
/// <remarks>
/// An existing <c>Buffer</c> is disposed first, then the base initialization runs, and finally a new
/// buffer is allocated and <c>RequireCopyTo</c> is set.
/// </remarks>
/// <param name="args">
/// Initialization arguments; <c>args[0]</c> and <c>args[1]</c> must be boxed <see cref="int"/> values
/// and are passed, in that order, as the two dimensions of the new buffer.
/// </param>
public override void Initialize(params object[] args)
{
    if (Buffer != null)
    {
        Buffer.Dispose();
    }

    base.Initialize(args);

    var gpu = HardwareAcceleratorManager.GPUAccelerator;
    int firstDimension = (int)args[0];
    int secondDimension = (int)args[1];

    Buffer = gpu.Allocate<byte>(firstDimension, secondDimension);
    Buffer.MemSetToZero();
    RequireCopyTo = true;
}
/// <summary>
/// Writes a 2D buffer to <paramref name="writer"/> as text.
/// </summary>
/// <remarks>
/// Nothing is written when the buffer equals <c>GPUHelper.dummyBuffer2D</c>. Otherwise two header
/// lines are written (name and slice index, then both bounds), followed by all values separated by
/// a single space, with the second index varying fastest. No line break follows the values.
/// </remarks>
/// <param name="writer">Destination writer.</param>
/// <param name="buffer">Buffer to save.</param>
/// <param name="sliceIndex">Slice index written to the header.</param>
/// <param name="name">Buffer name written to the header.</param>
protected void SaveBuffer(StreamWriter writer, MemoryBuffer2D<float> buffer, int sliceIndex, string name)
{
    if (!buffer.Equals(GPUHelper.dummyBuffer2D))
    {
        writer.WriteLine("buffer2D: " + name + " sliceIndex: " + sliceIndex);

        var values = buffer.GetAs2DArray();
        int xBound = values.GetUpperBound(0) + 1;
        int yBound = values.GetUpperBound(1) + 1;
        writer.WriteLine("xBound: " + xBound + " yBound: " + yBound);

        int x = 0;
        while (x < xBound)
        {
            int y = 0;
            while (y < yBound)
            {
                writer.Write(values[x, y] + " ");
                y++;
            }

            x++;
        }
    }
}
// DO NOT CHANGE FUNCTION PARAMETERS
// width and height are the output bitmap size
// the code will be unloaded on resize
// setup is always called once before loop

/// <summary>
/// Allocates the canvas and bitmap buffers, creates the particle system and loads both kernels.
/// </summary>
/// <param name="accelerator">Accelerator used for all allocations and kernel loading.</param>
/// <param name="width">Output bitmap width.</param>
/// <param name="height">Output bitmap height.</param>
public static void setup(Accelerator accelerator, int width, int height)
{
    canvasData = accelerator.Allocate<Vec3>(width, height);
    h_particleSystem = new HostParticleSystemStructOfArrays(particleCount, accelerator, width, height);
    c = new DeviceData(canvasData, h_particleSystem.deviceParticleSystem, width, height);

    // Three bytes per pixel.
    int bitmapByteCount = width * height * 3;
    bitmapData = accelerator.Allocate<byte>(bitmapByteCount);

    frameBufferToBitmap = accelerator.LoadAutoGroupedStreamKernel<Index2, DeviceData, ArrayView<byte>>(DeviceData.CanvasToBitmap);
    particleProcessingKernel = accelerator.LoadAutoGroupedStreamKernel<Index1, DeviceData>(particleKernel);
}
/// <summary>
/// Asserts that the contents of a 2D GPU buffer equal <paramref name="expected"/>.
/// </summary>
/// <remarks>
/// Both dimensions are compared before the elements, so a shape mismatch is reported as an assertion
/// failure instead of an <see cref="System.IndexOutOfRangeException"/> from the element loop.
/// </remarks>
/// <typeparam name="T">Element type of the buffer.</typeparam>
/// <param name="buffer">Buffer to read back on the accelerator's default stream.</param>
/// <param name="expected">Expected contents, indexed the same way as the array read back from the buffer.</param>
public void Verify2D<T>(MemoryBuffer2D<T> buffer, T[,] expected) where T : unmanaged
{
    var data = buffer.GetAs2DArray(Accelerator.DefaultStream);

    // xUnit convention: expected value first, actual value second.
    Assert.Equal(expected.GetLength(0), data.GetLength(0));
    Assert.Equal(expected.GetLength(1), data.GetLength(1));

    for (int i = 0; i < data.GetLength(0); ++i)
    {
        for (int j = 0; j < data.GetLength(1); ++j)
        {
            Assert.Equal(expected[i, j], data[i, j]);
        }
    }
}
/// <summary>
/// Uploads <c>array2d</c> to the GPU buffer, creating the stream and the buffer on first use.
/// </summary>
/// <remarks>
/// The buffer is allocated once with the sizes reported by <c>GetSize(0)</c> and <c>GetSize(1)</c>
/// and reused afterwards; this is fine because matrices cannot be resized.
/// </remarks>
public void CopyToGPU()
{
    var gpu = HardwareAcceleratorManager.GPUAccelerator;

    if (stream == null)
    {
        stream = gpu.CreateStream();
    }

    if (buffer == null)
    {
        int firstSize = GetSize(0);
        int secondSize = GetSize(1);
        buffer = gpu.Allocate<double>(firstSize, secondSize);
    }

    buffer.CopyFrom(stream, array2d, Index2.Zero, Index2.Zero, buffer.Extent);
}
/// <summary>
/// Runs the repeated-tile kernel over the given tiles and converts its output into two dictionaries.
/// </summary>
/// <remarks>
/// A host table with one row per tile and three columns is built: column 0 starts at 0, columns 1 and 2
/// hold the tile key's X and Y. The table is uploaded, <c>kernel</c> is launched over a
/// (tiles.Count x tiles.Count) extent, and the table is copied back after synchronizing.
///
/// Result item 1 contains every tile whose column 0 is still 0 after the kernel, mapped to -1.
/// Result item 2 maps every tile to <c>TileKey((v - 1) % tilesperRow, (v - 1) / tilesperRow, tilesperRow)</c>
/// where v is the tile's column 0. For v == 0 and tilesperRow greater than 1 this yields X == -1,
/// which the final loop treats as "no reference".
/// The final loop follows reference chains in item 2: while the referenced key has X >= 0 the entry is
/// re-pointed to it, so each entry ends up pointing at the last key of its chain.
///
/// NOTE(review): presumably v is a 1-based linear index of the tile this one repeats — confirm against the kernel.
/// NOTE(review): <c>ret2[aux]</c> throws if a referenced key is missing from item 2, and a cyclic chain
/// would never terminate — confirm the kernel cannot produce either.
/// </remarks>
/// <param name="tiles">Tiles to compare; only the keys are used.</param>
/// <param name="bpBuffer">Bitmap view passed to the kernel; its X extent divided by <paramref name="tilewidth"/> gives the tiles per row.</param>
/// <param name="tilewidth">Tile width in pixels.</param>
/// <param name="tileheight">Tile height in pixels.</param>
/// <returns>A tuple of the two dictionaries described above.</returns>
public static Tuple <ConcurrentDictionary <TileKey, int>, ConcurrentDictionary <TileKey, TileKey> > Execute(ConcurrentDictionary <TileKey, int> tiles, ArrayView2D <int> bpBuffer, int tilewidth, int tileheight) { int[,] ts = new int[tiles.Count, 3]; int i = 0; foreach (var kvp in tiles) { ts[i, 0] = 0; ts[i, 1] = kvp.Key.X; ts[i, 2] = kvp.Key.Y; i++; } int tilesperRow = bpBuffer.Extent.X / tilewidth; using (MemoryBuffer2D <int> res = HardwareAcceleratorManager.GPUAccelerator.Allocate <int>(ts.GetLength(0), ts.GetLength(1))) { res.CopyFrom(ts, Index2.Zero, Index2.Zero, res.Extent); kernel(new Index2(tiles.Count, tiles.Count), bpBuffer, res, new Index2(tilewidth, tileheight), tilesperRow); HardwareAcceleratorManager.GPUAccelerator.Synchronize(); res.CopyTo(ts, Index2.Zero, Index2.Zero, res.Extent); } ConcurrentDictionary <TileKey, int> ret = new ConcurrentDictionary <TileKey, int>(); ConcurrentDictionary <TileKey, TileKey> ret2 = new ConcurrentDictionary <TileKey, TileKey>(); Parallel.For(0, tiles.Count, x => { if (ts[x, 0] == 0) { ret.TryAdd(new TileKey(ts[x, 1], ts[x, 2], tilesperRow), -1); } ret2.TryAdd(new TileKey(ts[x, 1], ts[x, 2], tilesperRow), new TileKey((ts[x, 0] - 1) % tilesperRow, (ts[x, 0] - 1) / tilesperRow, tilesperRow)); }); foreach (var kvp in ret2) { if (kvp.Value.X >= 0) { TileKey aux = kvp.Value; while (aux.X >= 0) { ret2[kvp.Key] = aux; aux = ret2[aux]; } } } return(new Tuple <ConcurrentDictionary <TileKey, int>, ConcurrentDictionary <TileKey, TileKey> >(ret, ret2)); }
/// <summary>
/// Renders one 1920x1080 frame of the cat mesh on a Cuda accelerator, saves it as <c>out.bmp</c>
/// and opens the file through <c>cmd.exe</c>.
/// </summary>
public static void Main()
{
    using Context context = new Context();
    context.EnableAlgorithms();
    using Accelerator device = new CudaAccelerator(context);

    int width = 1920;
    int height = 1080;
    byte[] h_bitmapData = new byte[width * height * 3];

    using MemoryBuffer2D<Vec3> canvasData = device.Allocate<Vec3>(width, height);
    using MemoryBuffer<byte> d_bitmapData = device.Allocate<byte>(width * height * 3);
    CanvasData c = new CanvasData(canvasData, d_bitmapData, width, height);

    // Camera arguments: position, look-at target, up vector, then size and the final float argument.
    Camera camera = new Camera(new Vec3(0, 50, -100), new Vec3(0, 0, 0), new Vec3(0, -1, 0), width, height, 40f);

    WorldData world = new WorldData(device);
    world.loadMesh(new Vec3(0, 0, 0), "./Assets/cat.obj");

    var frameBufferToBitmap = device.LoadAutoGroupedStreamKernel<Index2, CanvasData>(CanvasData.CanvasToBitmap);
    var RTMethod = device.LoadAutoGroupedStreamKernel<Index2, CanvasData, dWorldBuffer, Camera>(PerPixelRayIntersectionMethod);

    Stopwatch timer = new Stopwatch();
    timer.Start();

    RTMethod(new Index2(width, height), c, world.getDeviceWorldBuffer(), camera);
    frameBufferToBitmap(canvasData.Extent, c);
    device.Synchronize();
    d_bitmapData.CopyTo(h_bitmapData, 0, 0, d_bitmapData.Extent);

    timer.Stop();
    Console.WriteLine("Rendered in: " + timer.Elapsed);

    // The Bitmap reads straight from the managed array, so the array must stay pinned for as long as
    // the Bitmap exists; otherwise the GC is free to move it underneath the native pointer.
    // The stride passed here is width * 3 with no padding, which only works while that is a multiple of 4.
    GCHandle pinnedBitmapData = GCHandle.Alloc(h_bitmapData, GCHandleType.Pinned);
    try
    {
        using (Bitmap b = new Bitmap(width, height, width * 3, PixelFormat.Format24bppRgb, pinnedBitmapData.AddrOfPinnedObject()))
        {
            b.Save("out.bmp");
        }
    }
    finally
    {
        pinnedBitmapData.Free();
    }

    Process.Start("cmd.exe", "/c out.bmp");
}
/// <summary>
/// Builds this tile's buffer from a region of <paramref name="bp"/> using the palette in <paramref name="coldic"/>.
/// </summary>
/// <remarks>
/// The region starts at (<paramref name="xOffset"/>, <paramref name="yOffset"/>) and is clipped to the
/// bitmap bounds as well as to <c>Width</c> x <c>Height</c>. The region and the palette (as an array
/// where position <c>kvp.Value</c> holds <c>kvp.Key</c>) are uploaded and passed to
/// <c>BuildFromBitmapKernel.Execute</c> together with <c>Buffer</c>. Both temporary GPU buffers are
/// released even when the upload or the kernel throws.
/// </remarks>
/// <param name="bp">Source bitmap.</param>
/// <param name="coldic">Maps a color to its palette position; positions must lie in [0, coldic.Count).</param>
/// <param name="xOffset">First-dimension offset of the region in <paramref name="bp"/>.</param>
/// <param name="yOffset">Second-dimension offset of the region in <paramref name="bp"/>.</param>
/// <exception cref="ArgumentNullException"><paramref name="bp"/> or <paramref name="coldic"/> is null.</exception>
public override void BuildFromBitmap(Int32[,] bp, ConcurrentDictionary<Int32, int> coldic, int xOffset, int yOffset)
{
    if (bp == null)
    {
        throw new ArgumentNullException(nameof(bp));
    }

    if (coldic == null)
    {
        throw new ArgumentNullException(nameof(coldic));
    }

    int sizew = Math.Min(Width, bp.GetLength(0) - xOffset);
    int sizeh = Math.Min(Height, bp.GetLength(1) - yOffset);

    // One parallel level is enough; nesting a second Parallel.For per column only adds scheduling overhead.
    int[,] til = new int[sizew, sizeh];
    Parallel.For(0, sizew, i =>
    {
        for (int j = 0; j < sizeh; j++)
        {
            til[i, j] = bp[i + xOffset, j + yOffset];
        }
    });

    using (MemoryBuffer2D<Int32> buff = HardwareAcceleratorManager.GPUAccelerator.Allocate<Int32>(sizew, sizeh))
    {
        buff.CopyFrom(til, Index2.Zero, Index2.Zero, buff.Extent);

        int[] kvps = new int[coldic.Count];
        Parallel.ForEach(coldic, kvp => { kvps[kvp.Value] = kvp.Key; });

        using (MemoryBuffer<int> pal = HardwareAcceleratorManager.GPUAccelerator.Allocate<int>(kvps.Length))
        {
            pal.CopyFrom(kvps, 0, 0, kvps.Length);
            BuildFromBitmapKernel.Execute(buff.Extent, Buffer, buff, pal);
        }
    }
}
/// <summary>
/// Evaluates the given nodes on the accelerator and stores one result per node array in <c>Results</c>.
/// </summary>
/// <remarks>
/// The evaluation kernel is launched over (nodeArrayStarts.Length x IndependentsTableSize.X) and writes
/// into <c>Results2D</c>; the result-processing kernel then runs once per node array and writes into
/// <c>Results1D</c>, which is copied back to the host. All four GPU buffers are temporary: they are
/// disposed when this method exits, including when a copy or kernel launch throws, so the
/// <c>Nodes</c>, <c>NodeArrayStarts</c>, <c>Results2D</c> and <c>Results1D</c> members refer to
/// disposed buffers afterwards.
/// </remarks>
/// <param name="nodes">All nodes, uploaded to <c>Nodes</c>.</param>
/// <param name="nodeArrayStarts">Uploaded to <c>NodeArrayStarts</c>; its length defines the number of results.</param>
public void Run(NodeGPU[] nodes, int[] nodeArrayStarts)
{
    using (var nodesBuffer = Accelerator.Allocate<NodeGPU>(nodes.Length))
    {
        Nodes = nodesBuffer;
        Nodes.CopyFrom(nodes, new Index1(), new Index1(), nodes.Length);

        using (var nodeArrayStartsBuffer = Accelerator.Allocate<int>(nodeArrayStarts.Length))
        {
            NodeArrayStarts = nodeArrayStartsBuffer;
            NodeArrayStarts.CopyFrom(nodeArrayStarts, 0, new Index1(), nodeArrayStarts.Length);

            using (var results2DBuffer = Accelerator.Allocate<double>(nodeArrayStarts.Length, IndependentsTableSize.X))
            using (var results1DBuffer = Accelerator.Allocate<double>(nodeArrayStarts.Length))
            {
                Results2D = results2DBuffer;
                Results1D = results1DBuffer;

                Index2 index = new Index2(nodeArrayStarts.Length, IndependentsTableSize.X);
                EvaluationKernel(index, Independents, Dependants, Nodes, NodeArrayStarts, Results2D);
                Accelerator.Synchronize();

                ProcessResultsKernel(nodeArrayStarts.Length, Results2D, Results1D);
                Accelerator.Synchronize();

                Results = new double[nodeArrayStarts.Length];
                Results1D.CopyTo(Results, new Index1(), 0, Results1D.Extent);
            }
        }
    }
}
/// <summary>
/// Runs the three tile-checker kernels on two tile positions and decodes the resulting bit mask.
/// </summary>
/// <remarks>
/// The kernels run in order (<c>kernel</c>, <c>kernelPerLine</c>, <c>kernelResume</c>) on a scratch
/// buffer of size <paramref name="extent"/>, each followed by a synchronize. Only the final state of the
/// scratch buffer is copied back, and only its element [0, 0] is inspected.
/// </remarks>
/// <param name="extent">Tile size; also the size of the scratch buffer.</param>
/// <param name="bp">Bitmap view passed to the first kernel.</param>
/// <param name="offT1">Offset of the first tile.</param>
/// <param name="offT2">Offset of the second tile.</param>
/// <returns>
/// The position of the lowest set bit among bits 0 to 3 of the result (0, 1, 2 or 3), or -1 when none of them is set.
/// </returns>
public static int Execute(Index2 extent, ArrayView2D<int> bp, Index2 offT1, Index2 offT2)
{
    int[,] res = new int[extent.X, extent.Y];
    using (MemoryBuffer2D<int> s = HardwareAcceleratorManager.GPUAccelerator.Allocate<int>(extent))
    {
        kernel(extent, bp, offT1, offT2, new Index2(16, 16), s);
        HardwareAcceleratorManager.GPUAccelerator.Synchronize();

        kernelPerLine(extent.X, s);
        HardwareAcceleratorManager.GPUAccelerator.Synchronize();

        kernelResume(1, s);
        HardwareAcceleratorManager.GPUAccelerator.Synchronize();

        // Intermediate states are never read on the host, so a single copy after the last kernel suffices.
        s.CopyTo(res, Index2.Zero, Index2.Zero, s.Extent);
    }

    int ret = res[0, 0];
    if ((ret & 1) == 1)
    {
        return 0;
    }

    if ((ret & 2) == 2)
    {
        return 1;
    }

    if ((ret & 4) == 4)
    {
        return 2;
    }

    if ((ret & 8) == 8)
    {
        return 3;
    }

    return -1;
}
/// <summary>
/// Creates a wrapper around an existing 2D memory buffer.
/// </summary>
/// <param name="memoryBuffer">Buffer to wrap; the reference is stored as-is.</param>
public Array2DW(MemoryBuffer2D<T> memoryBuffer)
{
    this._memoryBuffer = memoryBuffer;
}
/// <summary>
/// Repeatedly processes <paramref name="clusters"/> on the GPU until no further candidate is found,
/// then copies the result back into <paramref name="clusters"/>.
/// </summary>
/// <remarks>
/// With X and Y the two extents of the uploaded cluster buffer, two scratch buffers are used:
/// <c>diffs</c> (X x X x 5) and <c>bests</c> (X x (3 + X)). Each iteration:
/// 1. Zeroes both scratch buffers, runs <c>kernel</c> over (X x X) and <c>kernelBests</c> over X, and
///    copies <c>bests</c> to the host.
/// 2. Picks the maximum row triple (column 0, then column 1, then column 2) across all rows; the
///    running maximum of column 0 starts at -1, so any row with column 0 >= 0 qualifies.
/// 3. If no row qualifies the loop ends. Otherwise, for every row equal to that triple, columns 3 and
///    up are read until the first negative value, producing pairs (row index, column value) in
///    <c>bob1</c>/<c>bob2</c>.
/// 4. Builds <c>bob3</c>: entry i is <c>bob1[i]</c> unless that value already occurs earlier in
///    <c>bob3</c>, in which case the first value of <c>bob2</c> (searching from index k) that does not
///    occur earlier in <c>bob3</c> is used and k is moved to its index.
/// 5. Uploads the three arrays and runs <c>kernelInvalidateCluster</c>, <c>kernelCopyNewPal</c> and
///    <c>kernelFilter</c> (twice), each followed by a synchronize, then <c>kernelRemoveRepeated</c> over (X x X).
/// NOTE(review): the meaning of the values in <c>bests</c> and <c>diffs</c> is defined by the kernels,
/// which are not visible here — confirm before relying on the names used above.
/// </remarks>
/// <param name="clusters">Cluster table; overwritten in place with the final GPU contents.</param>
public static void Execute(int[,] clusters) { using (MemoryBuffer2D <int> clusterBuff = HardwareAcceleratorManager.GPUAccelerator.Allocate <int>(clusters.GetLength(0), clusters.GetLength(1))) { clusterBuff.CopyFrom(clusters, Index2.Zero, Index2.Zero, clusterBuff.Extent); using (MemoryBuffer3D <int> diffs = HardwareAcceleratorManager.GPUAccelerator.Allocate <int>(clusterBuff.Extent.X, clusterBuff.Extent.X, 5)) using (MemoryBuffer2D <int> bests = HardwareAcceleratorManager.GPUAccelerator.Allocate <int>(clusterBuff.Extent.X, 3 + clusterBuff.Extent.X)) { bool change = true; while (change) { change = false; diffs.MemSetToZero(); bests.MemSetToZero(); kernel(new Index2(clusterBuff.Extent.X, clusterBuff.Extent.X), clusterBuff, diffs); HardwareAcceleratorManager.GPUAccelerator.Synchronize(); kernelBests(bests.Extent.X, diffs, bests); HardwareAcceleratorManager.GPUAccelerator.Synchronize(); int[,] best = new int[bests.Extent.X, bests.Extent.Y]; bests.CopyTo(best, Index2.Zero, Index2.Zero, bests.Extent); int eq = -1; int diffl = -1; int ts = -1; for (int i = 0; i < best.GetLength(0); i++) { if (best[i, 0] > eq) { eq = best[i, 0]; diffl = best[i, 1]; ts = best[i, 2]; } else if (best[i, 0] == eq && best[i, 1] > diffl) { diffl = best[i, 1]; ts = best[i, 2]; } else if (best[i, 0] == eq && best[i, 1] == diffl && ts < best[i, 2]) { ts = best[i, 2]; } } if (eq >= 0) { change = true; List <int> bestofbest = new List <int>(); List <int> bestofbestID = new List <int>(); for (int i = 0; i < best.GetLength(0); i++) { if (eq == best[i, 0] && diffl == best[i, 1] && ts == best[i, 2]) { for (int j = 3; j < best.GetLength(1); j++) { if (best[i, j] < 0) { break; } else { bestofbest.Add(i); bestofbestID.Add(best[i, j]); } } } } int[] bob1 = bestofbest.ToArray(); int[] bob2 = bestofbestID.ToArray(); int[] bob3 = new int[bob1.Length]; bool repeated; for (int i = 0, k = 0; i < bob1.Length; i++) { bob3[i] = bob1[i]; repeated = false; for (int j = 0; j < i; j++) { if (bob3[i] == bob3[j]) { repeated 
= true; } } if (repeated) { for (int j = k; j < bob2.Length; j++) { repeated = false; for (int q = 0; q < i; q++) { if (bob3[q] == bob2[j]) { repeated = true; break; } } if (!repeated) { bob3[i] = bob2[j]; k = j; break; } } } } using (MemoryBuffer <int> bobbuff1 = HardwareAcceleratorManager.GPUAccelerator.Allocate <int>(bestofbest.Count)) using (MemoryBuffer <int> bobbuff2 = HardwareAcceleratorManager.GPUAccelerator.Allocate <int>(bestofbestID.Count)) using (MemoryBuffer <int> newPos = HardwareAcceleratorManager.GPUAccelerator.Allocate <int>(bestofbest.Count)) using (MemoryBuffer2D <int> newPals = HardwareAcceleratorManager.GPUAccelerator.Allocate <int>(bestofbestID.Count, clusterBuff.Extent.Y)) { newPals.MemSetToZero(); bobbuff1.CopyFrom(bob1, 0, 0, bobbuff1.Extent); bobbuff2.CopyFrom(bob2, 0, 0, bobbuff2.Extent); newPos.CopyFrom(bob3, 0, 0, newPos.Extent); kernelInvalidateCluster(bobbuff1.Extent, bobbuff1, bobbuff2, newPals, clusterBuff); HardwareAcceleratorManager.GPUAccelerator.Synchronize(); kernelCopyNewPal(newPos.Extent, newPos, newPals, clusterBuff); HardwareAcceleratorManager.GPUAccelerator.Synchronize(); kernelFilter(bobbuff1.Extent, bobbuff1, newPos, clusterBuff); HardwareAcceleratorManager.GPUAccelerator.Synchronize(); kernelFilter(bobbuff2.Extent, bobbuff2, newPos, clusterBuff); HardwareAcceleratorManager.GPUAccelerator.Synchronize(); } kernelRemoveRepeated(new Index2(clusterBuff.Extent.X, clusterBuff.Extent.X), clusterBuff); HardwareAcceleratorManager.GPUAccelerator.Synchronize(); } } } clusterBuff.CopyTo(clusters, Index2.Zero, Index2.Zero, clusterBuff.Extent); } }
// Assigns tiles to palettes and returns, per palette index, the list of tiles that can be drawn with it.
//
// For each palette, in array order:
//   1. Every tile still in cloneTiles (a private copy of the keys of `tiles`) is checked pixel by pixel:
//      the pixel is converted with bpp.ShortColor and must be a key of the palette's color dictionary.
//      The inner `break` leaves the row loop and the `&& add` condition stops the column loop.
//   2. Tiles that pass get a new K built with BuildFromBitmap and are stored in curtileQ; they are then
//      removed from cloneTiles, so a tile is built for the first palette that fits it only.
//   3. Every entry of `alltiles` is examined:
//        - Value.X == -1 and the tile itself was built  -> queued as (key, false, false, its tile);
//        - otherwise, if the referenced tile (kvp.Value) was built, TileCheckerKernel.Execute compares
//          the two positions; a result >= 0 queues (key, (flip & 1) == 1, (flip & 2) == 2, referenced tile).
//   4. The queued results are stored under palettes[x].RealObject.Index.
//
// bpp is taken from the first palette only, so palettes must not be empty.
// NOTE(review): presumably the two booleans are horizontal/vertical flip flags — confirm against TileCheckerKernel.
// NOTE(review): ret.TryAdd silently ignores a second palette with the same Index — confirm indices are unique.
GetTiles <T, K>(T[] palettes, ConcurrentDictionary <TileKey, int> tiles, ConcurrentDictionary <TileKey, TileKey> alltiles, Int32[,] bp, int tileWidth, int tileHeight) where T : ColorPaletteDisguise, new() where K : IndexedBitmapBufferDisguise, new() { BytesPerPixel bpp = palettes[0].RealObject.BytesPerColor; ConcurrentDictionary <Int32, int> colDic; ConcurrentDictionary <TileKey, K> curtileQ; int rm; int flip; ConcurrentDictionary <TileKey, int> cloneTiles = new ConcurrentDictionary <TileKey, int>(); ConcurrentQueue <Tuple <TileKey, bool, bool, K> > results; Parallel.ForEach(tiles, kvp => { cloneTiles.TryAdd(kvp.Key, 0); }); ConcurrentDictionary <ColorPaletteIndex, List <Tuple <TileKey, bool, bool, K> > > ret = new ConcurrentDictionary <ColorPaletteIndex, List <Tuple <TileKey, bool, bool, K> > >(); using (MemoryBuffer2D <int> bpbuff = HardwareAcceleratorManager.GPUAccelerator.Allocate <int>(bp.GetLength(0), bp.GetLength(1))) { bpbuff.CopyFrom(bp, Index2.Zero, Index2.Zero, bpbuff.Extent); for (int x = 0; x < palettes.Length; x++) { colDic = palettes[x].RealObject.ToColorDictionary(); curtileQ = new ConcurrentDictionary <TileKey, K>(); results = new ConcurrentQueue <Tuple <TileKey, bool, bool, K> >(); foreach (var kvp in cloneTiles) { int curpX = 0; int curpY = 0; int offX = kvp.Key.X * tileWidth; int offY = kvp.Key.Y * tileHeight; int c; bool add = true; K curTile; for (int i = 0; i < tileWidth && add; i++) { curpX = i + offX; for (int j = 0; j < tileHeight; j++) { curpY = j + offY; c = bp[curpX, curpY]; c = bpp.ShortColor(c); if (!colDic.ContainsKey(c)) { add = false; break; } } } if (add) { curTile = IndexedBitmapBufferDisguise.Generate <K>(tileWidth, tileHeight); curTile.RealObject.BuildFromBitmap(bp, colDic, offX, offY); curtileQ.TryAdd(kvp.Key, curTile); } } foreach (var t in curtileQ) { cloneTiles.TryRemove(t.Key, out rm); } foreach (var kvp in alltiles) { if (kvp.Value.X == -1 && curtileQ.ContainsKey(kvp.Key)) { results.Enqueue(new Tuple <TileKey, bool, bool, 
K>(kvp.Key, false, false, curtileQ[kvp.Key])); } else if (curtileQ.ContainsKey(kvp.Value)) { flip = TileCheckerKernel.Execute(new Index2(tileWidth, tileHeight), bpbuff, new Index2(kvp.Value.X * tileWidth, kvp.Value.Y * tileHeight), new Index2(kvp.Key.X * tileWidth, kvp.Key.Y * tileHeight)); if (flip >= 0) { results.Enqueue(new Tuple <TileKey, bool, bool, K>(kvp.Key, (flip & 1) == 1, (flip & 2) == 2, curtileQ[kvp.Value])); } } } ret.TryAdd(palettes[x].RealObject.Index, results.ToList()); } } return(ret); }