/// <summary>
/// Synchronizes the main stream and the context, then reads the total iteration
/// and dot counters back from device memory and derives the mean dot density.
/// </summary>
/// <returns>
/// A <c>Stats</c> value with <c>TotalIterCount</c>, <c>TotalDotCount</c> and
/// <c>meanDotsPerSubpixel</c> (total dots divided by xRes * yRes * AALevel * AALevel) set.
/// </returns>
public override FractalEngine.Stats GatherStats()
{
    // Synchronize before the read-back; presumably this lets pending kernels
    // finish updating the counters first.
    mainStream.Synchronize();
    context.Synchronize();

    Stats stats = new Stats();
    CudaMem.CopyStructRaw(totalIterCountMem, out stats.TotalIterCount);
    CudaMem.CopyStructRaw(totalDotCountMem, out stats.TotalDotCount);

    // Widen each factor to double *before* multiplying. Multiplying first and casting
    // afterwards can silently wrap around for large resolutions / AA levels when the
    // fields are integers.
    double subpixelCount = (double)xRes * (double)yRes * (double)AALevel * (double)AALevel;

    // Floating-point division: a zero subpixel count yields NaN or infinity, not an exception.
    stats.meanDotsPerSubpixel = (float)((double)stats.TotalDotCount / subpixelCount);

    return stats;
}
/// <summary>
/// Synchronizes the main stream and the context, then copies the single global
/// statistics entry back from <c>globalStatBuffer</c> and derives the mean dot density.
/// </summary>
/// <returns>
/// A <c>Stats</c> value with <c>TotalIterCount</c>, <c>TotalDotCount</c> and
/// <c>meanDotsPerSubpixel</c> (total dots divided by xRes * yRes * AALevel * AALevel) set.
/// </returns>
public override FractalEngine.Stats GatherStats()
{
    // Synchronize before the read-back; presumably this lets pending kernels
    // finish updating the global entry first.
    mainStream.Synchronize();
    context.Synchronize();

    // The copy helper takes an array destination, so read into a one-element array.
    NativeGlobalStatEntry[] globalEntries = new NativeGlobalStatEntry[1];
    CudaMem.Copy(globalStatBuffer, globalEntries);

    Stats stats = new Stats();
    stats.TotalIterCount = globalEntries[0].IterCount;
    stats.TotalDotCount = globalEntries[0].DotCount;

    // Widen each factor to double *before* multiplying. Multiplying first and casting
    // afterwards can silently wrap around for large resolutions / AA levels when the
    // fields are integers.
    double subpixelCount = (double)xRes * (double)yRes * (double)AALevel * (double)AALevel;

    // Floating-point division: a zero subpixel count yields NaN or infinity, not an exception.
    stats.meanDotsPerSubpixel = (float)((double)stats.TotalDotCount / subpixelCount);

    return stats;
}
/// <summary>
/// Replaces the palette texture used by the iterate kernel with the pixels of
/// <paramref name="palette"/>.
/// </summary>
/// <remarks>
/// The previous <c>paletteImage</c> array is freed and a new 4-channel float array of the
/// palette's size is allocated. Each pixel's R, G and B are divided by 255 and stored with
/// alpha fixed at 1.0. The texture reference is configured for clamped addressing,
/// linear filtering and normalized coordinates, then attached to <c>iterateKernel</c>.
/// </remarks>
/// <param name="palette">Palette to upload; width and height must both be positive.</param>
/// <exception cref="ArgumentNullException"><paramref name="palette"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="palette"/> has a non-positive width or height.</exception>
public override void ApplyPalette(Palette palette)
{
    // Validate before touching the GPU so bad input fails fast and leaves state untouched.
    if (palette == null)
    {
        throw new ArgumentNullException("palette");
    }
    if (palette.Width <= 0 || palette.Height <= 0)
    {
        throw new ArgumentException("palette may not be empty.");
    }

    // Synchronize before freeing the old array; presumably so that no in-flight
    // work is still sampling it.
    context.Synchronize();
    mainStream.Synchronize();

    paletteImage.Free();
    paletteImage = CudaArray.Allocate(palette.Width, palette.Height, CudaArrayFormat.Float, 4);

    HostBuffer2D<Vec4> hostPaletteBuffer = HostBuffer2D<Vec4>.Alloc(palette.Width, palette.Height);
    try
    {
        for (int y = 0; y < palette.Height; y++)
        {
            for (int x = 0; x < palette.Width; x++)
            {
                Color col = palette.GetPixel(x, y);
                hostPaletteBuffer[y, x] = new Vec4(
                    (float)col.R / 255.0f,
                    (float)col.G / 255.0f,
                    (float)col.B / 255.0f,
                    1.0f);
            }
        }

        CudaMem.Copy(hostPaletteBuffer, paletteImage);
    }
    finally
    {
        // Release the staging buffer even if the fill or the upload throws.
        hostPaletteBuffer.Free();
    }

    paletteTex.Array = paletteImage;
    paletteTex.SetFormat(CudaArrayFormat.Float, 4);
    paletteTex.AddressModeX = TexAddressMode.Clamp;
    paletteTex.AddressModeY = TexAddressMode.Clamp;
    paletteTex.FilterMode = TexFilterMode.Linear;
    paletteTex.Flags = TexFlags.NormalizedCoordinates;
    iterateKernel.SetTexRef(paletteTex);

    context.Synchronize();
}
/// <summary>
/// Creates an engine on the supplied device: opens a context, loads the kernel module,
/// allocates the per-iterator device buffers, uploads random seeds and launches the
/// iterator initialisation kernel.
/// </summary>
/// <param name="device">Device on which the context is created; must not be null.</param>
/// <exception cref="ArgumentException"><paramref name="device"/> is null.</exception>
internal CudaFractalEngine(Device device)
{
    if (device == null)
    {
        throw new ArgumentException("Invalid device passed to CudaFractalEngine.", "device");
    }

    this.device = device;
    context = device.CreateContext();

    // Two blocks per multiprocessor, clamped to the range [2, 64].
    iterBlockCount = Util.Clamp(device.MultiprocessorCount * 2, 2, 64);

    // Load the module from the embedded kernels_ptx resource.
    System.IO.MemoryStream kernelImage = new System.IO.MemoryStream(Kernels.KernelResources.kernels_ptx);
    module = context.LoadModule(kernelImage);

    // Look up every kernel entry point by name.
    initIteratorsKernel = module.GetKernel("init_iterators_kernel");
    resetIteratorsKernel = module.GetKernel("reset_iterators_kernel");
    iterateKernel = module.GetKernel("iterate_kernel");
    updateStatsKernel = module.GetKernel("update_stats_kernel");
    resetOutputKernel = module.GetKernel("reset_output_kernel");
    updateOutputKernel = module.GetKernel("update_output_kernel");

    glOutputBufferID = 0;
    mainStream = new Cuda.Stream();

    // Device-side state, one element per iterator (plus a single global stat entry).
    iterPosStateBuffer = DeviceBuffer.Alloc(8, IteratorCount);
    iterColorStateBuffer = DeviceBuffer.Alloc(8, IteratorCount);

    int iterStatEntrySize = Marshal.SizeOf(typeof(NativeIterStatEntry));
    iterStatBuffer = DeviceBuffer.Alloc(iterStatEntrySize, IteratorCount);

    int globalStatEntrySize = Marshal.SizeOf(typeof(NativeGlobalStatEntry));
    globalStatBuffer = DeviceBuffer.Alloc(globalStatEntrySize, 1);

    entropyXBuffer = DeviceBuffer.Alloc(16, IteratorCount);
    entropyCBuffer = DeviceBuffer.Alloc(4, IteratorCount);
    entropySeedBuffer = DeviceBuffer.Alloc(4, IteratorCount);

    // One host-generated seed per iterator, drawn via rand.Next(65536).
    uint[] hostSeeds = new uint[IteratorCount];
    for (int seedIndex = 0; seedIndex < hostSeeds.Length; seedIndex++)
    {
        hostSeeds[seedIndex] = (uint)rand.Next(65536);
    }
    CudaMem.Copy(hostSeeds, entropySeedBuffer);

    // No palette yet; the texture reference is only looked up here.
    paletteImage = CudaArray.Null;
    paletteTex = module.GetTexRef("paletteTex");

    resetBeginEvt = new Event();
    resetEndEvt = new Event();
    cycleIterEvt = new Event();
    cycleStatEvt = new Event();
    cycleEndEvt = new Event();
    toneBeginEvt = new Event();
    toneEndEvt = new Event();

    // The three iterator kernels share one launch shape:
    // IterBlockSize threads per block, IterBlockCount blocks, no shared memory.
    initIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
    initIteratorsKernel.SetGridDim(IterBlockCount, 1);
    initIteratorsKernel.SetSharedSize(0);

    resetIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
    resetIteratorsKernel.SetGridDim(IterBlockCount, 1);
    resetIteratorsKernel.SetSharedSize(0);

    iterateKernel.SetBlockShape(IterBlockSize, 1, 1);
    iterateKernel.SetGridDim(IterBlockCount, 1);
    iterateKernel.SetSharedSize(0);

    // The stats kernel is configured as a single thread in a single block.
    updateStatsKernel.SetBlockShape(1, 1, 1);
    updateStatsKernel.SetGridDim(1, 1);
    updateStatsKernel.SetSharedSize(0);

    // Launch the init kernel on the entropy buffers and wait for it to finish.
    initIteratorsKernel.Launch(
        entropyXBuffer.Ptr.RawPtr,
        entropyCBuffer.Ptr.RawPtr,
        entropySeedBuffer.Ptr.RawPtr);
    context.Synchronize();
}
/// <summary>
/// Creates an engine on the first enumerated device: opens a context, loads the kernel
/// module, allocates the 2D device buffers and scalar device cells, publishes each of them
/// to the module under a matching constant name, uploads random seeds and launches the
/// iterator initialisation kernel.
/// </summary>
public CudaFractalEngine()
{
    // NOTE(review): assumes at least one device is present; Devices[0] is not guarded.
    device = Device.Devices[0];
    context = device.CreateContext();

    // Two blocks per multiprocessor, clamped to the range [2, 64].
    iterBlockCount = Util.Clamp(device.MultiprocessorCount * 2, 2, 64);

    // Load the module from the embedded kernels_ptx resource.
    System.IO.MemoryStream stream = new System.IO.MemoryStream(CudaResources.kernels_ptx);
    module = context.LoadModule(stream);

    // Look up every kernel entry point by name.
    initIteratorsKernel = module.GetKernel("init_iterators_kernel");
    resetIteratorsKernel = module.GetKernel("reset_iterators_kernel");
    iterateKernel = module.GetKernel("iterate_kernel");
    updateStatsKernel = module.GetKernel("update_stats_kernel");
    resetOutputKernel = module.GetKernel("reset_output_kernel");
    updateOutputKernel = module.GetKernel("update_output_kernel");

    glOutputBufferID = 0;
    mainStream = new Cuda.Stream();

    // Per-iterator 2D buffers (IterBlockSize x IterBlockCount). Each one is written
    // into the module constant of the same name.
    iterPosStateBuffer = DeviceBuffer2D.Alloc(8, IterBlockSize, IterBlockCount);
    module.WriteConstant("iterPosStateBuffer", iterPosStateBuffer);

    iterColorStateBuffer = DeviceBuffer2D.Alloc(16, IterBlockSize, IterBlockCount);
    module.WriteConstant("iterColorStateBuffer", iterColorStateBuffer);

    entropyXBuffer = DeviceBuffer2D.Alloc(16, IterBlockSize, IterBlockCount);
    module.WriteConstant("entropyXBuffer", entropyXBuffer);

    entropyCBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
    module.WriteConstant("entropyCBuffer", entropyCBuffer);

    entropySeedBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
    module.WriteConstant("entropySeedBuffer", entropySeedBuffer);

    // Fill a host staging buffer with one seed per iterator and upload it.
    HostBuffer2D<uint> hostEntropySeedBuffer = HostBuffer2D<uint>.Alloc(IterBlockSize, IterBlockCount);
    try
    {
        for (int y = 0; y < IterBlockCount; y++)
        {
            for (int x = 0; x < IterBlockSize; x++)
            {
                hostEntropySeedBuffer[y, x] = (uint)rand.Next(65536);
            }
        }

        CudaMem.Copy(hostEntropySeedBuffer, entropySeedBuffer);
    }
    finally
    {
        // Release the staging buffer even if the fill or the upload throws.
        hostEntropySeedBuffer.Free();
    }

    dotCountBuffer = DeviceBuffer2D.Alloc(8, IterBlockSize, IterBlockCount);
    module.WriteConstant("dotCountBuffer", dotCountBuffer);

    peakDensityBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
    module.WriteConstant("peakDensityBuffer", peakDensityBuffer);

    // Scalar device cells: two 8-byte totals and three 4-byte values.
    totalIterCountMem = DevicePtr.AllocRaw(8);
    module.WriteConstant("totalIterCountMem", totalIterCountMem);

    totalDotCountMem = DevicePtr.AllocRaw(8);
    module.WriteConstant("totalDotCountMem", totalDotCountMem);

    densityMem = DevicePtr.AllocRaw(4);
    module.WriteConstant("densityMem", densityMem);

    peakDensityMem = DevicePtr.AllocRaw(4);
    module.WriteConstant("peakDensityMem", peakDensityMem);

    scaleConstantMem = DevicePtr.AllocRaw(4);
    module.WriteConstant("scaleConstantMem", scaleConstantMem);

    // No palette yet; the texture reference is only looked up here.
    paletteImage = CudaArray.Null;
    paletteTex = module.GetTexRef("paletteTex");

    resetBeginEvt = new Event();
    resetEndEvt = new Event();
    cycleIterEvt = new Event();
    cycleStatEvt = new Event();
    cycleEndEvt = new Event();
    toneBeginEvt = new Event();
    toneEndEvt = new Event();

    // The three iterator kernels share one launch shape:
    // IterBlockSize threads per block, IterBlockCount blocks, no shared memory.
    initIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
    initIteratorsKernel.SetGridDim(IterBlockCount, 1);
    initIteratorsKernel.SetSharedSize(0);

    resetIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
    resetIteratorsKernel.SetGridDim(IterBlockCount, 1);
    resetIteratorsKernel.SetSharedSize(0);

    iterateKernel.SetBlockShape(IterBlockSize, 1, 1);
    iterateKernel.SetGridDim(IterBlockCount, 1);
    iterateKernel.SetSharedSize(0);

    // The stats kernel is configured as a single thread in a single block.
    updateStatsKernel.SetBlockShape(1, 1, 1);
    updateStatsKernel.SetGridDim(1, 1);
    updateStatsKernel.SetSharedSize(0);

    // The init kernel takes no launch arguments here; the buffers were already
    // written to the module via WriteConstant above.
    initIteratorsKernel.Launch();
    context.Synchronize();
}