/// <summary>
/// Uploads <paramref name="palette"/> to the device as a 4-channel float CUDA array and
/// binds it to the palette texture reference that is attached to the iterate kernel.
/// </summary>
/// <remarks>
/// Only the red, green and blue channels of each pixel are used (scaled from 0-255 to
/// 0-1); the fourth component is always written as 1. The context and the main stream
/// are synchronized before the previous palette array is freed, and the context is
/// synchronized once more before returning.
/// </remarks>
/// <param name="palette">Source palette; must have a positive width and height.</param>
/// <exception cref="ArgumentNullException"><paramref name="palette"/> is null.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="palette"/> has a zero or negative width or height.
/// </exception>
public override void ApplyPalette(Palette palette)
{
    // Validate before touching the device so bad input never costs a synchronization.
    if (object.ReferenceEquals(palette, null))
    {
        throw new ArgumentNullException("palette");
    }
    if (palette.Width <= 0 || palette.Height <= 0)
    {
        throw new ArgumentException("palette may not be empty.");
    }

    // Wait for outstanding work before releasing the array that work may still sample.
    context.Synchronize();
    mainStream.Synchronize();

    // Reset the field right after freeing so that a failed allocation below cannot
    // leave it referring to an already-freed array (which the next call would free again).
    paletteImage.Free();
    paletteImage = CudaArray.Null;
    paletteImage = CudaArray.Allocate(palette.Width, palette.Height, CudaArrayFormat.Float, 4);

    HostBuffer2D<Vec4> hostPaletteBuffer = HostBuffer2D<Vec4>.Alloc(palette.Width, palette.Height);
    try
    {
        for (int y = 0; y < palette.Height; y++)
        {
            for (int x = 0; x < palette.Width; x++)
            {
                Color col = palette.GetPixel(x, y);
                // The host buffer is indexed [row, column].
                hostPaletteBuffer[y, x] = new Vec4(
                    (float)col.R / 255.0f,
                    (float)col.G / 255.0f,
                    (float)col.B / 255.0f,
                    1.0f);
            }
        }

        CudaMem.Copy(hostPaletteBuffer, paletteImage);
    }
    finally
    {
        // Release the staging buffer even when filling or copying it fails.
        hostPaletteBuffer.Free();
    }

    // Point the texture reference at the new array and (re)apply its sampling setup.
    paletteTex.Array = paletteImage;
    paletteTex.SetFormat(CudaArrayFormat.Float, 4);
    paletteTex.AddressModeX = TexAddressMode.Clamp;
    paletteTex.AddressModeY = TexAddressMode.Clamp;
    paletteTex.FilterMode = TexFilterMode.Linear;
    paletteTex.Flags = TexFlags.NormalizedCoordinates;
    iterateKernel.SetTexRef(paletteTex);

    context.Synchronize();
}
/// <summary>
/// Creates the engine on the first entry of <c>Device.Devices</c>: creates a context,
/// loads the kernel module from the embedded PTX, allocates the per-iterator device
/// buffers and statistics cells, publishes each of them to the module by name, seeds the
/// entropy buffer, creates the timing events, configures the launch shape of the iterator
/// and statistics kernels, and finally runs <c>init_iterators_kernel</c> once.
/// </summary>
/// <remarks>
/// The launch shapes of <c>reset_output_kernel</c> and <c>update_output_kernel</c> are not
/// configured here (presumably they are set wherever the output size becomes known —
/// verify against the rest of the class).
/// </remarks>
public CudaFractalEngine()
{
    device = Device.Devices[0];
    context = device.CreateContext();

    // Two blocks per multiprocessor, limited to the range 2..64.
    iterBlockCount = Util.Clamp(device.MultiprocessorCount * 2, 2, 64);

    // The PTX comes from the CudaResources byte array; the stream is only needed
    // for the duration of the load.
    // NOTE(review): assumes LoadModule reads the stream before returning — confirm.
    using (System.IO.MemoryStream stream = new System.IO.MemoryStream(CudaResources.kernels_ptx))
    {
        module = context.LoadModule(stream);
    }

    initIteratorsKernel = module.GetKernel("init_iterators_kernel");
    resetIteratorsKernel = module.GetKernel("reset_iterators_kernel");
    iterateKernel = module.GetKernel("iterate_kernel");
    updateStatsKernel = module.GetKernel("update_stats_kernel");
    resetOutputKernel = module.GetKernel("reset_output_kernel");
    updateOutputKernel = module.GetKernel("update_output_kernel");

    glOutputBufferID = 0;
    mainStream = new Cuda.Stream();

    // Per-iterator state buffers, IterBlockSize x IterBlockCount each. Every buffer is
    // written to the module constant of the same name right after it is allocated.
    // NOTE(review): the first Alloc argument looks like the element size in bytes — confirm.
    iterPosStateBuffer = DeviceBuffer2D.Alloc(8, IterBlockSize, IterBlockCount);
    module.WriteConstant("iterPosStateBuffer", iterPosStateBuffer);
    iterColorStateBuffer = DeviceBuffer2D.Alloc(16, IterBlockSize, IterBlockCount);
    module.WriteConstant("iterColorStateBuffer", iterColorStateBuffer);
    entropyXBuffer = DeviceBuffer2D.Alloc(16, IterBlockSize, IterBlockCount);
    module.WriteConstant("entropyXBuffer", entropyXBuffer);
    entropyCBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
    module.WriteConstant("entropyCBuffer", entropyCBuffer);
    entropySeedBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
    module.WriteConstant("entropySeedBuffer", entropySeedBuffer);

    // Fill a host staging buffer with one seed per iterator and upload it.
    HostBuffer2D<uint> hostEntropySeedBuffer = HostBuffer2D<uint>.Alloc(IterBlockSize, IterBlockCount);
    try
    {
        for (int y = 0; y < IterBlockCount; y++)
        {
            for (int x = 0; x < IterBlockSize; x++)
            {
                hostEntropySeedBuffer[y, x] = (uint)rand.Next(65536);
            }
        }

        CudaMem.Copy(hostEntropySeedBuffer, entropySeedBuffer);
    }
    finally
    {
        // Release the staging buffer even when seeding or the upload fails.
        hostEntropySeedBuffer.Free();
    }

    dotCountBuffer = DeviceBuffer2D.Alloc(8, IterBlockSize, IterBlockCount);
    module.WriteConstant("dotCountBuffer", dotCountBuffer);
    peakDensityBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
    module.WriteConstant("peakDensityBuffer", peakDensityBuffer);

    // Single-value device cells, likewise published under their own names.
    totalIterCountMem = DevicePtr.AllocRaw(8);
    module.WriteConstant("totalIterCountMem", totalIterCountMem);
    totalDotCountMem = DevicePtr.AllocRaw(8);
    module.WriteConstant("totalDotCountMem", totalDotCountMem);
    densityMem = DevicePtr.AllocRaw(4);
    module.WriteConstant("densityMem", densityMem);
    peakDensityMem = DevicePtr.AllocRaw(4);
    module.WriteConstant("peakDensityMem", peakDensityMem);
    scaleConstantMem = DevicePtr.AllocRaw(4);
    module.WriteConstant("scaleConstantMem", scaleConstantMem);

    // No palette array exists yet; only the texture reference is looked up here.
    paletteImage = CudaArray.Null;
    paletteTex = module.GetTexRef("paletteTex");

    resetBeginEvt = new Event();
    resetEndEvt = new Event();
    cycleIterEvt = new Event();
    cycleStatEvt = new Event();
    cycleEndEvt = new Event();
    toneBeginEvt = new Event();
    toneEndEvt = new Event();

    // The iterator kernels run IterBlockCount blocks of IterBlockSize threads each.
    initIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
    initIteratorsKernel.SetGridDim(IterBlockCount, 1);
    initIteratorsKernel.SetSharedSize(0);

    resetIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
    resetIteratorsKernel.SetGridDim(IterBlockCount, 1);
    resetIteratorsKernel.SetSharedSize(0);

    iterateKernel.SetBlockShape(IterBlockSize, 1, 1);
    iterateKernel.SetGridDim(IterBlockCount, 1);
    iterateKernel.SetSharedSize(0);

    // The statistics kernel runs as a single thread in a single block.
    updateStatsKernel.SetBlockShape(1, 1, 1);
    updateStatsKernel.SetGridDim(1, 1);
    updateStatsKernel.SetSharedSize(0);

    // Initialize the iterators on the device and wait for completion.
    initIteratorsKernel.Launch();
    context.Synchronize();
}