/// <summary>
/// Builds a fractal engine on the given CUDA device: creates a context, loads the
/// embedded PTX module, looks up the kernels, allocates the per-iterator device
/// buffers, uploads random seeds and launches the iterator-initialisation kernel.
/// </summary>
/// <param name="device">Device to create the CUDA context on. Must not be null.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="device"/> is null. (This type derives from ArgumentException,
/// so callers catching ArgumentException are unaffected.)
/// </exception>
internal CudaFractalEngine(Device device)
{
    if (device == null)
    {
        throw new ArgumentNullException("device", "Invalid device passed to CudaFractalEngine.");
    }

    this.device = device;
    context = device.CreateContext();

    // Two blocks per multiprocessor; Util.Clamp presumably limits the result to [2, 64].
    iterBlockCount = Util.Clamp(device.MultiprocessorCount * 2, 2, 64);

    // The stream is only needed while the module is being loaded, so release it right after.
    // NOTE(review): assumes LoadModule consumes the stream before returning - confirm.
    using (System.IO.MemoryStream ptxStream = new System.IO.MemoryStream(Kernels.KernelResources.kernels_ptx))
    {
        module = context.LoadModule(ptxStream);
    }

    initIteratorsKernel = module.GetKernel("init_iterators_kernel");
    resetIteratorsKernel = module.GetKernel("reset_iterators_kernel");
    iterateKernel = module.GetKernel("iterate_kernel");
    updateStatsKernel = module.GetKernel("update_stats_kernel");
    resetOutputKernel = module.GetKernel("reset_output_kernel");
    updateOutputKernel = module.GetKernel("update_output_kernel");

    glOutputBufferID = 0;
    mainStream = new Cuda.Stream();

    // Device buffers; the first argument appears to be the element size in bytes
    // (see the Marshal.SizeOf calls), the second the element count.
    iterPosStateBuffer = DeviceBuffer.Alloc(8, IteratorCount);
    iterColorStateBuffer = DeviceBuffer.Alloc(8, IteratorCount);
    iterStatBuffer = DeviceBuffer.Alloc(Marshal.SizeOf(typeof(NativeIterStatEntry)), IteratorCount);
    globalStatBuffer = DeviceBuffer.Alloc(Marshal.SizeOf(typeof(NativeGlobalStatEntry)), 1);
    entropyXBuffer = DeviceBuffer.Alloc(16, IteratorCount);
    entropyCBuffer = DeviceBuffer.Alloc(4, IteratorCount);
    entropySeedBuffer = DeviceBuffer.Alloc(4, IteratorCount);

    // One host-generated seed per iterator, uploaded to the device seed buffer.
    uint[] seeds = new uint[IteratorCount];
    for (int i = 0; i < IteratorCount; i++)
    {
        seeds[i] = (uint)rand.Next(65536);
    }
    CudaMem.Copy(seeds, entropySeedBuffer);

    // No palette image yet; only the texture reference is resolved here.
    paletteImage = CudaArray.Null;
    paletteTex = module.GetTexRef("paletteTex");

    resetBeginEvt = new Event();
    resetEndEvt = new Event();
    cycleIterEvt = new Event();
    cycleStatEvt = new Event();
    cycleEndEvt = new Event();
    toneBeginEvt = new Event();
    toneEndEvt = new Event();

    // The three iterator kernels share one launch shape:
    // IterBlockCount blocks of IterBlockSize threads, no dynamic shared memory.
    initIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
    initIteratorsKernel.SetGridDim(IterBlockCount, 1);
    initIteratorsKernel.SetSharedSize(0);

    resetIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
    resetIteratorsKernel.SetGridDim(IterBlockCount, 1);
    resetIteratorsKernel.SetSharedSize(0);

    iterateKernel.SetBlockShape(IterBlockSize, 1, 1);
    iterateKernel.SetGridDim(IterBlockCount, 1);
    iterateKernel.SetSharedSize(0);

    // The statistics kernel is configured as a single thread in a single block.
    updateStatsKernel.SetBlockShape(1, 1, 1);
    updateStatsKernel.SetGridDim(1, 1);
    updateStatsKernel.SetSharedSize(0);

    // Initialise the iterators from the uploaded seeds, then synchronize the context
    // (presumably blocks until the launch has completed) before returning.
    initIteratorsKernel.Launch(entropyXBuffer.Ptr.RawPtr, entropyCBuffer.Ptr.RawPtr, entropySeedBuffer.Ptr.RawPtr);
    context.Synchronize();
}
/// <summary>
/// Builds a fractal engine on the first entry of <c>Device.Devices</c>: creates a
/// context, loads the embedded PTX module, looks up the kernels, allocates the device
/// buffers and publishes each one to the module via <c>WriteConstant</c>, uploads
/// random seeds and launches the iterator-initialisation kernel.
/// </summary>
/// <remarks>
/// NOTE(review): <c>Device.Devices[0]</c> is indexed without a check; this presumably
/// throws when no CUDA device is present - confirm against the Device wrapper.
/// </remarks>
public CudaFractalEngine()
{
    device = Device.Devices[0];
    context = device.CreateContext();

    // Two blocks per multiprocessor; Util.Clamp presumably limits the result to [2, 64].
    iterBlockCount = Util.Clamp(device.MultiprocessorCount * 2, 2, 64);

    // The stream is only needed while the module is being loaded, so release it right after.
    // NOTE(review): assumes LoadModule consumes the stream before returning - confirm.
    using (System.IO.MemoryStream stream = new System.IO.MemoryStream(CudaResources.kernels_ptx))
    {
        module = context.LoadModule(stream);
    }

    initIteratorsKernel = module.GetKernel("init_iterators_kernel");
    resetIteratorsKernel = module.GetKernel("reset_iterators_kernel");
    iterateKernel = module.GetKernel("iterate_kernel");
    updateStatsKernel = module.GetKernel("update_stats_kernel");
    resetOutputKernel = module.GetKernel("reset_output_kernel");
    updateOutputKernel = module.GetKernel("update_output_kernel");

    glOutputBufferID = 0;
    mainStream = new Cuda.Stream();

    // Per-iterator 2D buffers (IterBlockSize x IterBlockCount). Each one is written to
    // the module constant of the same name right after allocation.
    iterPosStateBuffer = DeviceBuffer2D.Alloc(8, IterBlockSize, IterBlockCount);
    module.WriteConstant("iterPosStateBuffer", iterPosStateBuffer);
    iterColorStateBuffer = DeviceBuffer2D.Alloc(16, IterBlockSize, IterBlockCount);
    module.WriteConstant("iterColorStateBuffer", iterColorStateBuffer);
    entropyXBuffer = DeviceBuffer2D.Alloc(16, IterBlockSize, IterBlockCount);
    module.WriteConstant("entropyXBuffer", entropyXBuffer);
    entropyCBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
    module.WriteConstant("entropyCBuffer", entropyCBuffer);
    entropySeedBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
    module.WriteConstant("entropySeedBuffer", entropySeedBuffer);

    // Fill a temporary host buffer with one seed per iterator and upload it.
    // try/finally guarantees the host buffer is freed even if the copy throws.
    HostBuffer2D<uint> hostEntropySeedBuffer = HostBuffer2D<uint>.Alloc(IterBlockSize, IterBlockCount);
    try
    {
        for (int y = 0; y < IterBlockCount; y++)
        {
            for (int x = 0; x < IterBlockSize; x++)
            {
                hostEntropySeedBuffer[y, x] = (uint)rand.Next(65536);
            }
        }
        CudaMem.Copy(hostEntropySeedBuffer, entropySeedBuffer);
    }
    finally
    {
        hostEntropySeedBuffer.Free();
    }

    dotCountBuffer = DeviceBuffer2D.Alloc(8, IterBlockSize, IterBlockCount);
    module.WriteConstant("dotCountBuffer", dotCountBuffer);
    peakDensityBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
    module.WriteConstant("peakDensityBuffer", peakDensityBuffer);

    // Raw device allocations (argument is presumably the size in bytes), likewise
    // published to the module constant of the same name.
    totalIterCountMem = DevicePtr.AllocRaw(8);
    module.WriteConstant("totalIterCountMem", totalIterCountMem);
    totalDotCountMem = DevicePtr.AllocRaw(8);
    module.WriteConstant("totalDotCountMem", totalDotCountMem);
    densityMem = DevicePtr.AllocRaw(4);
    module.WriteConstant("densityMem", densityMem);
    peakDensityMem = DevicePtr.AllocRaw(4);
    module.WriteConstant("peakDensityMem", peakDensityMem);
    scaleConstantMem = DevicePtr.AllocRaw(4);
    module.WriteConstant("scaleConstantMem", scaleConstantMem);

    // No palette image yet; only the texture reference is resolved here.
    paletteImage = CudaArray.Null;
    paletteTex = module.GetTexRef("paletteTex");

    resetBeginEvt = new Event();
    resetEndEvt = new Event();
    cycleIterEvt = new Event();
    cycleStatEvt = new Event();
    cycleEndEvt = new Event();
    toneBeginEvt = new Event();
    toneEndEvt = new Event();

    // The three iterator kernels share one launch shape:
    // IterBlockCount blocks of IterBlockSize threads, no dynamic shared memory.
    initIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
    initIteratorsKernel.SetGridDim(IterBlockCount, 1);
    initIteratorsKernel.SetSharedSize(0);

    resetIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
    resetIteratorsKernel.SetGridDim(IterBlockCount, 1);
    resetIteratorsKernel.SetSharedSize(0);

    iterateKernel.SetBlockShape(IterBlockSize, 1, 1);
    iterateKernel.SetGridDim(IterBlockCount, 1);
    iterateKernel.SetSharedSize(0);

    // The statistics kernel is configured as a single thread in a single block.
    updateStatsKernel.SetBlockShape(1, 1, 1);
    updateStatsKernel.SetGridDim(1, 1);
    updateStatsKernel.SetSharedSize(0);

    // The kernel takes no launch arguments here; it presumably reads the buffers
    // through the module constants written above. Synchronize before returning.
    initIteratorsKernel.Launch();
    context.Synchronize();
}