/// <summary>
        /// Builds an engine on the given device: creates a context, loads the PTX module from
        /// <c>Kernels.KernelResources.kernels_ptx</c>, looks up the kernels, allocates the iterator,
        /// statistics and entropy buffers, uploads random seeds, and launches
        /// <c>init_iterators_kernel</c>, waiting for it to complete before returning.
        /// </summary>
        /// <param name="device">Device on which the context is created. Must not be null.</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="device"/> is null. This type derives from <see cref="ArgumentException"/>,
        /// so callers that catch <see cref="ArgumentException"/> keep working.
        /// </exception>
        internal CudaFractalEngine(Device device)
        {
            if (device == null)
            {
                // A null argument is reported with the dedicated exception type; the message text is unchanged.
                throw new ArgumentNullException("device", "Invalid device passed to CudaFractalEngine.");
            }

            this.device = device;

            context = device.CreateContext();

            // Two blocks per multiprocessor, clamped through Util.Clamp with bounds 2 and 64.
            iterBlockCount = Util.Clamp(device.MultiprocessorCount * 2, 2, 64);

            // NOTE(review): the stream is not disposed here because it is not visible from this block
            // whether LoadModule keeps a reference to it - confirm before wrapping it in a using.
            System.IO.MemoryStream ptxStream = new System.IO.MemoryStream(Kernels.KernelResources.kernels_ptx);

            module = context.LoadModule(ptxStream);

            initIteratorsKernel  = module.GetKernel("init_iterators_kernel");
            resetIteratorsKernel = module.GetKernel("reset_iterators_kernel");
            iterateKernel        = module.GetKernel("iterate_kernel");
            updateStatsKernel    = module.GetKernel("update_stats_kernel");
            resetOutputKernel    = module.GetKernel("reset_output_kernel");
            updateOutputKernel   = module.GetKernel("update_output_kernel");
            glOutputBufferID     = 0;

            mainStream = new Cuda.Stream();

            // Per-iterator state and statistics, plus a single global statistics entry.
            iterPosStateBuffer   = DeviceBuffer.Alloc(8, IteratorCount);
            iterColorStateBuffer = DeviceBuffer.Alloc(8, IteratorCount);
            iterStatBuffer       = DeviceBuffer.Alloc(Marshal.SizeOf(typeof(NativeIterStatEntry)), IteratorCount);
            globalStatBuffer     = DeviceBuffer.Alloc(Marshal.SizeOf(typeof(NativeGlobalStatEntry)), 1);

            entropyXBuffer    = DeviceBuffer.Alloc(16, IteratorCount);
            entropyCBuffer    = DeviceBuffer.Alloc(4, IteratorCount);
            entropySeedBuffer = DeviceBuffer.Alloc(4, IteratorCount);

            // One host-generated seed per iterator, copied into entropySeedBuffer.
            uint[] seeds = new uint[IteratorCount];
            for (int i = 0; i < IteratorCount; i++)
            {
                seeds[i] = (uint)rand.Next(65536);
            }
            CudaMem.Copy(seeds, entropySeedBuffer);

            paletteImage = CudaArray.Null;
            paletteTex   = module.GetTexRef("paletteTex");

            resetBeginEvt = new Event();
            resetEndEvt   = new Event();
            cycleIterEvt  = new Event();
            cycleStatEvt  = new Event();
            cycleEndEvt   = new Event();
            toneBeginEvt  = new Event();
            toneEndEvt    = new Event();

            // The three iterator kernels share one launch shape: IterBlockSize x 1 x 1 threads per block,
            // IterBlockCount x 1 blocks, no shared memory.
            initIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
            initIteratorsKernel.SetGridDim(IterBlockCount, 1);
            initIteratorsKernel.SetSharedSize(0);
            resetIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
            resetIteratorsKernel.SetGridDim(IterBlockCount, 1);
            resetIteratorsKernel.SetSharedSize(0);
            iterateKernel.SetBlockShape(IterBlockSize, 1, 1);
            iterateKernel.SetGridDim(IterBlockCount, 1);
            iterateKernel.SetSharedSize(0);

            // The statistics kernel is configured as a single thread in a single block.
            updateStatsKernel.SetBlockShape(1, 1, 1);
            updateStatsKernel.SetGridDim(1, 1);
            updateStatsKernel.SetSharedSize(0);

            initIteratorsKernel.Launch(entropyXBuffer.Ptr.RawPtr, entropyCBuffer.Ptr.RawPtr, entropySeedBuffer.Ptr.RawPtr);

            // Block until the initialisation launch has finished.
            context.Synchronize();
        }
Esempio n. 2
0
        /// <summary>
        /// Builds an engine on the first entry of <c>Device.Devices</c>: creates a context, loads the PTX
        /// module from <c>CudaResources.kernels_ptx</c>, looks up the kernels, allocates the device buffers
        /// and publishes each one to the module with <c>WriteConstant</c>, uploads random seeds, and
        /// launches <c>init_iterators_kernel</c>, waiting for it to complete before returning.
        /// </summary>
        public CudaFractalEngine()
        {
            // NOTE(review): no guard for an empty device list; the collection type is not visible from here.
            device  = Device.Devices[0];
            context = device.CreateContext();

            // Two blocks per multiprocessor, clamped through Util.Clamp with bounds 2 and 64.
            iterBlockCount = Util.Clamp(device.MultiprocessorCount * 2, 2, 64);

            System.IO.MemoryStream stream = new System.IO.MemoryStream(CudaResources.kernels_ptx);

            module = context.LoadModule(stream);
            initIteratorsKernel  = module.GetKernel("init_iterators_kernel");
            resetIteratorsKernel = module.GetKernel("reset_iterators_kernel");
            iterateKernel        = module.GetKernel("iterate_kernel");
            updateStatsKernel    = module.GetKernel("update_stats_kernel");
            resetOutputKernel    = module.GetKernel("reset_output_kernel");
            updateOutputKernel   = module.GetKernel("update_output_kernel");
            glOutputBufferID     = 0;

            mainStream = new Cuda.Stream();

            // Each buffer is allocated and then written to the module constant of the same name.
            iterPosStateBuffer = DeviceBuffer2D.Alloc(8, IterBlockSize, IterBlockCount);
            module.WriteConstant("iterPosStateBuffer", iterPosStateBuffer);
            iterColorStateBuffer = DeviceBuffer2D.Alloc(16, IterBlockSize, IterBlockCount);
            module.WriteConstant("iterColorStateBuffer", iterColorStateBuffer);

            entropyXBuffer = DeviceBuffer2D.Alloc(16, IterBlockSize, IterBlockCount);
            module.WriteConstant("entropyXBuffer", entropyXBuffer);
            entropyCBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
            module.WriteConstant("entropyCBuffer", entropyCBuffer);
            entropySeedBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
            module.WriteConstant("entropySeedBuffer", entropySeedBuffer);

            // Fill a temporary host buffer with one seed per (block, thread) slot and copy it to the device.
            // The finally block releases the host buffer even if seeding or the copy throws.
            HostBuffer2D<uint> hostEntropySeedBuffer = HostBuffer2D<uint>.Alloc(IterBlockSize, IterBlockCount);
            try
            {
                for (int y = 0; y < IterBlockCount; y++)
                {
                    for (int x = 0; x < IterBlockSize; x++)
                    {
                        hostEntropySeedBuffer[y, x] = (uint)rand.Next(65536);
                    }
                }

                CudaMem.Copy(hostEntropySeedBuffer, entropySeedBuffer);
            }
            finally
            {
                hostEntropySeedBuffer.Free();
            }

            dotCountBuffer = DeviceBuffer2D.Alloc(8, IterBlockSize, IterBlockCount);
            module.WriteConstant("dotCountBuffer", dotCountBuffer);

            peakDensityBuffer = DeviceBuffer2D.Alloc(4, IterBlockSize, IterBlockCount);
            module.WriteConstant("peakDensityBuffer", peakDensityBuffer);

            // Small raw allocations (8 or 4 bytes each), likewise published as module constants.
            totalIterCountMem = DevicePtr.AllocRaw(8);
            module.WriteConstant("totalIterCountMem", totalIterCountMem);
            totalDotCountMem = DevicePtr.AllocRaw(8);
            module.WriteConstant("totalDotCountMem", totalDotCountMem);
            densityMem = DevicePtr.AllocRaw(4);
            module.WriteConstant("densityMem", densityMem);
            peakDensityMem = DevicePtr.AllocRaw(4);
            module.WriteConstant("peakDensityMem", peakDensityMem);
            scaleConstantMem = DevicePtr.AllocRaw(4);
            module.WriteConstant("scaleConstantMem", scaleConstantMem);

            paletteImage = CudaArray.Null;

            paletteTex = module.GetTexRef("paletteTex");

            resetBeginEvt = new Event();
            resetEndEvt   = new Event();
            cycleIterEvt  = new Event();
            cycleStatEvt  = new Event();
            cycleEndEvt   = new Event();
            toneBeginEvt  = new Event();
            toneEndEvt    = new Event();

            // The three iterator kernels share one launch shape: IterBlockSize x 1 x 1 threads per block,
            // IterBlockCount x 1 blocks, no shared memory.
            initIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
            initIteratorsKernel.SetGridDim(IterBlockCount, 1);
            initIteratorsKernel.SetSharedSize(0);
            resetIteratorsKernel.SetBlockShape(IterBlockSize, 1, 1);
            resetIteratorsKernel.SetGridDim(IterBlockCount, 1);
            resetIteratorsKernel.SetSharedSize(0);
            iterateKernel.SetBlockShape(IterBlockSize, 1, 1);
            iterateKernel.SetGridDim(IterBlockCount, 1);
            iterateKernel.SetSharedSize(0);

            // The statistics kernel is configured as a single thread in a single block.
            updateStatsKernel.SetBlockShape(1, 1, 1);
            updateStatsKernel.SetGridDim(1, 1);
            updateStatsKernel.SetSharedSize(0);

            // No launch arguments here: the buffers were handed to the module via WriteConstant above.
            initIteratorsKernel.Launch();
            context.Synchronize();
        }