Esempio n. 1
1
        private void initGLAndCuda()
        {
            //Create render target control
            m_renderControl = new OpenTK.GLControl(GraphicsMode.Default, 1, 0, GraphicsContextFlags.Default);
            m_renderControl.Dock = DockStyle.Fill;
            m_renderControl.BackColor = Color.White;
            m_renderControl.BorderStyle = BorderStyle.FixedSingle;
            m_renderControl.KeyDown += new KeyEventHandler(m_renderControl_KeyDown);
            m_renderControl.MouseMove += new MouseEventHandler(m_renderControl_MouseMove);
            m_renderControl.MouseDown += new MouseEventHandler(m_renderControl_MouseDown);
            m_renderControl.SizeChanged += new EventHandler(m_renderControl_SizeChanged);

            panel1.Controls.Add(m_renderControl);
            Console.WriteLine("   OpenGL device is Available");

            int deviceID = CudaContext.GetMaxGflopsDeviceId();

            ctx = CudaContext.CreateOpenGLContext(deviceID, CUCtxFlags.BlockingSync);
            string console = string.Format("CUDA device [{0}] has {1} Multi-Processors", ctx.GetDeviceName(), ctx.GetDeviceInfo().MultiProcessorCount);
            Console.WriteLine(console);

            CUmodule module = ctx.LoadModulePTX("kernel.ptx");

            addForces_k = new CudaKernel("addForces_k", module, ctx);
            advectVelocity_k = new CudaKernel("advectVelocity_k", module, ctx);
            diffuseProject_k = new CudaKernel("diffuseProject_k", module, ctx);
            updateVelocity_k = new CudaKernel("updateVelocity_k", module, ctx);
            advectParticles_k = new CudaKernel("advectParticles_OGL", module, ctx);

            hvfield = new cData[DS];
            dvfield = new CudaPitchedDeviceVariable<cData>(DIM, DIM);
            tPitch = dvfield.Pitch;

            dvfield.CopyToDevice(hvfield);

            vxfield = new CudaDeviceVariable<cData>(DS);
            vyfield = new CudaDeviceVariable<cData>(DS);

            // Create particle array
            particles = new cData[DS];
            initParticles(particles, DIM, DIM);

            // TODO: update kernels to use the new unpadded memory layout for perf
            // rather than the old FFTW-compatible layout
            planr2c = new CudaFFTPlan2D(DIM, DIM, cufftType.R2C, Compatibility.FFTWPadding);
            planc2r = new CudaFFTPlan2D(DIM, DIM, cufftType.C2R, Compatibility.FFTWPadding);

            GL.GenBuffers(1, out vbo);
            GL.BindBuffer(BufferTarget.ArrayBuffer, vbo);
            GL.BufferData<cData>(BufferTarget.ArrayBuffer, new IntPtr(cData.SizeOf * DS), particles, BufferUsageHint.DynamicDraw);
            int bsize;
            GL.GetBufferParameter(BufferTarget.ArrayBuffer, BufferParameterName.BufferSize, out bsize);

            if (bsize != DS * cData.SizeOf)
                throw new Exception("Sizes don't match.");

            GL.BindBuffer(BufferTarget.ArrayBuffer, 0);

            cuda_vbo_resource = new CudaGraphicsInteropResourceCollection();
            cuda_vbo_resource.Add(new CudaOpenGLBufferInteropResource(vbo, CUGraphicsRegisterFlags.None));

            texref = new CudaTextureArray2D(advectVelocity_k, "texref", CUAddressMode.Wrap, CUFilterMode.Linear, 0, CUArrayFormat.Float, DIM, DIM, CudaArray2DNumChannels.Two);

            stopwatch = new CudaStopWatch(CUEventFlags.Default);

            reshape();
            isInit = true;
            display();
        }
Esempio n. 2
0
        // General GPU Device CUDA Initialization
        static int gpuDeviceInit(int devID)
        {
            int deviceCount = CudaContext.GetDeviceCount();

            if (deviceCount == 0)
            {
                Console.Write("gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
                Environment.Exit(-1);
            }
            if (devID < 0)
            {
                devID = 0;
            }
            if (devID > deviceCount - 1)
            {
                Console.Write("\n");
                Console.Write(">> {0} CUDA capable GPU device(s) detected. <<\n", deviceCount);
                Console.Write(">> gpuDeviceInit (-device={0}) is not a valid GPU device. <<\n", devID);
                Console.Write("\n");
                return(-devID);
            }


            if (CudaContext.GetDeviceComputeCapability(devID).Major < 1)
            {
                Console.Write("gpuDeviceInit(): GPU device does not support CUDA.\n");
                Environment.Exit(-1);
            }
            ctx = new CudaContext(devID);
            Console.Write("> gpuDeviceInit() CUDA device [{0}]: {1}\n", devID, ctx.GetDeviceName());
            return(devID);
        }
Esempio n. 3
0
        private int findGraphicsGPU(out string devName)
        {
            int    nGraphicsGPU = 0;
            int    deviceCount = 0;
            bool   bFoundGraphics = false;
            string firstGraphicsName = string.Empty, temp;

            devName = string.Empty;

            deviceCount = CudaContext.GetDeviceCount();

            // This function call returns 0 if there are no CUDA capable devices.
            if (deviceCount == 0)
            {
                Console.WriteLine("There are no device(s) supporting CUDA");
                return(0);
            }
            else
            {
                Console.WriteLine("> Found " + deviceCount + " CUDA Capable Device(s)");
            }

            for (int dev = 0; dev < deviceCount; dev++)
            {
                temp = CudaContext.GetDeviceName(dev);
                bool          bGraphics = !temp.Contains("Tesla");
                StringBuilder sb        = new StringBuilder();
                sb.Append("> ");
                if (bGraphics)
                {
                    sb.Append("Graphics");
                }
                else
                {
                    sb.Append("Compute");
                }

                sb.Append("\t\tGPU ").Append(dev).Append(": ").Append(CudaContext.GetDeviceName(dev));
                Console.WriteLine(sb.ToString());

                if (bGraphics)
                {
                    if (!bFoundGraphics)
                    {
                        firstGraphicsName = temp;
                    }
                    nGraphicsGPU++;
                }
            }

            if (nGraphicsGPU != 0)
            {
                devName = firstGraphicsName;
            }
            else
            {
                devName = "this hardware";
            }
            return(nGraphicsGPU);
        }
Esempio n. 4
0
        static int Main()
        {
            try
            {
                int deviceCount = CudaContext.GetDeviceCount();
                if (deviceCount == 0)
                {
                    Console.Error.WriteLine("No CUDA devices detected. Sad face.");
                    return(-1);
                }
                Console.WriteLine($"{deviceCount} CUDA devices detected (first will be used)");
                for (int i = 0; i < deviceCount; i++)
                {
                    Console.WriteLine($"{i}: {CudaContext.GetDeviceName(i)}");
                }
                const int Count = 100;
                using (var state = new CudaProcessor(deviceId: 0))
                {
                    Console.WriteLine("Initializing kernel...");
                    var compileResult = state.LoadKernel(out string log);
                    if (compileResult != ManagedCuda.NVRTC.nvrtcResult.Success)
                    {
                        Console.Error.WriteLine(compileResult);
                        Console.Error.WriteLine(log);
                        return(-1);
                    }
                    Console.WriteLine(log);
                    Console.WriteLine("Initializing data...");
                    state.InitializeData(Count);

                    Console.WriteLine("Running kernel...");
                    state.CalculateAsync(2);
                    Console.WriteLine("Copying data back...");
                    state.CopyToHost();
                    Console.WriteLine("Waiting for completion...");
                    state.Synchronize();
                    Console.WriteLine("all done; showing some results");
                    var random = new Random(123456);
                    for (int i = 0; i < 20; i++)
                    {
                        //var rnd = random.Next(Count);
                        var multiply = state[i];
                        var adding   = state.Get(i);
                        Console.WriteLine($"{i}: Multiplication:{nameof(multiply.X)}={multiply.X}, {nameof(multiply.Y)}={multiply.Y}");
                        Console.WriteLine($"{i}: Addition: {nameof(adding.X)}={adding.X}, {nameof(adding.Y)}={adding.Y}");
                    }
                    Console.WriteLine("Cleaning up...");
                }
                Console.WriteLine("All done; have a nice day");
            }
            catch (Exception ex)
            {
                Console.Error.WriteLine(ex.Message);
                Console.ReadKey();
                return(-1);
            }
            Console.ReadKey();
            return(0);
        }
Esempio n. 5
0
        // Initialization code to find the best CUDA Device
        static int findCudaDevice(string[] args)
        {
            int devID = 0;
            // If the command-line has a device number specified, use it
            bool found = false;

            foreach (var item in args)
            {
                if (item.Contains("device="))
                {
                    found = true;
                    if (!int.TryParse(item, out devID))
                    {
                        Console.WriteLine("Invalid command line parameters");
                        Environment.Exit(-1);
                    }
                    if (devID < 0)
                    {
                        Console.WriteLine("Invalid command line parameters\n");
                        Environment.Exit(-1);
                    }
                    else
                    {
                        devID = gpuDeviceInit(devID);
                        if (devID < 0)
                        {
                            Console.WriteLine("exiting...\n");
                            ShrQATest.shrQAFinishExit(args, ShrQATest.eQAstatus.QA_FAILED);
                            Environment.Exit(-1);
                        }
                    }
                }
            }

            if (!found)
            {
                // Otherwise pick the device with highest Gflops/s
                devID = CudaContext.GetMaxGflopsDeviceId();
                ctx   = new CudaContext(devID, CUCtxFlags.SchedAuto);
                Console.Write("> Using CUDA device [{0}]: {1}\n", devID, ctx.GetDeviceName());
            }
            return(devID);
        }
        private bool InitializeD3D()
        {
            HwndSource hwnd = new HwndSource(0, 0, 0, 0, 0, "null", IntPtr.Zero);

            // Create the D3D object.
            d3d = new Direct3D();


            PresentParameters pp = new PresentParameters();

            pp.BackBufferWidth        = 512;
            pp.BackBufferHeight       = 512;
            pp.BackBufferFormat       = Format.Unknown;
            pp.BackBufferCount        = 0;
            pp.Multisample            = MultisampleType.None;
            pp.MultisampleQuality     = 0;
            pp.SwapEffect             = SwapEffect.Discard;
            pp.DeviceWindowHandle     = (IntPtr)0;
            pp.Windowed               = true;
            pp.EnableAutoDepthStencil = false;
            pp.AutoDepthStencilFormat = Format.Unknown;
            pp.PresentationInterval   = PresentInterval.Default;

            bDeviceFound = false;
            CUdevice[] cudaDevices = null;
            for (g_iAdapter = 0; g_iAdapter < d3d.AdapterCount; g_iAdapter++)
            {
                device = new Device(d3d, d3d.Adapters[g_iAdapter].Adapter, DeviceType.Hardware, hwnd.Handle, CreateFlags.HardwareVertexProcessing | CreateFlags.Multithreaded, pp);
                try
                {
                    cudaDevices  = CudaContext.GetDirectXDevices(device.ComPointer, CUd3dXDeviceList.All, CudaContext.DirectXVersion.D3D9);
                    bDeviceFound = cudaDevices.Length > 0;
                    infoLog.AppendText("> Display Device #" + d3d.Adapters[g_iAdapter].Adapter
                                       + ": \"" + d3d.Adapters[g_iAdapter].Details.Description + "\" supports Direct3D9 and CUDA.\n");
                    break;
                }
                catch (CudaException)
                {
                    //No Cuda device found for this Direct3D9 device
                    infoLog.AppendText("> Display Device #" + d3d.Adapters[g_iAdapter].Adapter
                                       + ": \"" + d3d.Adapters[g_iAdapter].Details.Description + "\" supports Direct3D9 but not CUDA.\n");
                }
            }

            // we check to make sure we have found a cuda-compatible D3D device to work on
            if (!bDeviceFound)
            {
                infoLog.AppendText("No CUDA-compatible Direct3D9 device available");
                if (device != null)
                {
                    device.Dispose();
                }
                return(false);
            }

            ctx             = new CudaContext(cudaDevices[0], device.ComPointer, CUCtxFlags.BlockingSync, CudaContext.DirectXVersion.D3D9);
            deviceName.Text = "Device name: " + ctx.GetDeviceName();

            // Set projection matrix
            SlimDX.Matrix matProj = SlimDX.Matrix.OrthoOffCenterLH(0, 1, 1, 0, 0, 1);
            device.SetTransform(TransformState.Projection, matProj);

            // Turn off D3D lighting, since we are providing our own vertex colors
            device.SetRenderState(RenderState.Lighting, false);

            //Load kernels
            CUmodule module = ctx.LoadModulePTX("kernel.ptx");

            addForces_k       = new CudaKernel("addForces_k", module, ctx);
            advectVelocity_k  = new CudaKernel("advectVelocity_k", module, ctx);
            diffuseProject_k  = new CudaKernel("diffuseProject_k", module, ctx);
            updateVelocity_k  = new CudaKernel("updateVelocity_k", module, ctx);
            advectParticles_k = new CudaKernel("advectParticles_k", module, ctx);

            d3dimage.Lock();
            Surface surf = device.GetBackBuffer(0, 0);

            d3dimage.SetBackBuffer(D3DResourceType.IDirect3DSurface9, surf.ComPointer);
            d3dimage.Unlock();
            surf.Dispose();

            //Setup the "real" frame rate counter.
            //The cuda counter only measures cuda runtime, not the overhead to actually
            //show the result via DirectX and WPF.
            realLastTick = Environment.TickCount;
            return(true);
        }
Esempio n. 7
0
        static int Main()
        {
            try
            {
                int deviceCount = CudaContext.GetDeviceCount();
                if (deviceCount == 0)
                {
                    Console.Error.WriteLine("No CUDA devices detected. Sad face.");
                    return(-1);
                }
                Console.WriteLine($"{deviceCount} CUDA devices detected (first will be used)");
                for (int i = 0; i < deviceCount; i++)
                {
                    Console.WriteLine($"{i}: {CudaContext.GetDeviceName(i)}");
                }
                const int Count = 1024 * 1024;
                using (var state = new SomeState(deviceId: 0))
                {
                    Console.WriteLine("Initializing kernel...");
                    string log;
                    var    compileResult = state.LoadKernel(out log);
                    if (compileResult != ManagedCuda.NVRTC.nvrtcResult.Success)
                    {
                        Console.Error.WriteLine(compileResult);
                        Console.Error.WriteLine(log);
                        return(-1);
                    }
                    Console.WriteLine(log);

                    Console.WriteLine("Initializing data...");
                    state.InitializeData(Count);

                    Console.WriteLine("Running kernel...");
                    for (int i = 0; i < 8; i++)
                    {
                        state.MultiplyAsync(2);
                    }

                    Console.WriteLine("Copying data back...");
                    state.CopyToHost(); // note: usually you try to minimize how much you need to
                    // fetch from the device, as that can be a bottleneck; you should prefer fetching
                    // minimal aggregate data (counts, etc), or the required pages of data; fetching
                    // *all* the data works, but should be avoided when possible.

                    Console.WriteLine("Waiting for completion...");
                    state.Synchronize();

                    Console.WriteLine("all done; showing some results");
                    var random = new Random(123456);
                    for (int i = 0; i < 20; i++)
                    {
                        var record = state[random.Next(Count)];
                        Console.WriteLine($"{i}: {nameof(record.Id)}={record.Id}, {nameof(record.Value)}={record.Value}");
                    }

                    Console.WriteLine("Cleaning up...");
                }

                Console.WriteLine("All done; have a nice day");
                return(0);
            } catch (Exception ex)
            {
                Console.Error.WriteLine(ex.Message);
                return(-1);
            }
        }
Esempio n. 8
0
        static void Main(string[] args)
        {
            try
            {
                if (args.Length > 0)
                {
                    deviceID = int.Parse(args[0]);
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Device ID parse error");
            }

            try
            {
                if (args.Length > 1)
                {
                    port = int.Parse(args[1]);
                    Comms.ConnectToMaster(port);
                }
                else
                {
                    TEST = true;
                    Logger.CopyToConsole = true;
                    CGraph.ShowCycles    = true;
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Master connection error");
            }

            try
            {
                if (args.Length > 3)
                {
                    gpuCount = int.Parse(args[3]);
                    fastCuda = gpuCount <= (Environment.ProcessorCount / 2);
                    if (fastCuda)
                    {
                        Logger.Log(LogLevel.Info, "Using single GPU blocking mode");
                    }
                }
            }
            catch
            {
            }

            if (TEST)
            {
                currentJob = nextJob = new Job()
                {
                    jobID     = 0,
                    k0        = 0xf4956dc403730b01L,
                    k1        = 0xe6d45de39c2a5a3eL,
                    k2        = 0xcbf626a8afee35f6L,
                    k3        = 0x4307b94b1a0c9980L,
                    pre_pow   = TestPrePow,
                    timestamp = DateTime.Now
                };
            }
            else
            {
                currentJob = nextJob = new Job()
                {
                    jobID     = 0,
                    k0        = 0xf4956dc403730b01L,
                    k1        = 0xe6d45de39c2a5a3eL,
                    k2        = 0xcbf626a8afee35f6L,
                    k3        = 0x4307b94b1a0c9980L,
                    pre_pow   = TestPrePow,
                    timestamp = DateTime.Now
                };

                if (!Comms.IsConnected())
                {
                    Console.WriteLine("Master connection failed, aborting");
                    Logger.Log(LogLevel.Error, "No master connection, exitting!");
                    return;
                }

                if (deviceID < 0)
                {
                    int devCnt             = CudaContext.GetDeviceCount();
                    GpuDevicesMessage gpum = new GpuDevicesMessage()
                    {
                        devices = new List <GpuDevice>(devCnt)
                    };
                    for (int i = 0; i < devCnt; i++)
                    {
                        string name = CudaContext.GetDeviceName(i);
                        var    info = CudaContext.GetDeviceInfo(i);
                        gpum.devices.Add(new GpuDevice()
                        {
                            deviceID = i, name = name, memory = info.TotalGlobalMemory
                        });
                    }
                    //Console.WriteLine(devCnt);
                    Comms.gpuMsg = gpum;
                    Comms.SetEvent();
                    //Console.WriteLine("event fired");
                    Task.Delay(1000).Wait();
                    //Console.WriteLine("closing");
                    Comms.Close();
                    return;
                }
            }


            try
            {
                var assembly       = Assembly.GetEntryAssembly();
                var resourceStream = assembly.GetManifestResourceStream("CudaSolver.kernel_x64.ptx");
                ctx = new CudaContext(deviceID, !fastCuda ? (CUCtxFlags.BlockingSync | CUCtxFlags.MapHost) : CUCtxFlags.MapHost);

                meanSeedA = ctx.LoadKernelPTX(resourceStream, "FluffySeed2A");
                meanSeedA.BlockDimensions = 128;
                meanSeedA.GridDimensions  = 2048;
                meanSeedA.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                meanSeedB = ctx.LoadKernelPTX(resourceStream, "FluffySeed2B");
                meanSeedB.BlockDimensions = 128;
                meanSeedB.GridDimensions  = 2048;
                meanSeedB.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                meanSeedB_4 = ctx.LoadKernelPTX(resourceStream, "FluffySeed2B");
                meanSeedB_4.BlockDimensions = 128;
                meanSeedB_4.GridDimensions  = 1024;
                meanSeedB_4.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                meanRound = ctx.LoadKernelPTX(resourceStream, "FluffyRound");
                meanRound.BlockDimensions = 512;
                meanRound.GridDimensions  = 4096;
                meanRound.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                meanRound_2 = ctx.LoadKernelPTX(resourceStream, "FluffyRound");
                meanRound_2.BlockDimensions = 512;
                meanRound_2.GridDimensions  = 2048;
                meanRound_2.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                meanRoundJoin = ctx.LoadKernelPTX(resourceStream, "FluffyRound_J");
                meanRoundJoin.BlockDimensions = 512;
                meanRoundJoin.GridDimensions  = 4096;
                meanRoundJoin.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                meanTail = ctx.LoadKernelPTX(resourceStream, "FluffyTail");
                meanTail.BlockDimensions = 1024;
                meanTail.GridDimensions  = 4096;
                meanTail.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1;

                meanRecover = ctx.LoadKernelPTX(resourceStream, "FluffyRecovery");
                meanRecover.BlockDimensions = 256;
                meanRecover.GridDimensions  = 2048;
                meanRecover.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1;
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Unable to create kernels: " + ex.Message);
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            try
            {
                d_buffer    = new CudaDeviceVariable <ulong>(BUFFER_SIZE_U32);
                d_bufferMid = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_B * 8));
                d_bufferB   = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_A * 8));

                d_indexesA = new CudaDeviceVariable <uint>(INDEX_SIZE * 2);
                d_indexesB = new CudaDeviceVariable <uint>(INDEX_SIZE * 2);

                Array.Clear(h_indexesA, 0, h_indexesA.Length);
                Array.Clear(h_indexesB, 0, h_indexesA.Length);

                d_indexesA = h_indexesA;
                d_indexesB = h_indexesB;

                streamPrimary   = new CudaStream(CUStreamFlags.NonBlocking);
                streamSecondary = new CudaStream(CUStreamFlags.NonBlocking);
            }
            catch (Exception ex)
            {
                Task.Delay(200).Wait();
                Logger.Log(LogLevel.Error, $"Out of video memory! Only {ctx.GetFreeDeviceMemorySize()} free");
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            try
            {
                AllocateHostMemory(true, ref h_a, ref hAligned_a, 1024 * 1024 * 32);
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Unable to create pinned memory.");
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            int loopCnt = 0;

            while (!Comms.IsTerminated)
            {
                try
                {
                    if (!TEST && (Comms.nextJob.pre_pow == null || Comms.nextJob.pre_pow == "" || Comms.nextJob.pre_pow == TestPrePow))
                    {
                        Logger.Log(LogLevel.Info, string.Format("Waiting for job...."));
                        Task.Delay(1000).Wait();
                        continue;
                    }

                    if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin)))
                    {
                        currentJob           = Comms.nextJob;
                        currentJob.timestamp = DateTime.Now;
                    }

                    if (!TEST && (currentJob.timestamp.AddMinutes(30) < DateTime.Now) && Comms.lastIncoming.AddMinutes(30) < DateTime.Now)
                    {
                        Logger.Log(LogLevel.Info, string.Format("Job too old..."));
                        Task.Delay(1000).Wait();
                        continue;
                    }

                    // test runs only once
                    if (TEST && loopCnt++ > 100)
                    {
                        Comms.IsTerminated = true;
                    }

                    Solution s;
                    while (graphSolutions.TryDequeue(out s))
                    {
                        meanRecover.SetConstantVariable <ulong>("recovery", s.GetUlongEdges());
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRecover.RunAsync(streamPrimary.Stream, s.job.k0, s.job.k1, s.job.k2, s.job.k3, d_indexesB.DevicePointer);
                        streamPrimary.Synchronize();
                        s.nonces = new uint[40];
                        d_indexesB.CopyToHost(s.nonces, 0, 0, 40 * 4);
                        s.nonces = s.nonces.OrderBy(n => n).ToArray();
                        lock (Comms.graphSolutionsOut)
                        {
                            Comms.graphSolutionsOut.Enqueue(s);
                        }
                        Comms.SetEvent();
                    }
                    uint[] count;
                    do
                    {
                        if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin)))
                        {
                            currentJob           = Comms.nextJob;
                            currentJob.timestamp = DateTime.Now;
                        }
                        currentJob = currentJob.Next();

                        Logger.Log(LogLevel.Debug, string.Format("GPU NV{4}:Trimming #{4}: {0} {1} {2} {3}", currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, currentJob.jobID, deviceID));

                        timer.Restart();

                        d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);

                        meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer, d_indexesB.DevicePointer);
                        meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 0);
                        meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 1, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 16);
                        meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 2, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 32);
                        meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 3, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 48);

                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRound_2.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 2, d_bufferB.DevicePointer, d_indexesA.DevicePointer + (2048 * 4), d_indexesB.DevicePointer + (4096 * 4), DUCK_EDGES_A, DUCK_EDGES_B / 2);
                        meanRound_2.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer - (BUFFER_SIZE_B * 8), d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_A, DUCK_EDGES_B / 2);
                        d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        meanRoundJoin.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer - (BUFFER_SIZE_B * 8), d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2);

                        //d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        //meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B, DUCK_EDGES_B / 2);
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2);
                        d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2);
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 4);

                        for (int i = 0; i < trimRounds; i++)
                        {
                            d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                            meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4);
                            d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                            meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4);
                        }

                        d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        meanTail.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer);

                        ctx.Synchronize();
                        streamPrimary.Synchronize();

                        count = new uint[2];
                        d_indexesA.CopyToHost(count, 0, 0, 8);

                        if (count[0] > 4194304)
                        {
                            // trouble
                            count[0] = 4194304;
                            // log
                        }

                        hAligned_a.AsyncCopyFromDevice(d_buffer.DevicePointer, 0, 0, count[0] * 8, streamPrimary.Stream);
                        streamPrimary.Synchronize();
                        System.Runtime.InteropServices.Marshal.Copy(hAligned_a.PinnedHostPointer, h_a, 0, ((int)count[0] * 8) / sizeof(int));

                        timer.Stop();
                        currentJob.solvedAt = DateTime.Now;
                        currentJob.trimTime = timer.ElapsedMilliseconds;

                        //Console.WriteLine("Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0]);
                        Logger.Log(LogLevel.Info, string.Format("GPU NV{2}:     Trimmed in {0}ms to {1} edges, h {3}", timer.ElapsedMilliseconds, count[0], deviceID, currentJob.height));
                    }while((currentJob.height != Comms.nextJob.height) && (!Comms.IsTerminated) && (!TEST));

                    if (TEST)
                    {
                        //Console.WriteLine("Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0]);

                        CGraph cg = FinderBag.GetFinder();
                        if (cg == null)
                        {
                            continue;
                        }

                        cg.SetEdges(h_a, (int)count[0]);
                        cg.SetHeader(currentJob);

                        //currentJob = currentJob.Next();

                        Task.Factory.StartNew(() =>
                        {
                            Stopwatch sw = new Stopwatch();
                            sw.Start();

                            if (count[0] < 200000)
                            {
                                try
                                {
                                    if (findersInFlight++ < 3)
                                    {
                                        Stopwatch cycleTime = new Stopwatch();
                                        cycleTime.Start();
                                        cg.FindSolutions(graphSolutions);
                                        cycleTime.Stop();
                                        AdjustTrims(cycleTime.ElapsedMilliseconds);
                                        if (graphSolutions.Count > 0)
                                        {
                                            solutions++;
                                        }
                                    }
                                    else
                                    {
                                        Logger.Log(LogLevel.Warning, "CPU overloaded!");
                                    }
                                }
                                catch (Exception ex)
                                {
                                    Logger.Log(LogLevel.Error, "Cycle finder error" + ex.Message);
                                }
                                finally
                                {
                                    findersInFlight--;
                                    FinderBag.ReturnFinder(cg);
                                }
                            }

                            sw.Stop();

                            if (++trims % 50 == 0)
                            {
                                Console.ForegroundColor = ConsoleColor.Green;
                                Console.WriteLine("SOLS: {0}/{1} - RATE: {2:F1}", solutions, trims, (float)trims / solutions);
                                Console.ResetColor();
                            }
                            //Console.WriteLine("Finder completed in {0}ms on {1} edges with {2} solution(s)", sw.ElapsedMilliseconds, count[0], graphSolutions.Count);
                            //Console.WriteLine("Duped edges: {0}", cg.dupes);
                            Logger.Log(LogLevel.Info, string.Format("Finder completed in {0}ms on {1} edges with {2} solution(s) and {3} dupes", sw.ElapsedMilliseconds, count[0], graphSolutions.Count, cg.dupes));
                        });

                        //h_indexesA = d_indexesA;
                        //h_indexesB = d_indexesB;

                        //var sumA = h_indexesA.Sum(e => e);
                        //var sumB = h_indexesB.Sum(e => e);

                        ;
                    }
                    else
                    {
                        CGraph cg = FinderBag.GetFinder();
                        cg.SetEdges(h_a, (int)count[0]);
                        cg.SetHeader(currentJob);

                        Task.Factory.StartNew(() =>
                        {
                            if (count[0] < 200000)
                            {
                                try
                                {
                                    if (findersInFlight++ < 3)
                                    {
                                        Stopwatch cycleTime = new Stopwatch();
                                        cycleTime.Start();
                                        cg.FindSolutions(graphSolutions);
                                        cycleTime.Stop();
                                        AdjustTrims(cycleTime.ElapsedMilliseconds);
                                        if (graphSolutions.Count > 0)
                                        {
                                            solutions++;
                                        }
                                    }
                                    else
                                    {
                                        Logger.Log(LogLevel.Warning, "CPU overloaded!");
                                    }
                                }
                                catch (Exception ex)
                                {
                                    Logger.Log(LogLevel.Error, "Cycle finder crashed: " + ex.Message);
                                }
                                finally
                                {
                                    findersInFlight--;
                                    FinderBag.ReturnFinder(cg);
                                }
                            }
                        });
                    }
                }
                catch (Exception ex)
                {
                    Logger.Log(LogLevel.Error, "Critical error in main cuda loop " + ex.Message);
                    Task.Delay(5000).Wait();
                }
            }

            // clean up
            try
            {
                Task.Delay(500).Wait();

                Comms.Close();

                d_buffer.Dispose();
                d_indexesA.Dispose();
                d_indexesB.Dispose();

                streamPrimary.Dispose();
                streamSecondary.Dispose();

                hAligned_a.Dispose();

                if (ctx != null)
                {
                    ctx.Dispose();
                }
            }
            catch { }
        }
Esempio n. 9
0
        // Initialization code to find the best CUDA Device
        static int findCudaDevice(string[] args)
        {
            int devID = 0;
            // If the command-line has a device number specified, use it
            bool found = false;
            foreach (var item in args)
            {
                if (item.Contains("device="))
                {
                    found = true;
                    if (!int.TryParse(item, out devID))
                    {
                        Console.WriteLine("Invalid command line parameters");
                        Environment.Exit(-1);
                    }
                    if (devID < 0)
                    {
                        Console.WriteLine("Invalid command line parameters\n");
                        Environment.Exit(-1);
                    }
                    else
                    {
                        devID = gpuDeviceInit(devID);
                        if (devID < 0)
                        {
                            Console.WriteLine("exiting...\n");
                            ShrQATest.shrQAFinishExit(args, ShrQATest.eQAstatus.QA_FAILED);
                            Environment.Exit(-1);
                        }
                    }
                }
            }

            if (!found)
            {
                // Otherwise pick the device with highest Gflops/s
                devID = CudaContext.GetMaxGflopsDeviceId();
                ctx = new CudaContext(devID, CUCtxFlags.SchedAuto);
                Console.Write("> Using CUDA device [{0}]: {1}\n", devID, ctx.GetDeviceName());
            }
            return devID;
        }
Esempio n. 10
0
        // General GPU Device CUDA Initialization
        static int gpuDeviceInit(int devID)
        {
            int deviceCount = CudaContext.GetDeviceCount();

            if (deviceCount == 0)
            {
                Console.Write("gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
                Environment.Exit(-1);
            }
            if (devID < 0)
                devID = 0;
            if (devID > deviceCount - 1)
            {
                Console.Write("\n");
                Console.Write(">> {0} CUDA capable GPU device(s) detected. <<\n", deviceCount);
                Console.Write(">> gpuDeviceInit (-device={0}) is not a valid GPU device. <<\n", devID);
                Console.Write("\n");
                return -devID;
            }

            if (CudaContext.GetDeviceComputeCapability(devID).Major < 1)
            {
                Console.Write("gpuDeviceInit(): GPU device does not support CUDA.\n");
                Environment.Exit(-1);
            }
            ctx = new CudaContext(devID);
            Console.Write("> gpuDeviceInit() CUDA device [{0}]: {1}\n", devID, ctx.GetDeviceName());
            return devID;
        }
Esempio n. 11
0
        static void Main(string[] args)
        {
            try
            {
                if (args.Length == 1 && args[0].ToLower().Contains("fidelity"))
                {
                    string[] fseg = args[0].Split(':');
                    deviceID = int.Parse(fseg[1]);
                    nonce    = Int64.Parse(fseg[2]) - 1;
                    range    = int.Parse(fseg[3]);
                    QTEST    = true;
                }
                else
                {
                    if (args.Length > 0)
                    {
                        deviceID = int.Parse(args[0]);
                    }
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Device ID parse error: " + ex.Message);
            }

            try
            {
                if (args.Length > 0)
                {
                    deviceID = int.Parse(args[0]);
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Device ID parse error");
            }

            try
            {
                if (args.Length > 1)
                {
                    port = int.Parse(args[1]);
                    Comms.ConnectToMaster(port);
                }
                else
                {
                    TEST = true;
                    Logger.CopyToConsole = true;
                    CGraph.ShowCycles    = true;
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Master connection error");
            }

            try
            {
                if (args.Length > 3)
                {
                    gpuCount = int.Parse(args[3]);
                    fastCuda = gpuCount <= (Environment.ProcessorCount / 2);
                    if (fastCuda)
                    {
                        Logger.Log(LogLevel.Info, "Using single GPU blocking mode");
                    }
                }
            }
            catch
            {
            }

            if (TEST)
            {
                currentJob = nextJob = new Job()
                {
                    jobID     = 0,
                    k0        = 0xf4956dc403730b01L,
                    k1        = 0xe6d45de39c2a5a3eL,
                    k2        = 0xcbf626a8afee35f6L,
                    k3        = 0x4307b94b1a0c9980L,
                    pre_pow   = TestPrePow,
                    timestamp = DateTime.Now
                };
            }
            else
            {
                currentJob = nextJob = new Job()
                {
                    jobID     = 0,
                    k0        = 0xf4956dc403730b01L,
                    k1        = 0xe6d45de39c2a5a3eL,
                    k2        = 0xcbf626a8afee35f6L,
                    k3        = 0x4307b94b1a0c9980L,
                    pre_pow   = TestPrePow,
                    timestamp = DateTime.Now
                };

                if (!Comms.IsConnected())
                {
                    Console.WriteLine("Master connection failed, aborting");
                    Logger.Log(LogLevel.Error, "No master connection, exitting!");
                    return;
                }

                if (deviceID < 0)
                {
                    int devCnt             = CudaContext.GetDeviceCount();
                    GpuDevicesMessage gpum = new GpuDevicesMessage()
                    {
                        devices = new List <GpuDevice>(devCnt)
                    };
                    for (int i = 0; i < devCnt; i++)
                    {
                        string name = CudaContext.GetDeviceName(i);
                        var    info = CudaContext.GetDeviceInfo(i);
                        gpum.devices.Add(new GpuDevice()
                        {
                            deviceID = i, name = name, memory = info.TotalGlobalMemory
                        });
                    }
                    //Console.WriteLine(devCnt);
                    Comms.gpuMsg = gpum;
                    Comms.SetEvent();
                    //Console.WriteLine("event fired");
                    Task.Delay(1000).Wait();
                    //Console.WriteLine("closing");
                    Comms.Close();
                    return;
                }
            }

            try
            {
                var assembly       = Assembly.GetEntryAssembly();
                var resourceStream = assembly.GetManifestResourceStream("CudaSolver.kernel_x64.ptx");
                ctx = new CudaContext(deviceID, /*!fastCuda ? (CUCtxFlags.BlockingSync | CUCtxFlags.MapHost) :*/ CUCtxFlags.MapHost);
                string pow = new StreamReader(resourceStream).ReadToEnd();

                //pow = File.ReadAllText(@"kernel_x64.ptx");

                Turing = ctx.GetDeviceInfo().MaxSharedMemoryPerMultiprocessor == 65536;

                using (var s = GenerateStreamFromString(pow))
                {
                    if (!Turing)
                    {
                        meanSeedA = ctx.LoadKernelPTX(s, "FluffySeed4K", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)40 });
                        meanSeedA.BlockDimensions = 512;
                        meanSeedA.GridDimensions  = 1024;
                        meanSeedA.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                        meanRound = ctx.LoadKernelPTX(s, "FluffyRound_A2", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)40 });
                        meanRound.BlockDimensions = 512;
                        meanRound.GridDimensions  = 4096;
                        meanRound.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                        meanRound_4 = ctx.LoadKernelPTX(s, "FluffyRound_A1", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)32 });
                        meanRound_4.BlockDimensions = 1024;
                        meanRound_4.GridDimensions  = 1024;
                        meanRound_4.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                        meanRoundJoin = ctx.LoadKernelPTX(s, "FluffyRound_A3", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)32 });
                        meanRoundJoin.BlockDimensions = 1024;
                        meanRoundJoin.GridDimensions  = 4096;
                        meanRoundJoin.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                        meanTail = ctx.LoadKernelPTX(s, "FluffyTail");
                        meanTail.BlockDimensions = 1024;
                        meanTail.GridDimensions  = 4096;
                        meanTail.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1;

                        meanRecover = ctx.LoadKernelPTX(s, "FluffyRecovery");
                        meanRecover.BlockDimensions = 256;
                        meanRecover.GridDimensions  = 2048;
                        meanRecover.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1;
                    }
                    else
                    {
                        meanSeedA = ctx.LoadKernelPTX(s, "FluffySeed4K", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)64 });
                        meanSeedA.BlockDimensions = 512;
                        meanSeedA.GridDimensions  = 1024;
                        meanSeedA.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                        meanRound = ctx.LoadKernelPTX(s, "FluffyRound_C2", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)32 });
                        meanRound.BlockDimensions = 1024;
                        meanRound.GridDimensions  = 4096;
                        meanRound.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                        meanRound_4 = ctx.LoadKernelPTX(s, "FluffyRound_C1", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)64 });
                        meanRound_4.BlockDimensions = 1024;
                        meanRound_4.GridDimensions  = 1024;
                        meanRound_4.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                        meanRoundJoin = ctx.LoadKernelPTX(s, "FluffyRound_C3", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)32 });
                        meanRoundJoin.BlockDimensions = 1024;
                        meanRoundJoin.GridDimensions  = 4096;
                        meanRoundJoin.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                        meanTail = ctx.LoadKernelPTX(s, "FluffyTail");
                        meanTail.BlockDimensions = 1024;
                        meanTail.GridDimensions  = 4096;
                        meanTail.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1;

                        meanRecover = ctx.LoadKernelPTX(s, "FluffyRecovery");
                        meanRecover.BlockDimensions = 256;
                        meanRecover.GridDimensions  = 2048;
                        meanRecover.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1;
                    }
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Unable to create kernels: " + ex.Message);
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            try
            {
                d_buffer    = new CudaDeviceVariable <ulong>(BUFFER_SIZE_U32 * (temp ? 8 : 1));
                d_bufferMid = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_B * 2));
                d_bufferB   = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_B * 8));

                d_indexesA = new CudaDeviceVariable <uint>(INDEX_SIZE);
                d_indexesB = new CudaDeviceVariable <uint>(INDEX_SIZE);
                d_aux      = new CudaDeviceVariable <uint>(512);

                Array.Clear(h_indexesA, 0, h_indexesA.Length);
                Array.Clear(h_indexesB, 0, h_indexesA.Length);

                d_indexesA = h_indexesA;
                d_indexesB = h_indexesB;

                streamPrimary = new CudaStream(CUStreamFlags.NonBlocking);
            }
            catch (Exception ex)
            {
                Task.Delay(200).Wait();
                Logger.Log(LogLevel.Error, $"Mem alloc exception. Out of video memory? {ctx.GetFreeDeviceMemorySize()} free");
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            try
            {
                AllocateHostMemory(true, ref h_a, ref hAligned_a, 1024 * 1024 * 32);
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Unable to create pinned memory.");
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            int loopCnt = 0;

            while (!Comms.IsTerminated)
            {
                try
                {
                    if (!TEST && (Comms.nextJob.pre_pow == null || Comms.nextJob.pre_pow == "" || Comms.nextJob.pre_pow == TestPrePow))
                    {
                        Logger.Log(LogLevel.Info, string.Format("Waiting for job...."));
                        Task.Delay(1000).Wait();
                        continue;
                    }

                    if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin)))
                    {
                        currentJob           = Comms.nextJob;
                        currentJob.timestamp = DateTime.Now;
                    }

                    if (!TEST && (currentJob.timestamp.AddMinutes(30) < DateTime.Now) && Comms.lastIncoming.AddMinutes(30) < DateTime.Now)
                    {
                        Logger.Log(LogLevel.Info, string.Format("Job too old..."));
                        Task.Delay(1000).Wait();
                        continue;
                    }

                    // test runs only once
                    if (TEST && ++loopCnt >= range)
                    {
                        Comms.IsTerminated = true;
                    }

                    Solution s;
                    while (graphSolutions.TryDequeue(out s))
                    {
                        meanRecover.SetConstantVariable <ulong>("recovery", s.GetUlongEdges());
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRecover.RunAsync(streamPrimary.Stream, s.job.k0, s.job.k1, s.job.k2, s.job.k3, d_indexesB.DevicePointer);
                        streamPrimary.Synchronize();
                        s.nonces = new uint[32];
                        d_indexesB.CopyToHost(s.nonces, 0, 0, 32 * 4);
                        s.nonces = s.nonces.OrderBy(n => n).ToArray();
                        //fidelity = (32-cycles_found / graphs_searched) * 32
                        solutions++;
                        s.fidelity = ((double)solutions / (double)trims) * 32.0;
                        //Console.WriteLine(s.fidelity.ToString("0.000"));
                        if (Comms.IsConnected())
                        {
                            Comms.graphSolutionsOut.Enqueue(s);
                            Comms.SetEvent();
                        }
                        if (QTEST)
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine($"Solution for nonce {s.job.nonce}: {string.Join(' ', s.nonces)}");
                            Console.ResetColor();
                        }
                    }

                    if (QTEST)
                    {
                        currentJob = currentJob.NextSequential(ref nonce);
                        Console.WriteLine($"Nonce: {nonce} K0: {currentJob.k0:X} K1: {currentJob.k1:X} K2: {currentJob.k2:X} K3: {currentJob.k3:X}");
                    }
                    else
                    {
                        currentJob = currentJob.Next();
                    }

                    Logger.Log(LogLevel.Debug, string.Format("GPU NV{4}:Trimming #{4}: {0} {1} {2} {3}", currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, currentJob.jobID, deviceID));

                    timer.Restart();

                    d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                    d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                    d_aux.MemsetAsync(0, streamPrimary.Stream);

                    meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer, d_indexesB.DevicePointer, 0);
                    meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer + ((BUFFER_SIZE_A * 8) / 4 / 4) * 1, d_indexesB.DevicePointer + (4096 * 4), EDGE_SEG);
                    meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer + ((BUFFER_SIZE_A * 8) / 4 / 4) * 2, d_indexesB.DevicePointer + (4096 * 8), EDGE_SEG * 2);
                    meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer + ((BUFFER_SIZE_A * 8) / 4 / 4) * 3, d_indexesB.DevicePointer + (4096 * 12), EDGE_SEG * 3);

                    meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 0);
                    meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 1, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 1024);
                    meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 2, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 2048);
                    meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 3, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 3072);


                    //streamPrimary.Synchronize();
                    //h_indexesA = d_indexesA;
                    //h_indexesB = d_indexesB;
                    //var sumA = h_indexesA.Sum(e => e);
                    //var sumB = h_indexesB.Sum(e => e);
                    //streamPrimary.Synchronize();

                    d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                    meanRoundJoin.RunAsync(streamPrimary.Stream,
                                           d_buffer.DevicePointer,
                                           d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 1,
                                           d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 2,
                                           d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 3,
                                           d_bufferB.DevicePointer,
                                           d_indexesA.DevicePointer,
                                           d_indexesB.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 2);

                    d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                    meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2, 0, d_aux.DevicePointer);
                    d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                    meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2, 1, d_aux.DevicePointer);
                    d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                    meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2, 2, d_aux.DevicePointer);
                    d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                    meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 4, 3, d_aux.DevicePointer);

                    for (int i = 0; i < (TEST ? 80 : trimRounds); i++)
                    //for (int i = 0; i < 85; i++)
                    {
                        d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4, i * 2 + 4, d_aux.DevicePointer);
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4, i * 2 + 5, d_aux.DevicePointer);
                    }

                    d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                    meanTail.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer);

                    Task.Delay((int)lastTrimMs).Wait();

                    streamPrimary.Synchronize();

                    uint[] count = new uint[2];
                    d_indexesA.CopyToHost(count, 0, 0, 8);

                    if (count[0] > 131071)
                    {
                        // trouble
                        count[0] = 131071;
                        // log
                    }

                    hAligned_a.AsyncCopyFromDevice(d_buffer.DevicePointer, 0, 0, count[0] * 8, streamPrimary.Stream);
                    streamPrimary.Synchronize();
                    System.Runtime.InteropServices.Marshal.Copy(hAligned_a.PinnedHostPointer, h_a, 0, ((int)count[0] * 8) / sizeof(int));

                    trims++;
                    timer.Stop();
                    lastTrimMs          = (long)Math.Min(Math.Max((float)timer.ElapsedMilliseconds * 0.9f, 50), 500);
                    currentJob.solvedAt = DateTime.Now;
                    currentJob.trimTime = timer.ElapsedMilliseconds;

                    //Console.WriteLine("Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0]);
                    Logger.Log(LogLevel.Info, string.Format("GPU NV{2}:     Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0], deviceID));


                    FinderBag.RunFinder(TEST, ref trims, count[0], h_a, currentJob, graphSolutions, timer);

                    if (trims % 50 == 0 && TEST)
                    {
                        Console.ForegroundColor = ConsoleColor.Green;
                        Console.WriteLine("SOLS: {0}/{1} - RATE: {2:F1}", solutions, trims, (float)trims / solutions);
                        Console.ResetColor();
                    }

                    /*
                     * if (TEST)
                     * {
                     *  //Console.WriteLine("Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0]);
                     *
                     *  CGraph cg = FinderBag.GetFinder();
                     *  cg.SetEdges(h_a, (int)count[0]);
                     *  cg.SetHeader(currentJob);
                     *
                     *  //currentJob = currentJob.Next();
                     *
                     *  Task.Factory.StartNew(() =>
                     *     {
                     *         Stopwatch sw = new Stopwatch();
                     *         sw.Start();
                     *
                     *         if (count[0] < 131071)
                     *         {
                     *             try
                     *             {
                     *                 if (findersInFlight++ < 3)
                     *                 {
                     *                     Stopwatch cycleTime = new Stopwatch();
                     *                     cycleTime.Start();
                     *                     cg.FindSolutions(graphSolutions);
                     *                     cycleTime.Stop();
                     *                     AdjustTrims(cycleTime.ElapsedMilliseconds);
                     *                     //if (graphSolutions.Count > 0) solutions++;
                     *                 }
                     *                 else
                     *                     Logger.Log(LogLevel.Warning, "CPU overloaded!");
                     *             }
                     *             catch (Exception ex)
                     *             {
                     *                 Logger.Log(LogLevel.Error, "Cycle finder error" + ex.Message);
                     *             }
                     *             finally
                     *             {
                     *                 FinderBag.ReturnFinder(cg);
                     *                 findersInFlight--;
                     *             }
                     *         }
                     *
                     *         sw.Stop();
                     *
                     *         if (trims % 50 == 0)
                     *         {
                     *             Console.ForegroundColor = ConsoleColor.Green;
                     *             Console.WriteLine("SOLS: {0}/{1} - RATE: {2:F1}", solutions, trims, (float)trims/solutions );
                     *             Console.ResetColor();
                     *         }
                     *         //Console.WriteLine("Finder completed in {0}ms on {1} edges with {2} solution(s)", sw.ElapsedMilliseconds, count[0], graphSolutions.Count);
                     *         //Console.WriteLine("Duped edges: {0}", cg.dupes);
                     *         if (!QTEST)
                     *          Logger.Log(LogLevel.Info, string.Format("Finder completed in {0}ms on {1} edges with {2} solution(s) and {3} dupes", sw.ElapsedMilliseconds, count[0], graphSolutions.Count, cg.dupes));
                     *     });
                     *
                     *  //h_indexesA = d_indexesA;
                     *  //h_indexesB = d_indexesB;
                     *
                     *  //var sumA = h_indexesA.Sum(e => e);
                     *  //var sumB = h_indexesB.Sum(e => e);
                     *
                     *  ;
                     * }
                     * else
                     * {
                     *  CGraph cg = FinderBag.GetFinder();
                     *  cg.SetEdges(h_a, (int)count[0]);
                     *  cg.SetHeader(currentJob);
                     *
                     *  Task.Factory.StartNew(() =>
                     *  {
                     *      if (count[0] < 131071)
                     *      {
                     *          try
                     *          {
                     *              if (findersInFlight++ < 3)
                     *              {
                     *                  Stopwatch cycleTime = new Stopwatch();
                     *                  cycleTime.Start();
                     *                  cg.FindSolutions(graphSolutions);
                     *                  cycleTime.Stop();
                     *                  AdjustTrims(cycleTime.ElapsedMilliseconds);
                     *              }
                     *              else
                     *                  Logger.Log(LogLevel.Warning, "CPU overloaded!");
                     *          }
                     *          catch (Exception ex)
                     *          {
                     *              Logger.Log(LogLevel.Warning, "Cycle finder crashed: " + ex.Message);
                     *          }
                     *          finally
                     *          {
                     *              FinderBag.ReturnFinder(cg);
                     *              findersInFlight--;
                     *          }
                     *      }
                     *  });
                     * }
                     *
                     */
                }
                catch (Exception ex)
                {
                    Logger.Log(LogLevel.Error, "Critical error in main cuda loop " + ex.Message);
                    Task.Delay(500).Wait();
                    break;
                }
            }

            // clean up
            try
            {
                Task.Delay(500).Wait();

                Comms.Close();

                d_buffer.Dispose();
                d_indexesA.Dispose();
                d_indexesB.Dispose();
                d_aux.Dispose();

                streamPrimary.Dispose();
                streamSecondary.Dispose();

                hAligned_a.Dispose();

                if (ctx != null)
                {
                    ctx.Dispose();
                }
            }
            catch { }
        }
Esempio n. 12
0
        static int Main()
        {
            Console.Write("Press enter to test your GPU");

            var scr = Console.ReadLine();

            try
            {
                int deviceCount = CudaContext.GetDeviceCount();
                if (deviceCount == 0)
                {
                    Console.Error.WriteLine("No CUDA devices detected. Sad face.");
                    return(-1);
                }
                Console.WriteLine($"{deviceCount} CUDA devices detected (first will be used)");
                for (int i = 0; i < deviceCount; i++)
                {
                    Console.WriteLine($"{i}: {CudaContext.GetDeviceName(i)}");
                }

                bool GoOn = true;

                while (GoOn)
                {
                    //for (int z = 100000; z < 100000*3000; z = z*2)
                    //{
                    long z = 50000000;
                    for (int a = 2; a < 3; a++)
                    {
                        Console.WriteLine($"GridDim:{a} n:{z / 1000000} MM");
                        using (var myGPU = new GPU(deviceId: 0, _count: 1024 * a))
                        {
                            // Console.WriteLine("Initializing kernel...");
                            string log;
                            var    compileResult = myGPU.LoadKernel(out log);
                            if (compileResult != ManagedCuda.NVRTC.nvrtcResult.Success)
                            {
                                Console.Error.WriteLine(compileResult);
                                Console.Error.WriteLine(log);
                                Console.ReadLine();
                                return(-1);
                            }
                            //Console.WriteLine(log);

                            //Tests.Test_0(Count, myGPU);

                            Tests.Test_1(myGPU, 1, z);

                            //Tests.Test_2(z, myGPU);
                        }

                        Console.WriteLine("Cleaning up...");
                    }
                    //     Console.ReadKey();
                    //}

                    Console.WriteLine("All done; ESC to exit any key to repeat :)");
                    var k = Console.ReadKey();
                    GoOn = k.Key.ToString() != Keys.Escape.ToString();
                }
            } catch (Exception ex)
            {
                Console.Error.WriteLine(ex.Message);
            }

            return(0);
        }
Esempio n. 13
0
        private void initGLAndCuda()
        {
            //Create render target control
            m_renderControl              = new OpenTK.GLControl(GraphicsMode.Default, 1, 0, GraphicsContextFlags.Default);
            m_renderControl.Dock         = DockStyle.Fill;
            m_renderControl.BackColor    = Color.White;
            m_renderControl.BorderStyle  = BorderStyle.FixedSingle;
            m_renderControl.KeyDown     += new KeyEventHandler(m_renderControl_KeyDown);
            m_renderControl.MouseMove   += new MouseEventHandler(m_renderControl_MouseMove);
            m_renderControl.MouseDown   += new MouseEventHandler(m_renderControl_MouseDown);
            m_renderControl.SizeChanged += new EventHandler(m_renderControl_SizeChanged);

            panel1.Controls.Add(m_renderControl);
            Console.WriteLine("   OpenGL device is Available");

            int deviceID = CudaContext.GetMaxGflopsDeviceId();

            ctx = CudaContext.CreateOpenGLContext(deviceID, CUCtxFlags.BlockingSync);
            string console = string.Format("CUDA device [{0}] has {1} Multi-Processors", ctx.GetDeviceName(), ctx.GetDeviceInfo().MultiProcessorCount);

            Console.WriteLine(console);

            CUmodule module = ctx.LoadModulePTX("kernel.ptx");

            addForces_k       = new CudaKernel("addForces_k", module, ctx);
            advectVelocity_k  = new CudaKernel("advectVelocity_k", module, ctx);
            diffuseProject_k  = new CudaKernel("diffuseProject_k", module, ctx);
            updateVelocity_k  = new CudaKernel("updateVelocity_k", module, ctx);
            advectParticles_k = new CudaKernel("advectParticles_OGL", module, ctx);

            hvfield = new cData[DS];
            dvfield = new CudaPitchedDeviceVariable <cData>(DIM, DIM);
            tPitch  = dvfield.Pitch;

            dvfield.CopyToDevice(hvfield);

            vxfield = new CudaDeviceVariable <cData>(DS);
            vyfield = new CudaDeviceVariable <cData>(DS);

            // Create particle array
            particles = new cData[DS];
            initParticles(particles, DIM, DIM);

            // TODO: update kernels to use the new unpadded memory layout for perf
            // rather than the old FFTW-compatible layout
            planr2c = new CudaFFTPlan2D(DIM, DIM, cufftType.R2C, Compatibility.FFTWPadding);
            planc2r = new CudaFFTPlan2D(DIM, DIM, cufftType.C2R, Compatibility.FFTWPadding);

            GL.GenBuffers(1, out vbo);
            GL.BindBuffer(BufferTarget.ArrayBuffer, vbo);
            GL.BufferData <cData>(BufferTarget.ArrayBuffer, new IntPtr(cData.SizeOf * DS), particles, BufferUsageHint.DynamicDraw);
            int bsize;

            GL.GetBufferParameter(BufferTarget.ArrayBuffer, BufferParameterName.BufferSize, out bsize);

            if (bsize != DS * cData.SizeOf)
            {
                throw new Exception("Sizes don't match.");
            }

            GL.BindBuffer(BufferTarget.ArrayBuffer, 0);

            cuda_vbo_resource = new CudaGraphicsInteropResourceCollection();
            cuda_vbo_resource.Add(new CudaOpenGLBufferInteropResource(vbo, CUGraphicsRegisterFlags.None));

            texref = new CudaTextureArray2D(advectVelocity_k, "texref", CUAddressMode.Wrap, CUFilterMode.Linear, 0, CUArrayFormat.Float, DIM, DIM, CudaArray2DNumChannels.Two);

            stopwatch = new CudaStopWatch(CUEventFlags.Default);

            reshape();
            isInit = true;
            display();
        }
Esempio n. 14
0
        private bool InitializeD3D()
        {
            HwndSource hwnd = new HwndSource(0, 0, 0, 0, 0, "null", IntPtr.Zero);
            // Create the D3D object.
            d3d = new Direct3DEx();

            PresentParameters pp = new PresentParameters();
            pp.BackBufferWidth = 512;
            pp.BackBufferHeight = 512;
            pp.BackBufferFormat = Format.Unknown;
            pp.BackBufferCount = 0;
            pp.Multisample = MultisampleType.None;
            pp.MultisampleQuality = 0;
            pp.SwapEffect = SwapEffect.Discard;
            pp.DeviceWindowHandle = (IntPtr)0;
            pp.Windowed = true;
            pp.EnableAutoDepthStencil = false;
            pp.AutoDepthStencilFormat = Format.Unknown;
            pp.PresentationInterval = PresentInterval.Default;

            bDeviceFound = false;
            CUdevice[] cudaDevices = null;
            for (g_iAdapter = 0; g_iAdapter < d3d.AdapterCount; g_iAdapter++)
            {
                device = new DeviceEx(d3d, d3d.Adapters[g_iAdapter].Adapter, DeviceType.Hardware, hwnd.Handle, CreateFlags.HardwareVertexProcessing | CreateFlags.Multithreaded, pp);
                try
                {
                    cudaDevices = CudaContext.GetDirectXDevices(device.ComPointer, CUd3dXDeviceList.All, CudaContext.DirectXVersion.D3D9);
                    bDeviceFound = cudaDevices.Length > 0;
                    infoLog.AppendText("> Display Device #" + d3d.Adapters[g_iAdapter].Adapter
                        + ": \"" + d3d.Adapters[g_iAdapter].Details.Description + "\" supports Direct3D9 and CUDA.\n");
                    break;
                }
                catch (CudaException)
                {
                    //No Cuda device found for this Direct3D9 device
                    infoLog.AppendText("> Display Device #" + d3d.Adapters[g_iAdapter].Adapter
                        + ": \"" + d3d.Adapters[g_iAdapter].Details.Description + "\" supports Direct3D9 but not CUDA.\n");
                }
            }

            // we check to make sure we have found a cuda-compatible D3D device to work on
            if (!bDeviceFound)
            {
                infoLog.AppendText("No CUDA-compatible Direct3D9 device available");
                if (device != null)
                    device.Dispose();
                return false;
            }

            ctx = new CudaContext(cudaDevices[0], device.ComPointer, CUCtxFlags.BlockingSync, CudaContext.DirectXVersion.D3D9);
            deviceName.Text = "Device name: " + ctx.GetDeviceName();

            // Set projection matrix
            SlimDX.Matrix matProj = SlimDX.Matrix.OrthoOffCenterLH(0, 1, 1, 0, 0, 1);
            device.SetTransform(TransformState.Projection, matProj);

            // Turn off D3D lighting, since we are providing our own vertex colors
            device.SetRenderState(RenderState.Lighting, false);

            //Load kernels
            CUmodule module = ctx.LoadModulePTX("kernel.ptx");

            addForces_k = new CudaKernel("addForces_k", module, ctx);
            advectVelocity_k = new CudaKernel("advectVelocity_k", module, ctx);
            diffuseProject_k = new CudaKernel("diffuseProject_k", module, ctx);
            updateVelocity_k = new CudaKernel("updateVelocity_k", module, ctx);
            advectParticles_k = new CudaKernel("advectParticles_k", module, ctx);

            d3dimage.Lock();
            Surface surf = device.GetBackBuffer(0, 0);
            d3dimage.SetBackBuffer(D3DResourceType.IDirect3DSurface9, surf.ComPointer);
            d3dimage.Unlock();
            surf.Dispose();

            //Setup the "real" frame rate counter.
            //The cuda counter only measures cuda runtime, not the overhead to actually
            //show the result via DirectX and WPF.
            realLastTick = Environment.TickCount;
            return true;
        }