Esempio n. 1
0
        /// <summary>
        /// Sender loop. Each time <c>flushToMaster</c> is signalled it drains the outgoing
        /// solution queue, sends the pending GPU message (if one is set) and drains the
        /// outgoing log queue, serializing every item to <c>stream</c>. The loop runs until
        /// <c>IsTerminated</c> becomes true.
        /// </summary>
        /// <remarks>
        /// Solutions dequeued while <c>IsVerified</c> is false are discarded rather than sent.
        /// The first log message containing "trimmed" (case-insensitive) is recorded in
        /// <c>verificationMessage</c>.
        /// </remarks>
        private static void WaitSend()
        {
            // NOTE(review): BinaryFormatter is obsolete and unsafe for the side that deserializes
            // untrusted data; it is kept here because the receiving end expects this wire format.
            // The formatters carry no per-message state, so they are created once instead of
            // once per item.
            BinaryFormatter fullFormatter = new BinaryFormatter()
            {
                AssemblyFormat = System.Runtime.Serialization.Formatters.FormatterAssemblyStyle.Full
            };
            BinaryFormatter simpleFormatter = new BinaryFormatter()
            {
                AssemblyFormat = System.Runtime.Serialization.Formatters.FormatterAssemblyStyle.Simple
            };

            while (!IsTerminated)
            {
                try
                {
                    // Block until a producer signals that there is something to send.
                    flushToMaster.WaitOne();

                    Solution s;
                    while (graphSolutionsOut.TryDequeue(out s))
                    {
                        // The original test "(IsVerified && verificationMessage == null) || IsVerified"
                        // is logically just IsVerified.
                        if (IsVerified)
                        {
                            fullFormatter.Serialize(stream, s);
                        }
                    }

                    if (gpuMsg != null)
                    {
                        simpleFormatter.Serialize(stream, gpuMsg);
                        gpuMsg = null;
                        // This is the only place the stream is flushed explicitly.
                        stream.Flush();
                    }

                    LogMessage lm;
                    while (logsOut.TryDequeue(out lm))
                    {
                        // Ordinal, case-insensitive search: ToLower() depends on the current culture
                        // and would fail to match e.g. "TRIMMED" under Turkish casing rules.
                        if (verificationMessage == null
                            && lm.message != null
                            && lm.message.IndexOf("trimmed", StringComparison.OrdinalIgnoreCase) >= 0)
                        {
                            verificationMessage = "GrinPro2.Solvers." + lm.message;
                        }

                        fullFormatter.Serialize(stream, lm);
                    }
                }
                catch (Exception ex)
                {
                    // Keep the loop alive (best effort), but do not hide the failure.
                    // The shared Logger is deliberately not used here: it may feed logsOut,
                    // which is sent by this very loop (assumption — confirm against Logger).
                    Console.WriteLine("WaitSend error: " + ex);
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Sender loop. Each time <c>flushToMaster</c> is signalled it drains the outgoing
        /// solution queue, sends the pending GPU message (if one is set) and drains the
        /// outgoing log queue, serializing every item to <c>stream</c>. The loop runs until
        /// <c>IsTerminated</c> becomes true.
        /// </summary>
        private static void WaitSend()
        {
            // NOTE(review): BinaryFormatter is obsolete and unsafe for the side that deserializes
            // untrusted data; it is kept here because the receiving end expects this wire format.
            // Every message uses the same settings, so a single formatter is created up front
            // instead of one per item.
            BinaryFormatter formatter = new BinaryFormatter()
            {
                AssemblyFormat = System.Runtime.Serialization.Formatters.FormatterAssemblyStyle.Simple
            };

            while (!IsTerminated)
            {
                try
                {
                    // Block until a producer signals that there is something to send.
                    flushToMaster.WaitOne();

                    Solution s;
                    while (graphSolutionsOut.TryDequeue(out s))
                    {
                        formatter.Serialize(stream, s);
                    }

                    if (gpuMsg != null)
                    {
                        formatter.Serialize(stream, gpuMsg);
                        gpuMsg = null;
                        // This is the only place the stream is flushed explicitly.
                        stream.Flush();
                    }

                    LogMessage lm;
                    while (logsOut.TryDequeue(out lm))
                    {
                        formatter.Serialize(stream, lm);
                    }
                }
                catch (Exception ex)
                {
                    // Keep the loop alive (best effort), but do not hide the failure.
                    // The shared Logger is deliberately not used here: it may feed logsOut,
                    // which is sent by this very loop (assumption — confirm against Logger).
                    Console.WriteLine("WaitSend error: " + ex);
                }
            }
        }
Esempio n. 3
0
        static void Main(string[] args)
        {
            try
            {
                if (args.Length > 0)
                {
                    deviceID = int.Parse(args[0]);
                }
                if (args.Length > 2)
                {
                    platformID = int.Parse(args[2]);
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Device ID parse error", ex);
            }

            try
            {
                if (args.Length > 1)
                {
                    port = int.Parse(args[1]);
                    Comms.ConnectToMaster(port);
                }
                else
                {
                    TEST = true;
                    CGraph.ShowCycles    = true;
                    Logger.CopyToConsole = true;
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Master connection error");
            }


            // Gets all available platforms and their corresponding devices, and prints them out in a table
            List <Platform> platforms = null;

            try
            {
                platforms = Platform.GetPlatforms().ToList();
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Failed to get OpenCL platform list");
                return;
            }

            if (TEST)
            {
                currentJob = nextJob = new Job()
                {
                    jobID = 0,
                    k0    = 0xf4956dc403730b01L,
                    k1    = 0xe6d45de39c2a5a3eL,
                    k2    = 0xcbf626a8afee35f6L,
                    k3    = 0x4307b94b1a0c9980L,
                    //k0 = 0x10ef16eadd6aa061L,
                    //k1 = 0x563f07e7a3c788b3L,
                    //k2 = 0xe8d7c8db1518f29aL,
                    //k3 = 0xc0ab7d1b4ca1adffL,
                    pre_pow   = TestPrePow,
                    timestamp = DateTime.Now
                };
            }
            else
            {
                currentJob = nextJob = new Job()
                {
                    jobID     = 0,
                    k0        = 0xf4956dc403730b01L,
                    k1        = 0xe6d45de39c2a5a3eL,
                    k2        = 0xcbf626a8afee35f6L,
                    k3        = 0x4307b94b1a0c9980L,
                    pre_pow   = TestPrePow,
                    timestamp = DateTime.Now
                };

                if (!Comms.IsConnected())
                {
                    Console.WriteLine("Master connection failed, aborting");
                    Logger.Log(LogLevel.Error, "No master connection, exitting!");
                    Task.Delay(500).Wait();
                    return;
                }

                if (deviceID < 0)
                {
                    try
                    {
                        //Environment.SetEnvironmentVariable("GPU_FORCE_64BIT_PTR", "1", EnvironmentVariableTarget.Machine);
                        Environment.SetEnvironmentVariable("GPU_MAX_HEAP_SIZE", "100", EnvironmentVariableTarget.User);
                        Environment.SetEnvironmentVariable("GPU_USE_SYNC_OBJECTS", "1", EnvironmentVariableTarget.User);
                        Environment.SetEnvironmentVariable("GPU_MAX_ALLOC_PERCENT", "100", EnvironmentVariableTarget.User);
                        Environment.SetEnvironmentVariable("GPU_SINGLE_ALLOC_PERCENT", "100", EnvironmentVariableTarget.User);
                        Environment.SetEnvironmentVariable("GPU_64BIT_ATOMICS", "1", EnvironmentVariableTarget.User);
                        Environment.SetEnvironmentVariable("GPU_MAX_WORKGROUP_SIZE", "1024", EnvironmentVariableTarget.User);
                        //Environment.SetEnvironmentVariable("AMD_OCL_BUILD_OPTIONS_APPEND", "-cl-std=CL2.0", EnvironmentVariableTarget.Machine);

                        GpuDevicesMessage gpum = new GpuDevicesMessage()
                        {
                            devices = new List <GpuDevice>()
                        };
                        //foreach (Platform platform in platforms)
                        for (int p = 0; p < platforms.Count(); p++)
                        {
                            Platform platform = platforms[p];
                            var      devices  = platform.GetDevices(DeviceType.Gpu).ToList();
                            //foreach (Device device in platform.GetDevices(DeviceType.All))
                            for (int d = 0; d < devices.Count(); d++)
                            {
                                Device device = devices[d];
                                string name   = device.Name;
                                string pName  = platform.Name;
                                //Console.WriteLine(device.Name + " " + platform.Version.VersionString);
                                gpum.devices.Add(new GpuDevice()
                                {
                                    deviceID = d, platformID = p, platformName = pName, name = name, memory = device.GlobalMemorySize
                                });
                            }
                        }
                        Comms.gpuMsg = gpum;
                        Comms.SetEvent();
                        Task.Delay(1000).Wait();
                        Comms.Close();
                        return;
                    }
                    catch (Exception ex)
                    {
                        Logger.Log(LogLevel.Error, "Unable to enumerate OpenCL devices");
                        Task.Delay(500).Wait();
                        Comms.Close();
                        return;
                    }
                }
            }

            try
            {
                Device chosenDevice = null;
                try
                {
                    chosenDevice = platforms[platformID].GetDevices(DeviceType.Gpu).ToList()[deviceID];
                    Console.WriteLine($"Using OpenCL device: {chosenDevice.Name} ({chosenDevice.Vendor})");
                    Console.WriteLine();
                }
                catch (Exception ex)
                {
                    Logger.Log(LogLevel.Error, $"Unable to select OpenCL device {deviceID} on platform {platformID} ");
                    Task.Delay(500).Wait();
                    Comms.Close();
                    return;
                }

                var assembly       = Assembly.GetEntryAssembly();
                var resourceStream = assembly.GetManifestResourceStream("OclSolver.kernel.cl");
                using (StreamReader reader = new StreamReader(resourceStream))
                {
                    using (Context context = Context.CreateContext(chosenDevice))
                    {
                        /*
                         * Once the program has been created you can use clGetProgramInfo with CL_PROGRAM_BINARY_SIZES and then CL_PROGRAM_BINARIES, storing the resulting binary programs (one for each device of the context) into a buffer you supply. You can then save this binary data to disk for use in later runs.
                         * Not all devices might support binaries, so you will need to check the CL_PROGRAM_BINARY_SIZES result (it returns a zero size for that device if binaries are not supported).
                         */
                        using (OpenCl.DotNetCore.Programs.Program program = context.CreateAndBuildProgramFromString(reader.ReadToEnd()))
                        {
                            using (CommandQueue commandQueue = CommandQueue.CreateCommandQueue(context, chosenDevice))
                            {
                                IntPtr clearPattern = IntPtr.Zero;
                                uint[] edgesCount;
                                int[]  edgesLeft;
                                int    trims = 0;
                                try
                                {
                                    clearPattern = Marshal.AllocHGlobal(4);
                                    Marshal.Copy(new byte[4] {
                                        0, 0, 0, 0
                                    }, 0, clearPattern, 4);

                                    try
                                    {
                                        bufferA1 = context.CreateBuffer <uint>(MemoryFlag.ReadWrite, BUFFER_SIZE_A1);
                                        bufferA2 = context.CreateBuffer <uint>(MemoryFlag.ReadWrite, BUFFER_SIZE_A2);
                                        bufferB  = context.CreateBuffer <uint>(MemoryFlag.ReadWrite, BUFFER_SIZE_B);

                                        bufferI1 = context.CreateBuffer <uint>(MemoryFlag.ReadWrite, INDEX_SIZE);
                                        bufferI2 = context.CreateBuffer <uint>(MemoryFlag.ReadWrite, INDEX_SIZE);

                                        bufferR = context.CreateBuffer <uint>(MemoryFlag.ReadOnly, 42 * 2);
                                    }
                                    catch (Exception ex)
                                    {
                                        Task.Delay(500).Wait();
                                        Logger.Log(LogLevel.Error, "Unable to allocate buffers, out of memory?");
                                        Task.Delay(500).Wait();
                                        Comms.Close();
                                        return;
                                    }

                                    using (Kernel kernelSeedA = program.CreateKernel("FluffySeed2A"))
                                        using (Kernel kernelSeedB1 = program.CreateKernel("FluffySeed2B"))
                                            using (Kernel kernelSeedB2 = program.CreateKernel("FluffySeed2B"))
                                                using (Kernel kernelRound1 = program.CreateKernel("FluffyRound1"))
                                                    using (Kernel kernelRoundO = program.CreateKernel("FluffyRoundNO1"))
                                                        using (Kernel kernelRoundNA = program.CreateKernel("FluffyRoundNON"))
                                                            using (Kernel kernelRoundNB = program.CreateKernel("FluffyRoundNON"))
                                                                using (Kernel kernelTail = program.CreateKernel("FluffyTailO"))
                                                                    using (Kernel kernelRecovery = program.CreateKernel("FluffyRecovery"))
                                                                    {
                                                                        Stopwatch sw = new Stopwatch();

                                                                        kernelSeedA.SetKernelArgumentGeneric(0, currentJob.k0);
                                                                        kernelSeedA.SetKernelArgumentGeneric(1, currentJob.k1);
                                                                        kernelSeedA.SetKernelArgumentGeneric(2, currentJob.k2);
                                                                        kernelSeedA.SetKernelArgumentGeneric(3, currentJob.k3);
                                                                        kernelSeedA.SetKernelArgument(4, bufferB);
                                                                        kernelSeedA.SetKernelArgument(5, bufferA1);
                                                                        kernelSeedA.SetKernelArgument(6, bufferI1);

                                                                        kernelSeedB1.SetKernelArgument(0, bufferA1);
                                                                        kernelSeedB1.SetKernelArgument(1, bufferA1);
                                                                        kernelSeedB1.SetKernelArgument(2, bufferA2);
                                                                        kernelSeedB1.SetKernelArgument(3, bufferI1);
                                                                        kernelSeedB1.SetKernelArgument(4, bufferI2);
                                                                        kernelSeedB1.SetKernelArgumentGeneric(5, (uint)32);

                                                                        kernelSeedB2.SetKernelArgument(0, bufferB);
                                                                        kernelSeedB2.SetKernelArgument(1, bufferA1);
                                                                        kernelSeedB2.SetKernelArgument(2, bufferA2);
                                                                        kernelSeedB2.SetKernelArgument(3, bufferI1);
                                                                        kernelSeedB2.SetKernelArgument(4, bufferI2);
                                                                        kernelSeedB2.SetKernelArgumentGeneric(5, (uint)0);

                                                                        kernelRound1.SetKernelArgument(0, bufferA1);
                                                                        kernelRound1.SetKernelArgument(1, bufferA2);
                                                                        kernelRound1.SetKernelArgument(2, bufferB);
                                                                        kernelRound1.SetKernelArgument(3, bufferI2);
                                                                        kernelRound1.SetKernelArgument(4, bufferI1);
                                                                        kernelRound1.SetKernelArgumentGeneric(5, (uint)DUCK_SIZE_A * 1024);
                                                                        kernelRound1.SetKernelArgumentGeneric(6, (uint)DUCK_SIZE_B * 1024);

                                                                        kernelRoundO.SetKernelArgument(0, bufferB);
                                                                        kernelRoundO.SetKernelArgument(1, bufferA1);
                                                                        kernelRoundO.SetKernelArgument(2, bufferI1);
                                                                        kernelRoundO.SetKernelArgument(3, bufferI2);

                                                                        kernelRoundNA.SetKernelArgument(0, bufferB);
                                                                        kernelRoundNA.SetKernelArgument(1, bufferA1);
                                                                        kernelRoundNA.SetKernelArgument(2, bufferI1);
                                                                        kernelRoundNA.SetKernelArgument(3, bufferI2);

                                                                        kernelRoundNB.SetKernelArgument(0, bufferA1);
                                                                        kernelRoundNB.SetKernelArgument(1, bufferB);
                                                                        kernelRoundNB.SetKernelArgument(2, bufferI2);
                                                                        kernelRoundNB.SetKernelArgument(3, bufferI1);

                                                                        kernelTail.SetKernelArgument(0, bufferB);
                                                                        kernelTail.SetKernelArgument(1, bufferA1);
                                                                        kernelTail.SetKernelArgument(2, bufferI1);
                                                                        kernelTail.SetKernelArgument(3, bufferI2);

                                                                        kernelRecovery.SetKernelArgumentGeneric(0, currentJob.k0);
                                                                        kernelRecovery.SetKernelArgumentGeneric(1, currentJob.k1);
                                                                        kernelRecovery.SetKernelArgumentGeneric(2, currentJob.k2);
                                                                        kernelRecovery.SetKernelArgumentGeneric(3, currentJob.k3);
                                                                        kernelRecovery.SetKernelArgument(4, bufferR);
                                                                        kernelRecovery.SetKernelArgument(5, bufferI2);

                                                                        int loopCnt = 0;
                                                                        //for (int i = 0; i < runs; i++)
                                                                        while (!Comms.IsTerminated)
                                                                        {
                                                                            try
                                                                            {
                                                                                if (!TEST && (Comms.nextJob.pre_pow == null || Comms.nextJob.pre_pow == "" || Comms.nextJob.pre_pow == TestPrePow))
                                                                                {
                                                                                    Logger.Log(LogLevel.Info, string.Format("Waiting for job...."));
                                                                                    Task.Delay(1000).Wait();
                                                                                    continue;
                                                                                }

                                                                                if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin)))
                                                                                {
                                                                                    currentJob           = Comms.nextJob;
                                                                                    currentJob.timestamp = DateTime.Now;
                                                                                }

                                                                                if (!TEST && (currentJob.timestamp.AddMinutes(30) < DateTime.Now) && Comms.lastIncoming.AddMinutes(30) < DateTime.Now)
                                                                                {
                                                                                    Logger.Log(LogLevel.Info, string.Format("Job too old..."));
                                                                                    Task.Delay(1000).Wait();
                                                                                    continue;
                                                                                }

                                                                                // test runs only once
                                                                                if (TEST && loopCnt++ > 100000)
                                                                                {
                                                                                    Comms.IsTerminated = true;
                                                                                }

                                                                                Logger.Log(LogLevel.Debug, string.Format("GPU AMD{4}:Trimming #{4}: {0} {1} {2} {3}", currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, currentJob.jobID, deviceID));

                                                                                //Stopwatch srw = new Stopwatch();
                                                                                //srw.Start();

                                                                                Solution s;
                                                                                while (graphSolutions.TryDequeue(out s))
                                                                                {
                                                                                    kernelRecovery.SetKernelArgumentGeneric(0, s.job.k0);
                                                                                    kernelRecovery.SetKernelArgumentGeneric(1, s.job.k1);
                                                                                    kernelRecovery.SetKernelArgumentGeneric(2, s.job.k2);
                                                                                    kernelRecovery.SetKernelArgumentGeneric(3, s.job.k3);
                                                                                    commandQueue.EnqueueWriteBufferEdges(bufferR, s.GetLongEdges());
                                                                                    commandQueue.EnqueueClearBuffer(bufferI2, 64 * 64 * 4, clearPattern);
                                                                                    commandQueue.EnqueueNDRangeKernel(kernelRecovery, 1, 2048 * 256, 256, 0);
                                                                                    s.nonces = commandQueue.EnqueueReadBuffer <uint>(bufferI2, 42);
                                                                                    OpenCl.DotNetCore.Interop.CommandQueues.CommandQueuesNativeApi.Finish(commandQueue.Handle);
                                                                                    s.nonces = s.nonces.OrderBy(n => n).ToArray();
                                                                                    Comms.graphSolutionsOut.Enqueue(s);
                                                                                    Comms.SetEvent();
                                                                                }

                                                                                //srw.Stop();
                                                                                //Console.WriteLine("RECOVERY " + srw.ElapsedMilliseconds);

                                                                                currentJob = currentJob.Next();

                                                                                kernelSeedA.SetKernelArgumentGeneric(0, currentJob.k0);
                                                                                kernelSeedA.SetKernelArgumentGeneric(1, currentJob.k1);
                                                                                kernelSeedA.SetKernelArgumentGeneric(2, currentJob.k2);
                                                                                kernelSeedA.SetKernelArgumentGeneric(3, currentJob.k3);

                                                                                sw.Restart();

                                                                                commandQueue.EnqueueClearBuffer(bufferI2, 64 * 64 * 4, clearPattern);
                                                                                commandQueue.EnqueueClearBuffer(bufferI1, 64 * 64 * 4, clearPattern);
                                                                                commandQueue.EnqueueNDRangeKernel(kernelSeedA, 1, 2048 * 128, 128, 0);
                                                                                commandQueue.EnqueueNDRangeKernel(kernelSeedB1, 1, 1024 * 128, 128, 0);
                                                                                commandQueue.EnqueueNDRangeKernel(kernelSeedB2, 1, 1024 * 128, 128, 0);
                                                                                commandQueue.EnqueueClearBuffer(bufferI1, 64 * 64 * 4, clearPattern);
                                                                                commandQueue.EnqueueNDRangeKernel(kernelRound1, 1, 4096 * 1024, 1024, 0);

                                                                                commandQueue.EnqueueClearBuffer(bufferI2, 64 * 64 * 4, clearPattern);
                                                                                commandQueue.EnqueueNDRangeKernel(kernelRoundO, 1, 4096 * 1024, 1024, 0);
                                                                                commandQueue.EnqueueClearBuffer(bufferI1, 64 * 64 * 4, clearPattern);
                                                                                commandQueue.EnqueueNDRangeKernel(kernelRoundNB, 1, 4096 * 1024, 1024, 0);

                                                                                for (int r = 0; r < trimRounds; r++)
                                                                                {
                                                                                    commandQueue.EnqueueClearBuffer(bufferI2, 64 * 64 * 4, clearPattern);
                                                                                    commandQueue.EnqueueNDRangeKernel(kernelRoundNA, 1, 4096 * 1024, 1024, 0);
                                                                                    commandQueue.EnqueueClearBuffer(bufferI1, 64 * 64 * 4, clearPattern);
                                                                                    commandQueue.EnqueueNDRangeKernel(kernelRoundNB, 1, 4096 * 1024, 1024, 0);
                                                                                }

                                                                                commandQueue.EnqueueClearBuffer(bufferI2, 64 * 64 * 4, clearPattern);
                                                                                commandQueue.EnqueueNDRangeKernel(kernelTail, 1, 4096 * 1024, 1024, 0);

                                                                                edgesCount    = commandQueue.EnqueueReadBuffer <uint>(bufferI2, 1);
                                                                                edgesCount[0] = edgesCount[0] > 1000000 ? 1000000 : edgesCount[0];
                                                                                edgesLeft     = commandQueue.EnqueueReadBuffer(bufferA1, (int)edgesCount[0] * 2);

                                                                                OpenCl.DotNetCore.Interop.CommandQueues.CommandQueuesNativeApi.Flush(commandQueue.Handle);
                                                                                OpenCl.DotNetCore.Interop.CommandQueues.CommandQueuesNativeApi.Finish(commandQueue.Handle);

                                                                                sw.Stop();

                                                                                currentJob.trimTime = sw.ElapsedMilliseconds;
                                                                                currentJob.solvedAt = DateTime.Now;

                                                                                Logger.Log(LogLevel.Info, string.Format("GPU AMD{2}:    Trimmed in {0}ms to {1} edges", sw.ElapsedMilliseconds, edgesCount[0], deviceID));

                                                                                CGraph cg = new CGraph();
                                                                                cg.SetEdges(edgesLeft, (int)edgesCount[0]);
                                                                                cg.SetHeader(currentJob);

                                                                                Task.Factory.StartNew(() =>
                                                                                {
                                                                                    if (edgesCount[0] < 200000)
                                                                                    {
                                                                                        try
                                                                                        {
                                                                                            if (findersInFlight++ < 3)
                                                                                            {
                                                                                                Stopwatch cycleTime = new Stopwatch();
                                                                                                cycleTime.Start();
                                                                                                cg.FindSolutions(graphSolutions);
                                                                                                cycleTime.Stop();
                                                                                                AdjustTrims(cycleTime.ElapsedMilliseconds);
                                                                                                if (TEST)
                                                                                                {
                                                                                                    Logger.Log(LogLevel.Info, string.Format("Finder completed in {0}ms on {1} edges with {2} solution(s) and {3} dupes", sw.ElapsedMilliseconds, edgesCount[0], graphSolutions.Count, cg.dupes));

                                                                                                    if (++trims % 50 == 0)
                                                                                                    {
                                                                                                        Console.ForegroundColor = ConsoleColor.Green;
                                                                                                        Console.WriteLine("SOLS: {0}/{1} - RATE: {2:F1}", solutions, trims, (float)trims / solutions);
                                                                                                        Console.ResetColor();
                                                                                                    }
                                                                                                }
                                                                                                if (graphSolutions.Count > 0)
                                                                                                {
                                                                                                    solutions++;
                                                                                                }
                                                                                            }
                                                                                            else
                                                                                            {
                                                                                                Logger.Log(LogLevel.Warning, "CPU overloaded!");
                                                                                            }
                                                                                        }
                                                                                        catch (Exception ex)
                                                                                        {
                                                                                            Logger.Log(LogLevel.Error, "Cycle finder crashed " + ex.Message);
                                                                                        }
                                                                                        finally
                                                                                        {
                                                                                            findersInFlight--;
                                                                                        }
                                                                                    }
                                                                                });
                                                                            }
                                                                            catch (Exception ex)
                                                                            {
                                                                                Logger.Log(LogLevel.Error, "Critical error in main ocl loop " + ex.Message);
                                                                                Task.Delay(5000).Wait();
                                                                            }
                                                                        }

                                                                        //uint[] resultArray = commandQueue.EnqueueReadBuffer<uint>(bufferI1, 64 * 64);
                                                                        //uint[] resultArray2 = commandQueue.EnqueueReadBuffer<uint>(bufferI2, 64 * 64);
                                                                        //Console.WriteLine("SeedA: " + resultArray.Sum(e => e) + " in " + sw.ElapsedMilliseconds / runs);
                                                                        //Console.WriteLine("SeedB: " + resultArray2.Sum(e => e) + " in " + sw.ElapsedMilliseconds / runs);
                                                                        //Task.Delay(1000).Wait();
                                                                        //Console.WriteLine("");
                                                                    }
                                }
                                finally
                                {
                                    // clear pattern
                                    if (clearPattern != IntPtr.Zero)
                                    {
                                        Marshal.FreeHGlobal(clearPattern);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Critical error in OCL Init " + ex.Message);
                Console.ForegroundColor = ConsoleColor.Yellow;
                Console.WriteLine(ex.Message);
                Console.ResetColor();
                Task.Delay(500).Wait();
            }
            finally
            {
                Task.Delay(500).Wait();

                try
                {
                    Comms.Close();
                    bufferA1.Dispose();
                    bufferA2.Dispose();
                    bufferB.Dispose();
                    bufferI1.Dispose();
                    bufferI2.Dispose();
                    bufferR.Dispose();

                    if (OpenCl.DotNetCore.CommandQueues.CommandQueue.resultValuePointer != IntPtr.Zero)
                    {
                        Marshal.FreeHGlobal(OpenCl.DotNetCore.CommandQueues.CommandQueue.resultValuePointer);
                    }
                }
                catch { }
            }

            //Console.ReadKey();
        }
Esempio n. 4
0
        /// <summary>
        /// Entry point of the CUDA solver process.
        /// Parses the command line, optionally connects to the master process, loads the
        /// CUDA kernels, allocates device and pinned host memory and then runs the
        /// trim / cycle-find loop until <c>Comms.IsTerminated</c> is set.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c>: CUDA device ID (a negative ID makes a connected process report the device list and exit).
        /// <c>args[1]</c>: master port; when absent the solver runs in stand-alone TEST mode.
        /// <c>args[3]</c>: GPU count, used to decide whether the context uses blocking sync.
        /// <c>args[2]</c> is not read by this method.
        /// </param>
        static void Main(string[] args)
        {
            try
            {
                if (args.Length > 0)
                {
                    deviceID = int.Parse(args[0]);
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Device ID parse error: " + ex.Message);
            }

            try
            {
                if (args.Length > 1)
                {
                    port = int.Parse(args[1]);
                    Comms.ConnectToMaster(port);
                }
                else
                {
                    // No master port given: run stand-alone and mirror logs to the console.
                    TEST = true;
                    Logger.CopyToConsole = true;
                    CGraph.ShowCycles    = true;
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Master connection error: " + ex.Message);
            }

            try
            {
                if (args.Length > 3)
                {
                    gpuCount = int.Parse(args[3]);
                    fastCuda = gpuCount <= (Environment.ProcessorCount / 2);
                    if (fastCuda)
                    {
                        Logger.Log(LogLevel.Info, "Using single GPU blocking mode");
                    }
                }
            }
            catch
            {
                // GPU count is optional; on a parse failure fastCuda keeps its current value.
            }

            // Both modes start from the same built-in job; in connected mode the main loop
            // waits until the master supplies a job whose pre_pow differs from TestPrePow.
            currentJob = nextJob = new Job()
            {
                jobID     = 0,
                k0        = 0xf4956dc403730b01L,
                k1        = 0xe6d45de39c2a5a3eL,
                k2        = 0xcbf626a8afee35f6L,
                k3        = 0x4307b94b1a0c9980L,
                pre_pow   = TestPrePow,
                timestamp = DateTime.Now
            };

            if (!TEST)
            {
                if (!Comms.IsConnected())
                {
                    Console.WriteLine("Master connection failed, aborting");
                    Logger.Log(LogLevel.Error, "No master connection, exitting!");
                    return;
                }

                if (deviceID < 0)
                {
                    // Device enumeration request: send the list of CUDA devices to the master and exit.
                    int devCnt             = CudaContext.GetDeviceCount();
                    GpuDevicesMessage gpum = new GpuDevicesMessage()
                    {
                        devices = new List <GpuDevice>(devCnt)
                    };
                    for (int i = 0; i < devCnt; i++)
                    {
                        string name = CudaContext.GetDeviceName(i);
                        var    info = CudaContext.GetDeviceInfo(i);
                        gpum.devices.Add(new GpuDevice()
                        {
                            deviceID = i, name = name, memory = info.TotalGlobalMemory
                        });
                    }
                    Comms.gpuMsg = gpum;
                    Comms.SetEvent();
                    // Give the sender a moment to push the message before the connection is closed.
                    Task.Delay(1000).Wait();
                    Comms.Close();
                    return;
                }
            }

            // Create the CUDA context and load every kernel from the embedded PTX resource.
            try
            {
                var assembly       = Assembly.GetEntryAssembly();
                var resourceStream = assembly.GetManifestResourceStream("CudaSolver.kernel_x64.ptx");
                ctx = new CudaContext(deviceID, !fastCuda ? (CUCtxFlags.BlockingSync | CUCtxFlags.MapHost) : CUCtxFlags.MapHost);

                meanSeedA = ctx.LoadKernelPTX(resourceStream, "FluffySeed2A");
                meanSeedA.BlockDimensions = 128;
                meanSeedA.GridDimensions  = 2048;
                meanSeedA.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                meanSeedB = ctx.LoadKernelPTX(resourceStream, "FluffySeed2B");
                meanSeedB.BlockDimensions = 128;
                meanSeedB.GridDimensions  = 2048;
                meanSeedB.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                // Same kernel as meanSeedB but with half the grid; launched four times per trim below.
                meanSeedB_4 = ctx.LoadKernelPTX(resourceStream, "FluffySeed2B");
                meanSeedB_4.BlockDimensions = 128;
                meanSeedB_4.GridDimensions  = 1024;
                meanSeedB_4.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                meanRound = ctx.LoadKernelPTX(resourceStream, "FluffyRound");
                meanRound.BlockDimensions = 512;
                meanRound.GridDimensions  = 4096;
                meanRound.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                // Same kernel as meanRound but with half the grid; launched twice per trim below.
                meanRound_2 = ctx.LoadKernelPTX(resourceStream, "FluffyRound");
                meanRound_2.BlockDimensions = 512;
                meanRound_2.GridDimensions  = 2048;
                meanRound_2.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                meanRoundJoin = ctx.LoadKernelPTX(resourceStream, "FluffyRound_J");
                meanRoundJoin.BlockDimensions = 512;
                meanRoundJoin.GridDimensions  = 4096;
                meanRoundJoin.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                meanTail = ctx.LoadKernelPTX(resourceStream, "FluffyTail");
                meanTail.BlockDimensions = 1024;
                meanTail.GridDimensions  = 4096;
                meanTail.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1;

                meanRecover = ctx.LoadKernelPTX(resourceStream, "FluffyRecovery");
                meanRecover.BlockDimensions = 256;
                meanRecover.GridDimensions  = 2048;
                meanRecover.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1;
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Unable to create kernels: " + ex.Message);
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            // Device memory: one large edge buffer with two additional views at fixed offsets,
            // plus two index buffers, plus the two CUDA streams.
            try
            {
                d_buffer    = new CudaDeviceVariable <ulong>(BUFFER_SIZE_U32);
                d_bufferMid = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_B * 8));
                d_bufferB   = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_A * 8));

                d_indexesA = new CudaDeviceVariable <uint>(INDEX_SIZE * 2);
                d_indexesB = new CudaDeviceVariable <uint>(INDEX_SIZE * 2);

                Array.Clear(h_indexesA, 0, h_indexesA.Length);
                // Clear B over its own length (previously A's length was used here).
                Array.Clear(h_indexesB, 0, h_indexesB.Length);

                // NOTE(review): these assignments replace the device variables allocated just above;
                // if the array-to-device conversion allocates a fresh buffer, the earlier allocations
                // are never disposed - confirm against the ManagedCuda implementation.
                d_indexesA = h_indexesA;
                d_indexesB = h_indexesB;

                streamPrimary   = new CudaStream(CUStreamFlags.NonBlocking);
                streamSecondary = new CudaStream(CUStreamFlags.NonBlocking);
            }
            catch (Exception)
            {
                Task.Delay(200).Wait();
                Logger.Log(LogLevel.Error, $"Out of video memory! Only {ctx.GetFreeDeviceMemorySize()} free");
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            try
            {
                AllocateHostMemory(true, ref h_a, ref hAligned_a, 1024 * 1024 * 32);
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Unable to create pinned memory. " + ex.Message);
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            int loopCnt = 0;

            while (!Comms.IsTerminated)
            {
                try
                {
                    // Connected mode: idle until the master has delivered a real job.
                    if (!TEST && (Comms.nextJob.pre_pow == null || Comms.nextJob.pre_pow == "" || Comms.nextJob.pre_pow == TestPrePow))
                    {
                        Logger.Log(LogLevel.Info, string.Format("Waiting for job...."));
                        Task.Delay(1000).Wait();
                        continue;
                    }

                    // Switch to the master's job as soon as it differs from the one being worked on.
                    if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin)))
                    {
                        currentJob           = Comms.nextJob;
                        currentJob.timestamp = DateTime.Now;
                    }

                    // Stop working when both the job and the last incoming message are older than 30 minutes.
                    if (!TEST && (currentJob.timestamp.AddMinutes(30) < DateTime.Now) && Comms.lastIncoming.AddMinutes(30) < DateTime.Now)
                    {
                        Logger.Log(LogLevel.Info, string.Format("Job too old..."));
                        Task.Delay(1000).Wait();
                        continue;
                    }

                    // test runs only once
                    if (TEST && loopCnt++ > 100)
                    {
                        Comms.IsTerminated = true;
                    }

                    // Recover nonces on the GPU for every solution found by the cycle finders
                    // and hand the completed solutions to the sender.
                    Solution s;
                    while (graphSolutions.TryDequeue(out s))
                    {
                        meanRecover.SetConstantVariable <ulong>("recovery", s.GetUlongEdges());
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRecover.RunAsync(streamPrimary.Stream, s.job.k0, s.job.k1, s.job.k2, s.job.k3, d_indexesB.DevicePointer);
                        streamPrimary.Synchronize();
                        s.nonces = new uint[40];
                        d_indexesB.CopyToHost(s.nonces, 0, 0, 40 * 4);
                        s.nonces = s.nonces.OrderBy(n => n).ToArray();
                        lock (Comms.graphSolutionsOut)
                        {
                            Comms.graphSolutionsOut.Enqueue(s);
                        }
                        Comms.SetEvent();
                    }

                    // Trim graphs until the one just trimmed belongs to the current height
                    // (TEST mode and termination always leave after a single pass).
                    uint[] count;
                    do
                    {
                        if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin)))
                        {
                            currentJob           = Comms.nextJob;
                            currentJob.timestamp = DateTime.Now;
                        }
                        currentJob = currentJob.Next();

                        // {5} is the device ID, {4} the job ID (the device slot previously printed the job ID).
                        Logger.Log(LogLevel.Debug, string.Format("GPU NV{5}:Trimming #{4}: {0} {1} {2} {3}", currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, currentJob.jobID, deviceID));

                        timer.Restart();

                        d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);

                        // Seeding: one A pass, then the B pass split into four quarter launches.
                        meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer, d_indexesB.DevicePointer);
                        meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 0);
                        meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 1, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 16);
                        meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 2, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 32);
                        meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 3, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 48);

                        // First round in two half launches, followed by the join kernel.
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRound_2.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 2, d_bufferB.DevicePointer, d_indexesA.DevicePointer + (2048 * 4), d_indexesB.DevicePointer + (4096 * 4), DUCK_EDGES_A, DUCK_EDGES_B / 2);
                        meanRound_2.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer - (BUFFER_SIZE_B * 8), d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_A, DUCK_EDGES_B / 2);
                        d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        meanRoundJoin.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer - (BUFFER_SIZE_B * 8), d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2);

                        // Further rounds ping-pong between the two buffers; the destination index
                        // buffer is zeroed before every launch.
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2);
                        d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2);
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 4);

                        for (int i = 0; i < trimRounds; i++)
                        {
                            d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                            meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4);
                            d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                            meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4);
                        }

                        d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        meanTail.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer);

                        ctx.Synchronize();
                        streamPrimary.Synchronize();

                        // The first uint of d_indexesA is used as the number of remaining edges.
                        count = new uint[2];
                        d_indexesA.CopyToHost(count, 0, 0, 8);

                        if (count[0] > 4194304)
                        {
                            // trouble: clamp so the copies below never exceed 4194304 edges
                            count[0] = 4194304;
                        }

                        // Copy count[0] edges (8 bytes each) into pinned memory, then into the managed array h_a.
                        hAligned_a.AsyncCopyFromDevice(d_buffer.DevicePointer, 0, 0, count[0] * 8, streamPrimary.Stream);
                        streamPrimary.Synchronize();
                        System.Runtime.InteropServices.Marshal.Copy(hAligned_a.PinnedHostPointer, h_a, 0, ((int)count[0] * 8) / sizeof(int));

                        timer.Stop();
                        currentJob.solvedAt = DateTime.Now;
                        currentJob.trimTime = timer.ElapsedMilliseconds;

                        Logger.Log(LogLevel.Info, string.Format("GPU NV{2}:     Trimmed in {0}ms to {1} edges, h {3}", timer.ElapsedMilliseconds, count[0], deviceID, currentJob.height));
                    }while((currentJob.height != Comms.nextJob.height) && (!Comms.IsTerminated) && (!TEST));

                    if (TEST)
                    {
                        CGraph cg = FinderBag.GetFinder();
                        if (cg == null)
                        {
                            continue;
                        }

                        cg.SetEdges(h_a, (int)count[0]);
                        cg.SetHeader(currentJob);

                        Task.Factory.StartNew(() =>
                        {
                            Stopwatch sw = new Stopwatch();
                            sw.Start();

                            if (count[0] < 200000)
                            {
                                try
                                {
                                    // At most three finders do real work at once; extra tasks only log.
                                    if (findersInFlight++ < 3)
                                    {
                                        Stopwatch cycleTime = new Stopwatch();
                                        cycleTime.Start();
                                        cg.FindSolutions(graphSolutions);
                                        cycleTime.Stop();
                                        AdjustTrims(cycleTime.ElapsedMilliseconds);
                                        if (graphSolutions.Count > 0)
                                        {
                                            solutions++;
                                        }
                                    }
                                    else
                                    {
                                        Logger.Log(LogLevel.Warning, "CPU overloaded!");
                                    }
                                }
                                catch (Exception ex)
                                {
                                    Logger.Log(LogLevel.Error, "Cycle finder error" + ex.Message);
                                }
                                finally
                                {
                                    findersInFlight--;
                                    FinderBag.ReturnFinder(cg);
                                }
                            }
                            else
                            {
                                // Too many edges to search: skip the graph but still hand the finder
                                // back, otherwise it would be lost to the bag.
                                FinderBag.ReturnFinder(cg);
                            }

                            sw.Stop();

                            if (++trims % 50 == 0)
                            {
                                Console.ForegroundColor = ConsoleColor.Green;
                                Console.WriteLine("SOLS: {0}/{1} - RATE: {2:F1}", solutions, trims, (float)trims / solutions);
                                Console.ResetColor();
                            }
                            Logger.Log(LogLevel.Info, string.Format("Finder completed in {0}ms on {1} edges with {2} solution(s) and {3} dupes", sw.ElapsedMilliseconds, count[0], graphSolutions.Count, cg.dupes));
                        });
                    }
                    else
                    {
                        CGraph cg = FinderBag.GetFinder();
                        if (cg == null)
                        {
                            // No finder available (the TEST branch guards the same way): drop this
                            // graph instead of failing with a NullReferenceException and a 5 s stall.
                            Logger.Log(LogLevel.Warning, "CPU overloaded!");
                            continue;
                        }

                        cg.SetEdges(h_a, (int)count[0]);
                        cg.SetHeader(currentJob);

                        Task.Factory.StartNew(() =>
                        {
                            if (count[0] < 200000)
                            {
                                try
                                {
                                    // At most three finders do real work at once; extra tasks only log.
                                    if (findersInFlight++ < 3)
                                    {
                                        Stopwatch cycleTime = new Stopwatch();
                                        cycleTime.Start();
                                        cg.FindSolutions(graphSolutions);
                                        cycleTime.Stop();
                                        AdjustTrims(cycleTime.ElapsedMilliseconds);
                                        if (graphSolutions.Count > 0)
                                        {
                                            solutions++;
                                        }
                                    }
                                    else
                                    {
                                        Logger.Log(LogLevel.Warning, "CPU overloaded!");
                                    }
                                }
                                catch (Exception ex)
                                {
                                    Logger.Log(LogLevel.Error, "Cycle finder crashed: " + ex.Message);
                                }
                                finally
                                {
                                    findersInFlight--;
                                    FinderBag.ReturnFinder(cg);
                                }
                            }
                            else
                            {
                                // Too many edges to search: skip the graph but still hand the finder back.
                                FinderBag.ReturnFinder(cg);
                            }
                        });
                    }
                }
                catch (Exception ex)
                {
                    Logger.Log(LogLevel.Error, "Critical error in main cuda loop " + ex.Message);
                    Task.Delay(5000).Wait();
                }
            }

            // clean up
            try
            {
                Task.Delay(500).Wait();

                Comms.Close();

                d_buffer.Dispose();
                d_indexesA.Dispose();
                d_indexesB.Dispose();

                streamPrimary.Dispose();
                streamSecondary.Dispose();

                hAligned_a.Dispose();

                if (ctx != null)
                {
                    ctx.Dispose();
                }
            }
            catch
            {
                // Best-effort shutdown: the process is exiting, so disposal failures are ignored.
            }
        }
Esempio n. 5
0
        /// <summary>
        /// Entry point of the CUDA solver process. Parses the command line, optionally connects to
        /// the master process, loads the trimming kernels from the embedded PTX resource, allocates
        /// device and pinned host buffers, and then runs the trim loop until
        /// <c>Comms.IsTerminated</c> is set or an unrecoverable error occurs.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c>: device ID, or (when it is the only argument and contains "fidelity")
        /// a colon-separated string whose segments 1..3 are device ID, start nonce and range.
        /// <c>args[1]</c>: port handed to <c>Comms.ConnectToMaster</c>; when absent the solver runs
        /// in local TEST mode with console logging.
        /// <c>args[3]</c>: GPU count, used to decide <c>fastCuda</c>.
        /// </param>
        static void Main(string[] args)
        {
            // ---- Device selection ------------------------------------------------------------
            // args[0] is parsed exactly once. Parsing it a second time as a plain integer would
            // always fail (and log a spurious error) for a "fidelity:..." argument.
            try
            {
                // Ordinal, case-insensitive search: ToLower() is culture-sensitive and can miss
                // the keyword under some cultures (e.g. Turkish dotless i).
                bool fidelityRun = args.Length == 1
                                   && args[0].IndexOf("fidelity", StringComparison.OrdinalIgnoreCase) >= 0;

                if (fidelityRun)
                {
                    string[] fseg = args[0].Split(':');
                    deviceID = int.Parse(fseg[1]);
                    nonce    = Int64.Parse(fseg[2]) - 1;
                    range    = int.Parse(fseg[3]);
                    QTEST    = true;
                }
                else if (args.Length > 0)
                {
                    deviceID = int.Parse(args[0]);
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Device ID parse error: " + ex.Message);
            }

            // ---- Master connection (or local test mode) --------------------------------------
            try
            {
                if (args.Length > 1)
                {
                    port = int.Parse(args[1]);
                    Comms.ConnectToMaster(port);
                }
                else
                {
                    // No port given: run stand-alone and mirror output to the console.
                    TEST = true;
                    Logger.CopyToConsole = true;
                    CGraph.ShowCycles    = true;
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Master connection error: " + ex.Message);
            }

            // ---- GPU count -------------------------------------------------------------------
            try
            {
                if (args.Length > 3)
                {
                    gpuCount = int.Parse(args[3]);
                    fastCuda = gpuCount <= (Environment.ProcessorCount / 2);
                    if (fastCuda)
                    {
                        Logger.Log(LogLevel.Info, "Using single GPU blocking mode");
                    }
                }
            }
            catch (Exception ex)
            {
                // Non-fatal: gpuCount / fastCuda keep their previous values.
                Logger.Log(LogLevel.Error, "GPU count parse error: " + ex.Message);
            }

            // Both modes start from the same job built around TestPrePow. In non-test mode the
            // main loop waits until Comms.nextJob carries a different pre_pow.
            currentJob = nextJob = new Job()
            {
                jobID     = 0,
                k0        = 0xf4956dc403730b01L,
                k1        = 0xe6d45de39c2a5a3eL,
                k2        = 0xcbf626a8afee35f6L,
                k3        = 0x4307b94b1a0c9980L,
                pre_pow   = TestPrePow,
                timestamp = DateTime.Now
            };

            if (!TEST)
            {
                if (!Comms.IsConnected())
                {
                    Console.WriteLine("Master connection failed, aborting");
                    Logger.Log(LogLevel.Error, "No master connection, exitting!");
                    return;
                }

                if (deviceID < 0)
                {
                    // Negative device ID: report the available devices to the master and exit
                    // without starting a solver.
                    int devCnt             = CudaContext.GetDeviceCount();
                    GpuDevicesMessage gpum = new GpuDevicesMessage()
                    {
                        devices = new List <GpuDevice>(devCnt)
                    };
                    for (int i = 0; i < devCnt; i++)
                    {
                        string name = CudaContext.GetDeviceName(i);
                        var    info = CudaContext.GetDeviceInfo(i);
                        gpum.devices.Add(new GpuDevice()
                        {
                            deviceID = i, name = name, memory = info.TotalGlobalMemory
                        });
                    }

                    Comms.gpuMsg = gpum;
                    Comms.SetEvent();
                    // Give the sender thread time to push the message before closing.
                    Task.Delay(1000).Wait();
                    Comms.Close();
                    return;
                }
            }

            // ---- Kernel loading --------------------------------------------------------------
            try
            {
                var assembly       = Assembly.GetEntryAssembly();
                var resourceStream = assembly.GetManifestResourceStream("CudaSolver.kernel_x64.ptx");
                ctx = new CudaContext(deviceID, CUCtxFlags.MapHost);

                string pow;
                using (var reader = new StreamReader(resourceStream))
                {
                    pow = reader.ReadToEnd();
                }

                // Devices reporting 64 KiB of shared memory per multiprocessor get the "_C"
                // kernel variants and different register limits; all others get the "_A" ones.
                Turing = ctx.GetDeviceInfo().MaxSharedMemoryPerMultiprocessor == 65536;

                string roundName     = Turing ? "FluffyRound_C2" : "FluffyRound_A2";
                string round4Name    = Turing ? "FluffyRound_C1" : "FluffyRound_A1";
                string roundJoinName = Turing ? "FluffyRound_C3" : "FluffyRound_A3";
                uint   seedRegs      = Turing ? 64u : 40u;
                uint   roundRegs     = Turing ? 32u : 40u;
                uint   round4Regs    = Turing ? 64u : 32u;
                uint   roundJoinRegs = 32u;
                int    roundBlockDim = Turing ? 1024 : 512;

                using (var s = GenerateStreamFromString(pow))
                {
                    meanSeedA = ctx.LoadKernelPTX(s, "FluffySeed4K", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { seedRegs });
                    meanSeedA.BlockDimensions = 512;
                    meanSeedA.GridDimensions  = 1024;
                    meanSeedA.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                    meanRound = ctx.LoadKernelPTX(s, roundName, new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { roundRegs });
                    meanRound.BlockDimensions = roundBlockDim;
                    meanRound.GridDimensions  = 4096;
                    meanRound.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                    meanRound_4 = ctx.LoadKernelPTX(s, round4Name, new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { round4Regs });
                    meanRound_4.BlockDimensions = 1024;
                    meanRound_4.GridDimensions  = 1024;
                    meanRound_4.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                    meanRoundJoin = ctx.LoadKernelPTX(s, roundJoinName, new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { roundJoinRegs });
                    meanRoundJoin.BlockDimensions = 1024;
                    meanRoundJoin.GridDimensions  = 4096;
                    meanRoundJoin.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared;

                    meanTail = ctx.LoadKernelPTX(s, "FluffyTail");
                    meanTail.BlockDimensions = 1024;
                    meanTail.GridDimensions  = 4096;
                    meanTail.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1;

                    meanRecover = ctx.LoadKernelPTX(s, "FluffyRecovery");
                    meanRecover.BlockDimensions = 256;
                    meanRecover.GridDimensions  = 2048;
                    meanRecover.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1;
                }
            }
            catch (Exception ex)
            {
                Logger.Log(LogLevel.Error, "Unable to create kernels: " + ex.Message);
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            // ---- Device memory ---------------------------------------------------------------
            try
            {
                // d_bufferMid and d_bufferB are views into d_buffer at fixed byte offsets.
                d_buffer    = new CudaDeviceVariable <ulong>(BUFFER_SIZE_U32 * (temp ? 8 : 1));
                d_bufferMid = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_B * 2));
                d_bufferB   = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_B * 8));

                d_indexesA = new CudaDeviceVariable <uint>(INDEX_SIZE);
                d_indexesB = new CudaDeviceVariable <uint>(INDEX_SIZE);
                d_aux      = new CudaDeviceVariable <uint>(512);

                Array.Clear(h_indexesA, 0, h_indexesA.Length);
                // Each array is cleared over its own length (previously h_indexesA.Length was
                // used for h_indexesB as well).
                Array.Clear(h_indexesB, 0, h_indexesB.Length);

                // NOTE(review): assigning a host array here appears to rely on an implicit
                // array -> CudaDeviceVariable conversion, which would replace (without disposing)
                // the variables allocated just above. Confirm against ManagedCuda and consider
                // CopyToDevice on the existing variables instead.
                d_indexesA = h_indexesA;
                d_indexesB = h_indexesB;

                streamPrimary = new CudaStream(CUStreamFlags.NonBlocking);
            }
            catch (Exception)
            {
                Task.Delay(200).Wait();
                Logger.Log(LogLevel.Error, $"Mem alloc exception. Out of video memory? {ctx.GetFreeDeviceMemorySize()} free");
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            // ---- Pinned host memory ----------------------------------------------------------
            try
            {
                AllocateHostMemory(true, ref h_a, ref hAligned_a, 1024 * 1024 * 32);
            }
            catch (Exception)
            {
                Logger.Log(LogLevel.Error, "Unable to create pinned memory.");
                Task.Delay(500).Wait();
                Comms.Close();
                return;
            }

            int loopCnt = 0;

            // ---- Main trim loop --------------------------------------------------------------
            while (!Comms.IsTerminated)
            {
                try
                {
                    // Until the master supplies a real job, nextJob still carries the placeholder.
                    if (!TEST && (string.IsNullOrEmpty(Comms.nextJob.pre_pow) || Comms.nextJob.pre_pow == TestPrePow))
                    {
                        Logger.Log(LogLevel.Info, "Waiting for job....");
                        Task.Delay(1000).Wait();
                        continue;
                    }

                    // Switch to the master's job when its pre_pow or origin changed.
                    if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin)))
                    {
                        currentJob           = Comms.nextJob;
                        currentJob.timestamp = DateTime.Now;
                    }

                    // Idle when both the job and the last incoming message are older than 30 minutes.
                    if (!TEST && (currentJob.timestamp.AddMinutes(30) < DateTime.Now) && Comms.lastIncoming.AddMinutes(30) < DateTime.Now)
                    {
                        Logger.Log(LogLevel.Info, "Job too old...");
                        Task.Delay(1000).Wait();
                        continue;
                    }

                    // In test mode stop after `range` iterations (this iteration still completes).
                    if (TEST && ++loopCnt >= range)
                    {
                        Comms.IsTerminated = true;
                    }

                    // Recover the nonces for every solution queued by the cycle finder.
                    Solution s;
                    while (graphSolutions.TryDequeue(out s))
                    {
                        meanRecover.SetConstantVariable <ulong>("recovery", s.GetUlongEdges());
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRecover.RunAsync(streamPrimary.Stream, s.job.k0, s.job.k1, s.job.k2, s.job.k3, d_indexesB.DevicePointer);
                        streamPrimary.Synchronize();
                        s.nonces = new uint[32];
                        d_indexesB.CopyToHost(s.nonces, 0, 0, 32 * 4);
                        s.nonces = s.nonces.OrderBy(n => n).ToArray();

                        solutions++;
                        s.fidelity = ((double)solutions / (double)trims) * 32.0;

                        if (Comms.IsConnected())
                        {
                            Comms.graphSolutionsOut.Enqueue(s);
                            Comms.SetEvent();
                        }
                        if (QTEST)
                        {
                            Console.ForegroundColor = ConsoleColor.Red;
                            Console.WriteLine($"Solution for nonce {s.job.nonce}: {string.Join(' ', s.nonces)}");
                            Console.ResetColor();
                        }
                    }

                    // Advance to the next graph to trim.
                    if (QTEST)
                    {
                        currentJob = currentJob.NextSequential(ref nonce);
                        Console.WriteLine($"Nonce: {nonce} K0: {currentJob.k0:X} K1: {currentJob.k1:X} K2: {currentJob.k2:X} K3: {currentJob.k3:X}");
                    }
                    else
                    {
                        currentJob = currentJob.Next();
                    }

                    // {5} is the device ID and {4} the job ID (previously {4} was used for both,
                    // so the device ID was never printed).
                    Logger.Log(LogLevel.Debug, string.Format("GPU NV{5}:Trimming #{4}: {0} {1} {2} {3}", currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, currentJob.jobID, deviceID));

                    timer.Restart();

                    d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                    d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                    d_aux.MemsetAsync(0, streamPrimary.Stream);

                    // Seeding: four launches, each with its own output offset, index offset and edge segment.
                    meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer, d_indexesB.DevicePointer, 0);
                    meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer + ((BUFFER_SIZE_A * 8) / 4 / 4) * 1, d_indexesB.DevicePointer + (4096 * 4), EDGE_SEG);
                    meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer + ((BUFFER_SIZE_A * 8) / 4 / 4) * 2, d_indexesB.DevicePointer + (4096 * 8), EDGE_SEG * 2);
                    meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer + ((BUFFER_SIZE_A * 8) / 4 / 4) * 3, d_indexesB.DevicePointer + (4096 * 12), EDGE_SEG * 3);

                    // First round: four launches writing into four quarters of d_buffer.
                    meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 0);
                    meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 1, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 1024);
                    meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 2, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 2048);
                    meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 3, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 3072);

                    // Join the four quarters into d_bufferB.
                    d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                    meanRoundJoin.RunAsync(streamPrimary.Stream,
                                           d_buffer.DevicePointer,
                                           d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 1,
                                           d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 2,
                                           d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 3,
                                           d_bufferB.DevicePointer,
                                           d_indexesA.DevicePointer,
                                           d_indexesB.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 2);

                    // Subsequent rounds ping-pong between d_bufferB and d_buffer, zeroing the
                    // destination index table before each launch.
                    d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                    meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2, 0, d_aux.DevicePointer);
                    d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                    meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2, 1, d_aux.DevicePointer);
                    d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                    meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2, 2, d_aux.DevicePointer);
                    d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                    meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 4, 3, d_aux.DevicePointer);

                    for (int i = 0; i < (TEST ? 80 : trimRounds); i++)
                    {
                        d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                        meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4, i * 2 + 4, d_aux.DevicePointer);
                        d_indexesB.MemsetAsync(0, streamPrimary.Stream);
                        meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4, i * 2 + 5, d_aux.DevicePointer);
                    }

                    d_indexesA.MemsetAsync(0, streamPrimary.Stream);
                    meanTail.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer);

                    // Sleep for the estimate derived from the previous trim (see lastTrimMs below)
                    // before blocking on the stream; presumably this keeps the CPU from waiting
                    // inside Synchronize for the whole trim.
                    Task.Delay((int)lastTrimMs).Wait();

                    streamPrimary.Synchronize();

                    // The first index entry holds the number of surviving edges.
                    uint[] count = new uint[2];
                    d_indexesA.CopyToHost(count, 0, 0, 8);

                    if (count[0] > 131071)
                    {
                        // More edges than expected: clamp to 131071 before copying to the host.
                        count[0] = 131071;
                    }

                    hAligned_a.AsyncCopyFromDevice(d_buffer.DevicePointer, 0, 0, count[0] * 8, streamPrimary.Stream);
                    streamPrimary.Synchronize();
                    System.Runtime.InteropServices.Marshal.Copy(hAligned_a.PinnedHostPointer, h_a, 0, ((int)count[0] * 8) / sizeof(int));

                    trims++;
                    timer.Stop();
                    // Next pre-synchronize sleep: 90% of this trim's duration, clamped to 50..500 ms.
                    lastTrimMs          = (long)Math.Min(Math.Max((float)timer.ElapsedMilliseconds * 0.9f, 50), 500);
                    currentJob.solvedAt = DateTime.Now;
                    currentJob.trimTime = timer.ElapsedMilliseconds;

                    // Keep this message text unchanged: the sender thread looks for "trimmed" in
                    // outgoing log messages.
                    Logger.Log(LogLevel.Info, string.Format("GPU NV{2}:     Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0], deviceID));

                    // Hand the trimmed edges to the cycle finder; solutions come back through
                    // graphSolutions and are processed at the top of the next iteration.
                    FinderBag.RunFinder(TEST, ref trims, count[0], h_a, currentJob, graphSolutions, timer);

                    if (trims % 50 == 0 && TEST)
                    {
                        Console.ForegroundColor = ConsoleColor.Green;
                        Console.WriteLine("SOLS: {0}/{1} - RATE: {2:F1}", solutions, trims, (float)trims / solutions);
                        Console.ResetColor();
                    }
                }
                catch (Exception ex)
                {
                    Logger.Log(LogLevel.Error, "Critical error in main cuda loop " + ex.Message);
                    Task.Delay(500).Wait();
                    break;
                }
            }

            // ---- Clean up --------------------------------------------------------------------
            try
            {
                // Let pending messages go out before closing the connection.
                Task.Delay(500).Wait();

                Comms.Close();

                // Null-conditional disposal: a resource that was never created (streamSecondary is
                // not created in this method) must not abort the remaining clean-up.
                d_buffer?.Dispose();
                d_indexesA?.Dispose();
                d_indexesB?.Dispose();
                d_aux?.Dispose();

                streamPrimary?.Dispose();
                streamSecondary?.Dispose();

                hAligned_a?.Dispose();

                ctx?.Dispose();
            }
            catch (Exception ex)
            {
                // Best effort: the process is exiting anyway, but record what went wrong.
                Logger.Log(LogLevel.Error, "Clean up error: " + ex.Message);
            }
        }