/// <summary>
        /// Creates the device monitors for the supplied devices on a thread-pool task.
        /// </summary>
        /// <param name="devices">
        /// Devices to create monitors for. Only <c>CPUDevice</c>, <c>AMDDevice</c> and
        /// <c>CUDADevice</c> instances are considered; anything else is ignored.
        /// </param>
        /// <returns>
        /// A task producing the created monitors, CPU monitors first, then AMD, then NVIDIA.
        /// AMD monitors are only added when <c>nhm_amd_init</c> returns 0 and
        /// <c>nhm_amd_has_adapter</c> returns 0 for the device's PCIe bus id.
        /// </returns>
        public static Task<List<DeviceMonitor>> GetDeviceMonitors(IEnumerable<BaseDevice> devices)
        {
            return Task.Run(() =>
            {
                // Enumerate the caller's sequence exactly once so that a lazy or changing
                // source cannot yield different contents for the three type filters below.
                var snapshot = devices.ToList();

                var cpus = snapshot.OfType<CPUDevice>().ToList();
                var amds = snapshot.OfType<AMDDevice>().ToList();
                var nvidias = snapshot.OfType<CUDADevice>().ToList();

                var ret = new List<DeviceMonitor>();

                foreach (var cpu in cpus)
                {
                    ret.Add(new DeviceMonitorCPU(cpu.UUID));
                }

                if (amds.Count > 0)
                {
                    // Logging is configured before init so the init call itself can log.
                    AMD_ODN.nhm_amd_set_debug_log_level(_amdDebugLogLevel);
                    AMD_ODN.nhm_amd_reg_log_cb(_amdLog);
                    var amdInit = AMD_ODN.nhm_amd_init();
                    if (0 == amdInit)
                    {
                        foreach (var amd in amds)
                        {
                            // Only monitor adapters the native library reports (0 == found).
                            var hasRet = AMD_ODN.nhm_amd_has_adapter(amd.PCIeBusID);
                            if (0 == hasRet)
                            {
                                ret.Add(new DeviceMonitorAMD(amd.UUID, amd.PCIeBusID));
                            }
                            else
                            {
                                Logger.Info("DeviceMonitorManager", $"AMD nhm_amd_has_adapter {hasRet} for BusID {amd.PCIeBusID}");
                            }
                        }
                    }
                    else
                    {
                        Logger.Info("DeviceMonitorManager", $"AMD nhm_amd_init {amdInit}");
                    }
                }

                if (nvidias.Count > 0)
                {
                    // 500ms per GPU, capped at an initial maximum of 5 seconds.
                    var initialNvmlRestartTimeWait = Math.Min(500 * nvidias.Count, 5000);
                    var firstMaxTimeoutAfterNvmlRestart = TimeSpan.FromMilliseconds(initialNvmlRestartTimeWait);
                    var nvidiaUUIDAndBusIds = nvidias.ToDictionary(nvidia => nvidia.UUID, nvidia => nvidia.PCIeBusID);
                    NvidiaMonitorManager.Init(nvidiaUUIDAndBusIds);
                    foreach (var nvidia in nvidias)
                    {
                        ret.Add(new DeviceMonitorNVIDIA(nvidia.UUID, nvidia.PCIeBusID, firstMaxTimeoutAfterNvmlRestart));
                    }
                }

                return ret;
            });
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates the device monitors for the supplied devices on a thread-pool task.
        /// </summary>
        /// <param name="devices">
        /// Devices to create monitors for; CPU, AMD and CUDA devices are selected via
        /// <c>GetDeviceTypes&lt;T&gt;()</c>.
        /// </param>
        /// <returns>
        /// A task producing the created monitors, CPU monitors first, then AMD, then NVIDIA.
        /// A vendor whose native init call returns non-zero contributes no monitors.
        /// </returns>
        public static Task<List<DeviceMonitor>> GetDeviceMonitors(IEnumerable<BaseDevice> devices)
        {
            return Task.Run(() =>
            {
                var ret = new List<DeviceMonitor>();

                // Adds one monitor per CPU device.
                void AddCpuMonitors()
                {
                    foreach (var cpu in devices.GetDeviceTypes<CPUDevice>())
                    {
                        ret.Add(new DeviceMonitorCPU(cpu.UUID));
                    }
                }

                // Initializes the AMD native library and adds a monitor per adapter it reports.
                void AddAmdMonitors()
                {
                    var amds = devices.GetDeviceTypes<AMDDevice>();
                    if (!amds.Any())
                    {
                        return;
                    }

                    // Logging is configured before init so the init call itself can log.
                    AMD_ODN.nhm_amd_set_debug_log_level(_amdDebugLogLevel);
                    AMD_ODN.nhm_amd_reg_log_cb(_amdLog);
                    var amdInit = AMD_ODN.nhm_amd_init();
                    if (0 != amdInit)
                    {
                        Logger.Info("DeviceMonitorManager", $"AMD nhm_amd_init {amdInit}");
                        return;
                    }

                    foreach (var amd in amds)
                    {
                        // Only monitor adapters the native library reports (0 == found).
                        var hasRet = AMD_ODN.nhm_amd_has_adapter(amd.PCIeBusID);
                        if (0 == hasRet)
                        {
                            ret.Add(new DeviceMonitorAMD(amd.UUID, amd.PCIeBusID));
                        }
                        else
                        {
                            Logger.Info("DeviceMonitorManager", $"AMD nhm_amd_has_adapter {hasRet} for BusID {amd.PCIeBusID}");
                        }
                    }
                }

                // Initializes the NVIDIA native library and adds a monitor per CUDA device.
                void AddNvidiaMonitors()
                {
                    var nvidias = devices.GetDeviceTypes<CUDADevice>();
                    if (!nvidias.Any())
                    {
                        return;
                    }

                    NVIDIA_MON.nhm_nvidia_set_debug_log_level(_nvidiaDebugLogLevel);
                    NVIDIA_MON.nhm_nvidia_reg_log_cb(_nvidiaLog);
                    var nvidiaInit = NVIDIA_MON.nhm_nvidia_init();
                    // NOTE(review): the log callback is registered a second time after init,
                    // exactly as before; confirm whether the native library needs this.
                    NVIDIA_MON.nhm_nvidia_reg_log_cb(_nvidiaLog);
                    // NOTE(review): runs even when nhm_nvidia_init failed; call order kept
                    // unchanged - confirm that this is intended.
                    DeviceMonitorNVIDIA.Init();

                    if (nvidiaInit != 0)
                    {
                        // The vendor prefix used to read "AMD", which misattributed NVIDIA failures.
                        Logger.Info("DeviceMonitorManager", $"NVIDIA nhm_nvidia_init {nvidiaInit}");
                        return;
                    }

                    foreach (var nvidia in nvidias)
                    {
                        ret.Add(new DeviceMonitorNVIDIA(nvidia.UUID, nvidia.PCIeBusID));
                    }
                }

                AddCpuMonitors();
                AddAmdMonitors();
                AddNvidiaMonitors();
                return ret;
            });
        }