/// <summary>
/// Worker thread body: creates a CudaDnn connection, a solver and an NCCL instance for
/// the device given by <paramref name="e"/>, waits until all workers are created (or the
/// run is cancelled), broadcasts via NCCL and then steps the solver.
/// </summary>
/// <remarks>
/// Exceptions raised inside the guarded section are not rethrown; they are stored in
/// <c>SolverInfo.Error</c> and signalled through <c>SolverInfo.ErrorEvent</c>.
/// The solver, the NCCL instance and the CudaDnn connection are released on every exit
/// path (normal completion, cancellation and error).
/// </remarks>
/// <param name="sender">Specifies the sender of the event (not used).</param>
/// <param name="e">Specifies the arguments, supplying the DeviceID and the SolverInfo (in Arg).</param>
private void Worker_DoWork(object sender, ActionStateArgs<T> e)
{
    // A direct cast is used so that an argument of the wrong type fails with a meaningful
    // InvalidCastException instead of a NullReferenceException on the next line.
    SolverInfo<T> info = (SolverInfo<T>)e.Arg;
    NCCL<T> nccl = null;
    // Declared outside of the try block so that the finally block can release it
    // on the cancel and error paths as well.
    Solver<T> solver = null;

    m_cuda = new common.CudaDnn<T>(e.DeviceID, DEVINIT.CUBLAS | DEVINIT.CURAND, null, info.CudaPath);

    try
    {
        Solver<T> rank0 = info.Rank0;
        Log log = new Log("Worker solver for DeviceID = " + e.DeviceID.ToString());

        //-----------------------------------------
        //  Transfer the NCCL handle from the
        //  main kernel that created it to the
        //  one used by the CudaDnn on this thread.
        //
        //  After the copy, this thread will 'own'
        //  the nccl and be responsible for its
        //  destruction.
        //-----------------------------------------
        long hNccl = m_cuda.KernelCopyNccl(info.KernelHandle, info.NcclHandle);

        // Create the solver from a clone of the rank0 parameters, retargeted to this device.
        SolverParameter param = rank0.parameter.Clone();
        param.device_id = e.DeviceID;
        param.type = rank0.parameter.type;
        solver = Solver<T>.Create(m_cuda, log, param, rank0.CancelEvent, null, null, rank0.Database, null, rank0.solver_count, info.SolverRank);
        info.StartedEvent.Set();
        log.CHECK_EQ((int)solver.type, (int)rank0.type, "The solver types should be the same.");

        //-----------------------------------------
        //  Turn off logging for all other
        //  operations on the worker thread.
        //-----------------------------------------
        log.Enable = false;

        nccl = new NCCL<T>(m_cuda, log, solver, e.DeviceID, hNccl, info.GradientReadyEvents);

        info.InitializedEvent.Set();
        m_cuda.SynchronizeDevice();

        // Wait for either a cancel handle or the 'all created' event; the
        // 'all created' event is intentionally the LAST handle in the list.
        List<WaitHandle> rgWait = new List<WaitHandle>();
        rgWait.AddRange(rank0.CancelEvent.Handles);
        rgWait.Add(info.AllCreatedEvent);

        int nWait = WaitHandle.WaitAny(rgWait.ToArray());

        // Any index before the last one is a cancel handle - exit without training.
        if (nWait < rgWait.Count - 1)
        {
            return;
        }

        nccl.Broadcast();

        // Run the remaining iterations unless an explicit override was supplied.
        int nIterations = param.max_iter - solver.iter;
        if (info.IterationOverride > 0)
        {
            nIterations = info.IterationOverride;
        }

        solver.Step(nIterations);
    }
    catch (Exception excpt)
    {
        // Report the failure through the SolverInfo instead of rethrowing.
        info.Error = excpt;
        info.ErrorEvent.Set();
    }
    finally
    {
        // Release in the same order used on the original success path:
        // solver first, then nccl, then the CudaDnn connection.
        if (solver != null)
        {
            solver.Dispose();
        }

        if (nccl != null)
        {
            nccl.Dispose();
        }

        m_cuda.Dispose();
        m_cuda = null;
    }
}