public void ReleaseCudaManager(CudaManager cudaManager)
{
    // The context has been handed back, so it no longer counts as in use.
    CudaManagersInUse -= 1;

    bool isValid = cudaManager.IsValid();
    if (isValid && !UseNewContext)
    {
        // Healthy context and pooling is enabled: park it for reuse.
        cudaManager.SetInactive();
        _unusedCudaManagers.Enqueue(cudaManager);
    }
    else
    {
        // Either the context is broken, or this pool is configured to
        // always build a fresh context per request.
        cudaManager.DisposeCuda();
    }

    if (!isValid)
    {
        // an invalid CudaManager could be because
        // a) a segfault, so we want to fail the request
        // b) loss of power/data to GPU, so we want to fail the instance & rerun the request somewhere else
        // failing to create a CudaManager indicates case b), and will trigger a graceful shutdown of the instance
        new CudaManager(OrderId, DeviceId, x => x.DisposeCuda()).Dispose();
    }
}
public void VerifyAvailableGPUsAreWorking()
{
    // Smoke-test the GPUs by acquiring a context and running a trivial kernel,
    // once per available GPU. NOTE(review): this assumes GetCudaManagerForThread
    // rotates across devices so NumberOfAvailableGpus iterations touch each one
    // -- confirm against its implementation.
    int iteration = 0;
    while (iteration < NumberOfAvailableGpus)
    {
        using (CudaManager cudaManager = GetCudaManagerForThread())
        {
            new KernelManager(cudaManager)["TestKernel"].ExecuteTask();
        }
        ++iteration;
    }
}
private void ReleaseCudaManager(CudaManager cudaManager)
{
    // Surface any CUDA arrays the caller failed to dispose before returning the context.
    LogUnDisposedCudaArrays(cudaManager.AllocationStats);

    int remainingInUse;
    lock (_lock)
    {
        // Find the GPU entry that owns this context and hand the context back to it.
        GpuPriority owner = _sortedGpus.First(g => g.DeviceId == cudaManager.DeviceId);
        owner.ReleaseCudaManager(cudaManager);
        remainingInUse = owner.CudaManagersInUse;
    }

    // Log outside the lock; the count was captured while it was still held.
    _logger.Debug(
        "released cuda context for gpu {gpuOrderId} (deviceId={gpuDeviceId}). Gpu now has {gpuContextCount} contexts in use",
        new
        {
            gpuOrderId = cudaManager.OrderId,
            gpuDeviceId = cudaManager.DeviceId,
            gpuContextCount = remainingInUse,
        });
}
/// <summary>
/// Creates a kernel manager bound to the given CUDA context, with no
/// prefix arguments configured.
/// </summary>
/// <param name="cudaManager">The CUDA context kernels will execute against.</param>
public KernelManager(CudaManager cudaManager)
{
    CudaManager = cudaManager;
    // Array.Empty avoids allocating a fresh zero-length array per instance (CA1825).
    PrefixArguments = Array.Empty<object>();
}
// Clears the per-thread CUDA bookkeeping established by SetupForThread.
// NOTE(review): Current is cleared before AllocationStats -- the reverse of the
// setup order -- which looks deliberate (retract the published handle first, so
// no one observes a Current with its stats already gone); confirm before reordering.
private void TeardownForThread() { Current = null; AllocationStats = null; }
// Initializes per-thread CUDA bookkeeping: creates fresh allocation tracking, then
// publishes this instance as the thread's Current manager. Inverse of TeardownForThread.
// NOTE(review): Current is assigned last, presumably so it is never observable with a
// missing AllocationStats -- confirm whether these properties are thread-static.
private void SetupForThread() { AllocationStats = new AllocationStats(); Current = this; }