Code Example #1
 /// <summary>
 /// Disposes this Cuda buffer.
 /// </summary>
 protected override void DisposeAcceleratorObject(bool disposing)
 {
     CudaException.VerifyDisposed(
         disposing,
         CurrentAPI.FreeMemory(NativePtr));
     NativePtr = IntPtr.Zero;
 }
Code Example #2
        /// <summary cref="MemoryBuffer{T, TIndex}.CopyFromView(
        /// AcceleratorStream, ArrayView{T}, Index1)"/>
        protected internal unsafe override void CopyFromView(
            AcceleratorStream stream,
            ArrayView<T> source,
            Index1 targetOffset)
        {
            var binding = Accelerator.BindScoped();

            var sourceAddress = new IntPtr(source.LoadEffectiveAddress());
            var targetAddress = new IntPtr(ComputeEffectiveAddress(targetOffset));

            switch (source.AcceleratorType)
            {
            case AcceleratorType.CPU:
                CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyHostToDevice(
                                                targetAddress,
                                                sourceAddress,
                                                new IntPtr(source.LengthInBytes),
                                                stream));
                break;

            case AcceleratorType.Cuda:
                CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToDevice(
                                                targetAddress,
                                                sourceAddress,
                                                new IntPtr(source.LengthInBytes),
                                                stream));
                break;

            default:
                throw new NotSupportedException(
                          RuntimeErrorMessages.NotSupportedTargetAccelerator);
            }

            binding.Recover();
        }
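
The switch above dispatches on the source accelerator type; the host-to-device branch reduces to a single driver call through the same wrapper. Below is a minimal, hypothetical sketch of that branch used outside the buffer class, assuming a device pointer devicePtr, a byte count lengthInBytes, and an AcceleratorStream stream are already in scope; the host array is pinned so the driver can read it directly.

    // Hypothetical sketch (not part of ILGPU): queue a host-to-device copy on a stream.
    var hostData = new int[1024];
    var handle = System.Runtime.InteropServices.GCHandle.Alloc(
        hostData, System.Runtime.InteropServices.GCHandleType.Pinned);
    try
    {
        CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyHostToDevice(
            devicePtr,                       // destination in device memory
            handle.AddrOfPinnedObject(),     // pinned source buffer on the host
            new IntPtr(lengthInBytes),       // number of bytes to copy
            stream));                        // stream the copy is queued on
    }
    finally
    {
        handle.Free();                       // unpin the host array
    }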
Code Example #3
 /// <inheritdoc/>
 protected override void DisposeAcceleratorObject(bool disposing)
 {
     CudaException.VerifyDisposed(
         disposing,
         CurrentAPI.DestroyEvent(EventPtr));
     EventPtr = IntPtr.Zero;
 }
Code Example #4
File: CudaKernel.cs Project: m4rs-mt/ILGPU
        /// <summary>
        /// Loads a compiled kernel into the given Cuda context as a kernel program.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        /// <param name="kernel">The source kernel.</param>
        /// <param name="launcher">The launcher method for the given kernel.</param>
        internal CudaKernel(
            CudaAccelerator accelerator,
            PTXCompiledKernel kernel,
            MethodInfo launcher)
            : base(accelerator, kernel, launcher)
        {
            var kernelLoaded = CurrentAPI.LoadModule(
                out modulePtr,
                kernel.PTXAssembly,
                out string errorLog);

            if (kernelLoaded != CudaError.CUDA_SUCCESS)
            {
                Trace.WriteLine("PTX Kernel loading failed:");
                if (string.IsNullOrWhiteSpace(errorLog))
                {
                    Trace.WriteLine(">> No error information available");
                }
                else
                {
                    Trace.WriteLine(errorLog);
                }
            }
            CudaException.ThrowIfFailed(kernelLoaded);

            CudaException.ThrowIfFailed(
                CurrentAPI.GetModuleFunction(
                    out functionPtr,
                    modulePtr,
                    kernel.Name));
        }
Code Example #5
        /// <summary cref="MemoryBuffer{T, TIndex}.CopyToView(
        /// AcceleratorStream, ArrayView{T}, LongIndex1)"/>
        protected internal unsafe override void CopyToView(
            AcceleratorStream stream,
            ArrayView<T> target,
            LongIndex1 sourceOffset)
        {
            var binding = Accelerator.BindScoped();

            var targetBuffer  = target.Source;
            var sourceAddress = new IntPtr(ComputeEffectiveAddress(sourceOffset));
            var targetAddress = new IntPtr(target.LoadEffectiveAddress());
            var lengthInBytes = new IntPtr(target.LengthInBytes);

            switch (targetBuffer.AcceleratorType)
            {
            case AcceleratorType.CPU:
            case AcceleratorType.Cuda:
                CudaException.ThrowIfFailed(
                    CurrentAPI.MemcpyAsync(
                        targetAddress,
                        sourceAddress,
                        lengthInBytes,
                        stream));
                break;

            default:
                throw new NotSupportedException(
                          RuntimeErrorMessages.NotSupportedTargetAccelerator);
            }

            binding.Recover();
        }
Code Example #6
        /// <summary cref="MemoryBuffer{T, TIndex}.CopyFromViewInternal(ArrayView{T, Index}, AcceleratorType, TIndex, AcceleratorStream)"/>
        protected internal override void CopyFromViewInternal(
            ArrayView<T, Index> source,
            AcceleratorType acceleratorType,
            TIndex targetOffset,
            AcceleratorStream stream)
        {
            switch (acceleratorType)
            {
            case AcceleratorType.CPU:
                CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyHostToDevice(
                                                GetSubView(targetOffset).Pointer,
                                                source.Pointer,
                                                new IntPtr(source.LengthInBytes),
                                                stream));
                break;

            case AcceleratorType.Cuda:
                CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToDevice(
                                                GetSubView(targetOffset).Pointer,
                                                source.Pointer,
                                                new IntPtr(source.LengthInBytes),
                                                stream));
                break;

            default:
                throw new NotSupportedException(RuntimeErrorMessages.NotSupportedTargetAccelerator);
            }
        }
Code Example #7
        /// <inheritdoc/>
        protected override ProfilingMarker AddProfilingMarkerInternal()
        {
            var profilingMarker = new CudaProfilingMarker();

            CudaException.ThrowIfFailed(
                CurrentAPI.RecordEvent(profilingMarker.EventPtr, StreamPtr));
            return profilingMarker;
        }
Code Example #8
File: CudaStream.cs Project: awesomedotnetcore/ILGPU
 /// <summary cref="DisposeBase.Dispose(bool)"/>
 protected override void Dispose(bool disposing)
 {
     if (streamPtr != IntPtr.Zero)
     {
         CudaException.ThrowIfFailed(CudaAPI.Current.DestroyStream(streamPtr));
         streamPtr = IntPtr.Zero;
     }
 }
Code Example #9
File: CudaStream.cs Project: saimarpaka/ILGPU
 /// <summary>
 /// Constructs a new Cuda stream.
 /// </summary>
 /// <param name="accelerator">The associated accelerator.</param>
 internal CudaStream(Accelerator accelerator)
     : base(accelerator)
 {
     CudaException.ThrowIfFailed(
         CudaAPI.Current.CreateStream(
             out streamPtr,
             StreamFlags.CU_STREAM_NON_BLOCKING));
 }
Code Example #10
File: CudaKernel.cs Project: m4rs-mt/ILGPU
 /// <summary>
 /// Disposes this Cuda kernel.
 /// </summary>
 protected override void DisposeAcceleratorObject(bool disposing)
 {
     CudaException.VerifyDisposed(
         disposing,
         CurrentAPI.DestroyModule(modulePtr));
     functionPtr = IntPtr.Zero;
     modulePtr   = IntPtr.Zero;
 }
Code Example #11
File: CudaStream.cs Project: nguyenvuduc/ILGPU
 /// <summary cref="AcceleratorStream.Synchronize"/>
 public override void Synchronize()
 {
     using (var binding = Accelerator.BindScoped())
     {
         CudaException.ThrowIfFailed(
             CudaAPI.Current.SynchronizeStream(streamPtr));
     }
 }
Code Example #12
 /// <summary cref="DisposeBase.Dispose(bool)"/>
 protected override void Dispose(bool disposing)
 {
     if (NativePtr != IntPtr.Zero)
     {
         CudaException.ThrowIfFailed(CudaAPI.Current.FreeMemory(NativePtr));
         NativePtr = IntPtr.Zero;
     }
     base.Dispose(disposing);
 }
Code Example #13
File: CudaStream.cs Project: m4rs-mt/ILGPU
        /// <summary cref="AcceleratorStream.Synchronize"/>
        public override void Synchronize()
        {
            var binding = Accelerator.BindScoped();

            CudaException.ThrowIfFailed(
                CurrentAPI.SynchronizeStream(streamPtr));

            binding.Recover();
        }
Code Example #14
File: CudaKernel.cs Project: awesomedotnetcore/ILGPU
 /// <summary cref="DisposeBase.Dispose(bool)"/>
 protected override void Dispose(bool disposing)
 {
     if (modulePtr != IntPtr.Zero)
     {
         CudaException.ThrowIfFailed(CudaAPI.Current.DestroyModule(modulePtr));
         functionPtr = IntPtr.Zero;
         modulePtr   = IntPtr.Zero;
     }
 }
Code Example #15
File: CudaKernel.cs Project: awesomedotnetcore/ILGPU
 internal CudaKernel(
     CudaAccelerator accelerator,
     CompiledKernel kernel,
     MethodInfo launcher)
     : base(accelerator, kernel, launcher)
 {
     CudaException.ThrowIfFailed(CudaAPI.Current.LoadModule(out modulePtr, kernel.GetBuffer()));
     CudaException.ThrowIfFailed(CudaAPI.Current.GetModuleFunction(out functionPtr, modulePtr, kernel.EntryName));
 }
Code Example #16
File: CudaStream.cs Project: m4rs-mt/ILGPU
        /// <inheritdoc/>
        protected override ProfilingMarker AddProfilingMarkerInternal()
        {
            using var binding = Accelerator.BindScoped();
            var profilingMarker = new CudaProfilingMarker(Accelerator);

            CudaException.ThrowIfFailed(
                CurrentAPI.RecordEvent(profilingMarker.EventPtr, StreamPtr));
            return profilingMarker;
        }
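
This internal method backs the stream's public profiling entry point. A short usage sketch, assuming the public AcceleratorStream.AddProfilingMarker() wrapper and ProfilingMarker.MeasureFrom(...) are available in this ILGPU version and that launchKernel is a placeholder that enqueues work on the same stream:

    // Hypothetical usage sketch: measure GPU time between two profiling markers.
    var start = stream.AddProfilingMarker();
    launchKernel(stream);                      // enqueue some work on the same stream
    var end = stream.AddProfilingMarker();
    stream.Synchronize();                      // ensure both events have completed
    TimeSpan gpuTime = end.MeasureFrom(start); // elapsed time between the two markers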
Code Example #17
File: CudaStream.cs Project: m4rs-mt/ILGPU
 /// <summary>
 /// Constructs a new Cuda stream with given <see cref="StreamFlags"/>.
 /// </summary>
 /// <param name="accelerator">The associated accelerator.</param>
 /// <param name="flag">
 /// Stream flag to use. Allows blocking and non-blocking streams.
 /// </param>
 internal CudaStream(Accelerator accelerator, StreamFlags flag)
     : base(accelerator)
 {
     CudaException.ThrowIfFailed(
         CurrentAPI.CreateStream(
             out streamPtr,
             flag));
     responsibleForHandle = true;
 }
Code Example #18
        /// <summary cref="DisposeBase.Dispose(bool)"/>
        protected override void Dispose(bool disposing)
        {
            if (Pointer == IntPtr.Zero)
            {
                return;
            }

            CudaException.ThrowIfFailed(CudaAPI.Current.FreeMemory(Pointer));
            Pointer = IntPtr.Zero;
        }
Code Example #19
File: CudaStream.cs Project: killop/ILGPU
 /// <summary cref="DisposeBase.Dispose(bool)"/>
 protected override void Dispose(bool disposing)
 {
     if (responsibleForHandle && streamPtr != IntPtr.Zero)
     {
         CudaException.ThrowIfFailed(
             CurrentAPI.DestroyStream(streamPtr));
         streamPtr = IntPtr.Zero;
     }
     base.Dispose(disposing);
 }
Code Example #20
        /// <summary cref="MemoryBuffer.MemSetToZero(AcceleratorStream)"/>
        public override void MemSetToZero(AcceleratorStream stream)
        {
            var binding = Accelerator.BindScoped();

            CudaException.ThrowIfFailed(CudaAPI.Current.Memset(
                                            NativePtr,
                                            0,
                                            new IntPtr(LengthInBytes),
                                            stream));

            binding.Recover();
        }
Code Example #21
        /// <summary>
        /// Constructs a new Cuda accelerator.
        /// </summary>
        /// <param name="context">The ILGPU context.</param>
        /// <param name="deviceId">The target device id.</param>
        /// <param name="acceleratorFlags">The accelerator flags.</param>
        public CudaAccelerator(
            Context context,
            int deviceId,
            CudaAcceleratorFlags acceleratorFlags)
            : base(context, AcceleratorType.Cuda)
        {
            CudaException.ThrowIfFailed(
                CurrentAPI.CreateContext(out contextPtr, acceleratorFlags, deviceId));
            DeviceId = deviceId;

            SetupAccelerator();
        }
Code Example #22
File: CudaStream.cs Project: m4rs-mt/ILGPU
        /// <summary>
        /// Disposes this Cuda stream.
        /// </summary>
        protected override void DisposeAcceleratorObject(bool disposing)
        {
            if (!responsibleForHandle || streamPtr == IntPtr.Zero)
            {
                return;
            }

            CudaException.VerifyDisposed(
                disposing,
                CurrentAPI.DestroyStream(streamPtr));
            streamPtr = IntPtr.Zero;
        }
Code Example #23
        /// <inheritdoc/>
        public override void Synchronize()
        {
            var errorStatus = CurrentAPI.QueryEvent(EventPtr);

            if (errorStatus == CudaError.CUDA_ERROR_NOT_READY)
            {
                CudaException.ThrowIfFailed(CurrentAPI.SynchronizeEvent(EventPtr));
            }
            else
            {
                CudaException.ThrowIfFailed(errorStatus);
            }
        }
Code Example #24
        /// <inheritdoc/>
        public override void Synchronize()
        {
            using var binding = Accelerator.BindScoped();

            var errorStatus = CurrentAPI.QueryEvent(EventPtr);

            if (errorStatus == CudaError.CUDA_ERROR_NOT_READY)
            {
                CudaException.ThrowIfFailed(CurrentAPI.SynchronizeEvent(EventPtr));
            }
            else
            {
                CudaException.ThrowIfFailed(errorStatus);
            }
        }
Code Example #25
File: CudaAccelerator.cs Project: losttech/ILGPU
        /// <summary>
        /// Resolves the memory type of the given device pointer.
        /// </summary>
        /// <param name="value">The device pointer to check.</param>
        /// <returns>The resolved memory type.</returns>
        public static unsafe CudaMemoryType GetCudaMemoryType(IntPtr value)
        {
            int data = 0;
            var err  = CurrentAPI.GetPointerAttribute(
                new IntPtr(Unsafe.AsPointer(ref data)),
                PointerAttribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
                value);

            if (err == CudaError.CUDA_ERROR_INVALID_VALUE)
            {
                return CudaMemoryType.None;
            }
            CudaException.ThrowIfFailed(err);
            return (CudaMemoryType)data;
        }
Code Example #26
        /// <inheritdoc/>
        protected internal override unsafe void MemSetInternal(
            AcceleratorStream stream,
            byte value,
            long offsetInBytes,
            long lengthInBytes)
        {
            var binding = Accelerator.BindScoped();

            CudaException.ThrowIfFailed(
                CurrentAPI.Memset(
                    new IntPtr(NativePtr.ToInt64() + offsetInBytes),
                    value,
                    new IntPtr(lengthInBytes),
                    stream));

            binding.Recover();
        }
Code Example #27
        /// <summary>
        /// Resolves the memory type of the given device pointer.
        /// </summary>
        /// <param name="value">The device pointer to check.</param>
        /// <returns>The resolved memory type.</returns>
        public static unsafe CudaMemoryType GetCudaMemoryType(IntPtr value)
        {
            // This functionality requires unified addresses (X64)
            Backends.Backend.EnsureRunningOnPlatform(TargetPlatform.X64);

            int data = 0;
            var err  = CurrentAPI.GetPointerAttribute(
                new IntPtr(Unsafe.AsPointer(ref data)),
                PointerAttribute.CU_POINTER_ATTRIBUTE_MEMORY_TYPE,
                value);

            if (err == CudaError.CUDA_ERROR_INVALID_VALUE)
            {
                return CudaMemoryType.None;
            }
            CudaException.ThrowIfFailed(err);
            return (CudaMemoryType)data;
        }
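
A brief usage sketch based on the signature above, assuming the method lives on CudaAccelerator (as in Code Example #25) and that buffer is a Cuda memory buffer exposing its raw device pointer via NativePtr, as in the earlier examples on this page:

    // Hypothetical usage sketch: classify a raw pointer via the Cuda driver.
    var memoryType = CudaAccelerator.GetCudaMemoryType(buffer.NativePtr);
    if (memoryType == CudaMemoryType.None)
    {
        // The driver does not recognize the pointer (e.g., an ordinary host pointer).
    }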
Code Example #28
        /// <summary>
        /// Sets up all required settings.
        /// </summary>
        private void SetupAccelerator()
        {
            Bind();

            CudaException.ThrowIfFailed(
                CurrentAPI.GetDeviceName(out string name, DeviceId));
            Name          = name;
            DefaultStream = new CudaStream(this, IntPtr.Zero);

            CudaException.ThrowIfFailed(
                CurrentAPI.GetTotalDeviceMemory(out long total, DeviceId));
            MemorySize = total;

            // Resolve max grid size
            MaxGridSize = new Index3(
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, DeviceId),
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, DeviceId),
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, DeviceId));

            // Resolve max group size
            MaxGroupSize = new Index3(
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, DeviceId),
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, DeviceId),
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, DeviceId));

            // Resolve max threads per group
            MaxNumThreadsPerGroup = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, DeviceId);

            // Resolve max shared memory per block
            MaxSharedMemoryPerGroup = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
                DeviceId);

            // Resolve total constant memory
            MaxConstantMemory = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, DeviceId);

            // Resolve clock rate
            ClockRate = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, DeviceId);

            // Resolve warp size
            WarpSize = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE, DeviceId);

            // Resolve number of multiprocessors
            NumMultiprocessors = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, DeviceId);

            // Resolve max number of threads per multiprocessor
            MaxNumThreadsPerMultiprocessor = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,
                DeviceId);

            // Resolve cache configuration
            CudaException.ThrowIfFailed(
                CurrentAPI.GetSharedMemoryConfig(out sharedMemoryConfiguration));
            CudaException.ThrowIfFailed(
                CurrentAPI.GetCacheConfig(out cacheConfiguration));

            // Setup architecture and backend
            CudaException.ThrowIfFailed(
                CurrentAPI.GetDeviceComputeCapability(
                    out int major,
                    out int minor,
                    DeviceId));
            Architecture = PTXArchitectureUtils.GetArchitecture(major, minor);

            CudaException.ThrowIfFailed(
                CurrentAPI.GetDriverVersion(out var driverVersion));
            InstructionSet = GetInstructionSet(Architecture, driverVersion);
            Init(new PTXBackend(
                     Context,
                     Architecture,
                     InstructionSet));
        }
Code Example #29
File: CudaStream.cs Project: awesomedotnetcore/ILGPU
 /// <summary cref="AcceleratorStream.Synchronize"/>
 public override void Synchronize()
 {
     CudaException.ThrowIfFailed(CudaAPI.Current.SynchronizeStream(streamPtr));
 }
Code Example #30
        /// <summary>
        /// Sets up all required settings.
        /// </summary>
        private void SetupAccelerator()
        {
            Bind();

            CudaException.ThrowIfFailed(
                CurrentAPI.GetDeviceName(out string name, DeviceId));
            Name          = name;
            DefaultStream = new CudaStream(this, IntPtr.Zero, false);

            CudaException.ThrowIfFailed(
                CurrentAPI.GetTotalDeviceMemory(out long total, DeviceId));
            MemorySize = total;

            // Resolve max grid size
            MaxGridSize = new Index3(
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, DeviceId),
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y, DeviceId),
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z, DeviceId));

            // Resolve max group size
            MaxGroupSize = new Index3(
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, DeviceId),
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y, DeviceId),
                CurrentAPI.GetDeviceAttribute(
                    DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z, DeviceId));

            // Resolve max threads per group
            MaxNumThreadsPerGroup = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, DeviceId);

            // Resolve max shared memory per block
            MaxSharedMemoryPerGroup = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK,
                DeviceId);

            // Resolve total constant memory
            MaxConstantMemory = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY, DeviceId);

            // Resolve clock rate
            ClockRate = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, DeviceId) / 1000;

            // Resolve memory clock rate
            MemoryClockRate = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, DeviceId) / 1000;

            // Resolve the bus width
            MemoryBusWidth = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, DeviceId);

            // Resolve warp size
            WarpSize = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_WARP_SIZE, DeviceId);

            // Resolve number of multiprocessors
            NumMultiprocessors = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, DeviceId);

            // Resolve max number of threads per multiprocessor
            MaxNumThreadsPerMultiprocessor = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR,
                DeviceId);

            // Resolve the L2 cache size
            L2CacheSize = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE, DeviceId);

            // Resolve the maximum amount of shared memory per multiprocessor
            MaxSharedMemoryPerMultiprocessor = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR,
                DeviceId);

            // Resolve the total number of registers per multiprocessor
            TotalNumRegistersPerMultiprocessor = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR,
                DeviceId);

            // Resolve the total number of registers per group
            TotalNumRegistersPerGroup = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, DeviceId);

            // Resolve the max memory pitch
            MaxMemoryPitch = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MAX_PITCH, DeviceId);

            // Resolve the number of concurrent copy engines
            NumConcurrentCopyEngines = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT, DeviceId);

            // Resolve whether this device has ECC support
            HasECCSupport = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_ECC_ENABLED, DeviceId) != 0;

            // Resolve whether this device supports managed memory
            SupportsManagedMemory = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, DeviceId) != 0;

            // Resolve whether this device supports compute preemption
            SupportsComputePreemption = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED,
                DeviceId) != 0;

            // Resolve the current driver mode
            DriverMode = (DeviceDriverMode)CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_TCC_DRIVER,
                DeviceId);

            // Resolve the PCI domain id
            PCIDomainId = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID,
                DeviceId);

            // Resolve the PCI bus id
            PCIBusId = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_PCI_BUS_ID,
                DeviceId);

            // Resolve the PCI device id
            PCIDeviceId = CurrentAPI.GetDeviceAttribute(
                DeviceAttribute.CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID,
                DeviceId);

            // Resolve cache configuration
            CudaException.ThrowIfFailed(
                CurrentAPI.GetSharedMemoryConfig(out sharedMemoryConfiguration));
            CudaException.ThrowIfFailed(
                CurrentAPI.GetCacheConfig(out cacheConfiguration));

            // Setup architecture and backend
            CudaException.ThrowIfFailed(
                CurrentAPI.GetDeviceComputeCapability(
                    out int major,
                    out int minor,
                    DeviceId));
            Architecture = PTXArchitectureUtils.GetArchitecture(major, minor);

            CudaException.ThrowIfFailed(
                CurrentAPI.GetDriverVersion(out var driverVersion));
            DriverVersion     = driverVersion;
            InstructionSet    = GetInstructionSet(Architecture, driverVersion);
            base.Capabilities = new CudaCapabilityContext(Architecture);

            Init(new PTXBackend(
                     Context,
                     Capabilities,
                     Architecture,
                     InstructionSet));
        }