/// <summary>
/// Launches the kernel specialization selected for the tensors contained in <paramref name="args"/>.
/// </summary>
/// <param name="context">Supplies the device properties and the kernel cache used for the launch.</param>
/// <param name="cudaContext">CUDA context; <c>SetCurrent()</c> is called on it before anything else is queried.</param>
/// <param name="ptx">PTX image handed to the kernel cache lookup.</param>
/// <param name="baseName">Kernel base name; combined with the specialization to form the mangled kernel name.</param>
/// <param name="args">Kernel arguments. Must contain at least one <see cref="Tensor"/>; the first one's element count sizes the grid.</param>
/// <exception cref="InvalidOperationException"><paramref name="args"/> contains no <see cref="Tensor"/>.</exception>
public static void Invoke(TSCudaContext context, CudaContext cudaContext, byte[] ptx, string baseName, params object[] args)
{
    ThrowIfAnyTensorInvalid(args);

    cudaContext.SetCurrent();

    CudaDeviceProperties deviceInfo = context.DeviceInfoForContext(cudaContext);

    // Materialize the tensor list exactly once. OfType is deferred, so calling First() and
    // ToArray() on the raw query would walk args twice. The snapshot is also taken before
    // args is handed to ConvertTensorArgs.Convert, matching the original ordering.
    Tensor[] tensors = args.OfType<Tensor>().ToArray();
    if (tensors.Length == 0)
    {
        // Same exception type Enumerable.First() raised, with an actionable message.
        throw new InvalidOperationException("Invoke requires at least one tensor argument");
    }

    long elementCount = tensors[0].ElementCount();
    ApplySpecialization spec = new ApplySpecialization(tensors);

    ConvertTensorArgs.Convert(cudaContext, spec.Use32BitIndices, args);

    ManagedCuda.VectorTypes.dim3 block = ApplyUtils.GetApplyBlock();
    ManagedCuda.VectorTypes.dim3 grid = ApplyUtils.GetApplyGrid(deviceInfo, elementCount);

    string fullKernelName = PermutationGenerator.GetMangledName(baseName, spec);
    CudaKernel kernel = context.KernelCache.Get(cudaContext, ptx, fullKernelName);

    kernel.GridDimensions = grid;
    kernel.BlockDimensions = block;
    kernel.RunAsync(CUstream.NullStream, args);
}
/// <summary>
/// Validates the operands and launches the scatter kernel that writes into <paramref name="result"/>.
/// </summary>
/// <param name="result">Destination tensor; required. It is used as the write target and returned.</param>
/// <param name="src">Source tensor; must be the same size as <paramref name="indices"/> and match
/// <paramref name="result"/> in every dimension except <paramref name="dim"/>.</param>
/// <param name="dim">Dimension argument forwarded to the kernel; must be in [0, result.DimensionCount).</param>
/// <param name="indices">Index tensor; its element count sizes the launch grid.</param>
/// <returns>The <paramref name="result"/> tensor.</returns>
/// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is negative or not less than the dimension count.</exception>
/// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are inconsistent.</exception>
public Tensor Scatter(Tensor result, Tensor src, int dim, Tensor indices)
{
    TSCudaContext context = CudaHelpers.TSContextForTensor(src);
    CudaContext cudaContext = context.CudaContextForTensor(src);

    if (result == null)
    {
        throw new ArgumentNullException(nameof(result));
    }

    if (result.DimensionCount != src.DimensionCount)
    {
        throw new InvalidOperationException("result and src must have same number of dimensions");
    }

    // Must be '||': the previous '&&' form could never be true, so out-of-range
    // dims were silently forwarded to the kernel.
    if (dim < 0 || dim >= result.DimensionCount)
    {
        throw new ArgumentOutOfRangeException(nameof(dim));
    }

    if (indices.DimensionCount != src.DimensionCount)
    {
        throw new InvalidOperationException("src and indices must have same number of dimensions");
    }

    if (!src.IsSameSizeAs(indices))
    {
        throw new InvalidOperationException("src and indices must be the same size");
    }

    if (!TensorResultBuilder.ArrayEqualExcept(src.Sizes, result.Sizes, dim))
    {
        throw new InvalidOperationException("result and src must be the same size except in dimension dim");
    }

    Tensor writeTarget = result;

    long nElement = indices.ElementCount();
    dim3 block = ApplyUtils.GetApplyBlock();
    dim3 grid = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

    bool use32BitIndices = ApplyUtils.CanUse32BitIndexMath(writeTarget)
        && ApplyUtils.CanUse32BitIndexMath(src)
        && ApplyUtils.CanUse32BitIndexMath(indices);

    if (use32BitIndices)
    {
        // Dimension counts above 3 are requested as -1 when building the kernel name.
        int dims = indices.DimensionCount <= 3 ? indices.DimensionCount : -1;
        string kernelName = MakeKernelName(ScatterBaseName, true, dims);
        Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, true, writeTarget, src, indices, dim, (int)nElement);
    }
    else
    {
        string kernelName = MakeKernelName(ScatterBaseName, false, -1);
        Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, false, writeTarget, src, indices, dim, nElement);
    }

    return writeTarget;
}
/// <summary>
/// Validates the operands and launches the gather kernel, writing into <paramref name="result"/>
/// or into the target obtained from <c>TensorResultBuilder.GetWriteTarget</c>.
/// </summary>
/// <param name="result">Optional destination tensor. When supplied it must have the same size as
/// <paramref name="indices"/> and match <paramref name="src"/> in every dimension except <paramref name="dim"/>.</param>
/// <param name="src">Source tensor; also used to look up the CUDA context.</param>
/// <param name="dim">Dimension argument forwarded to the kernel; must be in [0, src.DimensionCount).</param>
/// <param name="indices">Index tensor; its sizes shape the write target and its element count sizes the grid.</param>
/// <returns>The tensor the kernel wrote into.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is negative or not less than the dimension count.</exception>
/// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are inconsistent.</exception>
public Tensor Gather(Tensor result, Tensor src, int dim, Tensor indices)
{
    var context = CudaHelpers.TSContextForTensor(src);
    var cudaContext = context.CudaContextForTensor(src);

    if (result != null && result.DimensionCount != src.DimensionCount)
    {
        throw new InvalidOperationException("result and src must have same number of dimensions");
    }

    // The previous guard used '&&' (never true) and was skipped when result was null.
    // Checking against src.DimensionCount is equivalent when result is supplied (the
    // counts were just verified equal) and also covers the result == null case.
    if (dim < 0 || dim >= src.DimensionCount)
    {
        throw new ArgumentOutOfRangeException(nameof(dim));
    }

    if (indices.DimensionCount != src.DimensionCount)
    {
        throw new InvalidOperationException("src and indices must have same number of dimensions");
    }

    if (result != null && !result.IsSameSizeAs(indices))
    {
        throw new InvalidOperationException("result and indices must be the same size");
    }

    if (result != null && !TensorResultBuilder.ArrayEqualExcept(src.Sizes, result.Sizes, dim))
    {
        throw new InvalidOperationException("result and src must be the same size except in dimension dim");
    }

    // NOTE(review): presumably returns result when it is non-null and otherwise allocates a
    // tensor of indices.Sizes with src's element type - confirm against GetWriteTarget.
    var writeTarget = TensorResultBuilder.GetWriteTarget(result, indices.Allocator, src.ElementType, false, indices.Sizes);

    var nElement = indices.ElementCount();
    var block = ApplyUtils.GetApplyBlock();
    var grid = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

    var use32BitIndices = ApplyUtils.CanUse32BitIndexMath(writeTarget)
        && ApplyUtils.CanUse32BitIndexMath(src)
        && ApplyUtils.CanUse32BitIndexMath(indices);

    if (use32BitIndices)
    {
        // Dimension counts above 3 are requested as -1 when building the kernel name.
        var dims = indices.DimensionCount <= 3 ? indices.DimensionCount : -1;
        var kernelName = MakeKernelName(GatherBaseName, true, dims);
        this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, true, writeTarget, src, indices, dim, (int)nElement);
    }
    else
    {
        var kernelName = MakeKernelName(GatherBaseName, false, -1);
        this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, false, writeTarget, src, indices, dim, nElement);
    }

    return writeTarget;
}
/// <summary>
/// Validates the operands and launches <c>scatter_kernel</c>, writing into <paramref name="result"/>.
/// Any exception is logged through <c>Logger.WriteLine</c> and then rethrown unchanged.
/// </summary>
/// <param name="result">Destination tensor; required. It is used as the write target and returned.</param>
/// <param name="src">Source tensor; must be the same size as <paramref name="indices"/> and match
/// <paramref name="result"/> in every dimension except <paramref name="dim"/>.</param>
/// <param name="dim">Dimension argument forwarded to the kernel; must be in [0, result.DimensionCount).</param>
/// <param name="indices">Index tensor; its element count sizes the launch grid.</param>
/// <returns>The <paramref name="result"/> tensor.</returns>
/// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is negative or not less than the dimension count.</exception>
/// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are inconsistent.</exception>
public Tensor Scatter(Tensor result, Tensor src, int dim, Tensor indices)
{
    try
    {
        TSCudaContext context = CudaHelpers.TSContextForTensor(src);
        CudaContext cudaContext = context.CudaContextForTensor(src);

        if (result == null)
        {
            throw new ArgumentNullException(nameof(result));
        }

        if (result.DimensionCount != src.DimensionCount)
        {
            throw new InvalidOperationException($"result and src must have same number of dimensions. result dim count = '{result.DimensionCount}', source dim count = '{src.DimensionCount}'");
        }

        // Must be '||': the previous '&&' form could never be true, so out-of-range
        // dims were silently forwarded to the kernel.
        if (dim < 0 || dim >= result.DimensionCount)
        {
            throw new ArgumentOutOfRangeException(nameof(dim));
        }

        if (indices.DimensionCount != src.DimensionCount)
        {
            throw new InvalidOperationException("src and indices must have same number of dimensions");
        }

        if (!src.IsSameSizeAs(indices))
        {
            throw new InvalidOperationException("src and indices must be the same size");
        }

        if (!TensorResultBuilder.ArrayEqualExcept(src.Sizes, result.Sizes, dim))
        {
            throw new InvalidOperationException("result and src must be the same size except in dimension dim");
        }

        Tensor writeTarget = result;

        long nElement = indices.ElementCount();
        dim3 block = ApplyUtils.GetApplyBlock();
        dim3 grid = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

        Invoke(context, cudaContext, "scatter_kernel", grid, block, 0, CUstream.NullStream, false, writeTarget, src, indices, dim, nElement);

        return writeTarget;
    }
    catch (Exception err)
    {
        // Log for diagnostics, then rethrow with 'throw;' so the original stack trace survives.
        Logger.WriteLine($"Error = '{err.Message}', Call stack = '{err.StackTrace}'");
        throw;
    }
}
/// <summary>
/// Validates the operands and launches the scatter-fill kernel that writes into <paramref name="result"/>.
/// </summary>
/// <param name="result">Destination tensor; required. It is used as the write target and returned.</param>
/// <param name="value">Scalar forwarded to the kernel (presumably the value written at the indexed positions - confirm against the kernel).</param>
/// <param name="dim">Dimension argument forwarded to the kernel; must be in [0, result.DimensionCount).</param>
/// <param name="indices">Index tensor; must match <paramref name="result"/> in every dimension except
/// <paramref name="dim"/>. Its element count sizes the launch grid.</param>
/// <returns>The <paramref name="result"/> tensor.</returns>
/// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is negative or not less than the dimension count.</exception>
/// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are inconsistent.</exception>
public Tensor ScatterFill(Tensor result, float value, int dim, Tensor indices)
{
    var context = CudaHelpers.TSContextForTensor(indices);
    var cudaContext = context.CudaContextForTensor(indices);

    if (result == null)
    {
        throw new ArgumentNullException(nameof(result));
    }

    // Must be '||': the previous '&&' form could never be true, so out-of-range
    // dims were silently forwarded to the kernel.
    if (dim < 0 || dim >= result.DimensionCount)
    {
        throw new ArgumentOutOfRangeException(nameof(dim));
    }

    if (indices.DimensionCount != result.DimensionCount)
    {
        throw new InvalidOperationException("result and indices must have same number of dimensions");
    }

    if (!TensorResultBuilder.ArrayEqualExcept(indices.Sizes, result.Sizes, dim))
    {
        throw new InvalidOperationException("result and indices must be the same size except in dimension dim");
    }

    var writeTarget = result;

    var nElement = indices.ElementCount();
    var block = ApplyUtils.GetApplyBlock();
    var grid = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

    var use32BitIndices = ApplyUtils.CanUse32BitIndexMath(writeTarget)
        && ApplyUtils.CanUse32BitIndexMath(indices);

    if (use32BitIndices)
    {
        // Dimension counts above 3 are requested as -1 when building the kernel name.
        var dims = indices.DimensionCount <= 3 ? indices.DimensionCount : -1;
        var kernelName = MakeKernelName(ScatterFillBaseName, true, dims);
        this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, true, writeTarget, indices, value, dim, (int)nElement);
    }
    else
    {
        var kernelName = MakeKernelName(ScatterFillBaseName, false, -1);
        this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, false, writeTarget, indices, value, dim, nElement);
    }

    return writeTarget;
}
/// <summary>
/// Validates the operands and launches <c>gather_kernel</c>, writing into <paramref name="result"/>
/// or into the target obtained from <c>TensorResultBuilder.GetWriteTarget</c>.
/// </summary>
/// <param name="result">Optional destination tensor. When supplied it must have the same size as
/// <paramref name="indices"/> and match <paramref name="src"/> in every dimension except <paramref name="dim"/>.</param>
/// <param name="src">Source tensor; also used to look up the CUDA context.</param>
/// <param name="dim">Dimension argument forwarded to the kernel; must be in [0, src.DimensionCount).</param>
/// <param name="indices">Index tensor; its sizes shape the write target and its element count sizes the grid.</param>
/// <returns>The tensor the kernel wrote into.</returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is negative or not less than the dimension count.</exception>
/// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are inconsistent.</exception>
public Tensor Gather(Tensor result, Tensor src, int dim, Tensor indices)
{
    TSCudaContext context = CudaHelpers.TSContextForTensor(src);
    CudaContext cudaContext = context.CudaContextForTensor(src);

    if (result != null && result.DimensionCount != src.DimensionCount)
    {
        throw new InvalidOperationException("result and src must have same number of dimensions");
    }

    // The previous guard used '&&' (never true) and was skipped when result was null.
    // Checking against src.DimensionCount is equivalent when result is supplied (the
    // counts were just verified equal) and also covers the result == null case.
    if (dim < 0 || dim >= src.DimensionCount)
    {
        throw new ArgumentOutOfRangeException(nameof(dim));
    }

    if (indices.DimensionCount != src.DimensionCount)
    {
        throw new InvalidOperationException("src and indices must have same number of dimensions");
    }

    if (result != null && !result.IsSameSizeAs(indices))
    {
        throw new InvalidOperationException("result and indices must be the same size");
    }

    if (result != null && !TensorResultBuilder.ArrayEqualExcept(src.Sizes, result.Sizes, dim))
    {
        throw new InvalidOperationException("result and src must be the same size except in dimension dim");
    }

    // NOTE(review): presumably returns result when it is non-null and otherwise allocates a
    // tensor of indices.Sizes with src's element type - confirm against GetWriteTarget.
    Tensor writeTarget = TensorResultBuilder.GetWriteTarget(result, indices.Allocator, src.ElementType, false, indices.Sizes);

    long nElement = indices.ElementCount();
    dim3 block = ApplyUtils.GetApplyBlock();
    dim3 grid = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

    Invoke(context, cudaContext, "gather_kernel", grid, block, 0, CUstream.NullStream, false, writeTarget, src, indices, dim, nElement);

    return writeTarget;
}
/// <summary>
/// Launches the kernel specialization selected for the arrays contained in <paramref name="args"/>.
/// </summary>
/// <param name="context">Supplies the device properties and the kernel cache used for the launch.</param>
/// <param name="cudaContext">CUDA context used for the device lookup, argument conversion and kernel lookup.</param>
/// <param name="ptx">PTX image handed to the kernel cache lookup.</param>
/// <param name="baseName">Kernel base name; combined with the specialization to form the mangled kernel name.</param>
/// <param name="args">Kernel arguments. Must contain at least one <see cref="NDArray"/>; the first one's element count sizes the grid.</param>
/// <exception cref="InvalidOperationException"><paramref name="args"/> contains no <see cref="NDArray"/>.</exception>
public static void Invoke(TSCudaContext context, CudaContext cudaContext, byte[] ptx, string baseName, params object[] args)
{
    ThrowIfAnyTensorInvalid(args);

    var deviceInfo = context.DeviceInfoForContext(cudaContext);

    // Materialize the array list exactly once. OfType is deferred, so calling First() and
    // ToArray() on the raw query would walk args twice. The snapshot is also taken before
    // args is handed to ConvertTensorArgs.Convert, matching the original ordering.
    var tensors = args.OfType<NDArray>().ToArray();
    if (tensors.Length == 0)
    {
        // Same exception type Enumerable.First() raised, with an actionable message.
        throw new InvalidOperationException("Invoke requires at least one NDArray argument");
    }

    var elementCount = tensors[0].ElementCount();
    var spec = new ApplySpecialization(tensors);

    ConvertTensorArgs.Convert(cudaContext, spec.Use32BitIndices, args);

    var block = ApplyUtils.GetApplyBlock();
    var grid = ApplyUtils.GetApplyGrid(deviceInfo, elementCount);

    var fullKernelName = PermutationGenerator.GetMangledName(baseName, spec);
    var kernel = context.KernelCache.Get(cudaContext, ptx, fullKernelName);

    kernel.GridDimensions = grid;
    kernel.BlockDimensions = block;
    kernel.RunAsync(CUstream.NullStream, args);
}
/// <summary>
/// Validates the operands and launches <c>scatterFill_kernel</c>, writing into <paramref name="result"/>.
/// </summary>
/// <param name="result">Destination tensor; required. It is used as the write target and returned.</param>
/// <param name="value">Scalar forwarded to the kernel (presumably the value written at the indexed positions - confirm against the kernel).</param>
/// <param name="dim">Dimension argument forwarded to the kernel; must be in [0, result.DimensionCount).</param>
/// <param name="indices">Index tensor; must match <paramref name="result"/> in every dimension except
/// <paramref name="dim"/>. Its element count sizes the launch grid.</param>
/// <returns>The <paramref name="result"/> tensor.</returns>
/// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is negative or not less than the dimension count.</exception>
/// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are inconsistent.</exception>
public Tensor ScatterFill(Tensor result, float value, int dim, Tensor indices)
{
    TSCudaContext context = CudaHelpers.TSContextForTensor(indices);
    CudaContext cudaContext = context.CudaContextForTensor(indices);

    if (result == null)
    {
        throw new ArgumentNullException(nameof(result));
    }

    // Must be '||': the previous '&&' form could never be true, so out-of-range
    // dims were silently forwarded to the kernel.
    if (dim < 0 || dim >= result.DimensionCount)
    {
        throw new ArgumentOutOfRangeException(nameof(dim));
    }

    if (indices.DimensionCount != result.DimensionCount)
    {
        throw new InvalidOperationException("result and indices must have same number of dimensions");
    }

    if (!TensorResultBuilder.ArrayEqualExcept(indices.Sizes, result.Sizes, dim))
    {
        throw new InvalidOperationException("result and indices must be the same size except in dimension dim");
    }

    Tensor writeTarget = result;

    long nElement = indices.ElementCount();
    dim3 block = ApplyUtils.GetApplyBlock();
    dim3 grid = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

    Invoke(context, cudaContext, "scatterFill_kernel", grid, block, 0, CUstream.NullStream, false, writeTarget, indices, value, dim, nElement);

    return writeTarget;
}