/// <summary>
/// Launches the kernel derived from <paramref name="baseName"/> over the tensors found in
/// <paramref name="args"/>, on the null stream.
/// </summary>
/// <param name="context">Supplies device properties and the kernel cache.</param>
/// <param name="cudaContext">CUDA context that is made current and used to look up the kernel.</param>
/// <param name="ptx">PTX image handed to the kernel cache.</param>
/// <param name="baseName">Kernel base name; it is mangled with the apply-specialization before lookup.</param>
/// <param name="args">Kernel arguments. The first <c>Tensor</c> among them determines the element count used to size the grid.</param>
/// <exception cref="InvalidOperationException">Thrown by <c>First()</c> when <paramref name="args"/> contains no <c>Tensor</c>.</exception>
public static void Invoke(TSCudaContext context, CudaContext cudaContext, byte[] ptx, string baseName, params object[] args)
        {
            ThrowIfAnyTensorInvalid(args);

            // Make the target CUDA context current before any device-related work.
            cudaContext.SetCurrent();

            var deviceProperties = context.DeviceInfoForContext(cudaContext);

            // Collect the tensor arguments once; the first one drives the launch size and
            // the whole set drives the kernel specialization.
            Tensor[] tensors = args.OfType<Tensor>().ToArray();
            long totalElements = tensors.First().ElementCount();
            var specialization = new ApplySpecialization(tensors);

            // NOTE(review): presumably rewrites the tensor entries of args into the form the
            // kernel expects (32- or 64-bit indexing) — confirm against ConvertTensorArgs.
            ConvertTensorArgs.Convert(cudaContext, specialization.Use32BitIndices, args);

            var blockDims = ApplyUtils.GetApplyBlock();
            var gridDims = ApplyUtils.GetApplyGrid(deviceProperties, totalElements);

            string mangledName = PermutationGenerator.GetMangledName(baseName, specialization);
            var launcher = context.KernelCache.Get(cudaContext, ptx, mangledName);

            launcher.GridDimensions = gridDims;
            launcher.BlockDimensions = blockDims;
            launcher.RunAsync(CUstream.NullStream, args);
        }
        /// <summary>
        /// Validates the arguments and launches the scatter kernel with <paramref name="result"/> as the
        /// write target, passing <paramref name="src"/>, <paramref name="indices"/> and <paramref name="dim"/>.
        /// </summary>
        /// <param name="result">Tensor the kernel writes into; must not be null.</param>
        /// <param name="src">Source tensor; also used to resolve the CUDA context.</param>
        /// <param name="dim">Dimension argument; must be in <c>[0, result.DimensionCount)</c>.</param>
        /// <param name="indices">Index tensor; must have the same size as <paramref name="src"/>.</param>
        /// <returns>The <paramref name="result"/> tensor.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is outside the valid range.</exception>
        /// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are incompatible.</exception>
        public Tensor Scatter(Tensor result, Tensor src, int dim, Tensor indices)
        {
            TSCudaContext context     = CudaHelpers.TSContextForTensor(src);
            CudaContext   cudaContext = context.CudaContextForTensor(src);

            if (result == null)
            {
                throw new ArgumentNullException("result");
            }

            if (result.DimensionCount != src.DimensionCount)
            {
                throw new InvalidOperationException("result and src must have same number of dimensions");
            }

            // Must be '||': with '&&' the condition can never hold (dim cannot be both negative and
            // >= DimensionCount), so an invalid dim would reach the kernel unchecked.
            if (dim < 0 || dim >= result.DimensionCount)
            {
                throw new ArgumentOutOfRangeException("dim");
            }

            if (indices.DimensionCount != src.DimensionCount)
            {
                throw new InvalidOperationException("src and indices must have same number of dimensions");
            }

            if (!src.IsSameSizeAs(indices))
            {
                throw new InvalidOperationException("src and indices must be the same size");
            }

            if (!TensorResultBuilder.ArrayEqualExcept(src.Sizes, result.Sizes, dim))
            {
                throw new InvalidOperationException("result and src must be the same size except in dimension dim");
            }

            Tensor writeTarget = result;

            // The launch is sized by the number of index entries.
            long nElement = indices.ElementCount();
            dim3 block    = ApplyUtils.GetApplyBlock();
            dim3 grid     = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

            if (ApplyUtils.CanUse32BitIndexMath(writeTarget) &&
                ApplyUtils.CanUse32BitIndexMath(src) &&
                ApplyUtils.CanUse32BitIndexMath(indices))
            {
                // 32-bit path: kernels are specialised for 1-3 dimensions; -1 selects the generic variant.
                int    dims       = indices.DimensionCount <= 3 ? indices.DimensionCount : -1;
                string kernelName = MakeKernelName(ScatterBaseName, true, dims);
                Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, true,
                       writeTarget, src, indices, dim, (int)nElement);
            }
            else
            {
                // 64-bit path: element count is passed as long.
                string kernelName = MakeKernelName(ScatterBaseName, false, -1);
                Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, false,
                       writeTarget, src, indices, dim, nElement);
            }

            return(writeTarget);
        }
Beispiel #3
0
        /// <summary>
        /// Validates the arguments and launches the gather kernel, passing the write target,
        /// <paramref name="src"/>, <paramref name="indices"/> and <paramref name="dim"/>.
        /// </summary>
        /// <param name="result">Optional destination; when null the write target is obtained from
        /// <c>TensorResultBuilder.GetWriteTarget</c> using the sizes of <paramref name="indices"/>.</param>
        /// <param name="src">Source tensor; also used to resolve the CUDA context.</param>
        /// <param name="dim">Dimension argument; must be in <c>[0, src.DimensionCount)</c>.</param>
        /// <param name="indices">Index tensor; must have the same number of dimensions as <paramref name="src"/>.</param>
        /// <returns>The tensor that was written to.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is outside the valid range.</exception>
        /// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are incompatible.</exception>
        public Tensor Gather(Tensor result, Tensor src, int dim, Tensor indices)
        {
            var context     = CudaHelpers.TSContextForTensor(src);
            var cudaContext = context.CudaContextForTensor(src);

            if (result != null && result.DimensionCount != src.DimensionCount)
            {
                throw new InvalidOperationException("result and src must have same number of dimensions");
            }

            // The previous guard used '&&' and so could never fire; it was also skipped entirely when
            // result was null. Checking against src covers both cases: a non-null result is known to
            // have the same dimension count as src at this point.
            if (dim < 0 || dim >= src.DimensionCount)
            {
                throw new ArgumentOutOfRangeException(nameof(dim));
            }

            if (indices.DimensionCount != src.DimensionCount)
            {
                throw new InvalidOperationException("src and indices must have same number of dimensions");
            }

            if (result != null && !result.IsSameSizeAs(indices))
            {
                throw new InvalidOperationException("result and indices must be the same size");
            }

            if (result != null && !TensorResultBuilder.ArrayEqualExcept(src.Sizes, result.Sizes, dim))
            {
                throw new InvalidOperationException("result and src must be the same size except in dimension dim");
            }

            var writeTarget = TensorResultBuilder.GetWriteTarget(result, indices.Allocator, src.ElementType, false, indices.Sizes);

            // The launch is sized by the number of index entries.
            var nElement = indices.ElementCount();
            var block    = ApplyUtils.GetApplyBlock();
            var grid     = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

            if (ApplyUtils.CanUse32BitIndexMath(writeTarget) &&
                ApplyUtils.CanUse32BitIndexMath(src) &&
                ApplyUtils.CanUse32BitIndexMath(indices))
            {
                // 32-bit path: kernels are specialised for 1-3 dimensions; -1 selects the generic variant.
                var dims       = indices.DimensionCount <= 3 ? indices.DimensionCount : -1;
                var kernelName = MakeKernelName(GatherBaseName, true, dims);
                this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, true,
                            writeTarget, src, indices, dim, (int)nElement);
            }
            else
            {
                // 64-bit path: element count is passed with its original (wider) type.
                var kernelName = MakeKernelName(GatherBaseName, false, -1);
                this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, false,
                            writeTarget, src, indices, dim, nElement);
            }

            return(writeTarget);
        }
Beispiel #4
0
        /// <summary>
        /// Validates the arguments and launches <c>scatter_kernel</c> with <paramref name="result"/> as the
        /// write target, passing <paramref name="src"/>, <paramref name="indices"/> and <paramref name="dim"/>.
        /// Any exception is logged and then rethrown unchanged.
        /// </summary>
        /// <param name="result">Tensor the kernel writes into; must not be null.</param>
        /// <param name="src">Source tensor; also used to resolve the CUDA context.</param>
        /// <param name="dim">Dimension argument; must be in <c>[0, result.DimensionCount)</c>.</param>
        /// <param name="indices">Index tensor; must have the same size as <paramref name="src"/>.</param>
        /// <returns>The <paramref name="result"/> tensor.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is outside the valid range.</exception>
        /// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are incompatible.</exception>
        public Tensor Scatter(Tensor result, Tensor src, int dim, Tensor indices)
        {
            try
            {
                TSCudaContext context     = CudaHelpers.TSContextForTensor(src);
                CudaContext   cudaContext = context.CudaContextForTensor(src);

                if (result == null)
                {
                    throw new ArgumentNullException("result");
                }

                if (result.DimensionCount != src.DimensionCount)
                {
                    throw new InvalidOperationException($"result and src must have same number of dimensions. result dim count = '{result.DimensionCount}', source dim count = '{src.DimensionCount}'");
                }

                // Must be '||': with '&&' the condition can never hold (dim cannot be both negative and
                // >= DimensionCount), so an invalid dim would reach the kernel unchecked.
                if (dim < 0 || dim >= result.DimensionCount)
                {
                    throw new ArgumentOutOfRangeException("dim");
                }

                if (indices.DimensionCount != src.DimensionCount)
                {
                    throw new InvalidOperationException("src and indices must have same number of dimensions");
                }

                if (!src.IsSameSizeAs(indices))
                {
                    throw new InvalidOperationException("src and indices must be the same size");
                }

                if (!TensorResultBuilder.ArrayEqualExcept(src.Sizes, result.Sizes, dim))
                {
                    throw new InvalidOperationException("result and src must be the same size except in dimension dim");
                }

                Tensor writeTarget = result;

                // The launch is sized by the number of index entries.
                long nElement = indices.ElementCount();
                dim3 block    = ApplyUtils.GetApplyBlock();
                dim3 grid     = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

                Invoke(context, cudaContext, "scatter_kernel", grid, block, 0, CUstream.NullStream, false, writeTarget, src, indices, dim, nElement);

                return(writeTarget);
            }
            catch (Exception err)
            {
                // Log for diagnostics, then rethrow with 'throw;' so the original stack trace is kept.
                Logger.WriteLine($"Error = '{err.Message}', Call stack = '{err.StackTrace}'");
                throw;
            }
        }
Beispiel #5
0
        /// <summary>
        /// Validates the arguments and launches the scatter-fill kernel with <paramref name="result"/> as the
        /// write target, passing <paramref name="indices"/>, <paramref name="value"/> and <paramref name="dim"/>.
        /// </summary>
        /// <param name="result">Tensor the kernel writes into; must not be null.</param>
        /// <param name="value">Scalar passed to the kernel.</param>
        /// <param name="dim">Dimension argument; must be in <c>[0, result.DimensionCount)</c>.</param>
        /// <param name="indices">Index tensor; also used to resolve the CUDA context.</param>
        /// <returns>The <paramref name="result"/> tensor.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is outside the valid range.</exception>
        /// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are incompatible.</exception>
        public Tensor ScatterFill(Tensor result, float value, int dim, Tensor indices)
        {
            var context     = CudaHelpers.TSContextForTensor(indices);
            var cudaContext = context.CudaContextForTensor(indices);

            if (result == null)
            {
                throw new ArgumentNullException(nameof(result));
            }

            // Must be '||': with '&&' the condition can never hold (dim cannot be both negative and
            // >= DimensionCount), so an invalid dim would reach the kernel unchecked.
            if (dim < 0 || dim >= result.DimensionCount)
            {
                throw new ArgumentOutOfRangeException(nameof(dim));
            }

            if (indices.DimensionCount != result.DimensionCount)
            {
                throw new InvalidOperationException("result and indices must have same number of dimensions");
            }

            if (!TensorResultBuilder.ArrayEqualExcept(indices.Sizes, result.Sizes, dim))
            {
                throw new InvalidOperationException("result and indices must be the same size except in dimension dim");
            }

            var writeTarget = result;

            // The launch is sized by the number of index entries.
            var nElement = indices.ElementCount();
            var block    = ApplyUtils.GetApplyBlock();
            var grid     = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

            if (ApplyUtils.CanUse32BitIndexMath(writeTarget) &&
                ApplyUtils.CanUse32BitIndexMath(indices))
            {
                // 32-bit path: kernels are specialised for 1-3 dimensions; -1 selects the generic variant.
                var dims       = indices.DimensionCount <= 3 ? indices.DimensionCount : -1;
                var kernelName = MakeKernelName(ScatterFillBaseName, true, dims);
                this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, true,
                            writeTarget, indices, value, dim, (int)nElement);
            }
            else
            {
                // 64-bit path: element count is passed with its original (wider) type.
                var kernelName = MakeKernelName(ScatterFillBaseName, false, -1);
                this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, false,
                            writeTarget, indices, value, dim, nElement);
            }

            return(writeTarget);
        }
Beispiel #6
0
        /// <summary>
        /// Validates the arguments and launches <c>gather_kernel</c>, passing the write target,
        /// <paramref name="src"/>, <paramref name="indices"/> and <paramref name="dim"/>.
        /// </summary>
        /// <param name="result">Optional destination; when null the write target is obtained from
        /// <c>TensorResultBuilder.GetWriteTarget</c> using the sizes of <paramref name="indices"/>.</param>
        /// <param name="src">Source tensor; also used to resolve the CUDA context.</param>
        /// <param name="dim">Dimension argument; must be in <c>[0, src.DimensionCount)</c>.</param>
        /// <param name="indices">Index tensor; must have the same number of dimensions as <paramref name="src"/>.</param>
        /// <returns>The tensor that was written to.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is outside the valid range.</exception>
        /// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are incompatible.</exception>
        public Tensor Gather(Tensor result, Tensor src, int dim, Tensor indices)
        {
            TSCudaContext context     = CudaHelpers.TSContextForTensor(src);
            CudaContext   cudaContext = context.CudaContextForTensor(src);

            if (result != null && result.DimensionCount != src.DimensionCount)
            {
                throw new InvalidOperationException("result and src must have same number of dimensions");
            }

            // The previous guard used '&&' and so could never fire; it was also skipped entirely when
            // result was null. Checking against src covers both cases: a non-null result is known to
            // have the same dimension count as src at this point.
            if (dim < 0 || dim >= src.DimensionCount)
            {
                throw new ArgumentOutOfRangeException("dim");
            }

            if (indices.DimensionCount != src.DimensionCount)
            {
                throw new InvalidOperationException("src and indices must have same number of dimensions");
            }

            if (result != null && !result.IsSameSizeAs(indices))
            {
                throw new InvalidOperationException("result and indices must be the same size");
            }

            if (result != null && !TensorResultBuilder.ArrayEqualExcept(src.Sizes, result.Sizes, dim))
            {
                throw new InvalidOperationException("result and src must be the same size except in dimension dim");
            }

            Tensor writeTarget = TensorResultBuilder.GetWriteTarget(result, indices.Allocator, src.ElementType, false, indices.Sizes);

            // The launch is sized by the number of index entries.
            long nElement = indices.ElementCount();
            dim3 block    = ApplyUtils.GetApplyBlock();
            dim3 grid     = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

            Invoke(context, cudaContext, "gather_kernel", grid, block, 0, CUstream.NullStream, false, writeTarget, src, indices, dim, nElement);

            return(writeTarget);
        }
Beispiel #7
0
        /// <summary>
        /// Launches the kernel derived from <paramref name="baseName"/> over the arrays found in
        /// <paramref name="args"/>, on the null stream.
        /// </summary>
        /// <param name="context">Supplies device properties and the kernel cache.</param>
        /// <param name="cudaContext">CUDA context used for argument conversion and kernel lookup.</param>
        /// <param name="ptx">PTX image handed to the kernel cache.</param>
        /// <param name="baseName">Kernel base name; it is mangled with the apply-specialization before lookup.</param>
        /// <param name="args">Kernel arguments. The first <c>NDArray</c> among them determines the element count used to size the grid.</param>
        /// <exception cref="InvalidOperationException">Thrown by <c>First()</c> when <paramref name="args"/> contains no <c>NDArray</c>.</exception>
        public static void Invoke(TSCudaContext context, CudaContext cudaContext, byte[] ptx, string baseName, params object[] args)
        {
            ThrowIfAnyTensorInvalid(args);

            var deviceProperties = context.DeviceInfoForContext(cudaContext);

            // Collect the array arguments once; the first one drives the launch size and
            // the whole set drives the kernel specialization.
            var arrays         = args.OfType<NDArray>().ToArray();
            var totalElements  = arrays.First().ElementCount();
            var specialization = new ApplySpecialization(arrays);

            // NOTE(review): presumably rewrites the array entries of args into the form the
            // kernel expects (32- or 64-bit indexing) — confirm against ConvertTensorArgs.
            ConvertTensorArgs.Convert(cudaContext, specialization.Use32BitIndices, args);

            var blockDims = ApplyUtils.GetApplyBlock();
            var gridDims  = ApplyUtils.GetApplyGrid(deviceProperties, totalElements);

            var mangledName = PermutationGenerator.GetMangledName(baseName, specialization);
            var launcher    = context.KernelCache.Get(cudaContext, ptx, mangledName);

            launcher.GridDimensions  = gridDims;
            launcher.BlockDimensions = blockDims;
            launcher.RunAsync(CUstream.NullStream, args);
        }
Beispiel #8
0
        /// <summary>
        /// Validates the arguments and launches <c>scatterFill_kernel</c> with <paramref name="result"/> as the
        /// write target, passing <paramref name="indices"/>, <paramref name="value"/> and <paramref name="dim"/>.
        /// </summary>
        /// <param name="result">Tensor the kernel writes into; must not be null.</param>
        /// <param name="value">Scalar passed to the kernel.</param>
        /// <param name="dim">Dimension argument; must be in <c>[0, result.DimensionCount)</c>.</param>
        /// <param name="indices">Index tensor; also used to resolve the CUDA context.</param>
        /// <returns>The <paramref name="result"/> tensor.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is outside the valid range.</exception>
        /// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are incompatible.</exception>
        public Tensor ScatterFill(Tensor result, float value, int dim, Tensor indices)
        {
            TSCudaContext context     = CudaHelpers.TSContextForTensor(indices);
            CudaContext   cudaContext = context.CudaContextForTensor(indices);

            if (result == null)
            {
                throw new ArgumentNullException("result");
            }

            // Must be '||': with '&&' the condition can never hold (dim cannot be both negative and
            // >= DimensionCount), so an invalid dim would reach the kernel unchecked.
            if (dim < 0 || dim >= result.DimensionCount)
            {
                throw new ArgumentOutOfRangeException("dim");
            }

            if (indices.DimensionCount != result.DimensionCount)
            {
                throw new InvalidOperationException("result and indices must have same number of dimensions");
            }

            if (!TensorResultBuilder.ArrayEqualExcept(indices.Sizes, result.Sizes, dim))
            {
                throw new InvalidOperationException("result and indices must be the same size except in dimension dim");
            }

            Tensor writeTarget = result;

            // The launch is sized by the number of index entries.
            long nElement = indices.ElementCount();
            dim3 block    = ApplyUtils.GetApplyBlock();
            dim3 grid     = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

            Invoke(context, cudaContext, "scatterFill_kernel", grid, block, 0, CUstream.NullStream, false,
                   writeTarget, indices, value, dim, nElement);

            return(writeTarget);
        }