Ejemplo n.º 1
0
        /// <summary>
        /// Validates the arguments and launches the scatter CUDA kernel, writing into
        /// <paramref name="result"/> from <paramref name="src"/> and <paramref name="indices"/>
        /// along dimension <paramref name="dim"/>.
        /// </summary>
        /// <param name="result">Tensor that is written to. Must not be null.</param>
        /// <param name="src">Source tensor; also used to look up the CUDA context.</param>
        /// <param name="dim">Dimension to scatter along; must be in [0, result.DimensionCount).</param>
        /// <param name="indices">Index tensor; must have the same size as <paramref name="src"/>.</param>
        /// <returns>The <paramref name="result"/> instance.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is outside the valid range.</exception>
        /// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are incompatible.</exception>
        public Tensor Scatter(Tensor result, Tensor src, int dim, Tensor indices)
        {
            TSCudaContext context     = CudaHelpers.TSContextForTensor(src);
            CudaContext   cudaContext = context.CudaContextForTensor(src);

            if (result == null)
            {
                throw new ArgumentNullException(nameof(result));
            }

            if (result.DimensionCount != src.DimensionCount)
            {
                throw new InvalidOperationException("result and src must have same number of dimensions");
            }

            // A value cannot be both negative and >= DimensionCount, so the two range
            // conditions must be combined with '||' for the guard to ever fire.
            if (dim < 0 || dim >= result.DimensionCount)
            {
                throw new ArgumentOutOfRangeException(nameof(dim));
            }

            if (indices.DimensionCount != src.DimensionCount)
            {
                throw new InvalidOperationException("src and indices must have same number of dimensions");
            }

            if (!src.IsSameSizeAs(indices))
            {
                throw new InvalidOperationException("src and indices must be the same size");
            }

            if (!TensorResultBuilder.ArrayEqualExcept(src.Sizes, result.Sizes, dim))
            {
                throw new InvalidOperationException("result and src must be the same size except in dimension dim");
            }

            // Scatter always writes into the caller-supplied tensor.
            Tensor writeTarget = result;

            // The launch geometry is derived from the number of elements in indices.
            long nElement = indices.ElementCount();
            dim3 block    = ApplyUtils.GetApplyBlock();
            dim3 grid     = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

            bool use32BitIndices = ApplyUtils.CanUse32BitIndexMath(writeTarget) &&
                                   ApplyUtils.CanUse32BitIndexMath(src) &&
                                   ApplyUtils.CanUse32BitIndexMath(indices);

            if (use32BitIndices)
            {
                // Dimension counts above 3 map to the generic (-1) kernel variant.
                int    dims       = indices.DimensionCount <= 3 ? indices.DimensionCount : -1;
                string kernelName = MakeKernelName(ScatterBaseName, true, dims);
                Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, true,
                       writeTarget, src, indices, dim, (int)nElement);
            }
            else
            {
                string kernelName = MakeKernelName(ScatterBaseName, false, -1);
                Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, false,
                       writeTarget, src, indices, dim, nElement);
            }

            return writeTarget;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Validates the arguments and launches the gather CUDA kernel, reading from
        /// <paramref name="src"/> at the positions given by <paramref name="indices"/>
        /// along dimension <paramref name="dim"/>.
        /// </summary>
        /// <param name="result">
        /// Optional output tensor. When non-null it must have the same size as
        /// <paramref name="indices"/>; the write target is obtained from
        /// <c>TensorResultBuilder.GetWriteTarget</c> using the sizes of <paramref name="indices"/>.
        /// </param>
        /// <param name="src">Source tensor; also used to look up the CUDA context.</param>
        /// <param name="dim">Dimension to gather along; must be in [0, src.DimensionCount).</param>
        /// <param name="indices">Index tensor; must have the same number of dimensions as <paramref name="src"/>.</param>
        /// <returns>The tensor the kernel wrote to.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is outside the valid range.</exception>
        /// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are incompatible.</exception>
        public Tensor Gather(Tensor result, Tensor src, int dim, Tensor indices)
        {
            var context     = CudaHelpers.TSContextForTensor(src);
            var cudaContext = context.CudaContextForTensor(src);

            if (result != null && result.DimensionCount != src.DimensionCount)
            {
                throw new InvalidOperationException("result and src must have same number of dimensions");
            }

            // The range conditions must be combined with '||' (with '&&' the guard can never
            // fire). The check is made against src so that dim is validated even when no
            // result tensor was supplied; when result is non-null its dimension count was
            // just verified to equal src's.
            if (dim < 0 || dim >= src.DimensionCount)
            {
                throw new ArgumentOutOfRangeException(nameof(dim));
            }

            if (indices.DimensionCount != src.DimensionCount)
            {
                throw new InvalidOperationException("src and indices must have same number of dimensions");
            }

            if (result != null && !result.IsSameSizeAs(indices))
            {
                throw new InvalidOperationException("result and indices must be the same size");
            }

            if (result != null && !TensorResultBuilder.ArrayEqualExcept(src.Sizes, result.Sizes, dim))
            {
                throw new InvalidOperationException("result and src must be the same size except in dimension dim");
            }

            var writeTarget = TensorResultBuilder.GetWriteTarget(result, indices.Allocator, src.ElementType, false, indices.Sizes);

            // The launch geometry is derived from the number of elements in indices.
            var nElement = indices.ElementCount();
            var block    = ApplyUtils.GetApplyBlock();
            var grid     = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

            var use32BitIndices = ApplyUtils.CanUse32BitIndexMath(writeTarget) &&
                                  ApplyUtils.CanUse32BitIndexMath(src) &&
                                  ApplyUtils.CanUse32BitIndexMath(indices);

            if (use32BitIndices)
            {
                // Dimension counts above 3 map to the generic (-1) kernel variant.
                var dims       = indices.DimensionCount <= 3 ? indices.DimensionCount : -1;
                var kernelName = MakeKernelName(GatherBaseName, true, dims);
                this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, true,
                            writeTarget, src, indices, dim, (int)nElement);
            }
            else
            {
                var kernelName = MakeKernelName(GatherBaseName, false, -1);
                this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, false,
                            writeTarget, src, indices, dim, nElement);
            }

            return writeTarget;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Validates the arguments and launches the scatter-fill CUDA kernel, passing
        /// <paramref name="result"/>, <paramref name="indices"/>, <paramref name="value"/>
        /// and <paramref name="dim"/> to it.
        /// </summary>
        /// <param name="result">Tensor that is written to. Must not be null.</param>
        /// <param name="value">Scalar value handed to the kernel.</param>
        /// <param name="dim">Dimension to scatter along; must be in [0, result.DimensionCount).</param>
        /// <param name="indices">
        /// Index tensor; also used to look up the CUDA context. Must match
        /// <paramref name="result"/> in size except in dimension <paramref name="dim"/>.
        /// </param>
        /// <returns>The <paramref name="result"/> instance.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="result"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is outside the valid range.</exception>
        /// <exception cref="InvalidOperationException">The tensors' dimension counts or sizes are incompatible.</exception>
        public Tensor ScatterFill(Tensor result, float value, int dim, Tensor indices)
        {
            var context     = CudaHelpers.TSContextForTensor(indices);
            var cudaContext = context.CudaContextForTensor(indices);

            if (result == null)
            {
                throw new ArgumentNullException(nameof(result));
            }

            // A value cannot be both negative and >= DimensionCount, so the two range
            // conditions must be combined with '||' for the guard to ever fire.
            if (dim < 0 || dim >= result.DimensionCount)
            {
                throw new ArgumentOutOfRangeException(nameof(dim));
            }

            if (indices.DimensionCount != result.DimensionCount)
            {
                throw new InvalidOperationException("result and indices must have same number of dimensions");
            }

            if (!TensorResultBuilder.ArrayEqualExcept(indices.Sizes, result.Sizes, dim))
            {
                throw new InvalidOperationException("result and indices must be the same size except in dimension dim");
            }

            // Scatter-fill always writes into the caller-supplied tensor.
            var writeTarget = result;

            // The launch geometry is derived from the number of elements in indices.
            var nElement = indices.ElementCount();
            var block    = ApplyUtils.GetApplyBlock();
            var grid     = ApplyUtils.GetApplyGrid(context.DeviceInfoForContext(cudaContext), nElement);

            var use32BitIndices = ApplyUtils.CanUse32BitIndexMath(writeTarget) &&
                                  ApplyUtils.CanUse32BitIndexMath(indices);

            if (use32BitIndices)
            {
                // Dimension counts above 3 map to the generic (-1) kernel variant.
                var dims       = indices.DimensionCount <= 3 ? indices.DimensionCount : -1;
                var kernelName = MakeKernelName(ScatterFillBaseName, true, dims);
                this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, true,
                            writeTarget, indices, value, dim, (int)nElement);
            }
            else
            {
                var kernelName = MakeKernelName(ScatterFillBaseName, false, -1);
                this.Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, false,
                            writeTarget, indices, value, dim, nElement);
            }

            return writeTarget;
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Launches the index-select CUDA kernel on <paramref name="src"/> along dimension
        /// <paramref name="dim"/> using <paramref name="indices"/>.
        /// </summary>
        /// <param name="result">
        /// Optional output tensor, passed to <c>TensorResultBuilder.GetWriteTarget</c> together
        /// with the required output size (the sizes of <paramref name="src"/> with dimension
        /// <paramref name="dim"/> set to 1).
        /// </param>
        /// <param name="src">Source tensor; also used to look up the CUDA context.</param>
        /// <param name="dim">Dimension to select along; must be a valid index into <c>src.Sizes</c>.</param>
        /// <param name="indices">Tensor of indices handed to the kernel.</param>
        /// <returns>The tensor the kernel wrote to.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="dim"/> is not a valid dimension of <paramref name="src"/>.</exception>
        public Tensor IndexSelect(Tensor result, Tensor src, int dim, Tensor indices)
        {
            TSCudaContext context     = CudaHelpers.TSContextForTensor(src);
            CudaContext   cudaContext = context.CudaContextForTensor(src);

            long[] requiredOutputSize = (long[])src.Sizes.Clone();

            // Reject an invalid dimension explicitly (consistent with the other indexing
            // operations) instead of failing on the array access below.
            if (dim < 0 || dim >= requiredOutputSize.Length)
            {
                throw new ArgumentOutOfRangeException(nameof(dim));
            }

            requiredOutputSize[dim] = 1;
            Tensor writeTarget = TensorResultBuilder.GetWriteTarget(result, src, true, requiredOutputSize);

            // The `src` is partitioned into two parts:
            // -the size of each slice we are indexing, which is the
            // total size of the tensor ignoring dimension `dim`;
            // -the number of indices we are choosing, which is the total size
            // of the tensor `indices`.
            long numIndices       = indices.ElementCount();
            long dstTotalSize     = writeTarget.ElementCount();
            long srcSelectDimSize = src.Sizes[dim];
            // NOTE(review): an empty `indices` tensor makes this division throw
            // DivideByZeroException — confirm whether empty index sets must be supported.
            long sliceSize        = dstTotalSize / numIndices;

            // Two launch geometries: one sized by the slice (small index counts) and one
            // sized by the whole output; both are capped at 8 blocks per multiprocessor
            // and 128 threads per block.
            int  mpc             = context.DeviceInfoForContext(cudaContext).MultiProcessorCount;
            dim3 smallIndexGrid  = new dim3((uint)Math.Min(ApplyUtils.CeilDiv(sliceSize, 128), (mpc * 8)));
            dim3 smallIndexBlock = new dim3((uint)Math.Min(sliceSize, 128));

            dim3 largeIndexGrid  = new dim3((uint)Math.Min(ApplyUtils.CeilDiv(dstTotalSize, 128), (mpc * 8)));
            dim3 largeIndexBlock = new dim3((uint)Math.Min(dstTotalSize, 128));

            // The flattened views of the result and source tensors that used to be built
            // here were never read or released, so they have been removed.

            if (ApplyUtils.CanUse32BitIndexMath(writeTarget) &&
                ApplyUtils.CanUse32BitIndexMath(src) &&
                ApplyUtils.CanUse32BitIndexMath(indices))
            {
                // Threshold for small kernel
                bool smallKernel = numIndices <= 16;
                bool indContig   = indices.IsContiguous();

                // Specialized kernel variants are requested only for matching dimension
                // counts of at most 3 and contiguous indices; otherwise the generic one.
                bool specialized = writeTarget.DimensionCount == src.DimensionCount &&
                                   writeTarget.DimensionCount <= 3 &&
                                   indContig;

                string kernelName = specialized
                    ? MakeKernelName(smallKernel, true, writeTarget.DimensionCount, src.DimensionCount, -2)
                    : MakeKernelName(smallKernel, true, -1, -1, -1);

                dim3 grid  = smallKernel ? smallIndexGrid : largeIndexGrid;
                dim3 block = smallKernel ? smallIndexBlock : largeIndexBlock;

                // NOTE(review): when smallKernel is false this launch uses the large-index
                // geometry but the same argument list as the small-index case (no
                // dstTotalSize), unlike the 64-bit branch below — confirm against the
                // kernel signatures.
                Invoke(context, cudaContext, kernelName, grid, block, 0, CUstream.NullStream, true,
                       writeTarget, src, indices, dim, dim, sliceSize, srcSelectDimSize);
            }
            else
            {
                string kernelName = MakeKernelName(false, false, -1, -1, -1);

                Invoke(context, cudaContext, kernelName, largeIndexGrid, largeIndexBlock, 0, CUstream.NullStream, false,
                       writeTarget, src, indices, dim, dim, dstTotalSize, sliceSize, srcSelectDimSize);
            }

            return writeTarget;
        }