Code Example #1
        /// <summary>
        /// Copies a tensor from GPU memory to CPU memory.
        /// </summary>
        /// <param name="result">The destination CPU tensor.</param>
        /// <param name="src">The source GPU tensor.</param>
        /// <param name="totalElements">The total number of elements to copy.</param>
        /// <exception cref="CudaException">Thrown when the underlying CUDA memcpy fails.</exception>
        public void CopyGpuToCpu(Tensor result, Tensor src, long totalElements)
        {
            var context    = CudaHelpers.TSContextForTensor(src);
            var srcContext = context.CudaContextForTensor(src);

            using (var srcContig = Ops.AsContiguous(src))
                using (var resultContig = AsTypeCpu(result, src.ElementType, true))
                {
                    var resultContigPtr = ((Cpu.CpuStorage)resultContig.Storage).PtrAtElement(resultContig.StorageOffset);
                    var srcContigPtr    = ((CudaStorage)srcContig.Storage).DevicePtrAtElement(srcContig.StorageOffset);

                    var totalBytes = totalElements * srcContig.ElementType.Size();

                    // Use DriverAPINativeMethods directly here instead of CudaContext.CopyToHost, because CopyToHost only has an overload
                    // for specifying totalBytes as a uint, but we may exceed the range of a uint here.
                    var res = DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpyDtoH_v2(resultContigPtr, srcContigPtr, totalBytes);
                    if (res != CUResult.Success)
                    {
                        throw new CudaException(res);
                    }

                    if (result.Storage != resultContig.Storage)
                    {
                        Ops.Copy(result, resultContig); // copy on CPU
                    }
                }
        }
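All of the examples in this listing call Ops.AsContiguous before handing a raw pointer to a memcpy or a native kernel, because a permuted or sliced view shares storage with its parent and its elements are no longer laid out as one dense block. The helper below is an illustrative sketch, not part of any example above, built only from the calls shown in this listing; disposal of the returned tensor is glossed over.

        /// <summary>
        /// Illustrative sketch: returns a tensor whose elements are densely laid out in logical order,
        /// so its underlying storage can safely be handed to a single raw memcpy.
        /// </summary>
        private static Tensor EnsureDenseLayout(Tensor src)
        {
            if (src.IsContiguous())
            {
                return src; // already one dense block; a raw copy of the storage is safe
            }

            // Materialize a dense copy whose memory order matches the logical element order.
            return Ops.AsContiguous(src);
        }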
Code Example #2
        public void SpatialMaxPoolingBackward(Tensor input, Tensor gradOutput, Tensor gradInput, Tensor indices, ConvolutionDesc2d cd, bool ceilMode)
        {
            var context     = CudaHelpers.TSContextForTensor(gradOutput);
            var cudaContext = context.CudaContextForTensor(gradOutput);

            var dimw = 3;
            var dimh = 2;
            var dimc = 1;

            var nbatch  = input.Sizes[0];
            var nslices = input.Sizes[dimc];
            var iheight = input.Sizes[dimh];
            var iwidth  = input.Sizes[dimw];
            var owidth  = gradOutput.Sizes[dimw];
            var oheight = gradOutput.Sizes[dimh];

            using var gradOutputContig = Ops.AsContiguous(gradOutput);
            var gradOutputPtr = CudaHelpers.GetBufferStart(gradOutputContig);
            var indicesPtr    = CudaHelpers.GetBufferStart(indices);
            var gradInputPtr  = CudaHelpers.GetBufferStart(gradInput);

            var count = (int)input.ElementCount();
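            // One CUDA thread per input/gradInput element; the launch below sizes its grid from this count.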

            this.Invoke(context, cudaContext, "MaxPoolBackward", new dim3(NNThreads.NumBlocks(count)), new dim3(NNThreads.NumThreads), 0, CUstream.NullStream,
                        count, gradOutputPtr, indicesPtr, nbatch, nslices, iheight, iwidth, oheight, owidth,
                        cd.kH, cd.kW, cd.dH, cd.dW, cd.padH, cd.padW, gradInputPtr);
        }
Code Example #3
        public IWeightTensor Permute(IWeightTensor w, params int[] dims)
        {
            var          m   = w as WeightTensor;
            WeightTensor res = m_weightTensorFactory.CreateWeightTensor(m.Sizes, m_deviceId, name: $"{GetHashString(w.Name)}.Permute");

            VisualizeNodes(w, res);

            using (var tWPermute = m.TWeight.Permute(dims))
            {
                res.TWeight = Ops.AsContiguous(tWPermute);
            }

            if (m_needsBackprop)
            {
                Action backward = () =>
                {
                    using (var gT = m.TGradient.Permute(dims))
                    {
                        Ops.Add(gT, gT, res.TGradient);
                    }
                    res.Dispose();
                };
                this.m_backprop.Add(backward);
            }

            return(res);
        }
Code Example #4
        public static void SpatialMaxPoolingBackward(Tensor input, Tensor gradOutput, Tensor gradInput, Tensor indices, ConvolutionDesc2d cd, bool ceilMode)
        {
            int dimw = 3;
            int dimh = 2;
            int dimc = 1;

            long nbatch  = input.Sizes[0];
            long nslices = input.Sizes[dimc];
            long iheight = input.Sizes[dimh];
            long iwidth  = input.Sizes[dimw];
            long owidth  = gradOutput.Sizes[dimw];
            long oheight = gradOutput.Sizes[dimh];

            Ops.Fill(gradInput, 0);


            using Tensor gradOutputContig = Ops.AsContiguous(gradOutput);
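            // Process the batch one sample at a time: the native "frame" routine operates on a single (channels, height, width) slice.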
            for (int i = 0; i < nbatch; ++i)
            {
                using Tensor gradInput_i  = gradInput.Select(0, i);
                using Tensor gradOutput_i = gradOutputContig.Select(0, i);
                using Tensor indices_i    = indices.Select(0, i);
                using (NativeWrapper.BuildTensorRefPtr(gradInput_i, out IntPtr gradInput_iPtr))
                    using (NativeWrapper.BuildTensorRefPtr(gradOutput_i, out IntPtr gradOutput_iPtr))
                        using (NativeWrapper.BuildTensorRefPtr(indices_i, out IntPtr indices_iPtr))
                        {
                            CpuOpsNative.TS_SpatialMaxPooling_updateGradInput_frame(gradInput_iPtr, gradOutput_iPtr, indices_iPtr,
                                                                                    nslices, iwidth, iheight,
                                                                                    owidth, oheight,
                                                                                    cd.dW, cd.dH);
                        }
            }
        }
Code Example #5
        public IWeightMatrix PermuteBatch(IWeightMatrix m, int batchSize)
        {
            WeightTensor t              = m as WeightTensor;
            var          res            = weightTensorFactory.CreateWeightTensor(m.Rows, m.Columns, deviceId);
            int          sizeEveryBatch = m.Rows / batchSize;

            res.TWeight = Ops.AsContiguous(t.TWeight.View(sizeEveryBatch, batchSize, m.Columns).Permute(1, 0, 2)).View(m.Rows, m.Columns);

            if (this.needs_backprop)
            {
                Action backward = () =>
                {
                    var g  = t.TGradient.View(sizeEveryBatch, batchSize, m.Columns);
                    var t2 = res.TGradient.View(batchSize, sizeEveryBatch, m.Columns).Permute(1, 0, 2);
                    Ops.Add(g, g, t2);

                    g.Dispose();
                    t2.Dispose();
                    res.Dispose();
                };
                this.backprop.Add(backward);
            }


            return(res);
        }
Code Example #6
        public void SpatialMaxPoolingForward(Tensor input, Tensor output, Tensor indices, ConvolutionDesc2d cd, bool ceilMode)
        {
            var context     = CudaHelpers.TSContextForTensor(input);
            var cudaContext = context.CudaContextForTensor(input);

            var iwidth      = input.Sizes[3];
            var iheight     = input.Sizes[2];
            var nInputPlane = input.Sizes[1];
            var batchSize   = input.Sizes[0];

            long owidth;
            long oheight;

            if (ceilMode)
            {
                // ReSharper disable once ArrangeRedundantParentheses
                oheight = (long)(Math.Ceiling((float)(iheight - cd.kH + 2 * cd.padH) / cd.dH)) + 1;
                // ReSharper disable once ArrangeRedundantParentheses
                owidth = (long)(Math.Ceiling((float)(iwidth - cd.kW + 2 * cd.padW) / cd.dW)) + 1;
            }
            else
            {
                // ReSharper disable once ArrangeRedundantParentheses
                oheight = (long)(Math.Floor((float)(iheight - cd.kH + 2 * cd.padH) / cd.dH)) + 1;
                // ReSharper disable once ArrangeRedundantParentheses
                owidth = (long)(Math.Floor((float)(iwidth - cd.kW + 2 * cd.padW) / cd.dW)) + 1;
            }
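            // The branch above applies the standard pooling output-size formula o = ((i - k + 2*p) / d) + 1,
            // floored by default or ceiled when ceilMode is set. Illustrative numbers, not from the source:
            // iheight = 32, kH = 3, padH = 1, dH = 2 gives floor((32 - 3 + 2) / 2) + 1 = 16.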

            if (cd.padW != 0 || cd.padH != 0)
            {
                // ensure that the last pooling starts inside the image
                if ((oheight - 1) * cd.dH >= iheight + cd.padH)
                {
                    --oheight;
                }

                if ((owidth - 1) * cd.dW >= iwidth + cd.padW)
                {
                    --owidth;
                }
            }

            using var inputContig = Ops.AsContiguous(input);
            var inputPtr   = CudaHelpers.GetBufferStart(inputContig);
            var outputPtr  = CudaHelpers.GetBufferStart(output);
            var indicesPtr = CudaHelpers.GetBufferStart(indices);

            var count = (int)output.ElementCount();
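            // One CUDA thread per pooled output element; the launch below sizes its grid from this count.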

            this.Invoke(context, cudaContext, "MaxPoolForward", new dim3(NNThreads.NumBlocks(count)), new dim3(NNThreads.NumThreads), 0, CUstream.NullStream,
                        count, inputPtr, batchSize, nInputPlane, iheight, iwidth, oheight, owidth,
                        cd.kH, cd.kW, cd.dH, cd.dW, cd.padH, cd.padW, outputPtr, indicesPtr);
        }
Code Example #7
        public IWeightTensor TransposeBatch(IWeightTensor m, int batchSize)
        {
            WeightTensor t   = m as WeightTensor;
            WeightTensor res = m_weightTensorFactory.CreateWeightTensor(t.Sizes, m_deviceId, name: $"{GetHashString(m.Name)}.TransposeBatch", graphToBind: this);

            VisualizeNodes(m, res);

            int sizeEveryBatch = m.Rows / batchSize;

            using (Tensor tWView = t.TWeight.View(sizeEveryBatch, batchSize, m.Columns))
            {
                using (Tensor tWViewPermute = tWView.Permute(1, 0, 2))
                {
                    using (Tensor tW2 = Ops.AsContiguous(tWViewPermute))
                    {
                        res.TWeight = tW2.View(m.Rows, m.Columns);
                        res.Sizes   = res.TWeight.Sizes;
                    }
                }
            }

            if (m_needsBackprop)
            {
                Action backward = () =>
                {
                    res.ReleaseWeight();

                    using (Tensor g = t.TGradient.View(sizeEveryBatch, batchSize, m.Columns))
                    {
                        using (Tensor t2 = res.TGradient.View(batchSize, sizeEveryBatch, m.Columns))
                        {
                            using (Tensor t2Permute = t2.Permute(1, 0, 2))
                            {
                                Ops.Add(g, g, t2Permute);
                            }
                        }
                    }

                    res.Dispose();
                };
                m_backprop.Add(backward);
            }

            return(res);
        }
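Both PermuteBatch (Code Example #5) and TransposeBatch above reinterpret a (Rows x Columns) weight matrix as (sizeEveryBatch, batchSize, Columns), swap the first two axes, and flatten back, so rows interleaved by batch become grouped by batch. The helper below is an illustrative sketch, not from the source, of the resulting row mapping:

        /// <summary>
        /// Illustrative sketch: the destination row that the batch transpose above moves a source row to.
        /// </summary>
        private static int TransposedRowIndex(int sourceRow, int batchSize, int sizeEveryBatch)
        {
            int s = sourceRow / batchSize;   // position within the per-batch group
            int b = sourceRow % batchSize;   // index within the batch
            return b * sizeEveryBatch + s;   // rows end up grouped by batch index
        }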
Code Example #8
        /// <summary>
        /// Copies a tensor from CPU memory to GPU memory.
        /// </summary>
        /// <param name="result">The destination GPU tensor.</param>
        /// <param name="src">The source CPU tensor.</param>
        /// <param name="totalElements">The total number of elements to copy.</param>
        public void CopyCpuToGpu(Tensor result, Tensor src, long totalElements)
        {
            var context       = CudaHelpers.TSContextForTensor(result);
            var resultContext = context.CudaContextForTensor(result);

            // If types of src and result are different, convert on the CPU first.
            using (var srcContig = AsTypeCpu(src, result.ElementType, true))
                using (var resultContig = Ops.AsContiguous(result))
                {
                    var resultContigPtr = ((CudaStorage)resultContig.Storage).DevicePtrAtElement(resultContig.StorageOffset);
                    var srcContigPtr    = ((Cpu.CpuStorage)srcContig.Storage).PtrAtElement(srcContig.StorageOffset);

                    resultContext.CopyToDevice(resultContigPtr, srcContigPtr, totalElements * srcContig.ElementType.Size());

                    if (result.Storage != resultContig.Storage)
                    {
                        CopyGpuDirect(result, resultContig, resultContext);
                    }
                }
        }
Code Example #9
        public IWeightTensor AsContiguous(IWeightTensor w, bool runGradient = true)
        {
            WeightTensor m   = w as WeightTensor;
            WeightTensor res = m_weightTensorFactory.CreateWeightTensor(m.Sizes, m_deviceId, name: $"{GetHashString(w.Name)}.AsContiguous");

            VisualizeNodes(w, res);

            res.TWeight = Ops.AsContiguous(m.TWeight);

            if (m_needsBackprop && runGradient)
            {
                Action backward = () =>
                {
                    m.CopyOrAddGradient(res);

                    res.Dispose();
                };
                m_backprop.Add(backward);
            }

            return(res);
        }
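A hypothetical call site for the AsContiguous op above; the slice and hiddenSize names are illustrative and not taken from the source. Because AsContiguous preserves values, its backward step simply accumulates the output gradient back into the input via CopyOrAddGradient, as the example shows.

        /// <summary>
        /// Hypothetical helper showing a typical use of the AsContiguous graph op: make a permuted or
        /// sliced weight dense before reshaping it with the View op (Code Example #14).
        /// </summary>
        private IWeightTensor FlattenToMatrix(IWeightTensor slice, long hiddenSize)
        {
            IWeightTensor dense = AsContiguous(slice);  // value-preserving; only the memory layout changes
            return View(dense, true, -1, hiddenSize);   // flatten to (-1, hiddenSize); the -1 is inferred
        }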
Code Example #10
        /// <summary>
        /// Computes the backward pass of 2D spatial max pooling.
        /// </summary>
        /// <param name="input">The original input tensor of the forward pass.</param>
        /// <param name="gradOutput">The gradient with respect to the pooling output.</param>
        /// <param name="gradInput">The tensor that receives the gradient with respect to the input.</param>
        /// <param name="indices">The indices of the maximum elements recorded during the forward pass.</param>
        /// <param name="cd">The convolution/pooling descriptor (kernel size, stride and padding).</param>
        /// <param name="ceilMode">If set to <c>true</c>, output sizes were computed with ceiling instead of floor in the forward pass.</param>
        public static void SpatialMaxPoolingBackward(NDArray input, NDArray gradOutput, NDArray gradInput, NDArray indices, ConvolutionDesc2d cd, bool ceilMode)
        {
            var dimw = 3;
            var dimh = 2;
            var dimc = 1;

            var nbatch  = input.Shape[0];
            var nslices = input.Shape[dimc];
            var iheight = input.Shape[dimh];
            var iwidth  = input.Shape[dimw];
            var owidth  = gradOutput.Shape[dimw];
            var oheight = gradOutput.Shape[dimh];

            Ops.Fill(gradInput, 0);


            using (var gradOutputContig = Ops.AsContiguous(gradOutput))
            {
                for (int i = 0; i < nbatch; ++i)
                {
                    using (var gradInput_i = gradInput.Select(0, i))
                        using (var gradOutput_i = gradOutputContig.Select(0, i))
                            using (var indices_i = indices.Select(0, i))
                            {
                                IntPtr gradInput_iPtr, gradOutput_iPtr, indices_iPtr;
                                using (NativeWrapper.BuildTensorRefPtr(gradInput_i, out gradInput_iPtr))
                                    using (NativeWrapper.BuildTensorRefPtr(gradOutput_i, out gradOutput_iPtr))
                                        using (NativeWrapper.BuildTensorRefPtr(indices_i, out indices_iPtr))
                                        {
                                            CpuOpsNative.TS_SpatialMaxPooling_updateGradInput_frame(gradInput_iPtr, gradOutput_iPtr, indices_iPtr,
                                                                                                    nslices, iwidth, iheight,
                                                                                                    owidth, oheight,
                                                                                                    cd.dW, cd.dH);
                                        }
                            }
                }
            }
        }
Code Example #11
        public static unsafe TensorProto GetProto(this Tensor tensor)
        {
            var result = new TensorProto();
            var sizes  = tensor.Sizes.Select(l => (int)l);

            result.Shape.Add(sizes);
            result.Count  = sizes.Aggregate((a, i) => a * i);
            result.Type   = _dtypeToDataType[tensor.ElementType];
            result.Format = TensorFormat.RowMajor;

            tensor = Ops.AsContiguous(tensor);
            var bytes = new byte[tensor.Storage.ByteLength];

            fixed(byte *p = bytes)
            {
                IntPtr ptr = (IntPtr)p;

                tensor.Storage.CopyFromStorage(ptr, 0, bytes.Length);
            }

            result.Data = ByteString.CopyFrom(bytes);
            return(result);
        }
Code Example #12
        /// <summary>
        /// Copies between GPU tensors when a direct memcpy on the pair is not possible, by staging the data
        /// through contiguous proxy tensors.
        /// </summary>
        /// <param name="result">The destination GPU tensor.</param>
        /// <param name="src">The source GPU tensor.</param>
        /// <param name="totalElements">The total number of elements to copy.</param>
        /// <exception cref="CudaException">Thrown when the underlying CUDA memcpy fails.</exception>
        private void CopyGpuIndirect(Tensor result, Tensor src, long totalElements)
        {
            // This is only called if the tensors have the same type, but memcpy cannot be used on the tensor pair,
            // and we can't get direct access to the other GPU's memory.

            // We will make contiguous proxy tensors as necessary, so we can use cuMemcpy to perform the copy.
            // If result needs to be proxied, we then copy back from the contiguous proxy to result on the same GPU

            var context        = CudaHelpers.TSContextForTensor(src);
            var isResultContig = result.IsContiguous();
            var resultContig   = result;

            using (var srcContig = Ops.AsContiguous(src))
            {
                if (!isResultContig)
                {
                    resultContig = new Tensor(result.Allocator, result.ElementType, result.Shape);
                }

                var resultContigPtr = ((CudaStorage)resultContig.Storage).DevicePtrAtElement(resultContig.StorageOffset);
                var srcContigPtr    = ((CudaStorage)srcContig.Storage).DevicePtrAtElement(srcContig.StorageOffset);

                var res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAsync(
                    resultContigPtr, srcContigPtr, totalElements * srcContig.ElementType.Size(), CUstream.NullStream);
                if (res != CUResult.Success)
                {
                    throw new CudaException(res);
                }

                if (!isResultContig)
                {
                    CopyGpuDirect(result, resultContig, context.CudaContextForTensor(result));
                    resultContig.Dispose();
                }
            }
        }
Code Example #13
        /// <summary>
        /// Computes the forward pass of 2D spatial max pooling.
        /// </summary>
        /// <param name="input">The input tensor, expected to be 4D (batch, channels, height, width).</param>
        /// <param name="output">The tensor that receives the pooled output.</param>
        /// <param name="indices">The tensor that receives the indices of the maximum elements.</param>
        /// <param name="cd">The convolution/pooling descriptor (kernel size, stride and padding).</param>
        /// <param name="ceilMode">If set to <c>true</c>, output sizes are computed with ceiling instead of floor.</param>
        /// <exception cref="ArgumentException">input must be a 4D tensor</exception>
        /// <exception cref="InvalidOperationException">
        /// input image is smaller than kernel size
        /// or
        /// pad should be smaller than half of the kernel size
        /// </exception>
        public static void SpatialMaxPoolingForward(NDArray input, NDArray output, NDArray indices, ConvolutionDesc2d cd, bool ceilMode)
        {
            if (input.DimensionCount != 4)
            {
                throw new ArgumentException("input must be a 4D tensor");
            }

            var dimw = 3;
            var dimh = 2;
            var dimc = 1;

            if (input.Shape[dimw] < cd.kW - cd.padW || input.Shape[dimh] < cd.kH - cd.padH)
            {
                throw new InvalidOperationException("input image is smaller than kernel size");
            }

            if (cd.padW > cd.kW / 2 || cd.padH > cd.kH / 2)
            {
                throw new InvalidOperationException("pad should be smaller than half of the kernel size");
            }

            var nbatch  = input.Shape[0];
            var nslices = input.Shape[dimc];
            var iheight = input.Shape[dimh];
            var iwidth  = input.Shape[dimw];

            long owidth;
            long oheight;

            if (ceilMode)
            {
                oheight = (long)(Math.Ceiling((float)(iheight - cd.kH + 2 * cd.padH) / cd.dH)) + 1;
                owidth  = (long)(Math.Ceiling((float)(iwidth - cd.kW + 2 * cd.padW) / cd.dW)) + 1;
            }
            else
            {
                oheight = (long)(Math.Floor((float)(iheight - cd.kH + 2 * cd.padH) / cd.dH)) + 1;
                owidth  = (long)(Math.Floor((float)(iwidth - cd.kW + 2 * cd.padW) / cd.dW)) + 1;
            }

            if (cd.padW != 0 || cd.padH != 0)
            {
                // ensure that the last pooling starts inside the image
                if ((oheight - 1) * cd.dH >= iheight + cd.padH)
                {
                    --oheight;
                }
                if ((owidth - 1) * cd.dW >= iwidth + cd.padW)
                {
                    --owidth;
                }
            }

            using (var inputContig = Ops.AsContiguous(input))
            {
                for (int i = 0; i < nbatch; ++i)
                {
                    using (var input_i = inputContig.Select(0, i))
                        using (var output_i = output.Select(0, i))
                            using (var indices_i = indices.Select(0, i))
                            {
                                IntPtr input_iPtr, output_iPtr, indices_iPtr;
                                using (NativeWrapper.BuildTensorRefPtr(input_i, out input_iPtr))
                                    using (NativeWrapper.BuildTensorRefPtr(output_i, out output_iPtr))
                                        using (NativeWrapper.BuildTensorRefPtr(indices_i, out indices_iPtr))
                                        {
                                            CpuOpsNative.TS_SpatialMaxPooling_updateOutput_frame(input_iPtr, output_iPtr, indices_iPtr,
                                                                                                 nslices, iwidth, iheight,
                                                                                                 owidth, oheight,
                                                                                                 cd.kW, cd.kH, cd.dW, cd.dH, cd.padW, cd.padH);
                                        }
                            }
                }
            }
        }
Code Example #14
        public IWeightTensor View(IWeightTensor w, bool runGradient = true, params long[] dims)
        {
            bool hasNegOne      = false;
            int  negOneIdx      = 0;
            long totalGivenSize = 1;

            for (int i = 0; i < dims.Length; i++)
            {
                long dim = dims[i];
                if (dim == -1)
                {
                    if (hasNegOne)
                    {
                        throw new ArgumentException("The View operation only allows a single -1 in dims.");
                    }

                    hasNegOne = true;
                    negOneIdx = i;
                }
                else
                {
                    totalGivenSize *= dim;
                }
            }

            if (hasNegOne)
            {
                long totalSrcSize = 1;
                foreach (long size in w.Sizes)
                {
                    totalSrcSize *= size;
                }

                dims[negOneIdx] = totalSrcSize / totalGivenSize;
            }


            WeightTensor m   = w as WeightTensor;
            WeightTensor res = m_weightTensorFactory.CreateWeightTensor(dims, m_deviceId, name: w.Name, graphToBind: this);
            //  VisualizeNodes(w, res);


            Tensor contigW = Ops.AsContiguous(m.TWeight);

            m.ReleaseWeight();
            m.TWeight = contigW;

            res.TWeight = contigW.View(dims);


            if (m_needsBackprop)
            {
                Action backward = () =>
                {
                    if (runGradient)
                    {
                        res.ReleaseWeight();
                        using (Tensor resG = res.TGradient.View(m.Sizes))
                        {
                            m.CopyOrAddGradient(resG, res.Name);
                        }
                    }
                    res.Dispose();
                };
                m_backprop.Add(backward);
            }

            return(res);
        }