Example #1
 public void AddBias(CUDNNTensorDescriptor dstTensorDesc, Layer layer, int c, DeviceMemory<float> data)
 {
     _biasTensorDesc.Set4D(TensorFormat, DataType, 1, c, 1, 1);
     const float alpha = 1.0f;
     const float beta = 1.0f;
     _cudnn.AddTensor(CUDNNInterop.cudnnAddMode_t.CUDNN_ADD_SAME_C, alpha, _biasTensorDesc, layer.BiasD.Ptr, beta, dstTensorDesc, data.Ptr);
 }
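With CUDNN_ADD_SAME_C the 1 x c x 1 x 1 bias tensor is broadcast over the batch and spatial dimensions of the destination, and because beta is 1.0 the convolution output already stored there is accumulated rather than overwritten. A minimal CPU sketch of the same arithmetic, for illustration only (the array names and the NCHW indexing are assumptions, not part of the sample):

static void AddBiasReference(float[] dst, float[] bias, int n, int c, int h, int w,
                             float alpha = 1.0f, float beta = 1.0f)
{
    // For every (ni, ci, hi, wi): dst = alpha * bias[ci] + beta * dst,
    // i.e. the per-channel bias is broadcast across N, H and W.
    for (var ni = 0; ni < n; ni++)
        for (var ci = 0; ci < c; ci++)
            for (var hi = 0; hi < h; hi++)
                for (var wi = 0; wi < w; wi++)
                {
                    var idx = ((ni * c + ci) * h + hi) * w + wi;   // NCHW layout
                    dst[idx] = alpha * bias[ci] + beta * dst[idx];
                }
}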
Example #2
        //[CudnnMnistFCF]
        public void FullyConnectedForward(Layer ip, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
        {
            if (nchw.N != 1) throw new NotImplementedException();
            var dimX = nchw.C * nchw.H * nchw.W;
            var dimY = ip.Outputs;
            Resize(ref dstData, dimY);

            const float alpha = 1.0f;
            const float beta = 1.0f;

            // This cuMemcpyDtoD is a raw CUDA API call so it should be guarded with worker.Eval
            var output = dstData;
            _worker.EvalAction(() => CUDAInterop.cuMemcpyDtoD(output.Ptr.Handle, ip.BiasD.Handle, (IntPtr)(dimY * sizeof(float))));

            // This cublas call doesn't need worker.Eval because cublas is a thin wrapper for the raw API
            // and it already calls worker.Eval internally.
            _cublas.Sgemv(CUBLASInterop.cublasOperation_t.CUBLAS_OP_T, dimX, dimY, alpha, ip.DataD.Ptr, dimX,
                srcData.Ptr, 1, beta, dstData.Ptr, 1);

            nchw.H = 1;
            nchw.W = 1;
            nchw.C = dimY;
        }
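The two comments above capture this sample's threading rule: raw driver calls such as cuMemcpyDtoD have to be evaluated through the worker so they run on its CUDA context, whereas Alea's cuBLAS wrapper already does that internally. Numerically, the method first copies the bias into the output and then lets Sgemv with beta = 1 accumulate W^T * x on top of it. A CPU reference sketch of that computation, for illustration only (the flat column-major weight layout and the array names are assumptions):

static void FullyConnectedForwardReference(float[] weights, float[] bias,
                                           float[] x, float[] y, int dimX, int dimY)
{
    // Step 1: y <- bias, mirroring the cuMemcpyDtoD above.
    System.Array.Copy(bias, y, dimY);

    // Step 2: y <- W^T * x + y, mirroring the CUBLAS_OP_T Sgemv with alpha = beta = 1.
    // weights is a dimX-by-dimY matrix stored column-major with leading dimension dimX.
    for (var j = 0; j < dimY; j++)
    {
        var acc = 0.0f;
        for (var i = 0; i < dimX; i++)
            acc += weights[j * dimX + i] * x[i];
        y[j] += acc;
    }
}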
Example #3
        //[CudnnMnistClassify]
        public int ClassifyExample(string fname, Layer conv1, Layer conv2, Layer ip1, Layer ip2)
        {
            var nchw = new nchw_t()
            {
                N = 1,
                C = 1,
                H = Data.ImageH,
                W = Data.ImageW
            };

            var imgDataH = new float[Data.ImageH * Data.ImageW];
            var oHostSrc = Data.LoadImage(fname).Select(x => (int)x).ToArray();
            for (var i = 0; i < Data.ImageH; i++)
            {
                for (var j = 0; j < Data.ImageW; j++)
                {
                    var idx = Data.ImageW * i + j;
                    imgDataH[idx] = oHostSrc[idx] / 255.0f;
                }
            }

            using (var srcData = _worker.Malloc(imgDataH))
            using (var dstData = _worker.Malloc<float>(0))
            {
                Console.WriteLine("Performing forward propagation...");
                var src = srcData;
                var dst = dstData;

                ConvoluteForward(conv1, nchw, src, ref dst);
                PoolForward(nchw, dst, ref src);

                ConvoluteForward(conv2, nchw, src, ref dst);
                PoolForward(nchw, dst, ref src);

                FullyConnectedForward(ip1, nchw, src, ref dst);
                ActivationForward(nchw, dst, ref src);

                FullyConnectedForward(ip2, nchw, src, ref dst);
                SoftmaxForward(nchw, dst, ref src);

                Console.WriteLine("Finished forward propagation.");
                const int maxDigits = 10;
                var hsrc = src.Gather();
                var result = hsrc.Take(maxDigits).ToArray();
                var id = 0;
                for (var i = 1; i < maxDigits; i++)
                    if (result[id] < result[i])
                        id = i;
                Console.WriteLine("Classification Complete.\n");
                return id;
            }
        }
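A hypothetical driver for this method could look like the sketch below; the image file names, the expected labels, and the way the four trained layers were obtained are assumptions for illustration, not the sample's actual API.

// Hypothetical usage sketch: file names and expected labels are assumptions.
var testCases = new[]
{
    (File: "data/one_28x28.pgm",   Expected: 1),
    (File: "data/three_28x28.pgm", Expected: 3),
    (File: "data/five_28x28.pgm",  Expected: 5)
};

foreach (var t in testCases)
{
    var predicted = ClassifyExample(t.File, conv1, conv2, ip1, ip2);
    Console.WriteLine($"{t.File}: predicted {predicted}, expected {t.Expected}");
}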
Example #4
        //[CudnnMnistCF]
        public void ConvoluteForward(Layer conv, nchw_t nchw, DeviceMemory<float> srcData, ref DeviceMemory<float> dstData)
        {
            _srcTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
            _filterDesc.Set4D(DataType, conv.Outputs, conv.Inputs, conv.KernelDim, conv.KernelDim);
            _convDesc.Set2D(0, 0, 1, 1, 1, 1, CUDNNInterop.cudnnConvolutionMode_t.CUDNN_CROSS_CORRELATION);
            // find dimension of convolution output
            // outputDim = 1 + (inputDim + 2*pad - filterDim) / convolutionStride
            int n, c, h, w;
            _convDesc.Get2DForwardOutputDim(_srcTensorDesc, _filterDesc, out n, out c, out h, out w);
            nchw.N = n;
            nchw.C = c;
            nchw.H = h;
            nchw.W = w;
            _dstTensorDesc.Set4D(TensorFormat, DataType, nchw.N, nchw.C, nchw.H, nchw.W);
            var algo = _cudnn.GetConvolutionForwardAlgorithm(_srcTensorDesc, _filterDesc, _convDesc, _dstTensorDesc,
                CUDNNInterop.cudnnConvolutionFwdPreference_t.CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, (IntPtr)0);

            Resize(ref dstData, nchw.N * nchw.C * nchw.H * nchw.W);
            var sizeInBytes = _cudnn.GetConvolutionForwardWorkspaceSize(_srcTensorDesc, _filterDesc, _convDesc, _dstTensorDesc, algo);

            using (var workSpace = _worker.Malloc<byte>(sizeInBytes.ToInt32()))
            {
                const float alpha = 1.0f;
                const float beta = 0.0f;
                _cudnn.ConvolutionForward(alpha, _srcTensorDesc, srcData.Ptr, _filterDesc, conv.DataD.Ptr, _convDesc, algo, workSpace.Ptr, sizeInBytes, beta, _dstTensorDesc, dstData.Ptr);
                AddBias(_dstTensorDesc, conv, c, dstData);
            }
        }
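The comment in the code restates cuDNN's forward output size formula. A quick worked instance, using MNIST-style numbers (28 x 28 input, 5 x 5 kernel, zero padding, unit stride) purely for illustration:

// outputDim = 1 + (inputDim + 2*pad - filterDim) / convolutionStride
static int ForwardOutputDim(int inputDim, int pad, int filterDim, int stride) =>
    1 + (inputDim + 2 * pad - filterDim) / stride;

// ForwardOutputDim(28, 0, 5, 1) == 24: a 1x1x28x28 source convolved with 5x5 kernels
// yields 24x24 feature maps, so a filter bank with 20 outputs would produce a
// 1x20x24x24 destination tensor.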