Example #1
        // Forward pass without cuDNN: every output channel is built up from per-input-channel NPP convolutions.
        public override float Inference(CudaDeviceVariable<float> input)
        {
            _input = input;

            // Scratch image that receives the result of each single input-channel convolution.
            NPPImage_32fC1 tempConv = new NPPImage_32fC1(_tempConvolution.DevicePointer, InWidth, InHeight, InWidth * sizeof(float));

            for (int outLayer = 0; outLayer < OutChannels; outLayer++)
            {
                // View output channel 'outLayer' of _z as a single-channel image and clear it;
                // the contributions of all input channels are accumulated into it below.
                SizeT          offsetOut        = outLayer * OutWidth * OutHeight * sizeof(float);
                CUdeviceptr    ptrWithOffsetOut = _z.DevicePointer + offsetOut;
                NPPImage_32fC1 imgOut           = new NPPImage_32fC1(ptrWithOffsetOut, OutWidth, OutHeight, OutWidth * sizeof(float));
                imgOut.Set(0);

                for (int inLayer = 0; inLayer < InChannels; inLayer++)
                {
                    // View input channel 'inLayer' of the input buffer as a single-channel image.
                    SizeT          offsetIn        = inLayer * InWidth * InHeight * sizeof(float);
                    CUdeviceptr    ptrWithOffsetIn = _input.DevicePointer + offsetIn;
                    NPPImage_32fC1 imgIn           = new NPPImage_32fC1(ptrWithOffsetIn, InWidth, InHeight, InWidth * sizeof(float));

                    // Restrict the source ROI so the filter window (anchored at its centre) never leaves the image.
                    imgIn.SetRoi(_filterX / 2, _filterY / 2, InWidth - _filterX + 1, InHeight - _filterY + 1);

                    // Select the filter kernel for this (output channel, input channel) pair from the packed weight buffer.
                    SizeT offsetFilter = (outLayer * InChannels * _filterX * _filterY + inLayer * _filterX * _filterY) * sizeof(float);
                    CudaDeviceVariable<float> filter = new CudaDeviceVariable<float>(_weights.DevicePointer + offsetFilter, false, _filterX * _filterY * sizeof(float));

                    // Convolve this input channel with its filter and accumulate the result into the output channel.
                    imgIn.Filter(tempConv, filter, new NppiSize(_filterX, _filterY), new NppiPoint(_filterX / 2, _filterY / 2));
                    imgOut.Add(tempConv);
                }
                // Add the channel bias (bHost is the host-side copy of the bias values).
                imgOut.Add(bHost[outLayer]);
            }

            switch (_activation)
            {
            case Activation.None:
                _y.CopyToDevice(_z);
                break;

            case Activation.Relu:
                // ReLU, PReLU and LeakyReLU all share the same forward kernel;
                // they only differ in the negative slopes stored in _aRelu (all zero for ReLU).
                _KernelPReluForward.RunSafe(_z, _aRelu, _y, _outWidth * _outHeight, _outChannels, _batch);
                break;

            case Activation.PRelu:
                _KernelPReluForward.RunSafe(_z, _aRelu, _y, _outWidth * _outHeight, _outChannels, _batch);
                break;

            case Activation.LeakyRelu:
                _KernelPReluForward.RunSafe(_z, _aRelu, _y, _outWidth * _outHeight, _outChannels, _batch);
                break;

            default:
                break;
            }

            return _nextLayer.Inference(_y);
        }
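
The ROI in Inference and the explicit border in InferenceTraining are two ways of handling the filter window at the image edge. For an odd filter width F the size arithmetic works out as in the short standalone sketch below (plain C#, not part of the layer; it assumes _descConv itself uses zero padding, so cuDNN runs a "valid" convolution on the pre-padded input):

        using System;

        // Size arithmetic for an odd filter width F; standalone sketch, not library code.
        int W = 28, F = 5;
        int pad = F / 2;                        // border used by SetRoi and _kernelAddBorder

        int validOut  = W - F + 1;              // Inference: the ROI keeps the window inside the image
        int paddedOut = (W + 2 * pad) - F + 1;  // InferenceTraining: pad first, then a "valid" convolution

        Console.WriteLine($"valid: {validOut}, padded: {paddedOut}");  // valid: 24, padded: 28 (== W)
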
        // Training-time forward pass: pads the input explicitly and lets cuDNN do the convolution.
        public override float InferenceTraining(CudaDeviceVariable<float> input)
        {
            // Pad every input channel with a border of _filterX / 2 pixels before the cuDNN convolution.
            _kernelAddBorder.RunSafe(input, _withBorderInput, _inChannels, _batch, _inWidth, _filterX / 2);

            _input = _withBorderInput;

            // Convolution via cuDNN using the pre-selected forward algorithm ...
            _cudnn.ConvolutionForward(alpha, _descDataIn, _input,
                                      _descFilter, _weights, _descConv,
                                      _algoFwd, _workspace, beta,
                                      _descDataOut, _z);

            // ... followed by adding the per-channel bias to _z.
            _cudnn.AddTensor(alpha, _descBias, _bias, alpha, _descDataOut, _z);

            switch (_activation)
            {
            case Activation.None:
                _y.CopyToDevice(_z);
                break;

            case Activation.Relu:
                // Plain ReLU is available as a cuDNN activation, so it goes through cuDNN directly.
                _cudnn.ActivationForward(_descActivation, alpha, _descDataOut, _z, beta, _descDataOut, _y);
                break;

            case Activation.PRelu:
                // cuDNN has no PReLU/LeakyReLU activation mode, so the custom kernel is used instead.
                _KernelPReluForward.RunSafe(_z, _aRelu, _y, _outWidth * _outHeight, _outChannels, _batch);
                break;

            case Activation.LeakyRelu:
                _KernelPReluForward.RunSafe(_z, _aRelu, _y, _outWidth * _outHeight, _outChannels, _batch);
                break;

            default:
                break;
            }

            return _nextLayer.InferenceTraining(_y);
        }
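
All three rectifier variants map onto the same forward kernel because they share one formula, f(x) = max(0, x) + a * min(0, x); only the slope a differs (zero for ReLU, a small fixed constant for LeakyReLU, a learned value for PReLU). Below is a minimal CPU reference of that element-wise formula for a single sample, independent of the CUDA kernel; the per-channel layout of the slopes is an assumption about how _aRelu is organised, not something the listing above guarantees:

        // CPU reference of the PReLU-family forward formula f(x) = max(0, x) + a * min(0, x).
        // 'slopePerChannel' plays the role of _aRelu; channel-major layout of z and y is assumed.
        static void PReluForward(float[] z, float[] slopePerChannel, float[] y, int spatialSize, int channels)
        {
            for (int c = 0; c < channels; c++)
            {
                float a = slopePerChannel[c];
                for (int i = 0; i < spatialSize; i++)
                {
                    float x = z[c * spatialSize + i];
                    y[c * spatialSize + i] = x > 0 ? x : a * x;
                }
            }
        }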