예제 #1
0
        /// <summary>
        /// Draws a text string into a device image by running the <c>Observers\DrawDigitsKernel</c> kernel
        /// obtained for the last device (<c>DevCount - 1</c>).
        /// </summary>
        /// <param name="str">Text to draw. It is cropped to <paramref name="maxStringSize"/> characters.</param>
        /// <param name="x">X position forwarded to the kernel.</param>
        /// <param name="y">Y position forwarded to the kernel.</param>
        /// <param name="bgColor">Value written to the kernel constant <c>D_BG_COLOR</c>.</param>
        /// <param name="fgColor">Value written to the kernel constant <c>D_FG_COLOR</c>.</param>
        /// <param name="image">Device pointer of the target image.</param>
        /// <param name="imageWidth">Value written to the kernel constant <c>D_IMAGE_WIDTH</c>.</param>
        /// <param name="imageHeight">Value written to the kernel constant <c>D_IMAGE_HEIGHT</c>.</param>
        /// <param name="maxStringSize">Maximum number of characters drawn (default 20).</param>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="maxStringSize"/> is negative.</exception>
        /// <exception cref="ArgumentException">The cropped string is longer than 200 characters.</exception>
        public static void DrawString(string str, int x, int y, uint bgColor, uint fgColor, CUdeviceptr image, int imageWidth, int imageHeight, int maxStringSize = 20)
        {
            // Mirrors the kernel-side declaration: __constant__ int D_DIGIT_INDEXES[200];
            const int KernelIndexCapacity = 200;

            if (str == null)
                throw new ArgumentNullException("str");

            // Substring would raise the same exception type; fail with the parameter name instead
            if (maxStringSize < 0)
                throw new ArgumentOutOfRangeException("maxStringSize");

            // Crop if the string is too long
            if (str.Length > maxStringSize)
                str = str.Substring(0, maxStringSize);

            if (str.Length > KernelIndexCapacity)
            {
                throw new ArgumentException("Hardcoded value in DrawDigitsKernel.cs");
            }

            // Nothing to draw: avoid setting up an execution over zero elements
            // (NOTE(review): SetupExecution(0) is assumed to be invalid or pointless - confirm)
            if (str.Length == 0)
                return;

            MyCudaKernel m_drawDigitKernel = MyKernelFactory.Instance.Kernel(MyKernelFactory.Instance.DevCount - 1, @"Observers\DrawDigitsKernel");
            CudaDeviceVariable<float> characters = MyMemoryManager.Instance.GetGlobalVariable<float>("CHARACTERS_TEXTURE", MyKernelFactory.Instance.DevCount - 1, LoadDigits);

            // Colors, image geometry and character-map geometry
            m_drawDigitKernel.SetConstantVariable("D_BG_COLOR", bgColor);
            m_drawDigitKernel.SetConstantVariable("D_FG_COLOR", fgColor);
            m_drawDigitKernel.SetConstantVariable("D_IMAGE_WIDTH", imageWidth);
            m_drawDigitKernel.SetConstantVariable("D_IMAGE_HEIGHT", imageHeight);
            m_drawDigitKernel.SetConstantVariable("D_DIGIT_WIDTH", CharacterWidth);
            m_drawDigitKernel.SetConstantVariable("D_DIGIT_SIZE", CharacterSize);
            m_drawDigitKernel.SetConstantVariable("D_DIGITMAP_NBCHARS", CharacterMapNbChars);

            // Per-character indexes into the character map
            int[] indexes = StringToDigitIndexes(str);
            m_drawDigitKernel.SetConstantVariable("D_DIGIT_INDEXES", indexes);
            m_drawDigitKernel.SetConstantVariable("D_DIGIT_INDEXES_LEN", indexes.Length);

            // One work item per character pixel: CharacterSize items for each index
            m_drawDigitKernel.SetupExecution(CharacterSize * indexes.Length);
            m_drawDigitKernel.Run(image, characters.DevicePointer, x, y);
        }
예제 #2
0
        /// <summary>
        /// Declaration of the externally implemented routine <c>cudnnActivationForward</c>
        /// (variant with single-precision <paramref name="alpha"/> and <paramref name="beta"/>).
        /// </summary>
        /// <remarks>
        /// The source tensor is given as a descriptor plus device pointer
        /// (<paramref name="srcDesc"/>, <paramref name="srcData"/>), and so is the destination
        /// (<paramref name="destDesc"/>, <paramref name="destData"/>).
        /// NOTE(review): the interop attribute for this declaration is not visible here; exact scaling
        /// semantics of alpha/beta should be confirmed against the cuDNN documentation.
        /// </remarks>
        /// <returns>Status code reported by the native call.</returns>
        public static extern cudnnStatus cudnnActivationForward(
            cudnnHandle handle,
            cudnnActivationMode mode,
            ref float alpha,
            cudnnTensorDescriptor srcDesc, CUdeviceptr srcData,
            ref float beta,
            cudnnTensorDescriptor destDesc, CUdeviceptr destData);
예제 #3
0
 /// <summary>
 /// Declaration of the externally implemented routine <c>cublasAxpyEx</c>.
 /// </summary>
 /// <remarks>
 /// Each operand (<paramref name="alpha"/>, <paramref name="x"/>, <paramref name="y"/>) is passed as a
 /// <c>CUdeviceptr</c> accompanied by a <c>cudaDataType</c>; <paramref name="alpha"/> may be a host or
 /// device pointer according to the original inline note.
 /// NOTE(review): presumably computes y = alpha * x + y over <paramref name="n"/> elements with
 /// strides incx/incy - confirm against the cuBLAS documentation.
 /// </remarks>
 /// <returns>Status code reported by the native call.</returns>
 public static extern CublasStatus cublasAxpyEx(
     CudaBlasHandle handle,
     int n,
     CUdeviceptr alpha, /* host or device pointer */
     cudaDataType alphaType,
     CUdeviceptr x, cudaDataType xType, int incx,
     CUdeviceptr y, cudaDataType yType, int incy,
     cudaDataType executiontype);
예제 #4
0
        /// <summary>
        /// Combines <paramref name="firstInput"/> with the other inputs by running <c>m_XORKernel</c>
        /// with the <c>MyJoin.MyJoinOperation.XOR</c> operation code.
        /// </summary>
        /// <param name="firstInput">Device pointer of the first operand.</param>
        /// <param name="otherInputs">
        /// Remaining operands; the LAST element is used as the output buffer. When null or empty,
        /// <paramref name="firstInput"/> is used as the only element (and therefore as the output).
        /// </param>
        public override void Bind(CUdeviceptr firstInput, params CUdeviceptr[] otherInputs)
        {
            // A params call without extra arguments yields an empty array (not null);
            // treat both the same way so the indexing below cannot go out of range.
            if (otherInputs == null || otherInputs.Length == 0)
                otherInputs = new CUdeviceptr[] { firstInput };

            var output = otherInputs[otherInputs.Length - 1];

            // First step: firstInput (op) otherInputs[0] -> output
            m_XORKernel.Run(firstInput, otherInputs[0], output, (int)MyJoin.MyJoinOperation.XOR, m_inputSize);

            // Fold the remaining inputs into output; the last element is the output itself and is skipped
            for (int i = 1; i < otherInputs.Length - 1; ++i)
                m_XORKernel.Run(otherInputs[i], output, output, (int)MyJoin.MyJoinOperation.XOR, m_inputSize);
        }
예제 #5
0
        /// <summary>
        /// Declaration of the externally implemented routine <c>cusparseCbsric02</c>.
        /// </summary>
        /// <remarks>
        /// The matrix is supplied as three device pointers (<paramref name="bsrVal"/>,
        /// <paramref name="bsrRowPtr"/>, <paramref name="bsrColInd"/>) plus a descriptor, together with
        /// an info object, a solve policy and a device work buffer (<paramref name="pBuffer"/>).
        /// NOTE(review): the interop attribute is not visible here; operation details should be taken
        /// from the cuSPARSE documentation.
        /// </remarks>
        /// <returns>Status code reported by the native call.</returns>
        public static extern cusparseStatus cusparseCbsric02(
            cusparseContext handle,
            cusparseDirection dirA,
            int mb,
            int nnzb,
            cusparseMatDescr descrA,
            CUdeviceptr bsrVal, CUdeviceptr bsrRowPtr, CUdeviceptr bsrColInd,
            int blockDim,
            bsric02Info info,
            cusparseSolvePolicy policy,
            CUdeviceptr pBuffer);
예제 #6
0
        /// <summary>
        /// Declaration of the externally implemented routine <c>cusparseCbsr2csr</c>.
        /// </summary>
        /// <remarks>
        /// Takes one matrix as descriptor <paramref name="descrA"/> with device pointers
        /// bsrValA / bsrRowPtrA / bsrColIndA and a second one as descriptor <paramref name="descrC"/>
        /// with device pointers csrValC / csrRowPtrC / csrColIndC.
        /// NOTE(review): judging by the name, A (BSR) is the input and C (CSR) the output - confirm
        /// against the cuSPARSE documentation.
        /// </remarks>
        /// <returns>Status code reported by the native call.</returns>
        public static extern cusparseStatus cusparseCbsr2csr(
            cusparseContext handle,
            cusparseDirection dirA,
            int mb,
            int nb,
            cusparseMatDescr descrA,
            CUdeviceptr bsrValA, CUdeviceptr bsrRowPtrA, CUdeviceptr bsrColIndA,
            int blockDim,
            cusparseMatDescr descrC,
            CUdeviceptr csrValC, CUdeviceptr csrRowPtrC, CUdeviceptr csrColIndC);
예제 #7
0
        /// <summary>
        /// Declaration of the externally implemented routine <c>cudnnActivationBackward</c>
        /// (variant taking an activation descriptor and single-precision scaling factors).
        /// </summary>
        /// <remarks>
        /// Four tensors are passed, each as a descriptor plus device pointer: src, srcDiff, dest and
        /// destDiff.
        /// NOTE(review): the interop attribute is not visible here; which tensors are read and which
        /// are written should be confirmed against the cuDNN documentation.
        /// </remarks>
        /// <returns>Status code reported by the native call.</returns>
        public static extern cudnnStatus cudnnActivationBackward(
            cudnnHandle handle,
            cudnnActivationDescriptor activationDesc,
            ref float alpha,
            cudnnTensorDescriptor srcDesc, CUdeviceptr srcData,
            cudnnTensorDescriptor srcDiffDesc, CUdeviceptr srcDiffData,
            cudnnTensorDescriptor destDesc, CUdeviceptr destData,
            ref float beta,
            cudnnTensorDescriptor destDiffDesc, CUdeviceptr destDiffData);
예제 #8
0
		/// <summary>
		/// Wraps an already allocated device pointer as a four-channel image whose element size is
		/// <c>sizeof(int)</c>. The ROI initially covers the whole image.
		/// </summary>
		/// <param name="devPtr">Already allocated device pointer.</param>
		/// <param name="width">Image width in pixels.</param>
		/// <param name="height">Image height in pixels.</param>
		/// <param name="pitch">Pitch / line step.</param>
		/// <param name="isOwner">If true, devPtr is freed when disposing.</param>
		public NPPImage_32sC4(CUdeviceptr devPtr, int width, int height, int pitch, bool isOwner)
		{
			// Pixel format
			_channels = 4;
			_typeSize = sizeof(int);

			// Memory: the ROI pointer starts at the image base
			_devPtr = devPtr;
			_devPtrRoi = devPtr;
			_pitch = pitch;
			_isOwner = isOwner;

			// Full size and ROI size start out equal
			_sizeOriginal.width = width;
			_sizeRoi.width = width;
			_sizeOriginal.height = height;
			_sizeRoi.height = height;
		}
예제 #9
0
		/// <summary>
		/// Wraps an already allocated device pointer as a two-channel image whose element size is the
		/// marshalled size of <c>Npp32fc</c>. The ROI initially covers the whole image.
		/// </summary>
		/// <param name="devPtr">Already allocated device pointer.</param>
		/// <param name="width">Image width in pixels.</param>
		/// <param name="height">Image height in pixels.</param>
		/// <param name="pitch">Pitch / line step.</param>
		/// <param name="isOwner">If true, devPtr is freed when disposing.</param>
		public NPPImage_32fcC2(CUdeviceptr devPtr, int width, int height, int pitch, bool isOwner)
		{
			// Pixel format
			_channels = 2;
			_typeSize = Marshal.SizeOf(typeof(Npp32fc));

			// Memory: the ROI pointer starts at the image base
			_devPtr = devPtr;
			_devPtrRoi = devPtr;
			_pitch = pitch;
			_isOwner = isOwner;

			// Full size and ROI size start out equal
			_sizeOriginal.width = width;
			_sizeRoi.width = width;
			_sizeOriginal.height = height;
			_sizeRoi.height = height;
		}
예제 #10
0
        /// <summary>
        /// Declaration of the externally implemented routine <c>cudnnActivationBackward</c>
        /// (variant taking an activation mode and double-precision scaling factors).
        /// </summary>
        /// <remarks>
        /// Four tensors are passed, each as a descriptor plus device pointer: src, srcDiff, dest and
        /// destDiff.
        /// NOTE(review): the interop attribute is not visible here; which tensors are read and which
        /// are written should be confirmed against the cuDNN documentation.
        /// </remarks>
        /// <returns>Status code reported by the native call.</returns>
        public static extern cudnnStatus cudnnActivationBackward(
            cudnnHandle handle,
            cudnnActivationMode mode,
            ref double alpha,
            cudnnTensorDescriptor srcDesc, CUdeviceptr srcData,
            cudnnTensorDescriptor srcDiffDesc, CUdeviceptr srcDiffData,
            cudnnTensorDescriptor destDesc, CUdeviceptr destData,
            ref double beta,
            cudnnTensorDescriptor destDiffDesc, CUdeviceptr destDiffData);
예제 #11
0
        /// <summary>
        /// Combines <paramref name="firstInput"/> with every element of <paramref name="otherInputs"/>
        /// by queuing <c>m_binaryPermKernel</c> on <c>m_stream</c>; the result accumulates in
        /// <paramref name="output"/>.
        /// </summary>
        /// <param name="firstInput">Device pointer of the first operand.</param>
        /// <param name="otherInputs">Remaining operands; must contain at least one element.</param>
        /// <param name="output">Device pointer receiving the result.</param>
        /// <param name="method">Operation code forwarded to the kernel.</param>
        /// <exception cref="ArgumentNullException"><paramref name="otherInputs"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="otherInputs"/> is empty.</exception>
        void Bind(CUdeviceptr firstInput, IEnumerable<CUdeviceptr> otherInputs, CUdeviceptr output, int method)
        {
            if (otherInputs == null)
                throw new ArgumentNullException("otherInputs");

            // Walk the sequence exactly once. Emptiness is detected through MoveNext():
            // CUdeviceptr is a value type in ManagedCuda (NOTE(review): confirm for the version in use),
            // so comparing FirstOrDefault() with null could never detect an empty sequence.
            using (IEnumerator<CUdeviceptr> rest = otherInputs.GetEnumerator())
            {
                if (!rest.MoveNext())
                    throw new ArgumentException("Nothing to bind with...");

                // First step: firstInput (op) second -> output
                m_binaryPermKernel.RunAsync(m_stream, firstInput, rest.Current, output, method, m_inputSize);

                // Remaining inputs are folded into output one by one
                while (rest.MoveNext())
                    m_binaryPermKernel.RunAsync(m_stream, rest.Current, output, output, method, m_inputSize);
            }
        }
예제 #12
0
        /// <summary>
        /// Transforms <paramref name="firstInput"/> with <c>m_fft</c>, then for each other input
        /// transforms it and multiplies it into the running result with <c>m_mulkernel</c>, and finally
        /// calls <c>FinishBinding</c> with <paramref name="output"/>.
        /// </summary>
        /// <param name="firstInput">Device pointer of the first operand.</param>
        /// <param name="otherInputs">Remaining operands.</param>
        /// <param name="output">Device pointer handed to <c>FinishBinding</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="otherInputs"/> is null.</exception>
        public override void Bind(CUdeviceptr firstInput, IEnumerable<CUdeviceptr> otherInputs, CUdeviceptr output)
        {
            // Fail before any GPU work is issued instead of half-way through with a NullReferenceException
            if (otherInputs == null)
                throw new ArgumentNullException("otherInputs");

            // Running result lives at m_secondFFTOffset inside the temp block
            m_fft.Exec(firstInput, m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset));

            foreach (var input in otherInputs)
            {
                // Transform of the current input goes to m_firstFFTOffset ...
                m_fft.Exec(input, m_tempBlock.GetDevicePtr(m_owner, m_firstFFTOffset));

                // ... and is multiplied into the running result (written back in place)
                m_mulkernel.RunAsync(
                    m_stream,
                    m_tempBlock.GetDevicePtr(m_owner, m_firstFFTOffset),
                    m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset),
                    m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset), m_inputSize + 1);
            }

            FinishBinding(output);
        }
예제 #13
0
        /// <summary>
        /// Draws text whose characters are stored in device memory by running the
        /// <c>Observers\DrawStringKernel</c> kernel obtained for the last device (<c>DevCount - 1</c>).
        /// </summary>
        /// <param name="inString">Device variable holding the characters as floats.</param>
        /// <param name="x">X position forwarded to the kernel.</param>
        /// <param name="y">Y position forwarded to the kernel.</param>
        /// <param name="bgColor">Value written to the kernel constant <c>D_BG_COLOR</c>.</param>
        /// <param name="fgColor">Value written to the kernel constant <c>D_FG_COLOR</c>.</param>
        /// <param name="image">Device pointer of the target image.</param>
        /// <param name="imageWidth">Value written to the kernel constant <c>D_IMAGE_WIDTH</c>.</param>
        /// <param name="imageHeight">Value written to the kernel constant <c>D_IMAGE_HEIGHT</c>.</param>
        /// <param name="stringOffset">Offset, in float elements, of the first character inside <paramref name="inString"/>.</param>
        /// <param name="stringLen">Number of characters; also scales the execution size.</param>
        public static void DrawStringFromGPUMem(CudaDeviceVariable<float> inString, int x, int y, uint bgColor, uint fgColor, CUdeviceptr image, int imageWidth, int imageHeight, int stringOffset, int stringLen)
        {
            int lastDevice = MyKernelFactory.Instance.DevCount - 1;

            MyCudaKernel kernel = MyKernelFactory.Instance.Kernel(lastDevice, @"Observers\DrawStringKernel");
            CudaDeviceVariable<float> glyphs = MyMemoryManager.Instance.GetGlobalVariable<float>("CHARACTERS_TEXTURE", lastDevice, LoadDigits);

            //MyKernelFactory.Instance.Synchronize();

            // Colors
            kernel.SetConstantVariable("D_BG_COLOR", bgColor);
            kernel.SetConstantVariable("D_FG_COLOR", fgColor);

            // Target image geometry
            kernel.SetConstantVariable("D_IMAGE_WIDTH", imageWidth);
            kernel.SetConstantVariable("D_IMAGE_HEIGHT", imageHeight);

            // Character map geometry
            kernel.SetConstantVariable("D_DIGIT_WIDTH", CharacterWidth);
            kernel.SetConstantVariable("D_DIGIT_SIZE", CharacterSize);
            kernel.SetConstantVariable("D_DIGITMAP_NBCHARS", CharacterMapNbChars);

            kernel.SetupExecution(CharacterSize * stringLen);

            // The text starts stringOffset floats into the device buffer
            var glyphsPtr = glyphs.DevicePointer;
            var textPtr = inString.DevicePointer + sizeof(float) * stringOffset;
            kernel.Run(image, glyphsPtr, x, y, textPtr, stringLen);
        }
예제 #14
0
        /// <summary>
        /// Forwards to <c>CudaDNNNativeMethods.cudnnDivisiveNormalizationBackward</c> using this
        /// object's <c>_handle</c> and <c>_desc</c>, writes the resulting status to the debug output and
        /// throws when the status is not <c>cudnnStatus.Success</c>.
        /// </summary>
        /// <param name="mode">Normalization mode forwarded to the native call.</param>
        /// <param name="alpha">Scaling factor, forwarded by reference.</param>
        /// <param name="srcDesc">Tensor descriptor; same descriptor for diff, means, temp, temp2.</param>
        /// <param name="srcData">Device pointer to the source data.</param>
        /// <param name="srcMeansData">Device pointer to the means; if NULL, means are assumed to be zero.</param>
        /// <param name="srcDiffData">Device pointer to the source differential.</param>
        /// <param name="tempData">Device pointer to the first temporary buffer.</param>
        /// <param name="tempData2">Device pointer to the second temporary buffer.</param>
        /// <param name="betaData">Scaling factor, forwarded by reference.</param>
        /// <param name="destDataDesc">Tensor descriptor; same descriptor for dest, means, meansDiff.</param>
        /// <param name="destDataDiff">Output data differential.</param>
        /// <param name="destMeansDiff">Output means differential, can be NULL.</param>
        /// <exception cref="CudaDNNException">The native call did not return <c>cudnnStatus.Success</c>.</exception>
        public void cudnnDivisiveNormalizationBackward(
            cudnnDivNormMode mode,
            float alpha,
            cudnnTensorDescriptor srcDesc,
            CUdeviceptr srcData,
            CUdeviceptr srcMeansData,
            CUdeviceptr srcDiffData,
            CUdeviceptr tempData,
            CUdeviceptr tempData2,
            float betaData,
            cudnnTensorDescriptor destDataDesc,
            CUdeviceptr destDataDiff,
            CUdeviceptr destMeansDiff)
        {
            res = CudaDNNNativeMethods.cudnnDivisiveNormalizationBackward(
                _handle, _desc, mode,
                ref alpha,
                srcDesc, srcData, srcMeansData, srcDiffData,
                tempData, tempData2,
                ref betaData,
                destDataDesc, destDataDiff, destMeansDiff);

            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cudnnDivisiveNormalizationBackward", res));

            if (res == cudnnStatus.Success)
                return;

            throw new CudaDNNException(res);
        }
예제 #15
0
        /// <summary>
        /// Combines <paramref name="firstInput"/> with the other inputs using the permutation kernels.
        /// With at most two elements in <paramref name="otherInputs"/> a single <c>m_binaryPermKernel</c>
        /// run is used; otherwise the input pointers are staged in <c>m_tempBlock</c> and
        /// <c>m_PermKernel</c> is run once.
        /// </summary>
        /// <param name="firstInput">Device pointer of the first operand.</param>
        /// <param name="otherInputs">
        /// Remaining operands; the LAST element is used as the output buffer. When null or empty,
        /// <paramref name="firstInput"/> is used as the only element (and therefore as the output).
        /// </param>
        /// <param name="method">Operation code forwarded to the kernel.</param>
        void Bind(CUdeviceptr firstInput, CUdeviceptr[] otherInputs, int method)
        {
            // An empty array (e.g. coming from a params call without extra arguments) is handled
            // like null so the indexing below cannot go out of range.
            if (otherInputs == null || otherInputs.Length == 0)
                otherInputs = new[] { firstInput };

            var output = otherInputs[otherInputs.Length - 1];

            if (otherInputs.Length <= 2)
            {
                m_binaryPermKernel.Run(firstInput, otherInputs[0], output, method, m_inputSize);
                return;
            }

            // Stage the input pointers: firstInput followed by every element except the last (the output)
            m_tempBlock.Host[0] = firstInput;

            for (int i = 1; i < otherInputs.Length; i++)
                m_tempBlock.Host[i] = otherInputs[i - 1];

            m_tempBlock.SafeCopyToDevice(0, otherInputs.Length);

            // NOTE(review): the kernel is given m_tempBlock.Count, while only otherInputs.Length
            // pointers were staged above - confirm the two always agree.
            m_PermKernel.Run(m_tempBlock, m_tempBlock.Count, output, method, m_inputSize);
        }
예제 #16
0
        /// <summary>
        /// Transforms <paramref name="firstInput"/> with <c>m_fft</c>, then transforms the other inputs
        /// and multiplies each into the running result with <c>m_mulkernel</c>, and finally calls
        /// <c>FinishBinding</c> with the last element of <paramref name="otherInputs"/>.
        /// </summary>
        /// <param name="firstInput">Device pointer of the first operand.</param>
        /// <param name="otherInputs">
        /// Remaining operands; the LAST element is used as the output buffer. When null or empty,
        /// <paramref name="firstInput"/> is used as the only element (and therefore as the output).
        /// </param>
        public override void Bind(CUdeviceptr firstInput, params CUdeviceptr[] otherInputs)
        {
            // A params call without extra arguments yields an empty array (not null);
            // treat both the same way so the indexing at the end cannot go out of range.
            if (otherInputs == null || otherInputs.Length == 0)
            {
                otherInputs = new CUdeviceptr[] { firstInput };
            }

            // Running result lives at m_secondFFTOffset inside the temp block
            m_fft.Exec(firstInput, m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset));

            // With a single element it is both operand and output; otherwise the last element is output only
            int count = otherInputs.Length == 1 ? otherInputs.Length : otherInputs.Length - 1;

            for (int i = 0; i < count; ++i)
            {
                CUdeviceptr start = otherInputs[i];
                m_fft.Exec(start, m_tempBlock.GetDevicePtr(m_owner, m_firstFFTOffset));

                // Multiply the fresh transform into the running result (written back in place)
                m_mulkernel.Run(
                    m_tempBlock.GetDevicePtr(m_owner, m_firstFFTOffset),
                    m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset),
                    m_tempBlock.GetDevicePtr(m_owner, m_secondFFTOffset), m_inputSize + 1);
            }

            CUdeviceptr output = otherInputs[otherInputs.Length - 1];
            FinishBinding(output);
        }
		/// <summary>
		/// Declaration of the externally implemented routine <c>cusparseZcsr2csru</c>.
		/// </summary>
		/// <remarks>
		/// The matrix is supplied as a descriptor with three device pointers (<paramref name="csrVal"/>,
		/// <paramref name="csrRowPtr"/>, <paramref name="csrColInd"/>), plus an info object and a device
		/// work buffer (<paramref name="pBuffer"/>).
		/// NOTE(review): the interop attribute is not visible here; operation details should be taken
		/// from the cuSPARSE documentation.
		/// </remarks>
		/// <returns>Status code reported by the native call.</returns>
		public static extern cusparseStatus cusparseZcsr2csru(
			cusparseContext handle,
			int m, int n, int nnz,
			cusparseMatDescr descrA,
			CUdeviceptr csrVal, CUdeviceptr csrRowPtr, CUdeviceptr csrColInd,
			csru2csrInfo info,
			CUdeviceptr pBuffer);
예제 #18
0
		/// <summary>
		/// Packed to planar image copy: copies this image's ROI into four single-channel 32-bit signed
		/// images via <c>nppiCopy_32s_C4P4R</c>.
		/// </summary>
		/// <remarks>
		/// Only the pitch of <paramref name="dst0"/> is passed for the destination planes.
		/// </remarks>
		/// <param name="dst0">Destination image channel 0</param>
		/// <param name="dst1">Destination image channel 1</param>
		/// <param name="dst2">Destination image channel 2</param>
		/// <param name="dst3">Destination image channel 3</param>
		public void Copy(NPPImage_32sC1 dst0, NPPImage_32sC1 dst1, NPPImage_32sC1 dst2, NPPImage_32sC1 dst3)
		{
			// ROI pointers of the destination planes, in channel order
			CUdeviceptr[] planes = new CUdeviceptr[]
			{
				dst0.DevicePointerRoi,
				dst1.DevicePointerRoi,
				dst2.DevicePointerRoi,
				dst3.DevicePointerRoi
			};

			status = NPPNativeMethods.NPPi.MemCopy.nppiCopy_32s_C4P4R(_devPtrRoi, _pitch, planes, dst0.Pitch, _sizeRoi);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiCopy_32s_C4P4R", status));
			NPPException.CheckNppStatus(status, this);
		}
예제 #19
0
		/// <summary>
		/// Planar to packed image copy: copies the ROIs of four single-channel 32-bit signed images into
		/// the ROI of a four-channel image via <c>nppiCopy_32s_P4C4R</c>.
		/// </summary>
		/// <remarks>
		/// Only the pitch of <paramref name="src0"/> is passed for the source planes.
		/// </remarks>
		/// <param name="src0">Source image channel 0</param>
		/// <param name="src1">Source image channel 1</param>
		/// <param name="src2">Source image channel 2</param>
		/// <param name="src3">Source image channel 3</param>
		/// <param name="dest">Destination image</param>
		public static void Copy(NPPImage_32sC1 src0, NPPImage_32sC1 src1, NPPImage_32sC1 src2, NPPImage_32sC1 src3, NPPImage_32sC4 dest)
		{
			// ROI pointers of the source planes, in channel order
			CUdeviceptr[] planes = new CUdeviceptr[]
			{
				src0.DevicePointerRoi,
				src1.DevicePointerRoi,
				src2.DevicePointerRoi,
				src3.DevicePointerRoi
			};

			NppStatus result = NPPNativeMethods.NPPi.MemCopy.nppiCopy_32s_P4C4R(planes, src0.Pitch, dest.DevicePointerRoi, dest.Pitch, dest.SizeRoi);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiCopy_32s_P4C4R", result));
			NPPException.CheckNppStatus(result, null);
		}
예제 #20
0
		/// <summary>
		/// Wraps an already allocated device pointer as an image, taking width and height from
		/// <paramref name="size"/>. Forwards to the (devPtr, width, height, pitch) constructor.
		/// </summary>
		/// <param name="devPtr">Already allocated device pointer.</param>
		/// <param name="size">Image size (width and height).</param>
		/// <param name="pitch">Pitch / line step.</param>
		public NPPImage_32sC4(CUdeviceptr devPtr, NppiSize size, int pitch)
			: this(devPtr, size.width, size.height, pitch)
		{
		}
예제 #21
0
		/// <summary>
		/// Perspective transform of a three-plane image.<para/>
		/// This function performs perspective warping of the specified
		/// quadrangle in the source image to the specified quadrangle in the
		/// destination image. The function nppiWarpPerspectiveQuad uses the same
		/// formulas for pixel mapping as in nppiWarpPerspective function. The
		/// transform coefficients are computed internally.
		/// The transformed part of the source image is resampled using the specified
		/// interpolation method and written to the destination ROI.<para/>
		/// NPPI specific recommendation: <para/>
		/// The function operates using 2 types of kernels: fast and accurate. The fast
		/// method is about 4 times faster than its accurate variant,
		/// but doesn't perform memory access checks and requires the destination ROI
		/// to be 64 bytes aligned. Hence any destination ROI is
		/// chunked into 3 vertical stripes: the first and the third are processed by
		/// accurate kernels and the central one is processed by the fast one.
		/// In order to get the maximum available speed of execution, the projection of
		/// destination ROI onto image addresses must be 64 bytes aligned. This is
		/// always true if the values <para/>
		/// <code>(int)((void *)(pDst + dstRoi.x))</code> and <para/>
		/// <code>(int)((void *)(pDst + dstRoi.x + dstRoi.width))</code> <para/>
		/// are multiples of 64. Another rule of thumb is to specify destination ROI in
		/// such way that left and right sides of the projected image are separated from
		/// the ROI by at least 63 bytes from each side. However, this requires the
		/// whole ROI to be part of allocated memory. In case when the conditions above
		/// are not satisfied, the function may decrease in speed slightly and will
		/// return NPP_MISALIGNED_DST_ROI_WARNING warning.
		/// </summary>
		/// <remarks>
		/// Size, pitch and ROI are taken from <paramref name="src0"/> and <paramref name="dest0"/> only;
		/// the remaining planes contribute just their device pointers.
		/// </remarks>
		/// <param name="src0">Source image (Channel 0)</param>
		/// <param name="src1">Source image (Channel 1)</param>
		/// <param name="src2">Source image (Channel 2)</param>
		/// <param name="srcQuad">Source quadrangle [4,2]</param>
		/// <param name="dest0">Destination image (Channel 0)</param>
		/// <param name="dest1">Destination image (Channel 1)</param>
		/// <param name="dest2">Destination image (Channel 2)</param>
		/// <param name="destQuad">Destination quadrangle [4,2]</param>
		/// <param name="eInterpolation">Interpolation mode: can be <see cref="InterpolationMode.NearestNeighbor"/>, <see cref="InterpolationMode.Linear"/> or <see cref="InterpolationMode.Cubic"/></param>
		public static void WarpPerspectiveQuad(NPPImage_32sC1 src0, NPPImage_32sC1 src1, NPPImage_32sC1 src2, double[,] srcQuad, NPPImage_32sC1 dest0, NPPImage_32sC1 dest1, NPPImage_32sC1 dest2, double[,] destQuad, InterpolationMode eInterpolation)
		{
			NppiRect rectIn = new NppiRect(src0.PointRoi, src0.SizeRoi);
			NppiRect rectOut = new NppiRect(dest0.PointRoi, dest0.SizeRoi);

			// Whole-image base pointers; the ROI is conveyed through rectIn / rectOut
			CUdeviceptr[] src = new CUdeviceptr[] { src0.DevicePointer, src1.DevicePointer, src2.DevicePointer };
			CUdeviceptr[] dst = new CUdeviceptr[] { dest0.DevicePointer, dest1.DevicePointer, dest2.DevicePointer };

			// Three planes are supplied, so the three-plane (P3R) native variant must be used;
			// the four-plane (P4R) variant would be handed 3-element pointer arrays.
			NppStatus status = NPPNativeMethods.NPPi.PerspectiveTransforms.nppiWarpPerspectiveQuad_32s_P3R(src, src0.Size, src0.Pitch, rectIn, srcQuad, dst, dest0.Pitch, rectOut, destQuad, eInterpolation);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "nppiWarpPerspectiveQuad_32s_P3R", status));
			NPPException.CheckNppStatus(status, null);
		}
		/// <summary>
		/// Declaration of the externally implemented routine <c>cusparseZcsrsv2_analysis</c>.
		/// </summary>
		/// <remarks>
		/// The matrix is supplied as a descriptor with three device pointers (csrValA, csrRowPtrA,
		/// csrColIndA), together with an info object, a solve policy and a device work buffer
		/// (<paramref name="pBuffer"/>).
		/// NOTE(review): the interop attribute is not visible here; operation details should be taken
		/// from the cuSPARSE documentation.
		/// </remarks>
		/// <returns>Status code reported by the native call.</returns>
		public static extern cusparseStatus cusparseZcsrsv2_analysis(
			cusparseContext handle,
			cusparseOperation transA,
			int m, int nnz,
			cusparseMatDescr descrA,
			CUdeviceptr csrValA, CUdeviceptr csrRowPtrA, CUdeviceptr csrColIndA,
			csrsv2Info info,
			cusparseSolvePolicy policy,
			CUdeviceptr pBuffer);
		/// <summary>
		/// Declaration of the externally implemented routine <c>cusparseZbsrsv2_bufferSizeExt</c>.
		/// </summary>
		/// <remarks>
		/// The matrix is supplied as a descriptor with three device pointers (bsrVal, bsrRowPtr,
		/// bsrColInd); the result is delivered through the by-reference
		/// <paramref name="pBufferSize"/>.
		/// NOTE(review): presumably the required work-buffer size in bytes - confirm against the
		/// cuSPARSE documentation.
		/// </remarks>
		/// <returns>Status code reported by the native call.</returns>
		public static extern cusparseStatus cusparseZbsrsv2_bufferSizeExt(
			cusparseContext handle,
			cusparseDirection dirA,
			cusparseOperation transA,
			int mb, int nnzb,
			cusparseMatDescr descrA,
			CUdeviceptr bsrVal, CUdeviceptr bsrRowPtr, CUdeviceptr bsrColInd,
			int blockDim,
			bsrsv2Info info,
			ref SizeT pBufferSize);
		/// <summary>
		/// Declaration of the externally implemented routine <c>cusparseZcsrsv2_bufferSizeExt</c>.
		/// </summary>
		/// <remarks>
		/// The matrix is supplied as a descriptor with three device pointers (csrValA, csrRowPtrA,
		/// csrColIndA); the result is delivered through the by-reference
		/// <paramref name="pBufferSize"/>.
		/// NOTE(review): presumably the required work-buffer size in bytes - confirm against the
		/// cuSPARSE documentation.
		/// </remarks>
		/// <returns>Status code reported by the native call.</returns>
		public static extern cusparseStatus cusparseZcsrsv2_bufferSizeExt(
			cusparseContext handle,
			cusparseOperation transA,
			int m, int nnz,
			cusparseMatDescr descrA,
			CUdeviceptr csrValA, CUdeviceptr csrRowPtrA, CUdeviceptr csrColIndA,
			csrsv2Info info,
			ref SizeT pBufferSize);
		/// <summary>
		/// Declaration of the externally implemented routine <c>cusparseZcsrsv_solve</c>; in this
		/// overload <paramref name="alpha"/> is passed as a <c>CUdeviceptr</c>.
		/// </summary>
		/// <remarks>
		/// The matrix is supplied as a descriptor with three device pointers (csrValA, csrRowPtrA,
		/// csrColIndA); <paramref name="x"/> and <paramref name="y"/> are device pointers.
		/// NOTE(review): the interop attribute is not visible here; which of x / y is the right-hand
		/// side and which the solution should be confirmed against the cuSPARSE documentation.
		/// </remarks>
		/// <returns>Status code reported by the native call.</returns>
		public static extern cusparseStatus cusparseZcsrsv_solve(
			cusparseContext handle,
			cusparseOperation transA,
			int m,
			CUdeviceptr alpha,
			cusparseMatDescr descrA,
			CUdeviceptr csrValA,
			CUdeviceptr csrRowPtrA,
			CUdeviceptr csrColIndA,
			cusparseSolveAnalysisInfo info,
			CUdeviceptr x,
			CUdeviceptr y);
예제 #26
0
		/// <summary>
		/// Wraps an already allocated device pointer as an image without taking ownership of devPtr:
		/// forwards to the five-argument constructor with <c>isOwner</c> set to false.
		/// </summary>
		/// <param name="devPtr">Already allocated device pointer.</param>
		/// <param name="width">Image width in pixels.</param>
		/// <param name="height">Image height in pixels.</param>
		/// <param name="pitch">Pitch / line step.</param>
		public NPPImage_32sC4(CUdeviceptr devPtr, int width, int height, int pitch)
			: this(devPtr, width, height, pitch, false)
		{
		}
		/// <summary>
		/// Declaration of the externally implemented routine <c>cusparseZcsru2csr_bufferSizeExt</c>.
		/// </summary>
		/// <remarks>
		/// The matrix is supplied as three device pointers (csrVal, csrRowPtr, csrColInd) plus an info
		/// object; the result is delivered through the by-reference
		/// <paramref name="pBufferSizeInBytes"/>.
		/// NOTE(review): the interop attribute is not visible here; confirm details against the
		/// cuSPARSE documentation.
		/// </remarks>
		/// <returns>Status code reported by the native call.</returns>
		public static extern cusparseStatus cusparseZcsru2csr_bufferSizeExt(
			cusparseContext handle,
			int m, int n, int nnz,
			CUdeviceptr csrVal, CUdeviceptr csrRowPtr, CUdeviceptr csrColInd,
			csru2csrInfo info,
			ref SizeT pBufferSizeInBytes);
		/// <summary>
		/// Declaration of the externally implemented routine <c>cusparseCcsrsv_analysis</c>.
		/// </summary>
		/// <remarks>
		/// The matrix is supplied as a descriptor with three device pointers (csrValA, csrRowPtrA,
		/// csrColIndA), together with a <c>cusparseSolveAnalysisInfo</c> object.
		/// NOTE(review): the interop attribute is not visible here; operation details should be taken
		/// from the cuSPARSE documentation.
		/// </remarks>
		/// <returns>Status code reported by the native call.</returns>
		public static extern cusparseStatus cusparseCcsrsv_analysis(
			cusparseContext handle,
			cusparseOperation transA,
			int m,
			int nnz,
			cusparseMatDescr descrA,
			CUdeviceptr csrValA,
			CUdeviceptr csrRowPtrA,
			CUdeviceptr csrColIndA,
			cusparseSolveAnalysisInfo info);
예제 #29
0
        /// <summary>
        /// Initializes the layer: runs the base initialization, resolves the device pointers of the
        /// weight and bias structures, stores the structures in the network's
        /// <c>DataDimsMemoryBlock</c> host array, records the device pointers of those entries and
        /// finally generates the initial weights.
        /// </summary>
        /// <param name="nGPU">GPU index forwarded to the base implementation.</param>
        public override void Initialize(Int32 nGPU)
        {
            base.Initialize(nGPU);

            // Weight / WeightChange pointers are resolved only when a weight block exists
            if (m_weightBlock != null)
            {
                m_weight.Ptr = m_weightBlock.GetDevicePtr(m_network, m_weightOffset);
                m_weightChange.Ptr = m_weightChangeBlock.GetDevicePtr(m_network, m_weightChangeOffset);
            }

            // Bias / BiasChange pointers are resolved only when a bias block exists
            if (m_biasBlock != null)
            {
                m_bias.Ptr = m_biasBlock.GetDevicePtr(m_network, m_biasOffset);
                m_biasChange.Ptr = m_biasChangeBlock.GetDevicePtr(m_network, m_biasChangeOffset);
            }

            var dims = m_network.DataDimsMemoryBlock;

            // Place the structures into the host side of the shared block
            // (NOTE(review): no host-to-device copy happens in this method - presumably done elsewhere)
            dims.Host[m_weightDimGPUPtrOffset] = Weight;
            dims.Host[m_weightChangeDimGPUPtrOffset] = WeightChange;
            dims.Host[m_biasDimGPUPtrOffset] = Bias;
            dims.Host[m_biasChangeDimGPUPtrOffset] = BiasChange;
            dims.Host[m_lastWeightDeltaDimGPUPtrOffset] = LastWeightDelta;
            dims.Host[m_storedOutputDimGPUPtrOffset] = StoredOutput;

            // Remember where each structure lives on the device
            WeightDataPtr = dims.GetDevicePtr(m_network, (int)m_weightDimGPUPtrOffset);
            WeightChangeDataPtr = dims.GetDevicePtr(m_network, (int)m_weightChangeDimGPUPtrOffset);
            BiasDataPtr = dims.GetDevicePtr(m_network, (int)m_biasDimGPUPtrOffset);
            BiasChangeDataPtr = dims.GetDevicePtr(m_network, (int)m_biasChangeDimGPUPtrOffset);
            LastWeightDeltaDataPtr = dims.GetDevicePtr(m_network, (int)m_lastWeightDeltaDimGPUPtrOffset);
            StoredOutputDataPtr = dims.GetDevicePtr(m_network, (int)m_storedOutputDimGPUPtrOffset);

            // Generate initial weights
            GenerateWeights();
        }
		/// <summary>
		/// Declaration of the externally implemented routine <c>cusparseXcscsort</c>.
		/// </summary>
		/// <remarks>
		/// The matrix structure is supplied as a descriptor with two device pointers
		/// (<paramref name="cscColPtrA"/>, <paramref name="cscRowIndA"/>), together with the device
		/// pointer <paramref name="P"/> and a device work buffer (<paramref name="pBuffer"/>).
		/// NOTE(review): presumably P receives/holds the sorting permutation - confirm against the
		/// cuSPARSE documentation.
		/// </remarks>
		/// <returns>Status code reported by the native call.</returns>
		public static extern cusparseStatus cusparseXcscsort(
			cusparseContext handle,
			int m, int n, int nnz,
			cusparseMatDescr descrA,
			CUdeviceptr cscColPtrA, CUdeviceptr cscRowIndA,
			CUdeviceptr P,
			CUdeviceptr pBuffer);