Example #1
0
		/// <summary>
		/// Creates a new surface from array memory. Allocates a new 3D array and binds it to the
		/// surface reference named <paramref name="surfName"/> found in the module of <paramref name="kernel"/>.
		/// </summary>
		/// <param name="kernel">Kernel whose module (kernel.CUModule) holds the surface reference</param>
		/// <param name="surfName">Name of the surface reference looked up via cuModuleGetSurfRef</param>
		/// <param name="flags">Flags passed to cuSurfRefSetArray</param>
		/// <param name="format">Element format of the newly allocated array</param>
		/// <param name="width">In elements</param>
		/// <param name="height">In elements</param>
		/// <param name="depth">In elements</param>
		/// <param name="numChannels">Number of channels per element</param>
		/// <param name="arrayFlags">Flags passed to the CudaArray3D constructor</param>
		/// <exception cref="CudaException">cuModuleGetSurfRef or cuSurfRefSetArray did not return Success.</exception>
		public CudaSurface(CudaKernel kernel, string surfName, CUSurfRefSetFlags flags, CUArrayFormat format, SizeT width, SizeT height, SizeT depth, CudaArray3DNumChannels numChannels, CUDAArray3DFlags arrayFlags)
		{
			// Look up the surface reference by name; nothing has been allocated yet if this fails.
			_surfref = new CUsurfref();
			res = DriverAPINativeMethods.ModuleManagement.cuModuleGetSurfRef(ref _surfref, kernel.CUModule, surfName);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}, Surface name: {3}", DateTime.Now, "cuModuleGetSurfRef", res, surfName));
			if (res != CUResult.Success)
			{
				throw new CudaException(res);
			}

			_flags = flags;
			_format = format;
			_height = height;
			_width = width;
			_depth = depth;
			_numChannels = (int)numChannels;
			_name = surfName;
			_module = kernel.CUModule;
			_cufunction = kernel.CUFunction;

			// Total size is elements * channels * bytes-per-channel.
			_channelSize = CudaHelperMethods.GetChannelSize(format);
			_dataSize = height * width * depth * _numChannels * _channelSize;
			_array = new CudaArray3D(format, width, height, depth, numChannels, arrayFlags);

			res = DriverAPINativeMethods.SurfaceReferenceManagement.cuSurfRefSetArray(_surfref, _array.CUArray, flags);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuSurfRefSetArray", res));
			if (res != CUResult.Success)
			{
				// The array was allocated by this constructor and the caller never receives
				// the half-built object, so release the array here instead of leaking it.
				_array.Dispose();
				throw new CudaException(res);
			}
		}
		/// <summary>
		/// Wraps an existing host pointer in a CudaRegisteredHostMemory_byte. The constructor only stores
		/// the pointer and size. The pointer must be page size aligned (4KBytes)!
		/// </summary>
		/// <param name="hostPointer">Existing host memory; must be page size aligned (4KBytes)</param>
		/// <param name="size">In elements</param>
		public CudaRegisteredHostMemory_byte(IntPtr hostPointer, SizeT size)
		{
			// Element count and the marshalled size of one byte element.
			_size = size;
			_typeSize = (SizeT)Marshal.SizeOf(typeof(byte));

			// Keep the same address both as IntPtr and as a typed byte pointer.
			_intPtr = hostPointer;
			_ptr = (byte*)hostPointer;
		}
Example #3
0
			/// <summary>
			/// Initializes the occupancy device properties by copying the matching values
			/// from a <see cref="CudaDeviceProperties"/> instance.
			/// </summary>
			/// <param name="props">Device properties to copy from</param>
			public cudaOccDeviceProp(CudaDeviceProperties props)
			{
				// Compute capability
				minor = props.ComputeCapabilityMinor;
				major = props.ComputeCapabilityMajor;

				// Thread limits
				warpSize = props.WarpSize;
				maxThreadsPerMultiProcessor = props.MaxThreadsPerMultiProcessor;
				maxThreadsPerBlock = props.MaxThreadsPerBlock;

				// Register limits
				regsPerMultiprocessor = props.MaxRegistersPerMultiprocessor;
				regsPerBlock = props.RegistersPerBlock;

				// Shared memory limits
				sharedMemPerMultiprocessor = props.MaxSharedMemoryPerMultiprocessor;
				sharedMemPerBlock = props.SharedMemoryPerBlock;
			}
Example #4
0
        /// <summary>
        /// Creates an input layer that reads from <paramref name="input"/> starting at
        /// <paramref name="offset"/> and sets the layer's output dimensions.
        /// </summary>
        /// <param name="network">Owning network, forwarded to the base constructor</param>
        /// <param name="input">Memory block stored as the layer's input block</param>
        /// <param name="offset">Stored as the offset into the input block</param>
        /// <param name="nb">Assigned to the output's Nb</param>
        /// <param name="width">Assigned to the output's Width</param>
        /// <param name="height">Assigned to the output's Height</param>
        /// <param name="nbSamplesPerStep">Stored as the number of samples per step</param>
        public MyInputLayer(MyAbstractFeedForwardNode network, MyMemoryBlock<float> input, SizeT offset, SizeT nb, SizeT width, SizeT height, SizeT nbSamplesPerStep)
            : base(network)
        {
            // Output dimensions
            m_output.Nb = nb;
            m_output.Width = width;
            m_output.Height = height;

            // Input source
            m_nbSamplesPerStep = nbSamplesPerStep;
            m_inputOffset = offset;
            m_inputBlock = input;
        }
        /// <summary>
        /// Runs the base allocation, then reserves room for this layer's delta in the
        /// network's deltas block and one slot in the network's data-dims block.
        /// </summary>
        public override void AllocateMemory()
        {
            base.AllocateMemory();

            // The delta is assigned from the output.
            m_delta = m_output;

            // Take the next free slot in the data-dims block.
            m_deltaDimGPUPtrOffset = m_network.DataDimsMemoryBlock.Count;
            m_network.DataDimsMemoryBlock.Count++;

            // Append m_delta.Count elements to the end of the deltas block;
            // the offset is the block's count before it grows.
            m_deltaBlock = m_network.DeltasMemoryBlock;
            m_deltaOffset = m_network.DeltasMemoryBlock.Count;
            m_network.DeltasMemoryBlock.Count += m_delta.Count;
        }
        /// <summary>
        /// Creates a new 2D texture from array memory. Allocates a new 2D array and binds it to the
        /// texture reference named <paramref name="texName"/> found in the module of <paramref name="kernel"/>.
        /// </summary>
        /// <param name="kernel">Kernel whose module (kernel.CUModule) holds the texture reference</param>
        /// <param name="texName">Name of the texture reference looked up via cuModuleGetTexRef</param>
        /// <param name="addressMode0">Address mode set for dimension 0</param>
        /// <param name="addressMode1">Address mode set for dimension 1</param>
        /// <param name="filterMode">Filter mode set on the texture reference</param>
        /// <param name="flags">Flags set on the texture reference</param>
        /// <param name="format">Element format of the texture and of the newly allocated array</param>
        /// <param name="width">In elements</param>
        /// <param name="height">In elements</param>
        /// <param name="numChannels">1,2 or 4</param>
        /// <exception cref="CudaException">Any of the driver calls made here did not return Success.</exception>
        public CudaTextureArray2D(CudaKernel kernel, string texName, CUAddressMode addressMode0, CUAddressMode addressMode1, CUFilterMode filterMode, CUTexRefSetFlags flags, CUArrayFormat format, SizeT width, SizeT height, CudaArray2DNumChannels numChannels)
        {
            // Look up the texture reference by name.
            _texref = new CUtexref();
            res = DriverAPINativeMethods.ModuleManagement.cuModuleGetTexRef(ref _texref, kernel.CUModule, texName);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}, Texture name: {3}", DateTime.Now, "cuModuleGetTexRef", res, texName));
            if (res != CUResult.Success) throw new CudaException(res);

            // Configure the reference; nothing has been allocated yet, so failing here leaks nothing.
            res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetAddressMode(_texref, 0, addressMode0);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetAddressMode", res));
            if (res != CUResult.Success) throw new CudaException(res);
            res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetAddressMode(_texref, 1, addressMode1);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetAddressMode", res));
            if (res != CUResult.Success) throw new CudaException(res);
            res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFilterMode(_texref, filterMode);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetFilterMode", res));
            if (res != CUResult.Success) throw new CudaException(res);
            res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFlags(_texref, flags);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetFlags", res));
            if (res != CUResult.Success) throw new CudaException(res);
            res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetFormat(_texref, format, (int)numChannels);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetFormat", res));
            if (res != CUResult.Success) throw new CudaException(res);

            _filtermode = filterMode;
            _flags = flags;
            _addressMode0 = addressMode0;
            _addressMode1 = addressMode1;
            _format = format;
            _height = height;
            _width = width;
            _numChannels = (int)numChannels;
            _name = texName;
            _module = kernel.CUModule;
            _cufunction = kernel.CUFunction;

            // Total size is elements * channels * bytes-per-channel.
            _channelSize = CudaHelperMethods.GetChannelSize(format);
            _dataSize = height * width * _numChannels * _channelSize;
            _array = new CudaArray2D(format, width, height, numChannels);

            res = DriverAPINativeMethods.TextureReferenceManagement.cuTexRefSetArray(_texref, _array.CUArray, CUTexRefSetArrayFlags.OverrideFormat);
            Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cuTexRefSetArray", res));
            if (res != CUResult.Success)
            {
                // The array was allocated by this constructor and the caller never receives
                // the half-built object, so release the array here instead of leaking it.
                _array.Dispose();
                throw new CudaException(res);
            }
        }
		/// <summary>
		/// Builds a batched FFT plan of dimension <c>rank</c> on an existing handle, using the
		/// per-dimension sizes in <c>n</c> and <c>batch</c> transforms. The advanced layout
		/// parameters are left at fixed defaults (no embed arrays, stride 1, distance 0). (new API)
		/// </summary>
		/// <param name="handle">cufftHandle object</param>
		/// <param name="rank">Dimensionality of the transform (1, 2, or 3)</param>
		/// <param name="n">An array of size rank, describing the size of each dimension</param>
		/// <param name="batch">Batch size for this transform</param>
		/// <param name="type">Transform data type (e.g., C2C, as per other CUFFT calls)</param>
		/// <param name="size">Passed by reference to cufftMakePlanMany64</param>
		public CudaFFTPlanMany64(cufftHandle handle, int rank, long[] n, long batch, cufftType type, ref SizeT size)
		{
			// Advanced data layout is not exposed by this overload: fixed defaults.
			_inembed = null;
			_onembed = null;
			_istride = 1;
			_ostride = 1;
			_idist = 0;
			_odist = 0;

			// Caller-supplied plan description.
			_handle = handle;
			_type = type;
			_rank = rank;
			_n = n;
			_batch = batch;

			res = CudaFFTNativeMethods.cufftMakePlanMany64(
				_handle, _rank, _n,
				_inembed, _istride, _idist,
				_onembed, _ostride, _odist,
				_type, _batch, ref size);
			Debug.WriteLine(String.Format("{0:G}, {1}: {2}", DateTime.Now, "cufftMakePlanMany64", res));

			if (res != cufftResult.Success)
			{
				throw new CudaFFTException(res);
			}
		}
Example #8
0
 // Native binding for cuGraphicsResourceGetMappedPointer_v2 (extern: implemented outside managed code;
 // the import attribute is not visible in this excerpt). pDevPtr and pSize are passed by reference.
 // NOTE(review): presumably they receive the device pointer and size of the mapped `resource` -- confirm
 // against the CUDA Driver API reference.
 public static extern CUResult cuGraphicsResourceGetMappedPointer_v2(ref CUdeviceptr pDevPtr, ref SizeT pSize, CUgraphicsResource resource);
Example #9
0
 // Native binding for cuMemAlloc_v2 (extern: implemented outside managed code; the import attribute
 // is not visible in this excerpt). dptr is passed by reference, bytesize by value.
 // NOTE(review): presumably allocates `bytesize` bytes of device memory and stores the pointer in
 // `dptr` -- confirm against the CUDA Driver API reference.
 public static extern CUResult cuMemAlloc_v2(ref CUdeviceptr dptr, SizeT bytesize);
Example #10
0
 // Native binding for cuMemGetInfo_v2 (extern: implemented outside managed code; the import attribute
 // is not visible in this excerpt). Both arguments are passed by reference.
 // NOTE(review): presumably they receive the free and total memory amounts in bytes -- confirm
 // against the CUDA Driver API reference.
 public static extern CUResult cuMemGetInfo_v2(ref SizeT free, ref SizeT total);
Example #11
0
 // Native binding for cuLinkComplete (extern: implemented outside managed code; the import attribute
 // is not visible in this excerpt). cubinOut and sizeOut are passed by reference.
 // NOTE(review): presumably they receive a pointer to the linked image and its size, with the
 // memory owned by `state` -- confirm against the CUDA Driver API reference.
 public static extern CUResult cuLinkComplete(CUlinkState state, ref IntPtr cubinOut, ref SizeT sizeOut);
		// Native binding for cusparseZbsrsv2_bufferSizeExt (extern; the import attribute is not visible
		// in this excerpt). Matrix data is passed as device pointers; pBufferSize is passed by reference.
		// NOTE(review): presumably pBufferSize receives the workspace size in bytes needed by the
		// bsrsv2 routines -- confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseZbsrsv2_bufferSizeExt(cusparseContext handle,
														cusparseDirection dirA,
														cusparseOperation transA,
														int mb,
														int nnzb,
														cusparseMatDescr descrA,
														CUdeviceptr bsrVal,
														CUdeviceptr bsrRowPtr,
														CUdeviceptr bsrColInd,
														int blockDim,
														bsrsv2Info info,
														ref SizeT pBufferSize);
		// Native binding for cusparseZcsru2csr_bufferSizeExt (extern; the import attribute is not visible
		// in this excerpt). Matrix data is passed as device pointers; pBufferSizeInBytes is passed by reference.
		// NOTE(review): presumably pBufferSizeInBytes receives the workspace size needed by the
		// csru2csr conversion -- confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseZcsru2csr_bufferSizeExt(cusparseContext handle,
                                                             int m,
                                                             int n,
                                                             int nnz,
                                                             CUdeviceptr csrVal,
                                                             CUdeviceptr csrRowPtr,
                                                             CUdeviceptr csrColInd,
                                                             csru2csrInfo  info,
                                                             ref SizeT pBufferSizeInBytes);
		// Native binding for cusparseXcoosort_bufferSizeExt (extern; the import attribute is not visible
		// in this excerpt). Index arrays are passed as device pointers; pBufferSizeInBytes is passed by reference.
		// NOTE(review): presumably pBufferSizeInBytes receives the workspace size needed by the
		// COO sort routines -- confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseXcoosort_bufferSizeExt(cusparseContext handle,
                                                            int m,
                                                            int n,
                                                            int nnz,
                                                            CUdeviceptr cooRowsA,
                                                            CUdeviceptr cooColsA,
                                                            ref SizeT pBufferSizeInBytes);
		// Native binding for cusparseZgebsr2gebsc_bufferSizeExt (extern; the import attribute is not
		// visible in this excerpt). Matrix data is passed as device pointers; pBufferSize is passed by reference.
		// NOTE(review): presumably pBufferSize receives the workspace size in bytes needed by the
		// gebsr2gebsc conversion -- confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseZgebsr2gebsc_bufferSizeExt(cusparseContext handle,
															 int mb,
															 int nb,
															 int nnzb,
															 CUdeviceptr bsrVal,
															 CUdeviceptr bsrRowPtr,
															 CUdeviceptr bsrColInd,
															 int rowBlockDim,
															 int colBlockDim,
															 ref SizeT pBufferSize);
		// Native binding for cusparseDcsrilu02_bufferSizeExt (extern; the import attribute is not visible
		// in this excerpt). Matrix data is passed as device pointers; pBufferSize is passed by reference.
		// NOTE(review): presumably pBufferSize receives the workspace size in bytes needed by the
		// csrilu02 routines -- confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseDcsrilu02_bufferSizeExt(cusparseContext handle,
														  int m,
														  int nnz,
														  cusparseMatDescr descrA,
														  CUdeviceptr csrValA,
														  CUdeviceptr csrRowPtrA,
														  CUdeviceptr csrColIndA,
														  csrilu02Info info,
														  ref SizeT pBufferSize);
Example #17
0
			// Native binding for cuStreamAttachMemAsync (extern; the import attribute is not visible in
			// this excerpt). All arguments are passed by value.
			// NOTE(review): presumably attaches `length` bytes at `dptr` to `hStream` as selected by
			// `flags` -- confirm against the CUDA Driver API reference.
			public static extern CUResult cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, SizeT length, CUmemAttach_flags flags);
Example #18
0
 // Native binding for cuModuleGetGlobal_v2 (extern; the import attribute is not visible in this
 // excerpt). dptr and bytes are passed by reference.
 // NOTE(review): presumably they receive the device pointer and size of the global called `name`
 // in module `hmod` -- confirm against the CUDA Driver API reference.
 public static extern CUResult cuModuleGetGlobal_v2(ref CUdeviceptr dptr, ref SizeT bytes, CUmodule hmod, string name);
Example #19
0
 // Native binding for cuTexRefSetAddress2D_v2 (extern; the import attribute is not visible in this
 // excerpt). The array descriptor is passed by reference; the other arguments by value.
 // NOTE(review): presumably binds the pitched 2D memory at `dptr` to `hTexRef`, with `Pitch` in
 // bytes -- confirm against the CUDA Driver API reference.
 public static extern CUResult cuTexRefSetAddress2D_v2(CUtexref hTexRef, ref CUDAArrayDescriptor desc, CUdeviceptr dptr, SizeT Pitch);
Example #20
0
 // Native binding for cuTexRefSetAddress_v2 (extern; the import attribute is not visible in this
 // excerpt). ByteOffset is passed by reference; the other arguments by value.
 // NOTE(review): presumably binds `bytes` bytes at `dptr` to `hTexRef` and reports an offset in
 // ByteOffset -- confirm against the CUDA Driver API reference.
 public static extern CUResult cuTexRefSetAddress_v2(ref SizeT ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, SizeT bytes);
		// Native binding for cusparseZcsrgemm2_bufferSizeExt (extern; the import attribute is not visible
		// in this excerpt). alpha, beta and the row/column index arrays of A, B and D are passed as device
		// pointers; pBufferSizeInBytes is passed by reference.
		// NOTE(review): presumably pBufferSizeInBytes receives the workspace size needed by the
		// csrgemm2 routines -- confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseZcsrgemm2_bufferSizeExt(cusparseContext handle,
															 int m,
															 int n,
															 int k,
															 CUdeviceptr alpha,
															 cusparseMatDescr descrA,
															 int nnzA,
															 CUdeviceptr csrSortedRowPtrA,
															 CUdeviceptr csrSortedColIndA,
															 cusparseMatDescr descrB,
															 int nnzB,
															 CUdeviceptr csrSortedRowPtrB,
															 CUdeviceptr csrSortedColIndB,
															 CUdeviceptr beta,
															 cusparseMatDescr descrD,
															 int nnzD,
															 CUdeviceptr csrSortedRowPtrD,
															 CUdeviceptr csrSortedColIndD,
															 csrgemm2Info info,
															 ref SizeT pBufferSizeInBytes);
Example #22
0
 // Native binding for cuCtxSetLimit (extern; the import attribute is not visible in this excerpt).
 // Both arguments are passed by value.
 // NOTE(review): presumably sets the context limit selected by `limit` to `value` -- confirm
 // against the CUDA Driver API reference.
 public static extern CUResult cuCtxSetLimit(CULimit limit, SizeT value);
		// Native binding for cusparseZgebsr2gebsr_bufferSizeExt (extern; the import attribute is not
		// visible in this excerpt). Matrix data is passed as device pointers; block dimensions for A and C
		// are passed by value; pBufferSize is passed by reference.
		// NOTE(review): presumably pBufferSize receives the workspace size in bytes needed by the
		// gebsr2gebsr conversion -- confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseZgebsr2gebsr_bufferSizeExt(cusparseContext handle,
															 cusparseDirection dirA,
															 int mb,
															 int nb,
															 int nnzb,
															 cusparseMatDescr descrA,
															 CUdeviceptr bsrValA,
															 CUdeviceptr bsrRowPtrA,
															 CUdeviceptr bsrColIndA,
															 int rowBlockDimA,
															 int colBlockDimA,
															 int rowBlockDimC,
															 int colBlockDimC,
															 ref SizeT pBufferSize);
Example #24
0
 // Native binding for cuCtxGetLimit (extern; the import attribute is not visible in this excerpt).
 // pvalue is passed by reference, limit by value.
 // NOTE(review): presumably pvalue receives the current value of the context limit selected by
 // `limit` -- confirm against the CUDA Driver API reference.
 public static extern CUResult cuCtxGetLimit(ref SizeT pvalue, CULimit limit);
		// Native binding for cusparseXcscsort_bufferSizeExt (extern; the import attribute is not visible
		// in this excerpt). Index arrays are passed as device pointers; pBufferSizeInBytes is passed by reference.
		// NOTE(review): presumably pBufferSizeInBytes receives the workspace size needed by the
		// CSC sort routines -- confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseXcscsort_bufferSizeExt(cusparseContext handle,
                                                            int m,
                                                            int n,
                                                            int nnz,
                                                            CUdeviceptr cscColPtrA,
                                                            CUdeviceptr cscRowIndA,
                                                            ref SizeT pBufferSizeInBytes);
Example #26
0
            // Native binding for cuLinkAddData (extern; the import attribute is not visible in this
            // excerpt). `data` is a managed byte array, `name` is marshalled as an ANSI string (LPStr),
            // and the options are given as two arrays.
            // NOTE(review): presumably `size` is the length of `data` in bytes and `numOptions` the
            // number of entries in `options`/`optionValues` -- confirm against the CUDA Driver API reference.
            public static extern CUResult cuLinkAddData(CUlinkState state, CUJITInputType type, byte[] data, SizeT size, [MarshalAs(UnmanagedType.LPStr)] string name,
				uint numOptions, CUJITOption[] options, IntPtr[] optionValues);
		// Native binding for cusparseZcsrsv2_bufferSizeExt (extern; the import attribute is not visible
		// in this excerpt). Matrix data is passed as device pointers; pBufferSize is passed by reference.
		// NOTE(review): presumably pBufferSize receives the workspace size in bytes needed by the
		// csrsv2 routines -- confirm against the cuSPARSE documentation.
		public static extern cusparseStatus cusparseZcsrsv2_bufferSizeExt(cusparseContext handle,
														cusparseOperation transA,
														int m,
														int nnz,
														cusparseMatDescr descrA,
														CUdeviceptr csrValA,
														CUdeviceptr csrRowPtrA,
														CUdeviceptr csrColIndA,
														csrsv2Info info,
														ref SizeT pBufferSize);
Example #28
0
			// Native binding for cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags (extern; the import
			// attribute is not visible in this excerpt). numBlocks is passed by reference.
			// NOTE(review): presumably numBlocks receives the maximum number of active blocks per
			// multiprocessor for `func` at the given block size and dynamic shared memory size --
			// confirm against the CUDA Driver API reference.
			public static extern CUResult cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(ref int numBlocks, CUfunction func, int blockSize, SizeT dynamicSMemSize, CUoccupancy_flags flags);
        /// <summary>
        /// Runs the base allocation, then reserves space in the network's shared memory blocks for
        /// this layer's weights, weight changes, bias, bias changes, last weight delta and stored output.
        /// For each item it records the block used, the offset (the block's count before it grows),
        /// and one slot index in the data-dims block. Statement order determines every offset.
        /// </summary>
        public override void AllocateMemory()
        {
            base.AllocateMemory();

            // The change data is assigned from the weight and bias data.
            // NOTE(review): presumably so both have the same dimensions -- confirm.
            m_weightChange = m_weight;
            m_biasChange = m_bias;

            // Weights: appended to the weights block, plus one data-dims slot.
            m_weightBlock = m_network.WeightsMemoryBlock;
            m_weightOffset = m_network.WeightsMemoryBlock.Count;
            m_network.WeightsMemoryBlock.Count += m_weight.Count;
            m_weightDimGPUPtrOffset = m_network.DataDimsMemoryBlock.Count;
            m_network.DataDimsMemoryBlock.Count++;

            // Weight changes: appended to the weight-changes block, plus one data-dims slot.
            m_weightChangeBlock = m_network.WeightChangesMemoryBlock;
            m_weightChangeOffset = m_network.WeightChangesMemoryBlock.Count;
            m_network.WeightChangesMemoryBlock.Count += m_weightChange.Count;
            m_weightChangeDimGPUPtrOffset = m_network.DataDimsMemoryBlock.Count;
            m_network.DataDimsMemoryBlock.Count++;

            // Bias: appended to the weights block directly after this layer's weights.
            m_biasBlock = m_network.WeightsMemoryBlock;
            m_biasOffset = m_network.WeightsMemoryBlock.Count;
            m_network.WeightsMemoryBlock.Count += m_bias.Count;
            m_biasDimGPUPtrOffset = m_network.DataDimsMemoryBlock.Count;
            m_network.DataDimsMemoryBlock.Count++;

            // Bias changes: appended to the weight-changes block after the weight changes.
            m_biasChangeBlock = m_network.WeightChangesMemoryBlock;
            m_biasChangeOffset = m_network.WeightChangesMemoryBlock.Count;
            m_network.WeightChangesMemoryBlock.Count += m_biasChange.Count;
            m_biasChangeDimGPUPtrOffset = m_network.DataDimsMemoryBlock.Count;
            m_network.DataDimsMemoryBlock.Count++;

            // Last weight delta: also stored in the weight-changes block.
            m_lastWeightDeltaBlock = m_network.WeightChangesMemoryBlock;
            m_lastWeightDeltaOffset = m_network.WeightChangesMemoryBlock.Count;
            m_network.WeightChangesMemoryBlock.Count += m_lastWeightDelta.Count;
            m_lastWeightDeltaDimGPUPtrOffset = m_network.DataDimsMemoryBlock.Count;
            m_network.DataDimsMemoryBlock.Count++;

            // Stored output: also stored in the weight-changes block.
            m_storedOutputBlock = m_network.WeightChangesMemoryBlock;
            m_storedOutputOffset = m_network.WeightChangesMemoryBlock.Count;
            m_network.WeightChangesMemoryBlock.Count += m_storedOutput.Count;
            m_storedOutputDimGPUPtrOffset = m_network.DataDimsMemoryBlock.Count;
            m_network.DataDimsMemoryBlock.Count++;
        }
Example #30
0
			// Native binding for cuOccupancyMaxPotentialBlockSizeWithFlags (extern; the import attribute
			// is not visible in this excerpt). minGridSize and blockSize are passed by reference;
			// blockSizeToDynamicSMemSize has the project type del_CUoccupancyB2DSize.
			// NOTE(review): presumably the two ref arguments receive a suggested minimum grid size and
			// block size for `func`, and the callback maps a block size to its dynamic shared memory
			// need -- confirm against the CUDA Driver API reference.
			public static extern CUResult cuOccupancyMaxPotentialBlockSizeWithFlags(ref int minGridSize, ref int blockSize, CUfunction func, del_CUoccupancyB2DSize blockSizeToDynamicSMemSize, SizeT dynamicSMemSize, int blockSizeLimit, CUoccupancy_flags flags);