Exemplo n.º 1
0
        /// <summary>
        /// Runs the CUDA provider's max-pool operation over this tensor's memory and wraps
        /// the returned buffers in new <see cref="Gpu3DTensor"/> instances.
        /// </summary>
        /// <param name="filterWidth">Filter width, forwarded unchanged to the provider.</param>
        /// <param name="filterHeight">Filter height, forwarded unchanged to the provider.</param>
        /// <param name="xStride">Horizontal stride, forwarded unchanged to the provider.</param>
        /// <param name="yStride">Vertical stride, forwarded unchanged to the provider.</param>
        /// <param name="saveIndices">
        /// When true, the indices buffer returned by the provider is wrapped and returned as well.
        /// </param>
        /// <returns>
        /// The pooled tensor plus the indices tensor; the indices element is null when
        /// <paramref name="saveIndices"/> is false.
        /// </returns>
        public (I3DTensor Result, I3DTensor Indices) MaxPool(int filterWidth, int filterHeight,
                                                             int xStride, int yStride, bool saveIndices)
        {
            Debug.Assert(IsValid);

            // NOTE(review): the literal 1 is presumably the tensor count for a single 3D tensor —
            // confirm against the TensorMaxPool signature.
            var pooled = _cuda.TensorMaxPool(Memory, _rows, _columns, _depth, 1,
                                             filterWidth, filterHeight, xStride, yStride, saveIndices);

            // The pooled output keeps this tensor's depth; rows/columns come from the provider result.
            // NOTE(review): the trailing `true` looks like an ownership flag — confirm in Gpu3DTensor.
            var result = new Gpu3DTensor(_cuda, pooled.Rows, pooled.Columns, _depth, pooled.Data, true);

            Gpu3DTensor indexTensor = null;
            if (saveIndices)
            {
                indexTensor = new Gpu3DTensor(_cuda, pooled.Rows, pooled.Columns, _depth, pooled.Indices, true);
            }

            return (result, indexTensor);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Multiplies the transpose of this tensor with <paramref name="tensor"/> in a single
        /// strided-batched cuBLAS SGEMM call, using <c>_depth</c> as the batch count.
        /// </summary>
        /// <param name="tensor">
        /// Right-hand operand. It is cast directly to <see cref="Gpu4DTensor"/>, so any other
        /// implementation fails the cast.
        /// </param>
        /// <returns>
        /// A new <see cref="Gpu3DTensor"/> constructed with dimensions
        /// (<c>_columns</c>, depth of <paramref name="tensor"/>, <c>_depth</c>).
        /// </returns>
        /// <exception cref="CudaBlasException">cuBLAS returned a status other than success.</exception>
        public I3DTensor TransposeThisAndMultiply(I4DTensor tensor)
        {
            var rhs = (Gpu4DTensor)tensor;

            // Debug.Assert is already compiled out of non-DEBUG builds, so no #if guard is needed.
            Debug.Assert(tensor.Count == Depth && IsValid && rhs.IsValid);

            var lhsPtr = Memory.DevicePointer;
            var rhsPtr = rhs.Memory.DevicePointer;

            int lhsRows = _rows;
            int lhsColumns = _columns;
            int rhsColumns = rhs.Depth;
            // Each right-hand batch entry is addressed as a (rows * columns) x depth matrix.
            int rhsRows = rhs.RowCount * rhs.ColumnCount;
            int rhsStride = rhsColumns * rhsRows;
            int resultStride = _columns * rhsColumns;

            float alpha = 1.0f;
            float beta = 0.0f;

            var result = new Gpu3DTensor(_cuda, _columns, rhsColumns, _depth,
                                         _cuda.Allocate(resultStride * _depth), true);

            // One strided call covers every batch entry; no per-slice pointer lists are built.
            var status = CudaBlasNativeMethods.cublasSgemmStridedBatched(
                _cuda.Blas.CublasHandle,
                Operation.Transpose,
                Operation.NonTranspose,
                lhsColumns,
                rhsColumns,
                rhsRows,
                ref alpha,
                lhsPtr,
                lhsRows,
                _blockSize,
                rhsPtr,
                rhsRows,
                rhsStride,
                ref beta,
                result.Memory.DevicePointer,
                lhsColumns,
                resultStride,
                _depth);

            if (status == CublasStatus.Success)
            {
                return result;
            }

            throw new CudaBlasException(status);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Multiplies this tensor by <paramref name="matrix"/> in a single strided-batched
        /// cuBLAS SGEMM call, using <c>_depth</c> as the batch count.
        /// </summary>
        /// <param name="matrix">
        /// Right-hand operand. It is cast directly to <see cref="GpuMatrix"/>, so any other
        /// implementation fails the cast.
        /// </param>
        /// <returns>
        /// A new <see cref="Gpu3DTensor"/> constructed with dimensions
        /// (<c>_rows</c>, column count of <paramref name="matrix"/>, <c>_depth</c>).
        /// </returns>
        /// <exception cref="CudaBlasException">cuBLAS returned a status other than success.</exception>
        public I3DTensor Multiply(IMatrix matrix)
        {
            var rhs = (GpuMatrix)matrix;
            var lhsPtr = Memory.DevicePointer;

            int lhsRows = _rows;
            int sharedDim = _columns;
            int rhsColumns = matrix.ColumnCount;
            int resultStride = _rows * rhsColumns;

            float alpha = 1.0f;
            float beta = 0.0f;

            var result = new Gpu3DTensor(_cuda, _rows, rhsColumns, _depth,
                                         _cuda.Allocate(resultStride * _depth), true);

            var status = CudaBlasNativeMethods.cublasSgemmStridedBatched(
                _cuda.Blas.CublasHandle,
                Operation.NonTranspose,
                Operation.NonTranspose,
                lhsRows,
                rhsColumns,
                sharedDim,
                ref alpha,
                lhsPtr,
                lhsRows,
                _blockSize,
                rhs.Memory.DevicePointer,
                sharedDim,
                0, // stride of 0 for the right-hand operand: every batch entry reads the same matrix
                ref beta,
                result.Memory.DevicePointer,
                lhsRows,
                resultStride,
                _depth);

            if (status == CublasStatus.Success)
            {
                return result;
            }

            throw new CudaBlasException(status);
        }