コード例 #1
0
ファイル: CudaProvider.cs プロジェクト: fcmai/brightwire
        /// <summary>
        /// Builds a <see cref="Gpu3DTensor"/> from an indexable tensor by passing each entry of
        /// <c>tensor.Data</c> through <c>Create</c> and casting the result to <see cref="GpuMatrix"/>.
        /// </summary>
        /// <param name="tensor">Source tensor supplying the data entries and the row/column/depth counts.</param>
        /// <returns>The newly constructed tensor, owned by this provider.</returns>
        public I3DTensor CreateTensor(IIndexable3DTensor tensor)
        {
            // Materialise the list up front so every slice is created before the tensor wraps them.
            var gpuSlices = tensor.Data
                .Select(slice => (GpuMatrix)Create(slice))
                .ToList();

            return new Gpu3DTensor(this, tensor.RowCount, tensor.ColumnCount, tensor.Depth, gpuSlices);
        }
コード例 #2
0
ファイル: Gpu3DTensor.cs プロジェクト: zheng1748/brightwire
        /// <summary>
        /// Runs the CUDA provider's <c>TensorMaxPool</c> over this tensor's data and wraps the output
        /// (and, optionally, the index buffer) in new <see cref="Gpu3DTensor"/> instances.
        /// </summary>
        /// <param name="filterWidth">Filter width forwarded to <c>TensorMaxPool</c>.</param>
        /// <param name="filterHeight">Filter height forwarded to <c>TensorMaxPool</c>.</param>
        /// <param name="xStride">Horizontal stride forwarded to <c>TensorMaxPool</c>.</param>
        /// <param name="yStride">Vertical stride forwarded to <c>TensorMaxPool</c>.</param>
        /// <param name="saveIndices">When true, the returned tuple also carries a tensor built from the pooling indices.</param>
        /// <returns>The pooled tensor, plus the index tensor or <c>null</c> when <paramref name="saveIndices"/> is false.</returns>
        public (I3DTensor Result, I3DTensor Indices) MaxPool(int filterWidth, int filterHeight, int xStride, int yStride, bool saveIndices)
        {
            Debug.Assert(IsValid);

            // NOTE(review): the literal 1 after _depth is presumably the tensor count - confirm against TensorMaxPool's signature.
            var pooled = _cuda.TensorMaxPool(_data, _rows, _columns, _depth, 1, filterWidth, filterHeight, xStride, yStride, saveIndices);

            var result = new Gpu3DTensor(_cuda, pooled.Rows, pooled.Columns, _depth, pooled.Data, true);

            // The index tensor is only constructed on request; otherwise null is returned in its place.
            Gpu3DTensor indexTensor = null;
            if (saveIndices)
            {
                indexTensor = new Gpu3DTensor(_cuda, pooled.Rows, pooled.Columns, _depth, pooled.Indices, true);
            }

            return (result, indexTensor);
        }
コード例 #3
0
ファイル: Gpu4DTensor.cs プロジェクト: lulzzz/brightwire
        /// <summary>
        /// Returns the 3D tensor at <paramref name="index"/> by converting each entry of the
        /// corresponding sub-vector list into a matrix of <c>_rows</c> by <c>_columns</c>.
        /// </summary>
        /// <param name="index">Position within the lazily evaluated <c>_subVector</c> collection.</param>
        /// <returns>A new <see cref="Gpu3DTensor"/> wrapping the converted matrices.</returns>
        public I3DTensor GetTensorAt(int index)
        {
            // ConvertInPlaceToMatrix is called once per entry; the results are cast to GpuMatrix and materialised.
            var slices = _subVector.Value[index]
                .Select(vector => vector.ConvertInPlaceToMatrix(_rows, _columns))
                .Cast<GpuMatrix>()
                .ToList();

            return new Gpu3DTensor(_cuda, _rows, _columns, _depth, slices);
        }
コード例 #4
0
ファイル: Gpu3DTensor.cs プロジェクト: prosa100/brightwire
        /// <summary>
        /// Runs the CUDA provider's <c>TensorMaxPool</c> with a single stride for both axes and wraps each
        /// returned item in a <see cref="GpuMatrix"/> of the pooled size.
        /// </summary>
        /// <param name="filterWidth">Filter width; also used to derive the output column count.</param>
        /// <param name="filterHeight">Filter height; also used to derive the output row count.</param>
        /// <param name="stride">Stride used for both dimensions.</param>
        /// <param name="calculateIndex">When true, the second and third items of each pooling result are returned as the index list.</param>
        /// <returns>The pooled tensor, plus the index list or <c>null</c> when <paramref name="calculateIndex"/> is false.</returns>
        public (I3DTensor Result, IReadOnlyList <(object X, object Y)> Index) MaxPool(int filterWidth, int filterHeight, int stride, bool calculateIndex)
        {
            Debug.Assert(IsValid);

            // Output extent per axis: (input - filter) / stride + 1, using integer division.
            var pooledColumns = (ColumnCount - filterWidth) / stride + 1;
            var pooledRows = (RowCount - filterHeight) / stride + 1;

            var pooled = _cuda.TensorMaxPool(_tensorInfo.Value.Single(), RowCount, ColumnCount, filterWidth, filterHeight, stride, calculateIndex);

            var result = new Gpu3DTensor(
                _cuda,
                pooledRows,
                pooledColumns,
                Depth,
                pooled.Select(item => new GpuMatrix(_cuda, pooledRows, pooledColumns, item.Item1)).ToList());

            // Item2/Item3 of each pooling result form the (X, Y) index pair; skipped entirely unless requested.
            List<(object X, object Y)> index = calculateIndex
                ? pooled.Select(item => (item.Item2, item.Item3)).ToList()
                : null;

            return (result, index);
        }
コード例 #5
0
ファイル: Gpu3DTensor.cs プロジェクト: zheng1748/brightwire
        /// <summary>
        /// Multiplies the transpose of each depth slice of this tensor by the matching block of
        /// <paramref name="tensor"/>, issuing a single <c>cublasSgemmStridedBatched</c> call for all slices.
        /// </summary>
        /// <param name="tensor">
        /// Right-hand operand; cast to <see cref="Gpu4DTensor"/>. Debug builds assert that its
        /// <c>Count</c> equals this tensor's <c>Depth</c> and that both tensors are valid.
        /// </param>
        /// <returns>
        /// A new <see cref="Gpu3DTensor"/> constructed with <c>_columns</c>, <c>tensor.Depth</c> and <c>_depth</c>
        /// as its dimensions, backed by freshly allocated memory.
        /// </returns>
        /// <exception cref="CudaBlasException">Thrown when cuBLAS reports a status other than <c>Success</c>.</exception>
        public I3DTensor TransposeThisAndMultiply(I4DTensor tensor)
        {
            var gpuTensor = (Gpu4DTensor)tensor;

            // Debug.Assert is already compiled out of non-DEBUG builds, so no #if guard is needed here.
            Debug.Assert(tensor.Count == Depth && IsValid && gpuTensor.IsValid);

            var aPointer = _data.DevicePointer;
            var bPointer = gpuTensor.Memory.DevicePointer;

            // GEMM dimensions: A is (_rows x _columns) and is transposed; B is (innerSize x outputColumns).
            int leadingA = _rows;
            int outputRows = _columns;
            int outputColumns = gpuTensor.Depth;
            int innerSize = gpuTensor.RowCount * gpuTensor.ColumnCount;

            // Element offsets between consecutive batch entries of B and of the output.
            int strideB = outputColumns * innerSize;
            int strideC = outputRows * outputColumns;

            float alpha = 1.0f;
            float beta = 0.0f;

            var result = new Gpu3DTensor(_cuda, outputRows, outputColumns, _depth, _cuda.Allocate(strideC * _depth), true);

            // One strided-batched call covers every slice; an earlier variant built per-slice pointer
            // arrays and called cublasSgemmBatched instead.
            var status = CudaBlasNativeMethods.cublasSgemmStridedBatched(
                _cuda.Blas.CublasHandle,
                Operation.Transpose,
                Operation.NonTranspose,
                outputRows,
                outputColumns,
                innerSize,
                ref alpha,
                aPointer,
                leadingA,
                _blockSize,
                bPointer,
                innerSize,
                strideB,
                ref beta,
                result.Memory.DevicePointer,
                outputRows,
                strideC,
                _depth);

            if (status != CublasStatus.Success)
            {
                throw new CudaBlasException(status);
            }

            return result;
        }
コード例 #6
0
ファイル: Gpu3DTensor.cs プロジェクト: zheng1748/brightwire
        /// <summary>
        /// Multiplies every depth slice of this tensor by the same matrix, issuing a single
        /// <c>cublasSgemmStridedBatched</c> call for all slices.
        /// </summary>
        /// <param name="matrix">
        /// Right-hand operand; cast to <see cref="GpuMatrix"/>. Its row count must equal this tensor's
        /// column count (checked in debug builds).
        /// </param>
        /// <returns>
        /// A new <see cref="Gpu3DTensor"/> constructed with <c>_rows</c>, <c>matrix.ColumnCount</c> and <c>_depth</c>
        /// as its dimensions, backed by freshly allocated memory.
        /// </returns>
        /// <exception cref="CudaBlasException">Thrown when cuBLAS reports a status other than <c>Success</c>.</exception>
        public I3DTensor Multiply(IMatrix matrix)
        {
            var other = (GpuMatrix)matrix;

            // Same precondition style as the sibling tensor operations: fail fast in debug builds rather
            // than handing cuBLAS a released buffer or a B matrix whose leading dimension does not match.
            Debug.Assert(IsValid);
            Debug.Assert(matrix.RowCount == _columns, "matrix.RowCount must equal the tensor's column count");

            var ptr = _data.DevicePointer;
            int rowsA = _rows, columnsArowsB = _columns, columnsB = matrix.ColumnCount;
            float alpha = 1.0f, beta = 0.0f;
            var output = new Gpu3DTensor(_cuda, _rows, columnsB, _depth, _cuda.Allocate(_rows * columnsB * _depth), true);

            var status = CudaBlasNativeMethods.cublasSgemmStridedBatched(
                _cuda.Blas.CublasHandle,
                Operation.NonTranspose,
                Operation.NonTranspose,
                rowsA,
                columnsB,
                columnsArowsB,
                ref alpha,
                ptr,
                rowsA,
                _blockSize,
                other.Memory.DevicePointer,
                columnsArowsB,
                0, // stride of 0 for B: every batch entry reuses the same right-hand matrix
                ref beta,
                output.Memory.DevicePointer,
                rowsA,
                _rows * columnsB,
                _depth);

            if (status != CublasStatus.Success)
            {
                throw new CudaBlasException(status);
            }

            return output;
        }