Beispiel #1
0
        /// <summary>
        /// Produces the element-wise sum of this vector and <paramref name="vector"/> in a newly allocated buffer.
        /// </summary>
        /// <param name="vector">Vector to add. It is cast to <see cref="GpuVector"/> and, in debug builds,
        /// asserted to be valid and to have the same <c>Count</c> as this vector.</param>
        /// <returns>A new <see cref="GpuVector"/> wrapping the freshly allocated result buffer.</returns>
        public IVector Add(IVector vector)
        {
            Debug.Assert(IsValid && vector.IsValid);
            var rhs = (GpuVector)vector;
            Debug.Assert(rhs.Count == Count);

            const float scale = 1.0f;
            const int stride = 1;

            // Seed the result buffer from the argument, then accumulate this vector into it
            // with a BLAS axpy call (scale factor 1, unit stride on both operands).
            var sum = _cuda.Allocate(rhs.Memory.Size);
            sum.CopyToDevice(rhs.Memory);
            _cuda.Blas.Axpy(scale, Memory.DeviceVariable, stride, sum.DeviceVariable, stride);

            return new GpuVector(_cuda, sum, true);
        }
Beispiel #2
0
        /// <summary>
        /// Multiplies every one of this tensor's <c>_depth</c> matrices by <paramref name="matrix"/> using a
        /// single strided-batched cuBLAS SGEMM call.
        /// </summary>
        /// <param name="matrix">Right-hand operand, cast to <see cref="GpuMatrix"/>. The same matrix is used
        /// for every batch entry. In debug builds its row count is asserted to equal <c>_columns</c>.</param>
        /// <returns>A new <see cref="Gpu3DTensor"/> constructed with <c>_rows</c>, <c>matrix.ColumnCount</c>
        /// and <c>_depth</c> over a freshly allocated buffer.</returns>
        /// <exception cref="CudaBlasException">cuBLAS reported a status other than
        /// <see cref="CublasStatus.Success"/>.</exception>
        public I3DTensor Multiply(IMatrix matrix)
        {
            var other = (GpuMatrix)matrix;

            // The GEMM below uses _columns as both the inner dimension and B's leading dimension, so a
            // right-hand matrix with a different row count would make cuBLAS read the wrong device memory
            // instead of failing cleanly. Fully qualified so no extra using directive is required.
            System.Diagnostics.Debug.Assert(other.RowCount == _columns);

            var ptr = Memory.DevicePointer;
            int rowsA = _rows, columnsArowsB = _columns, columnsB = matrix.ColumnCount;

            // alpha = 1 and beta = 0: the output is overwritten with the plain product.
            float alpha = 1.0f, beta = 0.0f;

            var output = new Gpu3DTensor(
                _cuda,
                _rows,
                columnsB,
                _depth,
                _cuda.Allocate(_rows * columnsB * _depth),
                true);

            var status = CudaBlasNativeMethods.cublasSgemmStridedBatched(
                _cuda.Blas.CublasHandle,
                Operation.NonTranspose,
                Operation.NonTranspose,
                rowsA,                          // m
                columnsB,                       // n
                columnsArowsB,                  // k
                ref alpha,
                ptr,                            // A: this tensor's data
                rowsA,                          // lda
                _blockSize,                     // stride between consecutive A matrices
                other.Memory.DevicePointer,     // B
                columnsArowsB,                  // ldb
                0,                              // stride 0: every batch entry reuses the same B
                ref beta,
                output.Memory.DevicePointer,    // C
                rowsA,                          // ldc
                _rows * columnsB,               // stride between consecutive C matrices
                _depth);                        // batch count

            if (status != CublasStatus.Success)
            {
                throw new CudaBlasException(status);
            }

            return output;
        }
Beispiel #3
0
        /// <summary>
        /// Adds <paramref name="matrix"/> to this matrix element by element and returns the sum as a new matrix.
        /// </summary>
        /// <param name="matrix">Matrix to add. It is cast to <see cref="GpuMatrix"/> and, in debug builds,
        /// asserted to be valid and to match this matrix's row and column counts.</param>
        /// <returns>A new <see cref="GpuMatrix"/> with this matrix's <c>RowCount</c> and <c>ColumnCount</c>,
        /// backed by a freshly allocated buffer.</returns>
        public IMatrix Add(IMatrix matrix)
        {
            Debug.Assert(IsValid && matrix.IsValid);

            var addend = (GpuMatrix)matrix;
            Debug.Assert(addend.RowCount == RowCount && addend.ColumnCount == ColumnCount);

            // Start from a copy of the argument's buffer so the axpy call can accumulate into it.
            var addendMemory = addend.Memory;
            var result = _cuda.Allocate(addendMemory.Size);
            result.CopyToDevice(addendMemory);

            // BLAS axpy with a scale factor of 1 and unit strides: the whole buffer is treated as one flat vector.
            _cuda.Blas.Axpy(1.0f, Memory.DeviceVariable, 1, result.DeviceVariable, 1);

            return new GpuMatrix(_cuda, RowCount, ColumnCount, result, true);
        }