/// <summary>
/// Produces a new GPU vector holding the element-wise sum of this vector and <paramref name="vector"/>.
/// </summary>
/// <param name="vector">Vector to add; it is cast to <see cref="GpuVector"/> and, in debug builds, asserted to have the same <c>Count</c> as this one.</param>
/// <returns>A freshly allocated <see cref="GpuVector"/> wrapping the result buffer.</returns>
public IVector Add(IVector vector)
{
    Debug.Assert(IsValid && vector.IsValid);

    var rhs = (GpuVector)vector;
    Debug.Assert(rhs.Count == Count);

    // Seed the result buffer with a copy of the argument, then let Axpy
    // (standard BLAS: y = alpha * x + y, here with alpha = 1) accumulate this vector into it.
    var sum = _cuda.Allocate(rhs.Memory.Size);
    sum.CopyToDevice(rhs.Memory);
    _cuda.Blas.Axpy(1.0f, Memory.DeviceVariable, 1, sum.DeviceVariable, 1);

    return new GpuVector(_cuda, sum, true);
}
/// <summary>
/// Multiplies every depth slice of this tensor by the same matrix using a single
/// <c>cublasSgemmStridedBatched</c> call.
/// </summary>
/// <param name="matrix">Right-hand operand; it is cast to <see cref="GpuMatrix"/>. Its row count is asserted (debug builds only) to equal this tensor's column count.</param>
/// <returns>A new <see cref="Gpu3DTensor"/> created with <c>_rows</c> rows, <c>matrix.ColumnCount</c> columns and <c>_depth</c> slices.</returns>
/// <exception cref="CudaBlasException">Thrown when cuBLAS returns a status other than <c>CublasStatus.Success</c>.</exception>
public I3DTensor Multiply(IMatrix matrix)
{
    var other = (GpuMatrix)matrix;

    // Catch invalid or mis-shaped operands in debug builds before they reach the native call.
    System.Diagnostics.Debug.Assert(matrix.IsValid);
    System.Diagnostics.Debug.Assert(other.RowCount == _columns);

    var ptr = Memory.DevicePointer;
    int rowsA = _rows, columnsArowsB = _columns, columnsB = matrix.ColumnCount;
    float alpha = 1.0f, beta = 0.0f;

    var output = new Gpu3DTensor(
        _cuda,
        _rows,
        columnsB,
        _depth,
        _cuda.Allocate(_rows * columnsB * _depth),
        true
    );

    var status = CudaBlasNativeMethods.cublasSgemmStridedBatched(
        _cuda.Blas.CublasHandle,
        Operation.NonTranspose,
        Operation.NonTranspose,
        rowsA,
        columnsB,
        columnsArowsB,
        ref alpha,
        ptr,
        rowsA,
        _blockSize,                 // stride between consecutive slices of this tensor (presumably _rows * _columns - confirm)
        other.Memory.DevicePointer,
        columnsArowsB,
        0,                          // stride 0: the same matrix is used for every slice in the batch
        ref beta,
        output.Memory.DevicePointer,
        rowsA,
        _rows * columnsB,           // stride between consecutive output slices
        _depth                      // batch count: one GEMM per depth slice
    );
    if (status != CublasStatus.Success)
    {
        throw new CudaBlasException(status);
    }

    return output;
}
/// <summary>
/// Produces a new GPU matrix holding the element-wise sum of this matrix and <paramref name="matrix"/>.
/// </summary>
/// <param name="matrix">Matrix to add; it is cast to <see cref="GpuMatrix"/> and, in debug builds, asserted to have the same row and column counts as this one.</param>
/// <returns>A freshly allocated <see cref="GpuMatrix"/> with this matrix's <c>RowCount</c> and <c>ColumnCount</c>.</returns>
public IMatrix Add(IMatrix matrix)
{
    Debug.Assert(IsValid && matrix.IsValid);

    var rhs = (GpuMatrix)matrix;
    Debug.Assert(rhs.RowCount == RowCount && rhs.ColumnCount == ColumnCount);

    // Seed the result buffer with a copy of the argument, then let Axpy
    // (standard BLAS: y = alpha * x + y, here with alpha = 1) accumulate this matrix into it.
    var sum = _cuda.Allocate(rhs.Memory.Size);
    sum.CopyToDevice(rhs.Memory);
    _cuda.Blas.Axpy(1.0f, Memory.DeviceVariable, 1, sum.DeviceVariable, 1);

    return new GpuMatrix(_cuda, RowCount, ColumnCount, sum, true);
}