Exemplo n.º 1
0
        /// <summary>
        /// Element-wise addition of this vector and <paramref name="vector"/>.
        /// The result is written to a freshly allocated device buffer.
        /// </summary>
        /// <param name="vector">
        /// Vector to add. It is cast to <see cref="GpuVector"/>, so any other
        /// implementation fails with <see cref="InvalidCastException"/>.
        /// </param>
        /// <returns>A new <see cref="GpuVector"/> wrapping the result buffer.</returns>
        public IVector Add(IVector vector)
        {
            Debug.Assert(IsValid && vector.IsValid);
            var rhs = (GpuVector)vector;
            Debug.Assert(rhs.Count == Count);

            // Seed the result buffer with a copy of the right-hand operand...
            var sum = _cuda.Allocate(rhs._data.Size);
            sum.CopyToDevice(rhs._data);

            // ...then accumulate this vector into it with a unit-stride axpy
            // (BLAS convention: y = alpha * x + y, here with alpha = 1).
            const float alpha = 1.0f;
            _cuda.Blas.Axpy(alpha, _data.DeviceVariable, 1, sum.DeviceVariable, 1);

            return new GpuVector(_cuda, sum, true);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Creates a <paramref name="rows"/> x <paramref name="columns"/> matrix whose
        /// elements are produced by <paramref name="init"/>, uploads the values to a
        /// newly allocated device buffer and registers the instance with the provider.
        /// </summary>
        /// <param name="cuda">Provider used for allocation and registration.</param>
        /// <param name="rows">Number of rows.</param>
        /// <param name="columns">Number of columns.</param>
        /// <param name="init">
        /// Callback invoked as <c>init(row, column)</c> exactly once per element,
        /// walking down each column before moving on to the next one.
        /// </param>
        public GpuMatrix(CudaProvider cuda, int rows, int columns, Func<int, int, float> init)
        {
            _cuda = cuda;
            _rows = rows;
            _columns = columns;

            // Stage the values on the host in column-major order: column c occupies
            // the contiguous index range [c * rows, (c + 1) * rows).
            var host = new float[rows * columns];
            var offset = 0;
            for (var column = 0; column < columns; column++)
            {
                for (var row = 0; row < rows; row++)
                {
                    host[offset++] = init(row, column);
                }
            }

            _data = cuda.Allocate(host.Length);
            _data.CopyToDevice(host);
            cuda.Register(this);

#if DEBUG
            // Debug aid: stop in the debugger when this instance carries the id
            // flagged in _badAlloc (where _id is assigned is not visible here).
            if (_id == _badAlloc)
            {
                Debugger.Break();
            }
#endif
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reshapes this tensor into a matrix with <c>ColumnCount * RowCount</c> rows
        /// and <c>Depth</c> columns, written to a newly allocated device buffer.
        /// </summary>
        /// <returns>A new <see cref="GpuMatrix"/> wrapping the converted data.</returns>
        public IMatrix ConvertToMatrix()
        {
            Debug.Assert(IsValid);

            var matrixRows = ColumnCount * RowCount;
            var matrixColumns = Depth;
            var buffer = _cuda.Allocate(matrixRows * matrixColumns);

            // Single() requires _tensorInfo to hold exactly one entry and throws otherwise.
            var source = _tensorInfo.Value.Single();

            _cuda.TensorConvertToMatrix(source, ColumnCount, RowCount, matrixRows, matrixColumns, buffer);
            return new GpuMatrix(_cuda, matrixRows, matrixColumns, buffer);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Creates a vector of <paramref name="size"/> elements produced by
        /// <paramref name="init"/>, uploads the values to a newly allocated device
        /// buffer and registers the instance with the provider.
        /// </summary>
        /// <param name="cuda">Provider used for allocation and registration.</param>
        /// <param name="size">Number of elements.</param>
        /// <param name="init">Callback invoked as <c>init(index)</c> once per element, in ascending index order.</param>
        public GpuVector(CudaProvider cuda, int size, Func<int, float> init)
        {
            _cuda = cuda;

            // Build the values on the host first, then upload them in one copy.
            var host = new float[size];
            for (var index = 0; index < host.Length; index++)
            {
                host[index] = init(index);
            }

            _data = cuda.Allocate(size);
            _data.CopyToDevice(host);
            cuda.Register(this);
#if DEBUG
            // Debug aid: stop in the debugger when this instance carries the id
            // flagged in _badAlloc (where _id is assigned is not visible here).
            if (_id == _badAlloc)
            {
                Debugger.Break();
            }
#endif
        }
Exemplo n.º 5
0
        /// <summary>
        /// Multiplies every depth slice of this tensor by <paramref name="matrix"/>
        /// with a single strided-batched SGEMM call.
        /// </summary>
        /// <param name="matrix">
        /// Right-hand operand, shared by all slices. It is cast to
        /// <see cref="GpuMatrix"/>, so any other implementation fails with
        /// <see cref="InvalidCastException"/>.
        /// </param>
        /// <returns>
        /// A new tensor with <c>_rows</c> rows, <c>matrix.ColumnCount</c> columns and
        /// <c>_depth</c> slices.
        /// </returns>
        /// <exception cref="CudaBlasException">cuBLAS returned a status other than success.</exception>
        public I3DTensor Multiply(IMatrix matrix)
        {
            var rhs = (GpuMatrix)matrix;
            var lhsPointer = _data.DevicePointer;

            // Per-slice GEMM dimensions: (m x k) * (k x n) = (m x n).
            int m = _rows;
            int k = _columns;
            int n = matrix.ColumnCount;

            // The native entry point takes the scalars by reference, so they must be locals.
            float alpha = 1.0f;
            float beta = 0.0f;

            var result = new Gpu3DTensor(_cuda, _rows, n, _depth, _cuda.Allocate(_rows * n * _depth), true);

            // One call covers all _depth slices; this replaces an earlier variant that
            // built per-slice pointer lists for cublasSgemmBatched.
            var status = CudaBlasNativeMethods.cublasSgemmStridedBatched(
                _cuda.Blas.CublasHandle,
                Operation.NonTranspose,
                Operation.NonTranspose,
                m,
                n,
                k,
                ref alpha,
                lhsPointer, m, _blockSize,                  // A: advance _blockSize elements per slice
                rhs.Memory.DevicePointer, k, 0,             // B: stride 0, the same matrix for every slice
                ref beta,
                result.Memory.DevicePointer, m, _rows * n,  // C: one m x n block per slice
                _depth);

            if (status == CublasStatus.Success)
            {
                return result;
            }

            throw new CudaBlasException(status);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Element-wise addition of this matrix and <paramref name="matrix"/>.
        /// The result is written to a freshly allocated device buffer.
        /// </summary>
        /// <param name="matrix">
        /// Matrix to add. It is cast to <see cref="GpuMatrix"/>, so any other
        /// implementation fails with <see cref="InvalidCastException"/>.
        /// </param>
        /// <returns>A new <see cref="GpuMatrix"/> with the same dimensions as this one.</returns>
        public IMatrix Add(IMatrix matrix)
        {
            Debug.Assert(IsValid && matrix.IsValid);
            var rhs = (GpuMatrix)matrix;
            Debug.Assert(rhs._rows == _rows && rhs._columns == _columns);

            // Seed the result buffer with a copy of the right-hand operand...
            var sum = _cuda.Allocate(rhs._data.Size);
            sum.CopyToDevice(rhs._data);

            // ...then accumulate this matrix into it with a unit-stride axpy over
            // the whole buffer (BLAS convention: y = alpha * x + y, alpha = 1).
            const float alpha = 1.0f;
            _cuda.Blas.Axpy(alpha, _data.DeviceVariable, 1, sum.DeviceVariable, 1);

            return new GpuMatrix(_cuda, _rows, _columns, sum);
        }