/// <summary>
/// Launches the <c>_sumColumns</c> kernel over <paramref name="a"/> and returns its output
/// in a newly allocated block of <paramref name="columns"/> elements.
/// </summary>
/// <param name="a">Device block holding the kernel input.</param>
/// <param name="rows">Row count forwarded to the kernel.</param>
/// <param name="columns">Column count; also the launch size and the size of the returned block.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr SumColumns(IDeviceMemoryPtr a, int rows, int columns)
{
    var columnSums = Allocate(columns);
    _Use(
        _sumColumns,
        columns,
        kernel => kernel.Run(0, a.DevicePointer, columnSums.DevicePointer, rows, columns)
    );
    return columnSums;
}
/// <summary>
/// Launches the <c>_softmaxDerivative</c> kernel on a <paramref name="size"/> x <paramref name="size"/>
/// grid and returns its output in a newly allocated block of <c>size * size</c> elements.
/// </summary>
/// <param name="a">Device block holding the kernel input.</param>
/// <param name="size">Input length; used for both launch dimensions.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr VectorSoftmaxDerivative(IDeviceMemoryPtr a, int size)
{
    var derivative = Allocate(size * size);
    _Use(
        _softmaxDerivative,
        size,
        size,
        kernel => kernel.Run(0, a.DevicePointer, derivative.DevicePointer, size)
    );
    return derivative;
}
/// <summary>
/// Creates a matrix whose elements are computed on the host by <paramref name="init"/>
/// and then copied into a newly allocated device block.
/// </summary>
/// <param name="cuda">Provider used to allocate the block and register this instance.</param>
/// <param name="rows">Number of rows.</param>
/// <param name="columns">Number of columns.</param>
/// <param name="init">Called as <c>init(row, column)</c> once for every element.</param>
public GpuMatrix(CudaProvider cuda, int rows, int columns, Func<int, int, float> init)
{
    _cuda = cuda;
    _rows = rows;
    _columns = columns;

    // Host staging buffer: element (row, column) lives at index column * rows + row.
    var elementCount = rows * columns;
    var hostBuffer = new float[elementCount];
    for (var column = 0; column < columns; column++)
    {
        for (var row = 0; row < rows; row++)
        {
            hostBuffer[column * rows + row] = init(row, column);
        }
    }

    _data = cuda.Allocate(elementCount);
    _data.CopyToDevice(hostBuffer);
    cuda.Register(this);
#if DEBUG
    if (_id == _badAlloc)
    {
        Debugger.Break();
    }
#endif
}
/// <summary>
/// Wraps an existing device pointer as a float device variable and takes a reference
/// on the block it belongs to.
/// </summary>
/// <param name="context">CUDA context stored for later use.</param>
/// <param name="rootBlock">Block this pointer is associated with; <c>AddRef</c> is called on it.</param>
/// <param name="ptr">Raw device pointer to wrap.</param>
/// <param name="size">Size passed to the <c>CudaDeviceVariable</c> constructor.</param>
public PtrToMemory(CudaContext context, IDeviceMemoryPtr rootBlock, CUdeviceptr ptr, SizeT size)
{
    _context = context;
    _ptr = new CudaDeviceVariable<float>(ptr, size);
    _rootBlock = rootBlock;

    // Reference is taken only after the wrapper has been constructed successfully.
    rootBlock.AddRef();
}
/// <summary>
/// Launches the <c>_leakyReluDerivative</c> kernel over <paramref name="a"/> and returns
/// its output in a newly allocated block of <paramref name="size"/> elements.
/// </summary>
/// <param name="a">Device block holding the kernel input.</param>
/// <param name="size">Element count; also the launch size.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr LeakyRELUDerivative(IDeviceMemoryPtr a, int size)
{
    var output = Allocate(size);
    _Use(
        _leakyReluDerivative,
        size,
        kernel => kernel.Run(0, a.DevicePointer, output.DevicePointer, size)
    );
    return output;
}
/// <summary>
/// Runs the <c>_euclideanDistance</c> kernel on <paramref name="a"/> and <paramref name="b"/>,
/// sums the kernel output with <c>SumValues</c> and returns the square root of that sum.
/// </summary>
/// <param name="a">First device block.</param>
/// <param name="b">Second device block.</param>
/// <param name="size">Element count; also the launch size.</param>
/// <returns>Square root of the summed kernel output, converted to <see cref="float"/>.</returns>
internal float EuclideanDistance(IDeviceMemoryPtr a, IDeviceMemoryPtr b, int size)
{
    var scratch = Allocate(size);
    try
    {
        _Use(
            _euclideanDistance,
            size,
            kernel => kernel.Run(0, a.DevicePointer, b.DevicePointer, scratch.DevicePointer, size)
        );
        return Convert.ToSingle(Math.Sqrt(SumValues(scratch, size)));
    }
    finally
    {
        // SumValues never frees the block it is handed, so the scratch block
        // must be released here or it leaks on every call.
        scratch.Free();
    }
}
/// <summary>
/// Repeatedly reduces <paramref name="a"/> with the <c>_findStdDev</c> kernel until at most
/// <c>BLOCK_DIM2</c> values remain, sums those on the host, divides by the original
/// <paramref name="size"/> and returns the square root.
/// </summary>
/// <param name="a">Device block holding the input values; never freed by this method.</param>
/// <param name="size">Number of input values. Values of zero or less yield <c>0</c>.</param>
/// <param name="mean">Mean forwarded to the kernel on every pass.</param>
/// <returns>The computed value, or <c>0</c> when <paramref name="size"/> is not positive.</returns>
/// <remarks>
/// NOTE(review): when <paramref name="size"/> is at most <c>BLOCK_DIM2</c> on entry no kernel is
/// launched and the raw contents of <paramref name="a"/> are summed; every later pass also
/// re-applies <c>_findStdDev</c> (with the same mean) to the previous pass's output.
/// Confirm both are intended against the kernel source.
/// </remarks>
internal float FindStdDev(IDeviceMemoryPtr a, int size, float mean)
{
    if (size <= 0)
    {
        return 0f;
    }

    var originalCount = size;
    var current = a;
    while (size > BLOCK_DIM2)
    {
        var partialCount = (size / BLOCK_DIM2) + 1;
        var partials = Allocate(partialCount);
        _Use(
            _findStdDev,
            size,
            kernel => kernel.Run(BLOCK_DIM2, current.DevicePointer, size, mean, partials.DevicePointer)
        );

        // Intermediate blocks are owned here; the caller's block is left alone.
        if (current != a)
        {
            current.Free();
        }
        size = partialCount;
        current = partials;
    }

    var hostValues = new float[size];
    current.CopyToHost(hostValues);
    if (current != a)
    {
        current.Free();
    }
    return Convert.ToSingle(Math.Sqrt(hostValues.Sum() / originalCount));
}
/// <summary>
/// Launches the <c>_sqrt</c> kernel over <paramref name="a"/> and returns its output in a
/// newly allocated block of <paramref name="size"/> elements.
/// </summary>
/// <param name="a">Device block holding the kernel input.</param>
/// <param name="size">Element count; also the launch size.</param>
/// <param name="valueAdjustment">Adjustment value forwarded to the kernel.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr Sqrt(IDeviceMemoryPtr a, int size, float valueAdjustment)
{
    var output = Allocate(size);
    _Use(
        _sqrt,
        size,
        kernel => kernel.Run(0, a.DevicePointer, output.DevicePointer, size, valueAdjustment)
    );
    return output;
}
/// <summary>
/// Launches the <c>_pow</c> kernel over <paramref name="a"/> and returns its output in a
/// newly allocated block of <paramref name="size"/> elements.
/// </summary>
/// <param name="a">Device block holding the kernel input.</param>
/// <param name="size">Element count; also the launch size.</param>
/// <param name="power">Exponent forwarded to the kernel.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr Pow(IDeviceMemoryPtr a, int size, float power)
{
    var output = Allocate(size);
    _Use(
        _pow,
        size,
        kernel => kernel.Run(0, a.DevicePointer, output.DevicePointer, size, power)
    );
    return output;
}
/// <summary>
/// Runs the <c>_manhattanDistance</c> kernel on <paramref name="a"/> and <paramref name="b"/>
/// and returns the sum of the kernel output as computed by <c>SumValues</c>.
/// </summary>
/// <param name="a">First device block.</param>
/// <param name="b">Second device block.</param>
/// <param name="size">Element count; also the launch size.</param>
/// <returns>The summed kernel output.</returns>
internal float ManhattanDistance(IDeviceMemoryPtr a, IDeviceMemoryPtr b, int size)
{
    var scratch = Allocate(size);
    try
    {
        _Use(
            _manhattanDistance,
            size,
            kernel => kernel.Run(0, a.DevicePointer, b.DevicePointer, scratch.DevicePointer, size)
        );
        return SumValues(scratch, size);
    }
    finally
    {
        // SumValues never frees the block it is handed, so the scratch block
        // must be released here or it leaks on every call.
        scratch.Free();
    }
}
/// <summary>
/// Sums the values in <paramref name="a"/>: the <c>_findSum</c> kernel is applied repeatedly
/// until at most <c>BLOCK_DIM2</c> values remain, and those are added up on the host.
/// </summary>
/// <param name="a">Device block holding the input values; never freed by this method.</param>
/// <param name="size">Number of values to sum.</param>
/// <returns>Host-side sum of the values left after the last kernel pass.</returns>
internal float SumValues(IDeviceMemoryPtr a, int size)
{
    var current = a;
    while (size > BLOCK_DIM2)
    {
        var partialCount = (size / BLOCK_DIM2) + 1;
        var partials = Allocate(partialCount);
        _Use(
            _findSum,
            size,
            kernel => kernel.Run(BLOCK_DIM2, current.DevicePointer, size, partials.DevicePointer)
        );

        // Only blocks allocated inside this loop are released; the caller keeps ownership of `a`.
        if (current != a)
        {
            current.Free();
        }
        size = partialCount;
        current = partials;
    }

    var hostValues = new float[size];
    current.CopyToHost(hostValues);
    if (current != a)
    {
        current.Free();
    }
    return hostValues.Sum();
}
/// <summary>
/// Launches the <c>_softmaxVector</c> kernel over <paramref name="a"/> and returns its output
/// in a newly allocated block of <paramref name="size"/> elements.
/// </summary>
/// <param name="a">Device block holding the kernel input.</param>
/// <param name="size">Element count; also the launch size.</param>
/// <param name="max">Value forwarded to the kernel as its last argument.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr SoftmaxVector(IDeviceMemoryPtr a, int size, float max)
{
    var output = Allocate(size);
    _Use(
        _softmaxVector,
        size,
        kernel => kernel.Run(0, a.DevicePointer, output.DevicePointer, size, max)
    );
    return output;
}
/// <summary>
/// Launches the <c>_sigmoidDerivative</c> kernel over <paramref name="a"/> and returns its
/// output in a newly allocated block of <paramref name="size"/> elements.
/// </summary>
/// <param name="a">Device block holding the kernel input.</param>
/// <param name="size">Element count; also the launch size.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr SigmoidDerivative(IDeviceMemoryPtr a, int size)
{
    var output = Allocate(size);
    _Use(
        _sigmoidDerivative,
        size,
        kernel => kernel.Run(0, a.DevicePointer, output.DevicePointer, size)
    );
    return output;
}
/// <summary>
/// Launches the <c>_tanh</c> kernel over <paramref name="a"/> and returns its output in a
/// newly allocated block of <paramref name="size"/> elements.
/// </summary>
/// <param name="a">Device block holding the kernel input.</param>
/// <param name="size">Element count; also the launch size.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr TanH(IDeviceMemoryPtr a, int size)
{
    var output = Allocate(size);
    _Use(
        _tanh,
        size,
        kernel => kernel.Run(0, a.DevicePointer, output.DevicePointer, size)
    );
    return output;
}
/// <summary>
/// Launches the <c>_reverse</c> kernel over <paramref name="a"/> and returns its output in a
/// newly allocated block of <paramref name="size"/> elements.
/// </summary>
/// <param name="a">Device block holding the kernel input.</param>
/// <param name="size">Element count; also the launch size.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr Reverse(IDeviceMemoryPtr a, int size)
{
    var output = Allocate(size);
    _Use(
        _reverse,
        size,
        kernel => kernel.Run(0, a.DevicePointer, output.DevicePointer, size)
    );
    return output;
}
/// <summary>
/// Launches the <c>_diagonal</c> kernel over <paramref name="a"/> and returns its output in a
/// newly allocated block of <c>min(rows, columns)</c> elements.
/// </summary>
/// <param name="a">Device block holding the kernel input.</param>
/// <param name="rows">Row count forwarded to the kernel.</param>
/// <param name="columns">Column count forwarded to the kernel.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr Diagonal(IDeviceMemoryPtr a, int rows, int columns)
{
    // The output length (and launch size) is the shorter of the two dimensions.
    var diagonalLength = Math.Min(rows, columns);
    var diagonal = Allocate(diagonalLength);
    _Use(
        _diagonal,
        diagonalLength,
        kernel => kernel.Run(0, a.DevicePointer, diagonal.DevicePointer, rows, columns)
    );
    return diagonal;
}
/// <summary>
/// Copies <paramref name="b"/> into a newly allocated block, then launches the
/// <c>_pointwiseDivide</c> kernel with <paramref name="a"/> and that copy, and returns the copy.
/// </summary>
/// <param name="a">First device block passed to the kernel.</param>
/// <param name="b">Device block whose contents seed the returned block; not modified here.</param>
/// <param name="size">Element count; also the launch size.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
/// <remarks>
/// NOTE(review): presumably the kernel overwrites the copy in place with the quotient —
/// confirm operand order against the kernel source.
/// </remarks>
internal IDeviceMemoryPtr PointwiseDivide(IDeviceMemoryPtr a, IDeviceMemoryPtr b, int size)
{
    var quotient = Allocate(size);
    quotient.CopyToDevice(b);
    _Use(
        _pointwiseDivide,
        size,
        kernel => kernel.Run(0, a.DevicePointer, quotient.DevicePointer, size)
    );
    return quotient;
}
/// <summary>
/// Uploads <paramref name="indexList"/> to the device and launches the
/// <c>_vectorCopyRandom</c> kernel with <paramref name="a"/>, the index buffer and a newly
/// allocated output block of <c>indexList.Length</c> elements.
/// </summary>
/// <param name="a">Device block holding the source values.</param>
/// <param name="size">Not used by this method.</param>
/// <param name="indexList">Indices uploaded for the kernel; its length is the output size.</param>
/// <returns>The block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr VectorCopy(IDeviceMemoryPtr a, int size, int[] indexList)
{
    var outputCount = indexList.Length;
    var output = Allocate(outputCount);

    // The temporary index buffer is disposed as soon as the kernel call returns.
    using (var deviceIndices = new CudaDeviceVariable<int>(outputCount))
    {
        deviceIndices.CopyToDevice(indexList);
        _Use(
            _vectorCopyRandom,
            outputCount,
            kernel => kernel.Run(0, a.DevicePointer, output.DevicePointer, deviceIndices.DevicePointer, outputCount)
        );
    }
    return output;
}
/// <summary>
/// Creates a vector over an existing device block and registers it with the provider.
/// </summary>
/// <param name="cuda">Provider this vector is registered with.</param>
/// <param name="data">Device block assigned to <c>Memory</c>.</param>
/// <param name="isOwner">Not read by this constructor.</param>
public GpuVector(CudaProvider cuda, IDeviceMemoryPtr data, bool isOwner)
{
    _cuda = cuda;
    Memory = data;
    cuda.Register(this);
#if DEBUG
    if (_id == _badAlloc)
    {
        Debugger.Break();
    }
#endif
}
/// <summary>
/// Creates a vector over an existing device block and registers it with the provider.
/// </summary>
/// <param name="cuda">Provider this vector is registered with.</param>
/// <param name="data">Device block stored as this vector's data.</param>
internal GpuVector(CudaProvider cuda, IDeviceMemoryPtr data)
{
    _cuda = cuda;
    _data = data;
    cuda.Register(this);
#if DEBUG
    if (_id == _badAlloc)
    {
        Debugger.Break();
    }
#endif
}
/// <summary>
/// Creates a matrix over an existing device block and registers it with the provider.
/// </summary>
/// <param name="cuda">Provider this matrix is registered with.</param>
/// <param name="rows">Number of rows.</param>
/// <param name="columns">Number of columns.</param>
/// <param name="gpuData">Device block stored as this matrix's data.</param>
internal GpuMatrix(CudaProvider cuda, int rows, int columns, IDeviceMemoryPtr gpuData)
{
    _cuda = cuda;
    _rows = rows;
    _columns = columns;
    _data = gpuData;
    cuda.Register(this);
#if DEBUG
    if (_id == _badAlloc)
    {
        Debugger.Break();
    }
#endif
}
/// <summary>
/// Finds the smallest and largest value in <paramref name="a"/>. While more than
/// <c>BLOCK_DIM2</c> values remain, the <c>_findMinAndMax</c> kernel writes per-block minima
/// and maxima, which are concatenated into a new block and reduced again; the final values
/// are scanned on the host.
/// </summary>
/// <param name="a">Device block holding the input values; never freed by this method.</param>
/// <param name="size">Number of input values. Values of zero or less yield <c>(0, 0)</c>.</param>
/// <returns>The minimum and maximum, or <c>(0, 0)</c> when <paramref name="size"/> is not positive.</returns>
internal (float Min, float Max) FindMinAndMax(IDeviceMemoryPtr a, int size)
{
    if (size <= 0)
    {
        return (0f, 0f);
    }

    var ptr = a;
    while (size > BLOCK_DIM2)
    {
        var bufferSize = (size / BLOCK_DIM2) + 1;
        var minBlock = Allocate(bufferSize, true);
        var maxBlock = Allocate(bufferSize, true);
        try
        {
            _Use(
                _findMinAndMax,
                size,
                kernel => kernel.Run(BLOCK_DIM2, ptr.DevicePointer, size, minBlock.DevicePointer, maxBlock.DevicePointer)
            );
            if (ptr != a)
            {
                ptr.Free();
            }

            // Next pass input: the per-block minima followed by the per-block maxima.
            var byteCount = bufferSize * sizeof(float);
            size = bufferSize * 2;
            ptr = Allocate(size);
            ptr.DeviceVariable.CopyToDevice(minBlock.DeviceVariable, 0, 0, byteCount);
            ptr.DeviceVariable.CopyToDevice(maxBlock.DeviceVariable, 0, byteCount, byteCount);
        }
        finally
        {
            minBlock.Free();
            maxBlock.Free();
        }
    }

    var data = new float[size];
    ptr.CopyToHost(data);

    // Release the last intermediate block; the caller's block is left untouched.
    if (ptr != a)
    {
        ptr.Free();
    }

    float min = float.MaxValue, max = float.MinValue;
    foreach (var val in data)
    {
        if (val > max)
        {
            max = val;
        }
        if (val < min)
        {
            min = val;
        }
    }
    return (min, max);
}
/// <summary>
/// Creates a matrix over an existing device block and registers it with the provider.
/// In debug builds asserts that the block holds exactly <c>rows * columns</c> elements.
/// </summary>
/// <param name="cuda">Provider this matrix is registered with.</param>
/// <param name="rows">Value assigned to <c>RowCount</c>.</param>
/// <param name="columns">Value assigned to <c>ColumnCount</c>.</param>
/// <param name="data">Device block assigned to <c>Memory</c>.</param>
/// <param name="isOwner">Not read by this constructor.</param>
public GpuMatrix(CudaProvider cuda, int rows, int columns, IDeviceMemoryPtr data, bool isOwner)
{
    Debug.Assert(rows * columns == data.Size);

    _cuda = cuda;
    RowCount = rows;
    ColumnCount = columns;
    Memory = data;
    cuda.Register(this);
#if DEBUG
    if (_id == _badAlloc)
    {
        Debugger.Break();
    }
#endif
}
/// <summary>
/// Uploads the <paramref name="compareTo"/> pointer array to a temporary device buffer and
/// launches the <c>_multiManhattan</c> kernel with <paramref name="vector"/>, that buffer and
/// a newly allocated output block of <c>size * compareTo.Length</c> elements.
/// </summary>
/// <param name="vector">Device block passed to the kernel as the first operand.</param>
/// <param name="compareTo">Device pointers uploaded for the kernel.</param>
/// <param name="size">First launch dimension; also forwarded to the kernel.</param>
/// <returns>The output block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr MultiManhattanDistance(IDeviceMemoryPtr vector, CUdeviceptr[] compareTo, int size)
{
    // 8 bytes are reserved per entry of compareTo.
    var pointerTable = _cuda.AllocateMemory(8 * compareTo.Length);
    try
    {
        _cuda.CopyToDevice(pointerTable, compareTo);
        IDeviceMemoryPtr distances = Allocate(size * compareTo.Length);
        _Use(
            _multiManhattan,
            size,
            compareTo.Length,
            kernel => kernel.Run(0, vector.DevicePointer, pointerTable, distances.DevicePointer, size, compareTo.Length)
        );
        return distances;
    }
    finally
    {
        // The pointer table is only needed for the duration of the kernel call.
        _cuda.FreeMemory(pointerTable);
    }
}
/// <summary>
/// Creates a 3D tensor over an existing device block and registers it with the provider.
/// In debug builds asserts that the block holds exactly <c>rows * columns * depth</c> elements.
/// </summary>
/// <param name="provider">Provider this tensor is registered with.</param>
/// <param name="rows">Number of rows.</param>
/// <param name="columns">Number of columns.</param>
/// <param name="depth">Depth of the tensor.</param>
/// <param name="data">Device block stored as this tensor's data.</param>
/// <param name="isOwner">Not read by this constructor.</param>
public Gpu3DTensor(CudaProvider provider, int rows, int columns, int depth, IDeviceMemoryPtr data, bool isOwner)
{
    Debug.Assert(rows * columns * depth == data.Size);

    _cuda = provider;
    _rows = rows;
    _columns = columns;
    _depth = depth;
    _data = data;

    // One block is a single rows x columns slice.
    _blockSize = rows * columns;

    provider.Register(this);
#if DEBUG
    if (_id == _badAlloc)
    {
        Debugger.Break();
    }
#endif
}
/// <summary>
/// Creates a vector whose elements are computed on the host by <paramref name="init"/>
/// and then copied into a newly allocated device block.
/// </summary>
/// <param name="cuda">Provider used to allocate the block and register this instance.</param>
/// <param name="size">Number of elements.</param>
/// <param name="init">Called as <c>init(index)</c> once for every element, in ascending order.</param>
public GpuVector(CudaProvider cuda, int size, Func<int, float> init)
{
    _cuda = cuda;

    var hostBuffer = new float[size];
    for (var index = 0; index < size; index++)
    {
        hostBuffer[index] = init(index);
    }

    _data = cuda.Allocate(size);
    _data.CopyToDevice(hostBuffer);
    cuda.Register(this);
#if DEBUG
    if (_id == _badAlloc)
    {
        Debugger.Break();
    }
#endif
}
/// <summary>
/// Splits <paramref name="a"/> into <paramref name="blockCount"/> temporary blocks with
/// <c>VectorSplit</c>, launches the <c>_rotate</c> kernel over them and returns the kernel
/// output in a newly allocated block of <paramref name="size"/> elements.
/// </summary>
/// <param name="a">Device block holding the input values.</param>
/// <param name="size">Total element count; also the launch size.</param>
/// <param name="blockCount">Number of temporary blocks; each holds <c>size / blockCount</c> elements.</param>
/// <returns>The output block allocated here; this method does not free it.</returns>
internal IDeviceMemoryPtr Rotate(IDeviceMemoryPtr a, int size, int blockCount)
{
    var blockSize = size / blockCount;
    var vectorList = Enumerable.Range(0, blockCount)
        .Select(i => Allocate(blockSize))
        .ToList();

    try
    {
        var ret = Allocate(size);
        using (var devicePtr = new CudaDeviceVariable<CUdeviceptr>(blockCount))
        {
            devicePtr.CopyToDevice(vectorList.Select(p => p.DevicePointer).ToArray());
            VectorSplit(a, size, blockSize, devicePtr.DevicePointer);
            _Use(
                _rotate,
                size,
                kernel => kernel.Run(0, devicePtr.DevicePointer, ret.DevicePointer, size, blockCount, blockSize)
            );
        }
        return ret;
    }
    finally
    {
        // The per-block buffers are scratch space only; release them even when
        // allocation, the split or the kernel launch throws.
        foreach (var item in vectorList)
        {
            item.Free();
        }
    }
}
/// <summary>
/// Launches the <c>_concatColumns</c> kernel on a <paramref name="rows"/> x
/// <paramref name="columns"/> grid with the three supplied blocks. Nothing is allocated here.
/// </summary>
/// <param name="a">First device block passed to the kernel.</param>
/// <param name="b">Second device block passed to the kernel.</param>
/// <param name="c">Third device block passed to the kernel (presumably the output — confirm against the kernel).</param>
/// <param name="rows">First launch dimension; also forwarded to the kernel.</param>
/// <param name="columns">Second launch dimension; also forwarded to the kernel.</param>
/// <param name="topRowCount">Forwarded to the kernel.</param>
/// <param name="bottomRowCount">Forwarded to the kernel.</param>
internal void ConcatColumns(IDeviceMemoryPtr a, IDeviceMemoryPtr b, IDeviceMemoryPtr c, int rows, int columns, int topRowCount, int bottomRowCount)
{
    _Use(
        _concatColumns,
        rows,
        columns,
        kernel => kernel.Run(
            0,
            a.DevicePointer,
            b.DevicePointer,
            c.DevicePointer,
            rows,
            columns,
            topRowCount,
            bottomRowCount
        )
    );
}
/// <summary>
/// Launches the <c>_concatRows</c> kernel on a <paramref name="rows"/> x
/// <paramref name="columns"/> grid with the three supplied blocks. Nothing is allocated here.
/// </summary>
/// <param name="a">First device block passed to the kernel.</param>
/// <param name="b">Second device block passed to the kernel.</param>
/// <param name="c">Third device block passed to the kernel (presumably the output — confirm against the kernel).</param>
/// <param name="rows">First launch dimension; also forwarded to the kernel.</param>
/// <param name="columns">Second launch dimension; also forwarded to the kernel.</param>
/// <param name="leftColumnCount">Forwarded to the kernel.</param>
internal void ConcatRows(IDeviceMemoryPtr a, IDeviceMemoryPtr b, IDeviceMemoryPtr c, int rows, int columns, int leftColumnCount)
{
    _Use(
        _concatRows,
        rows,
        columns,
        kernel => kernel.Run(
            0,
            a.DevicePointer,
            b.DevicePointer,
            c.DevicePointer,
            rows,
            columns,
            leftColumnCount
        )
    );
}
/// <summary>
/// Uploads the device pointers of <paramref name="matrixList"/> to a temporary buffer and
/// launches the <c>_tensorConvertToMatrix</c> kernel on a <paramref name="matrixRows"/> x
/// <paramref name="matrixColumns"/> grid, passing <paramref name="ret"/> as the second block.
/// </summary>
/// <param name="matrixList">Blocks whose device pointers are handed to the kernel.</param>
/// <param name="tensorRows">Forwarded to the kernel.</param>
/// <param name="tensorColumns">Forwarded to the kernel.</param>
/// <param name="matrixRows">First launch dimension; also forwarded to the kernel.</param>
/// <param name="matrixColumns">Second launch dimension; also forwarded to the kernel.</param>
/// <param name="ret">Caller-supplied block passed to the kernel; not allocated or freed here.</param>
internal void TensorConvertToMatrix(IReadOnlyList<IDeviceMemoryPtr> matrixList, int tensorRows, int tensorColumns, int matrixRows, int matrixColumns, IDeviceMemoryPtr ret)
{
    // The pointer table only lives for the duration of the kernel call.
    using (var pointerTable = new CudaDeviceVariable<CUdeviceptr>(matrixList.Count))
    {
        var matrixPointers = matrixList.Select(matrix => matrix.DevicePointer).ToArray();
        pointerTable.CopyToDevice(matrixPointers);
        _Use(
            _tensorConvertToMatrix,
            matrixRows,
            matrixColumns,
            kernel => kernel.Run(
                0,
                pointerTable.DevicePointer,
                ret.DevicePointer,
                tensorRows,
                tensorColumns,
                matrixRows,
                matrixColumns
            )
        );
    }
}