/// <summary>
/// Launches the "BuildSelfTriMask" CUDA kernel over <paramref name="result"/> viewed as a
/// flattened [rows, cols] matrix, where cols is the innermost dimension.
/// <paramref name="value"/> and <paramref name="maskedValue"/> are the fill values forwarded
/// to the kernel; <paramref name="originalLengths"/> supplies per-sequence lengths.
/// </summary>
private void BuildSelfTriMask(TSCudaContext context, Tensor result, Tensor originalLengths, int paddedSeqLen, float value, float maskedValue)
{
    var ctx = context.CudaContextForTensor(originalLengths);
    ctx.SetCurrent();

    // Flatten: cols = last dimension, rows = everything else.
    var lastDim = result.DimensionCount - 1;
    var elementCount = TensorDimensionHelpers.GetStorageSize(result.Sizes, result.Strides);
    var cols = result.Sizes[lastDim];
    if (elementCount % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{elementCount}', and cols = '{cols}'");
    }

    var rows = elementCount / cols;
    var blockDim = new dim3((uint)Math.Min(512, rows));
    // blockDim.y is 1 here, so the grid ends up as min(1024, rows).
    var gridDim = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, blockDim.y)));

    var resultPtr = CudaHelpers.GetBufferStart(result);
    var lengthsPtr = CudaHelpers.GetBufferStart(originalLengths);

    // Shared memory: one float per thread.
    Invoke(context, ctx, "BuildSelfTriMask", gridDim, blockDim, blockDim.x * sizeof(float), CUstream.NullStream,
        resultPtr, lengthsPtr, rows, cols, paddedSeqLen, value, maskedValue);
}
/// <summary>
/// Reconstructs a CPU tensor from its serialized protobuf form: allocates CPU storage of the
/// proto's element type, copies the raw bytes in, and wraps it with the proto's shape and
/// contiguous strides.
/// </summary>
/// <param name="proto">Serialized tensor (element type, raw data bytes, and shape).</param>
/// <returns>A new <see cref="Tensor"/> backed by freshly allocated CPU storage.</returns>
/// <exception cref="Exception">Thrown when the proto's data type has no DType mapping.</exception>
public static unsafe Tensor Deserialize(TensorProto proto)
{
    // TryGetValue instead of Keys.Contains + indexer: single dictionary lookup.
    if (!_dataTypeToDType.TryGetValue(proto.Type, out var dtype))
    {
        throw new Exception($"Tensors don't support '{proto.Type}' data type");
    }

    var allocator = new CpuAllocator();
    var storage = (CpuStorage)allocator.Allocate(dtype, proto.Data.Length / dtype.Size());

    // Copy the raw payload into the storage buffer; pinning keeps the managed
    // array in place for the duration of the native copy.
    var bytes = proto.Data.ToByteArray();
    fixed (byte* p = bytes)
    {
        IntPtr ptr = (IntPtr)p;
        storage.CopyToStorage(0, ptr, bytes.Length);
    }

    var sizes = proto.Shape.Select(i => (long)i).ToArray();
    var strides = TensorDimensionHelpers.GetContiguousStride(sizes);
    return new Tensor(sizes, strides, storage, 0);
}
// Backward pass of layer normalization applied over the sum of two inputs (x1 + x2):
// launches "gAddLayerNormalizationGrad", which reads the forward output y, the original
// inputs x1/x2 and the scale/shift parameters alpha/beta, and accumulates gradients into
// out1Grad, out2Grad, alphaGrad, betaGrad (inGrad is the incoming gradient).
// The tensor is treated as a flattened [rows, cols] matrix with cols = innermost dimension.
// NOTE(review): CeilDiv(rows, threads.y) divides by threads.y, which is 1 when dim3 is
// constructed with a single argument — so grid = min(1024, rows). Presumably the kernel
// iterates rows with a block-stride loop; confirm against the kernel source.
private void AddLayerNormGrad(TSCudaContext context, Tensor out1Grad, Tensor out2Grad, Tensor alphaGrad, Tensor betaGrad, Tensor inGrad, Tensor y, Tensor x1, Tensor x2, Tensor alpha, Tensor beta, float eps = 1e-9f)
{
    var cudaContext = context.CudaContextForTensor(inGrad);
    cudaContext.SetCurrent();

    // Flatten to [rows, cols]; storage size must be an exact multiple of cols.
    var ndim = inGrad.DimensionCount;
    var storageSize = TensorDimensionHelpers.GetStorageSize(inGrad.Sizes, inGrad.Strides);
    var cols = inGrad.Sizes[ndim - 1];
    if (storageSize % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{storageSize}', and cols = '{cols}'");
    }

    var rows = storageSize / cols;
    var threads = new dim3((uint)Math.Min(512, rows));
    var grid = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, threads.y)));

    var out1GradPtr = CudaHelpers.GetBufferStart(out1Grad);
    var out2GradPtr = CudaHelpers.GetBufferStart(out2Grad);
    var alphaGradPtr = CudaHelpers.GetBufferStart(alphaGrad);
    var betaGradPtr = CudaHelpers.GetBufferStart(betaGrad);
    var inGradPtr = CudaHelpers.GetBufferStart(inGrad);
    var yPtr = CudaHelpers.GetBufferStart(y);
    var x1Ptr = CudaHelpers.GetBufferStart(x1);
    var x2Ptr = CudaHelpers.GetBufferStart(x2);
    var alphaPtr = CudaHelpers.GetBufferStart(alpha);
    var betaPtr = CudaHelpers.GetBufferStart(beta);

    // Shared memory: 4 floats per thread (kernel-side scratch buffers).
    this.Invoke(context, cudaContext, "gAddLayerNormalizationGrad", grid, threads, threads.x * sizeof(float) * 4, CUstream.NullStream,
        out1GradPtr, out2GradPtr, alphaGradPtr, betaGradPtr, inGradPtr, yPtr, x1Ptr, x2Ptr, alphaPtr, betaPtr, rows, cols, eps);
}
/// <summary>
/// Launches the "gLNormalization" kernel: layer normalization of <paramref name="src"/> into
/// <paramref name="result"/> with scale <paramref name="alpha"/>, shift <paramref name="beta"/>
/// and numerical-stability epsilon. The tensor is viewed as [rows, cols] with cols = innermost
/// dimension (the normalized axis).
/// </summary>
private void LayerNorm(TSCudaContext context, Tensor result, Tensor src, Tensor alpha, Tensor beta, float eps = 1e-9f)
{
    CudaContext cudaContext = context.CudaContextForTensor(src);
    cudaContext.SetCurrent();

    int lastDim = src.DimensionCount - 1;
    long numElements = TensorDimensionHelpers.GetStorageSize(src.Sizes, src.Strides);
    long cols = src.Sizes[lastDim];
    if (numElements % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{numElements}', and cols = '{cols}'");
    }

    long rows = numElements / cols;
    dim3 blockDim = new dim3((uint)Math.Min(512, rows));
    // blockDim.y == 1, so this evaluates to min(1024, rows).
    dim3 gridSize = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, blockDim.y)));

    CUdeviceptr resultPtr = CudaHelpers.GetBufferStart(result);
    CUdeviceptr srcPtr = CudaHelpers.GetBufferStart(src);
    CUdeviceptr alphaPtr = CudaHelpers.GetBufferStart(alpha);
    CUdeviceptr betaPtr = CudaHelpers.GetBufferStart(beta);

    // Shared memory: one float per thread.
    this.Invoke(context, cudaContext, "gLNormalization", gridSize, blockDim, blockDim.x * sizeof(float), CUstream.NullStream,
        resultPtr, srcPtr, alphaPtr, betaPtr, rows, cols, eps);
}
/// <summary>
/// Launches the "RMSProp" optimizer kernel: updates <paramref name="weight"/> in place from
/// <paramref name="gradient"/>, maintaining the running squared-gradient <paramref name="cache"/>.
/// Hyper-parameters (step size, gradient clip, regularization, decay rate, epsilon) are
/// forwarded to the kernel unchanged.
/// </summary>
private void RMSProp(TSCudaContext context, Tensor weight, Tensor gradient, Tensor cache, int batchSize, float step_size, float clipval, float regc, float decay_rate, float eps)
{
    CudaContext ctx = context.CudaContextForTensor(weight);
    ctx.SetCurrent();

    // View the weight tensor as [rows, cols], cols being the innermost dimension.
    int lastDim = weight.DimensionCount - 1;
    long numElements = TensorDimensionHelpers.GetStorageSize(weight.Sizes, weight.Strides);
    long cols = weight.Sizes[lastDim];
    if (numElements % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{numElements}', and cols = '{cols}'");
    }

    long rows = numElements / cols;
    dim3 blockDim = new dim3((uint)Math.Min(512, rows));
    dim3 gridSize = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, blockDim.y)));

    CUdeviceptr weightPtr = CudaHelpers.GetBufferStart(weight);
    CUdeviceptr gradientPtr = CudaHelpers.GetBufferStart(gradient);
    CUdeviceptr cachePtr = CudaHelpers.GetBufferStart(cache);

    // No dynamic shared memory needed for this kernel.
    this.Invoke(context, ctx, "RMSProp", gridSize, blockDim, 0, CUstream.NullStream,
        weightPtr, gradientPtr, cachePtr, rows, cols, batchSize, step_size, clipval, regc, decay_rate, eps);
}
/// <summary>
/// Launches the "gSoftmaxGrad" kernel over <paramref name="grad"/> viewed as [rows, cols].
/// <paramref name="addGrad"/> is passed to the kernel as an int flag (1 = accumulate into the
/// existing gradient, 0 = overwrite — semantics owned by the kernel).
/// </summary>
private void SoftmaxGrad(TSCudaContext context, Tensor grad, Tensor adj, Tensor val, bool addGrad = true)
{
    CudaContext ctx = context.CudaContextForTensor(grad);
    ctx.SetCurrent();

    int lastDim = grad.DimensionCount - 1;
    long numElements = TensorDimensionHelpers.GetStorageSize(grad.Sizes, grad.Strides);
    long cols = grad.Sizes[lastDim];
    if (numElements % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{numElements}', and cols = '{cols}'");
    }

    long rows = numElements / cols;
    int addGradFlag = addGrad ? 1 : 0;

    dim3 blockDim = new dim3((uint)Math.Min(512, rows));
    dim3 gridSize = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, blockDim.y)));

    CUdeviceptr gradPtr = CudaHelpers.GetBufferStart(grad);
    CUdeviceptr adjPtr = CudaHelpers.GetBufferStart(adj);
    CUdeviceptr valPtr = CudaHelpers.GetBufferStart(val);

    // Shared memory: one float per thread.
    this.Invoke(context, ctx, "gSoftmaxGrad", gridSize, blockDim, blockDim.x * sizeof(float), CUstream.NullStream,
        gradPtr, adjPtr, valPtr, rows, cols, addGradFlag);
}
// Backward pass of (single-input) layer normalization: launches "gLayerNormalizationGrad",
// which reads the forward output y, the original input x and the scale/shift parameters
// alpha/beta, and accumulates gradients into outGrad, alphaGrad and betaGrad
// (inGrad is the incoming gradient). The tensor is treated as a flattened [rows, cols]
// matrix with cols = innermost (normalized) dimension.
// NOTE(review): CeilDiv(rows, threads.y) divides by threads.y, which is 1 for a dim3
// built with one argument — so grid = min(1024, rows). Presumably the kernel strides
// over rows per block; confirm against the kernel source.
private void LayerNormGrad(TSCudaContext context, Tensor outGrad, Tensor alphaGrad, Tensor betaGrad, Tensor inGrad, Tensor y, Tensor x, Tensor alpha, Tensor beta, float eps = 1e-9f)
{
    CudaContext cudaContext = context.CudaContextForTensor(inGrad);
    cudaContext.SetCurrent();

    // Flatten to [rows, cols]; storage size must be an exact multiple of cols.
    int ndim = inGrad.DimensionCount;
    long storageSize = TensorDimensionHelpers.GetStorageSize(inGrad.Sizes, inGrad.Strides);
    long cols = inGrad.Sizes[ndim - 1];
    if (storageSize % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{storageSize}', and cols = '{cols}'");
    }

    long rows = storageSize / cols;
    dim3 threads = new dim3((uint)Math.Min(512, rows));
    dim3 grid = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, threads.y)));

    CUdeviceptr outGradPtr = CudaHelpers.GetBufferStart(outGrad);
    CUdeviceptr alphaGradPtr = CudaHelpers.GetBufferStart(alphaGrad);
    CUdeviceptr betaGradPtr = CudaHelpers.GetBufferStart(betaGrad);
    CUdeviceptr inGradPtr = CudaHelpers.GetBufferStart(inGrad);
    CUdeviceptr yPtr = CudaHelpers.GetBufferStart(y);
    CUdeviceptr xPtr = CudaHelpers.GetBufferStart(x);
    CUdeviceptr alphaPtr = CudaHelpers.GetBufferStart(alpha);
    CUdeviceptr betaPtr = CudaHelpers.GetBufferStart(beta);

    // Shared memory: 4 floats per thread (kernel-side scratch buffers).
    Invoke(context, cudaContext, "gLayerNormalizationGrad", grid, threads, threads.x * sizeof(float) * 4, CUstream.NullStream,
        outGradPtr, alphaGradPtr, betaGradPtr, inGradPtr, yPtr, xPtr, alphaPtr, betaPtr, rows, cols, eps);
}
/// <summary>
/// Launches the "gAddLNormalization" kernel: layer normalization of the element-wise sum of
/// <paramref name="src1"/> and <paramref name="src2"/> into <paramref name="result"/>, using
/// scale <paramref name="alpha"/>, shift <paramref name="beta"/> and epsilon
/// <paramref name="eps"/>. The tensor is viewed as [rows, cols], cols = innermost dimension.
/// </summary>
private void AddLayerNorm(TSCudaContext context, Tensor result, Tensor src1, Tensor src2, Tensor alpha, Tensor beta, float eps = 1e-9f)
{
    var ctx = context.CudaContextForTensor(src1);
    ctx.SetCurrent();

    var lastDim = src1.DimensionCount - 1;
    var numElements = TensorDimensionHelpers.GetStorageSize(src1.Sizes, src1.Strides);
    var cols = src1.Sizes[lastDim];
    if (numElements % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{numElements}', and cols = '{cols}'");
    }

    var rows = numElements / cols;
    var blockDim = new dim3((uint)Math.Min(512, rows));
    // blockDim.y == 1, so this evaluates to min(1024, rows).
    var gridSize = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, blockDim.y)));

    var resultPtr = CudaHelpers.GetBufferStart(result);
    var src1Ptr = CudaHelpers.GetBufferStart(src1);
    var src2Ptr = CudaHelpers.GetBufferStart(src2);
    var alphaPtr = CudaHelpers.GetBufferStart(alpha);
    var betaPtr = CudaHelpers.GetBufferStart(beta);

    // Shared memory: one float per thread.
    Invoke(context, ctx, "gAddLNormalization", gridSize, blockDim, blockDim.x * sizeof(float), CUstream.NullStream,
        resultPtr, src1Ptr, src2Ptr, alphaPtr, betaPtr, rows, cols, eps);
}
/// <summary>
/// Launches the "gSoftmax" kernel: softmax of <paramref name="src"/> into
/// <paramref name="result"/>, computed over the innermost dimension. The tensor is viewed
/// as [rows, cols] with cols = last dimension.
/// </summary>
private void Softmax(TSCudaContext context, Tensor result, Tensor src)
{
    var ctx = context.CudaContextForTensor(src);
    ctx.SetCurrent();

    var lastDim = src.DimensionCount - 1;
    var numElements = TensorDimensionHelpers.GetStorageSize(src.Sizes, src.Strides);
    var cols = src.Sizes[lastDim];
    if (numElements % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{numElements}', and cols = '{cols}'");
    }

    var rows = numElements / cols;
    var blockDim = new dim3((uint)Math.Min(512, rows));
    // blockDim.y == 1, so this evaluates to min(1024, rows).
    var gridSize = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, blockDim.y)));

    var resultPtr = CudaHelpers.GetBufferStart(result);
    var srcPtr = CudaHelpers.GetBufferStart(src);

    // Shared memory: one float per thread.
    Invoke(context, ctx, "gSoftmax", gridSize, blockDim, blockDim.x * sizeof(float), CUstream.NullStream,
        resultPtr, srcPtr, rows, cols);
}
// Launches the "Adam" optimizer kernel: updates weight in place from gradient, maintaining
// the first-moment (m) and second-moment (v) running averages. step_size, clipval, regc,
// the two decay rates, the iteration counter and eps are forwarded to the kernel unchanged.
// The weight tensor is treated as a flattened [rows, cols] matrix with cols = innermost
// dimension.
// NOTE(review): CeilDiv(rows, threads.y) divides by threads.y, which is 1 for a dim3 built
// with one argument — so grid = min(1024, rows); presumably intentional, confirm against
// the kernel source.
private void Adam(TSCudaContext context, Tensor weight, Tensor gradient, Tensor v, Tensor m, int batchSize, float step_size, float clipval, float regc, float decay_rate_v, float decay_rate_m, int iter, float eps)
{
    CudaContext cudaContext = context.CudaContextForTensor(weight);
    cudaContext.SetCurrent();

    // Flatten to [rows, cols]; storage size must be an exact multiple of cols.
    int ndim = weight.DimensionCount;
    long storageSize = TensorDimensionHelpers.GetStorageSize(weight.Sizes, weight.Strides);
    long cols = weight.Sizes[ndim - 1];
    if (storageSize % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{storageSize}', and cols = '{cols}'");
    }

    long rows = storageSize / cols;
    dim3 threads = new dim3((uint)Math.Min(512, rows));
    dim3 grid = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, threads.y)));

    CUdeviceptr weightPtr = CudaHelpers.GetBufferStart(weight);
    CUdeviceptr gradientPtr = CudaHelpers.GetBufferStart(gradient);
    CUdeviceptr vPtr = CudaHelpers.GetBufferStart(v);
    CUdeviceptr mPtr = CudaHelpers.GetBufferStart(m);

    // No dynamic shared memory needed for this kernel.
    Invoke(context, cudaContext, "Adam", grid, threads, 0, CUstream.NullStream,
        weightPtr, gradientPtr, vPtr, mPtr, rows, cols, batchSize, step_size, clipval, regc, decay_rate_v, decay_rate_m, iter, eps);
}
/// <summary>
/// Launches the "IndexSelectGrad" kernel: scatters the incoming gradient <paramref name="adj"/>
/// back into <paramref name="grad"/> according to <paramref name="indice"/>. The adj tensor is
/// viewed as [rows, cols] with cols = innermost dimension.
/// </summary>
private void IndexSelectGrad(TSCudaContext context, Tensor grad, Tensor adj, Tensor indice)
{
    var ctx = context.CudaContextForTensor(adj);
    ctx.SetCurrent();

    var lastDim = adj.DimensionCount - 1;
    var numElements = TensorDimensionHelpers.GetStorageSize(adj.Sizes, adj.Strides);
    var cols = adj.Sizes[lastDim];
    if (numElements % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{numElements}', and cols = '{cols}'");
    }

    var rows = numElements / cols;
    var blockDim = new dim3((uint)Math.Min(512, rows));
    // blockDim.y == 1, so this evaluates to min(1024, rows).
    var gridSize = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, blockDim.y)));

    var gradPtr = CudaHelpers.GetBufferStart(grad);
    var adjPtr = CudaHelpers.GetBufferStart(adj);
    var indicePtr = CudaHelpers.GetBufferStart(indice);

    // Shared memory: one float per thread.
    Invoke(context, ctx, "IndexSelectGrad", gridSize, blockDim, blockDim.x * sizeof(float), CUstream.NullStream,
        gradPtr, adjPtr, indicePtr, rows, cols);
}
/// <summary>
/// Launches the "gSoftmaxMask" kernel: masked softmax of <paramref name="src"/> into
/// <paramref name="result"/>. Both src and mask are flattened to [rows, cols]; the mask may
/// have fewer rows than src as long as src's rows are an exact multiple of the mask's rows
/// (the kernel maps each src row onto a mask row), and the column counts must match.
/// </summary>
private void SoftmaxMask(TSCudaContext context, Tensor result, Tensor src, Tensor mask)
{
    var ctx = context.CudaContextForTensor(src);
    ctx.SetCurrent();

    // Flatten src to [rows, cols].
    var srcLastDim = src.DimensionCount - 1;
    var srcElements = TensorDimensionHelpers.GetStorageSize(src.Sizes, src.Strides);
    var cols = src.Sizes[srcLastDim];
    if (srcElements % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{srcElements}', and cols = '{cols}'");
    }
    var rows = srcElements / cols;

    // Flatten mask to [maskRows, maskCols].
    var maskLastDim = mask.DimensionCount - 1;
    var maskElements = TensorDimensionHelpers.GetStorageSize(mask.Sizes, mask.Strides);
    var maskCols = mask.Sizes[maskLastDim];
    if (maskElements % maskCols != 0)
    {
        throw new Exception($"Invalid mask tensor storage size = '{maskElements}', and cols = '{maskCols}'");
    }
    var maskRows = maskElements / maskCols;

    // Shape compatibility between src and mask.
    if (rows % maskRows != 0)
    {
        throw new Exception($"Invalid tensor rows = '{rows}' and mask tensor rows = '{maskRows}'");
    }
    if (cols != maskCols)
    {
        throw new Exception($"Tensor cols = '{cols}', mask tensor cols = '{maskCols}'. They should be equal.");
    }

    var blockDim = new dim3((uint)Math.Min(512, rows));
    // blockDim.y == 1, so this evaluates to min(1024, rows).
    var gridSize = new dim3((uint)Math.Min(1024, ApplyUtils.CeilDiv(rows, blockDim.y)));

    var resultPtr = CudaHelpers.GetBufferStart(result);
    var srcPtr = CudaHelpers.GetBufferStart(src);
    var maskPtr = CudaHelpers.GetBufferStart(mask);

    // Shared memory: one float per thread.
    this.Invoke(context, ctx, "gSoftmaxMask", gridSize, blockDim, blockDim.x * sizeof(float), CUstream.NullStream,
        resultPtr, srcPtr, maskPtr, rows, cols, maskRows);
}
// Builds a source-to-target attention mask on the CPU by delegating to
// TensorApplyCPU.BuildSrcTgtMask, with result viewed as a flattened [rows, cols] matrix
// (cols = innermost dimension). Returns result for call chaining.
// NOTE(review): srcPaddedSeqLen is accepted but never used — only tgtPaddedSeqLen is
// forwarded to the callee. Confirm whether the callee derives the source padding from
// the tensors or the parameter is vestigial.
public Tensor BuildSrcTgtMask(Tensor result, Tensor srcOriginalLengths, Tensor tgtOriginalLengths, int srcPaddedSeqLen, int tgtPaddedSeqLen, float value, float maskedValue)
{
    // Flatten to [rows, cols]; storage size must be an exact multiple of cols.
    int ndim = result.DimensionCount;
    long storageSize = TensorDimensionHelpers.GetStorageSize(result.Sizes, result.Strides);
    long cols = result.Sizes[ndim - 1];
    if (storageSize % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{storageSize}', and cols = '{cols}'");
    }

    long rows = storageSize / cols;
    TensorApplyCPU.BuildSrcTgtMask(result, srcOriginalLengths, tgtOriginalLengths, (int)rows, (int)cols, tgtPaddedSeqLen, value, maskedValue);

    return(result);
}
/// <summary>
/// Fills <paramref name="result"/> via TensorApplyCPU.BuildTriMask, viewing the tensor as a
/// flattened [rows, cols] matrix (cols = innermost dimension). <paramref name="value"/> and
/// <paramref name="maskedValue"/> are forwarded as the fill values.
/// </summary>
/// <returns><paramref name="result"/>, for call chaining.</returns>
public Tensor BuildTriMask(Tensor result, float value, float maskedValue)
{
    long lastDimSize = result.Sizes[result.DimensionCount - 1];
    long totalElements = TensorDimensionHelpers.GetStorageSize(result.Sizes, result.Strides);
    if (totalElements % lastDimSize != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{totalElements}', and cols = '{lastDimSize}'");
    }

    long rowCount = totalElements / lastDimSize;
    TensorApplyCPU.BuildTriMask(result, (int)rowCount, (int)lastDimSize, value, maskedValue);
    return result;
}
/// <summary>
/// Softmax gradient via the native softmaxGrad_func, with <paramref name="adj_"/> viewed as a
/// flattened [rows, cols] matrix (cols = innermost dimension). The write target is resolved
/// from <paramref name="grad_"/> with adj_'s sizes.
/// </summary>
/// <returns>The tensor the native function wrote into.</returns>
public Tensor SoftmaxGrad(Tensor grad_, Tensor adj_, Tensor val_, bool addGrad = true)
{
    long lastDimSize = adj_.Sizes[adj_.DimensionCount - 1];
    long totalElements = TensorDimensionHelpers.GetStorageSize(adj_.Sizes, adj_.Strides);
    if (totalElements % lastDimSize != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{totalElements}', and cols = '{lastDimSize}'");
    }

    long rowCount = totalElements / lastDimSize;
    Tensor writeTarget = TensorResultBuilder.GetWriteTarget(grad_, adj_, false, adj_.Sizes);
    NativeWrapper.InvokeTypeMatch(this.softmaxGrad_func, writeTarget, adj_, val_, (int)rowCount, (int)lastDimSize, addGrad);
    return writeTarget;
}
/// <summary>
/// CPU softmax via TensorApplyCPU.Softmax, with <paramref name="src"/> viewed as a flattened
/// [rows, cols] matrix (cols = innermost dimension). The write target is resolved from
/// <paramref name="result"/> with src's sizes.
/// </summary>
/// <returns>The tensor the softmax was written into.</returns>
public Tensor Softmax(Tensor result, Tensor src)
{
    long lastDimSize = src.Sizes[src.DimensionCount - 1];
    long totalElements = TensorDimensionHelpers.GetStorageSize(src.Sizes, src.Strides);
    if (totalElements % lastDimSize != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{totalElements}', and cols = '{lastDimSize}'");
    }

    long rowCount = totalElements / lastDimSize;
    Tensor writeTarget = TensorResultBuilder.GetWriteTarget(result, src, true, src.Sizes);
    TensorApplyCPU.Softmax(writeTarget, src, (int)rowCount, (int)lastDimSize);
    return writeTarget;
}
// Gathers rows of src selected by indice into a [indice.Sizes[0], src.Sizes[1]] write
// target, delegating the copy to TensorApplyCPU.IndexSelect. rows/cols are derived from
// result's own sizes (flattened [rows, cols] view, cols = innermost dimension).
// NOTE(review): the target shape reads src.Sizes[1] directly, which assumes src is 2-D —
// confirm callers only pass 2-D tensors here.
public Tensor IndexSelect(Tensor result, Tensor src, Tensor indice)
{
    // Flatten result to [rows, cols]; storage size must be an exact multiple of cols.
    int ndim = result.DimensionCount;
    long storageSize = TensorDimensionHelpers.GetStorageSize(result.Sizes, result.Strides);
    long cols = result.Sizes[ndim - 1];
    if (storageSize % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{storageSize}', and cols = '{cols}'");
    }

    long rows = storageSize / cols;
    Tensor writeTarget = TensorResultBuilder.GetWriteTarget(result, src, false, new long[] { indice.Sizes[0], src.Sizes[1] });
    TensorApplyCPU.IndexSelect(writeTarget, src, indice, (int)rows, (int)cols);

    return(writeTarget);
}
/// <summary>
/// Softmax via the native softmax_func, with <paramref name="src"/> viewed as a flattened
/// [rows, cols] matrix (cols = innermost dimension). The write target is resolved from
/// <paramref name="result"/> with src's sizes.
/// </summary>
/// <returns>The tensor the native function wrote into.</returns>
public Tensor Softmax(Tensor result, Tensor src)
{
    long lastDimSize = src.Sizes[src.DimensionCount - 1];
    long totalElements = TensorDimensionHelpers.GetStorageSize(src.Sizes, src.Strides);
    if (totalElements % lastDimSize != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{totalElements}', and cols = '{lastDimSize}'");
    }

    long rowCount = totalElements / lastDimSize;
    Tensor writeTarget = TensorResultBuilder.GetWriteTarget(result, src, false, src.Sizes);
    NativeWrapper.InvokeTypeMatch(this.softmax_func, writeTarget, src, (int)rowCount, (int)lastDimSize);
    return writeTarget;
}
/// <summary>
/// CPU softmax gradient via TensorApplyCPU.SoftmaxGrad, with <paramref name="adj_"/> viewed
/// as a flattened [rows, cols] matrix (cols = innermost dimension). The write target is
/// resolved from <paramref name="grad_"/> with adj_'s sizes; <paramref name="addGrad"/> is
/// forwarded to the callee.
/// </summary>
/// <returns>The tensor the gradient was written into.</returns>
public Tensor SoftmaxGrad(Tensor grad_, Tensor adj_, Tensor val_, bool addGrad = true)
{
    long lastDimSize = adj_.Sizes[adj_.DimensionCount - 1];
    long totalElements = TensorDimensionHelpers.GetStorageSize(adj_.Sizes, adj_.Strides);
    if (totalElements % lastDimSize != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{totalElements}', and cols = '{lastDimSize}'");
    }

    long rowCount = totalElements / lastDimSize;
    Tensor writeTarget = TensorResultBuilder.GetWriteTarget(grad_, adj_, true, adj_.Sizes);
    TensorApplyCPU.SoftmaxGrad(writeTarget, adj_, val_, (int)rowCount, (int)lastDimSize, addGrad);
    return writeTarget;
}
/// <summary>
/// CPU gradient of IndexSelect: scatters the incoming gradient <paramref name="adj"/> back
/// into <paramref name="grad"/> according to <paramref name="indice"/>, delegating to
/// TensorApplyCPU.IndexSelectGrad. adj is viewed as a flattened [rows, cols] matrix
/// (cols = innermost dimension).
/// </summary>
/// <returns><paramref name="grad"/>, for call chaining.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="grad"/> is null.</exception>
/// <exception cref="Exception">Thrown when adj's storage size is not a multiple of its last dimension.</exception>
public Tensor IndexSelectGrad(Tensor grad, Tensor adj, Tensor indice)
{
    if (grad == null)
    {
        // BUGFIX: the message was previously passed as ArgumentNullException's paramName
        // argument; use the (paramName, message) overload instead.
        throw new ArgumentNullException(nameof(grad), "Tensor grad should not be null.");
    }

    // Flatten adj to [rows, cols]; storage size must be an exact multiple of cols.
    int ndim = adj.DimensionCount;
    long storageSize = TensorDimensionHelpers.GetStorageSize(adj.Sizes, adj.Strides);
    long cols = adj.Sizes[ndim - 1];
    if (storageSize % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{storageSize}', and cols = '{cols}'");
    }

    long rows = storageSize / cols;
    TensorApplyCPU.IndexSelectGrad(grad, adj, indice, (int)rows, (int)cols);

    return grad;
}
/// <summary>
/// Masked softmax via the native softmaxmask_func. Both <paramref name="src"/> and
/// <paramref name="mask"/> are flattened to [rows, cols]; the mask may have fewer rows than
/// src as long as src's rows are an exact multiple of the mask's rows, and the column counts
/// must match. The write target is resolved from <paramref name="result"/> with src's sizes.
/// </summary>
/// <returns>The tensor the native function wrote into.</returns>
public Tensor SoftmaxMask(Tensor result, Tensor src, Tensor mask)
{
    // Flatten src to [rows, cols].
    long cols = src.Sizes[src.DimensionCount - 1];
    long srcElements = TensorDimensionHelpers.GetStorageSize(src.Sizes, src.Strides);
    if (srcElements % cols != 0)
    {
        throw new Exception($"Invalid tensor storage size = '{srcElements}', and cols = '{cols}'");
    }
    long rows = srcElements / cols;

    // Flatten mask to [maskRows, maskCols].
    long maskCols = mask.Sizes[mask.DimensionCount - 1];
    long maskElements = TensorDimensionHelpers.GetStorageSize(mask.Sizes, mask.Strides);
    if (maskElements % maskCols != 0)
    {
        throw new Exception($"Invalid mask tensor storage size = '{maskElements}', and cols = '{maskCols}'");
    }
    long maskRows = maskElements / maskCols;

    // Shape compatibility between src and mask.
    if (rows % maskRows != 0)
    {
        throw new Exception($"Invalid tensor rows = '{rows}' and mask tensor rows = '{maskRows}'");
    }
    if (cols != maskCols)
    {
        throw new Exception($"Tensor cols = '{cols}', mask tensor cols = '{maskCols}'. They should be equal.");
    }

    Tensor writeTarget = TensorResultBuilder.GetWriteTarget(result, src, false, src.Sizes);
    NativeWrapper.InvokeTypeMatch(this.softmaxmask_func, writeTarget, src, mask, (int)rows, (int)cols, (int)maskRows);
    return writeTarget;
}
/// <summary>
/// Convenience constructor: builds a shape with contiguous (densely packed) strides
/// derived from <paramref name="sizes"/> via TensorDimensionHelpers.GetContiguousStride.
/// </summary>
/// <param name="elementType">Element data type of the tensor.</param>
/// <param name="sizes">Length of each dimension.</param>
public TensorShape(DType elementType, long[] sizes)
    : this(elementType, sizes, TensorDimensionHelpers.GetContiguousStride(sizes))
{
}