/// <summary>
/// Computes the matrix-matrix product <paramref name="lhs"/> * <paramref name="rhs"/> on the GPU via GEMM.
/// </summary>
/// <param name="context">The CUDA context used to dispatch the GEMM call.</param>
/// <param name="result">Optional pre-allocated result tensor; if null, a new tensor of shape [lhs.Sizes[0], rhs.Sizes[1]] is allocated.</param>
/// <param name="lhs">Left-hand matrix operand. Assumed 2D — TODO confirm; dimensionality is not validated here (Gemm presumably enforces it).</param>
/// <param name="rhs">Right-hand matrix operand.</param>
/// <returns>The tensor holding lhs * rhs (either <paramref name="result"/> or a newly allocated tensor).</returns>
/// <exception cref="InvalidOperationException">If the tensors do not all share the same element type.</exception>
/// <exception cref="ArgumentException">If any tensor is not backed by CUDA storage.</exception>
public static Tensor Mul_M_M(TSCudaContext context, Tensor result, Tensor lhs, Tensor rhs)
{
    if (lhs.ElementType != rhs.ElementType || (result != null && result.ElementType != lhs.ElementType))
    {
        throw new InvalidOperationException("All tensors must have the same element type");
    }

    CudaHelpers.ThrowIfDifferentDevices(result, lhs, rhs);

    if (result != null && !(result.Storage is CudaStorage))
    {
        throw new ArgumentException("result must be a CUDA tensor", nameof(result));
    }

    if (!(lhs.Storage is CudaStorage))
    {
        throw new ArgumentException("lhs must be a CUDA tensor", nameof(lhs));
    }

    if (!(rhs.Storage is CudaStorage))
    {
        throw new ArgumentException("rhs must be a CUDA tensor", nameof(rhs));
    }

    var writeTarget = TensorResultBuilder.GetWriteTarget(result, lhs, false, lhs.Sizes[0], rhs.Sizes[1]);

    // alpha = 1, beta = 0: writeTarget = 1 * (lhs * rhs) + 0 * writeTarget
    Gemm(context, 1, lhs, rhs, 0, writeTarget);

    return writeTarget;
}
/// <summary>
/// Computes the dot product of two 1D vectors on the GPU.
/// </summary>
/// <param name="context">The CUDA context used to dispatch the kernel.</param>
/// <param name="result">Optional pre-allocated scalar result tensor; if null, a new 1-element tensor is allocated.</param>
/// <param name="lhs">Left-hand vector operand (must be 1D).</param>
/// <param name="rhs">Right-hand vector operand (must be 1D).</param>
/// <returns>A 1-element tensor containing the dot product.</returns>
/// <exception cref="InvalidOperationException">If the tensors do not all share the same element type.</exception>
/// <exception cref="ArgumentException">If any tensor is not CUDA-backed, or an operand is not 1D.</exception>
/// <exception cref="NotSupportedException">If the element type is neither Float32 nor Float64.</exception>
public static Tensor Dot(TSCudaContext context, Tensor result, Tensor lhs, Tensor rhs)
{
    // ReSharper disable once ArrangeRedundantParentheses
    if (lhs.ElementType != rhs.ElementType || (result != null && result.ElementType != lhs.ElementType))
    {
        throw new InvalidOperationException("All tensors must have the same element type");
    }

    CudaHelpers.ThrowIfDifferentDevices(result, lhs, rhs);

    if (result != null && !(result.Storage is CudaStorage))
    {
        throw new ArgumentException("result must be a CUDA tensor", nameof(result));
    }

    if (!(lhs.Storage is CudaStorage))
    {
        throw new ArgumentException("lhs must be a CUDA tensor", nameof(lhs));
    }

    if (!(rhs.Storage is CudaStorage))
    {
        throw new ArgumentException("rhs must be a CUDA tensor", nameof(rhs));
    }

    if (lhs.DimensionCount != 1)
    {
        throw new ArgumentException("lhs must have 1 dimension (ie. be a vector)", nameof(lhs));
    }

    if (rhs.DimensionCount != 1)
    {
        throw new ArgumentException("rhs must have 1 dimension (ie. be a vector)", nameof(rhs));
    }

    var writeTarget = TensorResultBuilder.GetWriteTarget(result, lhs, false, 1);

    if (writeTarget.ElementType == DType.Float32)
    {
        Run_Dot_float(context, writeTarget, lhs, rhs);
    }
    else if (writeTarget.ElementType == DType.Float64)
    {
        Run_Dot_double(context, writeTarget, lhs, rhs);
    }
    else
    {
        // BUG FIX: previously read result.ElementType here, which throws
        // NullReferenceException when result is null (the common case where
        // a new write target was allocated). Use writeTarget instead.
        throw new NotSupportedException("CUDA vector dot product with element type " + writeTarget.ElementType + " not supported");
    }

    return writeTarget;
}
/// <summary>
/// Computes the matrix-vector product <paramref name="lhs"/> * <paramref name="rhs"/> on the GPU.
/// Normalizes <paramref name="lhs"/> to a row-major layout (transposing or copying as needed) before launching the kernel.
/// </summary>
/// <param name="context">The CUDA context used to dispatch the kernel.</param>
/// <param name="result">Optional pre-allocated result vector; if null, a new tensor of length lhs.Sizes[0] is allocated.</param>
/// <param name="lhs">Matrix operand (must be 2D).</param>
/// <param name="rhs">Vector operand (must be 1D).</param>
/// <returns>The tensor holding lhs * rhs (either <paramref name="result"/> or a newly allocated tensor).</returns>
/// <exception cref="InvalidOperationException">If the tensors do not all share the same element type.</exception>
/// <exception cref="ArgumentException">If any tensor is not CUDA-backed, or the operand dimensionality is wrong.</exception>
/// <exception cref="NotSupportedException">If the element type is neither Float32 nor Float64.</exception>
public static Tensor Mul_M_V(TSCudaContext context, Tensor result, Tensor lhs, Tensor rhs)
{
    if (lhs.ElementType != rhs.ElementType || (result != null && result.ElementType != lhs.ElementType))
    {
        throw new InvalidOperationException("All tensors must have the same element type");
    }

    CudaHelpers.ThrowIfDifferentDevices(result, lhs, rhs);

    // BUG FIX: the original condition was missing the negation — it threw when
    // result WAS a CUDA tensor and silently accepted a non-CUDA result.
    if (result != null && !(result.Storage is CudaStorage))
    {
        throw new ArgumentException("result must be a CUDA tensor", nameof(result));
    }

    if (!(lhs.Storage is CudaStorage))
    {
        throw new ArgumentException("lhs must be a CUDA tensor", nameof(lhs));
    }

    if (!(rhs.Storage is CudaStorage))
    {
        throw new ArgumentException("rhs must be a CUDA tensor", nameof(rhs));
    }

    if (lhs.DimensionCount != 2)
    {
        throw new ArgumentException("lhs must have 2 dimensions", nameof(lhs));
    }

    if (rhs.DimensionCount != 1)
    {
        throw new ArgumentException("rhs must have 1 dimension (ie. be a vector)", nameof(rhs));
    }

    Tensor lhsClone;
    if (lhs.Strides[1] == 1) // If lhs is already row-major, do nothing
    {
        lhsClone = lhs.CopyRef();
    }
    else if (lhs.Strides[0] == 1) // If lhs is column-major, transpose it
    {
        lhsClone = lhs.Transpose();
    }
    else // If lhs is not contiguous in either dimension, make a temporary contiguous copy
    {
        lhsClone = Ops.NewContiguous(lhs);
    }

    var writeTarget = TensorResultBuilder.GetWriteTarget(result, rhs, false, lhs.Sizes[0]);

    try
    {
        if (writeTarget.ElementType == DType.Float32)
        {
            Run_M_V_float(context, writeTarget, lhsClone, rhs);
        }
        else if (writeTarget.ElementType == DType.Float64)
        {
            Run_M_V_double(context, writeTarget, lhsClone, rhs);
        }
        else
        {
            // BUG FIX: previously read result.ElementType here, which throws
            // NullReferenceException when result is null. Use writeTarget instead.
            throw new NotSupportedException("CUDA Matrix-Vector multiplication with element type " + writeTarget.ElementType + " not supported");
        }
    }
    finally
    {
        lhsClone.Dispose(); // release the temporary reference/copy even on failure
    }

    return writeTarget;
}