/// <summary>
/// Yields the stored value, either as a new reference or copied into a caller-supplied tensor.
/// </summary>
/// <param name="writeTarget">
/// Tensor that should receive the value, or <c>null</c> to get a reference to the stored value instead.
/// </param>
/// <returns>
/// <paramref name="writeTarget"/> once the value has been copied into it, or the result of
/// <c>value.CopyRef()</c> when no target was supplied.
/// </returns>
public override NDArray Evaluate(NDArray writeTarget)
{
    // A target was supplied: fill it in place and hand the same instance back.
    if (writeTarget != null)
    {
        Ops.Copy(writeTarget, value);
        return writeTarget;
    }

    // No target: return another reference to the stored value.
    return value.CopyRef();
}
/// <summary>
/// Returns a tensor with the requested element type, reusing the input when it already qualifies.
/// </summary>
/// <param name="tensor">The tensor to convert.</param>
/// <param name="elementType">The element type the returned tensor must have.</param>
/// <param name="requireContig">
/// If set to <c>true</c>, the input is only reused when <c>tensor.IsContiguous()</c> holds.
/// </param>
/// <returns>
/// <c>tensor.CopyRef()</c> when the input already satisfies the request; otherwise a new tensor
/// created with <c>cpuAllocator</c>, of the requested type and the same shape, filled via <c>Ops.Copy</c>.
/// </returns>
private NDArray AsTypeCpu(NDArray tensor, DType elementType, bool requireContig)
{
    // A copy is needed when the element type differs, or when contiguity is demanded but absent.
    bool mustCopy = tensor.ElementType != elementType
        || (requireContig && !tensor.IsContiguous());

    if (mustCopy)
    {
        var converted = new NDArray(cpuAllocator, elementType, tensor.Shape);
        Ops.Copy(converted, tensor);
        return converted;
    }

    return tensor.CopyRef();
}
/// <summary>
/// Multiplies a matrix by a vector using CUDA (matrix <paramref name="lhs"/> times vector <paramref name="rhs"/>).
/// </summary>
/// <param name="context">The CUDA context passed through to the type-specific kernels.</param>
/// <param name="result">
/// Optional destination tensor. May be <c>null</c>; when supplied it must use CUDA storage and share the
/// element type of the operands.
/// </param>
/// <param name="lhs">The left-hand operand; must be a 2-dimensional CUDA tensor.</param>
/// <param name="rhs">The right-hand operand; must be a 1-dimensional CUDA tensor.</param>
/// <returns>
/// The write target obtained from <c>TensorResultBuilder.GetWriteTarget(result, rhs, false, lhs.Shape[0])</c>,
/// holding the product.
/// </returns>
/// <exception cref="InvalidOperationException">All tensors must have the same element type</exception>
/// <exception cref="ArgumentException">
/// result must be a CUDA tensor - result
/// or
/// lhs must be a CUDA tensor - lhs
/// or
/// rhs must be a CUDA tensor - rhs
/// or
/// lhs must have 2 dimensions - lhs
/// or
/// rhs must have 1 dimension (ie. be a vector) - rhs
/// </exception>
/// <exception cref="NotSupportedException">
/// The element type is neither <c>DType.Float32</c> nor <c>DType.Float64</c>.
/// </exception>
public static NDArray Mul_M_V(TSCudaContext context, NDArray result, NDArray lhs, NDArray rhs)
{
    if (lhs.ElementType != rhs.ElementType || (result != null && result.ElementType != lhs.ElementType))
    {
        throw new InvalidOperationException("All tensors must have the same element type");
    }

    CudaHelpers.ThrowIfDifferentDevices(result, lhs, rhs);

    // result is optional, but when present it has to live in CUDA storage just like the operands.
    if (result != null && !(result.Storage is CudaStorage))
    {
        throw new ArgumentException("result must be a CUDA tensor", "result");
    }

    if (!(lhs.Storage is CudaStorage))
    {
        throw new ArgumentException("lhs must be a CUDA tensor", "lhs");
    }

    if (!(rhs.Storage is CudaStorage))
    {
        throw new ArgumentException("rhs must be a CUDA tensor", "rhs");
    }

    if (lhs.DimensionCount != 2)
    {
        throw new ArgumentException("lhs must have 2 dimensions", "lhs");
    }

    if (rhs.DimensionCount != 1)
    {
        throw new ArgumentException("rhs must have 1 dimension (ie. be a vector)", "rhs");
    }

    NDArray lhsClone;
    if (lhs.Strides[1] == 1) // If lhs is already row-major, do nothing
    {
        lhsClone = lhs.CopyRef();
    }
    else if (lhs.Strides[0] == 1) // If lhs is column-major, transpose it
    {
        lhsClone = lhs.IntTranspose();
    }
    else // If lhs is not contiguous in either dimension, make a temporary contiguous copy
    {
        lhsClone = Ops.NewContiguous(lhs);
    }

    try
    {
        // Acquired inside the try so that lhsClone is still released if this call throws.
        var writeTarget = TensorResultBuilder.GetWriteTarget(result, rhs, false, lhs.Shape[0]);

        if (writeTarget.ElementType == DType.Float32)
        {
            Run_M_V_float(context, writeTarget, lhsClone, rhs);
        }
        else if (writeTarget.ElementType == DType.Float64)
        {
            Run_M_V_double(context, writeTarget, lhsClone, rhs);
        }
        else
        {
            // Report the type of writeTarget: result may be null here, so it must not be dereferenced.
            throw new NotSupportedException("CUDA Matrix-Vector multiplication with element type " + writeTarget.ElementType + " not supported");
        }

        return writeTarget;
    }
    finally
    {
        lhsClone.Dispose();
    }
}
/// <summary>
/// Computes <c>c := alpha * a * b + beta * c</c> by preparing the operands and delegating to <c>GemmOp</c>.
/// </summary>
/// <remarks>
/// Each operand is inspected through its strides. An operand with unit stride in dimension 0 is used as-is;
/// one with unit stride in dimension 1 is re-expressed through <c>IntTranspose()</c>; anything else is copied
/// into a temporary buffer. When <paramref name="c"/> needed such a temporary, the product is copied back
/// into <paramref name="c"/> after <c>GemmOp</c> returns. All intermediate references are disposed before
/// the method exits.
/// </remarks>
/// <param name="context">The CUDA context passed through to <c>GemmOp</c>.</param>
/// <param name="alpha">Scale factor applied to the product <c>a * b</c>.</param>
/// <param name="a">Left matrix; <c>a.Shape[0]</c> must equal <c>c.Shape[0]</c>.</param>
/// <param name="b">Right matrix; <c>b.Shape[0]</c> must equal <c>a.Shape[1]</c> and <c>b.Shape[1]</c> must equal <c>c.Shape[1]</c>.</param>
/// <param name="beta">Scale factor applied to the existing contents of <paramref name="c"/>.</param>
/// <param name="c">Matrix that is both an input (scaled by <paramref name="beta"/>) and the destination.</param>
/// <exception cref="InvalidOperationException">Size mismatch</exception>
public static void Gemm(TSCudaContext context, float alpha, NDArray a, NDArray b, float beta, NDArray c)
{
    if (a.Shape[0] != c.Shape[0] || b.Shape[1] != c.Shape[1] || a.Shape[1] != b.Shape[0])
    {
        throw new InvalidOperationException("Size mismatch");
    }

    // Deliberately left unassigned: every layout branch below must pick an op explicitly,
    // and the compiler enforces that through definite assignment.
    BlasOp aOp;
    BlasOp bOp;
    bool copyC = false;

    NDArray aClone = null;
    NDArray bClone = null;
    NDArray cClone = null;

    try
    {
        if (c.Strides[0] == 1 && c.Strides[1] != 0)
        {
            // If c is contiguous in dimension 0 (column-major)
            aClone = a.CopyRef();
            bClone = b.CopyRef();
            cClone = c.CopyRef();
        }
        else if (c.Strides[1] == 1 && c.Strides[0] != 0)
        {
            // If c is contiguous in dimension 1 (row-major)
            // using (a * b)' == b' * a'
            // we can pass row-major matrices to BLAS functions that expect column-major by swapping A and B,
            // and transposing all 3 matrices
            cClone = c.IntTranspose();
            aClone = b.IntTranspose(); // Note swap of a and b
            bClone = a.IntTranspose();
        }
        else
        {
            // c is contiguous in neither dimension: work in a temporary and copy the product back afterwards.
            var cNew = new NDArray(c.Allocator, c.ElementType, c.Shape[1], c.Shape[0]);
            cClone = cNew.IntTranspose();
            Ops.Copy(cClone, c);
            cNew.Dispose();
            copyC = true;

            aClone = a.CopyRef();
            bClone = b.CopyRef();
        }

        if (aClone.Strides[0] == 1 && aClone.Strides[1] != 0)
        {
            // If a is contiguous in dimension 0 (column-major)
            aOp = BlasOp.NonTranspose;
        }
        else if (aClone.Strides[1] == 1 && aClone.Strides[0] != 0)
        {
            // If a is contiguous in dimension 1 (row-major): pass its transposed view and ask for a transpose
            aOp = BlasOp.Transpose;
            var aNew = aClone.IntTranspose();
            aClone.Dispose();
            aClone = aNew;
        }
        else
        {
            // a is contiguous in neither dimension: copy it into a temporary that needs no transpose.
            // Set the op explicitly rather than relying on default(BlasOp) being NonTranspose.
            aOp = BlasOp.NonTranspose;
            var aNew = new NDArray(aClone.Allocator, aClone.ElementType, aClone.Shape[1], aClone.Shape[0]);
            var aClone2 = aNew.IntTranspose();
            Ops.Copy(aClone2, aClone);
            aClone.Dispose();
            aClone = aClone2;
            aNew.Dispose();
        }

        if (bClone.Strides[0] == 1 && bClone.Strides[1] != 0)
        {
            // If b is contiguous in dimension 0 (column-major)
            bOp = BlasOp.NonTranspose;
        }
        else if (bClone.Strides[1] == 1 && bClone.Strides[0] != 0)
        {
            // If b is contiguous in dimension 1 (row-major): pass its transposed view and ask for a transpose
            bOp = BlasOp.Transpose;
            var bNew = bClone.IntTranspose();
            bClone.Dispose();
            bClone = bNew;
        }
        else
        {
            // b is contiguous in neither dimension: copy it into a temporary that needs no transpose.
            // Set the op explicitly rather than relying on default(BlasOp) being NonTranspose.
            bOp = BlasOp.NonTranspose;
            var bNew = new NDArray(bClone.Allocator, bClone.ElementType, bClone.Shape[1], bClone.Shape[0]);
            var bClone2 = bNew.IntTranspose();
            Ops.Copy(bClone2, bClone);
            bClone.Dispose();
            bClone = bClone2;
            bNew.Dispose();
        }

        GemmOp(context, aOp, bOp, alpha, aClone, bClone, beta, cClone);

        if (copyC)
        {
            Ops.Copy(c, cClone);
        }
    }
    finally
    {
        // Any of these can still be null if acquiring the operand references failed part-way.
        if (aClone != null)
        {
            aClone.Dispose();
        }

        if (bClone != null)
        {
            bClone.Dispose();
        }

        if (cClone != null)
        {
            cClone.Dispose();
        }
    }
}