/// <summary>
/// Computes softmax cross entropy between logits and labels.
///
/// Measures the probability error in discrete classification tasks in which
/// the classes are mutually exclusive (each entry is in exactly one class).
/// For example, each CIFAR-10 image is labeled with one and only one label: an
/// image can be a dog or a truck, but not both.
///
/// `NOTE`: While the classes are mutually exclusive, their probabilities need
/// not be. All that is required is that each row of `labels` is a valid
/// probability distribution. If it is not, the computation of the gradient
/// will be incorrect.
///
/// `WARNING`: This op expects unscaled logits, since it performs a softmax on
/// `logits` internally for efficiency. Do not call this op with the output of
/// softmax, as it will produce incorrect results.
///
/// `logits` and `labels` must have the same shape, e.g. `[batch_size, num_classes]`,
/// and the same dtype.
/// </summary>
/// <param name="labelst">The labels array.</param>
/// <param name="logitst">The logits array.</param>
/// <param name="dim">The dimension softmax would be performed on. Defaults to `-1`,
/// which indicates the last dimension.</param>
/// <returns></returns>
public static Tensor softmaxCrossEntropy(Tensor labelst, Tensor logitst, int dim = -1)
{
    if (dim == -1)
    {
        dim = logitst.Rank - 1;
    }
    var customOp = customGrad(
        (Tensor[] x) =>
        {
            var labels = x[0];
            var logits = x[1];
            // Cross entropy: -sum(labels * log(softmax(logits))), with a small
            // epsilon added before the log for numerical stability.
            var predictedProbs = logits.softmax(dim);
            var costVector = scalar(1e-5f).add(predictedProbs).log().mul(labels).neg();
            var value = costVector.sum(new int[] { dim });

            var res = new CustomGradientResults();
            res.value = value;
            res.gradFunc = (Tensor dy) =>
            {
                // Re-expand dy over the reduced dimension, then apply the
                // closed-form gradients w.r.t. labels and logits.
                var dyShape = Util.expandShapeToKeepDim(dy.Shape, new int[] { dim });
                return new List<Tensor>()
                {
                    dy.reshape(dyShape).mul(labels.sub(predictedProbs)),
                    dy.reshape(dyShape).mul(predictedProbs.sub(labels))
                };
            };
            return res;
        });
    return customOp(new Tensor[] { labelst, logitst });
}
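// A minimal usage sketch for softmaxCrossEntropy, illustrative only and built
// solely from ops that appear in this file (Ops.ones, scalar, div). Each label
// row is the uniform distribution {1/3, 1/3, 1/3}, which is a valid probability
// distribution as the summary above requires.
public static Tensor SoftmaxCrossEntropyExample()
{
    // Two samples, three classes; uniform labels per row.
    var labels = Ops.ones(new int[] { 2, 3 }).div(scalar(3f));
    // Equal unscaled logits; softmax of each row is also uniform, so the
    // per-sample loss is approximately -sum(1/3 * log(1/3)) = ln(3) ~= 1.0986.
    var logits = Ops.ones(new int[] { 2, 3 });
    // Returns a rank-1 tensor of per-sample losses with shape [2].
    return softmaxCrossEntropy(labels, logits);
}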
/// <summary>
/// Computes the sum of elements across dimensions of a `Tensor`.
/// Reduces the input along the dimensions given in `axis`. Unless `keepDims`
/// is true, the rank of the `Tensor` is reduced by 1 for each entry in `axis`.
/// If `keepDims` is true, the reduced dimensions are retained with length 1.
/// If `axis` has no entries, all dimensions are reduced, and a `Tensor` with a
/// single element is returned.
/// </summary>
/// <param name="x">The input tensor to compute the sum over. If the dtype is `bool`,
/// it will be converted to `int32` and the output dtype will be `int32`.</param>
/// <param name="axis">The dimension(s) to reduce. By default it reduces all dimensions.</param>
/// <param name="keepDims">If true, retains reduced dimensions with size 1.</param>
/// <returns></returns>
public static Tensor sum(this Tensor x, int[] axis = null, bool keepDims = false)
{
    var axes = Util.parseAxisParam(axis, x.Shape);
    var customOp = customGrad(
        (Tensor[] opInputs) =>
        {
            var xi = opInputs[0];
            // Permute the reduction axes to the inner-most positions so the
            // backend can reduce over contiguous dimensions.
            var permutation = Util.getAxesPermutation(axes, xi.Rank);
            var reductionAxes = axes;
            var permutedX = xi;
            if (permutation != null)
            {
                permutedX = xi.transpose(permutation);
                reductionAxes = Util.getInnerMostAxes(reductionAxes.Length, xi.Rank);
            }

            ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
            {
                return bk.Sum(permutedX, reductionAxes);
            };
            var inputs = new Dictionary<string, Tensor>();
            inputs.Add("x", xi);
            var value = ENV.engine.runKernel(f, inputs);
            if (keepDims)
            {
                var newShape = Util.expandShapeToKeepDim(value.Shape, axes);
                value = value.reshape(newShape);
            }

            var res = new CustomGradientResults();
            res.value = value;
            res.gradFunc = (Tensor dy) =>
            {
                // The gradient of sum broadcasts dy back over the reduced
                // dimensions: every input element contributes with weight 1.
                var expandedDyShape = new List<int>(xi.Shape).ToArray();
                foreach (var axis2 in axes)
                {
                    expandedDyShape[axis2] = 1;
                }
                var expandedDy = dy.reshape(expandedDyShape);
                var derX = expandedDy.mul(Ops.ones(xi.Shape));
                return new List<Tensor>() { derX };
            };
            return res;
        });
    return customOp(new Tensor[] { x });
}
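// A minimal usage sketch for sum, illustrative only; it uses Ops.ones (already
// used above) so no assumed factories are needed.
public static Tensor SumExample()
{
    var x = Ops.ones(new int[] { 2, 3 });         // a 2x3 tensor of ones
    var total = x.sum();                          // scalar: 6
    var perColumn = x.sum(new int[] { 0 });       // shape [3]: { 2, 2, 2 }
    var perRowKept = x.sum(new int[] { 1 }, true); // shape [2, 1]: { { 3 }, { 3 } }
    return perRowKept;
}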
/// <summary>
/// Computes the mean of elements across dimensions of a `Tensor`.
/// Reduces `xtensor` along the dimensions given in `axis`. Unless `keepDims` is
/// true, the rank of the `Tensor` is reduced by 1 for each entry in `axis`.
/// If `keepDims` is true, the reduced dimensions are retained with length 1.
/// If `axis` has no entries, all dimensions are reduced, and a `Tensor` with
/// a single element is returned.
/// </summary>
/// <param name="xtensor">The input tensor.</param>
/// <param name="axis">The dimension(s) to reduce. By default it reduces all dimensions.</param>
/// <param name="keepDims">If true, retains reduced dimensions with size 1.</param>
/// <returns></returns>
public static Tensor mean(this Tensor xtensor, int[] axis = null, bool keepDims = false)
{
    var axes = Util.parseAxisParam(axis, xtensor.Shape);
    var shapes = Util.computeOutAndReduceShapes(xtensor.Shape, axes);
    var reduceShape = shapes.Item2;
    var reduceSize = Util.SizeFromShape(reduceShape);
    var customOp = customGrad(
        (Tensor[] x) =>
        {
            // mean(x) = sum(x / n), where n is the number of reduced elements.
            var reduceSizeScalar = scalar(reduceSize);
            var xReduce = x[0];
            var ress = xReduce.div(reduceSizeScalar);
            var value = ress.sum(axis, keepDims);

            var res = new CustomGradientResults();
            res.value = value;
            res.gradFunc = (Tensor dy) =>
            {
                // Broadcast dy back over the reduced dimensions. Since customGrad
                // replaces the whole gradient, each of the n reduced elements must
                // contribute with weight 1/n, hence the final division.
                var expandedDyShape = new List<int>(xReduce.Shape).ToArray();
                foreach (var axis2 in axes)
                {
                    expandedDyShape[axis2] = 1;
                }
                var expandedDy = dy.reshape(expandedDyShape);
                var derX = expandedDy.mul(Ops.ones(xReduce.Shape)).div(reduceSizeScalar);
                return new List<Tensor>() { derX };
            };
            return res;
        });
    return customOp(new Tensor[] { xtensor });
}
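// A minimal usage sketch for mean, illustrative only and again built solely on
// Ops.ones.
public static Tensor MeanExample()
{
    var x = Ops.ones(new int[] { 4, 5 });
    // Mean over all elements of a tensor of ones is 1 (a scalar tensor).
    var all = x.mean();
    // Mean over axis 1, keeping the reduced dimension: shape [4, 1].
    var rows = x.mean(new int[] { 1 }, true);
    return rows;
}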
/// <summary>
/// Computes the softmax normalized vector given the logits.
/// </summary>
/// <param name="logitst">The logits array.</param>
/// <param name="dim">The dimension softmax would be performed on. Defaults to `-1`,
/// which indicates the last dimension.</param>
/// <returns></returns>
public static Tensor softmax(this Tensor logitst, int dim = -1)
{
    if (dim == -1)
    {
        dim = logitst.Rank - 1;
    }
    var customOp = customGrad(
        (Tensor[] x) =>
        {
            var logits = x[0];
            // Compute softmax as exp(logits - logSumExp(logits)), which is
            // numerically stable for large logits.
            var keepDims = true;
            var lse = logits.logSumExp(new int[] { dim }, keepDims);
            var logResult = logits.sub(lse);
            var y = logResult.exp();

            var res = new CustomGradientResults();
            res.value = y;
            res.gradFunc = (Tensor dy) =>
            {
                // Softmax gradient: dx = y * (dy - sum(dy * y, dim)).
                var dyTimesY = dy.mul(y);
                var thesum = dyTimesY.sum(new int[] { dim }, true);
                var themully = thesum.mul(y);
                var theres = dyTimesY.sub(themully);
                return new List<Tensor>() { theres };
            };
            return res;
        });
    return customOp(new Tensor[] { logitst });
}
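// A minimal usage sketch for softmax, illustrative only; it uses only ops
// already used above.
public static Tensor SoftmaxExample()
{
    // A 1x3 row of equal logits; softmax yields the uniform distribution
    // {1/3, 1/3, 1/3}, and each row of the result sums to 1 along dim.
    var logits = Ops.ones(new int[] { 1, 3 });
    return logits.softmax();
}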