/// <summary>
/// Computes the Gauss error function of the input `Tensor` element-wise:
/// `erf(x)`
/// </summary>
/// <param name="x">The input tensor.</param>
/// <returns></returns>
public static Tensor erf(this Tensor x)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () =>
        {
            // d/dx erf(x) = (2 / sqrt(pi)) * e^(-x^2)
            return dy.mulStrict(Ops.scalar(2f / (float)Math.Sqrt(Math.PI))
                .mul(x.square().neg().exp()));
        });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.erf(x);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
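// Why the factor in the erf gradient above is correct: erf(x) is defined as
// (2 / sqrt(pi)) times the integral from 0 to x of e^(-t^2) dt, so by the
// fundamental theorem of calculus d(erf)/dx = (2 / sqrt(pi)) * e^(-x^2),
// which is exactly what the "x" closure multiplies into the upstream dy.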
private void addTapeNode(Tensor[] inputs, Tensor result, Func<Tensor, Tensor[]> gradientsFunc)
{
    var inputsMap = new Dictionary<string, Tensor>();
    for (int i = 0; i < inputs.Length; i++)
    {
        inputsMap.Add(i.ToString(), inputs[i]);
    }
    Func<Tensor, NamedGradientMap> gradient = (Tensor dy) =>
    {
        var res = gradientsFunc(dy);
        var resMap = new NamedGradientMap();
        // Initialize the map before adding entries, mirroring the ops below.
        resMap.gradient = new Dictionary<string, Func<Tensor>>();
        var outer = 0;
        foreach (var item in res)
        {
            // `item` is a fresh variable per iteration, so each closure
            // captures its own tensor; the string key is evaluated eagerly
            // and matches the positional keys used in inputsMap.
            resMap.gradient.Add(outer.ToString(), () => { return item; });
            outer++;
        }
        return resMap;
    };
    TapeNode tapeNode = new TapeNode()
    {
        id = this.nextTapeNodeId++,
        name = this.activeScope.name,
        inputs = inputsMap,
        output = result,
        gradient = gradient
    };
    this.activeTape.Add(tapeNode);
}
/// <summary>
/// Computes exponential of the input `Tensor` element-wise. `e ^ x`
/// </summary>
/// <param name="x">The input tensor.</param>
/// <returns></returns>
public static Tensor exp(this Tensor x)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        var y = s[0];
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () => { return dy.mulStrict(y); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return saved(bk.exp(x));
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
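// Note on the pattern above: the forward pass wraps its result in `saved(...)`,
// which stashes y = e^x for the backward pass, where it arrives as s[0].
// Since d/dx e^x = e^x, the gradient is simply dy * y and the exponential
// never needs to be recomputed.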
/// <summary>
/// Returns (a - b) * (a - b) element-wise.
/// Supports broadcasting.
///
/// We also expose `squaredDifferenceStrict` which has the same signature as
/// this op and asserts that `a` and `b` are the same shape (does not
/// broadcast).
/// </summary>
/// <param name="a">The first tensor.</param>
/// <param name="b">The second tensor.</param>
/// <returns></returns>
public static Tensor squaredDifference(this Tensor a, Tensor b)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        var two = scalar(2);
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("a", () => { return dy.mul(a.sub(b).mul(two)); });
        g.gradient.Add("b", () => { return dy.mul(b.sub(a).mul(two)); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.squaredDifference(a, b);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("a", a);
    inputs.Add("b", b);
    return e.runKernel(f, inputs, grad);
}
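// The two closures above follow directly from expanding the derivative:
// d/da (a - b)^2 = 2 * (a - b), and d/db (a - b)^2 = -2 * (a - b) = 2 * (b - a),
// so each input's gradient is dy scaled by twice its signed difference.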
/// <summary>
/// Clips values element-wise. `max(min(x, clipValueMax), clipValueMin)`
/// </summary>
/// <param name="x">The input tensor.</param>
/// <param name="clipValueMin">Lower-bound of range to be clipped to.</param>
/// <param name="clipValueMax">Upper-bound of range to be clipped to.</param>
/// <returns></returns>
public static Tensor clipByValue(this Tensor x, float clipValueMin, float clipValueMax)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () =>
        {
            // Pass dy through where x lies strictly inside the clip range;
            // clipped positions receive zero gradient.
            return dy.where(
                x.greater(scalar(clipValueMin))
                    .logicalAnd(x.less(scalar(clipValueMax))),
                zerosLike(dy));
        });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.clip(x, clipValueMin, clipValueMax);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
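// Worked example: clipping x = [-2, 0.5, 3] to the range [0, 1] yields
// [0, 0.5, 1]. On the backward pass only the middle element lies strictly
// inside (0, 1), so the gradient mask is [0, 1, 0] and dy flows through
// just that position.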
/// <summary>
/// Returns the max of a and b (`a > b ? a : b`) element-wise.
/// Supports broadcasting.
///
/// We also expose `maximumStrict` which has the same signature as this op and
/// asserts that `a` and `b` are the same shape (does not broadcast).
/// </summary>
/// <param name="a">The first tensor.</param>
/// <param name="b">The second tensor.</param>
/// <returns></returns>
public static Tensor maximum(this Tensor a, Tensor b)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("a", () => { return dy.mul(a.greaterEqual(b)); });
        g.gradient.Add("b", () => { return dy.mul(a.less(b)); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.maximum(a, b);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("a", a);
    inputs.Add("b", b);
    return e.runKernel(f, inputs, grad);
}
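// Subgradient routing in the closures above: greaterEqual/less act as 0-1
// masks, so dy flows to `a` wherever a >= b and to `b` wherever a < b.
// On ties the entire gradient is assigned to `a`.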
/// <summary>
/// Computes exponential linear element-wise, `x > 0 ? x : (e ^ x) - 1`
/// </summary>
/// <param name="x">The input tensor.</param>
/// <returns></returns>
public static Tensor elu(this Tensor x)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () =>
        {
            var y = s[0];
            ForwardFunc fg = (IBackend bk, Func<Tensor, Tensor> saved) =>
            {
                return bk.eluDer(dy, y);
            };
            var inputsg = new Dictionary<string, Tensor>();
            inputsg.Add("dy", dy);
            inputsg.Add("y", y);
            return ENV.engine.runKernel(fg, inputsg);
        });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> save) =>
    {
        return save(bk.elu(x));
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
/// <summary>
/// Computes scaled exponential linear element-wise:
/// `x > 0 ? scale * x : scale * alpha * (exp(x) - 1)`
/// </summary>
/// <param name="x">The input tensor.</param>
/// <returns></returns>
public static Tensor selu(this Tensor x)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () =>
        {
            var mask = x.greater(scalar(0));
            var scaleAlpha = scalar((float)Util.SELU_SCALEALPHA);
            var scale = scalar((float)Util.SELU_SCALE);
            // d/dx is `scale` on the positive side and
            // `scale * alpha * exp(x)` on the non-positive side.
            var greaterThanZeroDer = dy.mul(scale);
            var lessEqualZeroDer = dy.mul(scaleAlpha).mul(x.exp());
            return where(mask, greaterThanZeroDer, lessEqualZeroDer);
        });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.selu(x);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
/// <summary>
/// Reverses a `Tensor` along the specified axes.
/// </summary>
/// <param name="x">The input tensor to be reversed.</param>
/// <param name="axis">The set of dimensions to reverse. Must be in the
/// range [-rank(x), rank(x)). Defaults to all axes.</param>
/// <returns></returns>
public static Tensor reverse(this Tensor x, int[] axis)
{
    if (x.Rank == 0)
    {
        return x.clone();
    }
    var axes = Util.parseAxisParam(axis, x.Shape);
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        // Reversing is its own inverse, so the gradient simply reverses dy
        // along the same axes.
        g.gradient.Add("x", () => { return dy.reverse(axes); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.reverse(x, axes);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    var res = e.runKernel(f, inputs, grad);
    return res.reshapeAs(x);
}
/// <summary>
/// Computes hyperbolic tangent of the input `Tensor` element-wise: `tanh(x)`
/// </summary>
/// <param name="x">The input tensor.</param>
/// <returns></returns>
public static Tensor tanh(this Tensor x)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () =>
        {
            var y = s[0];
            return scalar(1).sub(y.square()).mulStrict(dy);
        });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return saved(bk.tanh(x));
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
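// Derivation: with y = tanh(x) saved from the forward pass,
// d/dx tanh(x) = 1 - tanh(x)^2 = 1 - y^2, so the closure computes
// (1 - y^2) * dy without re-evaluating tanh.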
/// <summary>
/// Normalizes the activation of a local neighborhood across or within
/// channels.
/// </summary>
/// <param name="x">The input tensor. The 4-D input tensor is treated as a 3-D array
/// of 1D vectors (along the last dimension), and each vector is
/// normalized independently.</param>
/// <param name="depthRadius">The number of adjacent channels or spatial locations of the
/// 1D normalization window. In Tensorflow this param is called
/// 'depth_radius' because only 'acrossChannels' mode is supported.</param>
/// <param name="bias">A constant bias term for the basis.</param>
/// <param name="alpha">A scale factor, usually positive.</param>
/// <param name="beta">An exponent.</param>
/// <returns></returns>
public static Tensor localResponseNormalization(this Tensor x, float depthRadius = 5,
    float bias = 1, float alpha = 1, float beta = 0.5f)
{
    Tensor x4D = null;
    var reshapedTo4D = false;
    if (x.Rank == 3)
    {
        reshapedTo4D = true;
        x4D = x.as4D(1, x.Shape[0], x.Shape[1], x.Shape[2]);
    }
    else
    {
        x4D = x as Tensor;
    }
    Engine e = ENV.engine;
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x4D", () =>
        {
            ForwardFunc fgrad = (IBackend bk, Func<Tensor, Tensor> saved) =>
            {
                var outputImage = s[0];
                return bk.LRNGrad(dy, x4D, outputImage, depthRadius, bias, alpha, beta);
            };
            return e.runKernel(fgrad, new Dictionary<string, Tensor>());
        });
        return g;
    };
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return saved(bk.localResponseNormalization4D(
            x4D, depthRadius, bias, alpha, beta));
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x4D", x4D);
    // Register the gradient function; without it the backward pass
    // (which reads the saved output via s[0]) would never run.
    var res = e.runKernel(f, inputs, grad);
    if (reshapedTo4D)
    {
        return res.as3D(res.Shape[1], res.Shape[2], res.Shape[3]);
    }
    else
    {
        return res;
    }
}
/// <summary>
/// Computes the 2D max pooling of an image.
/// </summary>
/// <param name="x">The input tensor, of rank 4 or rank 3 of shape
/// `[batch, height, width, inChannels]`. If rank 3, batch of 1 is assumed.</param>
/// <param name="filterSize">The filter size, a tuple `[filterHeight, filterWidth]`.</param>
/// <param name="strides">The strides of the pooling: `[strideHeight, strideWidth]`.</param>
/// <param name="pad">The type of padding algorithm.
/// - `same` and stride 1: output will be of same size as input,
///   regardless of filter size.
/// - `valid`: output will be smaller than input if filter is larger
///   than 1x1.
/// - For more info, see this guide:
///   [https://www.tensorflow.org/api_guides/python/nn#Convolution](
///   https://www.tensorflow.org/api_guides/python/nn#Convolution)</param>
/// <param name="dimRoundingMode">The rounding mode used when computing output
/// dimensions if pad is a number. If none is provided, it will not round
/// and error if the output is of fractional size.</param>
/// <param name="padvalue">The value of pad if pad is a number.</param>
/// <returns></returns>
public static Tensor maxPool(this Tensor x, int[] filterSize, int[] strides, PadType pad,
    roundingMode dimRoundingMode = roundingMode.none, Nullable<int> padvalue = null)
{
    Tensor x4D = null;
    var reshapedTo4D = false;
    if (x.Rank == 3)
    {
        reshapedTo4D = true;
        x4D = x.as4D(1, x.Shape[0], x.Shape[1], x.Shape[2]);
    }
    else
    {
        x4D = x as Tensor;
    }
    var convInfo = Util.computePool2DInfo(
        x4D.Shape, filterSize, strides, pad, dimRoundingMode,
        ConvDataFormat.channelsLast, padvalue);
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () =>
        {
            var y4D = s[0];
            return maxPoolBackprop(dy, x4D, y4D, filterSize, strides, pad,
                dimRoundingMode, padvalue);
        });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return saved(bk.maxPool(x4D, convInfo));
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x4D);
    var res = e.runKernel(f, inputs, grad);
    if (reshapedTo4D)
    {
        return res.as3D(res.Shape[1], res.Shape[2], res.Shape[3]);
    }
    return res;
}
/// <summary>
/// Bilinear resize a batch of 3D images to a new shape.
/// </summary>
/// <param name="images">The images, of rank 4 or rank 3, of shape
/// `[batch, height, width, inChannels]`. If rank 3, batch of 1 is assumed.</param>
/// <param name="size">The new shape `[newHeight, newWidth]` to resize the
/// images to. Each channel is resized individually.</param>
/// <param name="alignCorners">Defaults to false. If true, rescale
/// input by `(new_height - 1) / (height - 1)`, which exactly aligns the 4
/// corners of images and resized images. If false, rescale by
/// `new_height / height`. Treat similarly the width dimension.</param>
/// <returns></returns>
public static Tensor resizeBilinear(this Tensor images, int[] size, bool alignCorners = false)
{
    Tensor batchImages = null;
    var reshapedTo4D = false;
    if (images.Rank == 3)
    {
        reshapedTo4D = true;
        batchImages = images.as4D(1, images.Shape[0], images.Shape[1], images.Shape[2]);
    }
    else
    {
        batchImages = images as Tensor;
    }
    Engine e = ENV.engine;
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        // The gradient key must match the key used in `inputs` below.
        g.gradient.Add("batchImages", () =>
        {
            ForwardFunc fb = (IBackend bk, Func<Tensor, Tensor> saved) =>
            {
                return bk.resizeBilinearBackprop(dy, batchImages, alignCorners);
            };
            return e.runKernel(fb, new Dictionary<string, Tensor>());
        });
        return g;
    };
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.resizeBilinear(batchImages, size[0], size[1], alignCorners);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("batchImages", batchImages);
    // Register the gradient so backprop through the resize is possible.
    var res = e.runKernel(f, inputs, grad);
    if (reshapedTo4D)
    {
        return res.as3D(res.Shape[1], res.Shape[2], res.Shape[3]);
    }
    return res;
}
/// <summary>
/// Computes arctangent of `Tensor`s a / b element-wise: `atan2(a, b)`.
/// Supports broadcasting.
/// </summary>
/// <param name="a">The first tensor.</param>
/// <param name="b">The second tensor.</param>
/// <returns></returns>
public static Tensor atan2(this Tensor a, Tensor b)
{
    var outShape = Util.assertAndGetBroadcastShape(a.Shape, b.Shape);
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("a", () =>
        {
            var d = add(square(a), square(b));
            var res = dy.mul(b.div(d));
            var reduceAxes = Util.getReductionAxes(a.Shape, outShape);
            if (reduceAxes.Length > 0)
            {
                res = res.sum(reduceAxes);
            }
            return res.reshape(a.Shape);
        });
        g.gradient.Add("b", () =>
        {
            var d = add(square(a), square(b));
            var res = neg(dy.mul(a.div(d)));
            var reduceAxes = Util.getReductionAxes(b.Shape, outShape);
            if (reduceAxes.Length > 0)
            {
                res = res.sum(reduceAxes);
            }
            return res.reshape(b.Shape);
        });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.atan2(a, b);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("a", a);
    inputs.Add("b", b);
    return e.runKernel(f, inputs, grad);
}
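// Partials used above: for f(a, b) = atan2(a, b),
// df/da = b / (a^2 + b^2) and df/db = -a / (a^2 + b^2).
// Because the op broadcasts, each gradient is summed over the broadcast
// (reduction) axes and then reshaped back to its input's original shape,
// so the gradient always matches the shape of the corresponding input.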
/// <summary>
/// Computes the power of one `Tensor` to another. Supports broadcasting.
///
/// Given a `Tensor` x and a `Tensor` y, this operation computes x^y for
/// corresponding elements in x and y. The result's dtype will be the upcasted
/// type of the `base` and `exp` dtypes.
/// </summary>
/// <param name="baset">The base `Tensor` to pow element-wise.</param>
/// <param name="exp">The exponent `Tensor` to pow element-wise.</param>
/// <returns></returns>
public static Tensor pow(this Tensor baset, Tensor exp)
{
    var outShape = Util.assertAndGetBroadcastShape(baset.Shape, exp.Shape);
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        var y = s[0];
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("baset", () =>
        {
            var res = dy.mul(exp.mul(y.div(baset)));
            var reduceAxes = Util.getReductionAxes(baset.Shape, outShape);
            if (reduceAxes.Length > 0)
            {
                res = res.sum(reduceAxes);
            }
            return res.reshape(baset.Shape);
        });
        g.gradient.Add("exp", () =>
        {
            var res = dy.mul(y.mul(baset.log()));
            var reduceAxes = Util.getReductionAxes(exp.Shape, outShape);
            if (reduceAxes.Length > 0)
            {
                res = res.sum(reduceAxes);
            }
            return res.reshape(exp.Shape);
        });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return saved(bk.Pow(baset, exp));
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("baset", baset);
    inputs.Add("exp", exp);
    return e.runKernel(f, inputs, grad);
}
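// Partials used above, with y = base^exp saved from the forward pass:
// d(y)/d(base) = exp * base^(exp - 1) = exp * y / base, and
// d(y)/d(exp)  = base^exp * ln(base)  = y * ln(base).
// As with atan2, broadcast axes are summed out before reshaping.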
private static Tensor concat2Tensors(Tensor a, Tensor b, int axis)
{
    var outShape = Util.computeOutShape(a.Shape, b.Shape, axis);
    var fs = new ArraySegment<int>(a.Shape, axis, a.Shape.Length - axis);
    var fs2 = new ArraySegment<int>(b.Shape, axis, b.Shape.Length - axis);
    var a2D = a.as2D(-1, Util.SizeFromShape(fs.ToArray()));
    var b2D = b.as2D(-1, Util.SizeFromShape(fs2.ToArray()));
    var slices = Util.computeGradientSliceShapes(a2D.Shape, b2D.Shape);
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("a", () => { return dy.slice(slices.aBegin, slices.aSize); });
        g.gradient.Add("b", () => { return dy.slice(slices.bBegin, slices.bSize); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.concat(a2D, b2D);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("a", a2D);
    inputs.Add("b", b2D);
    var res = e.runKernel(f, inputs, grad);
    return res.reshape(outShape);
}
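// Concatenation's gradient is just slicing: both inputs are flattened to 2-D
// around the concat axis, the backend concatenates the 2-D views, and dy
// splits back into the `a` and `b` pieces using the begin/size pairs
// precomputed in `slices`. The final reshape restores the true output shape.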
/// <summary>
/// Computes floor of input `Tensor` element-wise: `floor(x)`.
/// </summary>
/// <param name="x">The input Tensor.</param>
/// <returns></returns>
public static Tensor floor(this Tensor x)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        // floor is piecewise constant, so its gradient is zero everywhere.
        g.gradient.Add("x", () => { return zerosLike(dy); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.floor(x);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
/// <summary>
/// Computes reciprocal of x element-wise: `1 / x`
/// </summary>
/// <param name="x">The input tensor.</param>
/// <returns></returns>
public static Tensor reciprocal(this Tensor x)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        // d/dx (1 / x) = -1 / x^2
        g.gradient.Add("x", () => { return dy.divStrict(x.square().neg()); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.reciprocal(x);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
/// <summary>
/// Computes cos of the input `Tensor` element-wise: `cos(x)`
/// </summary>
/// <param name="x">The input tensor.</param>
/// <returns></returns>
public static Tensor cos(this Tensor x)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        // d/dx cos(x) = -sin(x)
        g.gradient.Add("x", () => { return x.sin().neg().mulStrict(dy); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.cos(x);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
/// <summary>
/// Computes step of the input `Tensor` element-wise: `x > 0 ? 1 : alpha`
/// </summary>
/// <param name="x">The input tensor.</param>
/// <param name="alpha">The value used where the input is negative.</param>
/// <returns></returns>
public static Tensor step(this Tensor x, float alpha = 0.0f)
{
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () => { return zerosLike(dy); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.step(x, alpha);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
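// The step function is piecewise constant, so its derivative is zero
// everywhere it is defined (and undefined at x = 0); the gradient closure
// above therefore returns zerosLike(dy) regardless of alpha.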
/// <summary>
/// Transposes the `Tensor`. Permutes the dimensions according to `perm`.
/// The returned `Tensor`'s dimension `i` will correspond to the input
/// dimension `perm[i]`. If `perm` is not given, it is set to `[n-1...0]`,
/// where `n` is the rank of the input `Tensor`. Hence by default, this
/// operation performs a regular matrix transpose on 2-D input `Tensor`s.
/// </summary>
/// <param name="x">The tensor to transpose.</param>
/// <param name="perm">The permutation of the dimensions of x.</param>
/// <returns></returns>
public static Tensor transpose(this Tensor x, int[] perm = null)
{
    if (perm == null)
    {
        perm = x.Shape.Select((s, i) => i).Reverse().ToArray();
    }
    if (x.Rank <= 1)
    {
        return x.clone();
    }
    if (x.Rank != perm.Length)
    {
        throw new Exception("Error in transpose: rank of input " + x.Rank.ToString()
            + " must match length of perm " + perm.Length + ".");
    }
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        var undoPerm = Util.getUndoAxesPermutation(perm);
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () => { return dy.transpose(undoPerm); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.transpose(x, perm);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
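// Util.getUndoAxesPermutation is defined elsewhere in this port; for the
// gradient above to map dy back to x's layout it must produce the inverse
// permutation (undo[perm[i]] = i). A hedged sketch of that computation,
// using nothing beyond plain arrays (the method name is illustrative, not
// part of the original API):
private static int[] undoPermutationSketch(int[] perm)
{
    var undo = new int[perm.Length];
    for (int i = 0; i < perm.Length; i++)
    {
        undo[perm[i]] = i; // axis i was moved to position perm[i]; invert that
    }
    return undo; // e.g. perm = [1, 2, 0] yields [2, 0, 1]
}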
/// <summary>
/// Depthwise 2D convolution.
///
/// Given a 4D "input" array and a "filter" array of shape
/// "[filterHeight, filterWidth, inChannels, channelMultiplier]" containing
/// "inChannels" convolutional filters of depth 1, this op applies a
/// different filter to each input channel (expanding from 1 channel to
/// "channelMultiplier" channels for each), then concatenates the results
/// together. The output has "inChannels * channelMultiplier" channels.
///
/// See
/// [https://www.tensorflow.org/api_docs/python/tf/nn/depthwise_conv2d](
/// https://www.tensorflow.org/api_docs/python/tf/nn/depthwise_conv2d)
/// for more details.
/// </summary>
/// <param name="input">The input tensor, of Rank 4 or Rank 3, of shape
/// "[batch, height, width, inChannels]". If Rank 3, batch of 1 is assumed.</param>
/// <param name="filter">The filter tensor, Rank 4, of shape
/// "[filterHeight, filterWidth, inChannels, channelMultiplier]".</param>
/// <param name="strides">The strides of the convolution: "[strideHeight,
/// strideWidth]". If strides is a single number, then "strideHeight ==
/// strideWidth".</param>
/// <param name="pad">The type of padding algorithm.
/// - "same" and stride 1: output will be of same size as input,
///   regardless of filter size.
/// - "valid": output will be smaller than input if filter is larger
///   than 1x1.
/// - For more info, see this guide:
///   [https://www.tensorflow.org/api_guides/python/nn#Convolution](
///   https://www.tensorflow.org/api_guides/python/nn#Convolution)</param>
/// <param name="dilations">The dilation rates: "[dilationHeight, dilationWidth]"
/// in which we sample input values across the height and width dimensions
/// in atrous convolution. Defaults to "[1, 1]". If "rate" is a single
/// number, then "dilationHeight == dilationWidth". If it is greater than
/// 1, then all values of "strides" must be 1.</param>
/// <param name="dimRoundingMode">The rounding mode used when computing output
/// dimensions if pad is a number. If none is provided, it will not round
/// and error if the output is of fractional size.</param>
/// <param name="padvalue">The value of pad if pad is a number.</param>
/// <returns></returns>
public static Tensor depthwiseConv2d(this Tensor input, Tensor filter, int[] strides,
    PadType pad, int[] dilations = null,
    roundingMode dimRoundingMode = roundingMode.none, Nullable<int> padvalue = null)
{
    if (dilations == null)
    {
        dilations = new int[] { 1, 1 };
    }
    Tensor input4D = null;
    var reshapedTo4D = false;
    if (input.Rank == 3)
    {
        reshapedTo4D = true;
        input4D = input.as4D(1, input.Shape[0], input.Shape[1], input.Shape[2]);
    }
    else
    {
        input4D = input as Tensor;
    }
    var convInfo = Util.computeConv2DInfo(
        input4D.Shape, filter.Shape, strides, dilations, pad, dimRoundingMode,
        true /* depthwise */, ConvDataFormat.channelsLast, padvalue);
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("input4D", () =>
        {
            var resg = depthwiseConv2dDerInput(input4D.Shape, dy, filter, strides,
                pad, dimRoundingMode, padvalue);
            if (reshapedTo4D)
            {
                resg = resg.as3D(resg.Shape[1], resg.Shape[2], resg.Shape[3]);
            }
            return resg;
        });
        g.gradient.Add("filter", () =>
        {
            return depthwiseConv2dDerFilter(input4D, dy, filter.Shape, strides,
                pad, dimRoundingMode, padvalue);
        });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.depthwiseConv2D(input4D, filter, convInfo);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("input4D", input4D);
    inputs.Add("filter", filter);
    var res = e.runKernel(f, inputs, grad);
    if (reshapedTo4D)
    {
        return res.as3D(res.Shape[1], res.Shape[2], res.Shape[3]);
    }
    return res;
}
/// <summary>
/// Batch normalization.
///
/// As described in
/// [http://arxiv.org/abs/1502.03167](http://arxiv.org/abs/1502.03167).
///
/// Mean, variance, scale, and offset can be of two shapes:
/// - The same shape as the input.
/// - In the common case, the depth dimension is the last dimension of x, so
///   the values would be a "Tensor1D" of shape [depth].
/// </summary>
/// <param name="x">The input Tensor.</param>
/// <param name="mean">A mean Tensor.</param>
/// <param name="variance">A variance Tensor.</param>
/// <param name="varianceEpsilon">A small float number to avoid dividing by 0.</param>
/// <param name="scale">A scale Tensor.</param>
/// <param name="offset">An offset Tensor.</param>
/// <returns></returns>
public static Tensor batchNormalization(this Tensor x, Tensor mean, Tensor variance,
    float varianceEpsilon = 0.001f, Tensor scale = null, Tensor offset = null)
{
    if (mean.Rank != variance.Rank)
    {
        throw new Exception("Batch normalization gradient requires mean and variance to have equal Ranks.");
    }
    if (offset != null && mean.Rank != offset.Rank)
    {
        throw new Exception("Batch normalization gradient requires mean and offset to have equal Ranks.");
    }
    if (scale != null && mean.Rank != scale.Rank)
    {
        throw new Exception("Batch normalization gradient requires mean and scale to have equal Ranks.");
    }
    Tensor x4D = null;
    if (x.Rank == 0 || x.Rank == 1)
    {
        x4D = x.as4D(1, 1, 1, x.Size);
    }
    else if (x.Rank == 2)
    {
        x4D = x.as4D(1, 1, x.Shape[0], x.Shape[1]);
    }
    else if (x.Rank == 3)
    {
        x4D = x.as4D(1, x.Shape[0], x.Shape[1], x.Shape[2]) as Tensor;
    }
    else
    {
        x4D = x as Tensor;
    }
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        var scaleValue = scale == null ? scalar(1) : scale;
        var reductionAxes = Util.getReductionAxes(mean.Shape, x4D.Shape);
        List<int> tileShape = new List<int>();
        if (mean.Rank == 1)
        {
            for (var i = 0; i < x4D.Shape.Length - 1; ++i)
            {
                tileShape.Add(x4D.Shape[i]);
            }
            tileShape.Add(1);
        }
        var xMinusMean = x.sub(mean);
        var dyTimesScaleValue = dy.mul(scaleValue);
        var oneOverSqrtVariance = rsqrt(variance.add(scalar(varianceEpsilon)));
        // -0.5 * (variance + eps)^(-3/2), the derivative of rsqrt.
        var minusHalfRCube = oneOverSqrtVariance.mul(oneOverSqrtVariance)
            .mul(oneOverSqrtVariance)
            .mul(scalar(-0.5f));
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () =>
        {
            if (mean.Rank == 1)
            {
                return dy
                    .mul(tile(
                        oneOverSqrtVariance.as4D(1, 1, 1, mean.Shape[0]),
                        tileShape.ToArray()))
                    .mul(scaleValue)
                    .reshape(x.Shape);
            }
            else
            {
                return dy.mul(oneOverSqrtVariance).mul(scaleValue).reshape(x.Shape);
            }
        });
        g.gradient.Add("mean", () =>
        {
            var meanDer = oneOverSqrtVariance.mul(scalar(-1)).mul(dyTimesScaleValue);
            if (mean.Rank == 1)
            {
                meanDer = meanDer.sum(reductionAxes);
            }
            return meanDer.reshape(mean.Shape);
        });
        g.gradient.Add("variance", () =>
        {
            var varianceDer = minusHalfRCube.mul(xMinusMean).mul(dyTimesScaleValue);
            if (mean.Rank == 1)
            {
                varianceDer = varianceDer.sum(reductionAxes);
            }
            return varianceDer.reshape(mean.Shape);
        });
        g.gradient.Add("scale", () =>
        {
            var xMinusMean2TimesRsqrt = xMinusMean.mul(oneOverSqrtVariance);
            var scaleDer = dy.mul(xMinusMean2TimesRsqrt);
            if (mean.Rank == 1)
            {
                scaleDer = scaleDer.sum(reductionAxes);
            }
            return scaleDer.reshape(mean.Shape);
        });
        g.gradient.Add("offset", () =>
        {
            var offsetDer = dy;
            if (mean.Rank == 1)
            {
                offsetDer = offsetDer.sum(reductionAxes);
            }
            return offsetDer.reshape(mean.Shape);
        });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.batchNormalization(
            x4D, batchnormReshape4D(mean), batchnormReshape4D(variance),
            varianceEpsilon, batchnormReshape4D(scale), batchnormReshape4D(offset));
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    inputs.Add("mean", mean);
    inputs.Add("variance", variance);
    inputs.Add("scale", scale);
    inputs.Add("offset", offset);
    var res = e.runKernel(f, inputs, grad);
    return res.reshape(x.Shape);
}
/// <summary>
/// Extracts a slice from a `Tensor`, starting at coordinates `begin`
/// and of size `size`.
/// </summary>
/// <param name="x">The input `Tensor` to slice from.</param>
/// <param name="begin">
/// The coordinates to start the slice from. The length can be
/// less than the rank of x - the rest of the axes will have implicit 0 as
/// start. Can also be a single number, in which case it specifies the
/// first axis.
/// </param>
/// <param name="size">
/// The size of the slice. The length can be less than the rank of
/// x - the rest of the axes will have implicit -1. A value of -1 requests
/// the rest of the dimensions in the axis. Can also be a single number,
/// in which case it specifies the size of the first axis.
/// </param>
/// <returns></returns>
public static Tensor slice(this Tensor x, int[] begin, int[] size = null)
{
    if (x.Rank == 0)
    {
        throw new Exception("Slicing scalar is not possible");
    }
    // The following logic allows for more ergonomic calls.
    int[] begin_ = new int[x.Rank];
    if (begin.Length < x.Rank)
    {
        Array.Copy(begin, begin_, begin.Length);
    }
    else
    {
        begin_ = begin;
    }
    int[] size_ = new int[x.Rank];
    if (size == null)
    {
        for (int i = 0; i < size_.Length; i++)
        {
            size_[i] = -1;
        }
    }
    else if (size.Length < x.Rank)
    {
        for (int i = 0; i < size_.Length; i++)
        {
            size_[i] = -1;
        }
        Array.Copy(size, size_, size.Length);
    }
    else
    {
        size_ = size;
    }
    size_ = size_.Select((d, i) =>
    {
        if (d >= 0)
        {
            return d;
        }
        // Only -1 is a valid negative value: it means "take the rest of
        // this dimension". (The original port inverted this check and
        // threw on -1 itself.)
        if (d != -1)
        {
            throw new Exception("Bad value in size");
        }
        return x.Shape[i] - begin_[i];
    }).ToArray();
    var inputShape = x.Shape;
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        // Create an Nx2 padding where the first column represents how many
        // zeros are prepended (at start) for each dimension, and the second
        // column indicates how many zeros are appended (at end).
        // The number of zeros to append is the shape of the input
        // elementwise-subtracted by both the begin vector and sizes vector.
        List<int[]> paddings = new List<int[]>();
        for (var i = 0; i < dy.Rank; i++)
        {
            paddings.Add(new int[] { begin_[i], inputShape[i] - begin_[i] - size_[i] });
        }
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        g.gradient.Add("x", () => { return dy.pad(paddings.ToArray()); });
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.slice(x, begin_, size_);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("x", x);
    return e.runKernel(f, inputs, grad);
}
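// Worked example of the padding construction above: for x of shape [6] with
// begin = [2] and size = [3], the gradient pad is [[2, 1]]: two zeros are
// prepended (the skipped prefix) and 6 - 2 - 3 = 1 zero is appended,
// restoring dy to the input's shape.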
/// <summary>
/// Computes the dot product of two matrices, A * B. These must be matrices.
/// </summary>
/// <param name="a">First matrix in dot product operation.</param>
/// <param name="b">Second matrix in dot product operation.</param>
/// <param name="transposeA">If true, "a" is transposed before multiplication.</param>
/// <param name="transposeB">If true, "b" is transposed before multiplication.</param>
/// <returns></returns>
public static Tensor matMul(this Tensor a, Tensor b, bool transposeA = false, bool transposeB = false)
{
    var innerShapeA = transposeA ? a.Shape[0] : a.Shape[1];
    var innerShapeB = transposeB ? b.Shape[1] : b.Shape[0];
    Util.assert(
        a.Rank == 2 && b.Rank == 2,
        "Error in matMul: inputs must be Rank 2, got ranks " + a.Rank.ToString()
        + " and " + b.Rank.ToString() + ".");
    Util.assert(
        innerShapeA == innerShapeB,
        "Error in matMul: inner shapes (" + innerShapeA.ToString() + ") and ("
        + innerShapeB.ToString() + ") of Tensors with shapes ["
        + string.Join(",", a.Shape) + "] and [" + string.Join(",", b.Shape)
        + "] and transposeA=" + transposeA.ToString()
        + " and transposeB=" + transposeB.ToString() + " must match.");
    Func<Tensor, List<Tensor>, NamedGradientMap> grad = (Tensor dy, List<Tensor> s) =>
    {
        NamedGradientMap g = new NamedGradientMap();
        g.gradient = new Dictionary<string, Func<Tensor>>();
        if (!transposeA && !transposeB)
        {
            g.gradient.Add("a", () => { return dy.matMul(b, false, true); });
            g.gradient.Add("b", () => { return a.matMul(dy, true, false); });
        }
        else if (!transposeA && transposeB)
        {
            g.gradient.Add("a", () => { return dy.matMul(b, false, false); });
            g.gradient.Add("b", () => { return dy.matMul(a, true, false); });
        }
        else if (transposeA && !transposeB)
        {
            g.gradient.Add("a", () => { return b.matMul(dy, false, true); });
            g.gradient.Add("b", () => { return a.matMul(dy, false, false); });
        }
        else
        {
            g.gradient.Add("a", () => { return b.matMul(dy, true, true); });
            g.gradient.Add("b", () => { return dy.matMul(a, true, true); });
        }
        return g;
    };
    Engine e = ENV.engine;
    ForwardFunc f = (IBackend bk, Func<Tensor, Tensor> saved) =>
    {
        return bk.matMul(a, b, transposeA, transposeB);
    };
    var inputs = new Dictionary<string, Tensor>();
    inputs.Add("a", a);
    inputs.Add("b", b);
    return e.runKernel(f, inputs, grad);
}
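// The four branches above all specialize the same identities for C = A * B:
// dA = dC * B^T and dB = A^T * dC. Rather than materializing transposed
// tensors, each transpose flag is folded into the matMul call arguments;
// for example with transposeA set (C = A^T * B), the gradient of `a` becomes
// dA = B * dC^T, expressed as b.matMul(dy, false, true).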