/// <summary>
/// Builds a tensor with the axes of <paramref name="x"/> and accumulates into its
/// <c>Gradient</c> one kernel-sized patch per (batch, x, y) position of <paramref name="y"/>.
/// Each patch is produced by <c>FullyConnectedLayerTest.CalculateDx</c> from a 1x1 crop of
/// <paramref name="y"/>.
/// NOTE(review): presumably the reference input gradient for a convolution layer test - confirm against callers.
/// </summary>
/// <param name="w">Weights forwarded unchanged to <c>FullyConnectedLayerTest.CalculateDx</c>.</param>
/// <param name="x">Tensor whose axes and shape define the result and the valid coordinate range.</param>
/// <param name="y">Tensor that is cropped position by position (presumably carries the output gradient - verify).</param>
/// <param name="kernel">Supplies width, height, strides and paddings of the sliding window.</param>
/// <param name="numberOfFilters">Size of the channel axis of each patch and the number of channels accumulated.</param>
/// <param name="matrixLayout">Layout forwarded unchanged to <c>FullyConnectedLayerTest.CalculateDx</c>.</param>
/// <returns>The newly created tensor with its <c>Gradient</c> filled in.</returns>
private static Tensor CalculateDX(Tensor w, Tensor x, Tensor y, Kernel kernel, int numberOfFilters, MatrixLayout matrixLayout)
{
    Tensor result = new Tensor(null, x.Axes);

    // Coordinate limits handed to Between(); a negative padding shrinks the window from both sides.
    int minColumn = Math.Max(-kernel.PaddingX, 0);
    int maxColumn = x.Shape.GetAxis(Axis.X) - 1 - minColumn;
    int minRow = Math.Max(-kernel.PaddingY, 0);
    int maxRow = x.Shape.GetAxis(Axis.Y) - 1 - minRow;

    for (int batch = 0, batchCount = x.Shape.GetAxis(Axis.B); batch < batchCount; batch++)
    {
        // Left edge of the kernel window in x coordinates; advances by the horizontal stride.
        int left = -kernel.PaddingX;
        int outputWidth = y.Shape.GetAxis(Axis.X);

        for (int outputX = 0; outputX < outputWidth; outputX++, left += kernel.StrideX)
        {
            // Top edge of the kernel window in x coordinates; advances by the vertical stride.
            int top = -kernel.PaddingY;
            int outputHeight = y.Shape.GetAxis(Axis.Y);

            for (int outputY = 0; outputY < outputHeight; outputY++, top += kernel.StrideY)
            {
                // The kernel area reported by CropKernel is not needed here.
                Tensor croppedY = y.CropKernel(batch, outputX, outputY, new Kernel(1, 1, 1, 1), true, out int _);

                Tensor patch = new Tensor(null, new Shape(Shape.BWHC, 1, kernel.Width, kernel.Height, numberOfFilters));
                patch.Set(FullyConnectedLayerTest.CalculateDx(w, croppedY, numberOfFilters, matrixLayout));

                for (int offsetX = 0; offsetX < kernel.Width; offsetX++)
                {
                    int column = left + offsetX;
                    if (!column.Between(minColumn, maxColumn))
                    {
                        // Whole kernel column lies outside the accepted range.
                        continue;
                    }

                    for (int offsetY = 0; offsetY < kernel.Height; offsetY++)
                    {
                        int row = top + offsetY;
                        if (!row.Between(minRow, maxRow))
                        {
                            continue;
                        }

                        for (int channel = 0; channel < numberOfFilters; channel++)
                        {
                            result.Gradient[x.Shape.Position(batch, column, row, channel)] += patch[0, offsetX, offsetY, channel];
                        }
                    }
                }
            }
        }
    }

    return result;
}
/// <summary>
/// Runs a <c>FullyConnectedLayer</c> forward and backward for every <c>MatrixLayout</c> value
/// and for tile counts 1 through 3 along <c>Axis.B</c>, comparing the layer output and the
/// gradients of the input, bias and weights with reference values obtained from the
/// <c>FullyConnectedLayerTest.Calculate*</c> helpers.
/// </summary>
public void ForwardBackwardTest()
{
    const int NumberOfNeurons = 2;
    Shape inputShape = new Shape(new[] { -1, 2, 3, 2 });

    foreach (MatrixLayout matrixLayout in Enum.GetValues(typeof(MatrixLayout)).OfType<MatrixLayout>())
    {
        FullyConnectedLayer layer = new FullyConnectedLayer(inputShape, NumberOfNeurons, matrixLayout, null);

        float[] weights =
        {
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
            21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        };
        layer.W.Set(weights);

        float[] biases = { 1, 2 };
        layer.B.Set(biases);

        // Single-sample input.
        Tensor sampleX = new Tensor(null, new[] { 1, 12 });
        sampleX.Set(new float[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 });

        // Reference output for one sample: W * x + b.
        Tensor sampleExpectedY = new Tensor(null, new[] { 1, NumberOfNeurons });
        sampleExpectedY.Set(FullyConnectedLayerTest.CalculateNeurons(layer.W, sampleX, layer.B, NumberOfNeurons, matrixLayout));

        // Output gradient injected before the backward pass.
        Tensor sampleDy = new Tensor(null, new[] { 1, NumberOfNeurons });
        sampleDy.Set(new float[] { 1, 2 });

        // Reference input gradient for one sample: W' * dy.
        Tensor sampleExpectedDx = new Tensor(null, sampleX.Shape);
        sampleExpectedDx.Set(FullyConnectedLayerTest.CalculateDx(layer.W, sampleDy, NumberOfNeurons, matrixLayout));

        // Reference bias gradient for one sample.
        Tensor sampleExpectedDb = new Tensor(null, layer.B.Shape);
        sampleExpectedDb.Set(FullyConnectedLayerTest.CalculateDB(sampleDy));

        // Reference weight gradient for one sample: sum(x' * dy).
        Tensor sampleExpectedDw = new Tensor(null, layer.W.Shape);
        sampleExpectedDw.Set(FullyConnectedLayerTest.CalculateDW(sampleX, sampleDy, matrixLayout));

        for (int batchSize = 1; batchSize <= 3; batchSize++)
        {
            Session session = new Session();

            // Start every pass with clean parameter gradients.
            layer.W.ClearGradient();
            layer.B.ClearGradient();

            // Forward pass on the sample tiled batchSize times along the batch axis.
            Tensor x = session.Tile(sampleX, (int)Axis.B, batchSize);
            Tensor y = layer.Forward(session, new[] { x })[0];

            Helpers.AreTensorsEqual(session.Tile(sampleExpectedY, (int)Axis.B, batchSize), y);

            // Backward pass: seed the output gradient, then unroll the graph.
            Tensor dy = session.Tile(sampleDy, (int)Axis.B, batchSize);
            y.SetGradient(dy.Weights);
            session.Unroll();

            // Input gradient is checked against the tiled single-sample reference.
            Tensor expectedDx = session.Tile(sampleExpectedDx, (int)Axis.B, batchSize);
            Helpers.AreArraysEqual(expectedDx.Length, expectedDx.Weights, x.Gradient);

            // Bias gradient is checked against session.Multiply of the reference by the batch size.
            Tensor expectedDb = session.Multiply(sampleExpectedDb, batchSize);
            Helpers.AreArraysEqual(expectedDb.Length, expectedDb.Weights, layer.B.Gradient);

            // Weight gradient is checked against session.Multiply of the reference by the batch size.
            Tensor expectedDw = session.Multiply(sampleExpectedDw, batchSize);
            Helpers.AreArraysEqual(expectedDw.Length, expectedDw.Weights, layer.W.Gradient);
        }
    }
}