public void UpdateBias(IMatrix delta, ILearningContext context)
{
    // average the error delta over the batch, then apply it to the bias
    using (var columnSums = delta.ColumnSums()) {
        columnSums.Multiply(1f / delta.RowCount);
        _bias.AddInPlace(columnSums, 1f, context.BatchLearningRate);
    }
}
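// A minimal sketch of the bias update performed above, written against plain arrays
// instead of the library's IMatrix/IVector types (BiasUpdateSketch is a hypothetical
// helper, not library API): each bias element is nudged by the learning rate times
// the column-wise average of the mini-batch error deltas.
static class BiasUpdateSketch
{
    public static void UpdateBias(float[] bias, float[,] delta, float learningRate)
    {
        int rows = delta.GetLength(0), columns = delta.GetLength(1);
        for (var j = 0; j < columns; j++) {
            var columnSum = 0f;
            for (var i = 0; i < rows; i++)
                columnSum += delta[i, j];

            // bias[j] += learningRate * mean of column j, mirroring
            // columnSums.Multiply(1f / delta.RowCount) followed by AddInPlace
            bias[j] += learningRate * columnSum / rows;
        }
    }
}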
public void Update(IMatrix biasDelta, IMatrix weightDelta, float weightCoefficient, float learningRate)
{
    if (biasDelta != null) {
        using (var columnSums = biasDelta.ColumnSums())
            _bias.AddInPlace(columnSums, 1f / columnSums.Count, learningRate);
    }
    _weight.AddInPlace(weightDelta, weightCoefficient, learningRate);
}
protected override IGraphData _Backpropagate(INode fromNode, IGraphData errorSignal, IContext context, IReadOnlyList<INode> parents)
{
    var error = errorSignal.GetMatrix();
    var batchSize = (float)context.BatchSequence.MiniBatch.BatchSize;
    var lap = context.LinearAlgebraProvider;

    using (var dxHat = error.PointwiseMultiply(_gamma))
    using (var temp = dxHat.PointwiseMultiply(_inputMinusMean))
    using (var temp2 = _inverseVariance.PointwiseMultiply(_inverseVariance))
    using (var inverseVarianceCubed = temp2.PointwiseMultiply(_inverseVariance))
    using (var temp3 = temp.PointwiseMultiply(inverseVarianceCubed)) {
        // dVar = -0.5 * sum(dxHat * (x - mu) * invStd^3) per column
        temp3.Multiply(-0.5f);
        using (var dVar = temp3.ColumnSums())
        using (var temp4 = dxHat.PointwiseMultiply(_inverseVariance)) {
            // dMu = -sum(dxHat * invStd) plus the variance term below
            temp4.Multiply(-1f);
            using (var dmu = temp4.ColumnSums())
            using (var temp5 = _inputMinusMean.ColumnSums()) {
                temp5.Multiply(-2f / batchSize);
                using (var dmu2 = temp5.PointwiseMultiply(dVar)) {
                    dmu.AddInPlace(dmu2);
                    using (var dVarMatrix = lap.CreateMatrix(Enumerable.Repeat(dVar, context.BatchSequence.MiniBatch.BatchSize).ToList()))
                    using (var dx1 = dxHat.PointwiseMultiply(_inverseVariance))
                    using (var dx2 = dVarMatrix.PointwiseMultiply(_inputMinusMean)) {
                        dx2.Multiply(-2f / batchSize);
                        using (var dx3 = lap.CreateMatrix(Enumerable.Repeat(dmu, context.BatchSequence.MiniBatch.BatchSize).ToList())) {
                            dx3.Multiply(1f / batchSize);

                            // combine the three components of the input gradient
                            var dx = dx1.Add(dx2);
                            dx.AddInPlace(dx3);

                            // dBeta is the column sums of the incoming error signal
                            var dBeta = error.ColumnSums();
                            using (var temp6 = _xHat.PointwiseMultiply(error)) {
                                var dGamma = temp6.ColumnSums();

                                // store the updates
                                var learningContext = context.LearningContext;
                                learningContext.StoreUpdate(_source, dBeta, err => _source._beta.AddInPlace(err, 1f, learningContext.BatchLearningRate));
                                learningContext.StoreUpdate(_source, dGamma, err => _source._gamma.AddInPlace(err, 1f, learningContext.BatchLearningRate));
                            }
                            return errorSignal.ReplaceWith(dx);
                        }
                    }
                }
            }
        }
    }
}
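// For reference, the temporaries above follow the standard batch normalisation
// backward pass (N = batch size, invStd = 1/sqrt(var + eps), with eps folded into
// _inverseVariance during the forward pass):
//   dxHat  = dout * gamma
//   dVar   = sum_over_batch(dxHat * (x - mu)) * -0.5 * invStd^3
//   dMu    = sum_over_batch(-dxHat * invStd) + dVar * (-2 / N) * sum_over_batch(x - mu)
//   dx     = dxHat * invStd + dVar * 2 * (x - mu) / N + dMu / N
//   dGamma = sum_over_batch(dout * xHat),  dBeta = sum_over_batch(dout)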
public void Update(IMatrix biasDelta, IMatrix weightDelta, float weightCoefficient, float learningRate)
{
    if (biasDelta != null) {
        using (var columnSums = biasDelta.ColumnSums())
            _bias.AddInPlace(columnSums, 1f / columnSums.Count, learningRate);
    }

    // the weight delta arrives transposed relative to the weight matrix
    using (var transpose = weightDelta.Transpose())
        _weight.AddInPlace(transpose, weightCoefficient, learningRate);

    // invalidate and rebuild the cached transpose of the weight matrix
    _weightTranspose.Dispose();
    _weightTranspose = _weight.Transpose();
}
protected override void _UpdateLayer(IMatrix input, IMatrix delta, ITrainingContext context, INeuralNetworkUpdateAccumulator updates)
{
    if (_filter != null && _filter2 != null) {
        // filter the updates to the bias against the filter
        using (var columnSums = delta.ColumnSums())
        using (var filteredColumnSums = columnSums.PointwiseMultiply(_filter2))
            _layerUpdater.Layer.Bias.AddInPlace(filteredColumnSums, 1f / columnSums.Count);

        // filter the weight updates against the filter
        using (var weightUpdate = input.TransposeThisAndMultiply(delta))
        using (var filteredWeightUpdate = weightUpdate.PointwiseMultiply(_filter))
            _layerUpdater.Update(null, filteredWeightUpdate, context);
    }
    else
        base._UpdateLayer(input, delta, context, updates);
}
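// A minimal sketch of the filtered weight update above, assuming plain arrays in
// place of IMatrix (FilteredUpdateSketch and its parameter names are illustrative,
// not library API): the weight gradient input^T * delta is masked element-wise by a
// binary filter before being applied, which is how drop-connect style training
// suppresses updates to the dropped weights.
static class FilteredUpdateSketch
{
    public static void Apply(float[,] weight, float[,] input, float[,] delta, float[,] filter, float learningRate)
    {
        int batch = input.GetLength(0), inputSize = input.GetLength(1), outputSize = delta.GetLength(1);
        for (var i = 0; i < inputSize; i++) {
            for (var j = 0; j < outputSize; j++) {
                var gradient = 0f;
                for (var k = 0; k < batch; k++)
                    gradient += input[k, i] * delta[k, j]; // (input^T * delta)[i, j]

                // zero entries in the filter block the corresponding weight update
                weight[i, j] += learningRate * gradient * filter[i, j];
            }
        }
    }
}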
public void UpdateBias(IMatrix delta, ILearningContext context)
{
    using var columnSums = delta.ColumnSums();
    _bias.AddInPlace(columnSums, 1f / columnSums.Count, context.BatchLearningRate);
}
public override void ExecuteForward(IContext context)
{
    var lap = context.LinearAlgebraProvider;
    var input = context.Data.GetMatrix();
    IMatrix output, inputMinusMean = null, inverseVariance = null, xHat = null, gamma = null;
    Debug.Assert(input.RowCount == context.BatchSequence.MiniBatch.BatchSize);
    var batchSize = input.RowCount;

    var tensor = context.Data.Get4DTensor();
    if (tensor != null) {
        IVector currentVariance, currentMean;
        if (context.IsTraining) {
            // reduce each volume to its per-channel average
            var volumeList = input.ConvertInPlaceToVector().Split(tensor.Count);
            var rowList = volumeList.Select(m => lap.CreateVector(m.Split(tensor.Depth).Select(v => v.Average()))).ToList();
            var reducedInput = lap.CreateMatrix(rowList);

            // calculate batch mean and variance
            currentMean = reducedInput.ColumnSums();
            currentMean.Multiply(1f / reducedInput.RowCount);
            using (var meanMatrix = lap.CreateMatrix(Enumerable.Repeat(currentMean, reducedInput.RowCount).ToList()))
            using (var reducedInputMinusMean = reducedInput.Subtract(meanMatrix))
            using (var temp = reducedInputMinusMean.PointwiseMultiply(reducedInputMinusMean)) {
                currentVariance = temp.ColumnSums();
                currentVariance.Multiply(1f / reducedInput.RowCount);
            }
        }
        else {
            // use the stored running statistics at inference time
            currentVariance = _variance;
            currentMean = _mean;
        }

        using (var tensorMean = _FormIntoTensor(lap, currentMean, tensor))
        using (var tensorVariance = _FormIntoTensor(lap, currentVariance, tensor))
        using (var tensorGamma = _FormIntoTensor(lap, _gamma, tensor))
        using (var tensorBeta = _FormIntoTensor(lap, _beta, tensor))
        using (var matrixMean = tensorMean.ConvertToMatrix())
        using (var matrixVariance = tensorVariance.ConvertToMatrix())
        using (var matrixBeta = tensorBeta.ConvertToMatrix())
        using (var sqrtVariance = matrixVariance.Sqrt(1e-6f)) {
            gamma = tensorGamma.ConvertToMatrix();
            inputMinusMean = input.Subtract(matrixMean);

            // calculate batch normalisation
            using (var ones = lap.CreateMatrix(batchSize, input.ColumnCount, 1f)) {
                inverseVariance = ones.PointwiseDivide(sqrtVariance);
                xHat = inputMinusMean.PointwiseMultiply(inverseVariance);
                output = xHat.PointwiseMultiply(gamma);
                output.AddToEachRow(_beta);

                // only update the running statistics during training (otherwise
                // currentMean/currentVariance alias _mean/_variance)
                if (context.IsTraining) {
                    // update the mean
                    _mean.AddInPlace(currentMean, _momentum, 1f - _momentum);

                    // correct for the biased sample variance, then update the variance
                    currentVariance.Multiply(1f / (batchSize - 1));
                    _variance.AddInPlace(currentVariance, _momentum, 1f - _momentum);

                    currentVariance.Dispose();
                    currentMean.Dispose();
                }
            }
        }
    }
    else {
        if (context.IsTraining) {
            var learningContext = context.LearningContext;
            gamma = lap.CreateMatrix(Enumerable.Repeat(_gamma, batchSize).ToList());

            // calculate batch mean
            var batchMean = input.ColumnSums();
            batchMean.Multiply(1f / batchSize);

            // find input minus mean
            using (var meanMatrix = lap.CreateMatrix(Enumerable.Repeat(batchMean, batchSize).ToList()))
                inputMinusMean = input.Subtract(meanMatrix);

            // calculate variance as (x - u)^2
            IMatrix batchVariance = inputMinusMean.PointwiseMultiply(inputMinusMean);

            // calculate batch normalisation
            using (var varianceSqrt = batchVariance.Sqrt(1e-6f))
            using (var ones = lap.CreateMatrix(batchSize, input.ColumnCount, 1f)) {
                inverseVariance = ones.PointwiseDivide(varianceSqrt);
                xHat = inputMinusMean.PointwiseMultiply(inverseVariance);
                output = xHat.PointwiseMultiply(gamma);
                output.AddToEachRow(_beta);

                // update the mean
                _mean.AddInPlace(batchMean, 1f, learningContext.BatchLearningRate);

                // correct for the biased sample variance
                batchVariance.Multiply(1f / (batchSize - 1));

                // update the variance
                using (var temp = batchVariance.ColumnSums()) {
                    temp.Multiply(1f / batchSize);
                    _variance.AddInPlace(temp, _momentum, learningContext.BatchLearningRate);
                }
                batchVariance.Dispose();
            }
        }
        else {
            using (var varianceMatrix = lap.CreateMatrix(Enumerable.Repeat(_variance, batchSize).ToList()))
            using (var varianceMatrixSqrt = varianceMatrix.Sqrt(1e-6f))
            using (var meanMatrix = lap.CreateMatrix(Enumerable.Repeat(_mean, batchSize).ToList()))
            using (var gammaMatrix = lap.CreateMatrix(Enumerable.Repeat(_gamma, batchSize).ToList()))
            using (var inputSubtractMean = input.Subtract(meanMatrix))
            using (var temp = inputSubtractMean.PointwiseMultiply(gammaMatrix)) {
                output = temp.PointwiseDivide(varianceMatrixSqrt);
                output.AddToEachRow(_beta);
            }
        }
    }

    _AddNextGraphAction(context, context.Data.ReplaceWith(output), () => new Backpropagation(this, inputMinusMean, inverseVariance, xHat, gamma));
}
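// For reference, the forward computation above is standard batch normalisation
// (eps = 1e-6 here, supplied via Sqrt(1e-6f)):
//   muB  = mean(x) over the batch,  varB = mean((x - muB)^2)
//   xHat = (x - muB) / sqrt(varB + eps),  y = gamma * xHat + beta
// with the running statistics blended towards the batch statistics during training,
// and the stored running mean and variance used directly at inference time.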
public void TrainOnMiniBatch(ISequentialMiniBatch miniBatch, float[] memory, IRecurrentTrainingContext context, Action<IMatrix> beforeBackProp, Action<IMatrix> afterBackProp)
{
    var trainingContext = context.TrainingContext;
    _lap.PushLayer();
    var updateStack = new Stack<Tuple<Stack<INeuralNetworkRecurrentBackpropagation>, IMatrix, IMatrix, ISequentialMiniBatch, int>>();

    // forward pass: execute each layer and record its backpropagation closure
    context.ExecuteForward(miniBatch, memory, (k, fc) => {
        var layerStack = new Stack<INeuralNetworkRecurrentBackpropagation>();
        foreach (var action in _layer)
            layerStack.Push(action.Execute(fc, true));
        updateStack.Push(Tuple.Create(layerStack, miniBatch.GetExpectedOutput(fc, k), fc[0], miniBatch, k));
    });

    // backpropagate, accumulating errors across the sequence
    using (var updateAccumulator = new UpdateAccumulator(trainingContext)) {
        IMatrix curr = null;
        while (updateStack.Any()) {
            var update = updateStack.Pop();
            var actionStack = update.Item1;

            // calculate the error at this timestep (if there is an expected output)
            var expectedOutput = update.Item2;
            if (expectedOutput != null)
                curr = trainingContext.ErrorMetric.CalculateDelta(update.Item3, expectedOutput);

            // backpropagate through this timestep's layers in reverse order
            beforeBackProp?.Invoke(curr);
            while (actionStack.Any()) {
                var backpropagationAction = actionStack.Pop();
                curr = backpropagationAction.Execute(curr, trainingContext, true, updateAccumulator);
            }
            afterBackProp?.Invoke(curr);

            // apply any filters
            foreach (var filter in _filter)
                filter.AfterBackPropagation(update.Item4, update.Item5, curr);
        }

        // adjust the initial memory against the error signal
        if (curr != null) {
            using (var columnSums = curr.ColumnSums()) {
                var initialDelta = columnSums.AsIndexable();
                for (var j = 0; j < memory.Length; j++)
                    memory[j] += initialDelta[j] * trainingContext.TrainingRate;
            }
        }
    }

    // cleanup
    trainingContext.EndBatch();
    _lap.PopLayer();
}
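// A minimal sketch of the initial-memory adjustment at the end of TrainOnMiniBatch,
// assuming plain arrays (MemoryAdjustmentSketch is illustrative, not library API):
// the error signal that survives backpropagation through time to t = 0 is summed
// per column and scaled by the training rate, treating the initial memory as a
// learnable parameter.
static class MemoryAdjustmentSketch
{
    public static void Adjust(float[] memory, float[,] errorAtT0, float trainingRate)
    {
        int rows = errorAtT0.GetLength(0), columns = errorAtT0.GetLength(1);
        for (var j = 0; j < columns && j < memory.Length; j++) {
            var sum = 0f;
            for (var i = 0; i < rows; i++)
                sum += errorAtT0[i, j]; // column sum of the t = 0 error signal
            memory[j] += sum * trainingRate;
        }
    }
}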