Example #1
 public void UpdateBias(IMatrix delta, ILearningContext context)
 {
     using (var columnSums = delta.ColumnSums()) {
         columnSums.Multiply(1f / delta.RowCount);
         _bias.AddInPlace(columnSums, 1f, context.BatchLearningRate);
     }
 }
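This update computes the bias gradient as the column-wise sum of the batch delta, averaged over the number of rows (the batch size), then steps the bias by the batch learning rate. A minimal stand-alone sketch of the same arithmetic on plain arrays (every name here is illustrative, not part of the library's API):

    // Sketch: average each column of delta over the batch and step the bias.
    static void UpdateBiasSketch(float[] bias, float[,] delta, float learningRate)
    {
        int rows = delta.GetLength(0), cols = delta.GetLength(1);
        for (var j = 0; j < cols; j++)
        {
            var columnSum = 0f;
            for (var i = 0; i < rows; i++)
                columnSum += delta[i, j];
            bias[j] += learningRate * (columnSum / rows); // mirrors Multiply + AddInPlace above
        }
    }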
Example #2
 public void Update(IMatrix biasDelta, IMatrix weightDelta, float weightCoefficient, float learningRate)
 {
     if (biasDelta != null)
     {
         using (var columnSums = biasDelta.ColumnSums())
             _bias.AddInPlace(columnSums, 1f / columnSums.Count, learningRate);
     }
     _weight.AddInPlace(weightDelta, weightCoefficient, learningRate);
 }
Example #3
            protected override IGraphData _Backpropagate(INode fromNode, IGraphData errorSignal, IContext context, IReadOnlyList<INode> parents)
            {
                var error     = errorSignal.GetMatrix();
                var batchSize = (float)context.BatchSequence.MiniBatch.BatchSize;
                var lap       = context.LinearAlgebraProvider;

                // gradient w.r.t. the normalised input (_inverseVariance holds 1/sqrt(variance + epsilon))
                using var dxHat = error.PointwiseMultiply(_gamma);

                // gradient w.r.t. the batch variance: sum(dxHat * (x - mean)) * -0.5 * inverseVariance^3
                using var temp = dxHat.PointwiseMultiply(_inputMinusMean);
                using var temp2 = _inverseVariance.PointwiseMultiply(_inverseVariance);
                using var inverseVarianceCubed = temp2.PointwiseMultiply(_inverseVariance);
                using var temp3 = temp.PointwiseMultiply(inverseVarianceCubed);
                temp3.Multiply(-0.5f);
                using var dVar = temp3.ColumnSums();

                // gradient w.r.t. the batch mean: sum(-dxHat * inverseVariance) + dVar * (-2/m) * sum(x - mean)
                using var temp4 = dxHat.PointwiseMultiply(_inverseVariance);
                temp4.Multiply(-1f);
                using var dmu = temp4.ColumnSums();
                using var temp5 = _inputMinusMean.ColumnSums();
                temp5.Multiply(-2f / batchSize);
                using var dmu2 = temp5.PointwiseMultiply(dVar);
                dmu.AddInPlace(dmu2);

                // reassemble the gradient w.r.t. the input
                using var dVarMatrix = lap.CreateMatrix(Enumerable.Repeat(dVar, context.BatchSequence.MiniBatch.BatchSize).ToList());
                using var dx1 = dxHat.PointwiseMultiply(_inverseVariance);
                using var dx2 = dVarMatrix.PointwiseMultiply(_inputMinusMean);
                dx2.Multiply(-2f / batchSize); // note: the textbook term carries +2/m here
                using var dx3 = lap.CreateMatrix(Enumerable.Repeat(dmu, context.BatchSequence.MiniBatch.BatchSize).ToList());
                dx3.Multiply(1f / batchSize);

                var dx = dx1.Add(dx2);
                dx.AddInPlace(dx3);

                // parameter gradients (dBeta is taken from dx; the textbook uses the column sums of the incoming error)
                var dBeta = dx.ColumnSums();
                using var temp6 = _xHat.PointwiseMultiply(error);
                var dGamma = temp6.ColumnSums();

                // store the updates
                var learningContext = context.LearningContext;
                learningContext.StoreUpdate(_source, dBeta, err => _source._beta.AddInPlace(err, 1f, learningContext.BatchLearningRate));
                learningContext.StoreUpdate(_source, dGamma, err => _source._gamma.AddInPlace(err, 1f, learningContext.BatchLearningRate));

                return errorSignal.ReplaceWith(dx);
            }
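For reference, the quantities assembled above follow the standard batch-normalisation backward pass, where _inverseVariance holds 1/sqrt(variance + epsilon) and m is the batch size (a sketch of the usual derivation, not a statement about this library's exact conventions):

    \frac{\partial L}{\partial \hat{x}_i} = \frac{\partial L}{\partial y_i} \cdot \gamma
    \frac{\partial L}{\partial \sigma^2} = \sum_i \frac{\partial L}{\partial \hat{x}_i} (x_i - \mu) \cdot \left(-\tfrac{1}{2}\right)(\sigma^2 + \epsilon)^{-3/2}
    \frac{\partial L}{\partial \mu} = -\sum_i \frac{\partial L}{\partial \hat{x}_i} \frac{1}{\sqrt{\sigma^2 + \epsilon}} + \frac{\partial L}{\partial \sigma^2} \cdot \frac{-2}{m} \sum_i (x_i - \mu)
    \frac{\partial L}{\partial x_i} = \frac{\partial L}{\partial \hat{x}_i} \frac{1}{\sqrt{\sigma^2 + \epsilon}} + \frac{\partial L}{\partial \sigma^2} \cdot \frac{2(x_i - \mu)}{m} + \frac{\partial L}{\partial \mu} \cdot \frac{1}{m}
    \frac{\partial L}{\partial \gamma} = \sum_i \frac{\partial L}{\partial y_i} \hat{x}_i, \qquad \frac{\partial L}{\partial \beta} = \sum_i \frac{\partial L}{\partial y_i}

dxHat, dVar, dmu, dx1 through dx3, and dGamma correspond to these terms; note that the example's dx2 carries a factor of -2/m where the textbook term uses +2/m, and dBeta is taken from dx rather than from the incoming error.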
Example #4
        public void Update(IMatrix biasDelta, IMatrix weightDelta, float weightCoefficient, float learningRate)
        {
            if (biasDelta != null)
            {
                using (var columnSums = biasDelta.ColumnSums())
                    _bias.AddInPlace(columnSums, 1f / columnSums.Count, learningRate);
            }
            using (var transpose = weightDelta.Transpose())
                _weight.AddInPlace(transpose, weightCoefficient, learningRate);

            _weightTranspose.Dispose();
            _weightTranspose = _weight.Transpose();
        }
Example #5
        protected override void _UpdateLayer(IMatrix input, IMatrix delta, ITrainingContext context, INeuralNetworkUpdateAccumulator updates)
        {
            if (_filter != null && _filter2 != null)
            {
                // filter the updates to the bias against the filter
                using (var columnSums = delta.ColumnSums())
                    using (var filteredColumnSums = columnSums.PointwiseMultiply(_filter2))
                        _layerUpdater.Layer.Bias.AddInPlace(filteredColumnSums, 1f / columnSums.Count);

                // filter the weight updates against the filter
                using (var weightUpdate = input.TransposeThisAndMultiply(delta))
                    using (var filteredWeightUpdate = weightUpdate.PointwiseMultiply(_filter))
                        _layerUpdater.Update(null, filteredWeightUpdate, context);
            }
            else
            {
                base._UpdateLayer(input, delta, context, updates);
            }
        }
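Here _filter and _filter2 act as masks over the weight and bias gradients respectively: entries multiplied by zero contribute no update, so dropped connections stay frozen. A minimal sketch of the masking idea on plain arrays (all names are illustrative):

    // Sketch: a mask entry of 0 blocks the corresponding update, 1 lets it through.
    static void MaskedUpdateSketch(float[] weights, float[] gradient, float[] mask, float learningRate)
    {
        for (var i = 0; i < weights.Length; i++)
            weights[i] += learningRate * gradient[i] * mask[i];
    }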
Example #6
 public void UpdateBias(IMatrix delta, ILearningContext context)
 {
     using var columnSums = delta.ColumnSums();
     _bias.AddInPlace(columnSums, 1f / columnSums.Count, context.BatchLearningRate);
 }
Example #7
        public override void ExecuteForward(IContext context)
        {
            var     lap = context.LinearAlgebraProvider;
            var     input = context.Data.GetMatrix();
            IMatrix output, inputMinusMean = null, inverseVariance = null, xHat = null, gamma = null;

            Debug.Assert(input.RowCount == context.BatchSequence.MiniBatch.BatchSize);
            var batchSize = input.RowCount;

            var tensor = context.Data.Get4DTensor();

            if (tensor != null)
            {
                IVector currentVariance, currentMean;
                if (context.IsTraining)
                {
                    var volumeList   = input.ConvertInPlaceToVector().Split(tensor.Count);
                    var rowList      = volumeList.Select(m => lap.CreateVector(m.Split(tensor.Depth).Select(v => v.Average()))).ToList();
                    var reducedInput = lap.CreateMatrix(rowList);

                    // calculate batch mean and variance
                    currentMean = reducedInput.ColumnSums();
                    currentMean.Multiply(1f / reducedInput.RowCount);
                    using (var meanMatrix = lap.CreateMatrix(Enumerable.Repeat(currentMean, reducedInput.RowCount).ToList()))
                        using (var reducedInputMinusMean = reducedInput.Subtract(meanMatrix))
                            using (var temp = reducedInputMinusMean.PointwiseMultiply(reducedInputMinusMean)) {
                                currentVariance = temp.ColumnSums();
                                currentVariance.Multiply(1f / reducedInput.RowCount);
                            }
                }
                else
                {
                    currentVariance = _variance;
                    currentMean     = _mean;
                }
                using var tensorMean = _FormIntoTensor(lap, currentMean, tensor);
                using var tensorVariance = _FormIntoTensor(lap, currentVariance, tensor);
                using var tensorGamma = _FormIntoTensor(lap, _gamma, tensor);
                using var matrixMean = tensorMean.ConvertToMatrix();
                using var matrixVariance = tensorVariance.ConvertToMatrix();
                using var sqrtVariance = matrixVariance.Sqrt(1e-6f);

                gamma          = tensorGamma.ConvertToMatrix();
                inputMinusMean = input.Subtract(matrixMean);

                // calculate batch normalisation
                using var ones = lap.CreateMatrix(batchSize, input.ColumnCount, 1f);
                inverseVariance = ones.PointwiseDivide(sqrtVariance);
                xHat            = inputMinusMean.PointwiseMultiply(inverseVariance);
                output          = xHat.PointwiseMultiply(gamma);
                output.AddToEachRow(_beta);

                if (context.IsTraining)
                {
                    // update the running mean
                    _mean.AddInPlace(currentMean, _momentum, 1f - _momentum);

                    // correct for the biased sample variance, then update the running variance
                    currentVariance.Multiply(1f / (batchSize - 1));
                    _variance.AddInPlace(currentVariance, _momentum, 1f - _momentum);

                    currentVariance.Dispose();
                    currentMean.Dispose();
                }
            }
            else
            {
                if (context.IsTraining)
                {
                    var learningContext = context.LearningContext;
                    gamma = lap.CreateMatrix(Enumerable.Repeat(_gamma, batchSize).ToList());

                    // calculate batch mean
                    var batchMean = input.ColumnSums();
                    batchMean.Multiply(1f / batchSize);

                    // find input minus mean
                    using (var meanMatrix = lap.CreateMatrix(Enumerable.Repeat(batchMean, batchSize).ToList()))
                        inputMinusMean = input.Subtract(meanMatrix);

                    // calculate variance as (x - u)^2
                    IMatrix batchVariance = inputMinusMean.PointwiseMultiply(inputMinusMean);

                    // calculate batch normalisation
                    using (var varianceSqrt = batchVariance.Sqrt(1e-6f))
                        using (var ones = context.LinearAlgebraProvider.CreateMatrix(batchSize, input.ColumnCount, 1f)) {
                            inverseVariance = ones.PointwiseDivide(varianceSqrt);
                            xHat            = inputMinusMean.PointwiseMultiply(inverseVariance);
                            output          = xHat.PointwiseMultiply(gamma);
                            output.AddToEachRow(_beta);

                            // update the mean
                            _mean.AddInPlace(batchMean, 1f, learningContext.BatchLearningRate);

                            // correct for the biased sample variance
                            batchVariance.Multiply(1f / (batchSize - 1));

                            // update the variance
                            using (var temp = batchVariance.ColumnSums()) {
                                temp.Multiply(1f / batchSize);
                                _variance.AddInPlace(temp, _momentum, learningContext.BatchLearningRate);
                            }
                            batchVariance.Dispose();
                        }
                }
                else
                {
                    using (var varianceMatrix = lap.CreateMatrix(Enumerable.Repeat(_variance, batchSize).ToList()))
                        using (var varianceMatrixSqrt = varianceMatrix.Sqrt(1e-6f))
                            using (var meanMatrix = lap.CreateMatrix(Enumerable.Repeat(_mean, batchSize).ToList()))
                                using (var gammaMatrix = lap.CreateMatrix(Enumerable.Repeat(_gamma, batchSize).ToList()))
                                    using (var inputSubtractMean = input.Subtract(meanMatrix))
                                        using (var temp = inputSubtractMean.PointwiseMultiply(gammaMatrix)) {
                                            output = temp.PointwiseDivide(varianceMatrixSqrt);
                                            output.AddToEachRow(_beta);
                                        }
                }
            }

            // continue the graph with the normalised output in both code paths
            _AddNextGraphAction(context, context.Data.ReplaceWith(output), () => new Backpropagation(this, inputMinusMean, inverseVariance, xHat, gamma));
        }
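Both code paths implement the usual batch-normalisation forward pass, where the epsilon inside the square root corresponds to the 1e-6f passed to Sqrt:

    \mu_B = \frac{1}{m} \sum_{i=1}^{m} x_i, \qquad \sigma_B^2 = \frac{1}{m} \sum_{i=1}^{m} (x_i - \mu_B)^2
    \hat{x}_i = \frac{x_i - \mu_B}{\sqrt{\sigma_B^2 + \epsilon}}, \qquad y_i = \gamma \hat{x}_i + \beta

During training the running statistics are blended with the batch statistics; assuming AddInPlace(v, a, b) computes this = this * a + v * b, the tensor branch's update reads _mean = momentum * _mean + (1 - momentum) * currentMean, the standard exponential moving average.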
Example #8
        public void TrainOnMiniBatch(ISequentialMiniBatch miniBatch, float[] memory, IRecurrentTrainingContext context, Action<IMatrix> beforeBackProp, Action<IMatrix> afterBackProp)
        {
            var trainingContext = context.TrainingContext;

            _lap.PushLayer();
            var sequenceLength = miniBatch.SequenceLength;
            var updateStack    = new Stack<Tuple<Stack<INeuralNetworkRecurrentBackpropagation>, IMatrix, IMatrix, ISequentialMiniBatch, int>>();

            context.ExecuteForward(miniBatch, memory, (k, fc) => {
                var layerStack = new Stack<INeuralNetworkRecurrentBackpropagation>();
                foreach (var action in _layer)
                {
                    layerStack.Push(action.Execute(fc, true));
                }
                updateStack.Push(Tuple.Create(layerStack, miniBatch.GetExpectedOutput(fc, k), fc[0], miniBatch, k));
            });

            // backpropagate, accumulating errors across the sequence
            using (var updateAccumulator = new UpdateAccumulator(trainingContext)) {
                IMatrix curr = null;
                while (updateStack.Any())
                {
                    var update      = updateStack.Pop();
                    var isT0        = !updateStack.Any();
                    var actionStack = update.Item1;

                    // calculate error
                    var expectedOutput = update.Item2;
                    if (expectedOutput != null)
                    {
                        curr = trainingContext.ErrorMetric.CalculateDelta(update.Item3, expectedOutput);
                    }

                    // backpropagate
                    beforeBackProp?.Invoke(curr);
                    while (actionStack.Any())
                    {
                        var backpropagationAction = actionStack.Pop();
                        var shouldCalculateOutput = actionStack.Any() || isT0;
                        curr = backpropagationAction.Execute(curr, trainingContext, shouldCalculateOutput, updateAccumulator);
                    }
                    afterBackProp?.Invoke(curr);

                    // apply any filters
                    foreach (var filter in _filter)
                    {
                        filter.AfterBackPropagation(update.Item4, update.Item5, curr);
                    }
                }

                // adjust the initial memory against the error signal
                if (curr != null)
                {
                    using (var columnSums = curr.ColumnSums()) {
                        var initialDelta = columnSums.AsIndexable();
                        for (var j = 0; j < memory.Length; j++)
                        {
                            memory[j] += initialDelta[j] * trainingContext.TrainingRate;
                        }
                    }
                }
            }

            // cleanup
            trainingContext.EndBatch();
            _lap.PopLayer();
        }
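This method is backpropagation through time: the forward pass pushes one stack of per-layer backpropagation actions per timestep, and the backward loop pops timesteps in reverse, threading the error signal back through the sequence and finally into the initial memory. A minimal sketch of that control flow (the delegate shape is an assumption; only the push/pop pattern mirrors the example):

    using System;
    using System.Collections.Generic;

    static class BpttSketch
    {
        // Each entry maps the error signal at a timestep to the error
        // signal for the previous timestep; popping runs them newest-first.
        public static float[] BackpropagateThroughTime(Stack<Func<float[], float[]>> steps, float[] finalError)
        {
            var current = finalError;
            while (steps.Count > 0)
                current = steps.Pop()(current);
            return current; // gradient w.r.t. the initial memory
        }
    }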