/// <summary>
/// A small helper for comparing a loss's computations to expected values.
/// </summary>
/// <param name="lossFunc">The training loss.</param>
/// <param name="label">The ideal labeled output.</param>
/// <param name="output">The actual output.</param>
/// <param name="expectedLoss">The expected value of this loss, given
/// <c>label</c> and <c>output</c>.</param>
/// <param name="expectedUpdate">The expected value of the update
/// step, given <c>label</c> and <c>output</c>.</param>
/// <param name="differentiable">Whether the loss function is differentiable
/// w.r.t. the output in the vicinity of the output value.</param>
private void TestHelper(IScalarOutputLoss lossFunc, double label, double output,
    double expectedLoss, double expectedUpdate, bool differentiable = true)
{
    Double loss = lossFunc.Loss((float)output, (float)label);
    float derivative = lossFunc.Derivative((float)output, (float)label);
    Assert.Equal(expectedLoss, loss, 5);
    Assert.Equal(expectedUpdate, -derivative, 5);

    if (differentiable)
    {
        // In principle, the update should be the negative of the first derivative of the loss.
        // Use a simple finite difference method to see if it's in the right ballpark.
        float almostOutput = Math.Max((float)output * (1 + _epsilon), (float)output + _epsilon);
        Double almostLoss = lossFunc.Loss(almostOutput, (float)label);
        Assert.Equal((almostLoss - loss) / (almostOutput - output), derivative, 1);
    }
}
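// A usage sketch (not from the original source) showing how TestHelper might be invoked,
// assuming a SquaredLoss whose Loss is (output - label)^2 and whose Derivative is
// 2 * (output - label). With label = 1 and output = 2 the loss is 1 and the derivative
// is 2, so the expected update (the negated derivative) is -2.
[Fact]
public void SquaredLossTest()
{
    var loss = new SquaredLoss(); // Assumed to implement IScalarOutputLoss.
    TestHelper(loss, label: 1, output: 2, expectedLoss: 1, expectedUpdate: -2);
    TestHelper(loss, label: 1, output: 1, expectedLoss: 0, expectedUpdate: 0);
}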
public override void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat, Float label, Float weight)
{
    base.ProcessDataInstance(ch, ref feat, label, weight);

    // Compute the update and apply it if needed.
    Float output = CurrentMargin(ref feat);
    Double loss = _loss.Loss(output, label);

    // REVIEW: Should this be biasUpdate != 0?
    // This loss does not incorporate L2 if present, but the chance of that addition to the loss
    // exactly cancelling out the loss is remote.
    if (loss != 0 || _args.L2RegularizerWeight > 0)
    {
        // If doing lazy weights, we need to update the totalWeights and totalBias before updating weights/bias.
        if (_args.DoLazyUpdates && _args.Averaged && NumNoUpdates > 0
            && TotalMultipliers * _args.AveragedTolerance <= PendingMultipliers)
        {
            VectorUtils.AddMult(ref Weights, NumNoUpdates * WeightsScale, ref TotalWeights);
            TotalBias += Bias * NumNoUpdates * WeightsScale;
            NumWeightUpdates += NumNoUpdates;
            NumNoUpdates = 0;
            TotalMultipliers += PendingMultipliers;
            PendingMultipliers = 0;
        }

        // Make final adjustments to update parameters.
        Float rate = _args.LearningRate;
        if (_args.DecreaseLearningRate)
            rate /= MathUtils.Sqrt((Float)NumWeightUpdates + NumNoUpdates + 1);
        Float biasUpdate = -rate * _loss.Derivative(output, label);

        // Perform the update to weights and bias.
        VectorUtils.AddMult(ref feat, biasUpdate / WeightsScale, ref Weights);
        WeightsScale *= 1 - 2 * _args.L2RegularizerWeight; // L2 regularization.
        ScaleWeightsIfNeeded();
        Bias += biasUpdate;
        PendingMultipliers += Math.Abs(biasUpdate);
    }

    // Add to averaged weights and increment the count.
    if (Averaged)
    {
        if (!_args.DoLazyUpdates)
            IncrementAverageNonLazy();
        else
            NumNoUpdates++;

        // Reset the weights to averages if needed.
        if (_resetWeightsAfterXExamples > 0 && NumIterExamples % _resetWeightsAfterXExamples == 0)
        {
            ch.Info("Resetting weights to average weights");
            VectorUtils.ScaleInto(ref TotalWeights, 1 / (Float)NumWeightUpdates, ref Weights);
            WeightsScale = 1;
            Bias = TotalBias / (Float)NumWeightUpdates;
        }
    }
}
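// An illustrative sketch (names here are hypothetical, not part of the trainer above) of
// the WeightsScale trick used in the update. L2 regularization multiplies every weight by
// (1 - 2 * lambda) on each example, which would be O(dimension) per example on a dense
// vector. Keeping a single scalar scale factor makes the shrink O(1); gradient updates are
// divided by the current scale so that the effective weights (scale * weights) stay correct.
private static void ScaledUpdateSketch()
{
    float[] weights = { 0.5f, -1.0f, 2.0f };
    float scale = 1.0f;
    float lambda = 0.01f;   // L2 regularizer weight.
    float update = 0.1f;    // E.g. -rate * derivative for some example.
    float[] feat = { 1.0f, 0.0f, 1.0f };

    // Effective weights are scale * weights. Add the gradient term, then shrink.
    for (int i = 0; i < weights.Length; i++)
        weights[i] += update * feat[i] / scale; // Dividing by scale means scale * w gains update * feat.
    scale *= 1 - 2 * lambda;                    // One multiply shrinks all effective weights.

    // Effective weight i is now scale * weights[i]; the scale is folded back into the
    // array only when it drifts far from 1 (the role of ScaleWeightsIfNeeded above).
}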
protected override void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat, Float label, Float weight)
{
    base.ProcessDataInstance(ch, ref feat, label, weight);

    // Compute the update and apply it if needed.
    Float output = CurrentMargin(ref feat);
    Double loss = LossFunction.Loss(output, label);

    // REVIEW: Should this be biasUpdate != 0?
    // This loss does not incorporate L2 if present, but the chance of that addition to the loss
    // exactly cancelling out the loss is remote.
    if (loss != 0 || Args.L2RegularizerWeight > 0)
    {
        // If doing lazy weights, we need to update the totalWeights and totalBias before updating weights/bias.
        if (Args.DoLazyUpdates && Args.Averaged && NumNoUpdates > 0
            && TotalMultipliers * Args.AveragedTolerance <= PendingMultipliers)
        {
            VectorUtils.AddMult(ref Weights, NumNoUpdates * WeightsScale, ref TotalWeights);
            TotalBias += Bias * NumNoUpdates * WeightsScale;
            NumWeightUpdates += NumNoUpdates;
            NumNoUpdates = 0;
            TotalMultipliers += PendingMultipliers;
            PendingMultipliers = 0;
        }

#if OLD_TRACING // REVIEW: How should this be ported?
        // If doing debugging and have L2 regularization, adjust the loss to account for that component.
        if (DebugLevel > 2 && _args.l2RegularizerWeight != 0)
            loss += _args.l2RegularizerWeight * VectorUtils.NormSquared(_weights) * _weightsScale * _weightsScale;
#endif

        // Make final adjustments to update parameters.
        Float rate = Args.LearningRate;
        if (Args.DecreaseLearningRate)
            rate /= MathUtils.Sqrt((Float)NumWeightUpdates + NumNoUpdates + 1);
        Float biasUpdate = -rate * LossFunction.Derivative(output, label);

        // Perform the update to weights and bias.
        VectorUtils.AddMult(ref feat, biasUpdate / WeightsScale, ref Weights);
        WeightsScale *= 1 - 2 * Args.L2RegularizerWeight; // L2 regularization.
        ScaleWeightsIfNeeded();
        Bias += biasUpdate;
        PendingMultipliers += Math.Abs(biasUpdate);

#if OLD_TRACING // REVIEW: How should this be ported?
        if (DebugLevel > 2)
        {
            // Sanity check: did loss for the example decrease?
            Double newLoss = _lossFunction.Loss(CurrentMargin(instance), instance.Label);
            if (_args.l2RegularizerWeight != 0)
                newLoss += _args.l2RegularizerWeight * VectorUtils.NormSquared(_weights) * _weightsScale * _weightsScale;
            if (newLoss - loss > 0 && (newLoss - loss > 0.01 || _args.l2RegularizerWeight == 0))
            {
                Host.StdErr.WriteLine("Loss increased (unexpected): Old value: {0}, new value: {1}", loss, newLoss);
                Host.StdErr.WriteLine("Offending instance #{0}: {1}", _numIterExamples, instance);
            }
        }
#endif
    }

    // Add to averaged weights and increment the count.
    if (Args.Averaged)
    {
        if (!Args.DoLazyUpdates)
            IncrementAverageNonLazy();
        else
            NumNoUpdates++;

        // Reset the weights to averages if needed.
        if (Args.ResetWeightsAfterXExamples > 0 && NumIterExamples % Args.ResetWeightsAfterXExamples.Value == 0)
        {
            // #if OLD_TRACING // REVIEW: How should this be ported?
            Console.WriteLine();
            // #endif
            ch.Info("Resetting weights to average weights");
            VectorUtils.ScaleInto(ref TotalWeights, 1 / (Float)NumWeightUpdates, ref Weights);
            WeightsScale = 1;
            Bias = TotalBias / (Float)NumWeightUpdates;
        }
    }

#if OLD_TRACING // REVIEW: How should this be ported?
    if (DebugLevel > 3)
    {
        // Output the weights.
        Host.StdOut.Write("Weights after the instance are: ");
        foreach (var iv in _weights.Items(all: true))
        {
            Host.StdOut.Write('\t');
            Host.StdOut.Write(iv.Value * _weightsScale);
        }
        Host.StdOut.WriteLine();
        Host.StdOut.WriteLine();
    }
#endif
}
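// A simplified, illustrative sketch (hypothetical names, bias only) of the lazy averaging
// bookkeeping above: the averaged model is TotalBias / NumWeightUpdates, where TotalBias
// sums the bias after every example. When an example triggers no update the bias is
// unchanged, so rather than re-adding it every round the trainer counts the skipped rounds
// (NumNoUpdates) and catches up with one multiply-add before the next real update.
private static float AveragedBiasSketch()
{
    float bias = 0;
    float totalBias = 0;
    int numUpdates = 0;
    int numNoUpdates = 0;
    float[] biasUpdates = { 0.5f, 0f, 0f, -0.2f }; // Zero entries: rounds with no update.

    foreach (float update in biasUpdates)
    {
        if (update == 0)
        {
            numNoUpdates++; // Defer: bias is unchanged, just count the round.
            continue;
        }
        // Catch up the deferred rounds before the bias changes, then apply the update.
        totalBias += bias * numNoUpdates;
        numUpdates += numNoUpdates;
        numNoUpdates = 0;
        bias += update;
        totalBias += bias;
        numUpdates++;
    }
    // Flush any trailing deferred rounds.
    totalBias += bias * numNoUpdates;
    numUpdates += numNoUpdates;
    return totalBias / numUpdates; // The averaged bias: 0.45 for the inputs above.
}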