/// <summary>
/// Used by L2 + L1 regularized SGD to push the weights in the direction of/contrary-to an example vector.
/// In addition to the L2 regularization (see <see cref="InplaceAddMultiplyWithL2"/>), all touched weights
/// are also shrunk towards zero by a small constant amount (L1), clamping at 0.0f. This further guards
/// against overfitting and has the effect of doing some automatic feature selection, since most weights
/// will end up being exactly 0.0f with a large enough L1. The constant amount comes from the
/// (sub-)derivative of the L1 norm, which is just the constant "L1" (soft-thresholding).
/// </summary>
/// <param name="dense">Dense weight vector; updated in place at the sparse vector's indices.</param>
/// <param name="learningRate">Step size applied to the gradient and both regularization terms.</param>
/// <param name="loss">Signed loss-derivative for this example; scales the feature values.</param>
/// <param name="L2">L2 regularization strength.
/// NOTE(review): the shrink factor is computed as (1 - L2) * learningRate, while the summary's
/// derivation (lambda * w) suggests L2 * learningRate — confirm the intended parameterization
/// with callers before changing it; behavior is preserved here.</param>
/// <param name="L1">L1 regularization strength (constant subtracted from each touched weight's magnitude).</param>
/// <param name="sparse">Sparse example vector; only its first Count index/value pairs are valid.</param>
public static void InplaceAddMultiplyWithL2AndL1(this float[] dense, float learningRate, float loss, float L2, float L1, SparseVector sparse)
{
    Debug.Assert(dense.Length == sparse.Dimension);
    var indices = sparse.Indices;
    var values = sparse.Values;
    // Only the first Count entries of Indices/Values are valid, and the backing
    // arrays may be longer than Count — bound the loop once instead of breaking mid-loop.
    var limit = Math.Min(sparse.Count, indices.Length);
    var weightFactor = loss * learningRate;
    var L2Factor = (1.0f - L2) * learningRate;
    var L1Factor = L1 * learningRate;
    for (int i = 0; i < limit; i++)
    {
        var ix = indices[i];
        var wi = dense[ix];
        // Gradient step plus L2 shrink first; then apply L1 soft-thresholding:
        // weights within L1Factor of zero are clamped to exactly 0.0f, others
        // move towards zero by L1Factor.
        var candidateWeightAfterL2 = wi + (weightFactor * values[i]) - (L2Factor * wi);
        dense[ix] = Math.Abs(candidateWeightAfterL2) < L1Factor
            ? 0.0f
            : candidateWeightAfterL2 > 0.0f
                ? candidateWeightAfterL2 - L1Factor
                : candidateWeightAfterL2 + L1Factor;
    }
}
/// <summary>
/// Constructs an example pairing a label with its sparse feature vector.
/// </summary>
public Example(float label, SparseVector features)
{
    Features = features;
    Label = label;
}
/// <summary>
/// Used by L2 regularized SGD to push the weights in the direction of/contrary-to an example vector,
/// by a given loss amount and learning rate, while shrinking all touched weights by a small factor
/// to prevent overfitting. The factor comes from the definition of the regularized loss:
/// L(w,e) = ... + lambda/2 * norm-2(w)^2, whose derivative is simply lambda * w.
/// </summary>
/// <param name="dense">Dense weight vector; updated in place at the sparse vector's indices.</param>
/// <param name="learningRate">Step size applied to the gradient and the regularization term.</param>
/// <param name="loss">Signed loss-derivative for this example; scales the feature values.</param>
/// <param name="L2">L2 regularization strength.
/// NOTE(review): the shrink factor is computed as (1 - L2) * learningRate, while the summary's
/// derivation (lambda * w) suggests L2 * learningRate — confirm the intended parameterization
/// with callers before changing it; behavior is preserved here.</param>
/// <param name="sparse">Sparse example vector; only its first Count index/value pairs are valid.</param>
public static void InplaceAddMultiplyWithL2(this float[] dense, float learningRate, float loss, float L2, SparseVector sparse)
{
    Debug.Assert(dense.Length == sparse.Dimension);
    var indices = sparse.Indices;
    var values = sparse.Values;
    // Only the first Count entries of Indices/Values are valid, and the backing
    // arrays may be longer than Count — bound the loop once instead of breaking mid-loop.
    var limit = Math.Min(sparse.Count, indices.Length);
    var weightFactor = loss * learningRate;
    var L2Factor = (1.0f - L2) * learningRate;
    for (int i = 0; i < limit; i++)
    {
        var ix = indices[i];
        var wi = dense[ix];
        // Gradient step plus multiplicative L2 shrink of the existing weight.
        dense[ix] = wi + (weightFactor * values[i]) - (L2Factor * wi);
    }
}