protected override Float AccumulateOneGradient(ref VBuffer<Float> feat, Float label, Float weight,
    ref VBuffer<Float> x, ref VBuffer<Float> grad, ref Float[] scratch)
{
    Float bias = 0;
    x.GetItemOrDefault(0, ref bias);
    Float score = bias + VectorUtils.DotProductWithOffset(ref x, 1, ref feat);

    Float s = score / 2;
    Float logZ = MathUtils.SoftMax(s, -s);
    Float label01 = Math.Min(1, Math.Max(label, 0));
    Float label11 = 2 * label01 - 1; // label mapped to (-1..1)
    Float modelProb1 = MathUtils.ExpSlow(s - logZ);
    Float ls = label11 * s;
    Float datumLoss = logZ - ls;
    //Float loss2 = MathUtils.SoftMax(s - ls, -s - ls);

    Contracts.Check(!Float.IsNaN(datumLoss), "Unexpected NaN");

    Float mult = weight * (modelProb1 - label01);
    VectorUtils.AddMultWithOffset(ref feat, mult, ref grad, 1);
    // Note that the 0th L-BFGS weight is the bias.
    // Add the bias using this trick, which has the advantage of working well for both dense and sparse arrays.
    // Due to the call to EnsureBiases, we know this region is dense.
    Contracts.Assert(grad.Count >= BiasCount && (grad.IsDense || grad.Indices[BiasCount - 1] == BiasCount - 1));
    grad.Values[0] += mult;

    return weight * datumLoss;
}
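The halved score above is just a reparameterization: with s = score / 2, logZ = log(exp(s) + exp(-s)) makes modelProb1 the ordinary sigmoid of the full score and datumLoss the standard log loss. The standalone sketch below, which assumes only that MathUtils.SoftMax(a, b) computes log(exp(a) + exp(b)) and that ExpSlow is a plain exponential, checks that equivalence numerically; it is an illustration, not part of the trainer.

// Minimal standalone sketch (illustrative, not ML.NET code) checking the halved-score trick.
// Assumes MathUtils.SoftMax(a, b) = log(exp(a) + exp(b)) and ExpSlow(v) = exp(v).
using System;

static class LogisticGradientSketch
{
    static void Main()
    {
        double score = 1.3;    // bias + w.x for one example
        double label01 = 1.0;  // label in {0, 1}
        double label11 = 2 * label01 - 1;

        // Path taken in AccumulateOneGradient: s = score / 2, logZ = log(exp(s) + exp(-s)).
        double s = score / 2;
        double logZ = Math.Log(Math.Exp(s) + Math.Exp(-s));
        double modelProb1 = Math.Exp(s - logZ);  // equals sigmoid(score)
        double datumLoss = logZ - label11 * s;   // equals log(1 + exp(-label11 * score))

        // Reference formulas for comparison.
        double sigmoid = 1 / (1 + Math.Exp(-score));
        double logLoss = Math.Log(1 + Math.Exp(-label11 * score));

        Console.WriteLine($"prob: {modelProb1:F6} vs {sigmoid:F6}");  // same value
        Console.WriteLine($"loss: {datumLoss:F6} vs {logLoss:F6}");   // same value
    }
}

The gradient multiplier weight * (modelProb1 - label01) is then the familiar "predicted probability minus label" term of logistic regression, applied once to the features and once to the bias slot.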
// Poisson: p(y; lambda) = lambda^y * exp(-lambda) / y!
// lambda is the parameter of the Poisson. It is the mean/expected number of occurrences.
// p(y; lambda) is the probability that there are y occurrences given that the expected number was lambda.
// Our goal is to maximize the log-likelihood: log(p(y; lambda)) = y*log(lambda) - lambda - log(y!)
// lambda = exp(w.x + b)
// then dlog(p(y))/dw_i = x_i*y - x_i*lambda = (y - lambda)*x_i
// dlog(p(y))/db = y - lambda
// Goal is to find w that maximizes the log-likelihood.
// Note: We negate the above in order to minimize.
protected override float AccumulateOneGradient(ref VBuffer<float> feat, float label, float weight,
    ref VBuffer<float> x, ref VBuffer<float> grad, ref float[] scratch)
{
    float bias = 0;
    x.GetItemOrDefault(0, ref bias);
    float dot = VectorUtils.DotProductWithOffset(ref x, 1, ref feat) + bias;
    float lambda = MathUtils.ExpSlow(dot);

    float y = label;
    float mult = -(y - lambda) * weight;
    VectorUtils.AddMultWithOffset(ref feat, mult, ref grad, 1);
    // Due to the call to EnsureBiases, we know this region is dense.
    Contracts.Assert(grad.Count >= BiasCount && (grad.IsDense || grad.Indices[BiasCount - 1] == BiasCount - 1));
    grad.Values[0] += mult;

    // From the computer's perspective exp(infinity) == infinity,
    // so inf - inf = NaN, but in reality infinity is just a large
    // number we can't represent, and exp(X) - X for X = inf is just inf.
    if (float.IsPositiveInfinity(lambda))
        return float.PositiveInfinity;

    return -(y * dot - lambda) * weight;
}
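The comment block derives dlog(p)/dw_i = (y - lambda)*x_i, which is exactly the negated multiplier applied to the features above. As a sanity check of that derivation, the standalone sketch below (illustrative only, not ML.NET code) compares the analytic gradient for a single weight against a central finite difference of the per-example negative log-likelihood.

// Minimal standalone sketch (illustrative, not ML.NET code): finite-difference check of the Poisson gradient.
// For a single weight w with lambda = exp(w * x), the per-example negative log-likelihood
// (dropping the constant log(y!)) is L(w) = -(y * w * x - exp(w * x)), and dL/dw = -(y - lambda) * x.
using System;

static class PoissonGradientSketch
{
    static void Main()
    {
        double x = 0.7, y = 3.0, w = 0.5;

        Func<double, double> loss = wi =>
        {
            double dot = wi * x;
            return -(y * dot - Math.Exp(dot));
        };

        // Analytic gradient, matching mult = -(y - lambda) times the feature value.
        double lambda = Math.Exp(w * x);
        double analytic = -(y - lambda) * x;

        // Central finite difference of the loss.
        double eps = 1e-6;
        double numeric = (loss(w + eps) - loss(w - eps)) / (2 * eps);

        Console.WriteLine($"analytic {analytic:F6} vs numeric {numeric:F6}");  // should agree closely
    }
}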
protected override float AccumulateOneGradient(ref VBuffer<float> feat, float label, float weight,
    ref VBuffer<float> x, ref VBuffer<float> grad, ref float[] scores)
{
    if (Utils.Size(scores) < _numClasses)
        scores = new float[_numClasses];

    float bias = 0;
    for (int c = 0, start = _numClasses; c < _numClasses; c++, start += NumFeatures)
    {
        x.GetItemOrDefault(c, ref bias);
        scores[c] = bias + VectorUtils.DotProductWithOffset(ref x, start, ref feat);
    }

    float logZ = MathUtils.SoftMax(scores, _numClasses);
    float datumLoss = logZ;

    int lab = (int)label;
    Contracts.Assert(0 <= lab && lab < _numClasses);
    for (int c = 0, start = _numClasses; c < _numClasses; c++, start += NumFeatures)
    {
        float probLabel = lab == c ? 1 : 0;
        datumLoss -= probLabel * scores[c];

        float modelProb = MathUtils.ExpSlow(scores[c] - logZ);
        float mult = weight * (modelProb - probLabel);
        VectorUtils.AddMultWithOffset(ref feat, mult, ref grad, start);
        // Due to the call to EnsureBiases, we know this region is dense.
        Contracts.Assert(grad.Count >= BiasCount && (grad.IsDense || grad.Indices[BiasCount - 1] == BiasCount - 1));
        grad.Values[c] += mult;
    }

    Contracts.Check(FloatUtils.IsFinite(datumLoss), "Data contain bad values.");
    return weight * datumLoss;
}
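For reference, the quantities this method accumulates per example are the standard softmax cross-entropy ones: logZ is the log-partition over the class scores, the loss is logZ minus the true class's score, and each class's gradient multiplier is its softmax probability minus the 0/1 label indicator. The toy sketch below (standalone, assuming MathUtils.SoftMax(scores, n) returns the log of the summed exponentials) just prints those quantities for three classes.

// Minimal standalone sketch (illustrative, not ML.NET code): softmax cross-entropy loss and
// per-class gradient multipliers for a toy 3-class example.
using System;
using System.Linq;

static class SoftmaxGradientSketch
{
    static void Main()
    {
        double[] scores = { 1.0, -0.5, 0.2 };  // one per class: bias_c + w_c . x
        int label = 2;

        // Log-partition function, computed with a max shift for numerical stability.
        double max = scores.Max();
        double logZ = max + Math.Log(scores.Sum(s => Math.Exp(s - max)));

        // Per-example loss: logZ minus the score of the true class.
        double datumLoss = logZ - scores[label];

        for (int c = 0; c < scores.Length; c++)
        {
            double modelProb = Math.Exp(scores[c] - logZ);  // softmax probability of class c
            double probLabel = c == label ? 1.0 : 0.0;
            double mult = modelProb - probLabel;            // multiplier applied to that class's feature block
            Console.WriteLine($"class {c}: p = {modelProb:F4}, grad multiplier = {mult:F4}");
        }
        Console.WriteLine($"loss = {datumLoss:F4}");
    }
}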