/// <summary>
/// Evaluates the closed-form cubic-interpolation step between two sample
/// points, using each point's step size (_a), function value (_v) and
/// derivative (_d). The expression has the same shape as the cubic
/// interpolation formula in Nocedal &amp; Wright, "Numerical Optimization".
/// </summary>
/// <param name="p0">First sample point.</param>
/// <param name="p1">Second sample point; the result is computed as an offset from its step size.</param>
/// <returns>
/// The interpolated step size. No guarding is done here: a negative value
/// under the square root or a zero divisor propagates as NaN/Infinity.
/// </returns>
public double cubicInterp(pointValueDeriv p0, pointValueDeriv p1)
{
    // Signed distance from p0 to p1 along the step-size axis.
    double span = p1._a - p0._a;

    // Three times the value difference; divided by (p0._a - p1._a) below.
    double tripleDrop = 3 * (p0._v - p1._v);
    double d1 = p0._d + p1._d - tripleDrop / (p0._a - p1._a);

    // Square root term, signed by the orientation of the interval.
    double radicand = d1 * d1 - p0._d * p1._d;
    double d2 = mathSign(span) * Math.Sqrt(radicand);

    double numerator = p1._d + d2 - d1;
    double denominator = p1._d - p0._d + 2 * d2;

    return p1._a - span * numerator / denominator;
}
/// <summary>
/// Line search along _dir that looks for a step size satisfying a
/// sufficient-decrease test (constant 1e-4) and a curvature test
/// (constant 0.9), both scaled by the initial directional derivative.
/// </summary>
/// <remarks>
/// Original note: to deal with non-convex objective function.
///
/// Phase 1 doubles the step until it finds either an acceptable point or a
/// pair of points (aLo, aHi) bracketing one. Phase 2 repeatedly shrinks the
/// bracket, picking each trial step with <c>cubicInterp</c> and clamping it
/// so it stays at least 1% of the bracket width away from both ends.
///
/// Each trial calls getNextPoint2(a) and then getLossGradient(_newW,
/// _newGradList); _value is overwritten with the loss of every trial, so on
/// return it holds the loss of the last point evaluated.
/// NOTE(review): getNextPoint2 presumably fills _newW with the point at step
/// a along _dir, and getNewDirDeriv presumably returns the directional
/// derivative at that point - confirm against their definitions.
/// </remarks>
public void wolfeLineSearch()
{
    double dirDeriv = getDirDeriv();
    double normDir = Math.Sqrt(dotProduct(_dir, _dir));
    if (dirDeriv > 0)
    {
        Global.swLog.WriteLine("L-BFGS chose a non-descent direction: check your gradient!");
    }

    double c1 = 1e-4 * dirDeriv;
    double c2 = 0.9 * dirDeriv;
    double a = (_iter == 0 ? (1 / normDir) : 1.0);

    // Loss at step size 0. The sufficient-decrease test must always compare
    // against this value; _value itself is overwritten by every trial below,
    // so it cannot be re-read inside the loops.
    double startValue = _value;

    pointValueDeriv last = new pointValueDeriv(0, startValue, dirDeriv);
    pointValueDeriv aLo = new pointValueDeriv(), aHi = new pointValueDeriv();
    bool done = false;

    double unitRoundoff = 1e-6;//xu
    if (a * normDir < unitRoundoff)
    {
        Global.swLog.WriteLine("Obtained step size near limits of numerical stability.");
    }

    double newValue = 0;

    // Phase 1: grow the step until a bracket (or an acceptable point) is found.
    while (true)
    {
        getNextPoint2(a);
        newValue = getLossGradient(_newW, _newGradList);
        _value = newValue;
        dirDeriv = getNewDirDeriv();
        pointValueDeriv curr = new pointValueDeriv(a, newValue, dirDeriv);

        if ((curr._v > startValue + c1 * a) || (last._a > 0 && curr._v >= last._v))
        {
            // Not enough decrease, or no better than the previous trial:
            // an acceptable step lies between last and curr.
            aLo = last;
            aHi = curr;
            break;
        }
        else if (Math.Abs(curr._d) <= -c2)
        {
            // Curvature test passed as well: accept this step.
            done = true;
            break;
        }
        else if (curr._d >= 0)
        {
            // Slope is no longer negative: the bracket is (curr, last).
            aLo = curr;
            aHi = last;
            break;
        }

        last = curr;
        a *= 2;
        Global.swLog.Write("+");
    }

    // Phase 2: shrink the bracket until an acceptable step is found.
    double minChange = 0.01;
    while (!done)
    {
        Global.swLog.Write("-");
        pointValueDeriv left = aLo._a < aHi._a ? aLo : aHi;
        pointValueDeriv right = aLo._a < aHi._a ? aHi : aLo;

        if (left._d > 0 && right._d < 0)
        {
            // Slopes point outward at both ends; take the better endpoint
            // instead of interpolating.
            a = aLo._v < aHi._v ? aLo._a : aHi._a;
        }
        else
        {
            a = cubicInterp(aLo, aHi);
            if (double.IsNaN(a) || double.IsInfinity(a))
            {
                // Interpolation broke down (negative radicand or zero
                // divisor). NaN would slip through both clamps below, so
                // fall back to bisecting the bracket.
                a = (aLo._a + aHi._a) / 2;
            }
        }

        // Keep the trial inside the bracket and away from its ends.
        double ub = (minChange * left._a + (1 - minChange) * right._a);
        if (a > ub)
        {
            a = ub;
        }
        double lb = (minChange * right._a + (1 - minChange) * left._a);
        if (a < lb)
        {
            a = lb;
        }

        getNextPoint2(a);
        newValue = getLossGradient(_newW, _newGradList);
        _value = newValue;
        dirDeriv = getNewDirDeriv();
        pointValueDeriv curr = new pointValueDeriv(a, newValue, dirDeriv);

        if ((curr._v > startValue + c1 * a) || (curr._v >= aLo._v))
        {
            aHi = curr;
        }
        else if (Math.Abs(curr._d) <= -c2)
        {
            done = true;
        }
        else
        {
            if (curr._d * (aHi._a - aLo._a) >= 0)
            {
                aHi = aLo;
            }
            aLo = curr;
        }

        if (aLo._a == aHi._a)
        {
            Global.swLog.WriteLine("Step size interval numerically zero.");
            // The bracket has collapsed: further iterations would divide by
            // zero in cubicInterp and re-evaluate the same point forever.
            break;
        }
    }
}