/// <summary>
/// Evaluates the function over all indices in parallel and sums the per-chunk results.
/// </summary>
/// <param name="input">The point at which to evaluate the function</param>
/// <param name="gradient">Receives the summed gradient; whatever it held on entry is discarded</param>
/// <returns>Sum of the function values reported by every chunk</returns>
public Float Eval(ref VBuffer<Float> input, ref VBuffer<Float> gradient)
{
    // Publish the evaluation point so the queued work items can read it.
    _input = input;

    // One work item per chunk; the chunk number is passed as the state object.
    int queued = 0;
    while (queued < _threads)
    {
        ThreadPool.QueueUserWorkItem(Eval, queued);
        queued++;
    }

    // Block until every chunk has set its completion event.
    AutoResetEvent.WaitAll(_threadFinished);

    // Reset the caller's gradient before folding the per-chunk gradients into it.
    VectorUtils.ScaleBy(ref gradient, 0);

    Float total = 0;
    for (int chunk = 0; chunk < _threads; chunk++)
    {
        if (gradient.Length != 0)
        {
            VectorUtils.Add(ref _tempGrads[chunk], ref gradient);
        }
        else
        {
            // A zero-length accumulator is seeded with a copy of this chunk's gradient.
            _tempGrads[chunk].CopyTo(ref gradient);
        }

        total += _tempVals[chunk];
    }

    return total;
}
/// <summary>
/// Transforms <c>_dir</c> in place using the stored history in <c>_sList</c>, <c>_yList</c>
/// and <c>_roList</c>: a newest-to-oldest pass, a rescaling, then an oldest-to-newest pass.
/// History entries whose ro value is not positive are ignored. If the history is empty or
/// contains no positive ro, <c>_dir</c> is left unchanged.
/// </summary>
internal void MapDirByInverseHessian()
{
    int historySize = _roList.Count;
    if (historySize == 0)
    {
        return;
    }

    Float[] alphas = new Float[historySize];

    // Highest history index with a positive ro; -1 until one is found.
    int newestUsable = -1;

    // First pass: walk the history from the last entry back to the first.
    for (int k = historySize - 1; k >= 0; k--)
    {
        if (!(_roList[k] > 0))
        {
            continue;
        }

        alphas[k] = -VectorUtils.DotProduct(ref _sList[k], ref _dir) / _roList[k];
        VectorUtils.AddMult(ref _yList[k], alphas[k], ref _dir);

        if (newestUsable == -1)
        {
            newestUsable = k;
        }
    }

    // if we have no positive ros, dir doesn't change
    if (newestUsable == -1)
    {
        return;
    }

    // Rescale by ro / (y . y) taken from the newest usable entry.
    Float yNormSquared = VectorUtils.DotProduct(ref _yList[newestUsable], ref _yList[newestUsable]);
    VectorUtils.ScaleBy(ref _dir, _roList[newestUsable] / yNormSquared);

    // Second pass: walk forward again over the same usable entries.
    for (int k = 0; k <= newestUsable; k++)
    {
        if (!(_roList[k] > 0))
        {
            continue;
        }

        Float beta = VectorUtils.DotProduct(ref _yList[k], ref _dir) / _roList[k];
        VectorUtils.AddMult(ref _sList[k], -alphas[k] - beta, ref _dir);
    }
}
/// <summary>
/// Chooses the next search direction from the newly computed gradient, then promotes the
/// new point, gradient and value to be the current ones.
/// </summary>
public void ChangeDir()
{
    if (!_useCG)
    {
        // Without CG the direction is rebuilt from the new gradient with a factor of -1.
        VectorUtils.ScaleInto(ref _newGrad, -1, ref _dir);
    }
    else
    {
        Float newDotNew = VectorUtils.NormSquared(_newGrad);
        Float newDotOld = VectorUtils.DotProduct(ref _newGrad, ref _grad);
        Float oldDotOld = VectorUtils.NormSquared(_grad);

        // Coefficient (new.new - new.old) / old.old, floored at zero.
        Float beta = Math.Max(0, (newDotNew - newDotOld) / oldDotOld);

        // Keep beta times the previous direction and fold in the new gradient with factor -1.
        VectorUtils.ScaleBy(ref _dir, beta);
        VectorUtils.AddMult(ref _newGrad, -1, ref _dir);
    }

    // The "new" state becomes the current state.
    _newPoint.CopyTo(ref _point);
    _newGrad.CopyTo(ref _grad);
    _value = _newValue;
}
/// <summary>
/// Work-item body for one chunk: evaluates <c>_func</c> at <c>_input</c> for every index in
/// the chunk, accumulating the values into <c>_tempVals</c> and the gradients into
/// <c>_tempGrads</c>, then sets the chunk's entry in <c>_threadFinished</c>.
/// </summary>
/// <param name="chunkIndexObj">Boxed <see cref="int"/> chunk number</param>
private void Eval(object chunkIndexObj)
{
    int chunk = (int)chunkIndexObj;

    // Indices are split as evenly as possible: the first `extra` chunks get one more index.
    int smallSize = _maxIndex / _threads;
    int largeSize = smallSize + 1;
    int extra = _maxIndex % _threads;

    int first;
    int length;
    if (chunk >= extra)
    {
        first = largeSize * extra + smallSize * (chunk - extra);
        length = smallSize;
    }
    else
    {
        first = largeSize * chunk;
        length = largeSize;
    }
    int end = first + length;

    // Clear this chunk's accumulators.
    _tempVals[chunk] = 0;
    VectorUtils.ScaleBy(ref _tempGrads[chunk], 0);

    VBuffer<Float> scratch = default(VBuffer<Float>);
    for (int index = first; index < end; index++)
    {
        // Hand the callee an empty buffer that reuses the previous iteration's arrays.
        scratch = new VBuffer<Float>(0, 0, scratch.Values, scratch.Indices);
        _tempVals[chunk] += _func(index, ref _input, ref scratch);

        if (_tempGrads[chunk].Length != 0)
        {
            VectorUtils.Add(ref scratch, ref _tempGrads[chunk]);
        }
        else
        {
            // A zero-length accumulator is seeded with a copy of this gradient.
            scratch.CopyTo(ref _tempGrads[chunk]);
        }
    }

    // Tell the waiting caller that this chunk is done.
    _threadFinished[chunk].Set();
}
/// <summary>
/// Tests the gradient reported by f by comparing the analytic directional derivative with a
/// central-difference estimate along several random sparse unit directions.
/// </summary>
/// <param name="f">function to test</param>
/// <param name="x">point at which to test</param>
/// <param name="quiet">If false, outputs detailed info.</param>
/// <returns>
/// maximum normalized difference between analytic and numeric directional derivative over
/// multiple tests; negative infinity when <paramref name="x"/> has length zero (no tests run)
/// </returns>
public static Float Test(DifferentiableFunction f, ref VBuffer<Float> x, bool quiet)
{
    // REVIEW: Delete this method?
    VBuffer<Float> grad = default(VBuffer<Float>);
    VBuffer<Float> newGrad = default(VBuffer<Float>);
    VBuffer<Float> newX = default(VBuffer<Float>);
    f(ref x, ref grad, null);

    if (!quiet)
    {
        Console.WriteLine(Header);
    }

    Float maxNormDiff = Float.NegativeInfinity;

    int numIters = Math.Min((int)x.Length, 10);

    // Always perturb at least one coordinate: for a length-1 input, Length / 2 is 0, which
    // would yield an empty direction, a zero norm and a NaN result.
    int maxDirCount = Math.Max(1, Math.Min((int)x.Length / 2, 100));

    for (int n = 1; n <= numIters; n++)
    {
        int dirCount = Math.Min(n * 10, maxDirCount);
        List<int> indices = new List<int>(dirCount);
        List<Float> values = new List<Float>(dirCount);
        for (int i = 0; i < dirCount; i++)
        {
            // Redraw until the coordinate has not been picked yet.
            int index = _r.Next((int)x.Length);
            while (indices.Contains(index))
            {
                index = _r.Next((int)x.Length);
            }

            indices.Add(index);
            values.Add(SampleFromGaussian(_r));
        }

        // The indices were drawn in random order. Sort them, carrying each value along with
        // its index, because a sparse VBuffer is expected to hold its indices in increasing order.
        int[] dirIndices = indices.ToArray();
        Float[] dirValues = values.ToArray();
        Array.Sort(dirIndices, dirValues);

        VBuffer<Float> dir = new VBuffer<Float>(x.Length, dirValues.Length, dirValues, dirIndices);

        // Normalize the direction to unit length.
        Float norm = VectorUtils.Norm(dir);
        VectorUtils.ScaleBy(ref dir, 1 / norm);

        // Central difference: evaluate f at x + Eps * dir and x - Eps * dir.
        VectorUtils.AddMultInto(ref x, Eps, ref dir, ref newX);
        Float rVal = f(ref newX, ref newGrad, null);

        VectorUtils.AddMultInto(ref x, -Eps, ref dir, ref newX);
        Float lVal = f(ref newX, ref newGrad, null);

        Float dirDeriv = VectorUtils.DotProduct(ref grad, ref dir);
        Float numDeriv = (rVal - lVal) / (2 * Eps);

        Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
        Float diff = numDeriv - dirDeriv;
        if (!quiet)
        {
            Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
        }

        maxNormDiff = Math.Max(maxNormDiff, normDiff);
    }

    return maxNormDiff;
}
/// <summary>
/// Minimize the function represented by <paramref name="f"/> using stochastic gradient
/// steps, with optional momentum and optional iterate averaging.
/// </summary>
/// <param name="f">Stochastic gradients of function to minimize</param>
/// <param name="initial">Initial point</param>
/// <param name="result">Approximate minimum of <paramref name="f"/></param>
public void Minimize(DStochasticGradient f, ref VBuffer<Float> initial, ref VBuffer<Float> result)
{
    Contracts.Check(FloatUtils.IsFinite(initial.Values, initial.Count), "The initial vector contains NaNs or infinite values.");

    int dimension = initial.Length;

    VBuffer<Float> gradient = VBufferUtils.CreateEmpty<Float>(dimension);
    VBuffer<Float> update = VBufferUtils.CreateEmpty<Float>(dimension);
    VBuffer<Float> current = default(VBuffer<Float>);
    initial.CopyTo(ref current);
    VBuffer<Float> previous = default(VBuffer<Float>);
    VBuffer<Float> average = VBufferUtils.CreateEmpty<Float>(dimension);

    // A _maxSteps of zero means there is no cap on the number of iterations.
    for (int iter = 0; _maxSteps == 0 || iter < _maxSteps; ++iter)
    {
        if (_momentum != 0)
        {
            // Carry over a fraction of the previous update.
            VectorUtils.ScaleBy(ref update, _momentum);
        }
        else
        {
            // No momentum: start from an empty update that reuses the existing arrays.
            update = new VBuffer<Float>(update.Length, 0, update.Values, update.Indices);
        }

        // Learning rate for this iteration according to the configured schedule.
        Float stepSize;
        if (_rateSchedule == RateScheduleType.Constant)
        {
            stepSize = 1 / _t0;
        }
        else if (_rateSchedule == RateScheduleType.Sqrt)
        {
            stepSize = 1 / (_t0 + MathUtils.Sqrt(iter));
        }
        else if (_rateSchedule == RateScheduleType.Linear)
        {
            stepSize = 1 / (_t0 + iter);
        }
        else
        {
            throw Contracts.Except();
        }

        // Accumulate the mini-batch gradients into the update, one sample at a time.
        Float scale = (1 - _momentum) / _batchSize;
        for (int sample = 0; sample < _batchSize; ++sample)
        {
            f(ref current, ref gradient);
            VectorUtils.AddMult(ref gradient, scale, ref update);
        }

        if (!_averaging)
        {
            // The old point becomes `previous`; the new point is written into `current`.
            Utils.Swap(ref current, ref previous);
            VectorUtils.AddMult(ref update, -stepSize, ref previous, ref current);

            bool converged = iter > 0 && TerminateTester.ShouldTerminate(ref current, ref previous);
            if (converged || _terminate(ref current))
            {
                result = current;
                return;
            }
        }
        else
        {
            // The old average becomes `previous`; the running average over iter + 1 points
            // is then rebuilt in `average` after the point itself has been stepped.
            Utils.Swap(ref average, ref previous);
            VectorUtils.ScaleBy(previous, ref average, (Float)iter / (iter + 1));
            VectorUtils.AddMult(ref update, -stepSize, ref current);
            VectorUtils.AddMult(ref current, (Float)1 / (iter + 1), ref average);

            bool converged = iter > 0 && TerminateTester.ShouldTerminate(ref average, ref previous);
            if (converged || _terminate(ref average))
            {
                result = average;
                return;
            }
        }
    }

    // Step budget exhausted: report whichever iterate this configuration tracks.
    if (_averaging)
    {
        result = average;
    }
    else
    {
        result = current;
    }
}