/// <summary>
/// Takes one diagonally scaled gradient step and records the resulting (s, y) pair.
/// </summary>
/// <remarks>
/// The next point is written to <c>newX</c>. The pair consists of s = newX - x and
/// y = (gradient at newX on the previous batch) - newGrad; both are appended to
/// <c>sList</c> / <c>yList</c> and passed to <c>UpdateDiag</c>.
/// </remarks>
/// <param name="dfunction">The stochastic function being minimized.</param>
protected internal override void TakeStep(AbstractStochasticCachingDiffFunction dfunction)
{
    // The schedule factor does not depend on the coordinate, so compute it once.
    double scheduledGain = fixedGain * GainSchedule(k, 5 * numBatches);
    for (int i = 0; i < x.Length; i++)
    {
        double thisGain = scheduledGain / (diag[i]);
        newX[i] = x[i] - thisGain * grad[i];
    }

    //Get a new pair...
    Say(" A ");
    double[] y;
    if (sList.Count == pairMem)
    {
        // History is full: drop the oldest pair. Only the y buffer is recycled,
        // because s is always a freshly computed array (see below).
        sList.RemoveAt(0);
        y = yList[0];
        yList.RemoveAt(0);
    }
    else
    {
        y = new double[x.Length];
    }

    double[] s = ArrayMath.PairwiseSubtract(newX, x);

    // Ask for the gradient at the new point on the same batch that produced newGrad.
    dfunction.recalculatePrevBatch = true;
    System.Array.Copy(dfunction.DerivativeAt(newX, bSize), 0, y, 0, grad.Length);
    ArrayMath.PairwiseSubtractInPlace(y, newGrad); // newY = newY-newGrad

    sList.Add(s);
    yList.Add(y);
    UpdateDiag(diag, s, y);
}
/// <summary>
/// Creates a tester for a stochastic function and chooses the batch size used for testing.
/// </summary>
/// <remarks>
/// If no valid batch size is found (non-positive, larger than the data dimension, or not
/// a divisor of the data dimension) the problem is logged and the process exits with code 1.
/// </remarks>
/// <param name="function">The function to test; must be an AbstractStochasticCachingDiffFunction.</param>
/// <exception cref="System.NotSupportedException">
/// Thrown when <paramref name="function"/> is not an AbstractStochasticCachingDiffFunction.
/// </exception>
public StochasticDiffFunctionTester(IFunction function)
{
    // check for derivatives
    if (!(function is AbstractStochasticCachingDiffFunction))
    {
        log.Info("Attempt to test non stochastic function using StochasticDiffFunctionTester");
        throw new NotSupportedException();
    }
    thisFunc = (AbstractStochasticCachingDiffFunction)function;

    // used to generate random test vectors
    generator = new Random(Runtime.CurrentTimeMillis());

    // Look for a good batchSize to test with by getting factors
    testBatchSize = (int)GetTestBatchSize(thisFunc.DataDimension());

    // Again make sure that our calculated batchSize is actually valid.
    // Zero must be rejected here as well: it would otherwise reach the modulo
    // below and fail with a DivideByZeroException instead of the intended message.
    if (testBatchSize <= 0 || testBatchSize > thisFunc.DataDimension() || (thisFunc.DataDimension() % testBatchSize != 0))
    {
        log.Info("Invalid testBatchSize found, testing aborted.  Data size: " + thisFunc.DataDimension() + " batchSize: " + testBatchSize);
        System.Environment.Exit(1);
    }

    numBatches = thisFunc.DataDimension() / testBatchSize;
    Sayln("StochasticDiffFunctionTester created with:");
    Sayln("   data dimension  = " + thisFunc.DataDimension());
    Sayln("   batch size = " + testBatchSize);
    Sayln("   number of batches = " + numBatches);
}
/// <summary>
/// Takes one step with per-coordinate gains that are adapted from the gradient,
/// the gain history vector <c>v</c> and a Hessian-vector product.
/// </summary>
/// <remarks>
/// Writes the next point to <c>newX</c> and updates <c>gains</c>, <c>v</c>, <c>Hv</c>
/// and <c>meta</c> in place.
/// </remarks>
/// <param name="dfunction">The stochastic function being minimized.</param>
protected internal override void TakeStep(AbstractStochasticCachingDiffFunction dfunction)
{
    dfunction.returnPreviousValues = true;
    System.Array.Copy(dfunction.HdotVAt(x, v, grad, bSize), 0, Hv, 0, Hv.Length);

    //Update the weights
    for (int idx = 0; idx < x.Length; idx++)
    {
        meta = 1 - mu * grad[idx] * v[idx];

        // The multiplicative gain update is never allowed to shrink a gain by more than half.
        double scale = (0.5 > meta) ? 0.5 : meta;
        gains[idx] = gains[idx] * scale;

        //Update gain history
        v[idx] = lam * (1 + cPosDef * gains[idx]) * v[idx] - gains[idx] * (grad[idx] + lam * Hv[idx]);

        //Get the next X
        newX[idx] = x[idx] - gains[idx] * grad[idx];
    }

    if (!printMinMax)
    {
        return;
    }

    Say("vMin = " + ArrayMath.Min(v) + "  ");
    Say("vMax = " + ArrayMath.Max(v) + "  ");
    Say("gainMin = " + ArrayMath.Min(gains) + "  ");
    Say("gainMax = " + ArrayMath.Max(gains) + "  ");
}
/// <summary>
/// Takes one plain gradient step: newX = x - (gain * schedule) * grad.
/// </summary>
/// <param name="dfunction">The stochastic function being minimized (not used by this update rule).</param>
protected internal override void TakeStep(AbstractStochasticCachingDiffFunction dfunction)
{
    // The scheduled step size is the same for every coordinate, so evaluate it once
    // instead of once per element.
    double stepSize = gain * GainSchedule(k, 5 * numBatches);
    for (int i = 0; i < x.Length; i++)
    {
        newX[i] = x[i] - stepSize * grad[i];
    }
}
// [cdm 2012: The version that used to be here was clearly buggy;
// I changed it a little, but didn't test it. It's now more correct, but
// I think it is still conceptually faulty, since it will keep growing the
// batch size so long as any minute improvement in the function value is
// obtained, whereas the whole point of using a small batch is to get speed
// at the cost of small losses.]

/// <summary>
/// Searches for a batch size by doubling it, starting from <paramref name="bStart"/>,
/// for as long as each timed minimization run reaches a strictly lower function value.
/// </summary>
/// <remarks>
/// Sets <c>maxTime</c> and <c>bSize</c> on this minimizer as a side effect.
/// The search stops at the first run that does not improve on the best value so far;
/// this includes ties and NaN / infinite results, which previously left the batch size
/// unchanged and repeated the same test indefinitely.
/// </remarks>
/// <param name="function">The function to minimize; must be an AbstractStochasticCachingDiffFunction.</param>
/// <param name="initial">Starting point; copied before every run and not modified.</param>
/// <param name="msPerTest">Value assigned to <c>maxTime</c> for each run.</param>
/// <param name="bStart">First batch size to try.</param>
/// <returns>The batch size that gave the lowest value, or 0 if no run produced a value below +Infinity.</returns>
/// <exception cref="System.NotSupportedException">
/// Thrown when <paramref name="function"/> is not an AbstractStochasticCachingDiffFunction.
/// </exception>
public virtual int TuneBatch(Func function, double[] initial, long msPerTest, int bStart)
{
    double[] xTest = new double[initial.Length];
    int bOpt = 0;
    double min = double.PositiveInfinity;
    this.maxTime = msPerTest;

    // check for stochastic derivatives
    if (!(function is AbstractStochasticCachingDiffFunction))
    {
        throw new NotSupportedException();
    }
    AbstractStochasticCachingDiffFunction dFunction = (AbstractStochasticCachingDiffFunction)function;

    int b = bStart;
    bool toContinue = true;
    do
    {
        System.Array.Copy(initial, 0, xTest, 0, initial.Length);
        log.Info(string.Empty);
        log.Info("Testing with batch size:  " + b);
        bSize = b;
        ShutUp();
        // NOTE(review): the returned array is ignored and xTest is evaluated instead;
        // TuneDouble uses the return value - confirm which is intended.
        this.Minimize(function, 1e-5, xTest);
        double result = dFunction.ValueAt(xTest);

        if (result < min)
        {
            min = result;
            bOpt = bSize;
            b *= 2;
        }
        else
        {
            // No improvement. The batch size is not changed in this case, so another
            // pass would only repeat the same test; stop here.
            toContinue = false;
        }

        log.Info(string.Empty);
        log.Info("Final value is: " + nf.Format(result));
        log.Info("Optimal so far is:  batch size: " + bOpt);
    }
    while (toContinue);

    return bOpt;
}
/// <summary>
/// Prepares per-run state: propagates the calculation method to the function and
/// allocates the gain, gain-history and Hessian-vector buffers.
/// </summary>
/// <param name="func">The stochastic function about to be minimized.</param>
protected internal override void Init(AbstractStochasticCachingDiffFunction func)
{
    func.method = this.method;

    int dimension = x.Length;
    gains = new double[dimension];
    v = new double[dimension];
    Hv = new double[dimension];

    // Every coordinate starts from the same base gain.
    int idx = 0;
    while (idx < v.Length)
    {
        gains[idx] = gain;
        idx++;
    }
}
/// <summary>
/// Prepares per-run state: resets the diagonal scaling to fixedGain / gain,
/// sets the gradient memory to 1 and clears the (s, y) pair history.
/// </summary>
/// <param name="func">The stochastic function about to be minimized (not used here).</param>
protected internal override void Init(AbstractStochasticCachingDiffFunction func)
{
    int dimension = x.Length;
    diag = new double[dimension];
    memory = 1;

    // All diagonal entries start from the same ratio.
    double initialScale = fixedGain / gain;
    for (int idx = 0; idx < dimension; idx++)
    {
        diag[idx] = initialScale;
    }

    sList = new List<double[]>();
    yList = new List<double[]>();
}
/// <summary>
/// Takes one quasi-Newton style step along the direction produced by <c>ComputeDir</c>
/// and records the new (s, y, ro) triple.
/// </summary>
/// <remarks>
/// The next point is written to <c>newX</c>. For the new triple,
/// s = newX - x, y = (gradient at newX on the previous batch) - newGrad + lambda * s,
/// and ro = 1 / (s . y). At most <c>M</c> pairs are kept in <c>sList</c> / <c>yList</c>.
/// </remarks>
/// <param name="dfunction">The stochastic function being minimized.</param>
protected internal override void TakeStep(AbstractStochasticCachingDiffFunction dfunction)
{
    try
    {
        ComputeDir(dir, newGrad);
    }
    catch (SQNMinimizer.SurpriseConvergence)
    {
        ClearStuff();
    }

    double thisGain = gain * GainSchedule(k, 5 * numBatches);
    for (int i = 0; i < x.Length; i++)
    {
        newX[i] = x[i] + thisGain * dir[i];
    }

    //Get a new pair...
    Say(" A ");
    if (sList.Count == M)
    {
        // History is full: take the oldest buffers out of the lists and reuse them.
        s = sList[0];
        sList.RemoveAt(0);
        y = yList[0];
        yList.RemoveAt(0);
    }
    else
    {
        s = new double[x.Length];
        y = new double[x.Length];
    }

    // roList receives one entry per step just like sList and yList, so it must be
    // trimmed with them; otherwise it grows without bound and its entries no longer
    // line up with the pairs. NOTE(review): assumes roList is not trimmed elsewhere in
    // a way that relies on the extra entries - the loop is a no-op if it is already in sync.
    while (roList.Count > sList.Count)
    {
        roList.RemoveAt(0);
    }

    // Ask for the gradient at the new point on the same batch that produced newGrad.
    dfunction.recalculatePrevBatch = true;
    System.Array.Copy(dfunction.DerivativeAt(newX, bSize), 0, y, 0, grad.Length);

    // compute s_k, y_k
    ro = 0;
    for (int i = 0; i < x.Length; i++)
    {
        s[i] = newX[i] - x[i];
        y[i] = y[i] - newGrad[i] + lambda * s[i];
        ro += s[i] * y[i];
    }
    ro = 1.0 / ro;

    sList.Add(s);
    yList.Add(y);
    roList.Add(ro);
}
/// <summary>
/// Runs a scaled SGD phase and then hands its curvature information
/// (s/y pair lists and diagonal) to a QN minimizer started from the SGD result.
/// </summary>
/// <param name="function">The function to minimize; must be an AbstractStochasticCachingDiffFunction.</param>
/// <param name="functionTolerance">Tolerance forwarded to both minimizers.</param>
/// <param name="initial">Starting point for the SGD phase.</param>
/// <param name="maxIterations">Not read by this method; SGDPasses and QNPasses are used instead.</param>
/// <returns>The array returned by the SGD phase.</returns>
/// <exception cref="System.NotSupportedException">
/// Thrown when <paramref name="function"/> is not an AbstractStochasticCachingDiffFunction.
/// </exception>
public virtual double[] Minimize(IDiffFunction function, double functionTolerance, double[] initial, int maxIterations)
{
    Sayln("SGDToQNMinimizer called on function of " + function.DomainDimension() + " variables;");

    // check for stochastic derivatives
    AbstractStochasticCachingDiffFunction stochasticFunction = function as AbstractStochasticCachingDiffFunction;
    if (stochasticFunction == null)
    {
        throw new NotSupportedException();
    }
    stochasticFunction.method = StochasticCalculateMethods.GradientOnly;

    ScaledSGDMinimizer sgdStage = new ScaledSGDMinimizer(this.gain, this.bSize, this.SGDPasses, 1, this.outputIterationsToFile);
    QNMinimizer qnStage = new QNMinimizer(this.QNMem, true);

    // Phase 1: stochastic descent.
    double[] sgdResult = sgdStage.Minimize(stochasticFunction, functionTolerance, initial, this.SGDPasses);

    // Phase 2: seed the quasi-Newton history with what SGD collected.
    QNMinimizer.QNInfo seededInfo = new QNMinimizer.QNInfo(this, sgdStage.sList, sgdStage.yList);
    seededInfo.d = sgdStage.diag;

    // NOTE(review): the value returned by the QN phase is discarded, so the caller only
    // sees its effect if that call updates the array in place - confirm.
    qnStage.Minimize(stochasticFunction, functionTolerance, sgdResult, this.QNPasses, seededInfo);

    log.Info(string.Empty);
    log.Info("Minimization complete.");
    log.Info(string.Empty);
    log.Info("Exiting for Debug");
    return sgdResult;
}
/// <summary>
/// Prepares per-run state: clears the (s, y) pair history and allocates the
/// search-direction buffer with one entry per variable of the function.
/// </summary>
/// <param name="func">The stochastic function about to be minimized.</param>
protected internal override void Init(AbstractStochasticCachingDiffFunction func)
{
    sList = new List<double[]>();
    yList = new List<double[]>();

    int dimension = func.DomainDimension();
    dir = new double[dimension];
}
/// <summary>
/// Performs one optimization step; each concrete minimizer supplies its own update rule.
/// </summary>
/// <remarks>
/// The overrides visible in this file read the current point and gradient
/// (<c>x</c>, <c>grad</c>) and write the proposed next point into <c>newX</c>.
/// </remarks>
/// <param name="dfunction">The stochastic function being minimized.</param>
protected internal abstract void TakeStep(AbstractStochasticCachingDiffFunction dfunction);
/// <summary>
/// Runs the stochastic optimization loop: for each batch it fetches a gradient, smooths it
/// over the stored gradients, lets <c>TakeStep</c> propose the next point and copies it into
/// the current point.
/// </summary>
/// <remarks>
/// The loop ends after max(maxIterations, numPasses) * numBatches iterations, when the
/// elapsed time reaches <c>maxTime</c>, or when a non-finite gradient or point is detected
/// (in which case every entry of the current point is set to NaN).
/// <paramref name="initial"/> is used directly as the working array and is updated in place;
/// when the time limit ends the run, the returned array is <c>newX</c> rather than
/// <paramref name="initial"/>.
/// </remarks>
/// <param name="function">The function to minimize; must be an AbstractStochasticCachingDiffFunction.</param>
/// <param name="functionTolerance">Not read by this method.</param>
/// <param name="initial">Starting point; modified in place.</param>
/// <param name="maxIterations">Maximum number of passes; combined with <c>numPasses</c> by taking the larger.</param>
/// <returns>The final point.</returns>
/// <exception cref="System.NotSupportedException">
/// Thrown when <paramref name="function"/> is not an AbstractStochasticCachingDiffFunction, or
/// when neither <paramref name="maxIterations"/> nor <c>numPasses</c> is positive.
/// </exception>
public virtual double[] Minimize(Func function, double functionTolerance, double[] initial, int maxIterations)
{
    // check for stochastic derivatives
    if (!(function is AbstractStochasticCachingDiffFunction))
    {
        throw new NotSupportedException();
    }
    AbstractStochasticCachingDiffFunction dfunction = (AbstractStochasticCachingDiffFunction)function;
    dfunction.method = StochasticCalculateMethods.GradientOnly;

    x = initial;
    grad = new double[x.Length];
    newX = new double[x.Length];
    gradList = new List<double[]>();
    numBatches = dfunction.DataDimension() / bSize;
    // NOTE(review): outputFrequency is overwritten with a value derived from itself, so
    // calling Minimize repeatedly on the same instance changes it each time - confirm intent.
    outputFrequency = (int)System.Math.Ceiling(((double)numBatches) / ((double)outputFrequency));
    Init(dfunction);
    InitFiles();

    bool have_max = (maxIterations > 0 || numPasses > 0);
    if (!have_max)
    {
        throw new NotSupportedException("No maximum number of iterations has been specified.");
    }
    maxIterations = System.Math.Max(maxIterations, numPasses) * numBatches;

    Sayln("       Batchsize of: " + bSize);
    Sayln("       Data dimension of: " + dfunction.DataDimension());
    Sayln("       Batches per pass through data:  " + numBatches);
    Sayln("       Max iterations is = " + maxIterations);
    if (outputIterationsToFile)
    {
        infoFile.Println(function.DomainDimension() + "; DomainDimension ");
        infoFile.Println(bSize + "; batchSize ");
        infoFile.Println(maxIterations + "; maxIterations");
        infoFile.Println(numBatches + "; numBatches ");
        infoFile.Println(outputFrequency + "; outputFrequency");
    }

    // ---------------------------------------------------------------------------
    //  Loop
    // ---------------------------------------------------------------------------
    Timing total = new Timing();
    Timing current = new Timing();
    total.Start();
    current.Start();
    for (k = 0; k < maxIterations; k++)
    {
        try
        {
            bool doEval = (k > 0 && evaluateIters > 0 && k % evaluateIters == 0);
            if (doEval)
            {
                DoEvaluation(x);
            }
            int pass = k / numBatches;
            int batch = k % numBatches;
            Say("Iter: " + k + " pass " + pass + " batch " + batch);

            // restrict number of saved gradients
            //  (recycle memory of first gradient in list for new gradient)
            if (k > 0 && gradList.Count >= memory)
            {
                // Take the oldest gradient buffer out of the list by index and reuse it.
                newGrad = gradList[0];
                gradList.RemoveAt(0);
            }
            else
            {
                newGrad = new double[grad.Length];
            }

            dfunction.hasNewVals = true;
            System.Array.Copy(dfunction.DerivativeAt(x, v, bSize), 0, newGrad, 0, newGrad.Length);
            ArrayMath.AssertFinite(newGrad, "newGrad");
            gradList.Add(newGrad);
            grad = Smooth(gradList);

            //Get the next X
            TakeStep(dfunction);
            ArrayMath.AssertFinite(newX, "newX");

            // THIS IS FOR DEBUG ONLY
            if (outputIterationsToFile && (k % outputFrequency == 0) && k != 0)
            {
                double curVal = dfunction.ValueAt(x);
                Say(" TrueValue{ " + curVal + " } ");
                file.Println(k + " , " + curVal + " , " + total.Report());
            }
            // END OF DEBUG STUFF

            // NOTE(review): the loop condition already guarantees k < maxIterations here,
            // so this branch only fires if k is changed by code called above.
            if (k >= maxIterations)
            {
                Sayln("Stochastic Optimization complete. Stopped after max iterations");
                x = newX;
                break;
            }
            if (total.Report() >= maxTime)
            {
                Sayln("Stochastic Optimization complete. Stopped after max time");
                x = newX;
                break;
            }

            System.Array.Copy(newX, 0, x, 0, x.Length);
            Say("[" + (total.Report()) / 1000.0 + " s ");
            Say("{" + (current.Restart() / 1000.0) + " s}] ");
            Say(" " + dfunction.LastValue());
            if (quiet)
            {
                log.Info(".");
            }
            else
            {
                Sayln(string.Empty);
            }
        }
        catch (ArrayMath.InvalidElementException e)
        {
            // A non-finite gradient or point was produced: mark the result as invalid and stop.
            log.Info(e.ToString());
            for (int i = 0; i < x.Length; i++)
            {
                x[i] = double.NaN;
            }
            break;
        }
    }

    if (evaluateIters > 0)
    {
        // do final evaluation
        DoEvaluation(x);
    }
    if (outputIterationsToFile)
    {
        infoFile.Println(k + "; Iterations");
        infoFile.Println((total.Report()) / 1000.0 + "; Completion Time");
        infoFile.Println(dfunction.ValueAt(x) + "; Finalvalue");
        infoFile.Close();
        file.Close();
        log.Info("Output Files Closed");
    }

    Say("Completed in: " + (total.Report()) / 1000.0 + " s");
    return x;
}
/// <summary>
/// Initialization hook with an empty default body; an extending class can override it
/// when it needs to set up its own state.
/// </summary>
/// <param name="func">The stochastic function passed to the hook.</param>
protected internal virtual void Init(AbstractStochasticCachingDiffFunction func) { }
/// <summary>
/// Tunes a single double-valued property by repeatedly running a timed minimization and
/// narrowing a [low, high] bracket around the best value found.
/// </summary>
/// <remarks>
/// The two bounds are tested first; after that, candidates come from a queue of midpoints
/// or, when the queue is empty, from the middle of the current bracket. The search stops
/// when the bracket is narrower than <paramref name="Tol"/>. Sets <c>maxTime</c> and
/// <c>numPasses</c> on this minimizer as a side effect, and logs every tested value.
/// </remarks>
/// <param name="function">The function to minimize; must be an AbstractStochasticCachingDiffFunction.</param>
/// <param name="initial">Starting point; copied before every run.</param>
/// <param name="msPerTest">Value assigned to <c>maxTime</c> for each run.</param>
/// <param name="ps">Setter used to apply each candidate value.</param>
/// <param name="lower">Lower bound of the search interval.</param>
/// <param name="upper">Upper bound of the search interval.</param>
/// <param name="Tol">Bracket width below which the search stops.</param>
/// <returns>The candidate value with the lowest function value.</returns>
/// <exception cref="System.NotSupportedException">
/// Thrown when <paramref name="function"/> is not an AbstractStochasticCachingDiffFunction.
/// </exception>
public virtual double TuneDouble(Func function, double[] initial, long msPerTest, StochasticMinimizer.IPropertySetter<double> ps, double lower, double upper, double Tol)
{
    double[] xtest = new double[initial.Length];
    this.maxTime = msPerTest;

    // check for stochastic derivatives
    if (!(function is AbstractStochasticCachingDiffFunction))
    {
        throw new NotSupportedException();
    }
    AbstractStochasticCachingDiffFunction dfunction = (AbstractStochasticCachingDiffFunction)function;

    IList<Pair<double, double>> res = new List<Pair<double, double>>();
    //this is set to lower because the first it will always use the lower first, so it has to be best
    Pair<double, double> best = new Pair<double, double>(lower, double.PositiveInfinity);
    Pair<double, double> low = new Pair<double, double>(lower, double.PositiveInfinity);
    Pair<double, double> high = new Pair<double, double>(upper, double.PositiveInfinity);
    Pair<double, double> cur = new Pair<double, double>();
    Pair<double, double> tmp = new Pair<double, double>();

    IList<double> queue = new List<double>();
    queue.Add(lower);
    queue.Add(upper);

    bool toContinue = true;
    this.numPasses = 10000;
    do
    {
        System.Array.Copy(initial, 0, xtest, 0, initial.Length);

        if (queue.Count != 0)
        {
            // Dequeue by index. On IList<double>, Remove(0) would instead look for the
            // value 0.0 and return a bool.
            cur.first = queue[0];
            queue.RemoveAt(0);
        }
        else
        {
            cur.first = 0.5 * (low.First() + high.First());
        }
        ps.Set(cur.First());

        log.Info(string.Empty);
        log.Info("About to test with batch size:  " + bSize + "  gain: " + gain + " and  " + ps.ToString() + " set to  " + cur.First());
        xtest = this.Minimize(function, 1e-100, xtest);

        // A NaN first coordinate is treated as a failed run and scored as +Infinity.
        if (double.IsNaN(xtest[0]))
        {
            cur.second = double.PositiveInfinity;
        }
        else
        {
            cur.second = dfunction.ValueAt(xtest);
        }

        if (cur.Second() < best.Second())
        {
            // New best: remember the previous best in tmp and turn it into a bracket end.
            CopyPair(best, tmp);
            CopyPair(cur, best);
            if (tmp.First() > best.First())
            {
                // The old best is now the upper bound
                CopyPair(tmp, high);
            }
            else
            {
                // The old best is now the lower bound
                CopyPair(tmp, low);
            }
            queue.Add(0.5 * (cur.First() + high.First()));
        }
        else
        {
            // check in the right interval next
            if (cur.First() < best.First())
            {
                CopyPair(cur, low);
            }
            else if (cur.First() > best.First())
            {
                CopyPair(cur, high);
            }
        }

        if (System.Math.Abs(low.First() - high.First()) < Tol)
        {
            toContinue = false;
        }

        res.Add(new Pair<double, double>(cur.First(), cur.Second()));
        log.Info(string.Empty);
        log.Info("Final value is: " + nf.Format(cur.Second()));
        log.Info("Optimal so far using " + ps.ToString() + " is: " + best.First());
    }
    while (toContinue);

    //output the results to screen.
    log.Info("-------------");
    log.Info(" RESULTS          ");
    log.Info(ps.GetType().ToString());
    log.Info("-------------");
    log.Info("  val    ,    function after " + msPerTest + " ms");
    foreach (Pair<double, double> re in res)
    {
        log.Info(re.First() + "    ,    " + re.Second());
    }
    log.Info(string.Empty);
    log.Info(string.Empty);

    return best.First();
}
// 0=MinErr  1=Bradley

/// <summary>
/// Searches for a fixed gain by repeatedly dividing it by 1.2, starting from
/// <paramref name="fixedStart"/>, for as long as each timed minimization run reaches a
/// strictly lower function value.
/// </summary>
/// <remarks>
/// Sets <c>maxTime</c>, <c>fixedGain</c> and <c>numPasses</c> on this minimizer as a side effect.
/// After the first run the candidate is always reduced once, and again if that run improved.
/// From the second run on, a run that does not improve on the best value ends the search;
/// this includes ties and NaN / infinite results, which previously left the candidate
/// unchanged and repeated the same test indefinitely.
/// </remarks>
/// <param name="function">The function to minimize; must be an AbstractStochasticCachingDiffFunction.</param>
/// <param name="initial">Starting point; copied before every run and not modified.</param>
/// <param name="msPerTest">Value assigned to <c>maxTime</c> for each run.</param>
/// <param name="fixedStart">First fixed gain to try.</param>
/// <returns>The fixed gain that gave the lowest value, or 0.0 if no run produced a value below +Infinity.</returns>
/// <exception cref="System.NotSupportedException">
/// Thrown when <paramref name="function"/> is not an AbstractStochasticCachingDiffFunction.
/// </exception>
public virtual double TuneFixedGain(Func function, double[] initial, long msPerTest, double fixedStart)
{
    double[] xtest = new double[initial.Length];
    double fOpt = 0.0;
    double factor = 1.2;
    double min = double.PositiveInfinity;
    this.maxTime = msPerTest;

    // check for stochastic derivatives
    if (!(function is AbstractStochasticCachingDiffFunction))
    {
        throw new NotSupportedException();
    }
    AbstractStochasticCachingDiffFunction dfunction = (AbstractStochasticCachingDiffFunction)function;

    int it = 1;
    bool toContinue = true;
    double f = fixedStart;
    do
    {
        System.Array.Copy(initial, 0, xtest, 0, initial.Length);
        log.Info(string.Empty);
        this.fixedGain = f;
        log.Info("Testing with batchsize: " + bSize + "    gain:  " + gain + "  fixedGain:  " + nf.Format(fixedGain));
        this.numPasses = 10000;
        // NOTE(review): the returned array is ignored and xtest is evaluated instead;
        // TuneDouble uses the return value - confirm which is intended.
        this.Minimize(function, 1e-100, xtest);
        double result = dfunction.ValueAt(xtest);

        // NOTE(review): on the first run the candidate is reduced here and, if the run
        // improved, once more below - confirm the double reduction is intended.
        if (it == 1)
        {
            f = f / factor;
        }

        if (result < min)
        {
            min = result;
            fOpt = this.fixedGain;
            f = f / factor;
        }
        else if (it > 1)
        {
            // No improvement and the candidate was not changed in this pass, so another
            // pass would only repeat the same test; stop here.
            toContinue = false;
        }

        it += 1;
        log.Info(string.Empty);
        log.Info("Final value is: " + nf.Format(result));
        log.Info("Optimal so far is:  fixedgain: " + fOpt);
    }
    while (toContinue);

    return fOpt;
}