/// <summary>
///   Implements the actual optimization algorithm. This
///   method should try to minimize the objective function.
/// </summary>
protected override bool Optimize()
{
    // Validate the Wolfe line-search tolerance before starting.
    if (LineSearch == Optimization.LineSearch.RegularWolfe ||
        LineSearch == Optimization.LineSearch.StrongWolfe)
    {
        if (wolfe <= ftol || 1.0 <= wolfe)
        {
            throw OperationException("Wolfe tolerance must be between 'ParameterTolerance' and 1.",
                "LBFGSERR_INVALID_WOLFE");
        }
    }

    if (OrthantwiseC != 0.0 && linesearch != Optimization.LineSearch.RegularWolfe)
    {
        throw OperationException("Orthant-wise updates are only available with Regular Wolfe line search.",
            "LBFGSERR_INVALID_LINESEARCH");
    }

    // Marshal the public settings into the solver's parameter struct.
    var param = new lbfgs_parameter_t()
    {
        m = m,
        epsilon = epsilon,
        past = past,
        delta = delta,
        max_iterations = max_iterations,
        linesearch = linesearch,
        max_linesearch = max_linesearch,
        min_step = min_step,
        max_step = max_step,
        ftol = ftol,
        wolfe = wolfe,
        gtol = gtol,
        xtol = xtol,
        orthantwise_c = orthantwise_c,
        orthantwise_start = orthantwise_start,
        orthantwise_end = orthantwise_end,
    };

    // Run the solver and map its return code onto the status enumeration.
    LBFGS.Code ret = (LBFGS.Code)LBFGS.main(Solution, Function, Gradient, Progress, param);
    Status = (BroydenFletcherGoldfarbShannoStatus)ret;

    if (!Enum.IsDefined(typeof(BroydenFletcherGoldfarbShannoStatus), Status))
    {
        throw new InvalidOperationException("Unhandled return code: " + ret);
    }

    return Status == BroydenFletcherGoldfarbShannoStatus.Success ||
           Status == BroydenFletcherGoldfarbShannoStatus.AlreadyMinimized;
}
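// Callers never invoke Optimize() directly; it runs inside the public
// Minimize()/Maximize() entry points. A minimal usage sketch, assuming the
// Accord.Math.Optimization API this method appears to belong to (the
// constructor and property names below come from that library, not from the
// snippet above):
using System;
using Accord.Math.Optimization;

class LbfgsUsageSketch
{
    static void Main()
    {
        // Minimize f(x, y) = (x - 1)^2 + (y + 2)^2; the gradient is
        // (2(x - 1), 2(y + 2)) and the minimum sits at (1, -2).
        Func<double[], double> f = w => Math.Pow(w[0] - 1, 2) + Math.Pow(w[1] + 2, 2);
        Func<double[], double[]> g = w => new[] { 2 * (w[0] - 1), 2 * (w[1] + 2) };

        var lbfgs = new BroydenFletcherGoldfarbShanno(
            numberOfVariables: 2, function: f, gradient: g);

        bool success = lbfgs.Minimize();    // drives the protected Optimize() above
        double[] solution = lbfgs.Solution; // approximately [1.0, -2.0]
        Console.WriteLine("{0}: ({1:0.###}, {2:0.###})", success, solution[0], solution[1]);
    }
}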
public void CompareToSparse()
{
    var dim = 10;
    var lbfgs = new LBFGS(3);

    // Dense run.
    var x = Vector.Zero(dim);
    x = lbfgs.Run(x, 1.0, f);
    Console.WriteLine(x);

    // Sparse run should reach the same minimizer.
    SparseVector y = SparseVector.Zero(dim);
    y = (SparseVector)lbfgs.Run(y, 1.0, f);
    Console.WriteLine(y);
    Assert.True(y.SparseCount == 2);
    Assert.True(y == x);

    // Array run: split the variables across two sparse half-vectors.
    SparseVector[] z = new SparseVector[2];
    z[0] = SparseVector.Zero(dim / 2);
    z[1] = SparseVector.Zero(dim / 2);
    var lbfgsa = new LBFGSArray(3);
    var result = lbfgsa.Run(z, 1.0, delegate (Vector[] o, ref Vector[] grad)
    {
        Vector gradx = Vector.Zero(2);
        double res = f(Vector.FromArray(new double[] { o[0][0], o[1][0] }), ref gradx);
        if (grad != null)
        {
            grad[0][0] = gradx[0];
            grad[1][0] = gradx[1];
        }
        return res;
    });
    Assert.True(MMath.AbsDiff(result[0][0], x[0]) < 1e-5);
    Assert.True(MMath.AbsDiff(result[1][0], x[1]) < 1e-5);
    Assert.True((result[0] as SparseVector).SparseCount == 1);
}
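// The objective f used by CompareToSparse is defined elsewhere in the test
// fixture and is not shown here. For the assertions to hold, f must depend
// only on x[0] and x[1], so that the sparse minimizer has exactly two
// non-zeros (one per half in the split run). A hypothetical stand-in with
// that shape; the quadratic and its minimum at (1, 2) are invented for
// illustration:
private static double f(Vector x, ref Vector grad)
{
    double a = x[0] - 1.0;
    double b = x[1] - 2.0;
    if (grad != null)
    {
        grad.SetAllElementsTo(0.0); // gradient is zero everywhere else
        grad[0] = 2.0 * a;
        grad[1] = 2.0 * b;
    }
    return a * a + b * b;
}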
private static void ShowEvaluation(int recordNum, ModelWriter feature_index, LBFGS lbfgs, int termNum, int itr,
    int[,] merr, int[] yfreq, double diff, DateTime startDT, long nonzero_feature_num, EncoderOptions args)
{
    var ts = DateTime.Now - startDT;
    if (args.DebugLevel > 1)
    {
        for (var i = 0; i < feature_index.y_.Count; i++)
        {
            // Collect the confusion rates of tag i against every other tag.
            var total_merr = 0;
            var sdict = new SortedDictionary<double, List<string>>();
            for (var j = 0; j < feature_index.y_.Count; j++)
            {
                total_merr += merr[i, j];
                var v = (double)merr[i, j] / (double)yfreq[i];
                if (v > 0.0001)
                {
                    if (sdict.ContainsKey(v) == false)
                    {
                        sdict.Add(v, new List<string>());
                    }
                    sdict[v].Add(feature_index.y_[j]);
                }
            }

            // Per-tag test error rate as a percentage.
            var vet = (double)total_merr / (double)yfreq[i];
            vet = vet * 100.0F;

            Console.ForegroundColor = ConsoleColor.Green;
            Console.Write("{0} ", feature_index.y_[i]);
            Console.ResetColor();
            Console.Write("[FR={0}, TE=", yfreq[i]);
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.Write("{0:0.00}%", vet);
            Console.ResetColor();
            Console.WriteLine("]");

            var n = 0;
            foreach (var pair in sdict.Reverse())
            {
                for (int index = 0; index < pair.Value.Count; index++)
                {
                    var item = pair.Value[index];
                    n += item.Length + 1 + 7;
                    if (n > 80)
                    {
                        // Only one line of data is shown; anything past its end is dropped.
                        break;
                    }
                    Console.Write("{0}:", item);
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.Write("{0:0.00}% ", pair.Key * 100);
                    Console.ResetColor();
                }
                if (n > 80)
                {
                    break;
                }
            }
            Console.WriteLine();
        }
    }

    var act_feature_rate = (double)(nonzero_feature_num) / (double)(feature_index.feature_size()) * 100.0;
    //Logger.WriteLine("iter={0} terr={1:0.00000} serr={2:0.00000} diff={3:0.000000} fsize={4}({5:0.00}% act)",
    //    itr, 1.0 * lbfgs.err / termNum, 1.0 * lbfgs.zeroone / recordNum, diff, feature_index.feature_size(), act_feature_rate);
    //Logger.WriteLine("Time span: {0}, Aver. time span per iter: {1}",
    //    ts, new TimeSpan(0, 0, (int)(ts.TotalSeconds / (itr + 1))));
}
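// A side note on the sdict.Reverse() traversal above: SortedDictionary keeps
// its keys in ascending order, and LINQ's Enumerable.Reverse walks them back
// to front, which is why the worst confusion rates print first. A
// self-contained sketch of that pattern (the tag names and rates are
// invented):
using System;
using System.Collections.Generic;
using System.Linq;

class SortedReverseSketch
{
    static void Main()
    {
        var sdict = new SortedDictionary<double, List<string>>
        {
            [0.02] = new List<string> { "NN" },
            [0.15] = new List<string> { "VB", "JJ" },
            [0.07] = new List<string> { "RB" },
        };
        // Prints 15.00%, then 7.00%, then 2.00% -- descending by error rate.
        foreach (var pair in sdict.Reverse())
        {
            Console.WriteLine("{0:0.00}% -> {1}", pair.Key * 100, string.Join(", ", pair.Value));
        }
    }
}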
bool runCRF(EncoderTagger[] x, ModelWriter modelWriter, bool orthant, EncoderOptions args)
{
    var old_obj = double.MaxValue;
    var converge = 0;
    var lbfgs = new LBFGS(args.ThreadsNum);
    lbfgs.expected = new double[modelWriter.feature_size() + 1];

    var processList = new List<CRFEncoderThread>();
    var parallelOption = new ParallelOptions();
    parallelOption.MaxDegreeOfParallelism = args.ThreadsNum;

    //Initialize encoding threads
    for (var i = 0; i < args.ThreadsNum; i++)
    {
        var thread = new CRFEncoderThread();
        thread.start_i = i;
        thread.thread_num = args.ThreadsNum;
        thread.x = x;
        thread.lbfgs = lbfgs;
        thread.Init();
        processList.Add(thread);
    }

    //Count terms and the frequency of each result tag
    var termNum = 0;
    var yfreq = new int[modelWriter.y_.Count];
    for (int index = 0; index < x.Length; index++)
    {
        var tagger = x[index];
        termNum += tagger.word_num;
        for (var j = 0; j < tagger.word_num; j++)
        {
            yfreq[tagger.answer_[j]]++;
        }
    }

    //Iterative training
    var startDT = DateTime.Now;
    var dMinErrRecord = 1.0;
    for (var itr = 0; itr < args.MaxIteration; ++itr)
    {
        //Clear result containers
        lbfgs.obj = 0.0f;
        lbfgs.err = 0;
        lbfgs.zeroone = 0;
        Array.Clear(lbfgs.expected, 0, lbfgs.expected.Length);

        var threadList = new List<Thread>();
        for (var i = 0; i < args.ThreadsNum; i++)
        {
            var thread = new Thread(processList[i].Run);
            thread.Start();
            threadList.Add(thread);
        }

        //Join the workers and merge their objective, error counts and confusion matrix
        var merr = new int[modelWriter.y_.Count, modelWriter.y_.Count];
        for (var i = 0; i < args.ThreadsNum; ++i)
        {
            threadList[i].Join();
            lbfgs.obj += processList[i].obj;
            lbfgs.err += processList[i].err;
            lbfgs.zeroone += processList[i].zeroone;

            for (var j = 0; j < modelWriter.y_.Count; j++)
            {
                for (var k = 0; k < modelWriter.y_.Count; k++)
                {
                    merr[j, k] += processList[i].merr[j, k];
                }
            }
        }

        long num_nonzero = 0;
        var fsize = modelWriter.feature_size();
        var alpha = modelWriter.alpha_;
        if (orthant == true)
        {
            //L1 regularization
            Parallel.For<double>(1, fsize + 1, parallelOption, () => 0,
                (k, loop, subtotal) =>
                {
                    subtotal += Math.Abs(alpha[k] / modelWriter.cost_factor_);
                    if (alpha[k] != 0.0)
                    {
                        Interlocked.Increment(ref num_nonzero);
                    }
                    return subtotal;
                },
                (subtotal) => // lock-free accumulator
                {
                    double initialValue;
                    double newValue;
                    do
                    {
                        initialValue = lbfgs.obj;           // read current value
                        newValue = initialValue + subtotal; // compute new value
                    } while (initialValue != Interlocked.CompareExchange(ref lbfgs.obj, newValue, initialValue));
                });
        }
        else
        {
            //L2 regularization
            num_nonzero = fsize;
            Parallel.For<double>(1, fsize + 1, parallelOption, () => 0,
                (k, loop, subtotal) =>
                {
                    subtotal += (alpha[k] * alpha[k] / (2.0 * modelWriter.cost_factor_));
                    lbfgs.expected[k] += (alpha[k] / modelWriter.cost_factor_);
                    return subtotal;
                },
                (subtotal) => // lock-free accumulator
                {
                    double initialValue;
                    double newValue;
                    do
                    {
                        initialValue = lbfgs.obj;           // read current value
                        newValue = initialValue + subtotal; // compute new value
                    } while (initialValue != Interlocked.CompareExchange(ref lbfgs.obj, newValue, initialValue));
                });
        }

        //Show the result of each iteration
        var diff = (itr == 0 ? 1.0f : Math.Abs(old_obj - lbfgs.obj) / old_obj);
        old_obj = lbfgs.obj;
        ShowEvaluation(x.Length, modelWriter, lbfgs, termNum, itr, merr, yfreq, diff, startDT, num_nonzero, args);

        if (diff < args.MinDifference)
        {
            converge++;
        }
        else
        {
            converge = 0;
        }

        if (itr > args.MaxIteration || converge == 3)
        {
            break; // 3 consecutive converged iterations is an ad-hoc stopping rule
        }

        if (args.DebugLevel > 0 && (double)lbfgs.zeroone / (double)x.Length < dMinErrRecord)
        {
            var cc = Console.ForegroundColor;
            Console.ForegroundColor = ConsoleColor.Red;
            Console.Write("[Debug Mode] ");
            Console.ForegroundColor = cc;

            //Save the current best feature weights into a file
            dMinErrRecord = (double)lbfgs.zeroone / (double)x.Length;
            modelWriter.SaveFeatureWeight("feature_weight_tmp", false);
        }

        var iret = lbfgs.optimize(alpha, modelWriter.cost_factor_, orthant);
        if (iret <= 0)
        {
            return false;
        }
    }
    return true;
}
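// Both Parallel.For reducers in runCRF fold each thread's subtotal into
// lbfgs.obj with the same lock-free pattern: Interlocked has no Add overload
// for double, so the addition is retried with CompareExchange until no other
// thread has changed the accumulator in between. A self-contained sketch of
// that pattern (the names here are illustrative, not from the CRF code):
using System;
using System.Threading;
using System.Threading.Tasks;

class LockFreeDoubleSum
{
    static double total; // shared accumulator, updated without locks

    // Atomically performs target += value via a compare-and-swap retry loop.
    static void AtomicAdd(ref double target, double value)
    {
        double initial, computed;
        do
        {
            initial = target;            // read the current value
            computed = initial + value;  // compute the new value
        }
        while (initial != Interlocked.CompareExchange(ref target, computed, initial));
    }

    static void Main()
    {
        Parallel.For<double>(0, 1_000_000,
            () => 0.0,                                   // thread-local seed
            (i, state, subtotal) => subtotal + 1e-6,     // thread-local partial sum
            subtotal => AtomicAdd(ref total, subtotal)); // lock-free fold
        Console.WriteLine(total); // ~1.0, regardless of thread interleaving
    }
}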