internal ColumnInfo(Column item, Arguments args)
{
    Contracts.CheckValue(item, nameof(item));
    Contracts.CheckValue(args, nameof(args));

    Input = item.Source ?? item.Name;
    Output = item.Name;

    if (item.UseAlpha ?? args.UseAlpha)
    {
        Colors |= ColorBits.Alpha;
        Planes++;
    }
    if (item.UseRed ?? args.UseRed)
    {
        Colors |= ColorBits.Red;
        Planes++;
    }
    if (item.UseGreen ?? args.UseGreen)
    {
        Colors |= ColorBits.Green;
        Planes++;
    }
    if (item.UseBlue ?? args.UseBlue)
    {
        Colors |= ColorBits.Blue;
        Planes++;
    }
    Contracts.CheckUserArg(Planes > 0, nameof(item.UseRed), "Need to use at least one color plane");

    Interleave = item.InterleaveArgb ?? args.InterleaveArgb;

    AsFloat = item.Convert ?? args.Convert;
    if (!AsFloat)
    {
        Offset = Defaults.Offset;
        Scale = Defaults.Scale;
    }
    else
    {
        Offset = item.Offset ?? args.Offset ?? Defaults.Offset;
        Scale = item.Scale ?? args.Scale ?? Defaults.Scale;
        Contracts.CheckUserArg(FloatUtils.IsFinite(Offset), nameof(item.Offset));
        Contracts.CheckUserArg(FloatUtils.IsFiniteNonZero(Scale), nameof(item.Scale));
    }
}
protected bool TryNormalize(VBuffer<Single>[] values)
{
    if (!Normalize)
        return true;

    for (int i = 0; i < values.Length; i++)
    {
        // Leave a zero vector as all zeros. Otherwise, make the L1 norm equal to 1.
        var sum = VectorUtils.L1Norm(in values[i]);
        if (!FloatUtils.IsFinite(sum))
            return false;
        if (sum > 0)
            VectorUtils.ScaleBy(ref values[i], 1 / sum);
    }
    return true;
}
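For reference, a minimal standalone sketch of the same L1 rescaling on a plain array (illustrative only; the real method operates on VBuffer<Single> through VectorUtils, and the class and method names here are made up):

using System;

internal static class L1NormalizeSketch
{
    // Returns false when the L1 norm is NaN or infinite; leaves an all-zero vector untouched.
    public static bool TryNormalizeL1(float[] values)
    {
        float sum = 0;
        foreach (var v in values)
            sum += Math.Abs(v);                  // L1 norm = sum of absolute values

        if (float.IsNaN(sum) || float.IsInfinity(sum))
            return false;                        // reject rows with a non-finite norm

        if (sum > 0)
        {
            for (int i = 0; i < values.Length; i++)
                values[i] /= sum;                // make the L1 norm equal to 1
        }
        return true;
    }
}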
public override void ProcessRow()
{
    float label = 0;
    _labelGetter(ref label);
    _scoreGetter(ref Score);

    if (float.IsNaN(label))
    {
        NumUnlabeledInstances++;
        return;
    }

    if (IsNaN(ref Score))
    {
        NumBadScores++;
        return;
    }

    float weight = 1;
    if (_weightGetter != null)
    {
        _weightGetter(ref weight);
        if (!FloatUtils.IsFinite(weight))
        {
            NumBadWeights++;
            weight = 1;
        }
    }

    ApplyLossFunction(ref Score, label, ref Loss);
    UnweightedCounters.Update(ref Score, label, 1, ref Loss);
    if (WeightedCounters != null)
        WeightedCounters.Update(ref Score, label, weight, ref Loss);
}
protected override float AccumulateOneGradient(ref VBuffer <float> feat, float label, float weight, ref VBuffer <float> x, ref VBuffer <float> grad, ref float[] scores) { if (Utils.Size(scores) < _numClasses) { scores = new float[_numClasses]; } float bias = 0; for (int c = 0, start = _numClasses; c < _numClasses; c++, start += NumFeatures) { x.GetItemOrDefault(c, ref bias); scores[c] = bias + VectorUtils.DotProductWithOffset(ref x, start, ref feat); } float logZ = MathUtils.SoftMax(scores, _numClasses); float datumLoss = logZ; int lab = (int)label; Contracts.Assert(0 <= lab && lab < _numClasses); for (int c = 0, start = _numClasses; c < _numClasses; c++, start += NumFeatures) { float probLabel = lab == c ? 1 : 0; datumLoss -= probLabel * scores[c]; float modelProb = MathUtils.ExpSlow(scores[c] - logZ); float mult = weight * (modelProb - probLabel); VectorUtils.AddMultWithOffset(ref feat, mult, ref grad, start); // Due to the call to EnsureBiases, we know this region is dense. Contracts.Assert(grad.Count >= BiasCount && (grad.IsDense || grad.Indices[BiasCount - 1] == BiasCount - 1)); grad.Values[c] += mult; } Contracts.Check(FloatUtils.IsFinite(datumLoss), "Data contain bad values."); return(weight * datumLoss); }
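For clarity, the loss and gradient this method accumulates correspond to the standard multinomial logistic (softmax) objective. A sketch of the math implied by the code above, for one example x with true class y and weight w:

\[ s_c = b_c + w_c \cdot x, \qquad \log Z = \log \sum_c e^{s_c} \]
\[ \ell = \log Z - s_y \quad (\text{the } datumLoss) \]
\[ p_c = e^{s_c - \log Z}, \qquad \frac{\partial \ell}{\partial b_c} = p_c - \mathbb{1}[c = y], \qquad \frac{\partial \ell}{\partial w_c} = (p_c - \mathbb{1}[c = y])\, x \]

Every contribution is scaled by the example weight w, which is the `mult` factor in the code.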
/// <summary>
/// This should be overridden by derived classes. This implementation simply increments
/// <see cref="NumIterExamples"/>; the console debug output below is compiled only under OLD_TRACING.
/// </summary>
protected virtual void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat, Float label, Float weight)
{
    Contracts.Assert(FloatUtils.IsFinite(feat.Values, feat.Count));

    ++NumIterExamples;
#if OLD_TRACING // REVIEW: How should this be ported?
    if (DebugLevel > 2)
    {
        Vector features = instance.Features;
        Host.StdOut.Write("Instance has label {0} and {1} features:", instance.Label, features.Length);
        for (int i = 0; i < features.Length; i++)
        {
            Host.StdOut.Write('\t');
            Host.StdOut.Write(features[i]);
        }
        Host.StdOut.WriteLine();
    }

    if (DebugLevel > 1)
    {
        if (_numIterExamples % 5000 == 0)
        {
            Host.StdOut.Write('.');
            if (_numIterExamples % 500000 == 0)
            {
                Host.StdOut.Write(" ");
                Host.StdOut.Write(_numIterExamples);
                if (_numIterExamples % 5000000 == 0)
                {
                    Host.StdOut.Write(" ");
                    Host.StdOut.Write(DateTime.UtcNow);
                }
                Host.StdOut.WriteLine();
            }
        }
    }
#endif
}
/// <summary> /// Finds approximate minimum of the function /// </summary> /// <param name="function">Function to minimize</param> /// <param name="initial">Initial point</param> /// <param name="result">Approximate minimum</param> public void Minimize(DifferentiableFunction function, ref VBuffer <Float> initial, ref VBuffer <Float> result) { Contracts.Check(FloatUtils.IsFinite(initial.Values, initial.Count), "The initial vector contains NaNs or infinite values."); LineFunc lineFunc = new LineFunc(function, ref initial, UseCG); VBuffer <Float> prev = default(VBuffer <Float>); initial.CopyTo(ref prev); for (int n = 0; _maxSteps == 0 || n < _maxSteps; ++n) { Float step = LineSearch.Minimize(lineFunc.Eval, lineFunc.Value, lineFunc.Deriv); var newPoint = lineFunc.NewPoint; bool terminateNow = n > 0 && TerminateTester.ShouldTerminate(ref newPoint, ref prev); if (terminateNow || Terminate(ref newPoint)) { break; } newPoint.CopyTo(ref prev); lineFunc.ChangeDir(); } lineFunc.NewPoint.CopyTo(ref result); }
public void Aggregate(double diff, int line, int col)
{
    if (diff == 0)
        return;

    if (!FloatUtils.IsFinite(diff))
    {
        InfCount++;
        return;
    }

    if (DiffMax < diff)
    {
        DiffMax = diff;
        LineMax = line;
        ColMax = col;
    }
    DiffTot += diff;
    DiffCount++;
}
public void Save(ModelSaveContext ctx)
{
    Contracts.AssertValue(ctx);

    // *** Binary format ***
    // int: Dimension
    // int: Rank
    // for i=0,..,Rank-1:
    //   float[]: the i'th eigenvector
    // int: the size of MeanProjected (0 if it is null)
    // float[]: MeanProjected

    Contracts.Assert(0 < Rank && Rank <= Dimension);
    ctx.Writer.Write(Dimension);
    ctx.Writer.Write(Rank);
    for (int i = 0; i < Rank; i++)
    {
        Contracts.Assert(FloatUtils.IsFinite(Eigenvectors[i]));
        ctx.Writer.WriteSinglesNoCount(Eigenvectors[i].AsSpan(0, Dimension));
    }
    Contracts.Assert(MeanProjected == null || (MeanProjected.Length == Rank && FloatUtils.IsFinite(MeanProjected)));
    ctx.Writer.WriteSingleArray(MeanProjected);
}
protected override TModel TrainModelCore(TrainContext context) { Host.CheckValue(context, nameof(context)); var initPredictor = context.InitialPredictor; var initLinearPred = initPredictor as LinearPredictor ?? (initPredictor as CalibratedPredictorBase)?.SubPredictor as LinearPredictor; Host.CheckParam(initPredictor == null || initLinearPred != null, nameof(context), "Not a linear predictor."); var data = context.TrainingSet; data.CheckFeatureFloatVector(out int numFeatures); CheckLabel(data); using (var ch = Host.Start("Training")) { InitCore(ch, numFeatures, initLinearPred); // InitCore should set the number of features field. Contracts.Assert(NumFeatures > 0); TrainCore(ch, data); if (NumBad > 0) { ch.Warning( "Skipped {0} instances with missing features during training (over {1} iterations; {2} inst/iter)", NumBad, Args.NumIterations, NumBad / Args.NumIterations); } Contracts.Assert(WeightsScale == 1); Float maxNorm = Math.Max(VectorUtils.MaxNorm(ref Weights), Math.Abs(Bias)); Contracts.Check(FloatUtils.IsFinite(maxNorm), "The weights/bias contain invalid values (NaN or Infinite). Potential causes: high learning rates, no normalization, high initial weights, etc."); ch.Done(); } return(CreatePredictor()); }
public void GetFeatures(int iCol, int iSlot, Random rand, long key, Span <float> features) { _host.Assert(features.Length == NumFeatures); // get counts from count table in the first _labelBinCount indices. var countsBuffer = features.Slice(0, _labelBinCount); var countTable = _countTables[iCol, iSlot]; countTable.GetCounts(key, countsBuffer); // check if it's garbage and replace with garbage counts if true float sum = 0; foreach (var feat in countsBuffer) { sum += feat; } bool isGarbage = sum < countTable.GarbageThreshold; if (isGarbage) { int i = 0; foreach (var count in countTable.GarbageCounts) { countsBuffer[i++] = count; } } sum = AddLaplacianNoisePerLabel(iCol, rand, countsBuffer); // add log odds in the next _logOddsCount indices. GenerateLogOdds(iCol, countTable, countsBuffer, features.Slice(_labelBinCount, _logOddsCount), sum); _host.Assert(FloatUtils.IsFinite(features)); // Add the last feature: an indicator for isGarbage. features[NumFeatures - 1] = isGarbage ? 1 : 0; }
/// <summary> /// Samples new hyperparameters for the trainer, and sets them. /// Returns true if success (new hyperparameters were suggested and set). Else, returns false. /// </summary> private static bool SampleHyperparameters(MLContext context, SuggestedTrainer trainer, IEnumerable <SuggestedPipelineRunDetail> history, bool isMaximizingMetric, IChannel logger) { try { var sps = ConvertToValueGenerators(trainer.SweepParams); var sweeper = new SmacSweeper(context, new SmacSweeper.Arguments { SweptParameters = sps }); IEnumerable <SuggestedPipelineRunDetail> historyToUse = history .Where(r => r.RunSucceeded && r.Pipeline.Trainer.TrainerName == trainer.TrainerName && r.Pipeline.Trainer.HyperParamSet != null && r.Pipeline.Trainer.HyperParamSet.Any() && FloatUtils.IsFinite(r.Score)); // get new set of hyperparameter values var proposedParamSet = sweeper.ProposeSweeps(1, historyToUse.Select(h => h.ToRunResult(isMaximizingMetric))).FirstOrDefault(); if (!proposedParamSet.Any()) { return(false); } // associate proposed parameter set with trainer, so that smart hyperparameter // sweepers (like KDO) can map them back. trainer.SetHyperparamValues(proposedParamSet); return(true); } catch (Exception ex) { logger.Error($"SampleHyperparameters failed with exception: {ex}"); throw; } }
/// <summary> /// Initialize predictor from a binary file. /// </summary> /// <param name="ctx">The load context</param> /// <param name="env">The host environment</param> private KMeansModelParameters(IHostEnvironment env, ModelLoadContext ctx) : base(env, LoaderSignature, ctx) { // *** Binary format *** // int: k, number of clusters // int: dimensionality, length of the centroid vectors // for each cluster, then: // int: count of this centroid vector (sparse iff count < dimensionality) // int[count]: only present if sparse, in order indices // Float[count]: centroid vector values _k = ctx.Reader.ReadInt32(); Host.CheckDecode(_k > 0); _dimensionality = ctx.Reader.ReadInt32(); Host.CheckDecode(_dimensionality > 0); _centroidL2s = new Float[_k]; _centroids = new VBuffer <Float> [_k]; for (int i = 0; i < _k; i++) { // Prior to allowing sparse vectors, count was not written and was implicitly // always equal to dimensionality, and no indices were written either. int count = ctx.Header.ModelVerWritten >= 0x00010002 ? ctx.Reader.ReadInt32() : _dimensionality; Host.CheckDecode(0 <= count && count <= _dimensionality); var indices = count < _dimensionality?ctx.Reader.ReadIntArray(count) : null; var values = ctx.Reader.ReadFloatArray(count); Host.CheckDecode(FloatUtils.IsFinite(values)); _centroids[i] = new VBuffer <Float>(_dimensionality, count, values, indices); } WarnOnOldNormalizer(ctx, GetType(), Host); InitPredictor(); _inputType = new VectorType(NumberType.Float, _dimensionality); _outputType = new VectorType(NumberType.Float, _k); }
protected sealed override TModel TrainModelCore(TrainContext context) { Host.CheckValue(context, nameof(context)); var initPredictor = context.InitialPredictor; var initLinearPred = initPredictor as LinearPredictor ?? (initPredictor as CalibratedPredictorBase)?.SubPredictor as LinearPredictor; Host.CheckParam(initPredictor == null || initLinearPred != null, nameof(context), "Not a linear predictor."); var data = context.TrainingSet; data.CheckFeatureFloatVector(out int numFeatures); CheckLabels(data); using (var ch = Host.Start("Training")) { var state = MakeState(ch, numFeatures, initLinearPred); TrainCore(ch, data, state); ch.Assert(state.WeightsScale == 1); Float maxNorm = Math.Max(VectorUtils.MaxNorm(ref state.Weights), Math.Abs(state.Bias)); ch.Check(FloatUtils.IsFinite(maxNorm), "The weights/bias contain invalid values (NaN or Infinite). Potential causes: high learning rates, no normalization, high initial weights, etc."); return(state.CreatePredictor()); } }
public ColInfoEx(Column item, Arguments args)
{
    if (item.UseAlpha ?? args.UseAlpha)
    {
        Colors |= ColorBits.Alpha;
        Planes++;
    }
    if (item.UseRed ?? args.UseRed)
    {
        Colors |= ColorBits.Red;
        Planes++;
    }
    if (item.UseGreen ?? args.UseGreen)
    {
        Colors |= ColorBits.Green;
        Planes++;
    }
    if (item.UseBlue ?? args.UseBlue)
    {
        Colors |= ColorBits.Blue;
        Planes++;
    }
    Contracts.CheckUserArg(Planes > 0, nameof(item.UseRed), "Need to use at least one color plane");

    Interleave = item.InterleaveArgb ?? args.InterleaveArgb;

    Convert = item.Convert ?? args.Convert;
    if (!Convert)
    {
        Offset = 0;
        Scale = 1;
    }
    else
    {
        Offset = item.Offset ?? args.Offset ?? 0;
        Scale = item.Scale ?? args.Scale ?? 1;
        Contracts.CheckUserArg(FloatUtils.IsFinite(Offset), nameof(item.Offset));
        Contracts.CheckUserArg(FloatUtils.IsFiniteNonZero(Scale), nameof(item.Scale));
    }
}
internal OlsLinearRegressionPredictor(IHostEnvironment env, ref VBuffer <Float> weights, Float bias, Double[] standardErrors, Double[] tValues, Double[] pValues, Double rSquared, Double rSquaredAdjusted) : base(env, RegistrationName, ref weights, bias) { Contracts.AssertValueOrNull(standardErrors); Contracts.AssertValueOrNull(tValues); Contracts.AssertValueOrNull(pValues); // If r-squared is NaN then the other statistics must be null, however, if r-rsquared is not NaN, // then the statistics may be null if creation of statistics was suppressed. Contracts.Assert(!Double.IsNaN(rSquaredAdjusted) || standardErrors == null); // Nullity or not must be consistent between the statistics. Contracts.Assert((standardErrors == null) == (tValues == null) && (tValues == null) == (pValues == null)); Contracts.Assert(0 <= rSquared & rSquared <= 1); Contracts.Assert(Double.IsNaN(rSquaredAdjusted) | (0 <= rSquaredAdjusted & rSquaredAdjusted <= 1)); if (standardErrors != null) { // If not null, the input arrays must have one value for each parameter. Contracts.Assert(Utils.Size(standardErrors) == weights.Length + 1); Contracts.Assert(Utils.Size(tValues) == weights.Length + 1); Contracts.Assert(Utils.Size(pValues) == weights.Length + 1); #if DEBUG for (int i = 0; i <= weights.Length; ++i) { Contracts.Assert(FloatUtils.IsFinite(standardErrors[i])); Contracts.Assert(FloatUtils.IsFinite(tValues[i])); Contracts.Assert(FloatUtils.IsFinite(pValues[i])); } #endif } _standardErrors = standardErrors; _tValues = tValues; _pValues = pValues; _rSquared = rSquared; _rSquaredAdjusted = rSquaredAdjusted; }
private FieldAwareFactorizationMachineModelParameters TrainCore(IChannel ch, IProgressChannel pch, RoleMappedData data, RoleMappedData validData = null, FieldAwareFactorizationMachineModelParameters predictor = null) { _host.AssertValue(ch); _host.AssertValue(pch); data.CheckBinaryLabel(); var featureColumns = data.Schema.GetColumns(RoleMappedSchema.ColumnRole.Feature); int fieldCount = featureColumns.Count; int totalFeatureCount = 0; int[] fieldColumnIndexes = new int[fieldCount]; for (int f = 0; f < fieldCount; f++) { var col = featureColumns[f]; _host.Assert(!col.IsHidden); if (!(col.Type is VectorDataViewType vectorType) || !vectorType.IsKnownSize || vectorType.ItemType != NumberDataViewType.Single) { throw ch.ExceptParam(nameof(data), "Training feature column '{0}' must be a known-size vector of Single, but has type: {1}.", col.Name, col.Type); } _host.Assert(vectorType.Size > 0); fieldColumnIndexes[f] = col.Index; totalFeatureCount += vectorType.Size; } ch.Check(checked (totalFeatureCount * fieldCount * _latentDimAligned) <= Utils.ArrayMaxSize, "Latent dimension or the number of fields too large"); if (predictor != null) { ch.Check(predictor.FeatureCount == totalFeatureCount, "Input model's feature count mismatches training feature count"); ch.Check(predictor.LatentDimension == _latentDim, "Input model's latent dimension mismatches trainer's"); } if (validData != null) { validData.CheckBinaryLabel(); var validFeatureColumns = data.Schema.GetColumns(RoleMappedSchema.ColumnRole.Feature); _host.Assert(fieldCount == validFeatureColumns.Count); for (int f = 0; f < fieldCount; f++) { var featCol = featureColumns[f]; var validFeatCol = validFeatureColumns[f]; _host.Assert(featCol.Name == validFeatCol.Name); _host.Assert(featCol.Type == validFeatCol.Type); } } bool shuffle = _shuffle; if (shuffle && !data.Data.CanShuffle) { ch.Warning("Training data does not support shuffling, so ignoring request to shuffle"); shuffle = false; } var rng = shuffle ? 
_host.Rand : null; var featureGetters = new ValueGetter <VBuffer <float> > [fieldCount]; var featureBuffer = new VBuffer <float>(); var featureValueBuffer = new float[totalFeatureCount]; var featureIndexBuffer = new int[totalFeatureCount]; var featureFieldBuffer = new int[totalFeatureCount]; var latentSum = new AlignedArray(fieldCount * fieldCount * _latentDimAligned, 16); var metricNames = new List <string>() { "Training-loss" }; if (validData != null) { metricNames.Add("Validation-loss"); } int iter = 0; long exampleCount = 0; long badExampleCount = 0; long validBadExampleCount = 0; double loss = 0; double validLoss = 0; pch.SetHeader(new ProgressHeader(metricNames.ToArray(), new string[] { "iterations", "examples" }), entry => { entry.SetProgress(0, iter, _numIterations); entry.SetProgress(1, exampleCount); }); var columns = data.Schema.Schema.Where(x => fieldColumnIndexes.Contains(x.Index)).ToList(); columns.Add(data.Schema.Label.Value); if (data.Schema.Weight != null) { columns.Add(data.Schema.Weight.Value); } InitializeTrainingState(fieldCount, totalFeatureCount, predictor, out float[] linearWeights, out AlignedArray latentWeightsAligned, out float[] linearAccSqGrads, out AlignedArray latentAccSqGradsAligned); // refer to Algorithm 3 in https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf while (iter++ < _numIterations) { using (var cursor = data.Data.GetRowCursor(columns, rng)) { var labelGetter = RowCursorUtils.GetLabelGetter(cursor, data.Schema.Label.Value.Index); var weightGetter = data.Schema.Weight?.Index is int weightIdx?RowCursorUtils.GetGetterAs <float>(NumberDataViewType.Single, cursor, weightIdx) : null; for (int i = 0; i < fieldCount; i++) { featureGetters[i] = cursor.GetGetter <VBuffer <float> >(cursor.Schema[fieldColumnIndexes[i]]); } loss = 0; exampleCount = 0; badExampleCount = 0; while (cursor.MoveNext()) { float label = 0; float weight = 1; int count = 0; float modelResponse = 0; labelGetter(ref label); weightGetter?.Invoke(ref weight); float annihilation = label - label + weight - weight; if (!FloatUtils.IsFinite(annihilation)) { badExampleCount++; continue; } if (!FieldAwareFactorizationMachineUtils.LoadOneExampleIntoBuffer(featureGetters, featureBuffer, _norm, ref count, featureFieldBuffer, featureIndexBuffer, featureValueBuffer)) { badExampleCount++; continue; } // refer to Algorithm 1 in [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf FieldAwareFactorizationMachineInterface.CalculateIntermediateVariables(fieldCount, _latentDimAligned, count, featureFieldBuffer, featureIndexBuffer, featureValueBuffer, linearWeights, latentWeightsAligned, latentSum, ref modelResponse); var slope = CalculateLossSlope(label, modelResponse); // refer to Algorithm 2 in [3] https://github.com/wschin/fast-ffm/blob/master/fast-ffm.pdf FieldAwareFactorizationMachineInterface.CalculateGradientAndUpdate(_lambdaLinear, _lambdaLatent, _learningRate, fieldCount, _latentDimAligned, weight, count, featureFieldBuffer, featureIndexBuffer, featureValueBuffer, latentSum, slope, linearWeights, latentWeightsAligned, linearAccSqGrads, latentAccSqGradsAligned); loss += weight * CalculateLoss(label, modelResponse); exampleCount++; } loss /= exampleCount; } if (_verbose) { if (validData == null) { pch.Checkpoint(loss, iter, exampleCount); } else { validLoss = CalculateAvgLoss(ch, validData, _norm, linearWeights, latentWeightsAligned, _latentDimAligned, latentSum, featureFieldBuffer, featureIndexBuffer, featureValueBuffer, featureBuffer, ref validBadExampleCount); 
pch.Checkpoint(loss, validLoss, iter, exampleCount); } } } if (badExampleCount != 0) { ch.Warning($"Skipped {badExampleCount} examples with bad label/weight/features in training set"); } if (validBadExampleCount != 0) { ch.Warning($"Skipped {validBadExampleCount} examples with bad label/weight/features in validation set"); } return(new FieldAwareFactorizationMachineModelParameters(_host, _norm, fieldCount, totalFeatureCount, _latentDim, linearWeights, latentWeightsAligned)); }
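For context, and assuming the standard field-aware factorization machine formulation from the fast-ffm reference cited in the code, the model response being fit is roughly:

\[ \hat{y}(x) = \sum_i w_i x_i + \sum_i \sum_{j > i} \langle v_{i, f(j)},\, v_{j, f(i)} \rangle\, x_i x_j \]

where f(j) is the field of feature j and each latent vector v has `_latentDim` components. The `linearAccSqGrads` and `latentAccSqGradsAligned` buffers suggest AdaGrad-style per-coordinate step sizes, but that detail lives inside `CalculateGradientAndUpdate` and is not shown in this excerpt.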
/// <summary>
/// Finds the bins.
/// </summary>
private void FindBinsFromDistinctCounts(double[] distinctValues, int[] counts, int numValues, int maxBins, out double[] binUpperBounds, out int firstBinCount)
{
    Contracts.Assert(0 <= numValues && numValues <= distinctValues.Length);
    Contracts.Assert(numValues <= counts.Length);

#if DEBUG
    int inv = 0;
    int bad = 0;
    var prev = double.NegativeInfinity;
    for (int i = 0; i < numValues; i++)
    {
        var v = distinctValues[i];
        if (!FloatUtils.IsFinite(v))
            bad++;
        else
        {
            if (!(prev < v))
                inv++;
            prev = v;
        }
    }
    Contracts.Assert(bad == 0, "distinctValues passed to FindBinsFromDistinctCounts contains non-finite values");
    Contracts.Assert(inv == 0, "distinctValues passed to FindBinsFromDistinctCounts is not sorted");
#endif

    if (numValues <= maxBins)
    {
        binUpperBounds = new double[Math.Max(1, numValues)];
        for (int i = 1; i < binUpperBounds.Length; i++)
            binUpperBounds[i - 1] = GetSplitValue(distinctValues[i - 1], distinctValues[i]);
        binUpperBounds[binUpperBounds.Length - 1] = double.PositiveInfinity;
        firstBinCount = numValues > 0 ? counts[0] : 0;
        return;
    }

    var path = new int[maxBins + 1];
    _finder.FindBinsWithCounts(counts, numValues, maxBins, path);
    binUpperBounds = new double[maxBins];
    for (int i = 1; i < binUpperBounds.Length; i++)
        binUpperBounds[i - 1] = GetSplitValue(distinctValues[path[i] - 1], distinctValues[path[i]]);
    binUpperBounds[binUpperBounds.Length - 1] = double.PositiveInfinity;

    // Compute the first bin count: the number of values that do not exceed the first bin's upper bound.
    firstBinCount = 0;
    var firstBinUpperBound = binUpperBounds[0];
    for (int v = 0; v < numValues; ++v)
    {
        if (distinctValues[v] > firstBinUpperBound)
            break;
        firstBinCount += counts[v];
    }
}
private OlsLinearRegressionPredictor TrainCore(IChannel ch, FloatLabelCursor.Factory cursorFactory, int featureCount) { Host.AssertValue(ch); ch.AssertValue(cursorFactory); int m = featureCount + 1; // Check for memory conditions first. if ((long)m * (m + 1) / 2 > int.MaxValue) { throw ch.Except("Cannot hold covariance matrix in memory with {0} features", m - 1); } // Track the number of examples. long n = 0; // Since we are accumulating over many values, we use Double even for the single precision build. var xty = new Double[m]; // The layout of this algorithm is a packed row-major lower triangular matrix. var xtx = new Double[m * (m + 1) / 2]; // Build X'X (lower triangular) and X'y incrementally (X'X+=X'X_i; X'y+=X'y_i): using (var cursor = cursorFactory.Create()) { while (cursor.MoveNext()) { var yi = cursor.Label; // Increment first element of X'y xty[0] += yi; // Increment first element of lower triangular X'X xtx[0] += 1; var values = cursor.Features.GetValues(); if (cursor.Features.IsDense) { int ioff = 1; ch.Assert(values.Length + 1 == m); // Increment rest of first column of lower triangular X'X for (int i = 1; i < m; i++) { ch.Assert(ioff == i * (i + 1) / 2); var val = values[i - 1]; // Add the implicit first bias term to X'X xtx[ioff++] += val; // Add the remainder of X'X for (int j = 0; j < i; j++) { xtx[ioff++] += val * values[j]; } // X'y xty[i] += val * yi; } ch.Assert(ioff == xtx.Length); } else { var fIndices = cursor.Features.GetIndices(); for (int ii = 0; ii < values.Length; ++ii) { int i = fIndices[ii] + 1; int ioff = i * (i + 1) / 2; var val = values[ii]; // Add the implicit first bias term to X'X xtx[ioff++] += val; // Add the remainder of X'X for (int jj = 0; jj <= ii; jj++) { xtx[ioff + fIndices[jj]] += val * values[jj]; } // X'y xty[i] += val * yi; } } n++; } ch.Check(n > 0, "No training examples in dataset."); if (cursor.BadFeaturesRowCount > 0) { ch.Warning("Skipped {0} instances with missing features/label during training", cursor.SkippedRowCount); } if (_l2Weight > 0) { // Skip the bias term for regularization, in the ridge regression case. // So start at [1,1] instead of [0,0]. // REVIEW: There are two ways to view this, firstly, it is more // user friendly ot make this scaling factor behave similarly regardless // of data size, so that if you have the same parameters, you get the same // model if you feed in your data than if you duplicate your data 10 times. // This is what I have now. The alternate point of view is to view this // L2 regularization parameter as providing some sort of prior, in which // case duplication 10 times should in fact be treated differently! (That // is, we should not multiply by n below.) Both interpretations seem // correct, in their way. Double squared = _l2Weight * _l2Weight * n; int ioff = 0; for (int i = 1; i < m; ++i) { xtx[ioff += i + 1] += squared; } ch.Assert(ioff == xtx.Length - 1); } } if (!(_l2Weight > 0) && n < m) { throw ch.Except("Ordinary least squares requires more examples than parameters. There are {0} parameters, but {1} examples. To enable training, use a positive L2 weight so this behaves as ridge regression.", m, n); } Double yMean = n == 0 ? 0 : xty[0] / n; ch.Info("Trainer solving for {0} parameters across {1} examples", m, n); // Cholesky Decomposition of X'X into LL' try { Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, m, xtx); } catch (DllNotFoundException) { // REVIEW: Is there no better way? 
throw ch.ExceptNotSupp("The MKL library (libMklImports) or one of its dependencies is missing."); } // Solve for beta in (LL')beta = X'y: Mkl.Pptrs(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, m, 1, xtx, xty, 1); // Note that the solver overwrote xty so it contains the solution. To be more clear, // we effectively change its name (through reassignment) so we don't get confused that // this is somehow xty in the remaining calculation. var beta = xty; xty = null; // Check that the solution is valid. for (int i = 0; i < beta.Length; ++i) { ch.Check(FloatUtils.IsFinite(beta[i]), "Non-finite values detected in OLS solution"); } var weights = VBufferUtils.CreateDense <float>(beta.Length - 1); for (int i = 1; i < beta.Length; ++i) { weights.Values[i - 1] = (float)beta[i]; } var bias = (float)beta[0]; if (!(_l2Weight > 0) && m == n) { // We would expect the solution to the problem to be exact in this case. ch.Info("Number of examples equals number of parameters, solution is exact but no statistics can be derived"); return(new OlsLinearRegressionPredictor(Host, in weights, bias, null, null, null, 1, float.NaN)); } Double rss = 0; // residual sum of squares Double tss = 0; // total sum of squares using (var cursor = cursorFactory.Create()) { var lrPredictor = new LinearRegressionPredictor(Host, in weights, bias); var lrMap = lrPredictor.GetMapper <VBuffer <float>, float>(); float yh = default; while (cursor.MoveNext()) { var features = cursor.Features; lrMap(in features, ref yh); var e = cursor.Label - yh; rss += e * e; var ydm = cursor.Label - yMean; tss += ydm * ydm; } } var rSquared = ProbClamp(1 - (rss / tss)); // R^2 adjusted differs from the normal formula on account of the bias term, by Said's reckoning. double rSquaredAdjusted; if (n > m) { rSquaredAdjusted = ProbClamp(1 - (1 - rSquared) * (n - 1) / (n - m)); ch.Info("Coefficient of determination R2 = {0:g}, or {1:g} (adjusted)", rSquared, rSquaredAdjusted); } else { rSquaredAdjusted = Double.NaN; } // The per parameter significance is compute intensive and may not be required for all practitioners. // Also we can't estimate it, unless we can estimate the variance, which requires more examples than // parameters. if (!_perParameterSignificance || m >= n) { return(new OlsLinearRegressionPredictor(Host, in weights, bias, null, null, null, rSquared, rSquaredAdjusted)); } ch.Assert(!Double.IsNaN(rSquaredAdjusted)); var standardErrors = new Double[m]; var tValues = new Double[m]; var pValues = new Double[m]; // Invert X'X: Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, m, xtx); var s2 = rss / (n - m); // estimate of variance of y for (int i = 0; i < m; i++) { // Initialize with inverse Hessian. standardErrors[i] = (Single)xtx[i * (i + 1) / 2 + i]; } if (_l2Weight > 0) { // Iterate through all entries of inverse Hessian to make adjustment to variance. int ioffset = 1; float reg = _l2Weight * _l2Weight * n; for (int iRow = 1; iRow < m; iRow++) { for (int iCol = 0; iCol <= iRow; iCol++) { var entry = (Single)xtx[ioffset]; var adjustment = -reg * entry * entry; standardErrors[iRow] -= adjustment; if (0 < iCol && iCol < iRow) { standardErrors[iCol] -= adjustment; } ioffset++; } } Contracts.Assert(ioffset == xtx.Length); } for (int i = 0; i < m; i++) { // sqrt of diagonal entries of s2 * inverse(X'X + reg * I) * X'X * inverse(X'X + reg * I). 
standardErrors[i] = Math.Sqrt(s2 * standardErrors[i]); ch.Check(FloatUtils.IsFinite(standardErrors[i]), "Non-finite standard error detected from OLS solution"); tValues[i] = beta[i] / standardErrors[i]; pValues[i] = (float)MathUtils.TStatisticToPValue(tValues[i], n - m); ch.Check(0 <= pValues[i] && pValues[i] <= 1, "p-Value calculated outside expected [0,1] range"); } return(new OlsLinearRegressionPredictor(Host, in weights, bias, standardErrors, tValues, pValues, rSquared, rSquaredAdjusted)); }
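As a reading aid, the statistics assembled across the three fragments above follow the usual ordinary least squares identities (reconstructed from the code, with an implicit leading 1 feature for the bias, m parameters, and n examples):

\[ (X^T X)\,\beta = X^T y \quad\text{(solved via the Cholesky factorization } X^T X = L L^T\text{)} \]
\[ R^2 = 1 - \mathrm{RSS}/\mathrm{TSS}, \qquad R^2_{\text{adj}} = 1 - (1 - R^2)\,\frac{n - 1}{n - m} \]
\[ s^2 = \mathrm{RSS}/(n - m), \qquad \mathrm{se}_i = \sqrt{s^2\,\big[(X^T X)^{-1}\big]_{ii}}, \qquad t_i = \beta_i/\mathrm{se}_i \]

With L2 regularization, the squared weight times n is added to every diagonal entry of X'X except the bias term, and the standard errors are adjusted accordingly; p-values come from the t distribution with n - m degrees of freedom, as in the call to MathUtils.TStatisticToPValue.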
/// <summary> /// Minimize the function represented by <paramref name="f"/>. /// </summary> /// <param name="f">Stochastic gradients of function to minimize</param> /// <param name="initial">Initial point</param> /// <param name="result">Approximate minimum of <paramref name="f"/></param> public void Minimize(DStochasticGradient f, ref VBuffer <Float> initial, ref VBuffer <Float> result) { Contracts.Check(FloatUtils.IsFinite(initial.Values, initial.Count), "The initial vector contains NaNs or infinite values."); int dim = initial.Length; VBuffer <Float> grad = VBufferUtils.CreateEmpty <Float>(dim); VBuffer <Float> step = VBufferUtils.CreateEmpty <Float>(dim); VBuffer <Float> x = default(VBuffer <Float>); initial.CopyTo(ref x); VBuffer <Float> prev = default(VBuffer <Float>); VBuffer <Float> avg = VBufferUtils.CreateEmpty <Float>(dim); for (int n = 0; _maxSteps == 0 || n < _maxSteps; ++n) { if (_momentum == 0) { step = new VBuffer <Float>(step.Length, 0, step.Values, step.Indices); } else { VectorUtils.ScaleBy(ref step, _momentum); } Float stepSize; switch (_rateSchedule) { case RateScheduleType.Constant: stepSize = 1 / _t0; break; case RateScheduleType.Sqrt: stepSize = 1 / (_t0 + MathUtils.Sqrt(n)); break; case RateScheduleType.Linear: stepSize = 1 / (_t0 + n); break; default: throw Contracts.Except(); } Float scale = (1 - _momentum) / _batchSize; for (int i = 0; i < _batchSize; ++i) { f(ref x, ref grad); VectorUtils.AddMult(ref grad, scale, ref step); } if (_averaging) { Utils.Swap(ref avg, ref prev); VectorUtils.ScaleBy(prev, ref avg, (Float)n / (n + 1)); VectorUtils.AddMult(ref step, -stepSize, ref x); VectorUtils.AddMult(ref x, (Float)1 / (n + 1), ref avg); if ((n > 0 && TerminateTester.ShouldTerminate(ref avg, ref prev)) || _terminate(ref avg)) { result = avg; return; } } else { Utils.Swap(ref x, ref prev); VectorUtils.AddMult(ref step, -stepSize, ref prev, ref x); if ((n > 0 && TerminateTester.ShouldTerminate(ref x, ref prev)) || _terminate(ref x)) { result = x; return; } } } result = _averaging ? avg : x; }
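To make the update rule easier to follow, here is the iteration the loop above performs, written out as a sketch derived from the code (using the field names as symbols: mu is `_momentum`, B is `_batchSize`, t0 is `_t0`):

\[ \eta_n = \begin{cases} 1/t_0 & \text{Constant} \\ 1/(t_0 + \sqrt{n}) & \text{Sqrt} \\ 1/(t_0 + n) & \text{Linear} \end{cases} \]
\[ \mathrm{step}_n = \mu\,\mathrm{step}_{n-1} + \frac{1-\mu}{B}\sum_{i=1}^{B} \nabla f_i(x_n), \qquad x_{n+1} = x_n - \eta_n\,\mathrm{step}_n \]
\[ \text{with averaging enabled: } \bar{x}_{n+1} = \tfrac{n}{n+1}\,\bar{x}_n + \tfrac{1}{n+1}\,x_{n+1} \]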
/// <summary> /// Test whether the optimization should terminate. Returns true if x contains NA or +/-Inf or x equals xprev. /// </summary> /// <param name="x">The current value.</param> /// <param name="xprev">The value from the previous iteration.</param> /// <returns>True if the optimization routine should terminate at this iteration.</returns> internal static bool ShouldTerminate(ref VBuffer <Float> x, ref VBuffer <Float> xprev) { Contracts.Assert(x.Length == xprev.Length, "Vectors must have the same dimensionality."); Contracts.Assert(FloatUtils.IsFinite(xprev.Values, xprev.Count)); if (!FloatUtils.IsFinite(x.Values, x.Count)) { return(true); } if (x.IsDense && xprev.IsDense) { for (int i = 0; i < x.Length; i++) { if (x.Values[i] != xprev.Values[i]) { return(false); } } } else if (xprev.IsDense) { int j = 0; for (int ii = 0; ii < x.Count; ii++) { int i = x.Indices[ii]; while (j < i) { if (xprev.Values[j++] != 0) { return(false); } } Contracts.Assert(i == j); if (x.Values[ii] != xprev.Values[j++]) { return(false); } } while (j < xprev.Length) { if (xprev.Values[j++] != 0) { return(false); } } } else if (x.IsDense) { int i = 0; for (int jj = 0; jj < xprev.Count; jj++) { int j = xprev.Indices[jj]; while (i < j) { if (x.Values[i++] != 0) { return(false); } } Contracts.Assert(j == i); if (x.Values[i++] != xprev.Values[jj]) { return(false); } } while (i < x.Length) { if (x.Values[i++] != 0) { return(false); } } } else { // Both sparse. int ii = 0; int jj = 0; while (ii < x.Count && jj < xprev.Count) { int i = x.Indices[ii]; int j = xprev.Indices[jj]; if (i == j) { if (x.Values[ii++] != xprev.Values[jj++]) { return(false); } } else if (i < j) { if (x.Values[ii++] != 0) { return(false); } } else { if (xprev.Values[jj++] != 0) { return(false); } } } while (ii < x.Count) { if (x.Values[ii++] != 0) { return(false); } } while (jj < xprev.Count) { if (xprev.Values[jj++] != 0) { return(false); } } } return(true); }
public TransformInfo(ModelLoadContext ctx)
{
    Contracts.AssertValue(ctx);

    // *** Binary format ***
    // int: Dimension
    // int: Rank
    // for i=0,..,Rank-1:
    //   float[]: the i'th eigenvector
    // int: the size of MeanProjected (0 if it is null)
    // float[]: MeanProjected

    Dimension = ctx.Reader.ReadInt32();
    Rank = ctx.Reader.ReadInt32();
    Contracts.CheckDecode(0 < Rank && Rank <= Dimension);
    Eigenvectors = new float[Rank][];
    for (int i = 0; i < Rank; i++)
    {
        Eigenvectors[i] = ctx.Reader.ReadFloatArray(Dimension);
        Contracts.CheckDecode(FloatUtils.IsFinite(Eigenvectors[i]));
    }
    MeanProjected = ctx.Reader.ReadFloatArray();
    Contracts.CheckDecode(MeanProjected == null || (MeanProjected.Length == Rank && FloatUtils.IsFinite(MeanProjected)));
}
protected LinearPredictor(IHostEnvironment env, string name, ModelLoadContext ctx) : base(env, name, ctx) { // *** Binary format *** // Float: bias // int: number of features (weights) // int: number of indices // int[]: indices // int: number of weights // Float[]: weights // bool: has model stats // (Conditional) LinearModelStatistics: stats Bias = ctx.Reader.ReadFloat(); Host.CheckDecode(FloatUtils.IsFinite(Bias)); int len = ctx.Reader.ReadInt32(); Host.Assert(len > 0); int cind = ctx.Reader.ReadInt32(); Host.CheckDecode(0 <= cind & cind < len); var indices = ctx.Reader.ReadIntArray(cind); // Verify monotonicity of indices. int prev = -1; for (int i = 0; i < cind; i++) { Host.CheckDecode(prev < indices[i]); prev = indices[i]; } Host.CheckDecode(prev < len); int cwht = ctx.Reader.ReadInt32(); // Either there are as many weights as there are indices (in the // sparse case), or (in the dense case) there are no indices and the // number of weights is the length of the vector. Note that for the // trivial predictor it is quite legal to have 0 in both counts. Host.CheckDecode(cwht == cind || (cind == 0 && cwht == len)); var weights = ctx.Reader.ReadFloatArray(cwht); Host.CheckDecode(Utils.Size(weights) == 0 || weights.All(x => FloatUtils.IsFinite(x))); if (cwht == 0) { Weight = VBufferUtils.CreateEmpty <Float>(len); } else { Weight = new VBuffer <Float>(len, Utils.Size(weights), weights, indices); } InputType = new VectorType(NumberType.Float, Weight.Length); WarnOnOldNormalizer(ctx, GetType(), Host); if (Weight.IsDense) { _weightsDense = Weight; } else { _weightsDenseLock = new object(); } }
//Project the covariance matrix A on to Omega: Y <- A * Omega //A = X' * X / n, where X = data - mean //Note that the covariance matrix is not computed explicitly private void Project(IDataView trainingData, Float[][] mean, Float[][][] omega, Float[][][] y, TransformInfo[] transformInfos) { Host.Assert(mean.Length == omega.Length && omega.Length == y.Length && y.Length == Infos.Length); for (int i = 0; i < omega.Length; i++) { Contracts.Assert(omega[i].Length == y[i].Length); } // set y to be all zeros for (int iinfo = 0; iinfo < y.Length; iinfo++) { for (int i = 0; i < y[iinfo].Length; i++) { Array.Clear(y[iinfo][i], 0, y[iinfo][i].Length); } } bool[] center = Enumerable.Range(0, mean.Length).Select(i => mean[i] != null).ToArray(); Double[] totalColWeight = new Double[Infos.Length]; bool[] activeColumns = new bool[Source.Schema.ColumnCount]; for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { activeColumns[Infos[iinfo].Source] = true; if (_weightColumnIndex[iinfo] >= 0) { activeColumns[_weightColumnIndex[iinfo]] = true; } } using (var cursor = trainingData.GetRowCursor(col => activeColumns[col])) { var weightGetters = new ValueGetter <Float> [Infos.Length]; var columnGetters = new ValueGetter <VBuffer <Float> > [Infos.Length]; for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { if (_weightColumnIndex[iinfo] >= 0) { weightGetters[iinfo] = cursor.GetGetter <Float>(_weightColumnIndex[iinfo]); } columnGetters[iinfo] = cursor.GetGetter <VBuffer <Float> >(Infos[iinfo].Source); } var features = default(VBuffer <Float>); while (cursor.MoveNext()) { for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { Contracts.Check(Infos[iinfo].TypeSrc.IsVector && Infos[iinfo].TypeSrc.ItemType.IsNumber, "PCA transform can only be performed on numeric columns of dimension > 1"); Float weight = 1; if (weightGetters[iinfo] != null) { weightGetters[iinfo](ref weight); } columnGetters[iinfo](ref features); if (FloatUtils.IsFinite(weight) && weight >= 0 && (features.Count == 0 || FloatUtils.IsFinite(features.Values, features.Count))) { totalColWeight[iinfo] += weight; if (center[iinfo]) { VectorUtils.AddMult(ref features, mean[iinfo], weight); } for (int i = 0; i < omega[iinfo].Length; i++) { VectorUtils.AddMult(ref features, y[iinfo][i], weight * VectorUtils.DotProductWithOffset(omega[iinfo][i], 0, ref features)); } } } } for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { if (totalColWeight[iinfo] <= 0) { throw Host.Except("Empty data in column '{0}'", Source.Schema.GetColumnName(Infos[iinfo].Source)); } } for (int iinfo = 0; iinfo < Infos.Length; iinfo++) { var invn = (Float)(1 / totalColWeight[iinfo]); for (var i = 0; i < omega[iinfo].Length; ++i) { VectorUtils.ScaleBy(y[iinfo][i], invn); } if (center[iinfo]) { VectorUtils.ScaleBy(mean[iinfo], invn); for (int i = 0; i < omega[iinfo].Length; i++) { VectorUtils.AddMult(mean[iinfo], y[iinfo][i], -VectorUtils.DotProduct(omega[iinfo][i], mean[iinfo])); } } } } }
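In math terms, the single pass above accumulates, for each projection vector omega_i (a row of Omega), the quantity below; this is a reconstruction from the loop, with per-row weights w defaulting to 1:

\[ y_i \leftarrow \frac{1}{\sum_x w_x} \sum_x w_x\,(\omega_i \cdot x)\,x \;-\; (\omega_i \cdot \mu)\,\mu, \qquad \mu = \frac{1}{\sum_x w_x} \sum_x w_x\,x \]

which equals the weighted covariance matrix applied to omega_i without ever forming that matrix explicitly, matching the comment at the top of the method.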
/// <summary>
/// Possible returns:
///
/// Finite value: no infinite value in the sliding window and at least one non-NaN value
/// NaN value: only NaN values in the sliding window, or +/- infinite values
/// Infinite value: one infinite value in the sliding window (the sign is not relevant)
/// </summary>
internal static Single ComputeMovingAverageUniform(FixedSizeQueue<Single> others, Single input, int lag,
    Single lastDropped, ref Single currentSum, ref bool initUniformMovingAverage, ref int nbNanValues)
{
    if (initUniformMovingAverage)
    {
        initUniformMovingAverage = false;
        return ComputeMovingAverageUniformInitialisation(others, input, lag, lastDropped, ref currentSum, ref nbNanValues);
    }
    else
    {
        if (Single.IsNaN(lastDropped))
            --nbNanValues;
        else if (!FloatUtils.IsFinite(lastDropped))
        {
            // One infinite value left,
            // we need to recompute everything as we don't know how many infinite values are in the sliding window.
            return ComputeMovingAverageUniformInitialisation(others, input, lag, lastDropped, ref currentSum, ref nbNanValues);
        }
        else
            currentSum -= lastDropped;

        // lastDropped is finite
        Contracts.Assert(FloatUtils.IsFinite(lastDropped) || Single.IsNaN(lastDropped));

        var newValue = lag == 0 ? input : others[others.Count - lag];
        if (!Single.IsNaN(newValue) && !FloatUtils.IsFinite(newValue))
        {
            // One infinite value entered,
            // we need to recompute everything as we don't know how many infinite values are in the sliding window.
            return ComputeMovingAverageUniformInitialisation(others, input, lag, lastDropped, ref currentSum, ref nbNanValues);
        }

        // lastDropped is finite and input is finite or NaN
        Contracts.Assert(FloatUtils.IsFinite(newValue) || Single.IsNaN(newValue));

        if (!Single.IsNaN(currentSum) && !FloatUtils.IsFinite(currentSum))
        {
            if (Single.IsNaN(newValue))
            {
                ++nbNanValues;
                return currentSum;
            }
            else
                return FloatUtils.IsFinite(newValue) ? currentSum : (currentSum + newValue);
        }

        // lastDropped is finite, input is finite or NaN, currentSum is finite or NaN
        Contracts.Assert(FloatUtils.IsFinite(currentSum) || Single.IsNaN(currentSum));

        if (Single.IsNaN(newValue))
        {
            ++nbNanValues;
            int nb = (lag == 0 ? others.Count + 1 : others.Count - lag + 1) - nbNanValues;
            return nb == 0 ? Single.NaN : currentSum / nb;
        }
        else
        {
            int nb = lag == 0 ? others.Count + 1 - nbNanValues : others.Count + 1 - nbNanValues - lag;
            currentSum += input;
            return nb == 0 ? Single.NaN : currentSum / nb;
        }
    }
}
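A naive, standalone reference for the semantics described in the summary above (a hypothetical helper, not part of the transform): average the non-NaN values in the window and let infinities propagate through the sum.

using System.Collections.Generic;

internal static class MovingAverageSketch
{
    // Returns NaN when the window holds only NaNs; an infinity in the window makes the
    // result infinite (or NaN when +Inf and -Inf cancel); otherwise the finite mean.
    public static float UniformMean(IReadOnlyList<float> window)
    {
        float sum = 0;
        int count = 0;
        foreach (var v in window)
        {
            if (float.IsNaN(v))
                continue;      // NaNs are skipped, not averaged in
            sum += v;          // +/- infinity propagates through the sum
            count++;
        }
        return count == 0 ? float.NaN : sum / count;
    }
}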
/// <summary>
/// This should be overridden by derived classes. This implementation simply increments <see cref="NumIterExamples"/>.
/// </summary>
public virtual void ProcessDataInstance(IChannel ch, ref VBuffer<Float> feat, Float label, Float weight)
{
    ch.Assert(FloatUtils.IsFinite(feat.Values, feat.Count));
    ++NumIterExamples;
}
public IPredictor Calibrate(IChannel ch, IDataView data, ICalibratorTrainer caliTrainer, int maxRows) { Host.CheckValue(ch, nameof(ch)); ch.CheckValue(data, nameof(data)); ch.CheckValue(caliTrainer, nameof(caliTrainer)); if (caliTrainer.NeedsTraining) { var bound = new Bound(this, new RoleMappedSchema(data.Schema)); using (var curs = data.GetRowCursor(col => true)) { var scoreGetter = (ValueGetter <Single>)bound.CreateScoreGetter(curs, col => true, out Action disposer); // We assume that we can use the label column of the first predictor, since if the labels are not identical // then the whole model is garbage anyway. var labelGetter = bound.GetLabelGetter(curs, 0, out Action disp); disposer += disp; var weightGetter = bound.GetWeightGetter(curs, 0, out disp); disposer += disp; try { int num = 0; while (curs.MoveNext()) { Single label = 0; labelGetter(ref label); if (!FloatUtils.IsFinite(label)) { continue; } Single score = 0; scoreGetter(ref score); if (!FloatUtils.IsFinite(score)) { continue; } Single weight = 0; weightGetter(ref weight); if (!FloatUtils.IsFinite(weight)) { continue; } caliTrainer.ProcessTrainingExample(score, label > 0, weight); if (maxRows > 0 && ++num >= maxRows) { break; } } } finally { disposer?.Invoke(); } } } var calibrator = caliTrainer.FinishTraining(ch); return(CalibratorUtils.CreateCalibratedPredictor(Host, this, calibrator)); }
/// <summary> /// An implementation of the line search for the Wolfe conditions, from Nocedal & Wright /// </summary> internal virtual bool LineSearch(IChannel ch, bool force) { Contracts.AssertValue(ch); Float dirDeriv = VectorUtils.DotProduct(ref _dir, ref _grad); if (dirDeriv == 0) { throw ch.Process(new PrematureConvergenceException(this, "Directional derivative is zero. You may be sitting on the optimum.")); } // if a non-descent direction is chosen, the line search will break anyway, so throw here // The most likely reasons for this is a bug in your function's gradient computation, ch.Check(dirDeriv < 0, "L-BFGS chose a non-descent direction."); Float c1 = (Float)1e-4 * dirDeriv; Float c2 = (Float)0.9 * dirDeriv; Float alpha = (Iter == 1 ? (1 / VectorUtils.Norm(_dir)) : 1); PointValueDeriv last = new PointValueDeriv(0, LastValue, dirDeriv); PointValueDeriv aLo = new PointValueDeriv(); PointValueDeriv aHi = new PointValueDeriv(); // initial bracketing phase while (true) { VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX); if (EnforceNonNegativity) { VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval) { if (newXval < 0.0) { newXval = 0; } }); } Value = Eval(ref _newX, ref _newGrad); GradientCalculations++; if (Float.IsPositiveInfinity(Value)) { alpha /= 2; continue; } if (!FloatUtils.IsFinite(Value)) { throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value); } dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad); PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv); if ((curr.V > LastValue + c1 * alpha) || (last.A > 0 && curr.V >= last.V)) { aLo = last; aHi = curr; break; } else if (Math.Abs(curr.D) <= -c2) { return(true); } else if (curr.D >= 0) { aLo = curr; aHi = last; break; } last = curr; if (alpha == 0) { alpha = Float.Epsilon; // Robust to divisional underflow. } else { alpha *= 2; } } Float minChange = (Float)0.01; int maxSteps = 10; // this loop is the "zoom" procedure described in Nocedal & Wright for (int step = 0; ; ++step) { if (step == maxSteps && !force) { return(false); } PointValueDeriv left = aLo.A < aHi.A ? aLo : aHi; PointValueDeriv right = aLo.A < aHi.A ? aHi : aLo; if (left.D > 0 && right.D < 0) { // interpolating cubic would have max in range, not min (can this happen?) // set a to the one with smaller value alpha = aLo.V < aHi.V ? 
aLo.A : aHi.A; } else { alpha = CubicInterp(aLo, aHi); if (Float.IsNaN(alpha) || Float.IsInfinity(alpha)) { alpha = (aLo.A + aHi.A) / 2; } } // this is to ensure that the new point is within bounds // and that the change is reasonably sized Float ub = (minChange * left.A + (1 - minChange) * right.A); if (alpha > ub) { alpha = ub; } Float lb = (minChange * right.A + (1 - minChange) * left.A); if (alpha < lb) { alpha = lb; } VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX); if (EnforceNonNegativity) { VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval) { if (newXval < 0.0) { newXval = 0; } }); } Value = Eval(ref _newX, ref _newGrad); GradientCalculations++; if (!FloatUtils.IsFinite(Value)) { throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value); } dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad); PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv); if ((curr.V > LastValue + c1 * alpha) || (curr.V >= aLo.V)) { if (aHi.A == curr.A) { if (force) { throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero.")); } else { return(false); } } aHi = curr; } else if (Math.Abs(curr.D) <= -c2) { return(true); } else { if (curr.D * (aHi.A - aLo.A) >= 0) { aHi = aLo; } if (aLo.A == curr.A) { if (force) { throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero.")); } else { return(false); } } aLo = curr; } } }
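For reference, the two conditions this search enforces are the standard (strong) Wolfe conditions from Nocedal and Wright, with the constants used above:

\[ f(x + \alpha d) \le f(x) + c_1\,\alpha\,\nabla f(x)^T d \qquad (c_1 = 10^{-4}) \]
\[ |\nabla f(x + \alpha d)^T d| \le c_2\,|\nabla f(x)^T d| \qquad (c_2 = 0.9) \]

The first loop brackets an interval [aLo, aHi] containing an acceptable step; the zoom loop then shrinks it using cubic interpolation (CubicInterp), clamped by `minChange` so the trial point never sits too close to either end of the interval.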