/// <summary>
/// Initializes a new instance of the <see cref="MulticlassLogisticRegressionPredictor"/> class.
/// This constructor is called by <see cref="SdcaMultiClassTrainer"/> to create the predictor.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="weights">The array of weight vectors. It must contain exactly <paramref name="numClasses"/> entries.</param>
/// <param name="bias">The array of biases. It must contain exactly <paramref name="numClasses"/> entries.</param>
/// <param name="numClasses">The number of classes for multi-class classification. Must be at least 2.</param>
/// <param name="numFeatures">The logical length of the feature vector.</param>
/// <param name="labelNames">The optional label names. If not null, it must have length <paramref name="numClasses"/>.</param>
/// <param name="stats">The model statistics, or null when none are available.</param>
public MulticlassLogisticRegressionPredictor(IHostEnvironment env, VBuffer<float>[] weights, float[] bias, int numClasses, int numFeatures, string[] labelNames, LinearModelStatistics stats = null)
    : base(env, RegistrationName)
{
    Contracts.CheckValue(weights, nameof(weights));
    Contracts.CheckValue(bias, nameof(bias));
    Contracts.Check(numClasses >= 2, "numClasses must be at least 2.");
    _numClasses = numClasses;
    Contracts.Check(numFeatures >= 1, "numFeatures must be positive.");
    _numFeatures = numFeatures;
    Contracts.Check(Utils.Size(weights) == _numClasses);
    Contracts.Check(Utils.Size(bias) == _numClasses);

    _weights = new VBuffer<float>[_numClasses];
    _biases = new float[_numClasses];
    // Take a private copy of each per-class weight vector and its bias.
    for (int c = 0; c < _numClasses; c++)
    {
        Contracts.Assert(weights[c].Length == _numFeatures);
        weights[c].CopyTo(ref _weights[c]);
        _biases[c] = bias[c];
    }

    // When every per-class vector is dense, the same array doubles as the dense view.
    if (_weights.All(w => w.IsDense))
    {
        _weightsDense = _weights;
    }

    InputType = new VectorType(NumberType.R4, _numFeatures);
    OutputType = new VectorType(NumberType.R4, _numClasses);
    Contracts.Assert(labelNames == null || labelNames.Length == numClasses);
    _labelNames = labelNames;
    Contracts.AssertValueOrNull(stats);
    _stats = stats;
}
/// <summary>
/// Computes and reports multi-class training statistics — residual deviance, null deviance
/// and AIC — via the channel, then stores them in <c>_stats</c>. Unlike the binary overload,
/// no coefficient standard errors are computed here (see the REVIEW comment below).
/// </summary>
protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams)
{
    Contracts.AssertValue(ch);
    Contracts.AssertValue(cursorFactory);
    Contracts.Assert(NumGoodRows > 0);
    Contracts.Assert(WeightSum > 0);
    Contracts.Assert(BiasCount == _numClasses);
    Contracts.Assert(loss >= 0);
    Contracts.Assert(numParams >= BiasCount);
    Contracts.Assert(CurrentWeights.IsDense);
    ch.Info("Model trained with {0} training examples.", NumGoodRows);
    // Compute deviance: start with loss function.
    float deviance = (float)(2 * loss * WeightSum);
    if (L2Weight > 0)
    {
        // Need to subtract L2 regularization loss.
        // The bias term is not regularized (weights start at offset BiasCount).
        var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, BiasCount, CurrentWeights.Length - BiasCount) * L2Weight;
        deviance -= regLoss;
    }
    if (L1Weight > 0)
    {
        // Need to subtract L1 regularization loss.
        // The bias term is not regularized.
        Double regLoss = 0;
        VBufferUtils.ForEachDefined(ref CurrentWeights, (ind, value) => { if (ind >= BiasCount) { regLoss += Math.Abs(value); } });
        deviance -= (float)regLoss * L1Weight * 2;
    }
    ch.Info("Residual Deviance: \t{0}", deviance);
    // Compute null deviance, i.e., the deviance of null hypothesis.
    // Classes with zero prior weight contribute nothing to the sum (and Log(0) is
    // undefined), so they are skipped.
    float nullDeviance = 0;
    for (int iLabel = 0; iLabel < _numClasses; iLabel++)
    {
        Contracts.Assert(_prior[iLabel] >= 0);
        if (_prior[iLabel] == 0)
        {
            continue;
        }
        // -2 * sum(prior * log(prior / weightSum)) accumulated per class.
        nullDeviance -= (float)(2 * _prior[iLabel] * Math.Log(_prior[iLabel] / WeightSum));
    }
    ch.Info("Null Deviance: \t{0}", nullDeviance);
    // Compute AIC.
    ch.Info("AIC: \t{0}", 2 * numParams + deviance);
    // REVIEW: Figure out how to compute the statistics for the coefficients.
    _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
}
/// <summary>
/// Loading constructor. The base class reads the linear model itself; this method only
/// restores the optional model statistics sub-model, when present in the repository.
/// </summary>
private LinearBinaryPredictor(IHostEnvironment env, ModelLoadContext ctx)
    : base(env, RegistrationName, ctx)
{
    // Models with version <= 0x00020001 were written without model statistics; nothing more to read.
    if (ctx.Header.ModelVerWritten <= 0x00020001)
    {
        return;
    }
    // *** Binary format ***
    // (Base class)
    // LinearModelStatistics: model statistics (optional, in a separate stream)
    string statsDir = Path.Combine(ctx.Directory ?? "", ModelStatsSubModelFilename);
    using (var statsEntry = ctx.Repository.OpenEntryOrNull(statsDir, ModelLoadContext.ModelStreamName))
    {
        if (statsEntry == null)
        {
            // Statistics stream absent: the model simply has no stats.
            _stats = null;
        }
        else
        {
            using (var statsCtx = new ModelLoadContext(ctx.Repository, statsEntry, statsDir))
                _stats = LinearModelStatistics.Create(Host, statsCtx);
        }
    }
}
/// <summary>
/// Attempts to extract the standard error, z-score and p-value of the bias term, which is
/// stored at index 0 of the coefficient standard-error vector. When no standard errors were
/// computed, all outputs are zeroed and false is returned.
/// </summary>
public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue)
{
    if (!stats._coeffStdError.HasValue)
    {
        // No coefficient statistics available; report zeros and signal failure.
        stdError = 0;
        zScore = 0;
        pValue = 0;
        return false;
    }

    const Double root2 = 1.41421356237; // Math.Sqrt(2);
    var coeffStdError = stats._coeffStdError.Value;
    // The bias occupies slot 0 of the standard-error vector.
    stdError = coeffStdError.Values[0];
    Contracts.Assert(stdError == coeffStdError.GetItemOrDefault(0));
    zScore = bias / stdError;
    // p-value from the normal distribution: 1 - erf(|z| / sqrt(2)).
    pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / root2));
    return true;
}
/// <summary>
/// Initializes the predictor from a single flat weight vector that packs the biases followed
/// by the per-class feature weights.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="weights">Flat vector of length numClasses + numClasses * numFeatures:
/// the first numClasses entries are the biases, followed by one weight row per class.</param>
/// <param name="numClasses">The number of classes.</param>
/// <param name="numFeatures">The logical length of the feature vector.</param>
/// <param name="labelNames">The optional label names; if not null, must have length numClasses.</param>
/// <param name="stats">The model statistics, or null when none are available.</param>
internal MulticlassLogisticRegressionPredictor(IHostEnvironment env, ref VBuffer<float> weights, int numClasses, int numFeatures, string[] labelNames, LinearModelStatistics stats = null)
    : base(env, RegistrationName)
{
    _numClasses = numClasses;
    _numFeatures = numFeatures;

    // weights contains both bias and feature weights in a flat vector:
    // biases are stored in the first _numClasses elements, followed by one weight
    // vector for each class, in turn, all concatenated
    // (i.e.: in "row major", if we encode each weight vector as a row of a matrix).
    // Fixed: the original asserted this identical condition twice; once is sufficient.
    Contracts.Assert(weights.Length == _numClasses + _numClasses * _numFeatures);

    _biases = new float[_numClasses];
    for (int i = 0; i < _biases.Length; i++)
    {
        weights.GetItemOrDefault(i, ref _biases[i]);
    }
    _weights = new VBuffer<float>[_numClasses];
    for (int i = 0; i < _weights.Length; i++)
    {
        // Slice out the i-th class's weight row from the flat buffer.
        weights.CopyTo(ref _weights[i], _numClasses + i * _numFeatures, _numFeatures);
    }
    // When every per-class vector is dense, the same array doubles as the dense view.
    if (_weights.All(v => v.IsDense))
    {
        _weightsDense = _weights;
    }

    InputType = new VectorType(NumberType.R4, _numFeatures);
    OutputType = new VectorType(NumberType.R4, _numClasses);
    Contracts.Assert(labelNames == null || labelNames.Length == numClasses);
    _labelNames = labelNames;
    Contracts.AssertValueOrNull(stats);
    _stats = stats;
}
/// <summary>
/// Computes training statistics for the binary model — residual deviance, null deviance and
/// AIC — and, when the parameter count permits, coefficient standard errors obtained by
/// inverting the packed lower-triangular Hessian via MKL's Cholesky routines (Pptrf/Pptri).
/// The result is stored in <c>_stats</c>.
/// </summary>
protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, Float loss, int numParams)
{
    Contracts.AssertValue(ch);
    Contracts.AssertValue(cursorFactory);
    Contracts.Assert(NumGoodRows > 0);
    Contracts.Assert(WeightSum > 0);
    Contracts.Assert(BiasCount == 1);
    Contracts.Assert(loss >= 0);
    Contracts.Assert(numParams >= BiasCount);
    Contracts.Assert(CurrentWeights.IsDense);
    ch.Info("Model trained with {0} training examples.", NumGoodRows);
    // Compute deviance: start with loss function.
    Float deviance = (Float)(2 * loss * WeightSum);
    if (L2Weight > 0)
    {
        // Need to subtract L2 regularization loss.
        // The bias term is not regularized (weights start at offset 1).
        var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight;
        deviance -= regLoss;
    }
    if (L1Weight > 0)
    {
        // Need to subtract L1 regularization loss.
        // The bias term is not regularized.
        Double regLoss = 0;
        VBufferUtils.ForEachDefined(ref CurrentWeights, (ind, value) => { if (ind >= BiasCount) { regLoss += Math.Abs(value); } });
        deviance -= (Float)regLoss * L1Weight * 2;
    }
    ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0));
    // Compute null deviance, i.e., the deviance of null hypothesis.
    // Cap the prior positive rate at 1e-15: if the rate is (nearly) 0 or 1 the entropy is 0.
    Double priorPosRate = _posWeight / WeightSum;
    Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1);
    Float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ? 0f : (Float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true));
    ch.Info("Null Deviance: \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1);
    // Compute AIC.
    ch.Info("AIC: \t{0}", 2 * numParams + deviance);
    // Show the coefficients statistics table.
    var featureColIdx = cursorFactory.Data.Schema.Feature.Index;
    var schema = cursorFactory.Data.Data.Schema;
    var featureLength = CurrentWeights.Length - BiasCount;
    var namesSpans = VBufferUtils.CreateEmpty<DvText>(featureLength);
    if (schema.HasSlotNames(featureColIdx, featureLength))
    {
        schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans);
    }
    Host.Assert(namesSpans.Length == featureLength);
    // Inverse mapping of non-zero weight slots.
    Dictionary<int, int> weightIndicesInvMap = null;
    // Indices of bias and non-zero weight slots.
    int[] weightIndices = null;
    // Whether all weights are non-zero.
    bool denseWeight = numParams == CurrentWeights.Length;
    // Extract non-zero indices of weight.
    if (!denseWeight)
    {
        weightIndices = new int[numParams];
        weightIndicesInvMap = new Dictionary<int, int>(numParams);
        // Slot 0 is always the bias.
        weightIndices[0] = 0;
        weightIndicesInvMap[0] = 0;
        int j = 1;
        for (int i = 1; i < CurrentWeights.Length; i++)
        {
            if (CurrentWeights.Values[i] != 0)
            {
                weightIndices[j] = i;
                weightIndicesInvMap[i] = j++;
            }
        }
        Contracts.Assert(j == numParams);
    }
    // Compute the standard error of coefficients.
    long hessianDimension = (long)numParams * (numParams + 1) / 2;
    if (hessianDimension > int.MaxValue)
    {
        ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " + "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" + "to reduce the number of parameters.");
        _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
        return;
    }
    // Building the variance-covariance matrix for parameters.
    // The layout of this algorithm is a packed row-major lower triangular matrix.
    // E.g., layout of indices for 4-by-4:
    // 0
    // 1 2
    // 3 4 5
    // 6 7 8 9
    var hessian = new Double[hessianDimension];
    // Initialize diagonal elements with L2 regularizers except for the first entry (index 0)
    // Since bias is not regularized.
    if (L2Weight > 0)
    {
        // i is the array index of the diagonal entry at iRow-th row and iRow-th column.
        // iRow is one-based.
        int i = 0;
        for (int iRow = 2; iRow <= numParams; iRow++)
        {
            i += iRow;
            hessian[i] = L2Weight;
        }
        Contracts.Assert(i == hessian.Length - 1);
    }
    // Initialize the remaining entries by accumulating X' * diag(variance) * X over the data.
    var bias = CurrentWeights.Values[0];
    using (var cursor = cursorFactory.Create())
    {
        while (cursor.MoveNext())
        {
            var label = cursor.Label;
            var weight = cursor.Weight;
            var score = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features);
            // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example.
            var variance = weight / (2 + 2 * Math.Cosh(score));
            // Increment the first entry of hessian.
            hessian[0] += variance;
            var values = cursor.Features.Values;
            if (cursor.Features.IsDense)
            {
                int ioff = 1;
                // Increment remaining entries of hessian.
                for (int i = 1; i < numParams; i++)
                {
                    ch.Assert(ioff == i * (i + 1) / 2);
                    // wi maps the i-th parameter back to its feature slot (bias excluded).
                    int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1;
                    Contracts.Assert(0 <= wi && wi < cursor.Features.Length);
                    var val = values[wi] * variance;
                    // Add the implicit first bias term to X'X
                    hessian[ioff++] += val;
                    // Add the remainder of X'X
                    for (int j = 0; j < i; j++)
                    {
                        int wj = weightIndices == null ? j : weightIndices[j + 1] - 1;
                        Contracts.Assert(0 <= wj && wj < cursor.Features.Length);
                        hessian[ioff++] += val * values[wj];
                    }
                }
                ch.Assert(ioff == hessian.Length);
            }
            else
            {
                var indices = cursor.Features.Indices;
                for (int ii = 0; ii < cursor.Features.Count; ++ii)
                {
                    int i = indices[ii];
                    int wi = i + 1;
                    // Skip slots whose weight is zero (not among the numParams parameters).
                    if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi))
                    {
                        continue;
                    }
                    Contracts.Assert(0 < wi && wi <= cursor.Features.Length);
                    int ioff = wi * (wi + 1) / 2;
                    var val = values[ii] * variance;
                    // Add the implicit first bias term to X'X
                    hessian[ioff] += val;
                    // Add the remainder of X'X
                    for (int jj = 0; jj <= ii; jj++)
                    {
                        int j = indices[jj];
                        int wj = j + 1;
                        if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj))
                        {
                            continue;
                        }
                        Contracts.Assert(0 < wj && wj <= cursor.Features.Length);
                        hessian[ioff + wj] += val * values[jj];
                    }
                }
            }
        }
    }
    // Apply Cholesky Decomposition to find the inverse of the Hessian.
    Double[] invHessian = null;
    try
    {
        // First, find the Cholesky decomposition LL' of the Hessian.
        Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian);
        // Note that hessian is already modified at this point. It is no longer the original Hessian,
        // but instead represents the Cholesky decomposition L.
        // Also note that the following routine is supposed to consume the Cholesky decomposition L instead
        // of the original information matrix.
        Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian);
        // At this point, hessian should contain the inverse of the original Hessian matrix.
        // Swap hessian with invHessian to avoid confusion in the following context.
        Utils.Swap(ref hessian, ref invHessian);
        Contracts.Assert(hessian == null);
    }
    catch (DllNotFoundException)
    {
        throw ch.ExceptNotSupp("The MKL library (Microsoft.ML.MklImports.dll) or one of its dependencies is missing.");
    }
    Float[] stdErrorValues = new Float[numParams];
    stdErrorValues[0] = (Float)Math.Sqrt(invHessian[0]);
    for (int i = 1; i < numParams; i++)
    {
        // Initialize with inverse Hessian.
        stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i];
    }
    if (L2Weight > 0)
    {
        // Iterate through all entries of inverse Hessian to make adjustment to variance.
        // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
        // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
        // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
        int ioffset = 1;
        for (int iRow = 1; iRow < numParams; iRow++)
        {
            for (int iCol = 0; iCol <= iRow; iCol++)
            {
                var entry = (Single)invHessian[ioffset];
                var adjustment = -L2Weight * entry * entry;
                stdErrorValues[iRow] -= adjustment;
                // Off-diagonal entries adjust both the row and column variances.
                if (0 < iCol && iCol < iRow)
                {
                    stdErrorValues[iCol] -= adjustment;
                }
                ioffset++;
            }
        }
        Contracts.Assert(ioffset == invHessian.Length);
    }
    for (int i = 1; i < numParams; i++)
    {
        // Standard error is the square root of the (adjusted) diagonal variance.
        stdErrorValues[i] = (Float)Math.Sqrt(stdErrorValues[i]);
    }
    VBuffer<Float> stdErrors = new VBuffer<Float>(CurrentWeights.Length, numParams, stdErrorValues, weightIndices);
    _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, ref stdErrors);
}
/// <summary>
/// Loading constructor: decodes the serialized multi-class model — biases, the weight matrix
/// (dense, or sparse in CSR layout), the optional label names stream, and the optional model
/// statistics sub-model.
/// </summary>
private MulticlassLogisticRegressionPredictor(IHostEnvironment env, ModelLoadContext ctx)
    : base(env, RegistrationName, ctx)
{
    // *** Binary format ***
    // int: number of features
    // int: number of classes = number of biases
    // float[]: biases
    // (weight matrix, in CSR if sparse)
    // (see https://netlib.org/linalg/html_templates/node91.html#SECTION00931100000000000000)
    // int: number of row start indices (_numClasses + 1 if sparse, 0 if dense)
    // int[]: row start indices
    // int: total number of column indices (0 if dense)
    // int[]: column index of each non-zero weight
    // int: total number of non-zero weights (same as number of column indices if sparse, num of classes * num of features if dense)
    // float[]: non-zero weights
    // int[]: Id of label names (optional, in a separate stream)
    // LinearModelStatistics: model statistics (optional, in a separate stream)
    _numFeatures = ctx.Reader.ReadInt32();
    Host.CheckDecode(_numFeatures >= 1);
    _numClasses = ctx.Reader.ReadInt32();
    Host.CheckDecode(_numClasses >= 1);
    _biases = ctx.Reader.ReadFloatArray(_numClasses);
    int numStarts = ctx.Reader.ReadInt32();
    if (numStarts == 0)
    {
        // The weights are entirely dense: the stream holds num-classes * num-features values,
        // one contiguous row per class.
        int numIndices = ctx.Reader.ReadInt32();
        Host.CheckDecode(numIndices == 0);
        int numWeights = ctx.Reader.ReadInt32();
        Host.CheckDecode(numWeights == _numClasses * _numFeatures);
        _weights = new VBuffer<float>[_numClasses];
        for (int i = 0; i < _weights.Length; i++)
        {
            var w = ctx.Reader.ReadFloatArray(_numFeatures);
            _weights[i] = new VBuffer<float>(_numFeatures, w);
        }
        _weightsDense = _weights;
    }
    else
    {
        // Read weight matrix as CSR: starts[i]..starts[i+1] delimit row i's entries.
        Host.CheckDecode(numStarts == _numClasses + 1);
        int[] starts = ctx.Reader.ReadIntArray(numStarts);
        Host.CheckDecode(starts[0] == 0);
        Host.CheckDecode(Utils.IsSorted(starts));
        int numIndices = ctx.Reader.ReadInt32();
        Host.CheckDecode(numIndices == starts[starts.Length - 1]);
        var indices = new int[_numClasses][];
        for (int i = 0; i < indices.Length; i++)
        {
            indices[i] = ctx.Reader.ReadIntArray(starts[i + 1] - starts[i]);
            // Column indices within a row must be strictly increasing and in-range.
            Host.CheckDecode(Utils.IsIncreasing(0, indices[i], _numFeatures));
        }
        int numValues = ctx.Reader.ReadInt32();
        Host.CheckDecode(numValues == numIndices);
        _weights = new VBuffer<float>[_numClasses];
        for (int i = 0; i < _weights.Length; i++)
        {
            float[] values = ctx.Reader.ReadFloatArray(starts[i + 1] - starts[i]);
            _weights[i] = new VBuffer<float>(_numFeatures, Utils.Size(values), values, indices[i]);
        }
    }
    WarnOnOldNormalizer(ctx, GetType(), Host);
    InputType = new VectorType(NumberType.R4, _numFeatures);
    OutputType = new VectorType(NumberType.R4, _numClasses);
    // REVIEW: Should not save the label names duplicately with the predictor again.
    // Get it from the label column schema metadata instead.
    string[] labelNames = null;
    if (ctx.TryLoadBinaryStream(LabelNamesSubModelFilename, r => labelNames = LoadLabelNames(ctx, r)))
    {
        _labelNames = labelNames;
    }
    // Model statistics are stored in a separate (optional) sub-model stream.
    string statsDir = Path.Combine(ctx.Directory ?? "", ModelStatsSubModelFilename);
    using (var statsEntry = ctx.Repository.OpenEntryOrNull(statsDir, ModelLoadContext.ModelStreamName))
    {
        if (statsEntry == null)
        {
            _stats = null;
        }
        else
        {
            using (var statsCtx = new ModelLoadContext(ctx.Repository, statsEntry, statsDir))
                _stats = LinearModelStatistics.Create(Host, statsCtx);
        }
    }
}
/// <summary>
/// Constructs a new linear binary predictor.
/// </summary>
/// <param name="env">The host environment.</param>
/// <param name="weights">The weights for the linear predictor. Note that this
/// will take ownership of the <see cref="VBuffer{T}"/>.</param>
/// <param name="bias">The bias added to every output score.</param>
/// <param name="stats">The model statistics; may be null when none were computed.</param>
public LinearBinaryPredictor(IHostEnvironment env, ref VBuffer<Float> weights, Float bias, LinearModelStatistics stats = null)
    : base(env, RegistrationName, ref weights, bias)
{
    Contracts.AssertValueOrNull(stats);
    _stats = stats;
}
/// <summary>
/// Fills the estimate/stdErr/zScore/pValue buffers for all non-bias coefficients (the bias
/// occupies slot 0 of the standard-error vector and is skipped here) and produces a getter
/// that yields the matching slot names. Sets <paramref name="getSlotNames"/> to null when no
/// standard errors were computed.
/// </summary>
private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, ref VBuffer<Single> weights, ref VBuffer<ReadOnlyMemory<char>> names, ref VBuffer<Single> estimate, ref VBuffer<Single> stdErr, ref VBuffer<Single> zScore, ref VBuffer<Single> pValue, out ValueGetter<VBuffer<ReadOnlyMemory<char>>> getSlotNames)
{
    if (!stats._coeffStdError.HasValue)
    {
        getSlotNames = null;
        return;
    }
    // Standard-error vector length is weights length + 1 (slot 0 holds the bias).
    Contracts.Assert(stats._coeffStdError.Value.Length == weights.Length + 1);
    // Reuse the caller's backing arrays when they are large enough; otherwise allocate fresh ones.
    var estimateValues = estimate.Values;
    if (Utils.Size(estimateValues) < stats.ParametersCount - 1)
    {
        estimateValues = new Single[stats.ParametersCount - 1];
    }
    var stdErrorValues = stdErr.Values;
    if (Utils.Size(stdErrorValues) < stats.ParametersCount - 1)
    {
        stdErrorValues = new Single[stats.ParametersCount - 1];
    }
    var zScoreValues = zScore.Values;
    if (Utils.Size(zScoreValues) < stats.ParametersCount - 1)
    {
        zScoreValues = new Single[stats.ParametersCount - 1];
    }
    var pValueValues = pValue.Values;
    if (Utils.Size(pValueValues) < stats.ParametersCount - 1)
    {
        pValueValues = new Single[stats.ParametersCount - 1];
    }
    const Double sqrt2 = 1.41421356237; // Math.Sqrt(2);
    bool denseStdError = stats._coeffStdError.Value.IsDense;
    int[] stdErrorIndices = stats._coeffStdError.Value.Indices;
    for (int i = 1; i < stats.ParametersCount; i++)
    {
        // wi is the weight slot of the i-th parameter (bias excluded).
        int wi = denseStdError ? i - 1 : stdErrorIndices[i] - 1;
        Contracts.Assert(0 <= wi && wi < weights.Length);
        var weight = estimateValues[i - 1] = weights.GetItemOrDefault(wi);
        // NOTE(review): every other output array in this loop is indexed with [i - 1], but
        // stdErrorValues is indexed with [wi]. In the dense case wi == i - 1 so the results
        // agree, but in the sparse case wi can differ from i - 1 and can exceed
        // ParametersCount - 2, which would index past a freshly allocated array. Confirm
        // whether [wi] is intended here or should be [i - 1].
        var stdError = stdErrorValues[wi] = stats._coeffStdError.Value.Values[i];
        zScoreValues[i - 1] = weight / stdError;
        // p-value from the normal distribution: 1 - erf(|z| / sqrt(2)).
        pValueValues[i - 1] = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScoreValues[i - 1] / sqrt2));
    }
    estimate = new VBuffer<Single>(stats.ParametersCount - 1, estimateValues, estimate.Indices);
    stdErr = new VBuffer<Single>(stats.ParametersCount - 1, stdErrorValues, stdErr.Indices);
    zScore = new VBuffer<Single>(stats.ParametersCount - 1, zScoreValues, zScore.Indices);
    pValue = new VBuffer<Single>(stats.ParametersCount - 1, pValueValues, pValue.Indices);
    // Capture a copy of the names buffer so the getter remains valid after this method returns.
    var slotNames = names;
    getSlotNames = (ref VBuffer<ReadOnlyMemory<char>> dst) =>
    {
        var values = dst.Values;
        if (Utils.Size(values) < stats.ParametersCount - 1)
        {
            values = new ReadOnlyMemory<char>[stats.ParametersCount - 1];
        }
        for (int i = 1; i < stats.ParametersCount; i++)
        {
            // Same slot mapping as above.
            int wi = denseStdError ? i - 1 : stdErrorIndices[i] - 1;
            values[i - 1] = slotNames.GetItemOrDefault(wi);
        }
        dst = new VBuffer<ReadOnlyMemory<char>>(stats.ParametersCount - 1, values, dst.Indices);
    };
}
/// <summary>
/// Computes training statistics for the binary model — residual deviance, null deviance and
/// AIC — and accumulates the packed lower-triangular Hessian over the training data, then
/// stores the statistics in <c>_stats</c>.
/// </summary>
protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams)
{
    Contracts.AssertValue(ch);
    Contracts.AssertValue(cursorFactory);
    Contracts.Assert(NumGoodRows > 0);
    Contracts.Assert(WeightSum > 0);
    Contracts.Assert(BiasCount == 1);
    Contracts.Assert(loss >= 0);
    Contracts.Assert(numParams >= BiasCount);
    Contracts.Assert(CurrentWeights.IsDense);
    ch.Info("Model trained with {0} training examples.", NumGoodRows);
    // Compute deviance: start with loss function.
    float deviance = (float)(2 * loss * WeightSum);
    if (L2Weight > 0)
    {
        // Need to subtract L2 regularization loss.
        // The bias term is not regularized (weights start at offset 1).
        var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight;
        deviance -= regLoss;
    }
    if (L1Weight > 0)
    {
        // Need to subtract L1 regularization loss.
        // The bias term is not regularized.
        Double regLoss = 0;
        VBufferUtils.ForEachDefined(ref CurrentWeights, (ind, value) => { if (ind >= BiasCount) { regLoss += Math.Abs(value); } });
        deviance -= (float)regLoss * L1Weight * 2;
    }
    ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0));
    // Compute null deviance, i.e., the deviance of null hypothesis.
    // Cap the prior positive rate at 1e-15: if the rate is (nearly) 0 or 1 the entropy is 0.
    Double priorPosRate = _posWeight / WeightSum;
    Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1);
    float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ? 0f : (float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true));
    ch.Info("Null Deviance: \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1);
    // Compute AIC.
    ch.Info("AIC: \t{0}", 2 * numParams + deviance);
    // Show the coefficients statistics table.
    var featureColIdx = cursorFactory.Data.Schema.Feature.Index;
    var schema = cursorFactory.Data.Data.Schema;
    var featureLength = CurrentWeights.Length - BiasCount;
    var namesSpans = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(featureLength);
    if (schema.HasSlotNames(featureColIdx, featureLength))
    {
        schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans);
    }
    Host.Assert(namesSpans.Length == featureLength);
    // Inverse mapping of non-zero weight slots.
    Dictionary<int, int> weightIndicesInvMap = null;
    // Indices of bias and non-zero weight slots.
    int[] weightIndices = null;
    // Whether all weights are non-zero.
    bool denseWeight = numParams == CurrentWeights.Length;
    // Extract non-zero indices of weight.
    if (!denseWeight)
    {
        weightIndices = new int[numParams];
        weightIndicesInvMap = new Dictionary<int, int>(numParams);
        // Slot 0 is always the bias.
        weightIndices[0] = 0;
        weightIndicesInvMap[0] = 0;
        int j = 1;
        for (int i = 1; i < CurrentWeights.Length; i++)
        {
            if (CurrentWeights.Values[i] != 0)
            {
                weightIndices[j] = i;
                weightIndicesInvMap[i] = j++;
            }
        }
        Contracts.Assert(j == numParams);
    }
    // Compute the standard error of coefficients.
    long hessianDimension = (long)numParams * (numParams + 1) / 2;
    if (hessianDimension > int.MaxValue)
    {
        ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " + "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" + "to reduce the number of parameters.");
        _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
        return;
    }
    // Building the variance-covariance matrix for parameters.
    // The layout of this algorithm is a packed row-major lower triangular matrix.
    // E.g., layout of indices for 4-by-4:
    // 0
    // 1 2
    // 3 4 5
    // 6 7 8 9
    var hessian = new Double[hessianDimension];
    // Initialize diagonal elements with L2 regularizers except for the first entry (index 0)
    // Since bias is not regularized.
    if (L2Weight > 0)
    {
        // i is the array index of the diagonal entry at iRow-th row and iRow-th column.
        // iRow is one-based.
        int i = 0;
        for (int iRow = 2; iRow <= numParams; iRow++)
        {
            i += iRow;
            hessian[i] = L2Weight;
        }
        Contracts.Assert(i == hessian.Length - 1);
    }
    // Initialize the remaining entries by accumulating X' * diag(variance) * X over the data.
    var bias = CurrentWeights.Values[0];
    using (var cursor = cursorFactory.Create())
    {
        while (cursor.MoveNext())
        {
            var label = cursor.Label;
            var weight = cursor.Weight;
            var score = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features);
            // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example.
            var variance = weight / (2 + 2 * Math.Cosh(score));
            // Increment the first entry of hessian.
            hessian[0] += variance;
            var values = cursor.Features.Values;
            if (cursor.Features.IsDense)
            {
                int ioff = 1;
                // Increment remaining entries of hessian.
                for (int i = 1; i < numParams; i++)
                {
                    ch.Assert(ioff == i * (i + 1) / 2);
                    // wi maps the i-th parameter back to its feature slot (bias excluded).
                    int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1;
                    Contracts.Assert(0 <= wi && wi < cursor.Features.Length);
                    var val = values[wi] * variance;
                    // Add the implicit first bias term to X'X
                    hessian[ioff++] += val;
                    // Add the remainder of X'X
                    for (int j = 0; j < i; j++)
                    {
                        int wj = weightIndices == null ? j : weightIndices[j + 1] - 1;
                        Contracts.Assert(0 <= wj && wj < cursor.Features.Length);
                        hessian[ioff++] += val * values[wj];
                    }
                }
                ch.Assert(ioff == hessian.Length);
            }
            else
            {
                var indices = cursor.Features.Indices;
                for (int ii = 0; ii < cursor.Features.Count; ++ii)
                {
                    int i = indices[ii];
                    int wi = i + 1;
                    // Skip slots whose weight is zero (not among the numParams parameters).
                    if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi))
                    {
                        continue;
                    }
                    Contracts.Assert(0 < wi && wi <= cursor.Features.Length);
                    int ioff = wi * (wi + 1) / 2;
                    var val = values[ii] * variance;
                    // Add the implicit first bias term to X'X
                    hessian[ioff] += val;
                    // Add the remainder of X'X
                    for (int jj = 0; jj <= ii; jj++)
                    {
                        int j = indices[jj];
                        int wj = j + 1;
                        if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj))
                        {
                            continue;
                        }
                        Contracts.Assert(0 < wj && wj <= cursor.Features.Length);
                        hessian[ioff + wj] += val * values[jj];
                    }
                }
            }
        }
    }
    // NOTE(review): the packed Hessian accumulated above is never consumed in this overload —
    // the sibling implementation inverts it (Cholesky via MKL) to obtain coefficient standard
    // errors before constructing the statistics. Confirm whether the standard-error step was
    // intentionally omitted here; otherwise the Hessian accumulation is dead work.
    _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
}