/// <summary>
/// Writes a plain-text dump of the model to <paramref name="writer"/>: the dimension, the rank,
/// the mean and projected mean vectors (only when the mean vector is dense), followed by a
/// "# V" section with one line per eigenvector listing its non-zero entries as "index:value".
/// </summary>
/// <param name="writer">The writer that receives the text.</param>
/// <param name="schema">Not referenced by this method.</param>
public void SaveAsText(TextWriter writer, RoleMappedSchema schema)
{
    writer.WriteLine("Dimension: {0}", _dimension);
    writer.WriteLine("Rank: {0}", _rank);

    // The mean vectors are only printed for a dense mean.
    if (_mean.IsDense)
    {
        writer.Write("Mean vector:");
        foreach (var entry in _mean.Items(all: true))
        {
            writer.Write(" {0}", entry.Value);
        }
        writer.WriteLine();

        writer.Write("Projected mean vector:");
        foreach (var projected in _meanProjected)
        {
            writer.Write(" {0}", projected);
        }
    }

    // Terminates the projected-mean line, or emits a blank line when the mean is not dense.
    writer.WriteLine();

    writer.WriteLine("# V");
    for (int row = 0; row < _rank; row++)
    {
        VBufferUtils.ForEachDefined(ref _eigenVectors[row],
            (slot, coefficient) =>
            {
                // Zero entries are omitted from the dump.
                if (coefficient == 0)
                {
                    return;
                }

                writer.Write(" {0}:{1}", slot, coefficient);
            });
        writer.WriteLine();
    }
}
/// <summary>
/// Reports the residual deviance, the null deviance and the AIC of the trained model through
/// <paramref name="ch"/>, and stores them in <c>_stats</c>.
/// </summary>
/// <param name="ch">Channel that receives the informational messages.</param>
/// <param name="cursorFactory">Cursor factory over the training data; only validated here.</param>
/// <param name="loss">Final value of the optimized objective; asserted to be non-negative.</param>
/// <param name="numParams">Number of model parameters; asserted to be at least <c>BiasCount</c>.</param>
protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams)
{
    Contracts.AssertValue(ch);
    Contracts.AssertValue(cursorFactory);
    Contracts.Assert(NumGoodRows > 0);
    Contracts.Assert(WeightSum > 0);
    Contracts.Assert(BiasCount == _numClasses);
    Contracts.Assert(loss >= 0);
    Contracts.Assert(numParams >= BiasCount);
    Contracts.Assert(CurrentWeights.IsDense);

    ch.Info("Model trained with {0} training examples.", NumGoodRows);

    // The deviance starts from the loss; the regularization terms are then subtracted.
    // The bias terms (the first BiasCount slots) are excluded from both penalties.
    float deviance = (float)(2 * loss * WeightSum);

    if (L2Weight > 0)
    {
        var l2Penalty = VectorUtils.NormSquared(CurrentWeights.Values, BiasCount, CurrentWeights.Length - BiasCount) * L2Weight;
        deviance -= l2Penalty;
    }

    if (L1Weight > 0)
    {
        Double absoluteSum = 0;
        VBufferUtils.ForEachDefined(ref CurrentWeights,
            (slot, weight) =>
            {
                if (slot < BiasCount)
                {
                    return;
                }

                absoluteSum += Math.Abs(weight);
            });
        deviance -= (float)absoluteSum * L1Weight * 2;
    }

    ch.Info("Residual Deviance: \t{0}", deviance);

    // Null deviance, i.e., the deviance of the null hypothesis.
    // Classes with a zero prior are skipped so that Math.Log is never evaluated at zero.
    float nullDeviance = 0;
    for (int classIndex = 0; classIndex < _numClasses; classIndex++)
    {
        Contracts.Assert(_prior[classIndex] >= 0);
        if (_prior[classIndex] != 0)
        {
            nullDeviance -= (float)(2 * _prior[classIndex] * Math.Log(_prior[classIndex] / WeightSum));
        }
    }

    ch.Info("Null Deviance: \t{0}", nullDeviance);

    // AIC.
    ch.Info("AIC: \t{0}", 2 * numParams + deviance);

    // REVIEW: Figure out how to compute the statistics for the coefficients.
    _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
}
/// <summary>
/// Prints the linear model as a single code statement of the form
/// "codeVariable = w1*name1 + w2*name2 + ... + bias;".
/// </summary>
/// <param name="writer">The writer that receives the statement; must not be null.</param>
/// <param name="weights">The model weights; slots whose magnitude is below Epsilon are omitted.</param>
/// <param name="bias">The bias, written as the last term.</param>
/// <param name="schema">Schema used to look up feature slot names; may be null.</param>
/// <param name="codeVariable">Name written on the left-hand side of the assignment.</param>
public static void SaveAsCode(TextWriter writer, ref VBuffer<Float> weights, Float bias,
    RoleMappedSchema schema, string codeVariable = "output")
{
    Contracts.CheckValue(writer, nameof(writer));
    Contracts.CheckValueOrNull(schema);

    var slotNames = default(VBuffer<ReadOnlyMemory<char>>);
    MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Length, ref slotNames);

    writer.Write(codeVariable);
    writer.Write(" = ");

    int termsWritten = 0;
    VBufferUtils.ForEachDefined(ref weights,
        (slot, weight) =>
        {
            // Skip weights that are (numerically) zero. Written as a negated ">=" so that
            // a NaN weight is skipped as well.
            if (!(Math.Abs(weight) >= Epsilon))
            {
                return;
            }

            if (termsWritten > 0)
            {
                writer.Write(" + ");
            }

            writer.Write(FloatUtils.ToRoundTripString(weight));
            writer.Write("*");

            // Fall back to a generated name when no slot names are available.
            if (slotNames.Count <= 0)
            {
                writer.Write("f_" + slot);
            }
            else
            {
                writer.Write(FeatureNameAsCode(slotNames.GetItemOrDefault(slot).ToString(), slot));
            }

            termsWritten++;
        });

    if (termsWritten > 0)
    {
        writer.Write(" + ");
    }

    writer.Write(FloatUtils.ToRoundTripString(bias));
    writer.WriteLine(";");
}
/// <summary>
/// Evaluates the original differentiable function with the L1 term injected: the L1 weight
/// times the sum over the non-bias slots of <paramref name="input"/> is added to the value
/// returned by the wrapped function.
/// </summary>
/// <param name="input">The point at which to evaluate.</param>
/// <param name="gradient">Passed through to the wrapped function.</param>
/// <param name="progress">Passed through to the wrapped function.</param>
/// <returns>The wrapped function value plus the weighted L1 term.</returns>
private Float EvalCore(ref VBuffer<Float> input, ref VBuffer<Float> gradient, IProgressChannelProvider progress)
{
    // REVIEW: Leverage Vector methods that use SSE.
    Float sum = 0;
    bool hasBias = _biasCount > 0;

    if (EnforceNonNegativity)
    {
        // The raw values are summed here, without taking absolute values.
        if (hasBias)
        {
            VBufferUtils.ForEachDefined(ref input,
                (slot, weight) =>
                {
                    if (slot >= _biasCount)
                    {
                        sum += weight;
                    }
                });
        }
        else
        {
            VBufferUtils.ForEachDefined(ref input, (slot, weight) => { sum += weight; });
        }
    }
    else
    {
        // General case: sum of absolute values.
        if (hasBias)
        {
            VBufferUtils.ForEachDefined(ref input,
                (slot, weight) =>
                {
                    if (slot >= _biasCount)
                    {
                        sum += Math.Abs(weight);
                    }
                });
        }
        else
        {
            VBufferUtils.ForEachDefined(ref input, (slot, weight) => { sum += Math.Abs(weight); });
        }
    }

    Float total = _l1weight * sum + _function(ref input, ref gradient, progress);
    return total;
}
/// <summary>
/// Reports the residual deviance, null deviance and AIC of the trained binary model through
/// <paramref name="ch"/>, then computes the standard errors of the coefficients from the inverse
/// of the Hessian (obtained through the MKL packed Cholesky routines) and stores everything in
/// <c>_stats</c>. When the packed Hessian would exceed <see cref="int.MaxValue"/> entries, a
/// warning is issued and the statistics are stored without standard errors.
/// </summary>
/// <param name="ch">Channel that receives the informational and warning messages.</param>
/// <param name="cursorFactory">Factory for cursors over the training data; used for a full pass.</param>
/// <param name="loss">Final value of the optimized objective; asserted to be non-negative.</param>
/// <param name="numParams">Number of parameters: the bias plus the non-zero weights.</param>
protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, Float loss, int numParams)
{
    Contracts.AssertValue(ch);
    Contracts.AssertValue(cursorFactory);
    Contracts.Assert(NumGoodRows > 0);
    Contracts.Assert(WeightSum > 0);
    Contracts.Assert(BiasCount == 1);
    Contracts.Assert(loss >= 0);
    Contracts.Assert(numParams >= BiasCount);
    Contracts.Assert(CurrentWeights.IsDense);

    ch.Info("Model trained with {0} training examples.", NumGoodRows);

    // Compute deviance: start with loss function.
    Float deviance = (Float)(2 * loss * WeightSum);

    if (L2Weight > 0)
    {
        // Need to subtract L2 regularization loss.
        // The bias term is not regularized.
        var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight;
        deviance -= regLoss;
    }

    if (L1Weight > 0)
    {
        // Need to subtract L1 regularization loss.
        // The bias term is not regularized.
        Double regLoss = 0;
        VBufferUtils.ForEachDefined(ref CurrentWeights,
            (ind, value) =>
            {
                if (ind >= BiasCount)
                {
                    regLoss += Math.Abs(value);
                }
            });
        deviance -= (Float)regLoss * L1Weight * 2;
    }

    ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0));

    // Compute null deviance, i.e., the deviance of null hypothesis.
    // The null deviance is reported as zero when the prior positive rate is within 1e-15 of 0 or 1.
    Double priorPosRate = _posWeight / WeightSum;
    Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1);
    Float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15)
        ? 0f
        : (Float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true));
    ch.Info("Null Deviance: \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1);

    // Compute AIC.
    ch.Info("AIC: \t{0}", 2 * numParams + deviance);

    // Show the coefficients statistics table.
    var featureColIdx = cursorFactory.Data.Schema.Feature.Index;
    var schema = cursorFactory.Data.Data.Schema;
    var featureLength = CurrentWeights.Length - BiasCount;
    var namesSpans = VBufferUtils.CreateEmpty<DvText>(featureLength);
    if (schema.HasSlotNames(featureColIdx, featureLength))
    {
        schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans);
    }
    Host.Assert(namesSpans.Length == featureLength);

    // Inverse mapping of non-zero weight slots.
    Dictionary<int, int> weightIndicesInvMap = null;

    // Indices of bias and non-zero weight slots.
    int[] weightIndices = null;

    // Whether all weights are non-zero.
    bool denseWeight = numParams == CurrentWeights.Length;

    // Extract non-zero indices of weight.
    if (!denseWeight)
    {
        weightIndices = new int[numParams];
        weightIndicesInvMap = new Dictionary<int, int>(numParams);
        weightIndices[0] = 0;
        weightIndicesInvMap[0] = 0;
        int j = 1;
        for (int i = 1; i < CurrentWeights.Length; i++)
        {
            if (CurrentWeights.Values[i] != 0)
            {
                weightIndices[j] = i;
                weightIndicesInvMap[i] = j++;
            }
        }

        Contracts.Assert(j == numParams);
    }

    // Compute the standard error of coefficients.
    long hessianDimension = (long)numParams * (numParams + 1) / 2;
    if (hessianDimension > int.MaxValue)
    {
        ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " +
            "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer " +
            "to reduce the number of parameters.");
        _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
        return;
    }

    // Building the variance-covariance matrix for parameters.
    // The layout of this algorithm is a packed row-major lower triangular matrix.
    // E.g., layout of indices for 4-by-4:
    // 0
    // 1 2
    // 3 4 5
    // 6 7 8 9
    var hessian = new Double[hessianDimension];

    // Initialize diagonal elements with L2 regularizers except for the first entry (index 0)
    // Since bias is not regularized.
    if (L2Weight > 0)
    {
        // i is the array index of the diagonal entry at iRow-th row and iRow-th column.
        // iRow is one-based.
        int i = 0;
        for (int iRow = 2; iRow <= numParams; iRow++)
        {
            i += iRow;
            hessian[i] = L2Weight;
        }

        Contracts.Assert(i == hessian.Length - 1);
    }

    // Initialize the remaining entries.
    var bias = CurrentWeights.Values[0];
    using (var cursor = cursorFactory.Create())
    {
        while (cursor.MoveNext())
        {
            var weight = cursor.Weight;
            var score = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features);

            // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example.
            // For the logistic function, p * (1 - p) equals 1 / (2 + 2 * cosh(score)).
            var variance = weight / (2 + 2 * Math.Cosh(score));

            // Increment the first entry of hessian.
            hessian[0] += variance;

            var values = cursor.Features.Values;
            if (cursor.Features.IsDense)
            {
                int ioff = 1;

                // Increment remaining entries of hessian.
                for (int i = 1; i < numParams; i++)
                {
                    // The product is widened to long: i * (i + 1) overflows int before the division
                    // once i exceeds roughly 46340, although the offset itself still fits.
                    ch.Assert(ioff == (long)i * (i + 1) / 2);
                    int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1;
                    Contracts.Assert(0 <= wi && wi < cursor.Features.Length);
                    var val = values[wi] * variance;

                    // Add the implicit first bias term to X'X
                    hessian[ioff++] += val;

                    // Add the remainder of X'X
                    for (int j = 0; j < i; j++)
                    {
                        int wj = weightIndices == null ? j : weightIndices[j + 1] - 1;
                        Contracts.Assert(0 <= wj && wj < cursor.Features.Length);
                        hessian[ioff++] += val * values[wj];
                    }
                }

                ch.Assert(ioff == hessian.Length);
            }
            else
            {
                var indices = cursor.Features.Indices;
                for (int ii = 0; ii < cursor.Features.Count; ++ii)
                {
                    int i = indices[ii];
                    int wi = i + 1;

                    // Features whose weight is zero do not take part in the Hessian.
                    if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi))
                    {
                        continue;
                    }

                    Contracts.Assert(0 < wi && wi <= cursor.Features.Length);

                    // Offset of the first entry of row wi; computed in long to avoid int overflow
                    // of the intermediate product. The result is below hessian.Length.
                    int ioff = (int)((long)wi * (wi + 1) / 2);
                    var val = values[ii] * variance;

                    // Add the implicit first bias term to X'X
                    hessian[ioff] += val;

                    // Add the remainder of X'X
                    for (int jj = 0; jj <= ii; jj++)
                    {
                        int j = indices[jj];
                        int wj = j + 1;
                        if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj))
                        {
                            continue;
                        }

                        Contracts.Assert(0 < wj && wj <= cursor.Features.Length);
                        hessian[ioff + wj] += val * values[jj];
                    }
                }
            }
        }
    }

    // Apply Cholesky Decomposition to find the inverse of the Hessian.
    Double[] invHessian = null;
    try
    {
        // First, find the Cholesky decomposition LL' of the Hessian.
        Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian);

        // Note that hessian is already modified at this point. It is no longer the original Hessian,
        // but instead represents the Cholesky decomposition L.
        // Also note that the following routine is supposed to consume the Cholesky decomposition L instead
        // of the original information matrix.
        Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian);

        // At this point, hessian should contain the inverse of the original Hessian matrix.
        // Swap hessian with invHessian to avoid confusion in the following context.
        Utils.Swap(ref hessian, ref invHessian);
        Contracts.Assert(hessian == null);
    }
    catch (DllNotFoundException)
    {
        throw ch.ExceptNotSupp("The MKL library (Microsoft.ML.MklImports.dll) or one of its dependencies is missing.");
    }

    Float[] stdErrorValues = new Float[numParams];
    stdErrorValues[0] = (Float)Math.Sqrt(invHessian[0]);

    for (int i = 1; i < numParams; i++)
    {
        // Initialize with inverse Hessian. The diagonal offset is computed in long to avoid
        // int overflow of the intermediate product.
        stdErrorValues[i] = (Single)invHessian[(long)i * (i + 1) / 2 + i];
    }

    if (L2Weight > 0)
    {
        // Iterate through all entries of inverse Hessian to make adjustment to variance.
        // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
        // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
        // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
        int ioffset = 1;
        for (int iRow = 1; iRow < numParams; iRow++)
        {
            for (int iCol = 0; iCol <= iRow; iCol++)
            {
                var entry = (Single)invHessian[ioffset];
                var adjustment = -L2Weight * entry * entry;
                stdErrorValues[iRow] -= adjustment;
                if (0 < iCol && iCol < iRow)
                {
                    stdErrorValues[iCol] -= adjustment;
                }

                ioffset++;
            }
        }

        Contracts.Assert(ioffset == invHessian.Length);
    }

    // The bias entry already holds a square root; convert the remaining variances.
    for (int i = 1; i < numParams; i++)
    {
        stdErrorValues[i] = (Float)Math.Sqrt(stdErrorValues[i]);
    }

    VBuffer<Float> stdErrors = new VBuffer<Float>(CurrentWeights.Length, numParams, stdErrorValues, weightIndices);
    _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, ref stdErrors);
}
/// <summary>
/// Runs the training procedure: decides the number of threads, makes a preparation pass over
/// <paramref name="data"/> (caching the examples in memory when multi-threading is used),
/// partitions the cached examples among the threads, runs the optimizer and, if requested,
/// reports the L1 selection and computes the training statistics.
/// </summary>
/// <param name="ch">Channel used for assertions, information and warnings.</param>
/// <param name="data">The training data.</param>
protected virtual void TrainCore(IChannel ch, RoleMappedData data)
{
    Host.AssertValue(ch);
    ch.AssertValue(data);

    // Compute the number of threads to use. The ctor should have verified that this will
    // produce a positive value.
    int threadCount;
    if (UseThreads)
    {
        threadCount = NumThreads ?? Environment.ProcessorCount;
    }
    else
    {
        threadCount = 1;
    }

    if (Host.ConcurrencyFactor > 0 && threadCount > Host.ConcurrencyFactor)
    {
        threadCount = Host.ConcurrencyFactor;
        ch.Warning("The number of threads specified in trainer arguments is larger than the concurrency factor "
            + "setting of the environment. Using {0} training threads instead.", threadCount);
    }

    ch.Assert(threadCount > 0);

    NumGoodRows = 0;
    WeightSum = 0;

    _features = null;
    _labels = null;
    _weights = null;
    if (threadCount > 1)
    {
        ch.Info("LBFGS multi-threading will attempt to load dataset into memory. In case of out-of-memory " +
            "issues, add 'numThreads=1' to the trainer arguments and 'cache=-' to the command line " +
            "arguments to turn off multi-threading.");

        // Initial capacity of the in-memory cache; the arrays grow on demand below.
        const int initialCapacity = 1000;
        _features = new VBuffer<float>[initialCapacity];
        _labels = new float[initialCapacity];
        if (data.Schema.Weight != null)
        {
            _weights = new float[initialCapacity];
        }
    }

    var cursorFactory = new FloatLabelCursor.Factory(data, CursOpt.Features | CursOpt.Label | CursOpt.Weight);

    long skippedRows;

    // REVIEW: This pass seems overly expensive for the benefit when multi-threading is off....
    using (var cursor = cursorFactory.Create())
    using (var pch = Host.StartProgressChannel("LBFGS data prep"))
    {
        // REVIEW: maybe it makes sense for the factory to capture the good row count after
        // the first successful cursoring?
        Double totalCount = data.Data.GetRowCount(true) ?? Double.NaN;

        long exCount = 0;
        pch.SetHeader(new ProgressHeader(null, new[] { "examples" }), e => e.SetProgress(0, exCount, totalCount));

        while (cursor.MoveNext())
        {
            WeightSum += cursor.Weight;
            if (ShowTrainingStats)
            {
                ProcessPriorDistribution(cursor.Label, cursor.Weight);
            }

            PreTrainingProcessInstance(cursor.Label, ref cursor.Features, cursor.Weight);
            exCount++;

            // Streaming case: nothing is cached.
            if (_features == null)
            {
                continue;
            }

            ch.Assert(cursor.KeptRowCount <= int.MaxValue);
            int slot = (int)cursor.KeptRowCount - 1;
            int required = slot + 1;

            Utils.EnsureSize(ref _features, required);
            Utils.EnsureSize(ref _labels, required);
            if (_weights != null)
            {
                Utils.EnsureSize(ref _weights, required);
                _weights[slot] = cursor.Weight;
            }

            // Take over the cursor's feature buffer rather than copying it.
            Utils.Swap(ref _features[slot], ref cursor.Features);
            _labels[slot] = cursor.Label;

            if (cursor.KeptRowCount >= int.MaxValue)
            {
                ch.Warning("Limiting data size for multi-threading");
                break;
            }
        }

        NumGoodRows = cursor.KeptRowCount;
        skippedRows = cursor.SkippedRowCount;
    }

    ch.Check(NumGoodRows > 0, NoTrainingInstancesMessage);
    if (skippedRows > 0)
    {
        ch.Warning("Skipped {0} instances with missing features/label/weight during training", skippedRows);
    }

    if (_features == null)
    {
        // Streaming, single-threaded case.
        _data = data;
        _cursorFactory = cursorFactory;
        ch.Assert(_numChunks == 0 && _data != null);
    }
    else
    {
        ch.Assert(threadCount > 1);

        // If there are so many threads that each only gets a small number (less than 10) of instances, trim
        // the number of threads so each gets a more reasonable number (100 or so). These numbers are pretty arbitrary,
        // but avoid the possibility of having no instances on some threads.
        if (threadCount > 1 && NumGoodRows / threadCount < 10)
        {
            int reduced = Math.Max(1, (int)NumGoodRows / 100);
            ch.Warning("Too few instances to use {0} threads, decreasing to {1} thread(s)", threadCount, reduced);
            threadCount = reduced;
        }

        ch.Assert(threadCount > 0);

        // Divide up the instances among the threads.
        _numChunks = threadCount;
        _ranges = new int[_numChunks + 1];
        int totalInstances = (int)NumGoodRows;
        int assigned = 0;
        for (int chunk = 0; chunk < threadCount; chunk++)
        {
            // Number of chunks left to fill.
            int chunksLeft = threadCount - chunk;
            ch.Assert(0 < chunksLeft && chunksLeft <= threadCount);

            // Size of this chunk: the remaining instances divided by the remaining chunks, rounded up.
            int chunkSize = (totalInstances - assigned + chunksLeft - 1) / chunksLeft;
            ch.Assert(0 < chunkSize && chunkSize <= totalInstances - assigned);

            assigned += chunkSize;
            _ranges[chunk + 1] = assigned;
        }

        _localLosses = new float[threadCount];
        _localGradients = new VBuffer<float>[threadCount - 1];
        int gradientLength = BiasCount + WeightCount;
        for (int g = 0; g < _localGradients.Length; g++)
        {
            _localGradients[g] = VBufferUtils.CreateEmpty<float>(gradientLength);
        }

        ch.Assert(_numChunks > 0 && _data == null);
    }

    VBuffer<float> initWeights;
    ITerminationCriterion terminationCriterion;
    Optimizer opt = InitializeOptimizer(ch, cursorFactory, out initWeights, out terminationCriterion);
    opt.Quiet = Quiet;

    float loss;
    try
    {
        opt.Minimize(DifferentiableFunction, ref initWeights, terminationCriterion, ref CurrentWeights, out loss);
    }
    catch (Optimizer.PrematureConvergenceException e)
    {
        if (!Quiet)
        {
            ch.Warning("Premature convergence occurred. The OptimizationTolerance may be set too small. {0}", e.Message);
        }

        // Fall back to the state carried by the exception.
        CurrentWeights = e.State.X;
        loss = e.State.Value;
    }

    ch.Assert(CurrentWeights.Length == BiasCount + WeightCount);

    // The parameter count is the bias plus the non-zero weights; the weights are only
    // counted when the number is actually needed.
    int numParams = BiasCount;
    bool reportL1Selection = L1Weight > 0 && !Quiet;
    if (reportL1Selection || ShowTrainingStats)
    {
        VBufferUtils.ForEachDefined(ref CurrentWeights,
            (index, value) =>
            {
                if (index < BiasCount || value == 0)
                {
                    return;
                }

                numParams++;
            });

        if (reportL1Selection)
        {
            ch.Info("L1 regularization selected {0} of {1} weights.", numParams, BiasCount + WeightCount);
        }
    }

    if (ShowTrainingStats)
    {
        ComputeTrainingStatistics(ch, cursorFactory, loss, numParams);
    }
}
/// <summary>
/// Build a Bing TreeEnsemble .ini representation of the given predictor.
/// One [Input:k] section is emitted for every weight whose magnitude is at least Epsilon,
/// followed by a single linear aggregator evaluator holding the bias and the weights.
/// Weights and bias are formatted with the invariant culture so that the decimal separator
/// in the generated file does not depend on the current thread culture.
/// </summary>
/// <param name="weights">The model weights.</param>
/// <param name="bias">The model bias.</param>
/// <param name="predictor">Only referenced by the disabled [Comments] section.</param>
/// <param name="schema">Schema used to look up feature slot names.</param>
/// <param name="calibrator">When not null, a calibrator evaluator is appended to the ini.</param>
/// <returns>The ini content.</returns>
public static string LinearModelAsIni(ref VBuffer<Float> weights, Float bias, IPredictor predictor = null,
    RoleMappedSchema schema = null, PlattCalibrator calibrator = null)
{
    // TODO: Might need to consider a max line length for the Weights list, requiring us to split it up into
    //   multiple evaluators
    StringBuilder inputBuilder = new StringBuilder();
    StringBuilder aggregatedNodesBuilder = new StringBuilder("Nodes=");
    StringBuilder weightsBuilder = new StringBuilder("Weights=");

    var featureNames = default(VBuffer<ReadOnlyMemory<char>>);
    MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Length, ref featureNames);

    // The ini file is machine-readable, so numbers must not be formatted with the current culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    int numNonZeroWeights = 0;
    const string weightsSep = "\t";
    VBufferUtils.ForEachDefined(ref weights,
        (idx, value) =>
        {
            if (Math.Abs(value - 0) >= Epsilon)
            {
                numNonZeroWeights++;

                var name = featureNames.GetItemOrDefault(idx);

                inputBuilder.AppendLine("[Input:" + numNonZeroWeights + "]");
                inputBuilder.AppendLine("Name=" + (featureNames.Count == 0 ? "Feature_" + idx : name.IsEmpty ? $"f{idx}" : name.ToString()));
                inputBuilder.AppendLine("Transform=linear");
                inputBuilder.AppendLine("Slope=1");
                inputBuilder.AppendLine("Intercept=0");
                inputBuilder.AppendLine();

                aggregatedNodesBuilder.Append("I:" + numNonZeroWeights + weightsSep);
                weightsBuilder.Append(value.ToString(invariant) + weightsSep);
            }
        });

    StringBuilder builder = new StringBuilder();
    builder.AppendLine("[TreeEnsemble]");
    builder.AppendLine("Inputs=" + numNonZeroWeights);
    builder.AppendLine("Evaluators=1");
    builder.AppendLine();

    builder.AppendLine(inputBuilder.ToString());

    builder.AppendLine("[Evaluator:1]");
    builder.AppendLine("EvaluatorType=Aggregator");
    builder.AppendLine("Type=Linear");
    builder.AppendLine("Bias=" + bias.ToString(invariant));
    builder.AppendLine("NumNodes=" + numNonZeroWeights);

    // Trim removes the trailing separator left by the per-weight appends.
    builder.AppendLine(aggregatedNodesBuilder.ToString().Trim());
    builder.AppendLine(weightsBuilder.ToString().Trim());

#if false // REVIEW: This should be done by the caller using the actual training args!
    builder.AppendLine();
    builder.AppendLine("[Comments]");
    builder.Append("Trained by TLC");
    if (predictor != null)
    {
        builder.Append(" as /cl " + predictor.GetType().Name);
        if (predictor is IInitializable)
        {
            string settings = string.Join(";", (predictor as IInitializable).GetSettings());
            if (!string.IsNullOrEmpty(settings))
            {
                builder.Append(" /cls " + settings);
            }
        }
    }
#endif

    string ini = builder.ToString();

    // Add the calibration if the model was trained with calibration
    if (calibrator != null)
    {
        string calibratorEvaluatorIni = IniFileUtils.GetCalibratorEvaluatorIni(ini, calibrator);
        ini = IniFileUtils.AddEvaluator(ini, calibratorEvaluatorIni);
    }

    return ini;
}
/// <summary>
/// Reports the residual deviance, null deviance and AIC of the trained binary model through
/// <paramref name="ch"/>, accumulates the packed lower-triangular Hessian over a full pass of the
/// training data, and stores the summary statistics in <c>_stats</c>.
/// </summary>
/// <param name="ch">Channel that receives the informational and warning messages.</param>
/// <param name="cursorFactory">Factory for cursors over the training data; used for a full pass.</param>
/// <param name="loss">Final value of the optimized objective; asserted to be non-negative.</param>
/// <param name="numParams">Number of parameters: the bias plus the non-zero weights.</param>
protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams)
{
    Contracts.AssertValue(ch);
    Contracts.AssertValue(cursorFactory);
    Contracts.Assert(NumGoodRows > 0);
    Contracts.Assert(WeightSum > 0);
    Contracts.Assert(BiasCount == 1);
    Contracts.Assert(loss >= 0);
    Contracts.Assert(numParams >= BiasCount);
    Contracts.Assert(CurrentWeights.IsDense);

    ch.Info("Model trained with {0} training examples.", NumGoodRows);

    // Compute deviance: start with loss function.
    float deviance = (float)(2 * loss * WeightSum);

    if (L2Weight > 0)
    {
        // Need to subtract L2 regularization loss.
        // The bias term is not regularized.
        var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight;
        deviance -= regLoss;
    }

    if (L1Weight > 0)
    {
        // Need to subtract L1 regularization loss.
        // The bias term is not regularized.
        Double regLoss = 0;
        VBufferUtils.ForEachDefined(ref CurrentWeights,
            (ind, value) =>
            {
                if (ind >= BiasCount)
                {
                    regLoss += Math.Abs(value);
                }
            });
        deviance -= (float)regLoss * L1Weight * 2;
    }

    ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0));

    // Compute null deviance, i.e., the deviance of null hypothesis.
    // The null deviance is reported as zero when the prior positive rate is within 1e-15 of 0 or 1.
    Double priorPosRate = _posWeight / WeightSum;
    Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1);
    float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15)
        ? 0f
        : (float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true));
    ch.Info("Null Deviance: \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1);

    // Compute AIC.
    ch.Info("AIC: \t{0}", 2 * numParams + deviance);

    // Show the coefficients statistics table.
    var featureColIdx = cursorFactory.Data.Schema.Feature.Index;
    var schema = cursorFactory.Data.Data.Schema;
    var featureLength = CurrentWeights.Length - BiasCount;
    var namesSpans = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(featureLength);
    if (schema.HasSlotNames(featureColIdx, featureLength))
    {
        schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans);
    }
    Host.Assert(namesSpans.Length == featureLength);

    // Inverse mapping of non-zero weight slots.
    Dictionary<int, int> weightIndicesInvMap = null;

    // Indices of bias and non-zero weight slots.
    int[] weightIndices = null;

    // Whether all weights are non-zero.
    bool denseWeight = numParams == CurrentWeights.Length;

    // Extract non-zero indices of weight.
    if (!denseWeight)
    {
        weightIndices = new int[numParams];
        weightIndicesInvMap = new Dictionary<int, int>(numParams);
        weightIndices[0] = 0;
        weightIndicesInvMap[0] = 0;
        int j = 1;
        for (int i = 1; i < CurrentWeights.Length; i++)
        {
            if (CurrentWeights.Values[i] != 0)
            {
                weightIndices[j] = i;
                weightIndicesInvMap[i] = j++;
            }
        }

        Contracts.Assert(j == numParams);
    }

    // Compute the standard error of coefficients.
    long hessianDimension = (long)numParams * (numParams + 1) / 2;
    if (hessianDimension > int.MaxValue)
    {
        ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " +
            "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer " +
            "to reduce the number of parameters.");
        _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
        return;
    }

    // Building the variance-covariance matrix for parameters.
    // The layout of this algorithm is a packed row-major lower triangular matrix.
    // E.g., layout of indices for 4-by-4:
    // 0
    // 1 2
    // 3 4 5
    // 6 7 8 9
    var hessian = new Double[hessianDimension];

    // Initialize diagonal elements with L2 regularizers except for the first entry (index 0)
    // Since bias is not regularized.
    if (L2Weight > 0)
    {
        // i is the array index of the diagonal entry at iRow-th row and iRow-th column.
        // iRow is one-based.
        int i = 0;
        for (int iRow = 2; iRow <= numParams; iRow++)
        {
            i += iRow;
            hessian[i] = L2Weight;
        }

        Contracts.Assert(i == hessian.Length - 1);
    }

    // Initialize the remaining entries.
    var bias = CurrentWeights.Values[0];
    using (var cursor = cursorFactory.Create())
    {
        while (cursor.MoveNext())
        {
            var weight = cursor.Weight;
            var score = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features);

            // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example.
            // For the logistic function, p * (1 - p) equals 1 / (2 + 2 * cosh(score)).
            var variance = weight / (2 + 2 * Math.Cosh(score));

            // Increment the first entry of hessian.
            hessian[0] += variance;

            var values = cursor.Features.Values;
            if (cursor.Features.IsDense)
            {
                int ioff = 1;

                // Increment remaining entries of hessian.
                for (int i = 1; i < numParams; i++)
                {
                    // The product is widened to long: i * (i + 1) overflows int before the division
                    // once i exceeds roughly 46340, although the offset itself still fits.
                    ch.Assert(ioff == (long)i * (i + 1) / 2);
                    int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1;
                    Contracts.Assert(0 <= wi && wi < cursor.Features.Length);
                    var val = values[wi] * variance;

                    // Add the implicit first bias term to X'X
                    hessian[ioff++] += val;

                    // Add the remainder of X'X
                    for (int j = 0; j < i; j++)
                    {
                        int wj = weightIndices == null ? j : weightIndices[j + 1] - 1;
                        Contracts.Assert(0 <= wj && wj < cursor.Features.Length);
                        hessian[ioff++] += val * values[wj];
                    }
                }

                ch.Assert(ioff == hessian.Length);
            }
            else
            {
                var indices = cursor.Features.Indices;
                for (int ii = 0; ii < cursor.Features.Count; ++ii)
                {
                    int i = indices[ii];
                    int wi = i + 1;

                    // Features whose weight is zero do not take part in the Hessian.
                    if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi))
                    {
                        continue;
                    }

                    Contracts.Assert(0 < wi && wi <= cursor.Features.Length);

                    // Offset of the first entry of row wi; computed in long to avoid int overflow
                    // of the intermediate product. The result is below hessian.Length.
                    int ioff = (int)((long)wi * (wi + 1) / 2);
                    var val = values[ii] * variance;

                    // Add the implicit first bias term to X'X
                    hessian[ioff] += val;

                    // Add the remainder of X'X
                    for (int jj = 0; jj <= ii; jj++)
                    {
                        int j = indices[jj];
                        int wj = j + 1;
                        if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj))
                        {
                            continue;
                        }

                        Contracts.Assert(0 < wj && wj <= cursor.Features.Length);
                        hessian[ioff + wj] += val * values[jj];
                    }
                }
            }
        }
    }

    // NOTE(review): the accumulated Hessian is not consumed below, so no standard errors are
    // produced by this method - confirm whether a consumer of the Hessian is missing here or
    // whether the pass over the data above can be dropped.
    _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
}