Example #1
        /// <summary>
        /// Initializes a new instance of the <see cref="MulticlassLogisticRegressionPredictor"/> class.
        /// This constructor is called by <see cref="SdcaMultiClassTrainer"/> to create the predictor.
        /// </summary>
        /// <param name="env">The host environment.</param>
        /// <param name="weights">The array of weights vectors. It should contain <paramref name="numClasses"/> weights.</param>
        /// <param name="bias">The array of biases. It should contain contain <paramref name="numClasses"/> weights.</param>
        /// <param name="numClasses">The number of classes for multi-class classification. Must be at least 2.</param>
        /// <param name="numFeatures">The logical length of the feature vector.</param>
        /// <param name="labelNames">The optional label names. If specified not null, it should have the same length as <paramref name="numClasses"/>.</param>
        /// <param name="stats">The model statistics.</param>
        public MulticlassLogisticRegressionPredictor(IHostEnvironment env, VBuffer<float>[] weights, float[] bias, int numClasses, int numFeatures, string[] labelNames, LinearModelStatistics stats = null)
            : base(env, RegistrationName)
        {
            Contracts.CheckValue(weights, nameof(weights));
            Contracts.CheckValue(bias, nameof(bias));
            Contracts.Check(numClasses >= 2, "numClasses must be at least 2.");
            _numClasses = numClasses;
            Contracts.Check(numFeatures >= 1, "numFeatures must be positive.");
            _numFeatures = numFeatures;
            Contracts.Check(Utils.Size(weights) == _numClasses);
            Contracts.Check(Utils.Size(bias) == _numClasses);
            _weights = new VBuffer<float>[_numClasses];
            _biases  = new float[_numClasses];
            for (int iClass = 0; iClass < _numClasses; iClass++)
            {
                Contracts.Assert(weights[iClass].Length == _numFeatures);
                weights[iClass].CopyTo(ref _weights[iClass]);
                _biases[iClass] = bias[iClass];
            }

            if (_weights.All(v => v.IsDense))
            {
                _weightsDense = _weights;
            }

            InputType  = new VectorType(NumberType.R4, _numFeatures);
            OutputType = new VectorType(NumberType.R4, _numClasses);

            Contracts.Assert(labelNames == null || labelNames.Length == numClasses);
            _labelNames = labelNames;

            Contracts.AssertValueOrNull(stats);
            _stats = stats;
        }
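A minimal standalone sketch of the shape contract this constructor enforces, using plain arrays in place of ML.NET's VBuffer (the class and method names here are illustrative, not part of the library):

using System;

static class ShapeContractSketch
{
    // Validates the same invariants the predictor constructor asserts:
    // one weight vector per class, each of length numFeatures, and one bias per class.
    public static void CheckShapes(float[][] weights, float[] bias, int numClasses, int numFeatures)
    {
        if (numClasses < 2)
            throw new ArgumentOutOfRangeException(nameof(numClasses), "numClasses must be at least 2.");
        if (numFeatures < 1)
            throw new ArgumentOutOfRangeException(nameof(numFeatures), "numFeatures must be positive.");
        if (weights == null || weights.Length != numClasses)
            throw new ArgumentException("Expected one weight vector per class.", nameof(weights));
        if (bias == null || bias.Length != numClasses)
            throw new ArgumentException("Expected one bias per class.", nameof(bias));
        foreach (var w in weights)
        {
            if (w == null || w.Length != numFeatures)
                throw new ArgumentException("Each weight vector must have numFeatures entries.", nameof(weights));
        }
    }
}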
Example #2
        protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValue(cursorFactory);
            Contracts.Assert(NumGoodRows > 0);
            Contracts.Assert(WeightSum > 0);
            Contracts.Assert(BiasCount == _numClasses);
            Contracts.Assert(loss >= 0);
            Contracts.Assert(numParams >= BiasCount);
            Contracts.Assert(CurrentWeights.IsDense);

            ch.Info("Model trained with {0} training examples.", NumGoodRows);
            // Compute deviance: start with loss function.
            float deviance = (float)(2 * loss * WeightSum);

            if (L2Weight > 0)
            {
                // Need to subtract L2 regularization loss.
                // The bias term is not regularized.
                var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, BiasCount, CurrentWeights.Length - BiasCount) * L2Weight;
                deviance -= regLoss;
            }

            if (L1Weight > 0)
            {
                // Need to subtract L1 regularization loss.
                // The bias term is not regularized.
                Double regLoss = 0;
                VBufferUtils.ForEachDefined(ref CurrentWeights,
                    (ind, value) =>
                    {
                        if (ind >= BiasCount)
                            regLoss += Math.Abs(value);
                    });
                deviance -= (float)regLoss * L1Weight * 2;
            }

            ch.Info("Residual Deviance: \t{0}", deviance);

            // Compute the null deviance, i.e., the deviance of the null hypothesis.
            // Classes with zero prior weight contribute nothing and are skipped.
            float nullDeviance = 0;

            for (int iLabel = 0; iLabel < _numClasses; iLabel++)
            {
                Contracts.Assert(_prior[iLabel] >= 0);
                if (_prior[iLabel] == 0)
                {
                    continue;
                }

                nullDeviance -= (float)(2 * _prior[iLabel] * Math.Log(_prior[iLabel] / WeightSum));
            }
            ch.Info("Null Deviance:    \t{0}", nullDeviance);

            // Compute AIC.
            ch.Info("AIC:              \t{0}", 2 * numParams + deviance);

            // REVIEW: Figure out how to compute the statistics for the coefficients.
            _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
        }
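The arithmetic above is compact, so a small self-contained sketch may help: residual deviance starts at 2 * loss * WeightSum, the null deviance of the multiclass null model is -2 * sum_k n_k * ln(n_k / N), and AIC is 2 * numParams + deviance. The numbers below are hypothetical; only the formulas are read off the method above.

using System;

static class DevianceSketch
{
    static void Main()
    {
        // Toy class-weight totals (the _prior array) and their sum (WeightSum).
        double[] prior = { 30, 70 };
        double weightSum = 100;

        // Null deviance of the multiclass null model: -2 * sum_k n_k * ln(n_k / N).
        double nullDeviance = 0;
        foreach (var n in prior)
        {
            if (n > 0)
                nullDeviance -= 2 * n * Math.Log(n / weightSum);
        }

        // Residual deviance starts from the weighted-average training loss.
        double loss = 0.45;                       // hypothetical average log-loss
        double deviance = 2 * loss * weightSum;   // 90

        int numParams = 5;                        // hypothetical parameter count
        Console.WriteLine($"Null deviance: {nullDeviance:F2}");     // ~122.17
        Console.WriteLine($"Residual deviance: {deviance:F2}");
        Console.WriteLine($"AIC: {2 * numParams + deviance:F2}");
    }
}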
Example #3
        private LinearBinaryPredictor(IHostEnvironment env, ModelLoadContext ctx)
            : base(env, RegistrationName, ctx)
        {
            // For model versions 0x00020001 and earlier, there are no model statistics.
            if (ctx.Header.ModelVerWritten <= 0x00020001)
            {
                return;
            }

            // *** Binary format ***
            // (Base class)
            // LinearModelStatistics: model statistics (optional, in a separate stream)

            string statsDir = Path.Combine(ctx.Directory ?? "", ModelStatsSubModelFilename);

            using (var statsEntry = ctx.Repository.OpenEntryOrNull(statsDir, ModelLoadContext.ModelStreamName))
            {
                if (statsEntry == null)
                {
                    _stats = null;
                }
                else
                {
                    using (var statsCtx = new ModelLoadContext(ctx.Repository, statsEntry, statsDir))
                        _stats = LinearModelStatistics.Create(Host, statsCtx);
                }
            }
        }
        public static bool TryGetBiasStatistics(LinearModelStatistics stats, Single bias, out Single stdError, out Single zScore, out Single pValue)
        {
            if (!stats._coeffStdError.HasValue)
            {
                stdError = 0;
                zScore   = 0;
                pValue   = 0;
                return false;
            }

            const Double sqrt2 = 1.41421356237; // Math.Sqrt(2);

            stdError = stats._coeffStdError.Value.Values[0];
            Contracts.Assert(stdError == stats._coeffStdError.Value.GetItemOrDefault(0));
            zScore = bias / stdError;
            pValue = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScore / sqrt2));
            return true;
        }
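TryGetBiasStatistics converts a z-score to a two-sided p-value via P(|Z| > z) = 1 - erf(|z| / sqrt(2)). A self-contained sketch of that step, substituting the Abramowitz-Stegun 7.1.26 approximation for the internal ProbabilityFunctions.Erf (all names and inputs here are illustrative):

using System;

static class PValueSketch
{
    // Abramowitz & Stegun 7.1.26 approximation of erf(x); max absolute error ~1.5e-7.
    static double Erf(double x)
    {
        double sign = x < 0 ? -1 : 1;
        x = Math.Abs(x);
        double t = 1 / (1 + 0.3275911 * x);
        double poly = ((((1.061405429 * t - 1.453152027) * t + 1.421413741) * t - 0.284496736) * t + 0.254829592) * t;
        return sign * (1 - poly * Math.Exp(-x * x));
    }

    static void Main()
    {
        double bias = 1.2, stdError = 0.5;   // hypothetical coefficient and standard error
        double z = bias / stdError;
        // Two-sided p-value under a standard normal: P(|Z| > z) = 1 - erf(|z| / sqrt(2)).
        double p = 1 - Erf(Math.Abs(z) / Math.Sqrt(2));
        Console.WriteLine($"z = {z}, p = {p:F4}");   // z = 2.4, p ~ 0.0164
    }
}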
Example #5
        internal MulticlassLogisticRegressionPredictor(IHostEnvironment env, ref VBuffer<float> weights, int numClasses, int numFeatures, string[] labelNames, LinearModelStatistics stats = null)
            : base(env, RegistrationName)
        {
            Contracts.Assert(weights.Length == numClasses + numClasses * numFeatures);
            _numClasses  = numClasses;
            _numFeatures = numFeatures;

            // weights contains both bias and feature weights in a flat vector
            // Biases are stored in the first _numClass elements
            // followed by one weight vector for each class, in turn, all concatenated
            // (i.e.: in "row major", if we encode each weight vector as a row of a matrix)
            Contracts.Assert(weights.Length == _numClasses + _numClasses * _numFeatures);

            _biases = new float[_numClasses];
            for (int i = 0; i < _biases.Length; i++)
            {
                weights.GetItemOrDefault(i, ref _biases[i]);
            }
            _weights = new VBuffer<float>[_numClasses];
            for (int i = 0; i < _weights.Length; i++)
            {
                weights.CopyTo(ref _weights[i], _numClasses + i * _numFeatures, _numFeatures);
            }
            if (_weights.All(v => v.IsDense))
            {
                _weightsDense = _weights;
            }

            InputType  = new VectorType(NumberType.R4, _numFeatures);
            OutputType = new VectorType(NumberType.R4, _numClasses);

            Contracts.Assert(labelNames == null || labelNames.Length == numClasses);
            _labelNames = labelNames;

            Contracts.AssertValueOrNull(stats);
            _stats = stats;
        }
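The flat weight vector consumed by this constructor packs numClasses biases followed by numClasses row-major weight vectors. A small sketch of that index mapping, with illustrative names rather than library API:

using System;

static class FlatLayoutSketch
{
    // Bias of class c lives at flat[c]; weight (class c, feature f) lives at
    // flat[numClasses + c * numFeatures + f] -- the "row major" layout described above.
    static float GetBias(float[] flat, int c) => flat[c];

    static float GetWeight(float[] flat, int numClasses, int numFeatures, int c, int f)
        => flat[numClasses + c * numFeatures + f];

    static void Main()
    {
        int numClasses = 2, numFeatures = 3;
        // [b0, b1, w00, w01, w02, w10, w11, w12]
        float[] flat = { 0.1f, 0.2f, 1, 2, 3, 4, 5, 6 };
        Console.WriteLine(GetBias(flat, 1));                               // 0.2
        Console.WriteLine(GetWeight(flat, numClasses, numFeatures, 1, 2)); // 6
    }
}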
Example #6
        protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, Float loss, int numParams)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValue(cursorFactory);
            Contracts.Assert(NumGoodRows > 0);
            Contracts.Assert(WeightSum > 0);
            Contracts.Assert(BiasCount == 1);
            Contracts.Assert(loss >= 0);
            Contracts.Assert(numParams >= BiasCount);
            Contracts.Assert(CurrentWeights.IsDense);

            ch.Info("Model trained with {0} training examples.", NumGoodRows);

            // Compute deviance: start with loss function.
            Float deviance = (Float)(2 * loss * WeightSum);

            if (L2Weight > 0)
            {
                // Need to subtract L2 regularization loss.
                // The bias term is not regularized.
                var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight;
                deviance -= regLoss;
            }

            if (L1Weight > 0)
            {
                // Need to subtract L1 regularization loss.
                // The bias term is not regularized.
                Double regLoss = 0;
                VBufferUtils.ForEachDefined(ref CurrentWeights,
                    (ind, value) =>
                    {
                        if (ind >= BiasCount)
                            regLoss += Math.Abs(value);
                    });
                deviance -= (Float)regLoss * L1Weight * 2;
            }

            ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0));

            // Compute the null deviance, i.e., the deviance of the null hypothesis.
            // A degenerate prior positive rate (within 1e-15 of 0 or 1) yields a null deviance of zero.
            Double priorPosRate = _posWeight / WeightSum;

            Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1);
            Float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ?
                                 0f : (Float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true));

            ch.Info("Null Deviance:     \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1);

            // Compute AIC.
            ch.Info("AIC:               \t{0}", 2 * numParams + deviance);

            // Show the coefficients statistics table.
            var featureColIdx = cursorFactory.Data.Schema.Feature.Index;
            var schema        = cursorFactory.Data.Data.Schema;
            var featureLength = CurrentWeights.Length - BiasCount;
            var namesSpans    = VBufferUtils.CreateEmpty<DvText>(featureLength);

            if (schema.HasSlotNames(featureColIdx, featureLength))
            {
                schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans);
            }
            Host.Assert(namesSpans.Length == featureLength);

            // Inverse mapping of non-zero weight slots.
            Dictionary<int, int> weightIndicesInvMap = null;

            // Indices of bias and non-zero weight slots.
            int[] weightIndices = null;

            // Whether all weights are non-zero.
            bool denseWeight = numParams == CurrentWeights.Length;

            // Extract non-zero indices of weight.
            if (!denseWeight)
            {
                weightIndices          = new int[numParams];
                weightIndicesInvMap    = new Dictionary<int, int>(numParams);
                weightIndices[0]       = 0;
                weightIndicesInvMap[0] = 0;
                int j = 1;
                for (int i = 1; i < CurrentWeights.Length; i++)
                {
                    if (CurrentWeights.Values[i] != 0)
                    {
                        weightIndices[j]       = i;
                        weightIndicesInvMap[i] = j++;
                    }
                }

                Contracts.Assert(j == numParams);
            }

            // Compute the standard error of coefficients.
            long hessianDimension = (long)numParams * (numParams + 1) / 2;

            if (hessianDimension > int.MaxValue)
            {
                ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " +
                           "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" +
                           "to reduce the number of parameters.");
                _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
                return;
            }

            // Building the variance-covariance matrix for parameters.
            // The layout of this algorithm is a packed row-major lower triangular matrix.
            // E.g., layout of indices for 4-by-4:
            // 0
            // 1 2
            // 3 4 5
            // 6 7 8 9
            var hessian = new Double[hessianDimension];

            // Initialize diagonal elements with the L2 regularizer, except for the first entry (index 0),
            // since the bias is not regularized.
            if (L2Weight > 0)
            {
                // i is the array index of the diagonal entry at iRow-th row and iRow-th column.
                // iRow is one-based.
                int i = 0;
                for (int iRow = 2; iRow <= numParams; iRow++)
                {
                    i         += iRow;
                    hessian[i] = L2Weight;
                }

                Contracts.Assert(i == hessian.Length - 1);
            }

            // Initialize the remaining entries.
            var bias = CurrentWeights.Values[0];

            using (var cursor = cursorFactory.Create())
            {
                while (cursor.MoveNext())
                {
                    var label  = cursor.Label;
                    var weight = cursor.Weight;
                    var score  = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features);
                    // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example.
                    var variance = weight / (2 + 2 * Math.Cosh(score));

                    // Increment the first entry of hessian.
                    hessian[0] += variance;

                    var values = cursor.Features.Values;
                    if (cursor.Features.IsDense)
                    {
                        int ioff = 1;

                        // Increment remaining entries of hessian.
                        for (int i = 1; i < numParams; i++)
                        {
                            ch.Assert(ioff == i * (i + 1) / 2);
                            int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1;
                            Contracts.Assert(0 <= wi && wi < cursor.Features.Length);
                            var val = values[wi] * variance;
                            // Add the implicit first bias term to X'X
                            hessian[ioff++] += val;
                            // Add the remainder of X'X
                            for (int j = 0; j < i; j++)
                            {
                                int wj = weightIndices == null ? j : weightIndices[j + 1] - 1;
                                Contracts.Assert(0 <= wj && wj < cursor.Features.Length);
                                hessian[ioff++] += val * values[wj];
                            }
                        }
                        ch.Assert(ioff == hessian.Length);
                    }
                    else
                    {
                        var indices = cursor.Features.Indices;
                        for (int ii = 0; ii < cursor.Features.Count; ++ii)
                        {
                            int i  = indices[ii];
                            int wi = i + 1;
                            if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi))
                            {
                                continue;
                            }

                            Contracts.Assert(0 < wi && wi <= cursor.Features.Length);
                            int ioff = wi * (wi + 1) / 2;
                            var val  = values[ii] * variance;
                            // Add the implicit first bias term to X'X
                            hessian[ioff] += val;
                            // Add the remainder of X'X
                            for (int jj = 0; jj <= ii; jj++)
                            {
                                int j  = indices[jj];
                                int wj = j + 1;
                                if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj))
                                {
                                    continue;
                                }

                                Contracts.Assert(0 < wj && wj <= cursor.Features.Length);
                                hessian[ioff + wj] += val * values[jj];
                            }
                        }
                    }
                }
            }

            // Apply Cholesky Decomposition to find the inverse of the Hessian.
            Double[] invHessian = null;
            try
            {
                // First, find the Cholesky decomposition LL' of the Hessian.
                Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian);
                // Note that hessian is already modified at this point. It is no longer the original Hessian,
                // but instead represents the Cholesky decomposition L.
                // Also note that the following routine is supposed to consume the Cholesky decomposition L instead
                // of the original information matrix.
                Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian);
                // At this point, hessian should contain the inverse of the original Hessian matrix.
                // Swap hessian with invHessian to avoid confusion in the following context.
                Utils.Swap(ref hessian, ref invHessian);
                Contracts.Assert(hessian == null);
            }
            catch (DllNotFoundException)
            {
                throw ch.ExceptNotSupp("The MKL library (Microsoft.ML.MklImports.dll) or one of its dependencies is missing.");
            }

            Float[] stdErrorValues = new Float[numParams];
            stdErrorValues[0] = (Float)Math.Sqrt(invHessian[0]);

            for (int i = 1; i < numParams; i++)
            {
                // Initialize with inverse Hessian.
                stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i];
            }

            if (L2Weight > 0)
            {
                // Iterate through all entries of inverse Hessian to make adjustment to variance.
                // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
                // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
                // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
                int ioffset = 1;
                for (int iRow = 1; iRow < numParams; iRow++)
                {
                    for (int iCol = 0; iCol <= iRow; iCol++)
                    {
                        var entry      = (Single)invHessian[ioffset];
                        var adjustment = -L2Weight * entry * entry;
                        stdErrorValues[iRow] -= adjustment;
                        if (0 < iCol && iCol < iRow)
                        {
                            stdErrorValues[iCol] -= adjustment;
                        }
                        ioffset++;
                    }
                }

                Contracts.Assert(ioffset == invHessian.Length);
            }

            for (int i = 1; i < numParams; i++)
            {
                stdErrorValues[i] = (Float)Math.Sqrt(stdErrorValues[i]);
            }

            VBuffer<Float> stdErrors = new VBuffer<Float>(CurrentWeights.Length, numParams, stdErrorValues, weightIndices);

            _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, ref stdErrors);
        }
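The packed row-major lower-triangular layout described in the comments above maps entry (r, c), zero-based with c <= r, to slot r * (r + 1) / 2 + c; the diagonal-stepping idiom (i += iRow) used to seed the L2 terms lands exactly on those diagonal slots. A quick sketch verifying both (illustrative code, not library API):

using System;

static class PackedTriangularSketch
{
    // Zero-based packed row-major lower-triangular index for (r, c) with c <= r.
    static int PackedIndex(int r, int c) => r * (r + 1) / 2 + c;

    static void Main()
    {
        // Reproduces the 4-by-4 layout from the comment above:
        // 0 / 1 2 / 3 4 5 / 6 7 8 9
        for (int r = 0; r < 4; r++)
        {
            for (int c = 0; c <= r; c++)
                Console.Write(PackedIndex(r, c) + " ");
            Console.WriteLine();
        }

        // The diagonal-stepping idiom used when seeding the L2 terms:
        int i = 0;
        for (int iRow = 2; iRow <= 4; iRow++)
        {
            i += iRow; // lands on PackedIndex(iRow - 1, iRow - 1)
            Console.WriteLine($"row {iRow - 1} diagonal at {i}, expected {PackedIndex(iRow - 1, iRow - 1)}");
        }
    }
}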
Example #7
        private MulticlassLogisticRegressionPredictor(IHostEnvironment env, ModelLoadContext ctx)
            : base(env, RegistrationName, ctx)
        {
            // *** Binary format ***
            // int: number of features
            // int: number of classes = number of biases
            // float[]: biases
            // (weight matrix, in CSR if sparse)
            // (see https://netlib.org/linalg/html_templates/node91.html#SECTION00931100000000000000)
            // int: number of row start indices (_numClasses + 1 if sparse, 0 if dense)
            // int[]: row start indices
            // int: total number of column indices (0 if dense)
            // int[]: column index of each non-zero weight
            // int: total number of non-zero weights  (same as number of column indices if sparse, num of classes * num of features if dense)
            // float[]: non-zero weights
            // int[]: Id of label names (optional, in a separate stream)
            // LinearModelStatistics: model statistics (optional, in a separate stream)

            _numFeatures = ctx.Reader.ReadInt32();
            Host.CheckDecode(_numFeatures >= 1);

            _numClasses = ctx.Reader.ReadInt32();
            Host.CheckDecode(_numClasses >= 1);

            _biases = ctx.Reader.ReadFloatArray(_numClasses);

            int numStarts = ctx.Reader.ReadInt32();

            if (numStarts == 0)
            {
                // The weights are entirely dense.
                int numIndices = ctx.Reader.ReadInt32();
                Host.CheckDecode(numIndices == 0);
                int numWeights = ctx.Reader.ReadInt32();
                Host.CheckDecode(numWeights == _numClasses * _numFeatures);
                _weights = new VBuffer<float>[_numClasses];
                for (int i = 0; i < _weights.Length; i++)
                {
                    var w = ctx.Reader.ReadFloatArray(_numFeatures);
                    _weights[i] = new VBuffer<float>(_numFeatures, w);
                }
                _weightsDense = _weights;
            }
            else
            {
                // Read weight matrix as CSR.
                Host.CheckDecode(numStarts == _numClasses + 1);
                int[] starts = ctx.Reader.ReadIntArray(numStarts);
                Host.CheckDecode(starts[0] == 0);
                Host.CheckDecode(Utils.IsSorted(starts));

                int numIndices = ctx.Reader.ReadInt32();
                Host.CheckDecode(numIndices == starts[starts.Length - 1]);

                var indices = new int[_numClasses][];
                for (int i = 0; i < indices.Length; i++)
                {
                    indices[i] = ctx.Reader.ReadIntArray(starts[i + 1] - starts[i]);
                    Host.CheckDecode(Utils.IsIncreasing(0, indices[i], _numFeatures));
                }

                int numValues = ctx.Reader.ReadInt32();
                Host.CheckDecode(numValues == numIndices);

                _weights = new VBuffer<float>[_numClasses];
                for (int i = 0; i < _weights.Length; i++)
                {
                    float[] values = ctx.Reader.ReadFloatArray(starts[i + 1] - starts[i]);
                    _weights[i] = new VBuffer<float>(_numFeatures, Utils.Size(values), values, indices[i]);
                }
            }
            WarnOnOldNormalizer(ctx, GetType(), Host);
            InputType  = new VectorType(NumberType.R4, _numFeatures);
            OutputType = new VectorType(NumberType.R4, _numClasses);

            // REVIEW: Should not save the label names duplicately with the predictor again.
            // Get it from the label column schema metadata instead.
            string[] labelNames = null;
            if (ctx.TryLoadBinaryStream(LabelNamesSubModelFilename, r => labelNames = LoadLabelNames(ctx, r)))
            {
                _labelNames = labelNames;
            }

            string statsDir = Path.Combine(ctx.Directory ?? "", ModelStatsSubModelFilename);

            using (var statsEntry = ctx.Repository.OpenEntryOrNull(statsDir, ModelLoadContext.ModelStreamName))
            {
                if (statsEntry == null)
                {
                    _stats = null;
                }
                else
                {
                    using (var statsCtx = new ModelLoadContext(ctx.Repository, statsEntry, statsDir))
                        _stats = LinearModelStatistics.Create(Host, statsCtx);
                }
            }
        }
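The sparse branch above reads the weight matrix in CSR form: a row-starts array with starts[0] == 0 and numClasses + 1 entries, then sorted column indices, then the matching values. A minimal sketch of encoding a small matrix in that layout and reading one row back (illustrative only, not the binary-format reader itself):

using System;
using System.Collections.Generic;

static class CsrSketch
{
    static void Main()
    {
        // A 2 x 4 sparse matrix:
        // [ 0  5  0  7 ]
        // [ 1  0  0  0 ]
        float[,] dense = { { 0, 5, 0, 7 }, { 1, 0, 0, 0 } };

        var starts = new List<int> { 0 };  // row start offsets; starts[0] == 0, as the loader checks
        var indices = new List<int>();     // column index of each non-zero
        var values = new List<float>();    // the non-zero values themselves

        for (int r = 0; r < 2; r++)
        {
            for (int c = 0; c < 4; c++)
            {
                if (dense[r, c] != 0)
                {
                    indices.Add(c);
                    values.Add(dense[r, c]);
                }
            }
            starts.Add(indices.Count);     // starts ends with numRows + 1 entries, matching numStarts above
        }

        // Read back row 0: its non-zeros live in [starts[0], starts[1]).
        for (int k = starts[0]; k < starts[1]; k++)
            Console.WriteLine($"row 0, col {indices[k]} = {values[k]}");
    }
}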
Example #8
 /// <summary>
 /// Constructs a new linear binary predictor.
 /// </summary>
 /// <param name="env">The host environment.</param>
 /// <param name="weights">The weights for the linear predictor. Note that this
 /// will take ownership of the <see cref="VBuffer{T}"/>.</param>
 /// <param name="bias">The bias added to every output score.</param>
 /// <param name="stats"></param>
 public LinearBinaryPredictor(IHostEnvironment env, ref VBuffer<Float> weights, Float bias, LinearModelStatistics stats = null)
     : base(env, RegistrationName, ref weights, bias)
 {
     Contracts.AssertValueOrNull(stats);
     _stats = stats;
 }
        private static void GetUnorderedCoefficientStatistics(LinearModelStatistics stats, ref VBuffer<Single> weights, ref VBuffer<ReadOnlyMemory<char>> names,
            ref VBuffer<Single> estimate, ref VBuffer<Single> stdErr, ref VBuffer<Single> zScore, ref VBuffer<Single> pValue, out ValueGetter<VBuffer<ReadOnlyMemory<char>>> getSlotNames)
        {
            if (!stats._coeffStdError.HasValue)
            {
                getSlotNames = null;
                return;
            }

            Contracts.Assert(stats._coeffStdError.Value.Length == weights.Length + 1);

            var estimateValues = estimate.Values;

            if (Utils.Size(estimateValues) < stats.ParametersCount - 1)
            {
                estimateValues = new Single[stats.ParametersCount - 1];
            }
            var stdErrorValues = stdErr.Values;

            if (Utils.Size(stdErrorValues) < stats.ParametersCount - 1)
            {
                stdErrorValues = new Single[stats.ParametersCount - 1];
            }
            var zScoreValues = zScore.Values;

            if (Utils.Size(zScoreValues) < stats.ParametersCount - 1)
            {
                zScoreValues = new Single[stats.ParametersCount - 1];
            }
            var pValueValues = pValue.Values;

            if (Utils.Size(pValueValues) < stats.ParametersCount - 1)
            {
                pValueValues = new Single[stats.ParametersCount - 1];
            }

            const Double sqrt2 = 1.41421356237; // Math.Sqrt(2);

            bool denseStdError = stats._coeffStdError.Value.IsDense;

            int[] stdErrorIndices = stats._coeffStdError.Value.Indices;
            for (int i = 1; i < stats.ParametersCount; i++)
            {
                int wi = denseStdError ? i - 1 : stdErrorIndices[i] - 1;
                Contracts.Assert(0 <= wi && wi < weights.Length);
                var weight   = estimateValues[i - 1] = weights.GetItemOrDefault(wi);
                // Write at the dense slot i - 1; wi indexes the weight vector, not the stats arrays.
                var stdError = stdErrorValues[i - 1] = stats._coeffStdError.Value.Values[i];
                zScoreValues[i - 1] = weight / stdError;
                pValueValues[i - 1] = 1 - (Single)ProbabilityFunctions.Erf(Math.Abs(zScoreValues[i - 1] / sqrt2));
            }

            estimate = new VBuffer<Single>(stats.ParametersCount - 1, estimateValues, estimate.Indices);
            stdErr   = new VBuffer<Single>(stats.ParametersCount - 1, stdErrorValues, stdErr.Indices);
            zScore   = new VBuffer<Single>(stats.ParametersCount - 1, zScoreValues, zScore.Indices);
            pValue   = new VBuffer<Single>(stats.ParametersCount - 1, pValueValues, pValue.Indices);

            var slotNames = names;

            getSlotNames =
                (ref VBuffer<ReadOnlyMemory<char>> dst) =>
            {
                var values = dst.Values;
                if (Utils.Size(values) < stats.ParametersCount - 1)
                {
                    values = new ReadOnlyMemory<char>[stats.ParametersCount - 1];
                }
                for (int i = 1; i < stats.ParametersCount; i++)
                {
                    int wi = denseStdError ? i - 1 : stdErrorIndices[i] - 1;
                    values[i - 1] = slotNames.GetItemOrDefault(wi);
                }
                dst = new VBuffer<ReadOnlyMemory<char>>(stats.ParametersCount - 1, values, dst.Indices);
            };
        }
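The repeated "if (Utils.Size(buf) < n) buf = new ..." blocks in GetUnorderedCoefficientStatistics implement a reuse-or-reallocate convention for VBuffer backing arrays. A generic sketch of the pattern (the helper name is made up for illustration):

using System;

static class BufferReuseSketch
{
    // Reuse the caller's array when it is already large enough; otherwise allocate.
    // Mirrors the Utils.Size(...) < n checks in GetUnorderedCoefficientStatistics.
    static T[] EnsureCapacity<T>(T[] buffer, int required)
        => buffer != null && buffer.Length >= required ? buffer : new T[required];

    static void Main()
    {
        float[] scratch = null;
        scratch = EnsureCapacity(scratch, 4);   // allocates a new array
        var same = EnsureCapacity(scratch, 3);  // reuses: same.Length == 4
        Console.WriteLine(ReferenceEquals(scratch, same)); // True
    }
}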
Example #10
        protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValue(cursorFactory);
            Contracts.Assert(NumGoodRows > 0);
            Contracts.Assert(WeightSum > 0);
            Contracts.Assert(BiasCount == 1);
            Contracts.Assert(loss >= 0);
            Contracts.Assert(numParams >= BiasCount);
            Contracts.Assert(CurrentWeights.IsDense);

            ch.Info("Model trained with {0} training examples.", NumGoodRows);

            // Compute deviance: start with loss function.
            float deviance = (float)(2 * loss * WeightSum);

            if (L2Weight > 0)
            {
                // Need to subtract L2 regularization loss.
                // The bias term is not regularized.
                var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight;
                deviance -= regLoss;
            }

            if (L1Weight > 0)
            {
                // Need to subtract L1 regularization loss.
                // The bias term is not regularized.
                Double regLoss = 0;
                VBufferUtils.ForEachDefined(ref CurrentWeights,
                    (ind, value) =>
                    {
                        if (ind >= BiasCount)
                            regLoss += Math.Abs(value);
                    });
                deviance -= (float)regLoss * L1Weight * 2;
            }

            ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0));

            // Compute the null deviance, i.e., the deviance of the null hypothesis.
            // A degenerate prior positive rate (within 1e-15 of 0 or 1) yields a null deviance of zero.
            Double priorPosRate = _posWeight / WeightSum;

            Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1);
            float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ?
                                 0f : (float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true));

            ch.Info("Null Deviance:     \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1);

            // Compute AIC.
            ch.Info("AIC:               \t{0}", 2 * numParams + deviance);

            // Show the coefficients statistics table.
            var featureColIdx = cursorFactory.Data.Schema.Feature.Index;
            var schema        = cursorFactory.Data.Data.Schema;
            var featureLength = CurrentWeights.Length - BiasCount;
            var namesSpans    = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(featureLength);

            if (schema.HasSlotNames(featureColIdx, featureLength))
            {
                schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans);
            }
            Host.Assert(namesSpans.Length == featureLength);

            // Inverse mapping of non-zero weight slots.
            Dictionary<int, int> weightIndicesInvMap = null;

            // Indices of bias and non-zero weight slots.
            int[] weightIndices = null;

            // Whether all weights are non-zero.
            bool denseWeight = numParams == CurrentWeights.Length;

            // Extract non-zero indices of weight.
            if (!denseWeight)
            {
                weightIndices          = new int[numParams];
                weightIndicesInvMap    = new Dictionary<int, int>(numParams);
                weightIndices[0]       = 0;
                weightIndicesInvMap[0] = 0;
                int j = 1;
                for (int i = 1; i < CurrentWeights.Length; i++)
                {
                    if (CurrentWeights.Values[i] != 0)
                    {
                        weightIndices[j]       = i;
                        weightIndicesInvMap[i] = j++;
                    }
                }

                Contracts.Assert(j == numParams);
            }

            // Compute the standard error of coefficients.
            long hessianDimension = (long)numParams * (numParams + 1) / 2;

            if (hessianDimension > int.MaxValue)
            {
                ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " +
                           "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" +
                           "to reduce the number of parameters.");
                _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
                return;
            }

            // Building the variance-covariance matrix for parameters.
            // The layout of this algorithm is a packed row-major lower triangular matrix.
            // E.g., layout of indices for 4-by-4:
            // 0
            // 1 2
            // 3 4 5
            // 6 7 8 9
            var hessian = new Double[hessianDimension];

            // Initialize diagonal elements with the L2 regularizer, except for the first entry (index 0),
            // since the bias is not regularized.
            if (L2Weight > 0)
            {
                // i is the array index of the diagonal entry at iRow-th row and iRow-th column.
                // iRow is one-based.
                int i = 0;
                for (int iRow = 2; iRow <= numParams; iRow++)
                {
                    i         += iRow;
                    hessian[i] = L2Weight;
                }

                Contracts.Assert(i == hessian.Length - 1);
            }

            // Initialize the remaining entries.
            var bias = CurrentWeights.Values[0];

            using (var cursor = cursorFactory.Create())
            {
                while (cursor.MoveNext())
                {
                    var label  = cursor.Label;
                    var weight = cursor.Weight;
                    var score  = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features);
                    // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example.
                    var variance = weight / (2 + 2 * Math.Cosh(score));

                    // Increment the first entry of hessian.
                    hessian[0] += variance;

                    var values = cursor.Features.Values;
                    if (cursor.Features.IsDense)
                    {
                        int ioff = 1;

                        // Increment remaining entries of hessian.
                        for (int i = 1; i < numParams; i++)
                        {
                            ch.Assert(ioff == i * (i + 1) / 2);
                            int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1;
                            Contracts.Assert(0 <= wi && wi < cursor.Features.Length);
                            var val = values[wi] * variance;
                            // Add the implicit first bias term to X'X
                            hessian[ioff++] += val;
                            // Add the remainder of X'X
                            for (int j = 0; j < i; j++)
                            {
                                int wj = weightIndices == null ? j : weightIndices[j + 1] - 1;
                                Contracts.Assert(0 <= wj && wj < cursor.Features.Length);
                                hessian[ioff++] += val * values[wj];
                            }
                        }
                        ch.Assert(ioff == hessian.Length);
                    }
                    else
                    {
                        var indices = cursor.Features.Indices;
                        for (int ii = 0; ii < cursor.Features.Count; ++ii)
                        {
                            int i  = indices[ii];
                            int wi = i + 1;
                            if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi))
                            {
                                continue;
                            }

                            Contracts.Assert(0 < wi && wi <= cursor.Features.Length);
                            int ioff = wi * (wi + 1) / 2;
                            var val  = values[ii] * variance;
                            // Add the implicit first bias term to X'X
                            hessian[ioff] += val;
                            // Add the remainder of X'X
                            for (int jj = 0; jj <= ii; jj++)
                            {
                                int j  = indices[jj];
                                int wj = j + 1;
                                if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj))
                                {
                                    continue;
                                }

                                Contracts.Assert(0 < wj && wj <= cursor.Features.Length);
                                hessian[ioff + wj] += val * values[jj];
                            }
                        }
                    }
                }
            }

            _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
        }
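The per-example variance weight / (2 + 2 * Math.Cosh(score)) used when accumulating the Hessian relies on the identity p * (1 - p) = 1 / (2 + 2 * cosh(s)) for p = 1 / (1 + exp(-s)). A quick numerical check of that identity (illustrative values):

using System;

static class BernoulliVarianceSketch
{
    static void Main()
    {
        foreach (var score in new[] { -2.0, 0.0, 0.75, 3.0 })
        {
            double p = 1 / (1 + Math.Exp(-score));         // sigmoid of the raw score
            double direct = p * (1 - p);                   // Bernoulli variance
            double viaCosh = 1 / (2 + 2 * Math.Cosh(score));
            Console.WriteLine($"s={score}: p(1-p)={direct:F6}, 1/(2+2cosh)={viaCosh:F6}");
        }
    }
}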