public void SaveAsText(TextWriter writer, RoleMappedSchema schema)
        {
            writer.WriteLine("Dimension: {0}", _dimension);
            writer.WriteLine("Rank: {0}", _rank);

            if (_mean.IsDense)
            {
                writer.Write("Mean vector:");
                foreach (var value in _mean.Items(all: true))
                {
                    writer.Write(" {0}", value.Value);
                }

                writer.WriteLine();
                writer.Write("Projected mean vector:");
                foreach (var value in _meanProjected)
                {
                    writer.Write(" {0}", value);
                }
            }

            writer.WriteLine();
            writer.WriteLine("# V");
            for (var i = 0; i < _rank; ++i)
            {
                VBufferUtils.ForEachDefined(ref _eigenVectors[i],
                    (ind, val) =>
                    {
                        if (val != 0)
                            writer.Write(" {0}:{1}", ind, val);
                    });
                writer.WriteLine();
            }
        }
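
The eigenvectors are emitted as sparse "index:value" pairs. As a companion, here is a minimal sketch (not part of ML.NET, and the helper name is hypothetical) of how one line of the "# V" section could be parsed back; it assumes the " {0}:{1}" format of the Write call above and glosses over culture-specific number formatting.

        // Requires System, System.Collections.Generic, and System.Globalization.
        private static IEnumerable<KeyValuePair<int, float>> ParseSparseLine(string line)
        {
            foreach (var token in line.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries))
            {
                // Each token has the form "index:value", as written by SaveAsText above.
                int colon = token.IndexOf(':');
                int index = int.Parse(token.Substring(0, colon), CultureInfo.InvariantCulture);
                float value = float.Parse(token.Substring(colon + 1), CultureInfo.InvariantCulture);
                yield return new KeyValuePair<int, float>(index, value);
            }
        }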
Example #2
        protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValue(cursorFactory);
            Contracts.Assert(NumGoodRows > 0);
            Contracts.Assert(WeightSum > 0);
            Contracts.Assert(BiasCount == _numClasses);
            Contracts.Assert(loss >= 0);
            Contracts.Assert(numParams >= BiasCount);
            Contracts.Assert(CurrentWeights.IsDense);

            ch.Info("Model trained with {0} training examples.", NumGoodRows);
            // Compute deviance: start with loss function.
            float deviance = (float)(2 * loss * WeightSum);

            if (L2Weight > 0)
            {
                // Need to subtract L2 regularization loss.
                // The bias term is not regularized.
                var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, BiasCount, CurrentWeights.Length - BiasCount) * L2Weight;
                deviance -= regLoss;
            }

            if (L1Weight > 0)
            {
                // Need to subtract L1 regularization loss.
                // The bias term is not regularized.
                Double regLoss = 0;
                VBufferUtils.ForEachDefined(ref CurrentWeights,
                    (ind, value) =>
                    {
                        if (ind >= BiasCount)
                            regLoss += Math.Abs(value);
                    });
                deviance -= (float)regLoss * L1Weight * 2;
            }

            ch.Info("Residual Deviance: \t{0}", deviance);

            // Compute the null deviance, i.e., the deviance of the null hypothesis.
            // Classes with zero prior weight contribute nothing and are skipped.
            float nullDeviance = 0;

            for (int iLabel = 0; iLabel < _numClasses; iLabel++)
            {
                Contracts.Assert(_prior[iLabel] >= 0);
                if (_prior[iLabel] == 0)
                {
                    continue;
                }

                nullDeviance -= (float)(2 * _prior[iLabel] * Math.Log(_prior[iLabel] / WeightSum));
            }
            ch.Info("Null Deviance:    \t{0}", nullDeviance);

            // Compute AIC.
            ch.Info("AIC:              \t{0}", 2 * numParams + deviance);

            // REVIEW: Figure out how to compute the statistics for the coefficients.
            _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
        }
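
The null-deviance loop above evaluates -2 * sum_k prior[k] * log(prior[k] / WeightSum) over the non-empty classes. A self-contained sketch of the same formula (illustrative only; not the ML.NET API):

        // Null deviance of the prior-only multiclass model; classes with zero
        // prior weight contribute nothing, matching the skip above.
        private static double NullDeviance(double[] prior, double weightSum)
        {
            double deviance = 0;
            foreach (var p in prior)
            {
                if (p > 0)
                    deviance -= 2 * p * Math.Log(p / weightSum);
            }
            return deviance;
        }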
Example #3
        /// <summary>
        /// print the linear model as code
        /// </summary>
        public static void SaveAsCode(TextWriter writer, ref VBuffer<Float> weights, Float bias,
                                      RoleMappedSchema schema, string codeVariable = "output")
        {
            Contracts.CheckValue(writer, nameof(writer));
            Contracts.CheckValueOrNull(schema);

            var featureNames = default(VBuffer<ReadOnlyMemory<char>>);

            MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Length, ref featureNames);

            int numNonZeroWeights = 0;

            writer.Write(codeVariable);
            writer.Write(" = ");
            VBufferUtils.ForEachDefined(ref weights,
                                        (idx, value) =>
            {
                if (Math.Abs(value) >= Epsilon)
                {
                    if (numNonZeroWeights > 0)
                    {
                        writer.Write(" + ");
                    }

                    writer.Write(FloatUtils.ToRoundTripString(value));
                    writer.Write("*");
                    if (featureNames.Count > 0)
                    {
                        writer.Write(FeatureNameAsCode(featureNames.GetItemOrDefault(idx).ToString(), idx));
                    }
                    else
                    {
                        writer.Write("f_" + idx);
                    }

                    numNonZeroWeights++;
                }
            });

            if (numNonZeroWeights > 0)
            {
                writer.Write(" + ");
            }
            writer.Write(FloatUtils.ToRoundTripString(bias));
            writer.WriteLine(";");
        }
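
To make the emitted format concrete: for a hypothetical model with no slot names, nonzero weights 0.5 (slot 0) and -1.25 (slot 3), and bias 0.75, the method writes a single expression of this shape:

output = 0.5*f_0 + -1.25*f_3 + 0.75;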
Example #4
            /// <summary>
            /// This is the original differentiable function with the injected L1 term.
            /// </summary>
            private Float EvalCore(ref VBuffer<Float> input, ref VBuffer<Float> gradient, IProgressChannelProvider progress)
            {
                // REVIEW: Leverage Vector methods that use SSE.
                Float res = 0;

                if (!EnforceNonNegativity)
                {
                    if (_biasCount > 0)
                    {
                        VBufferUtils.ForEachDefined(ref input,
                            (ind, value) =>
                            {
                                if (ind >= _biasCount)
                                    res += Math.Abs(value);
                            });
                    }
                    else
                    {
                        VBufferUtils.ForEachDefined(ref input, (ind, value) => res += Math.Abs(value));
                    }
                }
                else
                {
                    if (_biasCount > 0)
                    {
                        VBufferUtils.ForEachDefined(ref input,
                            (ind, value) =>
                            {
                                if (ind >= _biasCount)
                                    res += value;
                            });
                    }
                    else
                    {
                        VBufferUtils.ForEachDefined(ref input, (ind, value) => res += value);
                    }
                }
                res = _l1weight * res + _function(ref input, ref gradient, progress);
                return res;
            }
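
EvalCore thus returns _l1weight * sum_i |w_i| (bias slots excluded) plus the smooth part; under EnforceNonNegativity the absolute value is dropped because the weights are constrained non-negative. A standalone sketch of the penalized objective with the smooth part abstracted as a delegate (illustrative only; names are hypothetical):

            // res = l1Weight * sum_{i >= biasCount} |w[i]| + smooth(w),
            // mirroring the branch structure of EvalCore above.
            private static float L1PenalizedObjective(float[] w, int biasCount, float l1Weight,
                                                      Func<float[], float> smooth)
            {
                float penalty = 0;
                for (int i = biasCount; i < w.Length; i++)
                    penalty += Math.Abs(w[i]);
                return l1Weight * penalty + smooth(w);
            }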
Example #5
        protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, Float loss, int numParams)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValue(cursorFactory);
            Contracts.Assert(NumGoodRows > 0);
            Contracts.Assert(WeightSum > 0);
            Contracts.Assert(BiasCount == 1);
            Contracts.Assert(loss >= 0);
            Contracts.Assert(numParams >= BiasCount);
            Contracts.Assert(CurrentWeights.IsDense);

            ch.Info("Model trained with {0} training examples.", NumGoodRows);

            // Compute deviance: start with loss function.
            Float deviance = (Float)(2 * loss * WeightSum);

            if (L2Weight > 0)
            {
                // Need to subtract L2 regularization loss.
                // The bias term is not regularized.
                var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight;
                deviance -= regLoss;
            }

            if (L1Weight > 0)
            {
                // Need to subtract L1 regularization loss.
                // The bias term is not regularized.
                Double regLoss = 0;
                VBufferUtils.ForEachDefined(ref CurrentWeights,
                    (ind, value) =>
                    {
                        if (ind >= BiasCount)
                            regLoss += Math.Abs(value);
                    });
                deviance -= (Float)regLoss * L1Weight * 2;
            }

            ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0));

            // Compute the null deviance, i.e., the deviance of the null hypothesis.
            // If the prior positive rate is within 1e-15 of 0 or 1, the null deviance degenerates to 0.
            Double priorPosRate = _posWeight / WeightSum;

            Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1);
            Float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ?
                                 0f : (Float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true));

            ch.Info("Null Deviance:     \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1);

            // Compute AIC.
            ch.Info("AIC:               \t{0}", 2 * numParams + deviance);

            // Show the coefficients statistics table.
            var featureColIdx = cursorFactory.Data.Schema.Feature.Index;
            var schema        = cursorFactory.Data.Data.Schema;
            var featureLength = CurrentWeights.Length - BiasCount;
            var namesSpans    = VBufferUtils.CreateEmpty<DvText>(featureLength);

            if (schema.HasSlotNames(featureColIdx, featureLength))
            {
                schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans);
            }
            Host.Assert(namesSpans.Length == featureLength);

            // Inverse mapping of non-zero weight slots.
            Dictionary<int, int> weightIndicesInvMap = null;

            // Indices of bias and non-zero weight slots.
            int[] weightIndices = null;

            // Whether all weights are non-zero.
            bool denseWeight = numParams == CurrentWeights.Length;

            // Extract non-zero indices of weight.
            if (!denseWeight)
            {
                weightIndices          = new int[numParams];
                weightIndicesInvMap    = new Dictionary<int, int>(numParams);
                weightIndices[0]       = 0;
                weightIndicesInvMap[0] = 0;
                int j = 1;
                for (int i = 1; i < CurrentWeights.Length; i++)
                {
                    if (CurrentWeights.Values[i] != 0)
                    {
                        weightIndices[j]       = i;
                        weightIndicesInvMap[i] = j++;
                    }
                }

                Contracts.Assert(j == numParams);
            }

            // Compute the standard error of coefficients.
            long hessianDimension = (long)numParams * (numParams + 1) / 2;

            if (hessianDimension > int.MaxValue)
            {
                ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " +
                           "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" +
                           "to reduce the number of parameters.");
                _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
                return;
            }

            // Building the variance-covariance matrix for parameters.
            // The layout of this algorithm is a packed row-major lower triangular matrix.
            // E.g., layout of indices for 4-by-4:
            // 0
            // 1 2
            // 3 4 5
            // 6 7 8 9
            var hessian = new Double[hessianDimension];

            // Initialize the diagonal elements with the L2 regularizer, except for the first entry (index 0),
            // since the bias is not regularized.
            if (L2Weight > 0)
            {
                // i is the array index of the diagonal entry at iRow-th row and iRow-th column.
                // iRow is one-based.
                int i = 0;
                for (int iRow = 2; iRow <= numParams; iRow++)
                {
                    i         += iRow;
                    hessian[i] = L2Weight;
                }

                Contracts.Assert(i == hessian.Length - 1);
            }

            // Initialize the remaining entries.
            var bias = CurrentWeights.Values[0];

            using (var cursor = cursorFactory.Create())
            {
                while (cursor.MoveNext())
                {
                    var label  = cursor.Label;
                    var weight = cursor.Weight;
                    var score  = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features);
                    // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example.
                    var variance = weight / (2 + 2 * Math.Cosh(score));

                    // Increment the first entry of hessian.
                    hessian[0] += variance;

                    var values = cursor.Features.Values;
                    if (cursor.Features.IsDense)
                    {
                        int ioff = 1;

                        // Increment remaining entries of hessian.
                        for (int i = 1; i < numParams; i++)
                        {
                            ch.Assert(ioff == i * (i + 1) / 2);
                            int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1;
                            Contracts.Assert(0 <= wi && wi < cursor.Features.Length);
                            var val = values[wi] * variance;
                            // Add the implicit first bias term to X'X
                            hessian[ioff++] += val;
                            // Add the remainder of X'X
                            for (int j = 0; j < i; j++)
                            {
                                int wj = weightIndices == null ? j : weightIndices[j + 1] - 1;
                                Contracts.Assert(0 <= wj && wj < cursor.Features.Length);
                                hessian[ioff++] += val * values[wj];
                            }
                        }
                        ch.Assert(ioff == hessian.Length);
                    }
                    else
                    {
                        var indices = cursor.Features.Indices;
                        for (int ii = 0; ii < cursor.Features.Count; ++ii)
                        {
                            int i  = indices[ii];
                            int wi = i + 1;
                            if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi))
                            {
                                continue;
                            }

                            Contracts.Assert(0 < wi && wi <= cursor.Features.Length);
                            int ioff = wi * (wi + 1) / 2;
                            var val  = values[ii] * variance;
                            // Add the implicit first bias term to X'X
                            hessian[ioff] += val;
                            // Add the remainder of X'X
                            for (int jj = 0; jj <= ii; jj++)
                            {
                                int j  = indices[jj];
                                int wj = j + 1;
                                if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj))
                                {
                                    continue;
                                }

                                Contracts.Assert(0 < wj && wj <= cursor.Features.Length);
                                hessian[ioff + wj] += val * values[jj];
                            }
                        }
                    }
                }
            }

            // Apply Cholesky Decomposition to find the inverse of the Hessian.
            Double[] invHessian = null;
            try
            {
                // First, find the Cholesky decomposition LL' of the Hessian.
                Mkl.Pptrf(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian);
                // Note that hessian is already modified at this point. It is no longer the original Hessian,
                // but instead represents the Cholesky decomposition L.
                // Also note that the following routine is supposed to consume the Cholesky decomposition L instead
                // of the original information matrix.
                Mkl.Pptri(Mkl.Layout.RowMajor, Mkl.UpLo.Lo, numParams, hessian);
                // At this point, hessian should contain the inverse of the original Hessian matrix.
                // Swap hessian with invHessian to avoid confusion in the following context.
                Utils.Swap(ref hessian, ref invHessian);
                Contracts.Assert(hessian == null);
            }
            catch (DllNotFoundException)
            {
                throw ch.ExceptNotSupp("The MKL library (Microsoft.ML.MklImports.dll) or one of its dependencies is missing.");
            }

            Float[] stdErrorValues = new Float[numParams];
            stdErrorValues[0] = (Float)Math.Sqrt(invHessian[0]);

            for (int i = 1; i < numParams; i++)
            {
                // Initialize with inverse Hessian.
                stdErrorValues[i] = (Single)invHessian[i * (i + 1) / 2 + i];
            }

            if (L2Weight > 0)
            {
                // Iterate through all entries of inverse Hessian to make adjustment to variance.
                // A discussion on ridge regularized LR coefficient covariance matrix can be found here:
                // http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3228544/
                // http://www.inf.unibz.it/dis/teaching/DWDM/project2010/LogisticRegression.pdf
                int ioffset = 1;
                for (int iRow = 1; iRow < numParams; iRow++)
                {
                    for (int iCol = 0; iCol <= iRow; iCol++)
                    {
                        var entry      = (Single)invHessian[ioffset];
                        var adjustment = -L2Weight * entry * entry;
                        stdErrorValues[iRow] -= adjustment;
                        if (0 < iCol && iCol < iRow)
                        {
                            stdErrorValues[iCol] -= adjustment;
                        }
                        ioffset++;
                    }
                }

                Contracts.Assert(ioffset == invHessian.Length);
            }

            for (int i = 1; i < numParams; i++)
            {
                stdErrorValues[i] = (Float)Math.Sqrt(stdErrorValues[i]);
            }

            VBuffer<Float> stdErrors = new VBuffer<Float>(CurrentWeights.Length, numParams, stdErrorValues, weightIndices);

            _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance, ref stdErrors);
        }
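
The packed row-major lower-triangular layout used for the Hessian maps entry (i, j), with j <= i, to flat offset i * (i + 1) / 2 + j; that is exactly what the ioff arithmetic in both the dense and sparse branches computes. A minimal helper expressing the mapping (illustrative; not part of the original code):

        // Flat offset of entry (i, j), 0 <= j <= i, in a packed row-major
        // lower-triangular matrix; for n parameters the packed array has length
        // n * (n + 1) / 2, matching hessianDimension above.
        private static int PackedLowerTriangularIndex(int i, int j)
        {
            return i * (i + 1) / 2 + j;
        }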
Example #6
        protected virtual void TrainCore(IChannel ch, RoleMappedData data)
        {
            Host.AssertValue(ch);
            ch.AssertValue(data);

            // Compute the number of threads to use. The ctor should have verified that this will
            // produce a positive value.
            int numThreads = !UseThreads ? 1 : (NumThreads ?? Environment.ProcessorCount);

            if (Host.ConcurrencyFactor > 0 && numThreads > Host.ConcurrencyFactor)
            {
                numThreads = Host.ConcurrencyFactor;
                ch.Warning("The number of threads specified in trainer arguments is larger than the concurrency factor "
                           + "setting of the environment. Using {0} training threads instead.", numThreads);
            }

            ch.Assert(numThreads > 0);

            NumGoodRows = 0;
            WeightSum   = 0;

            _features = null;
            _labels   = null;
            _weights  = null;
            if (numThreads > 1)
            {
                ch.Info("LBFGS multi-threading will attempt to load dataset into memory. In case of out-of-memory " +
                        "issues, add 'numThreads=1' to the trainer arguments and 'cache=-' to the command line " +
                        "arguments to turn off multi-threading.");
                _features = new VBuffer<float>[1000];
                _labels   = new float[1000];
                if (data.Schema.Weight != null)
                {
                    _weights = new float[1000];
                }
            }

            var cursorFactory = new FloatLabelCursor.Factory(data, CursOpt.Features | CursOpt.Label | CursOpt.Weight);

            long numBad;

            // REVIEW: This pass seems overly expensive for the benefit when multi-threading is off....
            using (var cursor = cursorFactory.Create())
                using (var pch = Host.StartProgressChannel("LBFGS data prep"))
                {
                    // REVIEW: maybe it makes sense for the factory to capture the good row count after
                    // the first successful cursoring?
                    Double totalCount = data.Data.GetRowCount(true) ?? Double.NaN;

                    long exCount = 0;
                    pch.SetHeader(new ProgressHeader(null, new[] { "examples" }),
                                  e => e.SetProgress(0, exCount, totalCount));
                    while (cursor.MoveNext())
                    {
                        WeightSum += cursor.Weight;
                        if (ShowTrainingStats)
                        {
                            ProcessPriorDistribution(cursor.Label, cursor.Weight);
                        }

                        PreTrainingProcessInstance(cursor.Label, ref cursor.Features, cursor.Weight);
                        exCount++;
                        if (_features != null)
                        {
                            ch.Assert(cursor.KeptRowCount <= int.MaxValue);
                            int index = (int)cursor.KeptRowCount - 1;
                            Utils.EnsureSize(ref _features, index + 1);
                            Utils.EnsureSize(ref _labels, index + 1);
                            if (_weights != null)
                            {
                                Utils.EnsureSize(ref _weights, index + 1);
                                _weights[index] = cursor.Weight;
                            }
                            Utils.Swap(ref _features[index], ref cursor.Features);
                            _labels[index] = cursor.Label;

                            if (cursor.KeptRowCount >= int.MaxValue)
                            {
                                ch.Warning("Limiting data size for multi-threading");
                                break;
                            }
                        }
                    }
                    NumGoodRows = cursor.KeptRowCount;
                    numBad      = cursor.SkippedRowCount;
                }
            ch.Check(NumGoodRows > 0, NoTrainingInstancesMessage);
            if (numBad > 0)
            {
                ch.Warning("Skipped {0} instances with missing features/label/weight during training", numBad);
            }

            if (_features != null)
            {
                ch.Assert(numThreads > 1);

                // If there are so many threads that each only gets a small number (less than 10) of instances, trim
                // the number of threads so each gets a more reasonable number (100 or so). These numbers are pretty arbitrary,
                // but avoid the possibility of having no instances on some threads.
                if (numThreads > 1 && NumGoodRows / numThreads < 10)
                {
                    int numNew = Math.Max(1, (int)NumGoodRows / 100);
                    ch.Warning("Too few instances to use {0} threads, decreasing to {1} thread(s)", numThreads, numNew);
                    numThreads = numNew;
                }
                ch.Assert(numThreads > 0);

                // Divide up the instances among the threads.
                _numChunks = numThreads;
                _ranges    = new int[_numChunks + 1];
                int cinstTot = (int)NumGoodRows;
                for (int ichk = 0, iinstMin = 0; ichk < numThreads; ichk++)
                {
                    int cchkLeft = numThreads - ichk;                                // Number of chunks left to fill.
                    ch.Assert(0 < cchkLeft && cchkLeft <= numThreads);
                    int cinstThis = (cinstTot - iinstMin + cchkLeft - 1) / cchkLeft; // Size of this chunk.
                    ch.Assert(0 < cinstThis && cinstThis <= cinstTot - iinstMin);
                    iinstMin         += cinstThis;
                    _ranges[ichk + 1] = iinstMin;
                }

                _localLosses    = new float[numThreads];
                _localGradients = new VBuffer<float>[numThreads - 1];
                int size = BiasCount + WeightCount;
                for (int i = 0; i < _localGradients.Length; i++)
                {
                    _localGradients[i] = VBufferUtils.CreateEmpty<float>(size);
                }

                ch.Assert(_numChunks > 0 && _data == null);
            }
            else
            {
                // Streaming, single-threaded case.
                _data          = data;
                _cursorFactory = cursorFactory;
                ch.Assert(_numChunks == 0 && _data != null);
            }

            VBuffer<float>        initWeights;
            ITerminationCriterion terminationCriterion;
            Optimizer             opt = InitializeOptimizer(ch, cursorFactory, out initWeights, out terminationCriterion);

            opt.Quiet = Quiet;

            float loss;

            try
            {
                opt.Minimize(DifferentiableFunction, ref initWeights, terminationCriterion, ref CurrentWeights, out loss);
            }
            catch (Optimizer.PrematureConvergenceException e)
            {
                if (!Quiet)
                {
                    ch.Warning("Premature convergence occurred. The OptimizationTolerance may be set too small. {0}", e.Message);
                }
                CurrentWeights = e.State.X;
                loss           = e.State.Value;
            }

            ch.Assert(CurrentWeights.Length == BiasCount + WeightCount);

            int numParams = BiasCount;

            if ((L1Weight > 0 && !Quiet) || ShowTrainingStats)
            {
                VBufferUtils.ForEachDefined(ref CurrentWeights,
                    (index, value) =>
                    {
                        if (index >= BiasCount && value != 0)
                            numParams++;
                    });
                if (L1Weight > 0 && !Quiet)
                {
                    ch.Info("L1 regularization selected {0} of {1} weights.", numParams, BiasCount + WeightCount);
                }
            }

            if (ShowTrainingStats)
            {
                ComputeTrainingStatistics(ch, cursorFactory, loss, numParams);
            }
        }
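
The range-splitting loop in the middle of TrainCore assigns each remaining chunk ceil(remaining / chunksLeft) instances, so chunk sizes differ by at most one and no chunk is left empty. The same partitioning as a standalone sketch (illustrative only):

        // Split cinstTot instances into numThreads contiguous ranges;
        // ranges[k]..ranges[k+1] is the span of chunk k, as in _ranges above.
        private static int[] MakeRanges(int cinstTot, int numThreads)
        {
            var ranges = new int[numThreads + 1];
            int iinstMin = 0;
            for (int ichk = 0; ichk < numThreads; ichk++)
            {
                int cchkLeft = numThreads - ichk;                                // Chunks left to fill.
                int cinstThis = (cinstTot - iinstMin + cchkLeft - 1) / cchkLeft; // Ceiling division.
                iinstMin += cinstThis;
                ranges[ichk + 1] = iinstMin;
            }
            return ranges;
        }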
Example #7
        /// <summary>
        /// Build a Bing TreeEnsemble .ini representation of the given predictor
        /// </summary>
        public static string LinearModelAsIni(ref VBuffer<Float> weights, Float bias, IPredictor predictor = null,
                                              RoleMappedSchema schema = null, PlattCalibrator calibrator = null)
        {
            // TODO: Might need to consider a max line length for the Weights list, requiring us to split it up into
            //   multiple evaluators
            StringBuilder inputBuilder           = new StringBuilder();
            StringBuilder aggregatedNodesBuilder = new StringBuilder("Nodes=");
            StringBuilder weightsBuilder         = new StringBuilder("Weights=");

            var featureNames = default(VBuffer<ReadOnlyMemory<char>>);

            MetadataUtils.GetSlotNames(schema, RoleMappedSchema.ColumnRole.Feature, weights.Length, ref featureNames);

            int          numNonZeroWeights = 0;
            const string weightsSep        = "\t";

            VBufferUtils.ForEachDefined(ref weights,
                                        (idx, value) =>
            {
                if (Math.Abs(value) >= Epsilon)
                {
                    numNonZeroWeights++;

                    var name = featureNames.GetItemOrDefault(idx);

                    inputBuilder.AppendLine("[Input:" + numNonZeroWeights + "]");
                    inputBuilder.AppendLine("Name=" + (featureNames.Count == 0 ? "Feature_" + idx : name.IsEmpty ? $"f{idx}" : name.ToString()));
                    inputBuilder.AppendLine("Transform=linear");
                    inputBuilder.AppendLine("Slope=1");
                    inputBuilder.AppendLine("Intercept=0");
                    inputBuilder.AppendLine();

                    aggregatedNodesBuilder.Append("I:" + numNonZeroWeights + weightsSep);
                    weightsBuilder.Append(value + weightsSep);
                }
            });

            StringBuilder builder = new StringBuilder();

            builder.AppendLine("[TreeEnsemble]");
            builder.AppendLine("Inputs=" + numNonZeroWeights);
            builder.AppendLine("Evaluators=1");
            builder.AppendLine();

            builder.AppendLine(inputBuilder.ToString());

            builder.AppendLine("[Evaluator:1]");
            builder.AppendLine("EvaluatorType=Aggregator");
            builder.AppendLine("Type=Linear");
            builder.AppendLine("Bias=" + bias);
            builder.AppendLine("NumNodes=" + numNonZeroWeights);
            builder.AppendLine(aggregatedNodesBuilder.ToString().Trim());
            builder.AppendLine(weightsBuilder.ToString().Trim());

#if false // REVIEW: This should be done by the caller using the actual training args!
            builder.AppendLine();
            builder.AppendLine("[Comments]");
            builder.Append("Trained by TLC");
            if (predictor != null)
            {
                builder.Append(" as /cl " + predictor.GetType().Name);
                if (predictor is IInitializable)
                {
                    string settings = string.Join(";", (predictor as IInitializable).GetSettings());
                    if (!string.IsNullOrEmpty(settings))
                    {
                        builder.Append(" /cls " + settings);
                    }
                }
            }
#endif

            string ini = builder.ToString();

            // Add the calibration if the model was trained with calibration
            if (calibrator != null)
            {
                string calibratorEvaluatorIni = IniFileUtils.GetCalibratorEvaluatorIni(ini, calibrator);
                ini = IniFileUtils.AddEvaluator(ini, calibratorEvaluatorIni);
            }
            return ini;
        }
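
For orientation, the INI text produced for a hypothetical two-weight model (no slot names, weights 0.5 and -1.25 in slots 0 and 3, bias 0.75, no calibrator) has this shape; the Nodes and Weights entries are tab-separated, and the exact values are illustrative:

[TreeEnsemble]
Inputs=2
Evaluators=1

[Input:1]
Name=Feature_0
Transform=linear
Slope=1
Intercept=0

[Input:2]
Name=Feature_3
Transform=linear
Slope=1
Intercept=0

[Evaluator:1]
EvaluatorType=Aggregator
Type=Linear
Bias=0.75
NumNodes=2
Nodes=I:1	I:2
Weights=0.5	-1.25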
Example #8
        protected override void ComputeTrainingStatistics(IChannel ch, FloatLabelCursor.Factory cursorFactory, float loss, int numParams)
        {
            Contracts.AssertValue(ch);
            Contracts.AssertValue(cursorFactory);
            Contracts.Assert(NumGoodRows > 0);
            Contracts.Assert(WeightSum > 0);
            Contracts.Assert(BiasCount == 1);
            Contracts.Assert(loss >= 0);
            Contracts.Assert(numParams >= BiasCount);
            Contracts.Assert(CurrentWeights.IsDense);

            ch.Info("Model trained with {0} training examples.", NumGoodRows);

            // Compute deviance: start with loss function.
            float deviance = (float)(2 * loss * WeightSum);

            if (L2Weight > 0)
            {
                // Need to subtract L2 regularization loss.
                // The bias term is not regularized.
                var regLoss = VectorUtils.NormSquared(CurrentWeights.Values, 1, CurrentWeights.Length - 1) * L2Weight;
                deviance -= regLoss;
            }

            if (L1Weight > 0)
            {
                // Need to subtract L1 regularization loss.
                // The bias term is not regularized.
                Double regLoss = 0;
                VBufferUtils.ForEachDefined(ref CurrentWeights,
                    (ind, value) =>
                    {
                        if (ind >= BiasCount)
                            regLoss += Math.Abs(value);
                    });
                deviance -= (float)regLoss * L1Weight * 2;
            }

            ch.Info("Residual Deviance: \t{0} (on {1} degrees of freedom)", deviance, Math.Max(NumGoodRows - numParams, 0));

            // Compute the null deviance, i.e., the deviance of the null hypothesis.
            // If the prior positive rate is within 1e-15 of 0 or 1, the null deviance degenerates to 0.
            Double priorPosRate = _posWeight / WeightSum;

            Contracts.Assert(0 <= priorPosRate && priorPosRate <= 1);
            float nullDeviance = (priorPosRate <= 1e-15 || 1 - priorPosRate <= 1e-15) ?
                                 0f : (float)(2 * WeightSum * MathUtils.Entropy(priorPosRate, true));

            ch.Info("Null Deviance:     \t{0} (on {1} degrees of freedom)", nullDeviance, NumGoodRows - 1);

            // Compute AIC.
            ch.Info("AIC:               \t{0}", 2 * numParams + deviance);

            // Show the coefficients statistics table.
            var featureColIdx = cursorFactory.Data.Schema.Feature.Index;
            var schema        = cursorFactory.Data.Data.Schema;
            var featureLength = CurrentWeights.Length - BiasCount;
            var namesSpans    = VBufferUtils.CreateEmpty<ReadOnlyMemory<char>>(featureLength);

            if (schema.HasSlotNames(featureColIdx, featureLength))
            {
                schema.GetMetadata(MetadataUtils.Kinds.SlotNames, featureColIdx, ref namesSpans);
            }
            Host.Assert(namesSpans.Length == featureLength);

            // Inverse mapping of non-zero weight slots.
            Dictionary<int, int> weightIndicesInvMap = null;

            // Indices of bias and non-zero weight slots.
            int[] weightIndices = null;

            // Whether all weights are non-zero.
            bool denseWeight = numParams == CurrentWeights.Length;

            // Extract non-zero indices of weight.
            if (!denseWeight)
            {
                weightIndices          = new int[numParams];
                weightIndicesInvMap    = new Dictionary<int, int>(numParams);
                weightIndices[0]       = 0;
                weightIndicesInvMap[0] = 0;
                int j = 1;
                for (int i = 1; i < CurrentWeights.Length; i++)
                {
                    if (CurrentWeights.Values[i] != 0)
                    {
                        weightIndices[j]       = i;
                        weightIndicesInvMap[i] = j++;
                    }
                }

                Contracts.Assert(j == numParams);
            }

            // Compute the standard error of coefficients.
            long hessianDimension = (long)numParams * (numParams + 1) / 2;

            if (hessianDimension > int.MaxValue)
            {
                ch.Warning("The number of parameter is too large. Cannot hold the variance-covariance matrix in memory. " +
                           "Skipping computation of standard errors and z-statistics of coefficients. Consider choosing a larger L1 regularizer" +
                           "to reduce the number of parameters.");
                _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
                return;
            }

            // Building the variance-covariance matrix for parameters.
            // The layout of this algorithm is a packed row-major lower triangular matrix.
            // E.g., layout of indices for 4-by-4:
            // 0
            // 1 2
            // 3 4 5
            // 6 7 8 9
            var hessian = new Double[hessianDimension];

            // Initialize the diagonal elements with the L2 regularizer, except for the first entry (index 0),
            // since the bias is not regularized.
            if (L2Weight > 0)
            {
                // i is the array index of the diagonal entry at iRow-th row and iRow-th column.
                // iRow is one-based.
                int i = 0;
                for (int iRow = 2; iRow <= numParams; iRow++)
                {
                    i         += iRow;
                    hessian[i] = L2Weight;
                }

                Contracts.Assert(i == hessian.Length - 1);
            }

            // Initialize the remaining entries.
            var bias = CurrentWeights.Values[0];

            using (var cursor = cursorFactory.Create())
            {
                while (cursor.MoveNext())
                {
                    var label  = cursor.Label;
                    var weight = cursor.Weight;
                    var score  = bias + VectorUtils.DotProductWithOffset(ref CurrentWeights, 1, ref cursor.Features);
                    // Compute Bernoulli variance n_i * p_i * (1 - p_i) for the i-th training example.
                    var variance = weight / (2 + 2 * Math.Cosh(score));

                    // Increment the first entry of hessian.
                    hessian[0] += variance;

                    var values = cursor.Features.Values;
                    if (cursor.Features.IsDense)
                    {
                        int ioff = 1;

                        // Increment remaining entries of hessian.
                        for (int i = 1; i < numParams; i++)
                        {
                            ch.Assert(ioff == i * (i + 1) / 2);
                            int wi = weightIndices == null ? i - 1 : weightIndices[i] - 1;
                            Contracts.Assert(0 <= wi && wi < cursor.Features.Length);
                            var val = values[wi] * variance;
                            // Add the implicit first bias term to X'X
                            hessian[ioff++] += val;
                            // Add the remainder of X'X
                            for (int j = 0; j < i; j++)
                            {
                                int wj = weightIndices == null ? j : weightIndices[j + 1] - 1;
                                Contracts.Assert(0 <= wj && wj < cursor.Features.Length);
                                hessian[ioff++] += val * values[wj];
                            }
                        }
                        ch.Assert(ioff == hessian.Length);
                    }
                    else
                    {
                        var indices = cursor.Features.Indices;
                        for (int ii = 0; ii < cursor.Features.Count; ++ii)
                        {
                            int i  = indices[ii];
                            int wi = i + 1;
                            if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(i + 1, out wi))
                            {
                                continue;
                            }

                            Contracts.Assert(0 < wi && wi <= cursor.Features.Length);
                            int ioff = wi * (wi + 1) / 2;
                            var val  = values[ii] * variance;
                            // Add the implicit first bias term to X'X
                            hessian[ioff] += val;
                            // Add the remainder of X'X
                            for (int jj = 0; jj <= ii; jj++)
                            {
                                int j  = indices[jj];
                                int wj = j + 1;
                                if (weightIndicesInvMap != null && !weightIndicesInvMap.TryGetValue(j + 1, out wj))
                                {
                                    continue;
                                }

                                Contracts.Assert(0 < wj && wj <= cursor.Features.Length);
                                hessian[ioff + wj] += val * values[jj];
                            }
                        }
                    }
                }
            }

            _stats = new LinearModelStatistics(Host, NumGoodRows, numParams, deviance, nullDeviance);
        }
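
A closing note on the variance term shared by both binary examples: weight / (2 + 2 * cosh(score)) equals weight * p * (1 - p) for p = sigmoid(score), since exp(s) * (1 + exp(-s))^2 = 2 + 2 * cosh(s). A quick numeric sanity check of the identity (illustrative; the helper name is hypothetical):

        // For p = 1 / (1 + exp(-s)): p * (1 - p) == 1 / (2 + 2 * cosh(s)).
        private static void CheckBernoulliVarianceIdentity(double s)
        {
            double p = 1.0 / (1.0 + Math.Exp(-s));
            double lhs = p * (1.0 - p);
            double rhs = 1.0 / (2.0 + 2.0 * Math.Cosh(s));
            Console.WriteLine("{0} vs {1}", lhs, rhs); // Agree up to rounding.
        }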