Ejemplo n.º 1
0
        /// <summary>
        /// Trains a multinomial logistic regression with the lower-bound Newton-Raphson
        /// learner, reports 10-fold cross-validation and confusion-matrix statistics through
        /// <c>Funcs.Utility.OutPutStats</c>, saves the trained model to disk and returns the
        /// model's decisions for the training inputs.
        /// </summary>
        /// <param name="input1">The input vectors, one row per sample.</param>
        /// <param name="labels">The class label of each row of <paramref name="input1"/>.</param>
        /// <param name="SaveFile">
        /// Path the model file name is derived from: every ".csv" in it is replaced by
        /// ".MLR.save"; when the path contains no ".csv", ".MLR.save" is appended instead.
        /// </param>
        /// <returns>The class decided by the trained model for each row of <paramref name="input1"/>.</returns>
        /// <exception cref="System.ArgumentNullException">Any argument is <c>null</c>.</exception>
        public static int[] MultiNomialLogRegressionLowerBoundNewtonRaphson(double[][] input1, int[] labels, string SaveFile)
        {
            if (input1 == null)
            {
                throw new System.ArgumentNullException("input1");
            }
            if (labels == null)
            {
                throw new System.ArgumentNullException("labels");
            }
            if (SaveFile == null)
            {
                throw new System.ArgumentNullException("SaveFile");
            }

            // http://accord-framework.net/docs/html/T_Accord_Statistics_Models_Regression_MultinomialLogisticRegression.htm

            // One set of learner settings shared by the final model and by every
            // cross-validation fold, so the reported statistics describe the same
            // learner configuration that produces the saved model.
            const int    maxIterations = 10;
            const double tolerance     = 1e-6;

            // Estimation algorithm used for the final model.
            LowerBoundNewtonRaphson lbnr = new LowerBoundNewtonRaphson()
            {
                MaxIterations = maxIterations,
                Tolerance     = tolerance
            };

            var cv = CrossValidation.Create(

                k: 10,     // 10-fold cross validation

                // Learning algorithm created for each fold (same settings as lbnr):
                learner: (p) => new LowerBoundNewtonRaphson()
                {
                    MaxIterations = maxIterations,
                    Tolerance     = tolerance
                },

                // How the performance of each fold is measured:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // No special preparation before learning; just fit the fold:
                fit: (teach, x, y, w) => teach.Learn(x, y, w),

                // The data used in cross-validation:
                x: input1, y: labels
                );

            // Run the cross-validation over the data.
            var cvresult = cv.Learn(input1, labels);

            // Estimate the final model on the complete data set.
            MultinomialLogisticRegression mlr = lbnr.Learn(input1, labels);

            // Generate statistics from confusion matrices.
            // NOTE(review): ConfusionMatrix looks like the two-class matrix; confirm it is
            // meaningful when labels holds more than two classes.
            ConfusionMatrix        cm  = ConfusionMatrix.Estimate(mlr, input1, labels);
            GeneralConfusionMatrix gcm = cvresult.ToConfusionMatrix(input1, labels);

            Funcs.Utility.OutPutStats(cvresult.NumberOfSamples, cvresult.NumberOfInputs,
                                      cvresult.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // Compute the model answers for the training inputs.
            int[] answers = mlr.Decide(input1);

            // Derive the model file name. When SaveFile contains no ".csv" the replacement
            // is a no-op and saving would overwrite SaveFile itself, so append the suffix
            // in that case. Paths that do contain ".csv" map exactly as before.
            string modelsavefile = SaveFile.Replace(".csv", ".MLR.save");
            if (modelsavefile == SaveFile)
            {
                modelsavefile = SaveFile + ".MLR.save";
            }

            mlr.Save(modelsavefile, compression: SerializerCompression.None);

            return answers;
        }
Ejemplo n.º 2
0
#pragma warning restore 612, 618

        /// <summary>
        /// Fits this instance's regression to the given samples using a
        /// <c>LowerBoundNewtonRaphson</c> learner, then refreshes the analysis information.
        /// </summary>
        /// <param name="x">The model inputs.</param>
        /// <param name="y">The desired outputs associated with each input in <paramref name="x" />.</param>
        /// <param name="weights">The weight of importance for each input-output pair.</param>
        /// <returns>
        /// The regression held by this instance, after learning <paramref name="y" /> from <paramref name="x" />.
        /// </returns>
        public MultinomialLogisticRegression Learn(double[][] x, double[][] y, double[] weights = null)
        {
            // The teacher works directly on the regression owned by this instance
            // and is configured from this instance's tolerance/iterations settings.
            var teacher = new LowerBoundNewtonRaphson(regression);
            teacher.Tolerance  = tolerance;
            teacher.Iterations = iterations;

            teacher.Learn(x, y, weights);

            // Recompute the derived information for the data just learned.
            computeInformation(x, y);

            return regression;
        }
        // Documentation sample: learns a three-class multinomial logistic regression
        // from five two-dimensional points and checks that it classifies them all.
        public void doc_learn()
        {
            #region doc_learn
            // A very simple classification problem with
            // only two input variables per sample (x and y):
            var samples = new double[][]
            {
                new double[] { 3.0, 1.0 },
                new double[] { 7.0, 1.0 },
                new double[] { 3.0, 1.1 },
                new double[] { 3.0, 2.0 },
                new double[] { 6.0, 1.0 },
            };

            // The class label of each sample
            var classes = new int[] { 0, 2, 0, 1, 2 };

            // Set up the estimation algorithm for the regression
            var teacher = new LowerBoundNewtonRaphson();
            teacher.MaxIterations = 100;
            teacher.Tolerance     = 1e-6;

            // Iteratively estimate the model:
            MultinomialLogisticRegression model = teacher.Learn(samples, classes);

            // Ask the model for its answers
            int[] answers = model.Decide(samples);

            // ... and for the probability of each class
            double[][] probabilities = model.Probabilities(samples);

            // Measure how good the model is at predicting
            double error = new ZeroOneLoss(classes).Loss(answers);

            // The classes with the highest probability should
            // be the same as the ones that were decided for:
            int[] argmax = probabilities.ArgMax(dimension: 1); // should be same as 'answers'
            #endregion

            Assert.AreEqual(0, error);
            Assert.AreEqual(answers, argmax);
        }
Ejemplo n.º 4
0
        /// <summary>
        ///   Computes the analysis by fitting the regression with the
        ///   lower-bound Newton-Raphson learner until the change reported by
        ///   the learner drops to the tolerance or the iteration limit is hit.
        /// </summary>
        ///
        /// <returns>
        ///   True if the last learning iteration reported a change no larger
        ///   than the tolerance; false otherwise (including a NaN change).
        /// </returns>
        ///
        public bool Compute()
        {
            double delta;
            int    iteration = 0;

            var learning = new LowerBoundNewtonRaphson(regression);

            do // learning iterations until convergence
            {
                delta = learning.Run(inputData, outputData);
                iteration++;
            } while (delta > tolerance && iteration < iterations);

            // Decide convergence from the last observed change, not from the
            // iteration counter: a model that meets the tolerance exactly on the
            // final permitted iteration has converged, and a NaN change (which
            // also ends the loop) must not be reported as convergence.
            bool converged = delta <= tolerance;

            computeInformation();

            return converged;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Fits a multinomial logistic regression to the independent/dependent data and
        /// stores its coefficients, Wald statistics, log-likelihood, deviance and
        /// chi-square results on this instance.
        /// </summary>
        /// <returns>The fitted regression (also stored in <c>mlr</c>).</returns>
        private MultinomialLogisticRegression buildModel()
        {
            // Prepare the data on first use.
            if (independent == null)
            {
                formatData();
            }

            mlr = new MultinomialLogisticRegression(nvars, ncat);
            LowerBoundNewtonRaphson lbn = new LowerBoundNewtonRaphson(mlr);

            // NOTE(review): iteration is not reset here; if it persists between calls,
            // a second call starts counting from the previous value - confirm intended.
            do
            {
                delta = lbn.Run(independent, dependent);
                iteration++;
            } while (iteration < totit && delta > converg);

            coefficients  = mlr.Coefficients;
            standarderror = new double[ncat - 1][];
            waldstat      = new double[ncat - 1][];
            waldpvalue    = new double[ncat - 1][];

            // One row per coefficient vector, one column per coefficient
            // (nvars + 1 entries per row).
            for (int i = 0; i < coefficients.Length; i++)
            {
                double[] steArr        = new double[nvars + 1];
                double[] waldStatArr   = new double[nvars + 1];
                double[] waldPvalueArr = new double[nvars + 1];
                for (int j = 0; j < nvars + 1; j++)
                {
                    Accord.Statistics.Testing.WaldTest wt = mlr.GetWaldTest(i, j);
                    steArr[j]        = wt.StandardError;
                    waldStatArr[j]   = wt.Statistic;
                    waldPvalueArr[j] = wt.PValue;
                }
                waldstat[i]      = waldStatArr;
                waldpvalue[i]    = waldPvalueArr;
                standarderror[i] = steArr;
            }

            loglikelihood = mlr.GetLogLikelihood(independent, dependent);
            deviance      = mlr.GetDeviance(independent, dependent);

            // Run the chi-square test once and read both results from it,
            // instead of evaluating the same test twice over the data.
            var chiSquare = mlr.ChiSquare(independent, dependent);
            x2 = chiSquare.Statistic;
            pv = chiSquare.PValue;

            return mlr;
        }
Ejemplo n.º 6
0
        // Sample: learns a multinomial logistic regression from the given data and
        // computes its decisions, class probabilities and accuracy loss. The computed
        // values are not returned or stored anywhere.
        private static void multinomial(double[][] inputs, int[] outputs)
        {
            // Configure the teacher
            LowerBoundNewtonRaphson teacher = new LowerBoundNewtonRaphson();
            teacher.MaxIterations = 100;
            teacher.Tolerance     = 1e-6;

            // Learn the regression using the teacher:
            MultinomialLogisticRegression model = teacher.Learn(inputs, outputs);

            // Decisions of the model for each input
            int[] answers = model.Decide(inputs);

            // Probability of every class for each input
            double[][] probabilities = model.Probabilities(inputs);

            // Loss of the decisions against the expected outputs
            double error = new AccuracyLoss(outputs).Loss(answers);

            // The most probable class of each input;
            // should be the same as 'answers'
            int[] argmax = probabilities.ArgMax(dimension: 1);
        }
        // Fits a 3-category multinomial logistic regression with iterative Run() calls
        // and checks the coefficients, standard errors, log-likelihood, chi-square
        // statistic and Wald statistics against reference values.
        public void RegressTest2()
        {
            double[][] inputs;
            int[]      outputs;

            CreateInputOutputsExample1(out inputs, out outputs);

            // Multinomial Logistic Regression with 2 inputs and 3 categories
            var mlr = new MultinomialLogisticRegression(inputs: 2, categories: 3);

            // Estimation algorithm for the regression
            var lbnr = new LowerBoundNewtonRaphson(mlr);

            // Iteratively estimate the model. Run returns the maximum relative
            // change in the model parameters, which is the convergence criterion;
            // at most 100 iterations are performed.
            int iteration = 0;

            while (true)
            {
                double delta = lbnr.Run(inputs, outputs);
                iteration++;

                if (iteration >= 100 || !(delta > 1e-6))
                {
                    break;
                }
            }

            Assert.AreEqual(52, iteration);

            for (int row = 0; row < 2; row++)
            {
                for (int col = 0; col < 3; col++)
                {
                    Assert.IsFalse(double.IsNaN(mlr.Coefficients[row][col]));
                }
            }

            // This is the same example given in R Data Analysis Examples for
            // Multinomial Logistic Regression: http://www.ats.ucla.edu/stat/r/dae/mlogit.htm
            // Columns: intercept, female, age.
            double[][] expectedCoefficients =
            {
                new[] { -11.774655, 0.523814, 0.368206 }, // brand 2
                new[] { -22.721396, 0.465941, 0.685908 }, // brand 3
            };

            for (int row = 0; row < 2; row++)
            {
                for (int col = 0; col < 3; col++)
                {
                    Assert.AreEqual(expectedCoefficients[row][col], mlr.Coefficients[row][col], 1e-4);
                }
            }

            for (int row = 0; row < 2; row++)
            {
                for (int col = 0; col < 3; col++)
                {
                    Assert.IsFalse(double.IsNaN(mlr.StandardErrors[row][col]));
                }
            }

            // Reference values when using the standard Hessian estimation (not asserted):
            //   row 0: 1.774612, 0.194247, 0.055003
            //   row 1: 2.058028, 0.226090, 0.062627

            // Expected values when using the lower-bound approximation
            // (identical for both rows):
            double[] expectedStandardErrors =
            {
                1.047378039787443, 0.153150051082552, 0.031640507386863
            };

            for (int row = 0; row < 2; row++)
            {
                for (int col = 0; col < 3; col++)
                {
                    Assert.AreEqual(expectedStandardErrors[col], mlr.StandardErrors[row][col], 1e-6);
                }
            }

            double ll = mlr.GetLogLikelihood(inputs, outputs);

            Assert.AreEqual(-702.97, ll, 1e-2);
            Assert.IsFalse(double.IsNaN(ll));

            var chi = mlr.ChiSquare(inputs, outputs);

            Assert.AreEqual(185.85, chi.Statistic, 1e-2);
            Assert.IsFalse(double.IsNaN(chi.Statistic));

            // Fetch every Wald test first, then validate them.
            var wald = new[]
            {
                new[] { mlr.GetWaldTest(0, 0), mlr.GetWaldTest(0, 1), mlr.GetWaldTest(0, 2) },
                new[] { mlr.GetWaldTest(1, 0), mlr.GetWaldTest(1, 1), mlr.GetWaldTest(1, 2) },
            };

            for (int row = 0; row < 2; row++)
            {
                for (int col = 0; col < 3; col++)
                {
                    Assert.IsFalse(double.IsNaN(wald[row][col].Statistic));
                }
            }

            // Reference values when using the standard Hessian estimation (not asserted):
            //   row 0:  -6.6351, 2.6966,  6.6943
            //   row 1: -11.0404, 2.0609, 10.9524

            // Expected values when using the lower-bound approximation:
            double[][] expectedWald =
            {
                new[] { -11.241995503283842, 3.4202662152119889, 11.637150673342207 },
                new[] { -21.693553825772664, 3.0423802097069097, 21.678124991086548 },
            };

            for (int row = 0; row < 2; row++)
            {
                for (int col = 0; col < 3; col++)
                {
                    Assert.AreEqual(expectedWald[row][col], wald[row][col].Statistic, 1e-4);
                }
            }
        }
        /// <summary>
        /// Loads the task items in the given date range, learns a multinomial logistic
        /// regression that predicts each item's task type from its features, and returns
        /// the items of the selected task types whose predicted type differs from the
        /// actual one.
        /// </summary>
        /// <param name="startDate">Start of the date range passed to the task item repository.</param>
        /// <param name="finishDate">End of the date range passed to the task item repository.</param>
        /// <param name="product">Task-type selection flag forwarded to <c>TaskItemTypeIsSelected</c>.</param>
        /// <param name="engineering">Task-type selection flag forwarded to <c>TaskItemTypeIsSelected</c>.</param>
        /// <param name="unanticipated">Task-type selection flag forwarded to <c>TaskItemTypeIsSelected</c>.</param>
        /// <param name="assessmentsTeam">Team selection flag forwarded to <c>TaskItemDevTeamIsSelected</c>.</param>
        /// <param name="enterpriseTeam">Team selection flag forwarded to <c>TaskItemDevTeamIsSelected</c>.</param>
        /// <returns>
        /// The analysis result: user ids, the zero-one loss of the model on its own training
        /// data, and the mispredicted items. When no task item qualifies, the result has
        /// its user ids set, no items added, and <c>Error</c> left at its initial value.
        /// </returns>
        public async Task <MultinomialLogisticRegressionAnalysisItemList> GetLogisticRegressionAnalysisData(
            DateTimeOffset?startDate, DateTimeOffset?finishDate,
            bool product, bool engineering, bool unanticipated, bool assessmentsTeam, bool enterpriseTeam)
        {
            var logisticRegressionData = new MultinomialLogisticRegressionAnalysisItemList();

            var taskItemRepository = new TaskItemRepository();
            var taskItemList       = await taskItemRepository.GetTaskItemListAsync(startDate, finishDate);

            logisticRegressionData.UserIds = GetUserIds(taskItemList);

            var inputs         = new List <List <double> >();
            var outputList     = new List <int>();
            var ids            = new List <int>();
            var titles         = new List <string>();
            var taskItemHelper = new TaskItemHelper();

            // Only items that have both a start and a finish time and belong
            // to a selected dev team take part in the regression.
            foreach (var logisticRegressionTaskItem
                     in from taskItem in taskItemList
                     where taskItem.StartTime != null &&
                     taskItem.FinishTime != null &&
                     taskItemHelper.TaskItemDevTeamIsSelected(assessmentsTeam, enterpriseTeam, taskItem)
                     select GetLogisticRegressionTaskItem(taskItem))
            {
                ids.Add(logisticRegressionTaskItem.Id);
                titles.Add(logisticRegressionTaskItem.Title);

                // Feature row: durations in days, team indicators and revision count ...
                var row = new List <double>
                {
                    logisticRegressionTaskItem.Lifetime.TotalDays,
                    logisticRegressionTaskItem.LeadTime.TotalDays,
                    logisticRegressionTaskItem.TimeSpentInBacklog.TotalDays,
                    (logisticRegressionTaskItem.DevTeamIsAssessments ? 1.0 : 0.0),
                    (logisticRegressionTaskItem.DevTeamIsEnterprise ? 1.0 : 0.0),
                    logisticRegressionTaskItem.NumRevisions
                };

                // ... followed by one indicator per user for "created by" ...
                foreach (var user in logisticRegressionData.UserIds)
                {
                    row.Add(logisticRegressionTaskItem.CreatedById == user ? 1.0 : 0.0);
                }

                // ... and one indicator per user for "last changed by".
                foreach (var user in logisticRegressionData.UserIds)
                {
                    row.Add(logisticRegressionTaskItem.LastChangedBy.Id == user ? 1.0 : 0.0);
                }

                inputs.Add(row);

                // The task type is the class label to predict.
                outputList.Add((int)logisticRegressionTaskItem.TaskItemType);
            }

            // Nothing qualified: there is no data to learn from, so return the
            // empty result instead of handing empty arrays to the learner.
            if (inputs.Count == 0)
            {
                return(logisticRegressionData);
            }

            var inputArray    = inputs.Select(inputList => inputList.ToArray()).ToArray();
            var actualResults = outputList.ToArray();

            var lbnr = new LowerBoundNewtonRaphson()
            {
                MaxIterations = 100,
                Tolerance     = 1e-6
            };

            var mlr = lbnr.Learn(inputArray, actualResults);

            var predictions = mlr.Decide(inputArray);

            var probabilities = mlr.Probabilities(inputArray);

            // Loss of the model measured on the same data it was trained with.
            logisticRegressionData.Error = new ZeroOneLoss(actualResults).Loss(predictions);

            for (var i = 0; i < ids.Count; i++)
            {
                if (!taskItemHelper.TaskItemTypeIsSelected(product, engineering, unanticipated, actualResults[i]))
                {
                    continue;
                }

                // Highest class probability reported for this item.
                var probability = probabilities[i].Max();

                var logisticRegressionItem = new MultinomialLogisticRegressionAnalysisItem
                {
                    Id          = ids[i],
                    Inputs      = inputs[i],
                    Title       = titles[i],
                    Actual      = actualResults[i],
                    Prediction  = predictions[i],
                    Probability = probability
                };

                // Only mispredicted items are reported.
                if (logisticRegressionItem.Actual != logisticRegressionItem.Prediction)
                {
                    logisticRegressionData.Items.Add(logisticRegressionItem);
                }
            }

            return(logisticRegressionData);
        }