public void ComputeTest2()
        {
            MultinomialLogisticRegression mlr = createExample1();

            // Only columns 1..2 of the example table are fed to the model. Column 0
            // used to be loaded into an unused local and is no longer extracted.
            double[][] inputs = example1.Submatrix(null, 1, 2).ToArray();

            // Tested against values extracted from predicted probabilities
            // table from: http://www.ats.ucla.edu/stat/r/dae/mlogit.htm

            // Row of 'inputs' to evaluate, paired by position with the
            // probabilities expected for each of the three categories.
            int[] rows = { 0, 5, 11, 12 };
            double[][] expected =
            {
                new[] { 0.9479577862063925, 0.0502297144022469, 0.0018124993913602 },
                new[] { 0.772875639435192,  0.208690558456066,  0.018433802108742  },
                new[] { 0.772875639435192,  0.208690558456066,  0.018433802108742  },
                new[] { 0.695617266629850,  0.271439833912059,  0.032942899458091  },
            };

            for (int i = 0; i < rows.Length; i++)
            {
                double[] responses = mlr.Compute(inputs[rows[i]]);

                for (int c = 0; c < expected[i].Length; c++)
                {
                    Assert.AreEqual(expected[i][c], responses[c], 1e-5);
                }
            }
        }
        /// <summary>
        /// Trains the classifier and, when requested, computes the training error.
        /// </summary>
        /// <remarks>
        /// The learner is chosen from <c>LearningAlgorithmName</c>: conjugate gradient, gradient descent
        /// or BFGS map to the matching <c>MultinomialLogisticLearning</c>; any other value falls back to
        /// <c>LowerBoundNewtonRaphson</c> with at most 100 iterations and a tolerance of 1e-6.
        /// After learning, <c>Model</c> and <c>Probabilities</c> are refreshed from the training data.
        /// </remarks>
        /// <param name="trainingData">The training data that will be used to train the classifier.</param>
        /// <param name="trainingLabels">The training labels related to the provided training data.</param>
        /// <param name="calculateError">Whether the training error should be calculated after learning.</param>
        public override void Train(List <double[]> trainingData, List <int> trainingLabels, bool calculateError = true)
        {
            if (LearningAlgorithmName == LogisticRegressionOptimizationAlgorithm.ConjugateGradient)
            {
                LearningAlgorithm = new MultinomialLogisticLearning <ConjugateGradient>();
            }
            else if (LearningAlgorithmName == LogisticRegressionOptimizationAlgorithm.GradientDescent)
            {
                LearningAlgorithm = new MultinomialLogisticLearning <GradientDescent>();
            }
            else if (LearningAlgorithmName == LogisticRegressionOptimizationAlgorithm.BroydenFletcherGoldfarbShanno)
            {
                LearningAlgorithm = new MultinomialLogisticLearning <BroydenFletcherGoldfarbShanno>();
            }
            else
            {
                LearningAlgorithm = new LowerBoundNewtonRaphson()
                {
                    MaxIterations = 100,
                    Tolerance     = 1e-6
                };
            }

            // Materialise the samples once; the same array serves both the
            // learning step and the probability computation below.
            double[][] samples = trainingData.ToArray();

            Model         = LearningAlgorithm.Learn(samples, trainingLabels.ToArray());
            Probabilities = Model.Probabilities(samples);

            if (calculateError)
            {
                CalculateTrainingError(trainingData, trainingLabels);
            }
        }
        private static MultinomialLogisticRegression createExample1()
        {
            // Builds a fixed 2-input, 3-category model whose coefficients and
            // standard errors are filled in by hand instead of being learned.
            var model = new MultinomialLogisticRegression(2, 3);

            // One row per non-reference category; columns are { intercept, female, age }.
            double[][] coefficients =
            {
                new[] { -11.774655, 0.523814, 0.368206 }, // brand 2
                new[] { -22.721396, 0.465941, 0.685908 }, // brand 3
            };

            // Standard errors laid out exactly like the coefficients above.
            double[][] standardErrors =
            {
                new[] { 1.774612, 0.194247, 0.055003 },
                new[] { 2.058028, 0.226090, 0.062627 },
            };

            for (int row = 0; row < coefficients.Length; row++)
            {
                for (int col = 0; col < coefficients[row].Length; col++)
                {
                    model.Coefficients[row][col]   = coefficients[row][col];
                    model.StandardErrors[row][col] = standardErrors[row][col];
                }
            }

            return model;
        }
Пример #4
0
        // Classifies x_test with the given regression, hands the predictions back through
        // y_predicted and returns the confusion matrix of expected versus predicted labels.
        private GeneralConfusionMatrix Test(MultinomialLogisticRegression logReg, double[][] x_test, int[] y_expected, out int[] y_predicted)
        {
            int[] decisions = logReg.Decide(x_test);
            y_predicted = decisions;

            return new GeneralConfusionMatrix(y_expected, decisions);
        }
        public void GetWaldTestTest()
        {
            MultinomialLogisticRegression target = createExample1();

            double[][] inputs;
            int[]      outputs;

            CreateInputOutputsExample1(out inputs, out outputs);

            // Expected values indexed as [category, coefficient]; each assertion
            // has its own tolerance, stored in the matching cell of the
            // corresponding tolerance table.
            double[,] statistics =
            {
                {  -6.6351, 2.6966,  6.6943 },
                { -11.0404, 2.0609, 10.9524 },
            };
            double[,] statisticTolerances =
            {
                { 1e-4, 1e-4, 1e-4 },
                { 1e-4, 1e-4, 1e-3 },
            };
            double[,] pValues =
            {
                { 3.244e-11, 0.007004, 2.167e-11 },
                { 0.0,       0.039315, 0.0       },
            };
            double[,] pValueTolerances =
            {
                { 1e-14, 1e-5, 1e-14 },
                { 1e-25, 1e-6, 1e-25 },
            };

            for (int category = 0; category < 2; category++)
            {
                for (int coefficient = 0; coefficient < 3; coefficient++)
                {
                    WaldTest wald = target.GetWaldTest(category, coefficient);

                    Assert.AreEqual(statistics[category, coefficient], wald.Statistic, statisticTolerances[category, coefficient]);
                    Assert.AreEqual(pValues[category, coefficient], wald.PValue, pValueTolerances[category, coefficient]);
                }
            }
        }
        /// <summary>
        /// Learns a model that can map the given inputs to the given outputs.
        /// </summary>
        /// <remarks>
        /// A regression is created on first use, sized from the number of columns in
        /// <paramref name="x" /> and the largest label in <paramref name="y" /> plus one.
        /// The optimization method then minimizes the cross-entropy function, and its
        /// solution vector is copied row by row into the regression coefficients.
        /// </remarks>
        /// <param name="x">The model inputs.</param>
        /// <param name="y">The desired outputs associated with each <paramref name="x">inputs</paramref>.</param>
        /// <param name="weights">The weight of importance for each input-output pair.
        ///   This method body does not read the value.</param>
        /// <returns>
        /// A model that has learned how to produce <paramref name="y" /> given <paramref name="x" />.
        /// </returns>
        public MultinomialLogisticRegression Learn(double[][] x, int[] y, double[] weights = null)
        {
            this.inputs  = x;
            this.outputs = y;

            if (regression == null)
            {
                regression = new Regression.MultinomialLogisticRegression(x.Columns(), y.Max() + 1);
            }

            if (method.NumberOfVariables != regression.NumberOfParameters)
            {
                method.NumberOfVariables = regression.NumberOfParameters;
            }

            method.Function = crossEntropy;

            // The gradient is only supplied to methods that can make use of one.
            var gom = method as IGradientOptimizationMethod;

            if (gom != null)
            {
                gom.Gradient = crossEntropyGradient;
            }

            // Forward the cancellation token when the method supports it.
            var sc = method as ISupportsCancellation;

            if (sc != null)
            {
                sc.Token = Token;
            }

            // A non-positive size means "use every sample in a single batch".
            if (miniBatchSize <= 0)
            {
                miniBatchSize = x.Length;
            }

            this.gradient  = new double[regression.NumberOfParameters];
            this.log_y_hat = new double[regression.NumberOfOutputs];

            // Partition the samples into consecutive, non-overlapping ranges
            // [start, end). Rounding the count up keeps the trailing samples in a
            // final, possibly shorter batch and guarantees at least one batch
            // when the batch size exceeds the number of samples.
            this.current     = 0;
            this.miniBatches = new IntRange[(int)Math.Ceiling(x.Length / (double)miniBatchSize)];
            for (int i = 0; i < miniBatches.Length; i++)
            {
                int start = i * miniBatchSize;
                miniBatches[i] = new IntRange(start, Math.Min(start + miniBatchSize, x.Length));
            }

            // The success flag is not inspected; whatever solution
            // the method holds afterwards is used.
            method.Minimize();

            // Unpack the flat solution vector into the jagged coefficient matrix.
            for (int i = 0, k = 0; i < regression.Coefficients.Length; i++)
            {
                for (int j = 0; j < regression.Coefficients[i].Length; j++, k++)
                {
                    regression.Coefficients[i][j] = method.Solution[k];
                }
            }

            return(regression);
        }
Пример #7
0
        // Runs every classifier that is currently available on CurrPredictionPoints
        // and writes its prediction to the console. Only the logistic-regression
        // result is stored in PredictedFrequencyClassifiers. Typing predictions are
        // refreshed at the end when IsTyping is set.
        private static void Predict()
        {
            if (MultinomialLogisticRegression != null)
            {
                int    lgrPred = MultinomialLogisticRegression.Decide(CurrPredictionPoints);
                double lgrProb = MultinomialLogisticRegression.Probability(CurrPredictionPoints);
                PredictedFrequencyClassifiers = lgrPred;
                if (Classification.IsValidation)
                {
                    Console.WriteLine($"Real Frequency: {BrainStorm0.ClassificationShape.Hertz} Logistic Regression Predicted Frequency: {lgrPred} Probability: {lgrProb}");
                }
            }

            if (MinimumMeanDistance != null)
            {
                int    mmdPred  = MinimumMeanDistance.Decide(CurrPredictionPoints);
                double mmdScore = MinimumMeanDistance.Score(CurrPredictionPoints);
                if (Classification.IsValidation)
                {
                    // Report this classifier's own prediction. The message used to
                    // print PredictedFrequencyClassifiers, which holds the logistic
                    // regression result.
                    Console.WriteLine(
                        $"Real Frequency: {BrainStorm0.ClassificationShape.Hertz} Minimun Distance Predicted Frequency: {mmdPred}");
                }
                else
                {
                    Console.WriteLine(
                        $"MMD Predicted Frequency: {mmdPred} {mmdScore}");
                }
            }

            if (RandomForest != null)
            {
                int rfPred = RandomForest.Decide(CurrPredictionPoints);
                if (Classification.IsValidation)
                {
                    Console.WriteLine(
                        $"Real Frequency: {BrainStorm0.ClassificationShape.Hertz} Random Forest Predicted Frequency: {rfPred}");
                }
                else
                {
                    Console.WriteLine(
                        $"Random Forest Predicted Frequency: {rfPred}");
                }
            }

            if (IsTyping)
            {
                UpdateTypingPredictions();
            }
        }
Пример #8
0
        /// <summary>
        /// Fits a multinomial logistic regression with the lower-bound Newton-Raphson
        /// learner, reports 10-fold cross-validation and confusion-matrix statistics,
        /// saves the fitted model and returns its decisions for <paramref name="input1" />.
        /// </summary>
        /// <param name="input1">The input samples.</param>
        /// <param name="labels">The class label of each sample.</param>
        /// <param name="SaveFile">Path from which the model file name is derived:
        ///   its extension is replaced by ".MLR.save".</param>
        /// <returns>The class decided by the fitted model for every sample in <paramref name="input1" />.</returns>
        public static int[] MultiNomialLogRegressionLowerBoundNewtonRaphson(double[][] input1, int[] labels, string SaveFile)
        {
            // http://accord-framework.net/docs/html/T_Accord_Statistics_Models_Regression_MultinomialLogisticRegression.htm
            // Create an estimation algorithm to estimate the regression
            LowerBoundNewtonRaphson lbnr = new LowerBoundNewtonRaphson()
            {
                MaxIterations = 10,
                Tolerance     = 1e-6
            };
            // *******************************************************************************
            var cv = CrossValidation.Create(

                k: 10,     // We will be using 10-fold cross validation

                // First we define the learning algorithm.
                // NOTE(review): these learners use default settings, not the
                // MaxIterations/Tolerance given to 'lbnr' above, so the cross-validated
                // figures describe a differently configured learner - confirm this is intended.
                learner: (p) => new LowerBoundNewtonRaphson(),

                // Now we have to specify how the performance should be measured:
                loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),

                // This function can be used to perform any special
                // operations before the actual learning is done, but
                // here we will just leave it as simple as it can be:
                fit: (teach, x, y, w) => teach.Learn(x, y, w),

                // Finally, we have to pass the input and output data
                // that will be used in cross-validation.
                x: input1, y: labels
                );
            // Generate a cross validation of the data
            var cvresult = cv.Learn(input1, labels);

            // Iteratively estimate the model on the full data set
            MultinomialLogisticRegression mlr = lbnr.Learn(input1, labels);

            // Generate statistics from confusion matrices
            ConfusionMatrix        cm  = ConfusionMatrix.Estimate(mlr, input1, labels);
            GeneralConfusionMatrix gcm = cvresult.ToConfusionMatrix(input1, labels);

            Funcs.Utility.OutPutStats(cvresult.NumberOfSamples, cvresult.NumberOfInputs,
                                      cvresult.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // We can compute the model answers
            int[] answers = mlr.Decide(input1);

            // Swap only the file extension. A plain string replace of ".csv" returns the
            // path unchanged when that text is absent, which made the model overwrite
            // SaveFile itself, and it also rewrote ".csv" inside directory names.
            string modelsavefile = System.IO.Path.ChangeExtension(SaveFile, ".MLR.save");

            mlr.Save(modelsavefile, compression: SerializerCompression.None);

            return(answers);
        }
Пример #9
0
        /// <summary>
        /// Prints the probability of every object belonging to every class.
        /// The probabilities are computed by <paramref name="mlr" /> for <c>TestInputs</c>.
        /// </summary>
        /// <param name="mlr">The regression used to compute the class probabilities.</param>
        public void PrintProbabilities(MultinomialLogisticRegression mlr)
        {
            double[][] probabilities = mlr.Probabilities(TestInputs);

            Console.WriteLine("Probabilities for {0}", Сlassifier);

            // Arrays expose Length directly; no need for the LINQ Count() extension.
            for (int m = 0; m < probabilities.Length; m++)
            {
                for (int n = 0; n < probabilities[m].Length; n++)
                {
                    Console.WriteLine("([{0}, {1}]: {2})", m, n, probabilities[m][n]);
                }
            }
        }
        public void ChiSquareMethodTest()
        {
            // Load the example data set, then evaluate the hand-built model against it.
            double[][] samples;
            int[]      labels;
            CreateInputOutputsExample1(out samples, out labels);

            MultinomialLogisticRegression model = createExample1();
            ChiSquareTest chiSquare = model.ChiSquare(samples, labels);

            Assert.AreEqual(4, chiSquare.DegreesOfFreedom);
            Assert.AreEqual(185.85, chiSquare.Statistic, 1e-2);
        }
        public void GetLogLikelihoodTest()
        {
            MultinomialLogisticRegression model = createExample1();

            double[][] samples;
            int[]      labels;
            CreateInputOutputsExample1(out samples, out labels);

            // Log-likelihood of the example data under the hand-built model,
            // compared against the reference value to two decimal places.
            double logLikelihood = model.GetLogLikelihood(samples, labels);

            Assert.AreEqual(-702.97, logLikelihood, 1e-2);
        }
Пример #12
0
        /// <summary>
        /// Fits a multinomial logistic regression with the BFGS-based learner, reports
        /// 4-fold cross-validation and confusion-matrix statistics, saves the fitted
        /// model to <paramref name="fName" /> and returns its decisions for <paramref name="input" />.
        /// </summary>
        /// <param name="input">The input samples.</param>
        /// <param name="labels">The class label of each sample.</param>
        /// <param name="fName">The path the fitted model is saved to.</param>
        /// <returns>The class decided by the fitted model for every sample in <paramref name="input" />.</returns>
        public static int[] MultiNomialLogisticRegressionBFGS(double[][] input, int[] labels, string fName)
        {
            /* The L-BFGS algorithm is a member of the broad family of quasi-Newton optimization methods.
             * L-BFGS stands for 'Limited memory BFGS'. Indeed, L-BFGS uses a limited memory variation of
             * the Broyden–Fletcher–Goldfarb–Shanno (BFGS) update to approximate the inverse Hessian matrix
             * (denoted by Hk). Unlike the original BFGS method which stores a dense approximation, L-BFGS
             * stores only a few vectors that represent the approximation implicitly. Due to its moderate
             * memory requirement, L-BFGS method is particularly well suited for optimization problems with
             * a large number of variables.
             */

            // Create the learner
            var mlbfgs = new MultinomialLogisticLearning <BroydenFletcherGoldfarbShanno>();

            // Estimate the logistic regression from the full data set
            MultinomialLogisticRegression mlr = mlbfgs.Learn(input, labels);

            //
            // Create a cross validation model derived from the training set to measure the performance of this
            // predictive model and estimate how well we expect the model will generalize. The algorithm executes
            // multiple rounds of cross validation on different partitions and averages the results.
            //
            int folds = 4; // could play around with this later
            var cv    = CrossValidation.Create(k: folds, learner: (p) => new MultinomialLogisticLearning <BroydenFletcherGoldfarbShanno>(),
                                               loss: (actual, expected, p) => new ZeroOneLoss(expected).Loss(actual),
                                               fit: (teacher, x, y, w) => teacher.Learn(x, y, w),
                                               x: input, y: labels);
            var result = cv.Learn(input, labels);
            GeneralConfusionMatrix gcm = result.ToConfusionMatrix(input, labels);
            ConfusionMatrix        cm  = ConfusionMatrix.Estimate(mlr, input, labels);

            //
            // Output relevant statistics
            //
            Funcs.Utility.OutPutStats(result.NumberOfSamples, result.NumberOfInputs,
                                      result.Training.Mean, gcm.Accuracy, cm.FalsePositives, cm.FalseNegatives, cm.FScore);

            // Compute the model predictions. The per-class probabilities and the
            // zero-one loss used to be computed here as well but were never used.
            int[] answers = mlr.Decide(input);

            mlr.Save(fName, compression: SerializerCompression.None);

            return(answers);
        }
        public void doc_learn()
        {
            #region doc_learn
            // A very small classification problem in which every
            // sample is described by two input variables (x and y):
            var inputs = new double[][]
            {
                new double[] { 3.0, 1.0 },
                new double[] { 7.0, 1.0 },
                new double[] { 3.0, 1.1 },
                new double[] { 3.0, 2.0 },
                new double[] { 6.0, 1.0 },
            };

            // The class label that belongs to each of the samples
            var outputs = new int[] { 0, 2, 0, 1, 2 };

            // Configure the algorithm that will estimate the regression
            var lbnr = new LowerBoundNewtonRaphson();
            lbnr.MaxIterations = 100;
            lbnr.Tolerance     = 1e-6;

            // Estimate the model iteratively from the data:
            MultinomialLogisticRegression mlr = lbnr.Learn(inputs, outputs);

            // Ask the model which class it decides for each sample
            int[] answers = mlr.Decide(inputs);

            // Ask it for the probability of each class as well
            double[][] probabilities = mlr.Probabilities(inputs);

            // Measure how often the decisions disagree with the labels
            var loss = new ZeroOneLoss(outputs);
            double error = loss.Loss(answers);

            // The class with the highest probability should
            // be the very class the model decided for:
            int[] argmax = probabilities.ArgMax(dimension: 1); // should be same as 'answers'
            #endregion

            Assert.AreEqual(0, error);
            Assert.AreEqual(answers, argmax);
        }
Пример #14
0
        // Classifies a 200 x 200 mesh spanning the range of the first two columns of
        // 'table' and binds the mesh, with the decided class appended as an extra
        // column, to the decision map.
        private void createSurface(double[][] table)
        {
            // Range covered by each variable (X and Y)
            DoubleRange[] axes = table.GetRange(0);

            // Cartesian grid over those ranges
            double[][] grid = Matrix.Mesh(axes[0], 200, axes[1], 200);

            // Decide a class for every grid point
            MultinomialLogisticRegression classifier = mlr.Regression;
            double[] decided = classifier.Decide(grid).ToDouble();

            double[,] labelled = grid.ToMatrix().InsertColumn(decided);
            decisionMap.DataSource = labelled;
        }
        public void GetOddsRatioTest()
        {
            MultinomialLogisticRegression target = createExample1();

            // The odds ratio must equal the exponential of the matching coefficient.
            // Checked for categories 0..1 and coefficient indices 1..2; index 0 is
            // not part of this test.
            for (int category = 0; category <= 1; category++)
            {
                for (int index = 1; index <= 2; index++)
                {
                    double oddsRatio = target.GetOddsRatio(category, index);
                    double expected  = System.Math.Exp(target.Coefficients[category][index]);

                    Assert.AreEqual(expected, oddsRatio);
                }
            }
        }
Пример #16
0
        // Click handler: after confirmation, centers and standardizes the train and
        // test inputs in place, splits the training data, learns the naive Bayes and
        // logistic regression models on a background task and then runs TestModels().
        private async void learnAllButton_Click(object sender, EventArgs e)
        {
            if (!isDataLoaded)
            {
                Utilities.InfoMessageBox("Please load data or wait until data is loaded first.");
                return;
            }

            if (!Utilities.ConfirmMessageBox("Learning models can take up to 2 minutes. Continue?"))
            {
                return;
            }

            // Train and test inputs are each centered and standardized
            // independently, by separate calls on each set.
            Accord.Statistics.Tools.Center(dataSet.X_Train, inPlace: true);
            Accord.Statistics.Tools.Standardize(dataSet.X_Train, inPlace: true);
            Accord.Statistics.Tools.Center(dataSet.X_Test, inPlace: true);
            Accord.Statistics.Tools.Standardize(dataSet.X_Test, inPlace: true);

            double[][] XKnownTrainSet, XKnownTestSet;
            int[]      YKnownTrainSet, YKnownTestSet;

            Utils.SplitTrainTest(dataSet.X_Train, dataSet.Y_Train, out XKnownTrainSet, out XKnownTestSet, out YKnownTrainSet, out YKnownTestSet);

            // NOTE(review): the "Training" subsets receive the held-out *test* part of
            // the split, not the part the models are learned on - confirm this is intended.
            xSubset.Training = XKnownTestSet;
            xSubset.Testing  = dataSet.X_Test;
            ySubset.Training = YKnownTestSet;
            ySubset.Testing  = dataSet.Y_Test;

            toolStripStatusLabel.Text = "Learning models...";

            // Task.Run always queues to the default (thread-pool) scheduler, whereas
            // Task.Factory.StartNew without an explicit scheduler uses whichever
            // scheduler happens to be current.
            await Task.Run(() =>
            {
                naiveBayes         = LearnNB(XKnownTrainSet, YKnownTrainSet);
                logisticRegression = LearnLogReg(XKnownTrainSet, YKnownTrainSet);
            });

            isModelsLearned           = true;
            toolStripStatusLabel.Text = "Learning done";

            TestModels();
        }
Пример #17
0
        /// <summary>
        /// Appends the probability of every object in <c>TestInputs</c> belonging to
        /// every class to a text file, one "([row, class]: value)" line per entry.
        /// </summary>
        /// <param name="mlr">The regression used to compute the class probabilities.</param>
        /// <param name="path">Prefix of the output file name. The result of <c>InitialTime()</c>,
        ///   "_Probabilities", the classifier name and ".txt" are appended to it as-is, so it
        ///   should end with a directory separator to designate a folder.</param>
        public void SaveProbabilities(MultinomialLogisticRegression mlr, string path = @"H:\Documents\Visual Studio 2015\Projects\ML\ML\SaveResults\")
        {
            string timeAfter = InitialTime();

            double[][] probabilities = mlr.Probabilities(TestInputs);

            // Nothing to write: do not create the file at all.
            if (probabilities.Length == 0)
            {
                return;
            }

            string fileName = path + timeAfter + "_Probabilities" + Сlassifier + ".txt";

            // Open the file once for the whole table rather than
            // reopening and closing it for every single value.
            using (FileStream fs = new FileStream(fileName, FileMode.Append))
            using (StreamWriter writer = new StreamWriter(fs))
            {
                for (int m = 0; m < probabilities.Length; m++)
                {
                    for (int n = 0; n < probabilities[m].Length; n++)
                    {
                        writer.WriteLine("([{0}, {1}]: {2})", m, n, probabilities[m][n]);
                    }
                }
            }
        }
        public void CloneTest()
        {
            MultinomialLogisticRegression original = createExample1();
            MultinomialLogisticRegression copy     = (MultinomialLogisticRegression)original.Clone();

            // The clone is a distinct object that reports the same dimensions...
            Assert.AreNotEqual(original, copy);
            Assert.AreEqual(original.Categories, copy.Categories);
            Assert.AreEqual(original.Inputs, copy.Inputs);

            // ...and owns its own arrays...
            Assert.AreNotSame(original.Coefficients, copy.Coefficients);
            Assert.AreNotSame(original.StandardErrors, copy.StandardErrors);

            // ...whose contents match the original value for value.
            for (int row = 0; row < original.Coefficients.Length; row++)
            {
                int columns = original.Coefficients[row].Length;

                for (int col = 0; col < columns; col++)
                {
                    Assert.AreEqual(original.Coefficients[row][col], copy.Coefficients[row][col]);
                    Assert.AreEqual(original.StandardErrors[row][col], copy.StandardErrors[row][col]);
                }
            }
        }
        public void MultinomialLogisticRegressionConstructorTest()
        {
            int inputs     = 4;
            int categories = 7;
            var target     = new MultinomialLogisticRegression(inputs, categories);

            Assert.AreEqual(4, target.Inputs);
            Assert.AreEqual(7, target.Categories);

            // Both tables are expected to have one row per category but one
            // (7 - 1 = 6), each with one column per input plus one (4 + 1 = 5).
            Assert.AreEqual(6, target.Coefficients.Length);
            foreach (double[] row in target.Coefficients)
            {
                Assert.AreEqual(5, row.Length);
            }

            Assert.AreEqual(6, target.StandardErrors.Length);
            foreach (double[] row in target.StandardErrors)
            {
                Assert.AreEqual(5, row.Length);
            }
        }
Пример #20
0
        public void RegressTest2()
        {
            // Fix the random seed so the learned model is reproducible.
            Accord.Math.Random.Generator.Seed = 0;

            double[][] samples;
            int[]      labels;
            MultinomialLogisticRegressionTest.CreateInputOutputsExample1(out samples, out labels);

            // Estimate the regression with a conjugate-gradient based learner
            var teacher = new MultinomialLogisticLearning <ConjugateGradient>();
            MultinomialLogisticRegression model = teacher.Learn(samples, labels);

            // Zero-one loss of the model's decisions on the data it was trained on
            int[]  decisions = model.Decide(samples);
            double loss      = new ZeroOneLoss(labels).Loss(decisions);

            Assert.AreEqual(0.61088435374149663, loss, 1e-8);
        }
Пример #21
0
        // Fits the multinomial logistic regression to the independent/dependent data
        // and caches its coefficients, Wald statistics, log-likelihood, deviance and
        // chi-square results in the corresponding fields. Returns the fitted model.
        private MultinomialLogisticRegression buildModel()
        {
            if (independent == null)
            {
                formatData();
            }
            mlr = new MultinomialLogisticRegression(nvars, ncat);
            LowerBoundNewtonRaphson lbn = new LowerBoundNewtonRaphson(mlr);

            // Iterate until the value returned by Run is no larger than 'converg'
            // or the iteration budget is used up. At least one iteration always runs.
            // NOTE(review): 'iteration' is not reset here - confirm a second call
            // is meant to continue counting from the previous value.
            do
            {
                delta = lbn.Run(independent, dependent);
                iteration++;
            } while (iteration < totit && delta > converg);

            coefficients  = mlr.Coefficients;
            standarderror = new double[ncat - 1][];
            waldstat      = new double[ncat - 1][];
            waldpvalue    = new double[ncat - 1][];
            for (int i = 0; i < coefficients.Length; i++)
            {
                double[] steArr        = new double[nvars + 1];
                double[] waldStatArr   = new double[nvars + 1];
                double[] waldPvalueArr = new double[nvars + 1];
                for (int j = 0; j < nvars + 1; j++)
                {
                    Accord.Statistics.Testing.WaldTest wt = mlr.GetWaldTest(i, j);
                    steArr[j]        = wt.StandardError;
                    waldStatArr[j]   = wt.Statistic;
                    waldPvalueArr[j] = wt.PValue;
                }
                waldstat[i]      = waldStatArr;
                waldpvalue[i]    = waldPvalueArr;
                standarderror[i] = steArr;
            }
            loglikelihood = mlr.GetLogLikelihood(independent, dependent);
            deviance      = mlr.GetDeviance(independent, dependent);

            // Run the chi-square test once and read both results from it,
            // instead of evaluating the whole test twice.
            var chiSquare = mlr.ChiSquare(independent, dependent);
            x2            = chiSquare.Statistic;
            pv            = chiSquare.PValue;
            return(mlr);
        }
Пример #22
0
        /// <summary>
        ///   Creates a new <see cref="LowerBoundNewtonRaphson"/>.
        /// </summary>
        /// <param name="regression">The regression to estimate.</param>
        /// <exception cref="System.ArgumentNullException">
        ///   <paramref name="regression"/> is null.</exception>
        ///
        public LowerBoundNewtonRaphson(MultinomialLogisticRegression regression)
        {
            // Fail with a descriptive exception instead of a NullReferenceException
            // on the first property access below.
            if (regression == null)
            {
                throw new System.ArgumentNullException("regression");
            }

            this.regression = regression;

            // K: categories minus one; M: inputs plus one.
            K = regression.Categories - 1;
            M = regression.Inputs + 1;
            parameterCount = K * M;

            solution = regression.Coefficients.Reshape(1);

            xxt    = new double[M, M];
            errors = new double[K];
            output = new double[K];

            lowerBound = new double[parameterCount, parameterCount];
            gradient   = new double[parameterCount];

            // Differently from the IRLS iteration, the weight matrix can be fixed
            // as it does not depend on the current coefficients anymore [I - 11/m]
            // NOTE(review): the constant used below is 1.0 / M with M = Inputs + 1;
            // confirm this is the 'm' the formula above refers to.

            // TODO: Avoid the multiple allocations in the line below
            weights = (-0.5).Multiply(Matrix.Identity(K).Subtract(Matrix.Create(K, K, 1.0 / M)));
        }
Пример #23
0
        // Demonstrates learning a multinomial logistic regression with the lower-bound
        // Newton-Raphson teacher and querying it. All results stay in locals.
        private static void multinomial(double[][] inputs, int[] outputs)
        {
            var teacher = new LowerBoundNewtonRaphson();
            teacher.MaxIterations = 100;
            teacher.Tolerance     = 1e-6;

            // Let the teacher estimate the regression from the data:
            MultinomialLogisticRegression model = teacher.Learn(inputs, outputs);

            // The class the model decides for each sample
            int[] decisions = model.Decide(inputs);

            // The probability the model assigns to each class
            double[][] classProbabilities = model.Probabilities(inputs);

            // Score the decisions against the expected labels
            var    scorer = new AccuracyLoss(outputs);
            double error  = scorer.Loss(decisions);

            // The most probable class of each sample should
            // coincide with the class that was decided for it:
            int[] mostProbable = classProbabilities.ArgMax(dimension: 1); // should be same as 'decisions'
        }
Пример #24
0
        /// <summary>
        ///   Creates a new <see cref="LowerBoundNewtonRaphson"/>.
        /// </summary>
        /// <param name="regression">The regression to estimate.</param>
        /// <exception cref="System.ArgumentNullException">
        ///   <paramref name="regression"/> is null.</exception>
        /// 
        public LowerBoundNewtonRaphson(MultinomialLogisticRegression regression)
        {
            // Fail with a descriptive exception instead of a NullReferenceException
            // on the first property access below.
            if (regression == null)
            {
                throw new System.ArgumentNullException("regression");
            }

            this.regression = regression;

            // K: categories minus one; M: inputs plus one.
            K = regression.Categories - 1;
            M = regression.Inputs + 1;
            parameterCount = K * M;

            solution = regression.Coefficients.Reshape(1);

            xxt = new double[M, M];
            errors = new double[K];
            output = new double[K];

            lowerBound = new double[parameterCount, parameterCount];
            gradient = new double[parameterCount];

            // Differently from the IRLS iteration, the weight matrix can be fixed
            // as it does not depend on the current coefficients anymore [I - 11/m]
            // NOTE(review): the constant used below is 1.0 / M with M = Inputs + 1;
            // confirm this is the 'm' the formula above refers to.

            // TODO: Avoid the multiple allocations in the line below
            weights = (-0.5).Multiply(Matrix.Identity(K).Subtract(Matrix.Create(K, K, 1.0 / M)));
        }
 /// <summary>
 /// Replaces the current model with one deserialized from the given location.
 /// </summary>
 /// <param name="path">The location the trained model is read from.</param>
 public override void Load(string path)
 {
     var restored = Accord.IO.Serializer.Load <MultinomialLogisticRegression>(path);
     Model = restored;
 }
Пример #26
0
        // Validates that all input rows share one length and all output rows share
        // another, stores both data sets, creates the regression and allocates the
        // per-coefficient result tables, default names and coefficient collection.
        private void init(double[][] inputs, double[][] outputs)
        {
            // The first row of each set defines the expected vector length.
            this.inputCount  = inputs[0].Length;
            this.outputCount = outputs[0].Length;

            foreach (double[] row in inputs)
            {
                if (row.Length != inputCount)
                {
                    throw new ArgumentException("All input vectors must have the same length.");
                }
            }

            foreach (double[] row in outputs)
            {
                if (row.Length != outputCount)
                {
                    throw new ArgumentException("All output vectors must have the same length.");
                }
            }

            // Keep references to the data sets
            this.inputData  = inputs;
            this.outputData = outputs;

            // Create the multinomial logistic regression
            regression = new MultinomialLogisticRegression(inputCount, outputCount);

            // Result tables hold one row per output but one
            int equations = outputCount - 1;

            this.coefficientCount = regression.Coefficients[0].Length;
            this.coefficients     = regression.Coefficients;
            this.standardErrors   = regression.StandardErrors;
            this.confidences      = new DoubleRange[equations][];
            this.oddsRatios       = new double[equations][];
            this.waldTests        = new WaldTest[equations][];

            for (int row = 0; row < equations; row++)
            {
                this.confidences[row] = new DoubleRange[coefficientCount];
                this.oddsRatios[row]  = new double[coefficientCount];
                this.waldTests[row]   = new WaldTest[coefficientCount];
            }

            // Default names: "Input 0", "Input 1", ... and "Class 0", "Class 1", ...
            this.inputNames = new string[inputCount];
            for (int n = 0; n < inputNames.Length; n++)
            {
                inputNames[n] = "Input " + n;
            }

            this.outputNames = new string[outputCount];
            for (int n = 0; n < outputNames.Length; n++)
            {
                outputNames[n] = "Class " + n;
            }

            // Object-oriented view of the analysis: slot 0 holds the (0, 0) entry,
            // followed by every coefficient of outputs 1 .. outputCount - 1 in order.
            var coefs = new MultinomialCoefficient[(outputCount - 1) * coefficientCount + 1];
            coefs[0] = new MultinomialCoefficient(this, 0, 0);

            int next = 1;
            for (int category = 1; category < outputCount; category++)
            {
                for (int index = 0; index < coefficientCount; index++)
                {
                    coefs[next] = new MultinomialCoefficient(this, category, index);
                    next++;
                }
            }

            this.coefficientCollection = new MultinomialCoefficientCollection(coefs);
        }
        // Learns a multinomial logistic regression analysis directly from the Iris
        // arrays (no DataTable or codebook) and checks the reported counts, default
        // names, accuracy, kappa, chi-square p-value and log-likelihood.
        //
        // NOTE(review): expectedCoef and expectedErr below are never compared with
        // coef / stdErr. They are identical to the arrays in learn_test, hold 4 and
        // 5 values per row respectively, and expectedErr contains negative entries,
        // so they look copied from another example - confirm before relying on them.
        public void learn_test_4()
        {
            #region doc_learn_2
            // This example shows how to learn a multinomial logistic regression
            // analysis in the famous Fisher's Iris dataset. It should serve to
            // demonstrate that this class does not really need to be used with
            // DataTables, Codification codebooks and other supplementary features.

            Iris iris = new Iris();

            // Load Fisher's Iris dataset:
            double[][] x = iris.Instances;
            int[]      y = iris.ClassLabels;

            // Create a new Multinomial Logistic Regression Analysis:
            var analysis = new MultinomialLogisticRegressionAnalysis();

            // Note: we could have passed the class names from iris.ClassNames and
            // variable names from iris.VariableNames during MLR instantiation as:
            //
            // var analysis = new MultinomialLogisticRegressionAnalysis()
            // {
            //     InputNames = iris.VariableNames,
            //     OutputNames = iris.ClassNames
            // };

            // However, this example is also intended to demonstrate that
            // those are not required when learning a regression analysis.

            // Learn the regression from the input and output pairs:
            MultinomialLogisticRegression regression = analysis.Learn(x, y);

            // Let's retrieve some information about what we just learned:
            int coefficients    = analysis.Coefficients.Count; // should be 11
            int numberOfInputs  = analysis.NumberOfInputs;     // should be 4
            int numberOfOutputs = analysis.NumberOfOutputs;    // should be 3

            string[] inputNames  = analysis.InputNames;        // should be "Input 0", "Input 1", "Input 2", "Input 3"
            string[] outputNames = analysis.OutputNames;       // should be "Class 0", "Class 1", "Class 2"

            // The regression is best visualized when it is data-bound to a
            // Windows.Forms DataGridView or WPF DataGrid. You can get the
            // values for all different coefficients and discrete values:

            // DataGridBox.Show(regression.Coefficients); // uncomment this line

            // You can get the matrix of coefficients:
            double[][] coef = analysis.CoefficientValues;

            // Should be equal to:
            double[][] expectedCoef = new double[][]
            {
                new double[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
                new double[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
            };

            // And their associated standard errors:
            double[][] stdErr = analysis.StandardErrors;

            // Should be equal to:
            double[][] expectedErr = new double[][]
            {
                new double[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
                new double[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
            };

            // We can also get statistics and hypothesis tests:
            WaldTest[][]  wald          = analysis.WaldTests;     // should all have p < 0.05
            ChiSquareTest chiSquare     = analysis.ChiSquare;     // should be p=7.8271969268290043E-54 (practically 0)
            double        logLikelihood = analysis.LogLikelihood; // should be -29.558338705646587

            // You can use the regression to predict the values:
            int[] pred = regression.Transform(x);

            // And get the accuracy of the prediction if needed:
            var cm = GeneralConfusionMatrix.Estimate(regression, x, y);

            double acc   = cm.Accuracy; // should be 0.94666666666666666
            double kappa = cm.Kappa;    // should be 0.91999999999999982
            #endregion

            // Counts reported by the analysis
            Assert.AreEqual(11, coefficients);
            Assert.AreEqual(4, numberOfInputs);
            Assert.AreEqual(3, numberOfOutputs);

            // Default names are zero-based
            Assert.AreEqual(new[] { "Input 0", "Input 1", "Input 2", "Input 3" }, inputNames);
            Assert.AreEqual(new[] { "Class 0", "Class 1", "Class 2" }, outputNames);

            // Goodness-of-fit figures
            Assert.AreEqual(0.94666666666666666, acc, 1e-10);
            Assert.AreEqual(0.91999999999999982, kappa, 1e-10);
            Assert.AreEqual(7.8271969268290043E-54, chiSquare.PValue, 1e-8);
            Assert.AreEqual(-29.558338705646587, logLikelihood, 1e-8);
        }
 /// <summary>
 /// Binds this instance to an existing regression model.
 /// </summary>
 /// <param name="regression">The model to store in the <c>regression</c> field.</param>
 private void init(MultinomialLogisticRegression regression)
 {
     this.regression = regression;
 }
Пример #29
0
        /// <summary>
        /// Command-line entry point: loads a previously trained and saved logistic
        /// regression model, runs it on a CSV test set and prints the accuracy
        /// measured against a CSV label file.
        /// </summary>
        /// <param name="args">
        /// Exactly three arguments: test data file, label file and model file. The
        /// model file name selects the model type and must contain "bfgs"
        /// (MultinomialLogisticRegression) or "pcd" (LogisticRegression),
        /// compared case-insensitively.
        /// </param>
        /// <remarks>
        /// Every failure path prints a message and terminates the process with
        /// exit code -1.
        /// </remarks>
        static void Main(string[] args)
        {
            // Exactly three arguments are required
            if (args.Length != 3)
            {
                Console.WriteLine("Requires a previously trained and saved Model File");
                Console.WriteLine("Usage <testfile> <label file> <Model File>");
                System.Environment.Exit(-1);
            }

            Console.WriteLine("Logisitic Regression Prediction\n");
            string testFname   = args[0];
            string labelsFname = args[1];
            string ModelFname  = args[2];


            double[,] Rawdata;
            double[,] labeldata;

            // Validate that every file can be opened before doing any work. The files
            // are only read by this program, so they are probed with read access:
            // asking for write access would wrongly reject read-only inputs.
            try
            {
                foreach (string fname in new[] { testFname, labelsFname, ModelFname })
                {
                    using (FileStream probe = File.OpenRead(fname))
                    {
                        // Opening succeeded; the stream is disposed right away.
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine("Error opening file{0}", e);
                System.Environment.Exit(-1);
            }

            using (CsvReader reader = new CsvReader(testFname, hasHeaders: false))
            {
                Rawdata = reader.ToMatrix();
            }
            using (CsvReader reader = new CsvReader(labelsFname, hasHeaders: false))
            {
                labeldata = reader.ToMatrix();
            }

            // Convert Raw data to Jagged array
            double[][] testdata = Rawdata.ToJagged();
            int[]      output1  = funcs.convetToJaggedArray(labeldata);

            int[] answers = new int[labeldata.GetLength(0)];

            // For Accord.net Logistic Regression the input data needs to be in Jagged Arrays
            // Labels can either be int (1,0) or bools
            if (ModelFname.IndexOf("bfgs", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                // Load a BFGS regression model
                try
                {
                    MultinomialLogisticRegression mlr = Serializer.Load<MultinomialLogisticRegression>(ModelFname);
                    answers = mlr.Decide(testdata);
                }
                catch (Exception e)
                {
                    Console.WriteLine("Error opening model file: {0}", ModelFname);
                    Console.WriteLine("Exception {0}", e);
                    System.Environment.Exit(-1);
                }
            }
            else if (ModelFname.IndexOf("pcd", StringComparison.OrdinalIgnoreCase) >= 0)
            {
                try
                {
                    LogisticRegression regression = Serializer.Load<LogisticRegression>(ModelFname);
                    answers = funcs.BoolToInt(regression.Decide(testdata));
                }
                catch (Exception e)
                {
                    Console.WriteLine("Error opening model file: {0}", ModelFname);
                    Console.WriteLine("Exception {0}", e);
                    System.Environment.Exit(-1);
                }
            }
            else
            {
                // Without a recognised model type no prediction was made; scoring the
                // zero-initialised answers would report a meaningless accuracy.
                Console.WriteLine("Unknown model type in file name (expected 'bfgs' or 'pcd'): {0}", ModelFname);
                System.Environment.Exit(-1);
            }

            Console.WriteLine("Successfully loaded model file => {0}", ModelFname);

            // Each prediction is compared with the label at the same position, so
            // there must be at least as many labels as predictions.
            if (output1.Length < answers.Length)
            {
                Console.WriteLine("Label file has fewer entries ({0}) than there are predictions ({1})",
                                  output1.Length, answers.Length);
                System.Environment.Exit(-1);
            }

            int correct = 0;
            for (int i = 0; i < answers.Length; i++)
            {
                if (answers[i] == output1[i])
                {
                    correct++;
                }
            }

            // Floating-point division: an empty prediction set yields NaN, not an exception
            double accuracy = (double)correct / answers.Length;

            Console.WriteLine("Predicted accuracy using model:{0} is, {1}", ModelFname, Math.Round(accuracy * 100, 2));
        }
        // Fits the three-category example with the lower-bound Newton-Raphson
        // estimator and compares coefficients, standard errors, log-likelihood,
        // chi-square and Wald statistics with reference values.
        public void RegressTest2()
        {
            double[][] inputs;
            int[]      outputs;

            CreateInputOutputsExample1(out inputs, out outputs);

            // Model with two input variables and three categories
            var mlr = new MultinomialLogisticRegression(inputs: 2, categories: 3);

            // Estimation algorithm bound to the model above
            var lbnr = new LowerBoundNewtonRaphson(mlr);

            // Iterate until the value returned by Run (the maximum relative change
            // in the parameters) is no longer above 1e-6, with a cap of 100 rounds.
            int    rounds = 0;
            double change;

            do
            {
                change = lbnr.Run(inputs, outputs);
                rounds++;
            } while (rounds < 100 && change > 1e-6);

            Assert.AreEqual(52, rounds);

            const int Rows = 2; // non-baseline categories (brand 2, brand 3)
            const int Cols = 3; // intercept, female, age

            for (int r = 0; r < Rows; r++)
            {
                for (int c = 0; c < Cols; c++)
                {
                    Assert.IsFalse(double.IsNaN(mlr.Coefficients[r][c]));
                }
            }

            // This is the same example given in R Data Analysis Examples for
            // Multinomial Logistic Regression: http://www.ats.ucla.edu/stat/r/dae/mlogit.htm
            double[][] expectedCoefficients =
            {
                new[] { -11.774655, 0.523814, 0.368206 }, // brand 2: intercept, female, age
                new[] { -22.721396, 0.465941, 0.685908 }, // brand 3: intercept, female, age
            };

            for (int r = 0; r < Rows; r++)
            {
                for (int c = 0; c < Cols; c++)
                {
                    Assert.AreEqual(expectedCoefficients[r][c], mlr.Coefficients[r][c], 1e-4);
                }
            }

            for (int r = 0; r < Rows; r++)
            {
                for (int c = 0; c < Cols; c++)
                {
                    Assert.IsFalse(double.IsNaN(mlr.StandardErrors[r][c]));
                }
            }

            // Reference values under the standard Hessian estimation (not asserted):
            //   row 0: 1.774612, 0.194247, 0.055003
            //   row 1: 2.058028, 0.226090, 0.062627
            //
            // Values asserted below come from the lower-bound approximation,
            // which yields the same errors for both rows.
            double[][] expectedErrors =
            {
                new[] { 1.047378039787443, 0.153150051082552, 0.031640507386863 },
                new[] { 1.047378039787443, 0.153150051082552, 0.031640507386863 },
            };

            for (int r = 0; r < Rows; r++)
            {
                for (int c = 0; c < Cols; c++)
                {
                    Assert.AreEqual(expectedErrors[r][c], mlr.StandardErrors[r][c], 1e-6);
                }
            }

            double logLikelihood = mlr.GetLogLikelihood(inputs, outputs);

            Assert.AreEqual(-702.97, logLikelihood, 1e-2);
            Assert.IsFalse(double.IsNaN(logLikelihood));

            var chiSquare = mlr.ChiSquare(inputs, outputs);

            Assert.AreEqual(185.85, chiSquare.Statistic, 1e-2);
            Assert.IsFalse(double.IsNaN(chiSquare.Statistic));

            // Fetch all Wald tests up front, in (category, coefficient) order
            var wald = new[]
            {
                new[] { mlr.GetWaldTest(0, 0), mlr.GetWaldTest(0, 1), mlr.GetWaldTest(0, 2) },
                new[] { mlr.GetWaldTest(1, 0), mlr.GetWaldTest(1, 1), mlr.GetWaldTest(1, 2) },
            };

            for (int r = 0; r < Rows; r++)
            {
                for (int c = 0; c < Cols; c++)
                {
                    Assert.IsFalse(double.IsNaN(wald[r][c].Statistic));
                }
            }

            // Reference values under the standard Hessian estimation (not asserted):
            //   row 0:  -6.6351, 2.6966,  6.6943
            //   row 1: -11.0404, 2.0609, 10.9524
            //
            // Values asserted below come from the lower-bound approximation.
            double[][] expectedWald =
            {
                new[] { -11.241995503283842, 3.4202662152119889, 11.637150673342207 },
                new[] { -21.693553825772664, 3.0423802097069097, 21.678124991086548 },
            };

            for (int r = 0; r < Rows; r++)
            {
                for (int c = 0; c < Cols; c++)
                {
                    Assert.AreEqual(expectedWald[r][c], wald[r][c].Statistic, 1e-4);
                }
            }
        }
        // Downloads the hsbdemo CSV, codifies its columns, learns a multinomial
        // logistic regression analysis on it and checks the reported counts, names,
        // accuracy, kappa, chi-square p-value and log-likelihood. Requires network
        // access, since the data set is fetched with CsvReader.FromUrl.
        //
        // NOTE(review): expectedCoef and expectedErr below are never compared with
        // coef / stdErr. expectedErr holds 5 values per row (expectedCoef holds 4)
        // and contains negative entries, which is unexpected for standard errors -
        // confirm before relying on them.
        public void learn_test()
        {
            // http://www.ats.ucla.edu/stat/stata/dae/mlogit.htm
            #region doc_learn_1
            // This example downloads an example dataset from the web and learns a multinomial logistic
            // regression on it. However, please keep in mind that the Multinomial Logistic Regression
            // can also work without many of the elements that will be shown below, like the codebook,
            // DataTables, and a CsvReader.

            // Let's download an example dataset from the web to learn a multinomial logistic regression:
            CsvReader reader = CsvReader.FromUrl("https://raw.githubusercontent.com/rlowrance/re/master/hsbdemo.csv", hasHeaders: true);

            // Let's read the CSV into a DataTable. As mentioned above, this step
            // can help, but is not necessarily required for learning the model:
            DataTable table = reader.ToTable();

            // We will learn a MLR regression between the following input and output fields of this table:
            string[] inputNames  = new[] { "write", "ses" };
            string[] outputNames = new[] { "prog" };

            // Now let's create a codification codebook to convert the string fields in the data
            // into integer symbols. This is required because the MLR model can only learn from
            // numeric data, so strings have to be transformed first. We can force a particular
            // interpretation for those columns if needed, as shown in the initializer below:
            var codification = new Codification()
            {
                { "write", CodificationVariable.Continuous },
                { "ses", CodificationVariable.CategoricalWithBaseline, new[] { "low", "middle", "high" } },
                { "prog", CodificationVariable.Categorical, new[] { "academic", "general" } },
            };

            // Learn the codification
            codification.Learn(table);

            // Now, transform symbols into a vector representation, growing the number of inputs:
            double[][] x = codification.Transform(table, inputNames, out inputNames).ToDouble();
            double[][] y = codification.Transform(table, outputNames, out outputNames).ToDouble();

            // Create a new Multinomial Logistic Regression Analysis:
            var analysis = new MultinomialLogisticRegressionAnalysis()
            {
                InputNames  = inputNames,
                OutputNames = outputNames,
            };

            // Learn the regression from the input and output pairs:
            MultinomialLogisticRegression regression = analysis.Learn(x, y);

            // Let's retrieve some information about what we just learned:
            int coefficients    = analysis.Coefficients.Count; // should be 9
            int numberOfInputs  = analysis.NumberOfInputs;     // should be 3
            int numberOfOutputs = analysis.NumberOfOutputs;    // should be 3

            inputNames  = analysis.InputNames;                 // should be "write", "ses: middle", "ses: high"
            outputNames = analysis.OutputNames;                // should be "prog: academic", "prog: general", "prog: vocation"

            // The regression is best visualized when it is data-bound to a
            // Windows.Forms DataGridView or WPF DataGrid. You can get the
            // values for all different coefficients and discrete values:

            // DataGridBox.Show(regression.Coefficients); // uncomment this line

            // You can get the matrix of coefficients:
            double[][] coef = analysis.CoefficientValues;

            // Should be equal to:
            double[][] expectedCoef = new double[][]
            {
                new double[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
                new double[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
            };

            // And their associated standard errors:
            double[][] stdErr = analysis.StandardErrors;

            // Should be equal to:
            double[][] expectedErr = new double[][]
            {
                new double[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
                new double[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
            };

            // We can also get statistics and hypothesis tests:
            WaldTest[][]  wald          = analysis.WaldTests;     // should all have p < 0.05
            ChiSquareTest chiSquare     = analysis.ChiSquare;     // should be p=1.06300120956871E-08
            double        logLikelihood = analysis.LogLikelihood; // should be -179.98172637136295

            // You can use the regression to predict the values:
            int[] pred = regression.Transform(x);

            // And get the accuracy of the prediction if needed:
            var cm = GeneralConfusionMatrix.Estimate(regression, x, y.ArgMax(dimension: 1));

            double acc   = cm.Accuracy; // should be 0.61
            double kappa = cm.Kappa;    // should be 0.2993487536492252
            #endregion


            // Counts reported by the analysis
            Assert.AreEqual(9, coefficients);
            Assert.AreEqual(3, numberOfInputs);
            Assert.AreEqual(3, numberOfOutputs);

            // Names expanded by the codification step
            Assert.AreEqual(new[] { "write", "ses: middle", "ses: high" }, inputNames);
            Assert.AreEqual(new[] { "prog: academic", "prog: general", "prog: vocation" }, outputNames);

            // Goodness-of-fit figures
            Assert.AreEqual(0.61, acc, 1e-10);
            Assert.AreEqual(0.2993487536492252, kappa, 1e-10);
            Assert.AreEqual(1.06300120956871E-08, chiSquare.PValue, 1e-8);
            Assert.AreEqual(-179.98172637136295, logLikelihood, 1e-8);

            // Further checks delegated to a helper defined elsewhere in the file
            testmlr(analysis);
        }