/// <summary>
        /// Fits a logistic regression analysis to the given data and returns its
        /// coefficient values, odds ratios, standard errors and confidence bounds.
        /// </summary>
        /// <param name="datas">Raw data matrix handed to <c>SetInputAndOutputData</c>, which
        /// splits it into inputs and outputs (the exact layout is defined by that helper).</param>
        /// <param name="rowCount">Number of rows; also the number of training samples allocated.</param>
        /// <param name="columnCount">Number of columns, forwarded to <c>SetInputAndOutputData</c>.</param>
        /// <param name="coef">Receives the analysis' coefficient values.</param>
        /// <param name="odds">Receives the odds ratios.</param>
        /// <param name="stde">Receives the standard errors.</param>
        /// <param name="min">Receives the lower bound of each confidence interval.</param>
        /// <param name="max">Receives the upper bound of each confidence interval.</param>
        public void LogisticRegresssionAanlysis(
            float[,] datas, int rowCount, int columnCount,
            out double[] coef, out double[] odds, out double[] stde,
            out double[] min, out double[] max)
        {
            // Build the input and output arrays from the supplied data.
            var features = new double[rowCount][];
            var labels   = new double[rowCount];
            SetInputAndOutputData(datas, rowCount, columnCount, ref features, ref labels);

            // Create and fit the logistic regression analysis.
            var analysis = new LogisticRegressionAnalysis();
            analysis.Learn(features, labels);

            // Per-parameter results.
            coef = analysis.CoefficientValues;
            odds = analysis.OddsRatios;
            stde = analysis.StandardErrors;

            // Unpack the confidence intervals into separate lower/upper arrays.
            Accord.DoubleRange[] intervals = analysis.Confidences;
            int count = intervals.Length;
            min = new double[count];
            max = new double[count];

            int index = 0;
            foreach (Accord.DoubleRange interval in intervals)
            {
                min[index] = interval.Min;
                max[index] = interval.Max;
                index++;
            }
        }
Пример #2
0
        /// <summary>
        /// Fits a <c>LogisticRegressionAnalysis</c> from the bound parameters and passes
        /// the fitted analysis to <c>WriteObject</c>.
        /// </summary>
        /// <remarks>
        /// With the "XY" parameter set the inputs and outputs come from <c>X</c> and <c>Y</c>.
        /// Otherwise the <c>OutputName</c> column is read from <c>_data</c> and then removed
        /// from it, and the remaining columns become the inputs (with their names recorded
        /// on the analysis). <c>Weights</c>, when supplied, is passed as sample weights.
        /// </remarks>
        protected override void EndProcessing()
        {
            var analysis = new LogisticRegressionAnalysis();

            double[][] features;
            double[]   targets;

            if (ParameterSetName != "XY")
            {
                // Read the output column first, then drop it so only inputs remain.
                targets = _data.GetColumn(OutputName).ToDoubleArray();
                _data.RemoveColumn(OutputName);

                features = _data.ToDoubleJaggedArray();

                analysis.Inputs = _data.ColumnNames.ToArray <string>();
                analysis.Output = OutputName;
            }
            else
            {
                features = Converter.ToDoubleJaggedArray(X);
                targets  = Converter.ToDoubleArray(Y);
            }

            // Sample weights are optional; null is passed when none were given.
            double[] sampleWeights = Weights != null ? Converter.ToDoubleArray(Weights) : null;

            analysis.Learn(features, targets, sampleWeights);

            WriteObject(analysis);
        }
Пример #3
0
        /// <summary>
        /// Checks <c>LogisticRegressionAnalysis.Learn</c> on two small data sets: the
        /// returned regression must be the analysis' own <c>Regression</c> instance, and
        /// the fitted coefficients and scores must match previously recorded values.
        /// </summary>
        public void learn2()
        {
            // Test instance 01
            double[][] trainInput =
            {
                new double[] { 1, 1 },
                new double[] { 0, 0 },
            };

            double[] trainOutput = { 1, 0 };
            double[] testInput   = { 0, 0.2 };

            var target = new LogisticRegressionAnalysis();

            target.Regularization = 1e-10;

            var regression = target.Learn(trainInput, trainOutput);

            // Learn must hand back the same object that the analysis exposes.
            Assert.AreSame(regression, target.Regression);

            // Expected value goes first so that a failure message reports the
            // reference value as "expected" and the fitted value as "actual".
            double[] coef = target.Coefficients.Apply(x => x.Value);
            Assert.AreEqual(-19.360661491141897, coef[0], 1e-6);
            Assert.AreEqual(19.702873967721807, coef[1], 1e-6);
            Assert.AreEqual(19.702873967721807, coef[2], 1e-6);

            double output = target.Regression.Score(testInput);

            Assert.AreEqual(0, output, 1e-6);

            // Test instance 02
            trainInput = new double[][]
            {
                new double[] { 1, 0, 1, 1, 0, 1, 1, 0, 1, 0 },
                new double[] { 0, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
                new double[] { 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
                new double[] { 1, 0, 1, 1, 0, 1, 1, 0, 1, 0 },
                new double[] { 0, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
                new double[] { 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
            };

            trainOutput = new double[6] {
                1, 1, 0, 0, 1, 1
            };

            // Fresh analysis with default settings for the second data set.
            target = new LogisticRegressionAnalysis();

            regression = target.Learn(trainInput, trainOutput);

            double[] actual   = regression.Score(trainInput);
            double[] expected = { 0.500000000158903, 0.999999998410966, 0.500000000913694, 0.500000000158903, 0.999999998410966, 0.500000000913694 };
            Assert.IsTrue(actual.IsEqual(expected, 1e-6));

            // Eleven values: one per input column plus one more (the analysis
            // reports the intercept as the first coefficient).
            coef = target.Coefficients.Apply(x => x.Value);
            expected = new double[] { 1.86680346470929, -3.87720719574071, 2.44120453079343, -0.574401066088034, 5.16960959435804, 2.44120453079343, -3.87720719574087, 5.16960959435804, 2.44120453079343, -3.87720719574087, 2.44120453079343 };
            Assert.IsTrue(coef.IsEqual(expected, 1e-6));
        }
Пример #4
0
        /// <summary>
        /// Fits a logistic regression analysis (capped at 100 iterations) on
        /// <c>InputData()</c>, using as labels 1.0 for every filtered item for which
        /// <c>resultsFunc</c> is true and 0.0 otherwise.
        /// </summary>
        /// <returns>The fitted analysis.</returns>
        public LogisticRegressionAnalysis Regress()
        {
            var analysis = new LogisticRegressionAnalysis();
            analysis.Iterations = 100;
            analysis.Inputs = GetInputNames();

            // Inputs are gathered before the labels, as in the original call.
            var features = InputData();
            var labels = Filter.FilteredResult()
                               .Select(item => resultsFunc(item) ? 1.0 : 0.0)
                               .ToArray();

            analysis.Learn(features, labels);
            return analysis;
        }
        /// <summary>
        /// Imports review data and fits a logistic regression analysis on the first
        /// review(s): inputs come from <c>CalculateProbabilities</c> applied to the review
        /// text, outputs from the review's <c>overall</c> value. The fitted model is not
        /// stored or returned.
        /// </summary>
        public static void Train()
        {
            // Number of reviews used for training.
            const int sampleCount = 1;

            DataHandler.ImportReviewData(3);

            var features = new double[sampleCount][];
            int row = 0;
            while (row < sampleCount)
            {
                features[row] = CalculateProbabilities(DataHandler.Reviews[row].reviewText);
                row++;
            }

            double[] ratings = DataHandler.Reviews
                                          .Take(sampleCount)
                                          .Select(review => review.overall)
                                          .ToArray();

            var analysis = new LogisticRegressionAnalysis();
            LogisticRegression model = analysis.Learn(features, ratings);
        }
Пример #6
0
        /// <summary>
        /// For every entry of <c>terrainVisResults</c>, builds one observation per
        /// visibility-score cell (features: score, group number, and their sum; label:
        /// whether the cell is an observation point), prints the data via
        /// <c>printInputOutput</c> and fits a logistic regression analysis on it.
        /// </summary>
        /// <remarks>
        /// The arrays are sized from each plane's own grid rather than from the first
        /// plane, so an empty result list is a no-op and planes of differing or ragged
        /// sizes no longer overrun the arrays or leave null rows. Planes without any
        /// cells are skipped.
        /// </remarks>
        private void logReg()
        {
            foreach (RefPlaneVis rpv in terrainVisResults)
            {
                // Count this plane's cells row by row; rows need not be equally long.
                int numObs = 0;
                for (int i = 0; i < rpv.visScore.Count; i++)
                {
                    numObs += rpv.visScore[i].Count;
                }

                if (numObs == 0)
                {
                    // Nothing to fit for this plane.
                    continue;
                }

                double[][] input  = new double[numObs][];
                double[]   output = new double[numObs];
                int        oNum   = 0;
                for (int i = 0; i < rpv.visScore.Count; i++)
                {
                    for (int s = 0; s < rpv.visScore[i].Count; s++)
                    {
                        // NaN scores are treated as zero.
                        double score = rpv.visScore[i][s];
                        if (double.IsNaN(score))
                        {
                            score = 0;
                        }

                        // NOTE(review): the third feature is the sum of the first two, so the
                        // columns are linearly dependent; an earlier variant used
                        // score * rpv.groupNum instead. Confirm which one is intended.
                        input[oNum]  = new double[] { score, rpv.groupNum, score + rpv.groupNum };
                        output[oNum] = rpv.observationPts[i][s] ? 1 : 0;
                        oNum++;
                    }
                }
                printInputOutput(input, output);

                // Fit the model; the result is currently not used any further.
                var lra = new LogisticRegressionAnalysis();
                lra.Learn(input, output);
            }
        }
Пример #7
0
        /// <summary>
        /// Fits a logistic regression analysis (with inner models enabled) on the
        /// <c>training</c> data and compares the resulting scores and likelihood-ratio
        /// test p-values against recorded reference values.
        /// </summary>
        public void learn1()
        {
            // Submatrix(null, 0, 3) supplies the inputs, column 4 the outputs.
            double[][] features = training.Submatrix(null, 0, 3);
            double[]   labels   = training.GetColumn(4);

            var analysis = new LogisticRegressionAnalysis();
            analysis.ComputeInnerModels = true;

            var model = analysis.Learn(features, labels);

            double[] scores = model.Score(features);

            double[] expectedScores =
            {
                0.000012, 0.892611, 0.991369, 0.001513, 0.904055,
                0.001446, 0.998673, 0.001260, 0.629312, 0.004475,
                0.505362, 0.999791, 0.000050, 1.000000, 0.990362,
                0.985265, 1.000000, 1.000000, 0.001319, 0.000001,
                0.000001, 0.000050, 0.702488, 0.003049, 0.000046,
                0.000419, 0.026276, 0.036813, 0.000713, 0.001484,
                0.000008, 0.000009, 0.278950, 0.001402, 0.025764,
                0.002464, 0.000219, 0.007328, 0.000106, 0.002619,
                0.002913, 0.000002,
            };

            // Only as many scores as there are reference values are checked.
            int index = 0;
            foreach (double wanted in expectedScores)
            {
                Assert.AreEqual(wanted, scores[index], 1e-6);
                index++;
            }

            // Five likelihood-ratio slots; the first one is expected to be null.
            var ratioTests = analysis.LikelihoodRatioTests;
            Assert.AreEqual(5, ratioTests.Length);
            Assert.IsNull(ratioTests[0]);

            double[] expectedPValues =
            {
                0.99999995244237416,
                0.99999993274336052,
                0.99999992480479116,
            };

            for (int k = 0; k < expectedPValues.Length; k++)
            {
                Assert.AreEqual(expectedPValues[k], ratioTests[k + 1].PValue, 1e-7);
            }

            Assert.AreEqual(0.0000000059730130622305527, ratioTests[4].PValue, 1e-20);
        }
        /// <summary>
        /// Trains a logistic regression analysis with <c>ComputeInnerModels</c> switched
        /// on, then verifies the scores on the training inputs and the p-values of the
        /// likelihood-ratio tests against stored reference numbers.
        /// </summary>
        public void learn1()
        {
            // Inputs come from Submatrix(null, 0, 3); column 4 holds the outputs.
            double[][] x = training.Submatrix(null, 0, 3);
            double[] y = training.GetColumn(4);

            var analysis = new LogisticRegressionAnalysis();
            analysis.ComputeInnerModels = true;

            var fitted = analysis.Learn(x, y);
            double[] computed = fitted.Score(x);

            double[] reference =
            {
                0.000012, 0.892611, 0.991369, 0.001513, 0.904055,
                0.001446, 0.998673, 0.001260, 0.629312, 0.004475,
                0.505362, 0.999791, 0.000050, 1.000000, 0.990362,
                0.985265, 1.000000, 1.000000, 0.001319, 0.000001,
                0.000001, 0.000050, 0.702488, 0.003049, 0.000046,
                0.000419, 0.026276, 0.036813, 0.000713, 0.001484,
                0.000008, 0.000009, 0.278950, 0.001402, 0.025764,
                0.002464, 0.000219, 0.007328, 0.000106, 0.002619,
                0.002913, 0.000002,
            };

            // Compare score by score, bounded by the number of reference values.
            int position = 0;
            while (position < reference.Length)
            {
                Assert.AreEqual(reference[position], computed[position], 1e-6);
                position++;
            }

            // One slot per coefficient; slot 0 carries no test.
            var lrTests = analysis.LikelihoodRatioTests;
            Assert.AreEqual(5, lrTests.Length);
            Assert.IsNull(lrTests[0]);

            double[] referencePValues =
            {
                0.99999995244237416,
                0.99999993274336052,
                0.99999992480479116,
            };

            for (int slot = 1; slot <= referencePValues.Length; slot++)
            {
                Assert.AreEqual(referencePValues[slot - 1], lrTests[slot].PValue, 1e-7);
            }

            Assert.AreEqual(0.0000000059730130622305527, lrTests[4].PValue, 1e-20);
        }
Пример #9
0
        /// <summary>
        /// Learns a logistic regression analysis from summarized (grouped) counts and
        /// checks that it matches both recorded reference values and an analysis learned
        /// from the same data expanded into individual observations.
        /// </summary>
        public void FromSummary_new_method()
        {
            #region doc_learn_summary
            // Fictitious data about patients who underwent cardiac surgery.
            // Column 0: number of arterial bypasses performed during the surgery.
            // Column 1: number of patients whose surgery went well.
            // Column 2: number of patients with at least one complication.
            int[,] summary =
            {
                // bypasses   success   complications
                { 1, 140, 45 },
                { 2, 130, 60 },
                { 3, 150, 31 },
                { 4,  96, 65 }
            };

            // Split the table into the input variable and the positive/negative counts
            int[]      bypasses  = summary.GetColumn(0);
            double[][] features  = bypasses.ToDouble().ToJagged();
            int[]      successes = summary.GetColumn(1);
            int[]      failures  = summary.GetColumn(2);

            // Create the analysis and learn it directly from the summary counts
            var analysis = new LogisticRegressionAnalysis();
            LogisticRegression model = analysis.Learn(features, successes, failures);

            // A summary of the analysis could be shown with
            // Accord.Controls.DataGridBox.Show(regression.Coefficients);

            // Individual parameters are also available. The coefficient
            // values are exposed as a vector:
            double[] coefficients = analysis.CoefficientValues;

            // The first entry is the intercept term. Odds ratios and
            // standard errors are available in the same way:
            double[] oddsRatios = analysis.OddsRatios;
            double[] stdErrors  = analysis.StandardErrors;

            // Estimate the probability of success for a new patient with 4 bypasses
            double probability = analysis.Regression.Score(new double[] { 4 }); // about 0.67
            #endregion

            Assert.AreEqual(3.7586367581050162, oddsRatios[0], 1e-8);
            Assert.AreEqual(0.85772731075090014, oddsRatios[1], 1e-8);

            Assert.AreEqual(0.20884336554629004, stdErrors[0], 1e-6);
            Assert.AreEqual(0.075837785246620285, stdErrors[1], 1e-6);

            Assert.AreEqual(0.67044096045332713, probability, 1e-8);

            // Reference analysis: expand the counts into one row per patient
            // and learn from the individual observations instead.
            var expandedRows = Accord.Statistics.Tools.Expand(bypasses, successes, failures);

            double[][] expandedInputs  = expandedRows.GetColumn(0).ToDouble().ToJagged();
            double[]   expandedOutputs = expandedRows.GetColumn(1).ToDouble();

            var reference = new LogisticRegressionAnalysis();
            reference.Learn(expandedInputs, expandedOutputs);

            Assert.AreEqual(-0.15346904821339602, reference.Coefficients[1].Value, 1e-8); // slope, about -0.153
            Assert.AreEqual(1.324056323049271, reference.Coefficients[0].Value, 1e-8);    // intercept
            Assert.AreEqual(0.042491262992507946, reference.ChiSquare.PValue, 1e-8);      // about 0.042

            // The summary-based analysis must agree with the expanded one.
            for (int k = 0; k < 2; k++)
            {
                Assert.AreEqual(reference.Coefficients[k].Value, analysis.Coefficients[k].Value, 1e-8);
            }

            Assert.AreEqual(reference.ChiSquare.PValue, analysis.ChiSquare.PValue, 1e-8);

            for (int k = 0; k < 2; k++)
            {
                Assert.AreEqual(reference.WaldTests[k].PValue, analysis.WaldTests[k].PValue, 1e-8);
            }

            for (int k = 0; k < 2; k++)
            {
                Assert.AreEqual(reference.Confidences[k].Max, analysis.Confidences[k].Max, 1e-6);
                Assert.AreEqual(reference.Confidences[k].Min, analysis.Confidences[k].Min, 1e-6);
            }
        }
Пример #10
0
        /// <summary>
        /// Documentation example: learns an unregularized logistic regression analysis
        /// on a small age/smoking data set and checks odds ratios, standard errors, a
        /// predicted score and its confidence interval against recorded values.
        /// </summary>
        public void example_learn()
        {
            #region doc_learn_part1
            // Completely fictional patient data. The first variable is
            // continuous (age); the second is dichotomic (smoker or not).
            double[][] patients =
            {
                //            Age  Smoking
                new double[] { 55, 0 },
                new double[] { 28, 0 },
                new double[] { 65, 1 },
                new double[] { 46, 0 },
                new double[] { 86, 1 },
                new double[] { 56, 1 },
                new double[] { 85, 0 },
                new double[] { 33, 0 },
                new double[] { 21, 1 },
                new double[] { 42, 1 },
            };

            // Whether each of those patients had lung cancer:
            // 0 for those who did not, 1 for those who did.
            double[] hadCancer =
            {
                0, 0, 0, 1, 1, 1, 0, 0, 0, 1
            };

            // Create a logistic regression analysis without regularization
            var analysis = new LogisticRegressionAnalysis();
            analysis.Regularization = 0;

            // Learn the analysis from the data
            LogisticRegression model = analysis.Learn(patients, hadCancer);

            // A summary of the analysis could be shown with
            // Accord.Controls.DataGridBox.Show(regression.Coefficients);
            #endregion

            #region doc_learn_part2
            // Individual parameters are also available. The coefficient
            // values are exposed as a vector:
            double[] coefficients = analysis.CoefficientValues;

            // The first entry is the intercept term. Odds ratios and
            // standard errors are available in the same way:
            double[] oddsRatios = analysis.OddsRatios;
            double[] stdErrors  = analysis.StandardErrors;

            // Predict a score for a new patient (age 87, smoker).
            // The resulting probability is approximately 75%.
            double score = analysis.Regression.Score(new double[] { 87, 1 }); // 0.75

            // A confidence interval for that probability is also available:
            DoubleRange interval = analysis.GetConfidenceInterval(new double[] { 87, 1 });
            #endregion

            double[] expectedOdds = { 0.085627701183146374, 1.0208597029292648, 5.8584748981777919 };
            for (int k = 0; k < expectedOdds.Length; k++)
            {
                Assert.AreEqual(expectedOdds[k], oddsRatios[k], 1e-8);
            }

            double[] expectedErrors = { 2.1590686019473897, 0.033790422321041035, 1.4729903935788211 };
            for (int k = 0; k < expectedErrors.Length; k++)
            {
                Assert.AreEqual(expectedErrors[k], stdErrors[k], 1e-8);
            }

            Assert.AreEqual(0.75143272858389798, score, 1e-8);
            Assert.AreEqual(0.079591541770048527, interval.Min, 1e-8);
            Assert.AreEqual(0.99062645401700389, interval.Max, 1e-8);
        }
Пример #11
0
        /// <summary>
        /// Console entry point: runs the local <c>Learn</c> routine on a four-sample data
        /// set and prints the values it returns, then fits an Accord
        /// <c>LogisticRegressionAnalysis</c> on the same data and shows its coefficients
        /// in a data grid box.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Four samples; the first component is constant 1.
            double[][] samples =
            {
                new double[] { 1, 2 },
                new double[] { 1, 3 },
                new double[] { 1, 4 },
                new double[] { 1, 5 },
            };
            double[] labels = { 1, 1, 1, 0 };

            // Parameters for the local Learn routine (defined elsewhere in this
            // file; presumably alpha is its step size - TODO confirm).
            double alpha = 0.03;
            double[] theta = { 1, 1 };

            double[] fitted = Learn(samples, labels, theta, alpha);

            Console.WriteLine("_____________");
            for (int k = 0; k < fitted.Length; k++)
            {
                Console.WriteLine(fitted[k]);
            }

            // Fit the same data with Accord's analysis for comparison.
            var analysis = new LogisticRegressionAnalysis();
            analysis.Regularization = 0.03;

            LogisticRegression model = analysis.Learn(samples, labels);

            _ = Accord.Controls.DataGridBox.Show(model.Coefficients);

            // Wait for a line of input before exiting.
            Console.ReadLine();
        }
        /// <summary>
        /// Documentation example and regression check: codifies a small DataTable,
        /// learns a LogisticRegressionAnalysis from it and verifies the prediction,
        /// log-likelihood, odds ratios and chi-square test against recorded values.
        /// </summary>
        public void gh_937()
        {
            #region doc_learn_database
            // Note: this example uses a System.Data.DataTable to represent input data,
            // but note that this is not required. The data could have been represented
            // as jagged double matrices (double[][]) directly.

            // If you have to handle heterogeneous data in your application, such as user records
            // in a database, this data is best represented within the framework using a .NET
            // DataTable object. In order to learn a classification or regression model
            // using this datatable, first we will need to convert the table into a representation
            // that the machine learning model can understand. Such a representation is quite often
            // a matrix of doubles (double[][]).
            var data = new DataTable("Customer Revenue Example");

            data.Columns.Add("Day", "CustomerId", "Time (hour)", "Weather", "Buy");
            data.Rows.Add("D1", 0, 8, "Sunny", true);
            data.Rows.Add("D2", 1, 10, "Sunny", true);
            data.Rows.Add("D3", 2, 10, "Rain", false);
            data.Rows.Add("D4", 3, 16, "Rain", true);
            data.Rows.Add("D5", 4, 15, "Rain", true);
            data.Rows.Add("D6", 5, 20, "Rain", false);
            data.Rows.Add("D7", 6, 12, "Cloudy", true);
            data.Rows.Add("D8", 7, 12, "Sunny", false);

            // One way to perform this conversion is by using a Codification filter. The Codification
            // filter can take care of converting variables that actually denote symbols (i.e. the
            // weather in the example above) into representations that make more sense given the assumption
            // of a real vector-based classifier.

            // Create a codification codebook
            // NOTE(review): the table above has no "Revenue" column, so that entry appears
            // to have no effect on this example - confirm before relying on it.
            var codebook = new Codification()
            {
                { "Weather", CodificationVariable.Categorical },
                { "Time (hour)", CodificationVariable.Continuous },
                { "Revenue", CodificationVariable.Continuous },
            };

            // Learn from the data
            codebook.Learn(data);

            // Now, we will use the codebook to transform the DataTable into double[][] vectors. Due
            // to the way the conversion works, we can end up with more columns in the output vectors
            // than the ones we started with. If you would like more details about what those columns
            // represent, you can pass them as 'out' parameters in the methods that follow below.
            string[] inputNames;  // (note: if you do not want to run this example yourself, you
            string   outputName;  // can see below the new variable names that will be generated)

            // Now, we can translate our training data into numeric vectors using our codebook:
            double[][] inputs  = codebook.Apply(data, "Weather", "Time (hour)").ToJagged(out inputNames);
            double[]   outputs = codebook.Apply(data, "Buy").ToVector(out outputName);
            // (note: the Apply method transforms a DataTable into another DataTable containing the codified
            //  variables. The ToJagged and ToVector methods are then used to transform those tables into
            //  double[][] matrices and double[] vectors, respectively.)

            // If we would like to learn a logistic regression model for this data, there are two possible
            // ways depending on which aspect of the logistic regression we are interested in the most. If we
            // are interested in interpreting the logistic regression, performing hypothesis tests with the
            // coefficients and performing an actual _logistic regression analysis_, then we can use the
            // LogisticRegressionAnalysis class for this. If however we are only interested in using
            // the learned model directly to predict new values for the dataset, then we could be using the
            // LogisticRegression and IterativeReweightedLeastSquares classes directly instead.

            // This example deals with the former case. For the latter, please see the documentation page
            // for the LogisticRegression class.

            // We can create a new logistic regression analysis for the variables
            var lra = new LogisticRegressionAnalysis()
            {
                // We can also inform the names of the new variables that have been created by the
                // codification filter. Those can help in visualizing the analysis once it is
                // data-bound to a visual control such as a Windows.Forms.DataGridView or WPF DataGrid:

                Inputs = inputNames, // presumably { "Weather: Sunny", "Weather: Rain", "Weather: Cloudy", "Time (hour)" } - TODO confirm exact names
                Output = outputName  // presumably "Buy", the column codified for the outputs above - TODO confirm
            };

            // Compute the analysis and obtain the estimated regression
            LogisticRegression regression = lra.Learn(inputs, outputs);

            // And then predict the probability for the first sample using
            double predicted = lra.Transform(inputs[0]); // result will be ~0.287

            // Because we opted for doing a LogisticRegressionAnalysis instead of using a plain
            // LogisticRegression, we will have further information about the regression available:
            int           inputCount        = lra.NumberOfInputs;  // should be 4
            int           outputCount       = lra.NumberOfOutputs; // should be 1
            double        logl              = lra.LogLikelihood;   // should be -4.6035570737785525
            ChiSquareTest x2                = lra.ChiSquare;       // should be 1.37789 (p=0.8480, non-significant)
            double[]      stdErr            = lra.StandardErrors;  // should be high except for the last value of 0.27122079214927985 (due to the small data set)
            double[]      or                = lra.OddsRatios;      // should be 1.1116659950687609 for the last coefficient (related to time of day)
            LogisticCoefficientCollection c = lra.Coefficients;    // coefficient table (bind to a visual control for quick inspection)
            double[][] h = lra.InformationMatrix;                  // should contain Fisher's information matrix for the problem
            #endregion

            // Pin the values quoted in the comments above.
            Assert.AreEqual(0.28703150858677107, predicted, 1e-8);
            Assert.AreEqual(4, inputCount, 1e-8);
            Assert.AreEqual(1, outputCount, 1e-8);
            Assert.AreEqual(-4.6035570737785525, logl, 1e-8);
            // Five odds ratios: one more than the four inputs (the extra one is presumably the intercept).
            Assert.IsTrue(new[] { 0.0019604927838235376, 88.043929817973222, 101.42211648160144, 2.1954970044905113E-07, 1.1116659950687609 }.IsEqual(or, 1e-4));

            Assert.AreEqual(1.377897662970609, x2.Statistic, 1e-8);
            Assert.AreEqual(0.84802726696077046, x2.PValue, 1e-8);
        }
Пример #13
0
        /// <summary>
        /// Handles the "run analysis" button: validates that data is loaded and at least
        /// one input variable is checked, then computes a descriptive analysis, a logistic
        /// regression analysis and a multiple linear regression analysis on the selected
        /// columns and binds the results to the form's controls.
        /// </summary>
        /// <param name="sender">The control that raised the event (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void btnSampleRunAnalysis_Click(object sender, EventArgs e)
        {
            // Check requirements
            if (sourceTable == null)
            {
                MessageBox.Show("A sample spreadsheet can be found in the " +
                                "Resources folder in the same directory as this application.",
                                "Please load some data before attempting an analysis");
                return;
            }

            if (checkedListBox1.CheckedItems.Count == 0)
            {
                MessageBox.Show("Please select the dependent input variables to be used in the regression model.",
                                "Please choose at least one input variable");

                // Stop here: without this the handler fell through and attempted
                // the analysis with no input columns selected.
                return;
            }


            // Finishes and save any pending changes to the given data
            dgvAnalysisSource.EndEdit();
            sourceTable.AcceptChanges();

            // Gets the column of the dependent variable
            String    dependentName = (string)comboBox1.SelectedItem;
            DataTable dependent     = sourceTable.DefaultView.ToTable(false, dependentName);

            // Gets the columns of the independent variables
            List <string> names = new List <string>();

            foreach (string name in checkedListBox1.CheckedItems)
            {
                names.Add(name);
            }

            String[]  independentNames = names.ToArray();
            DataTable independent      = sourceTable.DefaultView.ToTable(false, independentNames);


            // Creates the input and output matrices from the source data table
            this.inputs  = independent.ToJagged();
            this.outputs = dependent.Columns[dependentName].ToArray();

            // Creates the Simple Descriptive Analysis of the given source
            var sda = new DescriptiveAnalysis()
            {
                ColumnNames = independentNames
            }.Learn(inputs);

            // TODO: Standardize the InputNames/OutputNames properties


            // Populates statistics overview tab with analysis data
            dgvDistributionMeasures.DataSource = sda.Measures;

            // Creates the Logistic Regression Analysis of the given source
            this.lra = new LogisticRegressionAnalysis()
            {
                Inputs = independentNames,
                Output = dependentName
            };

            // Compute the Logistic Regression Analysis; results are read from
            // the analysis object itself, so the returned model is not kept.
            lra.Learn(inputs, outputs);

            // Populates coefficient overview with analysis data
            dgvLogisticCoefficients.DataSource = lra.Coefficients;

            // Populate details about the fitted model
            tbChiSquare.Text     = lra.ChiSquare.Statistic.ToString("N5");
            tbPValue.Text        = lra.ChiSquare.PValue.ToString("N5");
            checkBox1.Checked    = lra.ChiSquare.Significant;
            tbDeviance.Text      = lra.Deviance.ToString("N5");
            tbLogLikelihood.Text = lra.LogLikelihood.ToString("N5");


            // Create the Multiple Linear Regression Analysis of the given source
            this.mlr = new MultipleLinearRegressionAnalysis(intercept: true)
            {
                Inputs = independentNames,
                Output = dependentName
            };

            // Compute the Linear Regression Analysis (returned model not needed here)
            mlr.Learn(inputs, outputs);

            dgvLinearCoefficients.DataSource = mlr.Coefficients;
            dgvRegressionAnova.DataSource    = mlr.Table;

            tbRSquared.Text          = mlr.RSquared.ToString("N5");
            tbRSquaredAdj.Text       = mlr.RSquareAdjusted.ToString("N5");
            tbChiPValue.Text         = mlr.ChiSquareTest.PValue.ToString("N5");
            tbFPValue.Text           = mlr.FTest.PValue.ToString("N5");
            tbZPValue.Text           = mlr.ZTest.PValue.ToString("N5");
            tbChiStatistic.Text      = mlr.ChiSquareTest.Statistic.ToString("N5");
            tbFStatistic.Text        = mlr.FTest.Statistic.ToString("N5");
            tbZStatistic.Text        = mlr.ZTest.Statistic.ToString("N5");
            cbChiSignificant.Checked = mlr.ChiSquareTest.Significant;
            cbFSignificant.Checked   = mlr.FTest.Significant;
            cbZSignificant.Checked   = mlr.ZTest.Significant;

            // Populate projection source table: the selected inputs plus the
            // dependent column, unless the latter is already among the inputs.
            string[] cols = independentNames;
            if (!independentNames.Contains(dependentName))
            {
                cols = independentNames.Concatenate(dependentName);
            }

            DataTable projSource = sourceTable.DefaultView.ToTable(false, cols);

            dgvProjectionSource.DataSource = projSource;
        }
        public void FromSummary_new_method()
        {
            #region doc_learn_summary
            // Consider a (fictitious) summary table about patients who went
            // through cardiac surgery. Each row holds, in order: the number
            // of arterial bypasses performed during the surgery, the number
            // of patients whose surgery went well, and the number of patients
            // who had at least one complication during the surgery.
            //
            int[,] summary =
            {
                // # of bypasses     success     complications
                {       1,             140,           45       },
                {       2,             130,           60       },
                {       3,             150,           31       },
                {       4,              96,           65       }
            };

            // Split the table into the input variable (first column) and
            // the counts of positive and negative outcomes (other columns)
            double[][] bypassCounts = summary.GetColumn(0).ToDouble().ToJagged();
            int[] successes = summary.GetColumn(1);
            int[] complications = summary.GetColumn(2);

            // Create the analysis object that will be fed the summary data
            var analysis = new LogisticRegressionAnalysis();

            // Learn the model directly from the positive/negative counts
            LogisticRegression regression = analysis.Learn(bypassCounts, successes, complications);

            // At this point a summary of the analysis could be displayed
            // Accord.Controls.DataGridBox.Show(regression.Coefficients);


            // Every parameter can also be inspected on its own. The
            // coefficient values, for instance, are exposed as a vector:

            double[] coefficients = analysis.CoefficientValues;

            // The first entry corresponds to the intercept term of the
            // model. Odds ratios and standard errors are exposed likewise:

            double[] oddsRatios = analysis.OddsRatios;
            double[] standardErrors = analysis.StandardErrors;


            // Lastly, the model can score a new patient with 4 bypasses
            double score = analysis.Regression.Score(new double[] { 4 }); // about 0.67
            #endregion


            Assert.AreEqual(3.7586367581050162, oddsRatios[0], 1e-8);
            Assert.AreEqual(0.85772731075090014, oddsRatios[1], 1e-8);

            Assert.AreEqual(0.20884336554629004, standardErrors[0], 1e-6);
            Assert.AreEqual(0.075837785246620285, standardErrors[1], 1e-6);

            Assert.AreEqual(0.67044096045332713, score, 1e-8);


            // Reference model: turn the summary counts into explicit samples
            // (via Tools.Expand), learn from those, and use the result as the
            // expected outcome for the summary-based analysis above.
            int[] groups = summary.GetColumn(0);
            var samples = Accord.Statistics.Tools.Expand(groups, successes, complications);

            double[][] sampleInputs = samples.GetColumn(0).ToDouble().ToJagged();
            double[] sampleOutputs = samples.GetColumn(1).ToDouble();

            LogisticRegressionAnalysis expected = new LogisticRegressionAnalysis();
            expected.Learn(sampleInputs, sampleOutputs);

            double expectedSlope = expected.Coefficients[1].Value;  // should be about -0.153
            double expectedIntercept = expected.Coefficients[0].Value;
            double expectedPValue = expected.ChiSquare.PValue;      // should be about 0.042
            Assert.AreEqual(-0.15346904821339602, expectedSlope, 1e-8);
            Assert.AreEqual(1.324056323049271, expectedIntercept, 1e-8);
            Assert.AreEqual(0.042491262992507946, expectedPValue, 1e-8);


            // Both analyses must agree on the two terms checked here
            // (index 0 is read as the intercept, index 1 as the slope).
            const int termCount = 2;

            for (int i = 0; i < termCount; i++)
            {
                Assert.AreEqual(expected.Coefficients[i].Value, analysis.Coefficients[i].Value, 1e-8);
            }

            Assert.AreEqual(expected.ChiSquare.PValue, analysis.ChiSquare.PValue, 1e-8);

            for (int i = 0; i < termCount; i++)
            {
                Assert.AreEqual(expected.WaldTests[i].PValue, analysis.WaldTests[i].PValue, 1e-8);
            }

            for (int i = 0; i < termCount; i++)
            {
                Assert.AreEqual(expected.Confidences[i].Max, analysis.Confidences[i].Max, 1e-6);
                Assert.AreEqual(expected.Confidences[i].Min, analysis.Confidences[i].Min, 1e-6);
            }
        }
        public void example_learn()
        {
            #region doc_learn_part1
            // The (completely fictional) data below describes some patients
            // through two variables: a continuous one, the patient's age,
            // and a dichotomous one, telling whether the patient smokes.

            double[][] inputs =
            {
                //            Age  Smoking
                new double[] { 55,    0   },
                new double[] { 28,    0   },
                new double[] { 65,    1   },
                new double[] { 46,    0   },
                new double[] { 86,    1   },
                new double[] { 56,    1   },
                new double[] { 85,    0   },
                new double[] { 33,    0   },
                new double[] { 21,    1   },
                new double[] { 42,    1   },
            };

            // For each of those patients we also know whether or not
            // they had lung cancer: the array below holds 1 for the
            // patients who did and 0 for the patients who did not.

            double[] output =
            {
                0, 0, 0, 1, 1, 1, 0, 0, 0, 1
            };

            // Set up the Logistic Regression analysis, with regularization disabled
            var lra = new LogisticRegressionAnalysis();
            lra.Regularization = 0;

            // Learn the model from the data
            LogisticRegression regression = lra.Learn(inputs, output);

            // At this point a summary of the analysis could be displayed
            // Accord.Controls.DataGridBox.Show(regression.Coefficients);
            #endregion

            #region doc_learn_part2
            // Every parameter can also be inspected on its own. The
            // coefficient values, for instance, are exposed as a vector:

            double[] coef = lra.CoefficientValues;

            // The first entry corresponds to the intercept term of the
            // model. Odds ratios and standard errors are exposed likewise:

            double[] odds = lra.OddsRatios;
            double[] stde = lra.StandardErrors;

            // A new patient (87 years old, smoker) can now be scored:
            double[] newPatient = { 87, 1 };
            double y = lra.Regression.Score(newPatient); // 0.75

            // That is, the estimated probability is roughly 75%.

            // A confidence interval for that probability is also available:
            DoubleRange ci = lra.GetConfidenceInterval(newPatient);
            #endregion

            double[] expectedOdds = { 0.085627701183146374, 1.0208597029292648, 5.8584748981777919 };
            for (int i = 0; i < expectedOdds.Length; i++)
            {
                Assert.AreEqual(expectedOdds[i], odds[i], 1e-8);
            }

            double[] expectedErrors = { 2.1590686019473897, 0.033790422321041035, 1.4729903935788211 };
            for (int i = 0; i < expectedErrors.Length; i++)
            {
                Assert.AreEqual(expectedErrors[i], stde[i], 1e-8);
            }

            Assert.AreEqual(0.75143272858389798, y, 1e-8);
            Assert.AreEqual(0.079591541770048527, ci.Min, 1e-8);
            Assert.AreEqual(0.99062645401700389, ci.Max, 1e-8);
        }
        /// <summary>
        ///   Checks <c>LogisticRegressionAnalysis.Learn</c> on two small data sets:
        ///   a two-sample problem with two inputs, and a six-sample problem with
        ///   ten binary inputs in which the last three rows repeat the first three.
        /// </summary>
        public void learn2()
        {
            // Test instance 01: two samples, two inputs
            double[][] trainInput =
            {
               new double[] { 1, 1 },
               new double[] { 0, 0 },
            };

            double[] trainOutput = { 1, 0 };
            double[] testInput = { 0, 0.2 };

            var target = new LogisticRegressionAnalysis();

            target.Regularization = 1e-10;

            // Learn must hand back the very same model the analysis exposes
            var regression = target.Learn(trainInput, trainOutput);
            Assert.AreSame(regression, target.Regression);

            // Expected value goes first so that failure messages report
            // "expected" and "actual" the right way round.
            double[] coef = target.Coefficients.Apply(x => x.Value);
            Assert.AreEqual(-19.360661491141897, coef[0], 1e-6);
            Assert.AreEqual(19.702873967721807, coef[1], 1e-6);
            Assert.AreEqual(19.702873967721807, coef[2], 1e-6);

            double output = target.Regression.Score(testInput);
            Assert.AreEqual(0, output, 1e-6);

            // Test instance 02: rows 4-6 repeat rows 1-3. The first and third
            // patterns appear with conflicting labels (expected score 0.5),
            // while the second pattern is labelled 1 both times (score ~1).
            trainInput = new double[][]
            {
                new double[] { 1, 0, 1, 1, 0, 1, 1, 0, 1, 0 },
                new double[] { 0, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
                new double[] { 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
                new double[] { 1, 0, 1, 1, 0, 1, 1, 0, 1, 0 },
                new double[] { 0, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
                new double[] { 1, 1, 0, 0, 1, 1, 0, 1, 1, 1 },
            };

            trainOutput = new double[] { 1, 1, 0, 0, 1, 1 };

            target = new LogisticRegressionAnalysis();

            regression = target.Learn(trainInput, trainOutput);

            double[] actualScores = regression.Score(trainInput);
            double[] expectedScores =
            {
                0.500000000158903, 0.999999998410966, 0.500000000913694,
                0.500000000158903, 0.999999998410966, 0.500000000913694
            };
            Assert.IsTrue(actualScores.IsEqual(expectedScores, 1e-6));

            // Eleven coefficients for ten input columns
            // (presumably the intercept comes first - confirm against the analysis class).
            coef = target.Coefficients.Apply(x => x.Value);
            double[] expectedCoef =
            {
                1.86680346470929, -3.87720719574071, 2.44120453079343, -0.574401066088034,
                5.16960959435804, 2.44120453079343, -3.87720719574087, 5.16960959435804,
                2.44120453079343, -3.87720719574087, 2.44120453079343
            };
            Assert.IsTrue(coef.IsEqual(expectedCoef, 1e-6));
        }