/// <summary>
/// Adds together identically parameterised Cauchy distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the combined distribution.
/// </summary>
/// <param name="center">Center given to every <c>CauchyDistribution</c>.</param>
/// <param name="gamma">Gamma given to every <c>CauchyDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void SumOfSeveralCauchysChiSquareTest(double center, double gamma, int count)
{
    var first = new CauchyDistribution(center, gamma);
    var second = new CauchyDistribution(center, gamma);
    var accumulated = first + second;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        accumulated += new CauchyDistribution(center, gamma);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Successively subtracts identically parameterised Cauchy distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the resulting distribution.
/// </summary>
/// <param name="center">Center given to every <c>CauchyDistribution</c>.</param>
/// <param name="gamma">Gamma given to every <c>CauchyDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void DifferenceOfSeveralCauchysChiSquareTest(double center, double gamma, int count)
{
    var minuend = new CauchyDistribution(center, gamma);
    var subtrahend = new CauchyDistribution(center, gamma);
    var accumulated = minuend - subtrahend;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        accumulated -= new CauchyDistribution(center, gamma);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Multiplies together identically parameterised Cauchy distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the combined distribution.
/// </summary>
/// <param name="center">Center given to every <c>CauchyDistribution</c>.</param>
/// <param name="gamma">Gamma given to every <c>CauchyDistribution</c>.</param>
/// <param name="count">Number of factors; for 2 or less exactly two are combined.</param>
public void ProductOfSeveralCauchysChiSquareTest(double center, double gamma, int count)
{
    var first = new CauchyDistribution(center, gamma);
    var second = new CauchyDistribution(center, gamma);
    var accumulated = first * second;

    // Two factors are already combined, so the remaining ones start at index 2.
    for (var factor = 2; factor < count; factor++)
    {
        accumulated *= new CauchyDistribution(center, gamma);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Successively divides identically parameterised Cauchy distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the resulting distribution.
/// </summary>
/// <param name="center">Center given to every <c>CauchyDistribution</c>.</param>
/// <param name="gamma">Gamma given to every <c>CauchyDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void QuotientOfSeveralCauchysChiSquareTest(double center, double gamma, int count)
{
    var numerator = new CauchyDistribution(center, gamma);
    var denominator = new CauchyDistribution(center, gamma);
    var accumulated = numerator / denominator;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        accumulated /= new CauchyDistribution(center, gamma);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Copies the statistics exposed by <c>regression</c> into this instance's fields:
/// first the model-level values, then one entry per coefficient.
/// </summary>
private void computeInformation()
{
    // Model-level values, evaluated on the stored input and output data.
    results = regression.Compute(inputData);
    deviance = regression.GetDeviance(inputData, outputData);
    logLikelihood = regression.GetLogLikelihood(inputData, outputData);
    chiSquare = regression.ChiSquare(inputData, outputData);

    coefficients = regression.Coefficients;
    standardErrors = regression.StandardErrors;

    // Per-coefficient values; the loop is bounded by the length of waldTests.
    for (int index = 0; index < waldTests.Length; index++)
    {
        waldTests[index] = regression.GetWaldTest(index);
        confidences[index] = regression.GetConfidenceInterval(index);
        oddsRatios[index] = regression.GetOddsRatio(index);
    }
}
/// <summary>
/// Successively divides identically parameterised normal distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the resulting distribution.
/// </summary>
/// <param name="mu">First argument given to every <c>NormalDistribution</c>.</param>
/// <param name="sigma">Second argument given to every <c>NormalDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void QuotientOfSeveralNormalsChiSquareTest(double mu, double sigma, int count)
{
    var numerator = new NormalDistribution(mu, sigma);
    var denominator = new NormalDistribution(mu, sigma);
    var quotient = numerator / denominator;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        quotient /= new NormalDistribution(mu, sigma);
    }

    Assert.IsTrue(ChiSquareTest.Test(quotient));
}
/// <summary>
/// Multiplies together identically parameterised exponential distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the combined distribution.
/// </summary>
/// <param name="lambda">Argument given to every <c>ExponentialDistribution</c>.</param>
/// <param name="count">Number of factors; for 2 or less exactly two are combined.</param>
public void ProductOfSeveralExponentialsChiSquareTest(double lambda, int count)
{
    var first = new ExponentialDistribution(lambda);
    var second = new ExponentialDistribution(lambda);
    var accumulated = first * second;

    // Two factors are already combined, so the remaining ones start at index 2.
    for (var factor = 2; factor < count; factor++)
    {
        accumulated *= new ExponentialDistribution(lambda);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Adds together identically parameterised normal distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the combined distribution.
/// </summary>
/// <param name="mu">First argument given to every <c>NormalDistribution</c>.</param>
/// <param name="sigma">Second argument given to every <c>NormalDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void SumOfSeveralNormalsChiSquareTest(double mu, double sigma, int count)
{
    var first = new NormalDistribution(mu, sigma);
    var second = new NormalDistribution(mu, sigma);
    var accumulated = first + second;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        accumulated += new NormalDistribution(mu, sigma);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Adds together identically parameterised exponential distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the combined distribution.
/// </summary>
/// <param name="lambda">Argument given to every <c>ExponentialDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void SumOfSeveralExponentialsChiSquareTest(double lambda, int count)
{
    var first = new ExponentialDistribution(lambda);
    var second = new ExponentialDistribution(lambda);
    var accumulated = first + second;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        accumulated += new ExponentialDistribution(lambda);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Successively subtracts identically parameterised exponential distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the resulting distribution.
/// </summary>
/// <param name="lambda">Argument given to every <c>ExponentialDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void DifferenceOfSeveralExponentialsChiSquareTest(double lambda, int count)
{
    var minuend = new ExponentialDistribution(lambda);
    var subtrahend = new ExponentialDistribution(lambda);
    var accumulated = minuend - subtrahend;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        accumulated -= new ExponentialDistribution(lambda);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Successively divides identically parameterised uniform distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the resulting distribution.
/// </summary>
/// <param name="a">First argument given to every <c>UniformDistribution</c>.</param>
/// <param name="b">Second argument given to every <c>UniformDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void QuotientOfSeveralUniformsChiSquareTest(double a, double b, int count)
{
    var numerator = new UniformDistribution(a, b);
    var denominator = new UniformDistribution(a, b);
    var accumulated = numerator / denominator;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        accumulated /= new UniformDistribution(a, b);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Multiplies together identically parameterised uniform distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the combined distribution.
/// </summary>
/// <param name="a">First argument given to every <c>UniformDistribution</c>.</param>
/// <param name="b">Second argument given to every <c>UniformDistribution</c>.</param>
/// <param name="count">Number of factors; for 2 or less exactly two are combined.</param>
public void ProductOfSeveralUniformsChiSquareTest(double a, double b, int count)
{
    var first = new UniformDistribution(a, b);
    var second = new UniformDistribution(a, b);
    var accumulated = first * second;

    // Two factors are already combined, so the remaining ones start at index 2.
    for (var factor = 2; factor < count; factor++)
    {
        accumulated *= new UniformDistribution(a, b);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Successively subtracts identically parameterised uniform distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the resulting distribution.
/// </summary>
/// <param name="a">First argument given to every <c>UniformDistribution</c>.</param>
/// <param name="b">Second argument given to every <c>UniformDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void DifferenceOfSeveralUniformsChiSquareTest(double a, double b, int count)
{
    var minuend = new UniformDistribution(a, b);
    var subtrahend = new UniformDistribution(a, b);
    var accumulated = minuend - subtrahend;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        accumulated -= new UniformDistribution(a, b);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Adds together identically parameterised uniform distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the combined distribution.
/// </summary>
/// <param name="a">First argument given to every <c>UniformDistribution</c>.</param>
/// <param name="b">Second argument given to every <c>UniformDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void SumOfSeveralUniformsChiSquareTest(double a, double b, int count)
{
    var first = new UniformDistribution(a, b);
    var second = new UniformDistribution(a, b);
    var accumulated = first + second;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        accumulated += new UniformDistribution(a, b);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Successively divides identically parameterised exponential distributions and asserts
/// that <c>ChiSquareTest.Test</c> returns true for the resulting distribution.
/// </summary>
/// <param name="lambda">Argument given to every <c>ExponentialDistribution</c>.</param>
/// <param name="count">Number of operands; for 2 or less exactly two are combined.</param>
public void QuotientOfSeveralExponentialsChiSquareTest(double lambda, int count)
{
    var numerator = new ExponentialDistribution(lambda);
    var denominator = new ExponentialDistribution(lambda);
    var accumulated = numerator / denominator;

    // Two operands are already combined, so the remaining ones start at index 2.
    for (var operand = 2; operand < count; operand++)
    {
        accumulated /= new ExponentialDistribution(lambda);
    }

    Assert.IsTrue(ChiSquareTest.Test(accumulated));
}
/// <summary>
/// Draws <c>IntegerDomainTest.triesForRandomTests</c> random gene values from an integer domain
/// and asserts that a Chi-Squared test against a discrete uniform distribution over the same
/// bounds is not significant.
/// </summary>
public void GenerateRandomGeneValueDoesNotDepartFromUniformDistribution()
{
    this._integerDomain = new IntegerDomain(IntegerDomainTest.minimum, IntegerDomainTest.maximum);

    // Collect one observation per draw.
    var observations = new double[IntegerDomainTest.triesForRandomTests];
    int draw = 0;
    while (draw < IntegerDomainTest.triesForRandomTests)
    {
        observations[draw] = this._integerDomain.GenerateRandomGeneValue().GetValue();
        draw++;
    }

    // Compare the observations with the uniform distribution over [minimum, maximum].
    var expectedDistribution = new UniformDiscreteDistribution(IntegerDomainTest.minimum, IntegerDomainTest.maximum);
    var uniformTest = new ChiSquareTest(observations, expectedDistribution);

    Assert.False(
        uniformTest.Significant,
        $"Random generation was found to be not uniform by the Chi-Squared test with significance level {uniformTest.Size}.");
}
/// <summary>
/// Copies the statistics exposed by <c>regression</c> into this instance's fields:
/// first the model-level values, then one entry per regression coefficient.
/// </summary>
private void computeInformation()
{
    // Model-level values. Warnings 612/618 (obsolete members) are silenced for the Compute call only.
#pragma warning disable 612, 618
    result = regression.Compute(inputData, timeData);
#pragma warning restore 612, 618

    deviance = regression.GetDeviance(inputData, timeData, censorData);
    logLikelihood = regression.GetPartialLogLikelihood(inputData, timeData, censorData);
    chiSquare = regression.ChiSquare(inputData, timeData, censorData);

    // Per-coefficient values, copied element by element into the existing arrays.
    for (int index = 0; index < regression.Coefficients.Length; index++)
    {
        standardErrors[index] = regression.StandardErrors[index];
        waldTests[index] = regression.GetWaldTest(index);
        coefficients[index] = regression.Coefficients[index];
        confidences[index] = regression.GetConfidenceInterval(index);
        hazardRatios[index] = regression.GetHazardRatio(index);
    }
}
/// <summary>
/// Generates 1000 samples from the standard uniform and standard normal distributions
/// (generator seeded with 0) and checks the Chi-Square test of each sample against each
/// of the two distributions.
/// </summary>
public void ConstructorTest5()
{
    Accord.Math.Tools.SetupGenerator(0);

    // The uniform sample is generated first, then the normal one.
    double[] uniformSample = UniformContinuousDistribution.Standard.Generate(1000);
    double[] normalSample = NormalDistribution.Standard.Generate(1000);

    var uniform = UniformContinuousDistribution.Standard;
    var normal = NormalDistribution.Standard;

    // Uniform sample against the uniform distribution: not significant.
    var uniformVsUniform = new ChiSquareTest(uniformSample, uniform);
    Assert.AreEqual(3.2399999999999958, uniformVsUniform.Statistic, 1e-6);
    Assert.AreEqual(7, uniformVsUniform.DegreesOfFreedom);
    Assert.AreEqual(0.86194834721001945, uniformVsUniform.PValue, 1e-6);
    Assert.IsFalse(uniformVsUniform.Significant);

    // Uniform sample against the normal distribution: significant.
    var uniformVsNormal = new ChiSquareTest(uniformSample, normal);
    Assert.AreEqual(1547.9120000000009, uniformVsNormal.Statistic, 1e-6);
    Assert.AreEqual(7, uniformVsNormal.DegreesOfFreedom);
    Assert.AreEqual(0, uniformVsNormal.PValue, 1e-6);
    Assert.IsTrue(uniformVsNormal.Significant);

    // Normal sample against the uniform distribution: significant.
    var normalVsUniform = new ChiSquareTest(normalSample, uniform);
    Assert.AreEqual(401.71999999999991, normalVsUniform.Statistic, 1e-6);
    Assert.AreEqual(7, normalVsUniform.DegreesOfFreedom);
    Assert.AreEqual(0, normalVsUniform.PValue, 1e-6);
    Assert.IsTrue(normalVsUniform.Significant);

    // Normal sample against the normal distribution: not significant.
    var normalVsNormal = new ChiSquareTest(normalSample, normal);
    Assert.AreEqual(9.7439999999999696, normalVsNormal.Statistic, 1e-6);
    Assert.AreEqual(7, normalVsNormal.DegreesOfFreedom);
    Assert.AreEqual(0.20355084764042014, normalVsNormal.PValue, 1e-6);
    Assert.IsFalse(normalVsNormal.Significant);
}
/// <summary>
/// Generates 10000 samples from the standard uniform and standard normal distributions
/// (generator seeded with 0) and checks the Chi-Square test of each sample against each
/// of the two distributions.
/// </summary>
public void ConstructorTest5()
{
    Accord.Math.Tools.SetupGenerator(0);

    // The uniform sample is generated first, then the normal one.
    double[] uniformSample = UniformContinuousDistribution.Standard.Generate(10000);
    double[] normalSample = NormalDistribution.Standard.Generate(10000);

    var uniform = UniformContinuousDistribution.Standard;
    var normal = NormalDistribution.Standard;

    // Uniform sample against the uniform distribution: not significant.
    var uniformVsUniform = new ChiSquareTest(uniformSample, uniform);
    Assert.AreEqual(2.7011909090910131, uniformVsUniform.Statistic, 1e-6);
    Assert.AreEqual(10, uniformVsUniform.DegreesOfFreedom);
    Assert.AreEqual(0.98760847271849528, uniformVsUniform.PValue, 1e-6);
    Assert.IsFalse(uniformVsUniform.Significant);

    // Uniform sample against the normal distribution: significant.
    var uniformVsNormal = new ChiSquareTest(uniformSample, normal);
    Assert.AreEqual(14865.499690909099, uniformVsNormal.Statistic, 1e-6);
    Assert.AreEqual(10, uniformVsNormal.DegreesOfFreedom);
    Assert.AreEqual(0, uniformVsNormal.PValue, 1e-6);
    Assert.IsTrue(uniformVsNormal.Significant);

    // Normal sample against the uniform distribution: significant.
    var normalVsUniform = new ChiSquareTest(normalSample, uniform);
    Assert.AreEqual(3934.3426909090917, normalVsUniform.Statistic, 1e-6);
    Assert.AreEqual(10, normalVsUniform.DegreesOfFreedom);
    Assert.AreEqual(0, normalVsUniform.PValue, 1e-6);
    Assert.IsTrue(normalVsUniform.Significant);

    // Normal sample against the normal distribution: not significant.
    var normalVsNormal = new ChiSquareTest(normalSample, normal);
    Assert.AreEqual(6.2902909090909525, normalVsNormal.Statistic, 1e-6);
    Assert.AreEqual(10, normalVsNormal.DegreesOfFreedom);
    Assert.AreEqual(0.79031311920555392, normalVsNormal.PValue, 1e-6);
    Assert.IsFalse(normalVsNormal.Significant);
}
/// <summary>
/// Runs one likelihood-ratio test per input variable by refitting a nested proportional
/// hazards model on the input data with that variable's column removed, and stores each
/// result in <c>ratioTests</c>.
/// </summary>
/// <remarks>
/// Returns immediately, without setting <c>innerComputed</c>, when <c>inputCount</c> is 2 or less.
/// </remarks>
private void computeInner()
{
    if (inputCount <= 2)
    {
        return;
    }

    // One nested model and one learner are shared by all iterations.
    var nestedModel = new ProportionalHazards(inputCount - 1);
    var learner = createLearner(nestedModel);

    for (int dropped = 0; dropped < inputCount; dropped++)
    {
        // Input data without the column of the variable under test.
        double[][] reduced = inputData.RemoveColumn(dropped);

#if DEBUG
        if (reduced[0].Length == 0)
        {
            throw new Exception();
        }
#endif

        // Zero the first inputCount - 1 coefficients before refitting.
        Array.Clear(nestedModel.Coefficients, 0, inputCount - 1);

        learner.MaxIterations = Iterations;
        learner.Tolerance = Tolerance;
        learner.Learn(reduced, timeData, censorData);

        // Test statistic: twice the gap between the full and nested partial log-likelihoods.
        double nestedLikelihood = nestedModel.GetPartialLogLikelihood(reduced, timeData, censorData);
        double ratio = 2.0 * (logLikelihood - nestedLikelihood);
        ratioTests[dropped] = new ChiSquareTest(ratio, 1);
    }

    innerComputed = true;
}
/// <summary>
///   Checks if two variables can be eliminated.
/// </summary>
/// 
/// <remarks>
///   A confusion matrix is built from the two vectors and the hypothesis test is chosen from
///   its largest expected frequency: above 10, a Chi-Square test without Yates' correction;
///   from 5 to 10, a Chi-Square test with Yates' correction; below 5, Fisher's exact test.
/// </remarks>
/// 
/// <param name="actual">First vector passed to the <c>ConfusionMatrix</c> constructor.</param>
/// <param name="expected">Second vector passed to the <c>ConfusionMatrix</c> constructor.</param>
/// <param name="alpha">Value assigned to the <c>Size</c> of the selected test.</param>
/// 
/// <returns>True when the selected test is not significant; false otherwise.</returns>
/// 
public static bool CanEliminate(bool[] actual, bool[] expected, double alpha)
{
    var matrix = new ConfusionMatrix(actual, expected);
    double maxExpectedFrequency = matrix.ExpectedValues.Max();

    // Each assignment is terminated inside its own block so the if/else chain parses.
    IHypothesisTest test;
    if (maxExpectedFrequency > 10)
    {
        test = new ChiSquareTest(matrix, yatesCorrection: false) { Size = alpha };
    }
    else if (maxExpectedFrequency >= 5)
    {
        test = new ChiSquareTest(matrix, yatesCorrection: true) { Size = alpha };
    }
    else
    {
        test = new FisherExactTest(matrix) { Size = alpha };
    }

    return !test.Significant;
}
#pragma warning restore 612, 618
/// <summary>
/// Runs one likelihood-ratio test per input variable by fitting a nested logistic regression
/// on the data with that variable's column removed, storing each result in
/// <c>ratioTests[i + 1]</c>, and finally sets <c>innerComputed</c>.
/// </summary>
/// <param name="inputData">Input rows; one column is removed per iteration.</param>
/// <param name="outputData">Outputs passed to the learner and to the log-likelihood computation.</param>
/// <param name="weights">Weights passed to the learner.</param>
private void computeInner(double[][] inputData, double[] outputData, double[] weights)
{
    for (int i = 0; i < NumberOfInputs; i++)
    {
        // Create a diminished inner model without the current variable
        double[][] data = inputData.RemoveColumn(i);

        // Perform likelihood-ratio tests against diminished nested models.
        // The reduced data has one column fewer than the full data, so the
        // nested model is created with NumberOfInputs - 1 inputs.
        var innerModel = new LogisticRegression(NumberOfInputs - 1);
        var learning = new IterativeReweightedLeastSquares(innerModel)
        {
            Iterations = iterations,
            Tolerance = tolerance,
            Regularization = regularization
        };

        learning.Learn(data, outputData, weights);

        // Test statistic: twice the gap between the full and nested log-likelihoods.
        double ratio = 2.0 * (logLikelihood - innerModel.GetLogLikelihood(data, outputData));
        ratioTests[i + 1] = new ChiSquareTest(ratio, 1);
    }

    innerComputed = true;
}
/// <summary>
/// Mutates a fixed gene value <c>CategoricalDomainTest.triesForRandomTests</c> times within a
/// categorical domain over 0..3 and asserts that a Chi-Squared test against a discrete uniform
/// distribution over 0..3 is not significant.
/// </summary>
public void MutateDoesNotDepartFromUniformDistribution()
{
    // Categorical domain whose possible values are the integers 0 through 3.
    var domain = new CategoricalDomain<int>(new List<int> { 0, 1, 2, 3 });

    // One observation per mutation of the same starting allele.
    var observations = new double[CategoricalDomainTest.triesForRandomTests];
    var geneValue = new Allele<int>(1);
    int attempt = 0;
    while (attempt < CategoricalDomainTest.triesForRandomTests)
    {
        var mutated = domain.MutateGeneValue(geneValue, CategoricalDomainTest.dummyVariancePercentage);
        observations[attempt] = (int)mutated.GetValue();
        attempt++;
    }

    // Compare the observations with the uniform distribution over [0, 3].
    var uniformTest = new ChiSquareTest(observations, new UniformDiscreteDistribution(0, 3));
    Assert.False(
        uniformTest.Significant,
        $"Mutation was found to not produce a uniform distribution by the Chi-Squared test with significance level {uniformTest.Size}.");
}
/// <summary>
/// Runs one likelihood-ratio test per input variable by training a nested logistic regression
/// on the data with that variable's column removed, storing each result in
/// <c>ratioTests[i + 1]</c>.
/// </summary>
/// <param name="limit">Training stops once the value returned by <c>Run</c> is no longer greater than this.</param>
/// <param name="maxIterations">Upper bound on the number of <c>Run</c> calls per variable (at least one is always made).</param>
private void computeInner(double limit, int maxIterations)
{
    // One nested model and one learner are shared by all iterations.
    var nestedModel = new LogisticRegression(inputCount - 1);
    var learner = new IterativeReweightedLeastSquares(nestedModel);

    for (int dropped = 0; dropped < inputCount; dropped++)
    {
        // Input data without the column of the variable under test.
        double[][] reduced = inputData.RemoveColumn(dropped);

        // Always run once, then continue only while the change exceeds
        // the limit and the iteration budget is not exhausted.
        int iteration = 0;
        while (true)
        {
            double delta = learner.Run(reduced, outputData);
            iteration++;

            if (!(delta > limit && iteration < maxIterations))
            {
                break;
            }
        }

        // Test statistic: twice the gap between the full and nested log-likelihoods.
        double ratio = 2.0 * (logLikelihood - nestedModel.GetLogLikelihood(reduced, outputData));
        ratioTests[dropped + 1] = new ChiSquareTest(ratio, 1);
    }
}
/// <summary>
///   Computes one step of the Stepwise Logistic Regression Analysis.
/// </summary>
/// 
/// <returns>
///   Returns the index of the variable discarded in the step or -1
///   in case no variable could be discarded.
/// </returns>
/// 
public int DoStep()
{
    // Only allocated on the first step, where it is handed to the full model.
    ChiSquareTest[] tests = null;

    if (currentModel == null)
    {
        // First step: fit the complete model on every input column.
        int totalInputs = inputData[0].Length;
        int[] allVariables = Vector.Range(0, totalInputs);

        var fullRegression = new LogisticRegression() { NumberOfInputs = totalInputs };
        fit(fullRegression, inputData, outputData);

        ChiSquareTest fullTest = fullRegression.ChiSquare(inputData, outputData);
        fullLikelihood = fullRegression.GetLogLikelihood(inputData, outputData);

        if (Double.IsNaN(fullLikelihood))
        {
            throw new ConvergenceException(
                "Perfect separation detected. Please rethink the use of logistic regression.");
        }

        tests = new ChiSquareTest[fullRegression.NumberOfInputs + 1];
        currentModel = new StepwiseLogisticRegressionModel(this, fullRegression, allVariables, fullTest, tests);
        completeModel = currentModel;
    }

    // A single-input model cannot be reduced any further.
    if (currentModel.Regression.NumberOfInputs == 1)
    {
        return -1;
    }

    // Build one nested model per variable, each omitting that variable.
    var nestedModels = new StepwiseLogisticRegressionModel[currentModel.Regression.NumberOfInputs];
    for (int candidate = 0; candidate < nestedModels.Length; candidate++)
    {
        var nestedRegression = new LogisticRegression()
        {
            NumberOfInputs = currentModel.Regression.NumberOfInputs - 1
        };

        int[] remaining = currentModel.Variables.RemoveAt(candidate);
        double[][] subset = inputData.Get(null, remaining);
        fit(nestedRegression, subset, outputData);

        // Likelihood-ratio test of the nested model against the complete model.
        double nestedLikelihood = nestedRegression.GetLogLikelihood(subset, outputData);
        double ratio = 2.0 * (fullLikelihood - nestedLikelihood);
        var nestedTest = new ChiSquareTest(ratio, inputNames.Length - remaining.Length)
        {
            Size = threshold
        };

        if (tests != null)
        {
            tests[candidate + 1] = nestedTest;
        }

        nestedModels[candidate] = new StepwiseLogisticRegressionModel(this, nestedRegression, remaining, nestedTest, null);
    }

    // Find the nested model with the highest p-value; on ties the later index wins.
    double pmax = 0;
    int imax = -1;
    for (int candidate = 0; candidate < nestedModels.Length; candidate++)
    {
        double pValue = nestedModels[candidate].ChiSquare.PValue;
        if (pValue >= pmax)
        {
            imax = candidate;
            pmax = pValue;
        }
    }

    // Expose the nested models through the read-only collection.
    this.nestedModelCollection = new StepwiseLogisticRegressionModelCollection(nestedModels);

    // Without a candidate whose p-value exceeds the threshold, nothing can be removed.
    if (imax < 0 || !(pmax > threshold))
    {
        return -1;
    }

    // The selected variable is dropped and its nested model becomes the current model.
    int removed = currentModel.Variables[imax];
    this.currentModel = nestedModels[imax];
    return removed;
}
/// <summary>
/// Fits the regression with <c>OrdinaryLeastSquares</c> and fills in the analysis state:
/// sums of squares, R-squared values, degrees of freedom, mean squares, the ANOVA table,
/// coefficient standard errors and tests, and the model-level tests.
/// </summary>
/// <param name="x">Input rows; its length is used as the number of samples.</param>
/// <param name="y">Expected output for each row of <paramref name="x"/>.</param>
private void compute(double[][] x, double[] y)
{
    int n = x.Length;
    int p = NumberOfInputs;

    SSt = 0;
    SSe = 0;
    outputMean = 0.0;
    NumberOfSamples = x.Length;

    // Fit the regression and keep the learner's information matrix.
    OrdinaryLeastSquares.Token = Token;
    regression = OrdinaryLeastSquares.Learn(x, y);
    informationMatrix = OrdinaryLeastSquares.GetInformationMatrix();

    // Mean of the expected outputs.
    outputMean = y.Mean();

    // Outputs predicted by the fitted regression.
#pragma warning disable 612, 618
    results = regression.Transform(x);

    // Accumulate the error (SSe) and total (SSt) sums of squares.
    for (int row = 0; row < n; row++)
    {
        double residual = y[row] - results[row];
        SSe += residual * residual;

        double deviation = y[row] - outputMean;
        SSt += deviation * deviation;
    }

    // Regression sum of squares.
    SSr = SSt - SSe;

    // R-squared; defined as 1 when the total sum of squares is zero.
    if (SSt != 0)
    {
        rSquared = 1.0 - (SSe / SSt);
    }
    else
    {
        rSquared = 1.0;
    }

    // Adjusted R-squared.
    if (rSquared == 1)
    {
        rAdjusted = 1;
    }
    else if (n - p == 1)
    {
        // The denominator (n - p - 1) would be zero.
        rAdjusted = double.NaN;
    }
    else
    {
        rAdjusted = 1.0 - (1.0 - rSquared) * ((n - 1.0) / (n - p - 1.0));
    }

    // Degrees of freedom.
    DFr = p;
    DFe = n - (p + 1);
    DFt = DFr + DFe;

    // Mean squares.
    MSe = SSe / DFe;
    MSr = SSr / DFr;
    MSt = SSt / DFt;

    // F statistic and standard error of the regression.
    ftest = new FTest(MSr / MSe, DFr, DFe);
    stdError = Math.Sqrt(MSe);

    // ANOVA table: regression, error and total rows, in that order.
    var table = new List<AnovaVariationSource>
    {
        new AnovaVariationSource(this, "Regression", SSr, DFr, MSr, ftest),
        new AnovaVariationSource(this, "Error", SSe, DFe, MSe, null),
        new AnovaVariationSource(this, "Total", SSt, DFt, MSt, null)
    };
    this.anovaTable = new AnovaSourceCollection(table);

    // Coefficient standard errors from the diagonal of the information matrix.
    standardErrors = new double[NumberOfInputs + 1];
    for (int k = 0; k < informationMatrix.Length; k++)
    {
        standardErrors[k] = Math.Sqrt(MSe * informationMatrix[k][k]);
    }

    // Per-coefficient t-test, F-test and confidence interval.
    for (int k = 0; k < CoefficientValues.Length; k++)
    {
        var coefficient = CoefficientValues[k];
        double error = standardErrors[k];
        double tStatistic = coefficient / error;

        ttests[k] = new TTest(estimatedValue: coefficient,
            standardError: error, degreesOfFreedom: DFe);
        ftests[k] = new FTest(tStatistic * tStatistic, 1, DFe);
        confidences[k] = ttests[k].GetConfidenceInterval(confidencePercent);
    }

    // Model-level tests.
    ttest = new TTest(results, outputMean);
    ztest = new ZTest(results, outputMean);
    chiSquareTest = new ChiSquareTest(y, results, n - p - 1);
#pragma warning restore 612, 618
}
/// <summary>
///   Computes the analysis.
/// </summary>
/// 
/// <remarks>
///   Fits every candidate distribution to the data, then evaluates each successfully fitted
///   univariate distribution with Kolmogorov-Smirnov, Chi-Square and Anderson-Darling tests,
///   and finally computes the per-test ranks and the sorted goodness-of-fit collection.
///   Distributions whose <c>Fit</c> call throws are skipped.
/// </remarks>
/// 
public void Compute()
{
    bool[] fail = new bool[Distributions.Length];

    // Step 1. Fit all candidate distributions to the data.
    for (int i = 0; i < Distributions.Length; i++)
    {
        var distribution = Distributions[i];

        try
        {
            distribution.Fit(data);
        }
        catch
        {
            // TODO: Maybe revisit the decision to swallow exceptions here.
            fail[i] = true;
        }
    }

    // Step 2. Use statistical tests to see how well each
    //         distribution was able to model the data.
    KolmogorovSmirnov = new KolmogorovSmirnovTest[Distributions.Length];
    ChiSquare = new ChiSquareTest[Distributions.Length];
    AndersonDarling = new AndersonDarlingTest[Distributions.Length];
    DistributionNames = new string[Distributions.Length];

    double[] ks = new double[Distributions.Length];
    double[] cs = new double[Distributions.Length];
    double[] ad = new double[Distributions.Length];

    var measures = new List<GoodnessOfFit>();

    for (int i = 0; i < Distributions.Length; i++)
    {
        // Per-iteration copy of the counter. The lambdas below must not capture `i`
        // itself: a `for` counter is one variable shared by all iterations, so work
        // that runs after the loop has advanced would write to another slot.
        // NOTE(review): this matters if `run` can return before its action has
        // finished (the `ms` argument looks like a timeout) -- confirm against `run`.
        int index = i;

        // Negative infinity marks entries for which no statistic is available.
        ks[index] = Double.NegativeInfinity;
        cs[index] = Double.NegativeInfinity;
        ad[index] = Double.NegativeInfinity;

        var d = this.Distributions[index] as IUnivariateDistribution;

        if (d == null || fail[index])
        {
            continue;
        }

        this.DistributionNames[index] = GetName(d.GetType());

        int ms = 5000;

        run(() =>
        {
            this.KolmogorovSmirnov[index] = new KolmogorovSmirnovTest(data, d);
            ks[index] = -KolmogorovSmirnov[index].Statistic;
        }, ms);

        run(() =>
        {
            this.ChiSquare[index] = new ChiSquareTest(data, d);
            cs[index] = -ChiSquare[index].Statistic;
        }, ms);

        run(() =>
        {
            this.AndersonDarling[index] = new AndersonDarlingTest(data, d);

            // NOTE(review): unlike the two statistics above, this one is stored
            // without negation -- confirm that this is what getRank expects.
            ad[index] = AndersonDarling[index].Statistic;
        }, ms);

        // NaN statistics are treated like missing ones.
        if (Double.IsNaN(ks[index]))
        {
            ks[index] = Double.NegativeInfinity;
        }

        if (Double.IsNaN(cs[index]))
        {
            cs[index] = Double.NegativeInfinity;
        }

        if (Double.IsNaN(ad[index]))
        {
            ad[index] = Double.NegativeInfinity;
        }

        measures.Add(new GoodnessOfFit(this, index));
    }

    this.KolmogorovSmirnovRank = getRank(ks);
    this.ChiSquareRank = getRank(cs);
    this.AndersonDarlingRank = getRank(ad);

    measures.Sort();

    this.GoodnessOfFit = new GoodnessOfFitCollection(measures);
}
/// <summary>
/// Learns a multinomial logistic regression analysis on the Iris dataset without supplying
/// input or output names, and checks the reported counts, default names, accuracy, kappa,
/// chi-square p-value and log-likelihood.
/// </summary>
public void learn_test_4()
{
    #region doc_learn_2
    // This example learns a multinomial logistic regression analysis on
    // Fisher's Iris dataset. It also shows that the class can be used
    // without DataTables, Codification codebooks or other supplementary
    // features.

    // Load Fisher's Iris dataset:
    Iris iris = new Iris();
    double[][] x = iris.Instances;
    int[] y = iris.ClassLabels;

    // Create a new Multinomial Logistic Regression Analysis:
    var analysis = new MultinomialLogisticRegressionAnalysis();

    // Note: the class and variable names could have been passed
    // from iris.ClassNames and iris.VariableNames at creation:
    //
    // var analysis = new MultinomialLogisticRegressionAnalysis()
    // {
    //     InputNames = iris.VariableNames,
    //     OutputNames = iris.ClassNames
    // };

    // This example deliberately omits them to show that they are
    // not required when learning a regression analysis.

    // Learn the regression from the input and output pairs:
    MultinomialLogisticRegression regression = analysis.Learn(x, y);

    // Retrieve some information about what was just learned:
    int coefficients = analysis.Coefficients.Count; // should be 11
    int numberOfInputs = analysis.NumberOfInputs;   // should be 4
    int numberOfOutputs = analysis.NumberOfOutputs; // should be 3

    string[] inputNames = analysis.InputNames;   // should be "Input 0", "Input 1", "Input 2", "Input 3"
    string[] outputNames = analysis.OutputNames; // should be "Class 0", "Class 1", "Class 2"

    // The regression is best visualized when it is data-bound to a
    // Windows.Forms DataGridView or WPF DataGrid. You can get the
    // values for all different coefficients and discrete values:

    // DataGridBox.Show(regression.Coefficients); // uncomment this line

    // You can get the matrix of coefficients:
    double[][] coef = analysis.CoefficientValues;

    // Should be equal to:
    double[][] expectedCoef =
    {
        new[] { 2.85217775752471, -0.0579282723520426, -0.533293368378012, -1.16283850605289 },
        new[] { 5.21813357698422, -0.113601186660817, 0.291387041358367, -0.9826369387481 }
    };

    // And their associated standard errors:
    double[][] stdErr = analysis.StandardErrors;

    // Should be equal to:
    double[][] expectedErr =
    {
        new[] { -2.02458003380033, -0.339533576505471, -1.164084923948, -0.520961533343425, 0.0556314901718 },
        new[] { -3.73971589217449, -1.47672790071382, -1.76795568348094, -0.495032307980058, 0.113563519656386 }
    };

    // We can also get statistics and hypothesis tests:
    WaldTest[][] wald = analysis.WaldTests;          // should all have p < 0.05
    ChiSquareTest chiSquare = analysis.ChiSquare;    // should be p=0
    double logLikelihood = analysis.LogLikelihood;   // should be -29.558338705646587

    // You can use the regression to predict the values:
    int[] pred = regression.Transform(x);

    // And get the accuracy of the prediction if needed:
    var cm = GeneralConfusionMatrix.Estimate(regression, x, y);

    double acc = cm.Accuracy; // should be 0.94666666666666666
    double kappa = cm.Kappa;  // should be 0.91999999999999982
    #endregion

    Assert.AreEqual(11, coefficients);
    Assert.AreEqual(4, numberOfInputs);
    Assert.AreEqual(3, numberOfOutputs);

    Assert.AreEqual(new[] { "Input 0", "Input 1", "Input 2", "Input 3" }, inputNames);
    Assert.AreEqual(new[] { "Class 0", "Class 1", "Class 2" }, outputNames);

    Assert.AreEqual(0.94666666666666666, acc, 1e-10);
    Assert.AreEqual(0.91999999999999982, kappa, 1e-10);
    Assert.AreEqual(7.8271969268290043E-54, chiSquare.PValue, 1e-8);
    Assert.AreEqual(-29.558338705646587, logLikelihood, 1e-8);
}
/// <summary>
/// Runs a proportional hazards analysis on a small single-input example and checks the
/// predicted scores, coefficient, standard error, hazard ratio, partial log-likelihood,
/// deviance, Wald test and chi-square test against reference values.
/// </summary>
public void ComputeTest1()
{
    // Consider the following example data, adapted from John C. Pezzullo's
    // example for his great Cox's proportional hazards model example in
    // JavaScript (http://www.sph.emory.edu/~cdckms/CoxPH/prophaz2.html).

    // In this data, we have three columns. The first column denotes the
    // input variables for the problem. The second column, the survival
    // times. And the last one is the output of the experiment (if the
    // subject has died [1] or has survived [0]).

    double[,] example =
    {
        // input  time  censor
        { 50,  1, 0 },
        { 70,  2, 1 },
        { 45,  3, 0 },
        { 35,  5, 0 },
        { 62,  7, 1 },
        { 50, 11, 0 },
        { 45,  4, 0 },
        { 57,  6, 0 },
        { 32,  8, 0 },
        { 57,  9, 1 },
        { 60, 10, 1 },
    };

    // First we will extract the input, times and outputs
    double[,] inputs = example.GetColumns(0);
    double[] times = example.GetColumn(1);
    int[] output = example.GetColumn(2).ToInt32();

    // Now we can proceed and create the analysis
    var cox = new ProportionalHazardsAnalysis(inputs, times, output);

    cox.Compute(); // compute the analysis

    // Now we can show an analysis summary
    // DataGridBox.Show(cox.Coefficients);

    // We can also investigate all parameters individually. For
    // example the coefficients values will be available at
    double[] coef = cox.CoefficientValues;
    double[] stde = cox.StandardErrors;

    // We can also obtain the hazards ratios
    double[] ratios = cox.HazardRatios;

    // And other information such as the partial
    // likelihood, the deviance and also make
    // hypothesis tests on the parameters
    double partial = cox.LogLikelihood;
    double deviance = cox.Deviance;

    // Chi-Square for whole model
    ChiSquareTest chi = cox.ChiSquare;

    // Wald tests for individual parameters
    WaldTest wald = cox.Coefficients[0].Wald;

    // Finally, we can also use the model to predict
    // scores for new observations (without considering time)
    double y1 = cox.Regression.Compute(new double[] { 63 });
    double y2 = cox.Regression.Compute(new double[] { 32 });

    // Those scores can be interpreted by comparing them
    // to 1. If they are greater than one, the odds are
    // the patient will not survive. If the value is less
    // than one, the patient is likely to survive.

    // The first value, y1, gives approximately 86.138,
    // while the second value, y2, gives about 0.00072.

    // We can also consider instant estimates for a given time:
    double p1 = cox.Regression.Compute(new double[] { 63 }, 2);
    double p2 = cox.Regression.Compute(new double[] { 63 }, 10);

    // Here, p1 is the score after 2 time instants, with a
    // value of 0.0656. The second value, p2, is the score
    // after 10 time instants, with a value of 6.2907.

    // Floating-point results are compared with a small tolerance rather than
    // for exact equality, so last-bit differences between runtimes or
    // platforms do not fail the test.
    Assert.AreEqual(86.138421225296526, y1, 1e-10);
    Assert.AreEqual(0.00072281400325299814, y2, 1e-10);

    Assert.AreEqual(0.065660458190496693, p1, 1e-10);
    Assert.AreEqual(6.2907511049223928, p2, 1e-10);

    Assert.AreEqual(1, coef.Length);
    Assert.AreEqual(0.37704239281490765, coef[0], 1e-10);
    Assert.AreEqual(0.25415746361167235, stde[0], 1e-10);
    Assert.AreEqual(1.4579661153488215, ratios[0], 1e-10);

    Assert.AreEqual(-2.0252666205735466, partial, 1e-10);
    Assert.AreEqual(4.0505332411470931, deviance, 1e-10);

    Assert.AreEqual(0.13794183001851756, wald.PValue, 1e-4);

    Assert.AreEqual(1, chi.DegreesOfFreedom);
    Assert.AreEqual(7.3570, chi.Statistic, 1e-4);
    Assert.AreEqual(0.0067, chi.PValue, 1e-3);
}
/// <summary>
/// Runs a stepwise logistic regression analysis on a fictional age/smoking data set and
/// checks the complete model, the selected nested model, the nested model collection and
/// the prediction for a new sample against reference values.
/// </summary>
public void ComputeTest()
{
    // Suppose we have the following data about some patients.
    // The first variable is continuous and represent patient
    // age. The second variable is dichotomic and give whether
    // they smoke or not (this is completely fictional data).
    double[][] inputs =
    {
        //            Age  Smoking
        new double[] { 55,    0   },  // 1
        new double[] { 28,    0   },  // 2
        new double[] { 65,    1   },  // 3
        new double[] { 46,    0   },  // 4
        new double[] { 86,    1   },  // 5
        new double[] { 56,    1   },  // 6
        new double[] { 85,    0   },  // 7
        new double[] { 33,    0   },  // 8
        new double[] { 21,    1   },  // 9
        new double[] { 42,    1   },  // 10
        new double[] { 33,    0   },  // 11
        new double[] { 20,    1   },  // 12
        new double[] { 43,    1   },  // 13
        new double[] { 31,    1   },  // 14
        new double[] { 22,    1   },  // 15
        new double[] { 43,    1   },  // 16
        new double[] { 46,    0   },  // 17
        new double[] { 86,    1   },  // 18
        new double[] { 56,    1   },  // 19
        new double[] { 55,    0   },  // 20
    };

    // Additionally, we also have information about whether
    // or not those patients had lung cancer. The array
    // below gives 0 for those who did not, and 1 for those
    // who did.
    double[] output =
    {
        0, 0, 0, 1, 1, 1, 0, 0, 0, 1,
        0, 1, 1, 1, 1, 1, 0, 1, 1, 0
    };

    // Create a Stepwise Logistic Regression analysis
    var regression = new StepwiseLogisticRegressionAnalysis(inputs, output,
        new[] { "Age", "Smoking" }, "Cancer");

    regression.Compute(); // compute the analysis.

    // The full model will be stored in the complete property:
    StepwiseLogisticRegressionModel full = regression.Complete;

    // The best model will be stored in the current property:
    StepwiseLogisticRegressionModel best = regression.Current;

    // Let's check the full model results
    // DataGridBox.Show(full.Coefficients);

    // We can see only the Smoking variable is statistically significant.
    // This is an indication the Age variable could be discarded from
    // the model.

    // And check the best inner model result
    // DataGridBox.Show(best.Coefficients);

    // This is the best nested model found. This model only has the
    // Smoking variable, which is still significant. Since no other
    // variables can be dropped, this is the best final model.

    // The variables used in the current best model are
    string[] inputVariableNames = best.Inputs; // Smoking

    // The best model likelihood ratio p-value is
    ChiSquareTest test = best.ChiSquare; // {0.816990081334823}

    // so the model is distinguishable from a null model. We can also
    // query the other nested models by checking the Nested property:

    // DataGridBox.Show(regression.Nested);

    // Finally, we can also use the analysis to classify a new patient
    double y = regression.Current.Regression.Compute(new double[] { 1 });

    // For a smoking person, the answer probability is approximately 83%.

    Assert.AreEqual(3, full.Coefficients.Count);
    Assert.AreEqual("Intercept", full.Coefficients[0].Name);
    Assert.AreEqual("Age", full.Coefficients[1].Name);
    Assert.AreEqual("Smoking", full.Coefficients[2].Name);

    Assert.AreEqual(0.10115178966846869, full.Coefficients[0].OddsRatio, 1e-10);
    Assert.AreEqual(1.0071560349008841, full.Coefficients[1].OddsRatio, 1e-10);
    Assert.AreEqual(35.498643454320685, full.Coefficients[2].OddsRatio, 1e-10);
    Assert.IsFalse(full.Coefficients.Apply(p => p.OddsRatio).HasNaN());

    Assert.AreEqual(1.8621025559858235, full.Coefficients[0].StandardError, 1e-10);
    Assert.AreEqual(0.030965622111482096, full.Coefficients[1].StandardError, 1e-10);
    Assert.AreEqual(1.3272612173685281, full.Coefficients[2].StandardError, 1e-10);
    Assert.IsFalse(full.Coefficients.Apply(p => p.StandardError).HasNaN());

    Assert.AreEqual(2, best.Coefficients.Count);
    Assert.AreEqual("Intercept", best.Coefficients[0].Name);
    Assert.AreEqual("Smoking", best.Coefficients[1].Name);

    // Same 1e-10 tolerance as the other floating-point assertions in this test,
    // instead of exact equality on doubles.
    Assert.AreEqual(0.14285724083908749, best.Coefficients[0].OddsRatio, 1e-10);
    Assert.AreEqual(34.999975694637072, best.Coefficients[1].OddsRatio, 1e-10);

    Assert.AreEqual(1.0685028815195794, best.Coefficients[0].StandardError, 1e-10);
    Assert.AreEqual(1.3197099261438616, best.Coefficients[1].StandardError, 1e-10);
    Assert.IsFalse(best.Coefficients.Apply(p => p.StandardError).HasNaN());

    Assert.AreEqual(2, regression.Nested.Count);
    Assert.AreEqual(best, regression.Nested[0]);
    Assert.AreEqual("Age", regression.Nested[1].Names);

    Assert.AreEqual(0.83333333214363825, y, 1e-10);

    // The stored result must match the current model evaluated on its own variables.
    int[] finalVars = regression.Current.Variables;
    double[][] finalData = inputs.Submatrix(null, finalVars);
    double[] expectedOutput = regression.Current.Regression.Compute(finalData);
    Assert.IsTrue(regression.Result.IsEqual(expectedOutput));
}