public void ComputeTest1() { // Consider the following example data, adapted from John C. Pezzullo's // example for his great Cox's proportional hazards model example in // JavaScript (http://www.sph.emory.edu/~cdckms/CoxPH/prophaz2.html). // In this data, we have three columns. The first column denotes the // input variables for the problem. The second column, the survival // times. And the last one is the output of the experiment (if the // subject has died [1] or has survived [0]). double[,] example = { // input time censor { 50, 1, 0 }, { 70, 2, 1 }, { 45, 3, 0 }, { 35, 5, 0 }, { 62, 7, 1 }, { 50, 11, 0 }, { 45, 4, 0 }, { 57, 6, 0 }, { 32, 8, 0 }, { 57, 9, 1 }, { 60, 10, 1 }, }; // First we will extract the input, times and outputs double[,] inputs = example.GetColumns(0); double[] times = example.GetColumn(1); int[] output = example.GetColumn(2).ToInt32(); // Now we can proceed and create the analysis var cox = new ProportionalHazardsAnalysis(inputs, times, output); cox.Compute(); // compute the analysis // Now we can show an analysis summary // DataGridBox.Show(cox.Coefficients); // We can also investigate all parameters individually. For // example the coefficients values will be available at double[] coef = cox.CoefficientValues; double[] stde = cox.StandardErrors; // We can also obtain the hazards ratios double[] ratios = cox.HazardRatios; // And other information such as the partial // likelihood, the deviance and also make // hypothesis tests on the parameters double partial = cox.LogLikelihood; double deviance = cox.Deviance; // Chi-Square for whole model ChiSquareTest chi = cox.ChiSquare; // Wald tests for individual parameters WaldTest wald = cox.Coefficients[0].Wald; // Finally, we can also use the model to predict // scores for new observations (without considering time) double y1 = cox.Regression.Compute(new double[] { 63 }); double y2 = cox.Regression.Compute(new double[] { 32 }); // Those scores can be interpreted by comparing then // to 1. If they are greater than one, the odds are // the patient will not survive. If the value is less // than one, the patient is likely to survive. // The first value, y1, gives approximately 86.138, // while the second value, y2, gives about 0.00072. // We can also consider instant estimates for a given time: double p1 = cox.Regression.Compute(new double[] { 63 }, 2); double p2 = cox.Regression.Compute(new double[] { 63 }, 10); // Here, p1 is the score after 2 time instants, with a // value of 0.0656. The second value, p2, is the time // after 10 time instants, with a value of 6.2907. Assert.AreEqual(86.138421225296526, y1); Assert.AreEqual(0.00072281400325299814, y2); Assert.AreEqual(0.065660458190496693, p1); Assert.AreEqual(6.2907511049223928, p2); Assert.AreEqual(1, coef.Length); Assert.AreEqual(0.37704239281490765, coef[0]); Assert.AreEqual(0.25415746361167235, stde[0]); Assert.AreEqual(1.4579661153488215, ratios[0]); Assert.AreEqual(-2.0252666205735466, partial); Assert.AreEqual(4.0505332411470931, deviance); Assert.AreEqual(0.13794183001851756, wald.PValue, 1e-4); Assert.AreEqual(1, chi.DegreesOfFreedom); Assert.AreEqual(7.3570, chi.Statistic, 1e-4); Assert.AreEqual(0.0067, chi.PValue, 1e-3); }
private void btnSampleRunAnalysis_Click(object sender, EventArgs e) { // Check requirements if (sourceTable == null) { MessageBox.Show("A sample spreadsheet can be found in the " + "Resources folder in the same directory as this application.", "Please load some data before attempting an analysis"); return; } // Finishes and save any pending changes to the given data dgvAnalysisSource.EndEdit(); sourceTable.AcceptChanges(); // Gets the column of the dependent variable String dependentName = (string)cbTimeName.SelectedItem; String censorName = (string)cbEventName.SelectedItem; DataTable timeTable = sourceTable.DefaultView.ToTable(false, dependentName); DataTable censorTable = sourceTable.DefaultView.ToTable(false, censorName); // Gets the columns of the independent variables List<string> names = new List<string>(); foreach (string name in checkedListBox1.CheckedItems) names.Add(name); String[] independentNames = names.ToArray(); // Creates the input and output matrices from the source data table double[][] input; double[] time = timeTable.Columns[dependentName].ToArray(); int[] censor = censorTable.Columns[censorName].ToArray().ToInt32(); if (independentNames.Length == 0) { input = new double[time.Length][]; for (int i = 0; i < input.Length; i++) input[i] = new double[0]; } else { DataTable independent = sourceTable.DefaultView.ToTable(false, independentNames); input = independent.ToArray(); } String[] sourceColumns; double[,] sourceMatrix = sourceTable.ToMatrix(out sourceColumns); // Creates the Simple Descriptive Analysis of the given source DescriptiveAnalysis sda = new DescriptiveAnalysis(sourceMatrix, sourceColumns); sda.Compute(); // Populates statistics overview tab with analysis data dgvDistributionMeasures.DataSource = sda.Measures; // Creates the Logistic Regression Analysis of the given source pha = new ProportionalHazardsAnalysis(input, time, censor, independentNames, dependentName, censorName); // Compute the Logistic Regression Analysis pha.Compute(); // Populates coefficient overview with analysis data dgvLogisticCoefficients.DataSource = pha.Coefficients; // Populate details about the fitted model tbChiSquare.Text = pha.ChiSquare.Statistic.ToString("N5"); tbPValue.Text = pha.ChiSquare.PValue.ToString("N5"); checkBox1.Checked = pha.ChiSquare.Significant; tbDeviance.Text = pha.Deviance.ToString("N5"); tbLogLikelihood.Text = pha.LogLikelihood.ToString("N5"); // Populate projection source table string[] cols = independentNames; if (!independentNames.Contains(dependentName)) cols = cols.Concatenate(dependentName); if (!independentNames.Contains(censorName)) cols = cols.Concatenate(censorName); DataTable projSource = sourceTable.DefaultView.ToTable(false, cols); dgvProjectionSource.DataSource = projSource; }
private void btnSampleRunAnalysis_Click(object sender, EventArgs e) { // Check requirements if (sourceTable == null) { MessageBox.Show("A sample spreadsheet can be found in the " + "Resources folder in the same directory as this application.", "Please load some data before attempting an analysis"); return; } // Finishes and save any pending changes to the given data dgvAnalysisSource.EndEdit(); sourceTable.AcceptChanges(); // Gets the column of the dependent variable String dependentName = (string)cbTimeName.SelectedItem; String censorName = (string)cbEventName.SelectedItem; DataTable timeTable = sourceTable.DefaultView.ToTable(false, dependentName); DataTable censorTable = sourceTable.DefaultView.ToTable(false, censorName); // Gets the columns of the independent variables List <string> names = new List <string>(); foreach (string name in checkedListBox1.CheckedItems) { names.Add(name); } String[] independentNames = names.ToArray(); // Creates the input and output matrices from the source data table this.time = timeTable.Columns[dependentName].ToArray(); this.censor = censorTable.Columns[censorName].ToArray().ToInt32(); if (independentNames.Length == 0) { this.inputs = Jagged.Zeros(time.Length, 0); } else { DataTable independent = sourceTable.DefaultView.ToTable(false, independentNames); this.inputs = independent.ToJagged(); } String[] sourceColumns; this.sourceMatrix = sourceTable.ToJagged(out sourceColumns); // Creates the Simple Descriptive Analysis of the given source var sda = new DescriptiveAnalysis(sourceColumns).Learn(sourceMatrix); // Populates statistics overview tab with analysis data dgvDistributionMeasures.DataSource = sda.Measures; // Creates the Logistic Regression Analysis of the given source pha = new ProportionalHazardsAnalysis(independentNames, dependentName, censorName); // Compute the Logistic Regression Analysis ProportionalHazards model = pha.Learn(inputs, time, censor); // Populates coefficient overview with analysis data dgvLogisticCoefficients.DataSource = pha.Coefficients; // Populate details about the fitted model tbChiSquare.Text = pha.ChiSquare.Statistic.ToString("N5"); tbPValue.Text = pha.ChiSquare.PValue.ToString("N5"); checkBox1.Checked = pha.ChiSquare.Significant; tbDeviance.Text = pha.Deviance.ToString("N5"); tbLogLikelihood.Text = pha.LogLikelihood.ToString("N5"); // Populate projection source table string[] cols = independentNames; if (!independentNames.Contains(dependentName)) { cols = cols.Concatenate(dependentName); } if (!independentNames.Contains(censorName)) { cols = cols.Concatenate(censorName); } DataTable projSource = sourceTable.DefaultView.ToTable(false, cols); dgvProjectionSource.DataSource = projSource; }
public void learn_test() { #region doc_learn_part1 // Consider the following example data, adapted from John C. Pezzullo's // example for his great Cox's proportional hazards model example in // JavaScript (http://statpages.org/prophaz2.html). // In this data, we have three columns. The first column denotes the // input variables for the problem. The second column, the survival // times. And the last one is the output of the experiment (if the // subject has died [1] or has survived [0]). double[][] example = { // input time censor new double[] { 50, 1, 0 }, new double[] { 70, 2, 1 }, new double[] { 45, 3, 0 }, new double[] { 35, 5, 0 }, new double[] { 62, 7, 1 }, new double[] { 50, 11, 0 }, new double[] { 45, 4, 0 }, new double[] { 57, 6, 0 }, new double[] { 32, 8, 0 }, new double[] { 57, 9, 1 }, new double[] { 60, 10, 1 }, }; // First we will extract the input, times and outputs double[][] inputs = example.Get(null, 0, 1); double[] times = example.GetColumn(1); SurvivalOutcome[] output = example.GetColumn(2).To <SurvivalOutcome[]>(); // Now we can proceed and create the analysis (giving optional variable names) var cox = new ProportionalHazardsAnalysis(new[] { "input" }, "time", "censor"); // Then compute the analysis, learning a regression in the process: ProportionalHazards regression = cox.Learn(inputs, times, output); // Now we can show an analysis summary // Accord.Controls.DataGridBox.Show(cox.Coefficients); #endregion #region doc_learn_part2 // We can also investigate all parameters individually. For // example the coefficients values will be available at double[] coef = cox.CoefficientValues; // should be { 0.37704239281490765 } double[] stde = cox.StandardErrors; // should be { 0.25415746361167235 } // We can also obtain the hazards ratios double[] ratios = cox.HazardRatios; // should be { 1.4579661153488215 } // And other information such as the partial // likelihood, the deviance and also make // hypothesis tests on the parameters double partialL = cox.LogLikelihood; // should be -2.0252666205735466 double deviance = cox.Deviance; // should be 4.0505332411470931 // Chi-Square for whole model ChiSquareTest chi = cox.ChiSquare; // should be 7.3570 (p=0.0067) // Wald tests for individual parameters WaldTest wald = cox.Coefficients[0].Wald; // should be 1.4834 (p=0.1379) // Finally, we can also use the model to predict // scores for new observations (without considering time) double y1 = cox.Regression.Probability(new double[] { 63 }); // should be 86.138421225296526 double y2 = cox.Regression.Probability(new double[] { 32 }); // should be 0.00072281400325299814 // Those scores can be interpreted by comparing then // to 1. If they are greater than one, the odds are // the patient will not survive. If the value is less // than one, the patient is likely to survive. // The first value, y1, gives approximately 86.138, // while the second value, y2, gives about 0.00072. // We can also consider instant estimates for a given time: double p1 = cox.Regression.Probability(new double[] { 63 }, 2); // should be 0.17989138010770425 double p2 = cox.Regression.Probability(new double[] { 63 }, 10); // should be 15.950244161356357 // Here, p1 is the score after 2 time instants, with a // value of 0.0656. The second value, p2, is the time // after 10 time instants, with a value of 6.2907. // In addition, if we would like a higher precision when // computing very small probabilities using the methods // above, we can use the LogLikelihood methods instead: double log_y1 = cox.Regression.LogLikelihood(new double[] { 63 }); // should be 4.4559555514489091 double log_y2 = cox.Regression.LogLikelihood(new double[] { 32 }); // should be -7.2323586258132284 double log_p1 = cox.Regression.LogLikelihood(new double[] { 63 }, 2); // should be -1.7154020540835324 double log_p2 = cox.Regression.LogLikelihood(new double[] { 63 }, 10); // should be 2.7694741370357177 #endregion Assert.AreEqual(86.138421225296526, y1, 1e-10); Assert.AreEqual(0.00072281400325299814, y2, 1e-10); Assert.AreEqual(0.17989138010770425, p1, 1e-10); Assert.AreEqual(15.950244161356357, p2, 1e-10); Assert.AreEqual(4.4559555514489091, log_y1, 1e-10); Assert.AreEqual(-7.2323586258132284, log_y2, 1e-10); Assert.AreEqual(-1.7154020540835324, log_p1, 1e-10); Assert.AreEqual(2.7694741370357177, log_p2, 1e-10); Assert.AreEqual(System.Math.Log(y1), log_y1, 1e-10); Assert.AreEqual(System.Math.Log(y2), log_y2, 1e-10); Assert.AreEqual(System.Math.Log(p1), log_p1, 1e-10); Assert.AreEqual(System.Math.Log(p2), log_p2, 1e-10); Assert.AreEqual(1, coef.Length); Assert.AreEqual(0.37704239281490765, coef[0]); Assert.AreEqual(0.25415746361167235, stde[0]); Assert.AreEqual(1.4579661153488215, ratios[0]); Assert.AreEqual(-2.0252666205735466, partialL, 1e-6); Assert.AreEqual(4.0505332411470931, deviance, 1e-6); Assert.AreEqual(1.4834991955655938, wald.Statistic, 1e-4); Assert.AreEqual(0.13794183001851756, wald.PValue, 1e-4); Assert.AreEqual(1, chi.DegreesOfFreedom); Assert.AreEqual(7.3570, chi.Statistic, 1e-4); Assert.AreEqual(0.0067, chi.PValue, 1e-3); }