/// <summary>
/// Trains a logistic regression classifier with Iteratively Reweighted Least Squares,
/// prints its error rate on the test set to the console and saves the model to disk.
/// </summary>
/// <param name="train_data">Feature vectors used to train the classifier.</param>
/// <param name="test_data">Feature vectors used to evaluate the trained classifier.</param>
/// <param name="train_label">Labels of the rows of <paramref name="train_data"/>.</param>
/// <param name="test_label">Labels of the rows of <paramref name="test_data"/>.</param>
/// <param name="Classifier_Path">Directory where the classifier is saved on disk.</param>
/// <param name="Classifier_Name">File name under which the classifier is saved.</param>
public void LogisticRegression(double[][] train_data, double[][] test_data, int[] train_label, int[] test_label, String Classifier_Path, String Classifier_Name)
{
    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Tolerance = 1e-4,     // convergence tolerance
        MaxIterations = 100,  // upper bound on learning iterations
        Regularization = 0
    };

    LogisticRegression regression = learner.Learn(train_data, train_label);

    // Evaluate on the held-out data and report the error of the confusion matrix.
    var cm = GeneralConfusionMatrix.Estimate(regression, test_data, test_label);
    double error = cm.Error;
    Console.WriteLine(error);

    regression.Save(Path.Combine(Classifier_Path, Classifier_Name));
}
/// <summary>
/// Fits a weighted logistic regression (regularization disabled) on a small
/// fictional data set and checks the odds ratios and coefficients.
/// </summary>
public void ComputeTest3()
{
    // Each row is one sample; the trailing comment is its class label.
    double[][] samples =
    {
        new double[] { 55, 0 }, // 0 - no cancer
        new double[] { 28, 0 }, // 0
        new double[] { 65, 1 }, // 0
        new double[] { 46, 0 }, // 1 - have cancer
        new double[] { 86, 1 }, // 1
        new double[] { 86, 1 }, // 1
        new double[] { 56, 1 }, // 1
        new double[] { 85, 0 }, // 0
        new double[] { 33, 0 }, // 0
        new double[] { 21, 1 }, // 0
        new double[] { 42, 1 }, // 1
    };

    double[] labels = { 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1 };

    // The two identical { 86, 1 } rows each carry half weight.
    double[] sampleWeights = { 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0 };

    LogisticRegression regression = new LogisticRegression(inputs: 2);

    var teacher = new IterativeReweightedLeastSquares(regression);
    teacher.Regularization = 0;

    // Iterate until the change reported by Run drops to 0.001 or below.
    double change;
    do
    {
        change = teacher.Run(samples, labels, sampleWeights);
    }
    while (change > 0.001);

    double oddsForAge = regression.GetOddsRatio(1);
    double oddsForSmoking = regression.GetOddsRatio(2);

    Assert.AreEqual(1.0208597028836701, oddsForAge, 1e-10);
    Assert.AreEqual(5.8584748789881331, oddsForSmoking, 1e-8);

    Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
    Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
    Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-8);
    Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);
}
/// <summary>
/// Fits a weighted logistic regression with the teacher's default settings
/// and checks that both odds ratios match the reference values and are not NaN.
/// </summary>
public void ComputeTest3()
{
    // Each row is one sample; the trailing comment is its class label.
    double[][] samples =
    {
        new double[] { 55, 0 }, // 0 - no cancer
        new double[] { 28, 0 }, // 0
        new double[] { 65, 1 }, // 0
        new double[] { 46, 0 }, // 1 - have cancer
        new double[] { 86, 1 }, // 1
        new double[] { 86, 1 }, // 1
        new double[] { 56, 1 }, // 1
        new double[] { 85, 0 }, // 0
        new double[] { 33, 0 }, // 0
        new double[] { 21, 1 }, // 0
        new double[] { 42, 1 }, // 1
    };

    double[] labels = { 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1 };

    // The two identical { 86, 1 } rows each carry half weight.
    double[] sampleWeights = { 1.0, 1.0, 1.0, 1.0, 0.5, 0.5, 1.0, 1.0, 1.0, 1.0, 1.0 };

    LogisticRegression regression = new LogisticRegression(inputs: 2);
    var teacher = new IterativeReweightedLeastSquares(regression);

    // Iterate until the change reported by Run drops to 0.001 or below.
    double change;
    do
    {
        change = teacher.Run(samples, labels, sampleWeights);
    }
    while (change > 0.001);

    double oddsForAge = regression.GetOddsRatio(1);
    double oddsForSmoking = regression.GetOddsRatio(2);

    Assert.AreEqual(1.0208597028836701, oddsForAge, 1e-10);
    Assert.AreEqual(5.8584748789881331, oddsForSmoking, 1e-10);

    Assert.IsFalse(double.IsNaN(oddsForAge));
    Assert.IsFalse(double.IsNaN(oddsForSmoking));
}
/// <summary>
/// Trains a logistic regression model from the given training records and
/// stores it in <c>regression</c>.
/// </summary>
/// <param name="datalist">
/// Training records; <c>GetData</c> converts them into the input vectors and
/// integer labels that are passed to the learner.
/// </param>
public void BuildLR(List<train> datalist)
{
    double[][] input;
    int[] output;
    GetData(out input, out output, datalist);

    // The model is learned with "Iteratively Reweighted Least Squares".
    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Tolerance = 1e-4,   // convergence tolerance
        Iterations = 100,   // maximum number of iterations to perform
        Regularization = 0
    };

    // 'regression' is declared outside this method; it receives the trained model.
    regression = learner.Learn(input, output);

    log("The LogisticRegression model has been trained");
}
/// <summary>
/// Caches the model outputs, the goodness-of-fit statistics and the
/// per-coefficient statistics of the current regression.
/// </summary>
/// <param name="inputData">Input vectors the statistics are computed on.</param>
/// <param name="outputData">Expected outputs for <paramref name="inputData"/>.</param>
/// <param name="weights">
/// Optional per-sample weights; when null the unweighted overloads are used.
/// </param>
private void computeInformation(double[][] inputData, double[] outputData, double[] weights)
{
    // Model outputs; warnings 612/618 (obsolete member usage) are suppressed for this call.
#pragma warning disable 612, 618
    result = regression.Compute(inputData);
#pragma warning restore 612, 618

    // Goodness-of-fit statistics, weighted when weights were supplied.
    if (weights != null)
    {
        deviance = regression.GetDeviance(inputData, outputData, weights);
        logLikelihood = regression.GetLogLikelihood(inputData, outputData, weights);
        chiSquare = regression.ChiSquare(inputData, outputData, weights);
    }
    else
    {
        deviance = regression.GetDeviance(inputData, outputData);
        logLikelihood = regression.GetLogLikelihood(inputData, outputData);
        chiSquare = regression.ChiSquare(inputData, outputData);
    }

    // One entry per regression coefficient.
    for (int c = 0; c < regression.Coefficients.Length; c++)
    {
        standardErrors[c] = regression.StandardErrors[c];
        waldTests[c] = regression.GetWaldTest(c);
        coefficients[c] = regression.Coefficients[c];
        confidences[c] = regression.GetConfidenceInterval(c);
        oddsRatios[c] = regression.GetOddsRatio(c);
    }
}
/// <summary>
/// Converts a <see cref="LogisticRegression"/> into its response DTO, copying the
/// odds ratios, the standard errors and the converted linear regression result.
/// </summary>
/// <param name="logisticRegression">The regression to convert.</param>
/// <returns>A new DTO populated from <paramref name="logisticRegression"/>.</returns>
public static DTOs.Responses.LogisticRegressionResult ToDto(this LogisticRegression logisticRegression)
{
    var oddsRatio = logisticRegression.GetOddsRatio();
    var standardErrors = logisticRegression.GetStandardErrors();
    var linearResult = logisticRegression.LinearRegressionResult.ToDto();

    return new DTOs.Responses.LogisticRegressionResult
    {
        OddsRatio = oddsRatio,
        StandardErrors = standardErrors,
        Regression = linearResult
    };
}
/// <summary>
///   Computes the Logistic Regression Analysis.
/// </summary>
/// <remarks>The likelihood surface for the
///   logistic regression learning is convex, so there will be only one
///   peak. Any local maximum will also be a global maximum.
/// </remarks>
/// <param name="limit">
///   The difference between two iterations of the regression algorithm
///   when the algorithm should stop. If not specified, the value of
///   10e-4 will be used. The difference is calculated based on the largest
///   absolute parameter change of the regression.
/// </param>
/// <param name="maxIterations">
///   The maximum number of iterations to be performed by the regression
///   algorithm.
/// </param>
/// <returns>
///   True if the full model converged (its last parameter change was at most
///   <paramref name="limit"/>); false if learning stopped because
///   <paramref name="maxIterations"/> was reached first.
/// </returns>
///
public bool Compute(double limit, int maxIterations)
{
    double delta;
    int iteration = 0;

    do // learning iterations until convergence
    {
        delta = regression.Regress(inputData, outputData);
        iteration++;
    } while (delta > limit && iteration < maxIterations);

    // The loop ends either because delta dropped to the limit or because the
    // iteration budget ran out, so the iteration count cannot tell the two
    // cases apart. Convergence is decided by the last delta.
    bool converged = delta <= limit;

    // Store model information
    this.result = regression.Compute(inputData);
    this.deviance = regression.GetDeviance(inputData, outputData);
    this.logLikelihood = regression.GetLogLikelihood(inputData, outputData);
    this.chiSquare = regression.ChiSquare(inputData, outputData);

    // Store coefficient information
    for (int i = 0; i < regression.Coefficients.Length; i++)
    {
        this.waldTests[i] = regression.GetWaldTest(i);
        this.standardErrors[i] = regression.GetStandardError(i);
        this.coefficients[i] = regression.Coefficients[i];
        this.confidences[i] = regression.GetConfidenceInterval(i);
        this.oddsRatios[i] = regression.GetOddsRatio(i);
    }

    // Perform likelihood-ratio tests against diminished nested models
    for (int i = 0; i < inputCount; i++)
    {
        // Create a diminished inner model without the current variable
        double[][] data = inputData.RemoveColumn(i);
        LogisticRegression inner = new LogisticRegression(inputCount - 1);

        iteration = 0;

        do // learning iterations until convergence
        {
            delta = inner.Regress(data, outputData);
            iteration++;
        } while (delta > limit && iteration < maxIterations);

        // Likelihood-ratio statistic of the full model against the nested one.
        // Slot 0 of ratioTests is left untouched; input i is stored at i + 1.
        double ratio = 2.0 * (logLikelihood - inner.GetLogLikelihood(data, outputData));
        ratioTests[i + 1] = new ChiSquareTest(ratio, 1);
    }

    // Returns true if the full model has converged, false otherwise.
    return converged;
}
/// <summary>
///   Creates a stepwise model entry around the given regression and caches
///   its per-coefficient statistics.
/// </summary>
///
internal StepwiseLogisticRegressionModel(StepwiseLogisticRegressionAnalysis analysis, LogisticRegression regression, int[] variables, ChiSquareTest chiSquare, ChiSquareTest[] tests)
{
    this.Analysis = analysis;
    this.Regression = regression;

    // One slot per input plus one extra.
    int coefficientCount = regression.NumberOfInputs + 1;

    this.Inputs = analysis.Inputs.Get(variables);
    this.ChiSquare = chiSquare;
    this.LikelihoodRatioTests = tests;
    this.Variables = variables;

    this.StandardErrors = new double[coefficientCount];
    this.WaldTests = new WaldTest[coefficientCount];
    this.CoefficientValues = new double[coefficientCount];
    this.Confidences = new DoubleRange[coefficientCount];
    this.OddsRatios = new double[coefficientCount];

    // Copy the statistics of every coefficient out of the regression.
    for (int c = 0; c < regression.NumberOfInputs + 1; c++)
    {
        this.StandardErrors[c] = regression.StandardErrors[c];
        this.WaldTests[c] = regression.GetWaldTest(c);
        this.CoefficientValues[c] = regression.GetCoefficient(c);
        this.Confidences[c] = regression.GetConfidenceInterval(c);
        this.OddsRatios[c] = regression.GetOddsRatio(c);
    }

    // Names is the list of selected inputs separated by ", ".
    StringBuilder names = new StringBuilder();
    for (int k = 0; k < Inputs.Length; k++)
    {
        if (k > 0)
        {
            names.Append(", ");
        }

        names.Append(Inputs[k]);
    }

    this.Names = names.ToString();

    // Wrap each coefficient index in a NestedLogisticCoefficient bound to this model.
    var nested = new List<NestedLogisticCoefficient>(coefficientCount);
    for (int c = 0; c < coefficientCount; c++)
    {
        nested.Add(new NestedLogisticCoefficient(this, c));
    }

    this.Coefficients = new NestedLogisticCoefficientCollection(nested);
}
/// <summary>
/// Fits a logistic regression on a single parameter of the stored records:
/// the first <c>length * percentage</c> records are labelled 0 and the same
/// number of records at the end are labelled 1.
/// </summary>
/// <param name="index">Index of the parameter (as returned by <c>GetParameters()</c>) to train on.</param>
/// <param name="percentage">
/// Fraction of the records taken from each end of <c>MD</c>; it must select at
/// least one record and no more than all of them.
/// </param>
/// <returns>The trained regression model.</returns>
/// <exception cref="System.ArgumentOutOfRangeException">
/// <paramref name="percentage"/> selects no records or more records than are stored.
/// </exception>
public LogisticRegression GetLogisticRegressionParams(int index, double percentage)
{
    // NOTE(review): the split into "first" and "last" records presumably relies on
    // MD being sorted by the chosen parameter beforehand - confirm against callers.
    int length = MD.Length;
    int samplesize = (int)(length * percentage);

    // Without this check an empty sample reaches the learner and an oversized one
    // fails with an index error in the middle of building the training set.
    if (samplesize < 1 || samplesize > length)
    {
        throw new System.ArgumentOutOfRangeException(
            "percentage",
            percentage,
            "percentage must select at least one record and at most all stored records.");
    }

    double[][] inputArr = new double[samplesize * 2][];
    double[] outputArr = new double[samplesize * 2];

    // Every input vector has two elements and the second is always 0. The shape is
    // kept so vectors of the form { value, 0 } can be passed to the returned model.

    // Leading records form class 0.
    for (int i = 0; i < samplesize; i++)
    {
        inputArr[i] = new double[] { MD[i].GetParameters()[index], 0 };
        outputArr[i] = 0;
    }

    // Trailing records form class 1.
    int j = length - samplesize;
    for (int i = samplesize; i < samplesize * 2; i++)
    {
        inputArr[i] = new double[] { MD[j].GetParameters()[index], 0 };
        outputArr[i] = 1;
        j++;
    }

    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Tolerance = 1e-4,   // convergence tolerance
        Iterations = 100,   // maximum number of iterations to perform
        Regularization = 0
    };

    return learner.Learn(inputArr, outputArr);
}
/// <summary>
/// Caches the model outputs, the goodness-of-fit statistics and the
/// per-coefficient statistics of the current regression, computed on the
/// stored input and output data.
/// </summary>
private void computeInformation()
{
    // Model-level results
    result = regression.Compute(inputData);
    deviance = regression.GetDeviance(inputData, outputData);
    logLikelihood = regression.GetLogLikelihood(inputData, outputData);
    chiSquare = regression.ChiSquare(inputData, outputData);

    // One entry per regression coefficient
    for (int c = 0; c < regression.Coefficients.Length; c++)
    {
        standardErrors[c] = regression.StandardErrors[c];
        waldTests[c] = regression.GetWaldTest(c);
        coefficients[c] = regression.Coefficients[c];
        confidences[c] = regression.GetConfidenceInterval(c);
        oddsRatios[c] = regression.GetOddsRatio(c);
    }
}
/// <summary>
/// Fits a logistic regression on a small data set and checks the prediction
/// for a new sample, the odds ratios and the standard errors.
/// </summary>
public void When_Compute_Logistic_Regression()
{
    // Doubles are compared after rounding to this many decimal places, because
    // bit-exact equality on solver output is sensitive to platform and runtime.
    const int precision = 10;

    double[][] inputs =
    {
        new double[] { 55, 0 },
        new double[] { 28, 0 },
        new double[] { 65, 1 },
        new double[] { 46, 0 },
        new double[] { 86, 1 },
        new double[] { 56, 1 },
        new double[] { 85, 0 },
        new double[] { 33, 0 },
        new double[] { 21, 1 },
        new double[] { 42, 1 }
    };

    double[] outputs = { 0, 0, 0, 1, 1, 1, 0, 0, 0, 1 };

    var logisticRegression = new LogisticRegression();
    logisticRegression.Regress(inputs, outputs);

    var result = logisticRegression.Compute(new double[] { 87, 1 });
    var oddsRatio = logisticRegression.GetOddsRatio();
    var standardErrors = logisticRegression.GetStandardErrors();

    Assert.Equal(0.75143272858390264, result, precision);

    Assert.Equal(0.085627701183141239, oddsRatio[0], precision);
    Assert.Equal(1.0208597029292656, oddsRatio[1], precision);
    Assert.Equal(5.8584748981778869, oddsRatio[2], precision);

    Assert.Equal(2.1590686019476122, standardErrors[0], precision);
    Assert.Equal(0.0337904223210436, standardErrors[1], precision);
    Assert.Equal(1.4729903935788495, standardErrors[2], precision);
}
/// <summary>
/// Fits a logistic regression with the teacher's default settings on fictional
/// patient data and checks the odds ratios of both variables.
/// </summary>
public void ComputeTest()
{
    // Fictional patient data. The first column is the (continuous) age of the
    // patient, the second is a 0/1 flag telling whether the patient smokes.
    double[][] patients =
    {
        new double[] { 55, 0 }, // 0 - no cancer
        new double[] { 28, 0 }, // 0
        new double[] { 65, 1 }, // 0
        new double[] { 46, 0 }, // 1 - have cancer
        new double[] { 86, 1 }, // 1
        new double[] { 56, 1 }, // 1
        new double[] { 85, 0 }, // 0
        new double[] { 33, 0 }, // 0
        new double[] { 21, 1 }, // 0
        new double[] { 42, 1 }, // 1
    };

    // Whether each patient has had lung cancer (again, fictional). The question
    // is whether smoking has any connection with it.
    double[] hadCancer = { 0, 0, 0, 1, 1, 1, 0, 0, 0, 1 };

    // A logistic regression model over the two inputs (age and smoking) ...
    LogisticRegression regression = new LogisticRegression(inputs: 2);

    // ... estimated with the Iteratively Reweighted Least Squares method.
    var teacher = new IterativeReweightedLeastSquares(regression);

    // Run returns the maximum relative change in the model parameters, which
    // serves as the convergence criterion.
    double change;
    do
    {
        change = teacher.Run(patients, hadCancer);
    }
    while (change > 0.001);

    // Coefficient 0 is always the intercept term; index 1 is the age and
    // index 2 is the smoking flag.

    // Odds of lung cancer for higher age, controlling for smoking.
    double oddsForAge = regression.GetOddsRatio(1); // 1.0208597028836701

    // Odds of lung cancer for smokers compared to non-smokers, controlling
    // for age (fictional, for demonstration purposes only).
    double oddsForSmoking = regression.GetOddsRatio(2); // 5.8584748789881331

    Assert.AreEqual(1.0208597028836701, oddsForAge, 1e-10);
    Assert.AreEqual(5.8584748789881331, oddsForSmoking, 1e-10);

    Assert.IsFalse(double.IsNaN(oddsForAge));
    Assert.IsFalse(double.IsNaN(oddsForSmoking));
}
/// <summary>
/// Fits a logistic regression (regularization disabled) on fictional patient
/// data and checks the predicted scores, odds ratios, coefficients and decisions.
/// </summary>
public void ComputeTest()
{
    // Suppose we have the following data about some patients.
    // The first variable is continuous and represents patient
    // age. The second variable is dichotomic and tells whether
    // they smoke or not (this is completely fictional data).
    double[][] input =
    {
        new double[] { 55, 0 }, // 0 - no cancer
        new double[] { 28, 0 }, // 0
        new double[] { 65, 1 }, // 0
        new double[] { 46, 0 }, // 1 - have cancer
        new double[] { 86, 1 }, // 1
        new double[] { 56, 1 }, // 1
        new double[] { 85, 0 }, // 0
        new double[] { 33, 0 }, // 0
        new double[] { 21, 1 }, // 0
        new double[] { 42, 1 }, // 1
    };

    // We also know if they have had lung cancer or not, and
    // we would like to know whether smoking has any connection
    // with lung cancer (this is completely fictional data).
    double[] output = { 0, 0, 0, 1, 1, 1, 0, 0, 0, 1 };

    // To verify this hypothesis, we create a logistic regression
    // model for those two inputs (age and smoking).
    LogisticRegression regression = new LogisticRegression(inputs: 2);

    // The model is estimated with the Iteratively Reweighted Least Squares method.
    var teacher = new IterativeReweightedLeastSquares(regression);
    teacher.Regularization = 0;

    // The Run method returns the maximum relative change in the model
    // parameters, which is used as the convergence criterion.
    double delta = 0;
    do
    {
        // Perform an iteration
        delta = teacher.Run(input, output);
    }
    while (delta > 0.001);

    // In the model, the variable at 0 is always the intercept term,
    // with the others following in sequence. Index 1 is the age
    // and index 2 is whether the patient smokes or not.

    // Odds of lung cancer for higher age, controlling for cigarette smoking.
    double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

    // Odds of lung cancer for smokers compared to non-smokers, controlling
    // for age (fictional, for demonstration purposes only).
    double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

    // One predicted score per input row.
    double[] actual = new double[input.Length];
    for (int i = 0; i < input.Length; i++)
    {
        actual[i] = regression.Compute(input[i]);
    }

    double[] expected =
    {
        0.21044171560168326,
        0.13242527535212373,
        0.65747803433771812,
        0.18122484822324372,
        0.74755661773156912,
        0.61450041841477232,
        0.33116705418194975,
        0.14474110902457912,
        0.43627109657399382,
        0.54419383282533118
    };

    // Scores come from an iterative solver, so they are compared with a
    // tolerance instead of exact double equality.
    for (int i = 0; i < actual.Length; i++)
    {
        Assert.AreEqual(expected[i], actual[i], 1e-8);
    }

    Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
    Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-8);

    Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
    Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
    Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
    Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);

    bool[] actualOutput = regression.Decide(input);

    Assert.IsFalse(actualOutput[0]);
    Assert.IsFalse(actualOutput[1]);
    Assert.IsTrue(actualOutput[2]);
    Assert.IsFalse(actualOutput[3]);
    Assert.IsTrue(actualOutput[4]);
    Assert.IsTrue(actualOutput[5]);
    Assert.IsFalse(actualOutput[6]);
    Assert.IsFalse(actualOutput[7]);
    Assert.IsFalse(actualOutput[8]);
    Assert.IsTrue(actualOutput[9]);
}
/// <summary>
/// Learns a logistic regression through the Learn API on fictional patient data
/// and checks the probabilities, the transformed scores, the odds ratios, the
/// coefficients and the decisions.
/// </summary>
public void learn_new_mechanism()
{
    #region doc_log_reg_1
    // Fictional data about some patients. The first variable is
    // continuous and holds the age of the patient; the second is
    // dichotomic and tells whether the patient smokes or not.

    // For each patient we also know whether they have had lung
    // cancer, and we would like to find out whether smoking has
    // any connection with it (again, completely fictional data).

    double[][] input =
    {              // age, smokes?, had cancer?
        new double[] { 55,    0  }, // false - no cancer
        new double[] { 28,    0  }, // false
        new double[] { 65,    1  }, // false
        new double[] { 46,    0  }, // true  - had cancer
        new double[] { 86,    1  }, // true
        new double[] { 56,    1  }, // true
        new double[] { 85,    0  }, // false
        new double[] { 33,    0  }, // false
        new double[] { 21,    1  }, // false
        new double[] { 42,    1  }, // true
    };

    bool[] output = // Whether each patient had lung cancer or not
    {
        false, false, false, true, true, true, false, false, false, true
    };

    // The hypothesis is checked with a logistic regression model over the
    // two inputs (age and smoking), learned with a method called
    // "Iteratively Reweighted Least Squares":
    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Tolerance = 1e-4,  // convergence tolerance
        Iterations = 100,  // maximum number of iterations to perform
        Regularization = 0
    };

    // The learner estimates the model from the data:
    LogisticRegression regression = learner.Learn(input, output);

    // The odds ratios of the variables can now be computed. The variable
    // at 0 is always the intercept term, with the others following in
    // sequence: index 1 is the age, index 2 is the smoking flag.

    // Individuals with higher age have 1.021 greater odds of getting
    // lung cancer, controlling for cigarette smoking.
    double ageOdds = regression.GetOddsRatio(1); // 1.0208597028836701

    // Individuals who smoke have 5.858 greater odds of developing lung
    // cancer compared to those who do not smoke, controlling for age
    // (completely fictional, for demonstration purposes only).
    double smokeOdds = regression.GetOddsRatio(2); // 5.8584748789881331

    // The Probability function predicts, for each patient, the
    // probability of being at risk of cancer:
    double[] scores = regression.Probability(input);

    // The Decide method turns those probabilities (from 0 to 1)
    // into actual true/false conclusions for each patient:
    bool[] actual = regression.Decide(input);
    #endregion

    double[] expectedScores =
    {
        0.21044171560168326,
        0.13242527535212373,
        0.65747803433771812,
        0.18122484822324372,
        0.74755661773156912,
        0.61450041841477232,
        0.33116705418194975,
        0.14474110902457912,
        0.43627109657399382,
        0.54419383282533118
    };

    for (int s = 0; s < scores.Length; s++)
    {
        Assert.AreEqual(expectedScores[s], scores[s], 1e-8);
    }

    // Transform is handed the scores array as its second argument and its
    // return value must match the same expected values.
    double[] transformed = regression.Transform(input, scores);
    for (int s = 0; s < scores.Length; s++)
    {
        Assert.AreEqual(expectedScores[s], transformed[s], 1e-8);
    }

    Assert.AreEqual(1.0208597028836701, ageOdds, 1e-10);
    Assert.AreEqual(5.8584748789881331, smokeOdds, 1e-6);

    Assert.AreEqual(-2.4577464307294092, regression.Intercept, 1e-8);
    Assert.AreEqual(-2.4577464307294092, regression.Coefficients[0], 1e-8);
    Assert.AreEqual(0.020645118265359252, regression.Coefficients[1], 1e-10);
    Assert.AreEqual(1.7678893101571855, regression.Coefficients[2], 1e-8);

    // Decisions for the ten patients, in input order.
    Assert.IsFalse(actual[0]);
    Assert.IsFalse(actual[1]);
    Assert.IsTrue(actual[2]);
    Assert.IsFalse(actual[3]);
    Assert.IsTrue(actual[4]);
    Assert.IsTrue(actual[5]);
    Assert.IsFalse(actual[6]);
    Assert.IsFalse(actual[7]);
    Assert.IsFalse(actual[8]);
    Assert.IsTrue(actual[9]);
}
private void GetResultButton_Click(object sender, EventArgs e) { Settings.Default["AlgorName"] = label3.Text; ResultBox.Items.Clear(); if (label3.Text == "Statics") { if (!(PF is Istatics)) { MessageBox.Show("Production Facade doesn't have this interface yet"); return; } Settings.Default["Parameter"] = textBox1.Text; int numOfParmas = PF.GetData()[0].GetNumOfParams(); double mean, sd; for (int i = 0; i < numOfParmas; i++) { mean = PF.GetMean(i); ResultBox.Items.Add("Mean of param " + i + ": " + mean); sd = PF.GetStandardDeviation(i); ResultBox.Items.Add("StandardDeviation of param " + i + ": " + sd); if (textBox1.Text != "") { double number = Double.Parse(textBox1.Text); double Threshold = PF.GetThreshold(number); ResultBox.Items.Add("Threshold of param " + i + ": " + Threshold); ResultBox.Items.Add(""); } } } else if (label3.Text == "Peak") { if (!(PF is Ipeek)) { MessageBox.Show("Production Facade doesn't have this interface yet"); return; } if (XaxiscomboBox.Text == "" && YaxiscomboBox.Text == "") { return; } sortData(); Settings.Default["NumOfPoints"] = numBox.Text; Settings.Default["Range"] = rangeBox.Text; if (YaxiscomboBox.Text.Contains("#")) { string text = YaxiscomboBox.Text.Substring(10).Trim(); int num = Int32.Parse(text); int numOfpoints = 3; double percentage = 0.2; if (numBox.Text != "") { numOfpoints = Int32.Parse(numBox.Text); } if (rangeBox.Text != "") { percentage = double.Parse(rangeBox.Text); } dataDict = PF.CalculateTimesEachX(num); List <PeekValleyData> peaks = PF.GetPeaksWithNumOfSamePoints(num, numOfpoints, percentage); peaksData = peaks; /* * for (int i = 0; i < PF.GetData().Length; i++) * { * ResultBox.Items.Add(PF.GetData()[i].WriteToLine()); * }*/ if (peaksData.Count() == 0) { ResultBox.Items.Add("No Peak found yet."); } foreach (var item in peaks) { ResultBox.Items.Add("Peak Value: " + item.value + " Times: " + item.times); } } else if (YaxiscomboBox.Text != "") { string text = YaxiscomboBox.Text.Substring(5).Trim(); int Yindex = 
Int32.Parse(text); // MessageBox.Show(Yindex.ToString()); int numOfpoints = 3; double percentage = 0.2; if (numBox.Text != "") { numOfpoints = Int32.Parse(numBox.Text); } if (rangeBox.Text != "") { percentage = double.Parse(rangeBox.Text); } text = XaxiscomboBox.Text.Substring(5).Trim(); int Xindex; if (XaxiscomboBox.Text.Contains("TimeSpan")) { Xindex = -1; } else { Xindex = Int32.Parse(text); } MyData[] MD = PF.GetData(); dataPoints = new points[MD.Count()]; for (int i = 0; i < MD.Count(); i++) { dataPoints[i].x = MD[i].GetParameters()[Xindex]; dataPoints[i].y = MD[i].GetParameters()[Yindex]; } List <PeekValleyData> peaks = PF.GetPeaksWithXY(Yindex, Xindex, numOfpoints, percentage); peaksData = peaks; /* * for (int i = 0; i < PF.GetData().Length; i++) * { * ResultBox.Items.Add(PF.GetData()[i].WriteToLine()); * } * */ foreach (var item in peaks) { ResultBox.Items.Add("X: " + item.x + " Y: " + item.y); } } } else if (label3.Text == "Polynomial Fit") { if (!(PF is Ipolyfit)) { MessageBox.Show("Production Facade doesn't have this interface yet"); return; } if (XaxiscomboBox.Text == "" && YaxiscomboBox.Text == "") { return; } sortData(); int power = 1; if (powerBox.Text != "") { power = Int32.Parse(powerBox.Text); } if (YaxiscomboBox.Text.Contains("#")) { string text = YaxiscomboBox.Text.Substring(10).Trim(); int num = Int32.Parse(text); Dictionary <double, int> dict = PF.CalculateTimesEachX(num); dataDict = dict; double[,] x = new double[dict.Keys.Count(), power]; double[] keys = dict.Keys.ToArray(); for (int i = 0; i < dict.Keys.Count(); i++) { for (int j = 0; j < power; j++) { x[i, j] = Math.Pow(keys[i], j + 1); } } double[] y = new double[keys.Length]; for (int i = 0; i < keys.Length; i++) { y[i] = dict[keys[i]]; } double[,] result = PF.GetPolyFit(x, y, power); paramsResult = result; string r = "y="; r = r + result[0, 0].ToString("#.000"); for (int i = 1; i <= power; i++) { r = r + "+ " + result[i, 0].ToString("#.000") + "x^" + i.ToString(); } 
ResultBox.Items.Add(r); } else if (YaxiscomboBox.Text != "") { string text = YaxiscomboBox.Text.Substring(5).Trim(); int Yindex = Int32.Parse(text); int Xindex; if (XaxiscomboBox.Text.Contains("TimeSpan")) { MessageBox.Show("We have not support Timespan in this algorithm yet."); return; } else { text = XaxiscomboBox.Text.Substring(5).Trim(); Xindex = Int32.Parse(text); } MyData[] MD = PF.GetData(); dataPoints = new points[MD.Count()]; double[,] x = new double[MD.Count(), power]; for (int i = 0; i < MD.Count(); i++) { dataPoints[i].x = MD[i].GetParameters()[Xindex]; for (int j = 0; j < power; j++) { x[i, j] = Math.Pow(MD[i].GetParameters()[Xindex], j + 1); } } double[] y = new double[MD.Count()]; for (int i = 0; i < y.Length; i++) { y[i] = MD[i].GetParameters()[Yindex]; dataPoints[i].y = y[i]; } double[,] result = PF.GetPolyFit(x, y, power); paramsResult = result; string r = "y="; r = r + result[0, 0].ToString("#.000"); for (int i = 1; i <= power; i++) { r = r + "+ " + result[i, 0].ToString("#.000") + "x^" + i.ToString(); } ResultBox.Items.Add(r); } if (xvalueBox.Text != "") { double x = Convert.ToDouble(xvalueBox.Text); double y = 0; for (int j = 0; j < paramsResult.GetLength(0); j++) { y = y + Math.Pow(x, j) * paramsResult[j, 0]; } ResultBox.Items.Add("X-value: " + x.ToString("#.000") + " Y-value: " + y.ToString("#.000")); Settings.Default["Xvalue"] = xvalueBox.Text; } Settings.Default["Power"] = powerBox.Text; } else if (label3.Text == "Normal Distribution Fit") { if (!(PF is INormalDistributionFit)) { MessageBox.Show("Production Facade doesn't have this interface yet"); return; } if (XaxiscomboBox.Text == "" && YaxiscomboBox.Text == "") { return; } sortData(); /* * for (int i = 0; i < PF.GetData().Length; i++) * { * ResultBox.Items.Add(PF.GetData()[i].WriteToLine()); * }*/ if (YaxiscomboBox.Text.Contains("#")) { string text = YaxiscomboBox.Text.Substring(10).Trim(); int num = Int32.Parse(text); dataDict = PF.CalculateTimesEachX(num); NormalDistributionInfo ndi = 
PF.GetNormalDistributionFitWithNumOfSamePoints(num); NDinfo = ndi; ResultBox.Items.Add("Mean: " + ndi.mean.ToString() + " SD: " + ndi.SD.ToString()); } else { if (XaxiscomboBox.Text == "" || YaxiscomboBox.Text == "") { return; } string text = YaxiscomboBox.Text.Substring(5).Trim(); int Yindex = Int32.Parse(text); int Xindex; if (XaxiscomboBox.Text.Contains("TimeSpan")) { MessageBox.Show("We have not support Timespan in this algorithm yet."); return; } else { text = XaxiscomboBox.Text.Substring(5).Trim(); Xindex = Int32.Parse(text); } MyData[] MD = PF.GetData(); dataPoints = new points[MD.Count()]; for (int i = 0; i < MD.Count(); i++) { dataPoints[i].x = MD[i].GetParameters()[Xindex]; dataPoints[i].y = MD[i].GetParameters()[Yindex]; } int num = Int32.Parse(text); dataDict = PF.CalculateTimesEachX(num); NormalDistributionInfo ndi = PF.GetNormalDistributionFitWithNumOfSamePoints(num); NDinfo = ndi; ResultBox.Items.Add("Mean: " + ndi.mean.ToString() + " SD: " + ndi.SD.ToString()); } if (xvalueBox.Text != "") { double x = Convert.ToDouble(xvalueBox.Text); double y = 0; y = (1 / (NDinfo.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((x - NDinfo.mean), 2) / (2 * Math.Pow(NDinfo.SD, 2))); int numberOfdata = 0; foreach (var item in dataDict) { numberOfdata = numberOfdata + item.Value; } y = y * numberOfdata; ResultBox.Items.Add("X-value: " + x.ToString("#.000") + " Y-value: " + y.ToString("#.000")); Settings.Default["Xvalue"] = xvalueBox.Text; } } else if (label3.Text == "Logistic Regression") { if (!(PF is ILogisticRegression)) { MessageBox.Show("Production Facade doesn't have this interface yet"); return; } if (comboBox2.Text != "") { string text = comboBox2.Text.Substring(5).Trim(); int index = Int32.Parse(text); dataDict = PF.CalculateTimesEachX(index); double percentage = 0.1; PF.SortData(index); if (rangeBox.Text != "") { percentage = double.Parse(rangeBox.Text); } LogisticRegression LR = PF.GetLogisticRegressionParams(index, percentage); LogisticInfo LI = new 
LogisticInfo(); LI.LogisticParams = new double[2]; LI.LogisticParams[0] = LR.Intercept; LI.LogisticParams[1] = LR.GetOddsRatio(1) - 1; ResultBox.Items.Add("Param0: " + LI.LogisticParams[0] + " Parma1: " + LI.LogisticParams[1]); if (xvalueBox.Text != "") { double x = Convert.ToDouble(xvalueBox.Text); double[] valueArr = new double[] { x, 0 }; ResultBox.Items.Add("Value: " + x + " Probability: " + LR.Probability(valueArr) + " Conclusion:" + LR.Decide(valueArr)); } MyData[] MD = PF.GetData(); double threshold = -1; /* * for (int i = 0; i < MD.Length; i++) * { * double value = MD[i].GetParameters()[index]; * double[] valueArr = new double[] { value, 0 }; * ResultBox.Items.Add("Value: " + value + " Probability: " + LR.Probability(valueArr) + " Conclusion:" + LR.Decide(valueArr)); * }*/ Dictionary <double, int> lowerDict = new Dictionary <double, int>(); Dictionary <double, int> higherDict = new Dictionary <double, int>(); for (int i = 0; i < MD.Length; i++) { double value = MD[i].GetParameters()[index]; double[] valueArr = new double[] { value, 0 }; if (threshold == -1 && LR.Decide(valueArr) == true) { threshold = value; } if (threshold == -1) { if (lowerDict.ContainsKey(value)) { lowerDict[value]++; } else { lowerDict.Add(value, 1); } } else { if (higherDict.ContainsKey(value)) { higherDict[value]++; } else { higherDict.Add(value, 1); } } } ResultBox.Items.Add("Threshold: " + threshold); } } else if (label3.Text == "Two Peaks") { if (!(PF is Ipeek)) { MessageBox.Show("Production Facade doesn't have this interface yet"); return; } if (peak1Box.Text != "" && peak2Box.Text != "") { string text = YaxiscomboBox.Text.Substring(10).Trim(); int index = Int32.Parse(text); double peak1 = Convert.ToDouble(peak1Box.Text); double peak2 = Convert.ToDouble(peak2Box.Text); int numOfpoints = 5; if (numBox.Text != "") { numOfpoints = Int32.Parse(numBox.Text); } NormalDistributionInfo peak1Info = PF.GetPeakNormalDistribution(peak1, index, numOfpoints); ResultBox.Items.Add("Mean: " + 
peak1Info.mean.ToString("#.000") + " SD: " + peak1Info.SD.ToString("#.000")); NormalDistributionInfo peak2Info = PF.GetPeakNormalDistribution(peak2, index, numOfpoints); ResultBox.Items.Add("Mean: " + peak2Info.mean.ToString("#.000") + " SD: " + peak2Info.SD.ToString("#.000")); if (xvalueBox.Text != "") { double x = Convert.ToDouble(xvalueBox.Text); double end = 0; double y = (1 / (peak1Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((x - peak1Info.mean), 2) / (2 * Math.Pow(peak1Info.SD, 2))); if (y < 0.01) { double x1 = x - 1; double y1 = (1 / (peak1Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((x1 - peak1Info.mean), 2) / (2 * Math.Pow(peak1Info.SD, 2))); if (y1 < y) { end = x1; } else { end = x + 1; } } else { double x1 = x - 1; double y1 = (1 / (peak1Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((x1 - peak1Info.mean), 2) / (2 * Math.Pow(peak1Info.SD, 2))); if (y1 < y) { end = x1; while (y1 > 0.01) { end--; y1 = (1 / (peak1Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((end - peak1Info.mean), 2) / (2 * Math.Pow(peak1Info.SD, 2))); } } else { end = x + 1; y1 = (1 / (peak1Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((end - peak1Info.mean), 2) / (2 * Math.Pow(peak1Info.SD, 2))); while (y1 > 0.01) { end++; y1 = (1 / (peak1Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((end - peak1Info.mean), 2) / (2 * Math.Pow(peak1Info.SD, 2))); } } } Func <double, double> f1 = (a) => (1 / (peak1Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((a - peak1Info.mean), 2) / (2 * Math.Pow(peak1Info.SD, 2))); double result1 = MathNet.Numerics.Integration.NewtonCotesTrapeziumRule.IntegrateTwoPoint(f1, x, end); if (x <= peak1) { result1 = 1 - Math.Abs(result1); if (result1 > 1) { result1 = 1; } } ResultBox.Items.Add("Probablity of keeping bad chips: " + Math.Abs(result1).ToString("#.000")); double end1 = 0; y = (1 / (peak2Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((x - peak2Info.mean), 2) / (2 * 
Math.Pow(peak2Info.SD, 2))); if (y < 0.01) { double x1 = x - 1; double y1 = (1 / (peak2Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((x1 - peak2Info.mean), 2) / (2 * Math.Pow(peak2Info.SD, 2))); if (y1 < y) { end1 = x1; } else { end1 = x + 1; } } else { double x1 = x - 1; double y1 = (1 / (peak2Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((x1 - peak2Info.mean), 2) / (2 * Math.Pow(peak2Info.SD, 2))); if (y1 < y) { end1 = x1; while (y1 > 0.01) { end1--; y1 = (1 / (peak2Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((end1 - peak2Info.mean), 2) / (2 * Math.Pow(peak2Info.SD, 2))); } } else { end1 = x + 1; y1 = (1 / (peak2Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((end1 - peak2Info.mean), 2) / (2 * Math.Pow(peak2Info.SD, 2))); while (y1 > 0.01) { end1++; y1 = (1 / (peak2Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((end1 - peak2Info.mean), 2) / (2 * Math.Pow(peak2Info.SD, 2))); } } } Func <double, double> f2 = (a) => (1 / (peak2Info.SD * Math.Sqrt(2 * Math.PI))) * Math.Exp(-Math.Pow((a - peak2Info.mean), 2) / (2 * Math.Pow(peak2Info.SD, 2))); double result2 = MathNet.Numerics.Integration.NewtonCotesTrapeziumRule.IntegrateTwoPoint(f2, x, end1); if (x >= peak2) { result2 = 1 - Math.Abs(result2); if (result2 > 1) { result2 = 1; } } ResultBox.Items.Add("probability of losing good chips: " + Math.Abs(result2).ToString("#.000")); } dataDict = PF.CalculateTimesEachX(index); TDinfo = new TwoDictInfo(); TDinfo.peak1Info = peak1Info; TDinfo.peak2Info = peak2Info; TDinfo.peak1value = peak1; TDinfo.peak2value = peak2; TDinfo.peak1times = dataDict[peak1]; TDinfo.peak2times = dataDict[peak2]; TDinfo.last = dataDict.Keys.ToList().Last(); //peak1Box.Text = ""; // peak2Box.Text = ""; } else { if (XaxiscomboBox.Text == "" && YaxiscomboBox.Text == "") { return; } sortData(); Settings.Default["NumOfPoints"] = numBox.Text; Settings.Default["Range"] = rangeBox.Text; if (YaxiscomboBox.Text.Contains("#")) { string text = 
YaxiscomboBox.Text.Substring(10).Trim(); int num = Int32.Parse(text); int numOfpoints = 3; double percentage = 0.2; if (numBox.Text != "") { numOfpoints = Int32.Parse(numBox.Text); } if (rangeBox.Text != "") { percentage = double.Parse(rangeBox.Text); } dataDict = PF.CalculateTimesEachX(num); List <PeekValleyData> peaks = PF.GetPeaksWithNumOfSamePoints(num, numOfpoints, percentage); peaksData = peaks; /* * for (int i = 0; i < PF.GetData().Length; i++) * { * ResultBox.Items.Add(PF.GetData()[i].WriteToLine()); * }*/ if (peaksData.Count() == 0) { ResultBox.Items.Add("No Peak found yet."); } foreach (var item in peaks) { ResultBox.Items.Add("Peak Value: " + item.value + " Times: " + item.times); } } else if (YaxiscomboBox.Text != "") { string text = YaxiscomboBox.Text.Substring(5).Trim(); int Yindex = Int32.Parse(text); // MessageBox.Show(Yindex.ToString()); int numOfpoints = 3; double percentage = 0.2; if (numBox.Text != "") { numOfpoints = Int32.Parse(numBox.Text); } if (rangeBox.Text != "") { percentage = double.Parse(rangeBox.Text); } text = XaxiscomboBox.Text.Substring(5).Trim(); int Xindex; if (XaxiscomboBox.Text.Contains("TimeSpan")) { Xindex = -1; } else { Xindex = Int32.Parse(text); } MyData[] MD = PF.GetData(); dataPoints = new points[MD.Count()]; for (int i = 0; i < MD.Count(); i++) { dataPoints[i].x = MD[i].GetParameters()[Xindex]; dataPoints[i].y = MD[i].GetParameters()[Yindex]; } List <PeekValleyData> peaks = PF.GetPeaksWithXY(Yindex, Xindex, numOfpoints, percentage); peaksData = peaks; /* * for (int i = 0; i < PF.GetData().Length; i++) * { * ResultBox.Items.Add(PF.GetData()[i].WriteToLine()); * } * */ foreach (var item in peaks) { ResultBox.Items.Add("X: " + item.x + " Y: " + item.y); } } if (peaksData.Count() < 2) { ResultBox.Items.Add("We need at least two peaks in this algorithm"); } else { peak1Box.Show(); peak2Box.Show(); label13.Show(); label12.Show(); xvalueBox.Show(); label10.Show(); peak1Box.Items.Clear(); peak2Box.Items.Clear(); points[] pts = 
GetPeakPoints(); foreach (var point in pts) { if (!peak1Box.Items.Contains(point.x)) { peak1Box.Items.Add(point.x); } if (!peak2Box.Items.Contains(point.x)) { peak2Box.Items.Add(point.x); } } } } } Settings.Default.Save(); }
/// <summary>
/// Uses data from <paramref name="fileName"/> to train a logistic regression model,
/// writes the fitted model and its fit statistics to the first unused
/// "Model-N.txt" file, and logs the variance inflation factors of the inputs.
/// </summary>
/// <remarks>
/// Each line of the data file is split on ':'. Field 1 holds the '&amp;'-separated
/// input values and field 2 holds the integer label; field 0 is not read here.
/// </remarks>
/// <param name="fileName">The name of the data file, without the ".txt" extension.</param>
/// <returns>
/// A printable summary containing the weights, one odds ratio per input,
/// the square loss and the R-squared loss.
/// </returns>
public static string Learn(string fileName)
{
    //Read all inputs and outputs from the training file.
    string[] lines = File.ReadAllLines("Logistic Regression Model/data/" + fileName + ".txt");
    double[][] inputs = new double[lines.Length][];
    int[] outputs = new int[lines.Length];

    for (int a = 0; a < lines.Length; a++)
    {
        string[] split = lines[a].Split(':');

        //The number of input variables is taken from the file, not hard-coded.
        string[] scores = split[1].Split('&');
        inputs[a] = new double[scores.Length];
        for (int b = 0; b < scores.Length; b++)
        {
            inputs[a][b] = double.Parse(scores[b]);
        }

        outputs[a] = int.Parse(split[2]);
    }

    //Set up Accord.NET learner.
    IterativeReweightedLeastSquares<LogisticRegression> learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Tolerance = 1e-4,
        MaxIterations = 100,
        Regularization = 1e-10
    };

    //Shuffle the input and output pairs to eliminate some inherent bias from
    //the training data. The rows are distinct array instances, and arrays are
    //compared by reference as dictionary keys, so rows with equal values do
    //not collide.
    //NOTE(review): Shuffle() is an extension defined outside this block; confirm
    //that it really permutes the dictionary's pairs rather than returning a copy.
    Dictionary<double[], int> map = inputs
        .Zip(outputs, (arg1, arg2) => new { arg1, arg2 })
        .ToDictionary(x => x.arg1, x => x.arg2);
    map.Shuffle();
    inputs = map.Keys.ToArray();
    outputs = map.Values.ToArray();

    //Train Regression
    LogisticRegression regression = learner.Learn(inputs, outputs.ToBoolArray());

    //Pick the first model file name that is not taken yet.
    const string modelPrefix = "Logistic Regression Model/models/Model-";
    int counter = 0;
    while (File.Exists(modelPrefix + counter + ".txt"))
    {
        counter++;
    }
    string modelPath = modelPrefix + counter + ".txt";

    //Print the weights
    string result = "Weights: " + regression.Weights.GetString() + "\n";

    //The using statements close the file even if a later step throws.
    using (FileStream fs = File.Create(modelPath))
    using (StreamWriter writer = new StreamWriter(fs))
    {
        //First line of the model file: the weights followed by the intercept.
        writer.WriteLine(regression.Weights.Append(regression.Intercept).ToArray().GetString());

        for (int c = 0; c < regression.Weights.Length; c++)
        {
            //GetOddsRatio is indexed over the coefficient vector, in which
            //index 0 is the intercept, so the ratio for Weights[c] is at c + 1.
            double oddsRatio = regression.GetOddsRatio(c + 1);
            writer.WriteLine(oddsRatio);
            result += "Odds Ratio " + c + ": " + oddsRatio + "\n";
        }

        //Collect predicted probabilities and the matching labels for the loss values.
        double[] actual = new double[inputs.Length];
        double[] expected = new double[outputs.Length];
        for (int a = 0; a < actual.Length; a++)
        {
            actual[a] = regression.Probability(inputs[a]);
            expected[a] = outputs[a];
        }

        //Calculate and print square loss.
        string loss = "Loss: " + new SquareLoss(expected) { Mean = true, Root = true }.Loss(actual);
        result += loss + "\n";
        writer.WriteLine(loss);
        Console.WriteLine("\n\n" + loss);

        //Calculate and print R-squared Loss
        string r2 = "R2: " + new RSquaredLoss(inputs[0].Length, expected).Loss(actual);
        result += r2;
        writer.WriteLine(r2);
    }

    Console.WriteLine("Model trained successfully!");
    Console.WriteLine("\nEvaluating...\n");

    //Get the VIFs
    float[] VIFs = CalculateVIFs(inputs);

    //Log it
    for (int a = 0; a < VIFs.Length; a++)
    {
        Logger.Log("Variance Inflation Factor #" + a + ": " + VIFs[a]);
    }

    return result;
}
/// <summary>
/// Demo that fits a logistic regression to a small hard-coded sample in two ways
/// and prints the results to the console.
/// </summary>
/// <remarks>
/// The first fit repeatedly calls <c>Run</c> on a two-input model until the value
/// it returns is no greater than 0.001, then evaluates the fitted logistic function
/// by hand for one probe input. The second fit uses the generic learner's
/// <c>Learn</c> method and prints the intercept, the two odds ratios minus one,
/// and the model's probability for the same probe input.
/// </remarks>
public static void Execute()
{
    double[][] input =
    {
        new double[] { 55, 0 },
        new double[] { 28, 0 },
        new double[] { 65, 0 },
        new double[] { 46, 0 },
        new double[] { 86, 0 },
        new double[] { 56, 0 },
        new double[] { 85, 0 },
        new double[] { 33, 0 },
        new double[] { 21, 0 },
        new double[] { 42, 0 },
    };

    double[] output = { 0, 0, 0, 1, 1, 1, 0, 0, 0, 1 };

    // First fit: drive the iterations by hand.
    LogisticRegression regression = new LogisticRegression(2);
    var trainer = new IterativeReweightedLeastSquares(regression);

    double delta = 0;
    do
    {
        // Perform an iteration
        delta = trainer.Run(input, output);
    }
    while (delta > 0.001);

    var b1 = regression.Coefficients[1];
    var b2 = regression.Coefficients[2];
    var b0 = regression.Intercept;

    System.Console.WriteLine(b0);
    System.Console.WriteLine(b1);
    System.Console.WriteLine(b2);

    // Logistic function evaluated directly from the fitted coefficients.
    Func<double, double, double> func = (x1, x2) => 1 / (1 + Math.Exp(-b0 - b1 * x1 - b2 * x2));

    var age = 79;
    var smoking = 0;
    var r = func(age, smoking);

    System.Console.WriteLine("input x [age:{0}, smoking:{1}] is {2}", age, smoking, r);

    // Second fit: let the learner build the model. No placeholder instance is
    // created up front because Learn returns the model it trained.
    var learner = new IterativeReweightedLeastSquares<LogisticRegression>()
    {
        Tolerance = 1e-4,  // Let's set some convergence parameters
        Iterations = 100,  // maximum number of iterations to perform
        Regularization = 0
    };

    LogisticRegression fitted = learner.Learn(input, output);

    System.Console.WriteLine(fitted.Intercept);
    System.Console.WriteLine(fitted.GetOddsRatio(1) - 1);
    System.Console.WriteLine(fitted.GetOddsRatio(2) - 1);

    double[] test = new double[] { 79, 0 };
    System.Console.WriteLine(fitted.Probability(test));
}