/// <summary>
/// Cmdlet end-of-pipeline handler: builds a multiple linear regression
/// analysis either from explicit X/Y arrays (parameter set "XY") or from a
/// named output column of the buffered data set, fits it (optionally with
/// sample weights), and writes the fitted model to the pipeline.
/// </summary>
protected override void EndProcessing()
{
    var model = new MultipleLinearRegressionAnalysis();

    double[][] designMatrix;
    double[] responses;

    if (ParameterSetName == "XY")
    {
        // Caller supplied predictors and responses directly.
        designMatrix = Converter.ToDoubleJaggedArray(X);
        responses = Converter.ToDoubleArray(Y);
    }
    else
    {
        // Pull the response column out of the accumulated data first,
        // then use the remaining columns as predictors.
        responses = _data.GetColumn(OutputName).ToDoubleArray();
        _data.RemoveColumn(OutputName);
        designMatrix = _data.ToDoubleJaggedArray();
        model.Inputs = _data.ColumnNames.ToArray<string>();
        model.Output = OutputName;
    }

    // Optional observation weights; null means unweighted.
    double[] sampleWeights = Weights != null ? Converter.ToDoubleArray(Weights) : null;

    model.Learn(designMatrix, responses, sampleWeights);
    WriteObject(model);
}
/// <summary>
/// Fits a multiple linear regression analysis (constructed with its boolean
/// flag set to true — presumably the intercept option; confirm against the
/// Accord.NET overload) over the filtered result set and returns it.
/// </summary>
public MultipleLinearRegressionAnalysis Regress()
{
    var analysis = new MultipleLinearRegressionAnalysis(true)
    {
        Inputs = GetInputNames()
    };

    // Targets are projected out of the filtered results via resultsFunc.
    double[] targets = Filter.FilteredResult().Select(resultsFunc).ToArray();
    analysis.Learn(InputData(), targets);
    return analysis;
}
/// <summary>
/// Fitting LinearRegressionEstimator must reproduce the coefficients that
/// Accord.NET's MultipleLinearRegressionAnalysis computes on the same data.
/// </summary>
public void TestLinearRegressionTest()
{
    // Sample data: age, smokes? -> whether each patient had lung cancer or not.
    double[][] inputs =
    {
        new double[] { 55, 0 },
        new double[] { 28, 0 },
        new double[] { 65, 1 },
        new double[] { 46, 0 },
        new double[] { 86, 1 },
        new double[] { 56, 1 },
        new double[] { 85, 0 },
        new double[] { 33, 0 },
        new double[] { 21, 1 },
        new double[] { 42, 1 },
    };
    var output = new double[] { 0, 0, 0, 1, 1, 1, 0, 0, 0, 1 };

    // Reference result computed with Accord.NET (plain OLS with intercept).
    var analysis = new MultipleLinearRegressionAnalysis();
    analysis.OrdinaryLeastSquares.UseIntercept = true;
    analysis.OrdinaryLeastSquares.IsRobust = false;
    analysis.Learn(inputs, output);

    // Result computed with the estimator under test.
    var x = DataMap.FromJagged(inputs, new string[] { "age", "smokes" });
    var y = new DataMap();
    y.Add("lung_cancer", output);

    var estimator = new LinearRegressionEstimator();
    estimator.Fit(x, y);
    var model = estimator.Model;

    // Both implementations must agree on the coefficient count and on every value.
    Assert.Equal(analysis.Coefficients.Count, model.Coefficients.Count);
    for (int i = 0; i < 3; i++)
    {
        Assert.Equal(analysis.Coefficients[i].Value, model.Coefficients[i].Value);
    }
}
/// <summary>
/// Runs a multiple linear regression (with intercept) of the default
/// dependent variable on the default independent variables over the loaded
/// historical data, binds the coefficients to the statistics grid, and
/// flags the process as completed. Warns and aborts when no data is loaded.
/// </summary>
private void Procesar()
{
    try
    {
        if (gvArchivo.Rows.Count == 0)
        {
            MessageBox.Show("Primero se debe realizar la carga de datos históricos", "Mensaje",
                MessageBoxButtons.OK, MessageBoxIcon.Warning);
            return;
        }

        tabControl1.SelectedIndex = 1;

        // Default variable names come from the application's helper configuration.
        string[] independentNames = Helper.UtilFunction.getVariablesIndependientesDefecto();
        string dependentName = Helper.UtilFunction.getVariableDependienteDefecto();

        // Split the source table into predictor and response columns.
        DataTable independent = sourceTable.DefaultView.ToTable(false, independentNames);
        DataTable dependent = sourceTable.DefaultView.ToTable(false, dependentName);
        inputs = independent.ToJagged();
        outputs = dependent.Columns[dependentName].ToArray();

        MultipleLinearRegressionAnalysis mlr = new MultipleLinearRegressionAnalysis(intercept: true)
        {
            Inputs = independentNames,
            Output = dependentName
        };

        // Fit the model. The returned MultipleLinearRegression is not needed:
        // results are read back from the analysis object itself.
        mlr.Learn(inputs, outputs);

        gvEstadistica.DataSource = mlr.Coefficients;
        listaCoeficientes = mlr.Coefficients.ToList();
        proceso = true;

        MessageBox.Show("Se ejecuto el algoritmo correctamente", "Message",
            MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.Message, "Mensaje", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Runs the full analysis pipeline on the loaded spreadsheet: descriptive
/// statistics, a logistic regression, and a multiple linear regression, then
/// populates the corresponding grids and summary fields on the form.
/// Validates that data is loaded and at least one input variable is checked.
/// </summary>
private void btnSampleRunAnalysis_Click(object sender, EventArgs e)
{
    // Check requirements
    if (sourceTable == null)
    {
        MessageBox.Show("A sample spreadsheet can be found in the " +
            "Resources folder in the same directory as this application.",
            "Please load some data before attempting an analysis");
        return;
    }

    if (checkedListBox1.CheckedItems.Count == 0)
    {
        MessageBox.Show("Please select the dependent input variables to be used in the regression model.",
            "Please choose at least one input variable");
        // BUG FIX: previously execution fell through and ran the
        // analysis with zero independent variables.
        return;
    }

    // Finishes and save any pending changes to the given data
    dgvAnalysisSource.EndEdit();
    sourceTable.AcceptChanges();

    // Gets the column of the dependent variable
    String dependentName = (string)comboBox1.SelectedItem;
    DataTable dependent = sourceTable.DefaultView.ToTable(false, dependentName);

    // Gets the columns of the independent variables
    List<string> names = new List<string>();
    foreach (string name in checkedListBox1.CheckedItems)
    {
        names.Add(name);
    }

    String[] independentNames = names.ToArray();
    DataTable independent = sourceTable.DefaultView.ToTable(false, independentNames);

    // Creates the input and output matrices from the source data table
    this.inputs = independent.ToJagged();
    this.outputs = dependent.Columns[dependentName].ToArray();

    // Creates the Simple Descriptive Analysis of the given source
    var sda = new DescriptiveAnalysis()
    {
        ColumnNames = independentNames
    }.Learn(inputs);

    // TODO: Standardize the InputNames/OutputNames properties

    // Populates statistics overview tab with analysis data
    dgvDistributionMeasures.DataSource = sda.Measures;

    // Creates the Logistic Regression Analysis of the given source
    this.lra = new LogisticRegressionAnalysis()
    {
        Inputs = independentNames,
        Output = dependentName
    };

    // Compute the Logistic Regression Analysis (results read back from lra).
    lra.Learn(inputs, outputs);

    // Populates coefficient overview with analysis data
    dgvLogisticCoefficients.DataSource = lra.Coefficients;

    // Populate details about the fitted logistic model
    tbChiSquare.Text = lra.ChiSquare.Statistic.ToString("N5");
    tbPValue.Text = lra.ChiSquare.PValue.ToString("N5");
    checkBox1.Checked = lra.ChiSquare.Significant;
    tbDeviance.Text = lra.Deviance.ToString("N5");
    tbLogLikelihood.Text = lra.LogLikelihood.ToString("N5");

    // Create the Multiple Linear Regression Analysis of the given source
    this.mlr = new MultipleLinearRegressionAnalysis(intercept: true)
    {
        Inputs = independentNames,
        Output = dependentName
    };

    // Compute the Linear Regression Analysis (results read back from mlr).
    mlr.Learn(inputs, outputs);

    dgvLinearCoefficients.DataSource = mlr.Coefficients;
    dgvRegressionAnova.DataSource = mlr.Table;

    // Overall goodness-of-fit statistics, formatted for display.
    tbRSquared.Text = mlr.RSquared.ToString("N5");
    tbRSquaredAdj.Text = mlr.RSquareAdjusted.ToString("N5");
    tbChiPValue.Text = mlr.ChiSquareTest.PValue.ToString("N5");
    tbFPValue.Text = mlr.FTest.PValue.ToString("N5");
    tbZPValue.Text = mlr.ZTest.PValue.ToString("N5");
    tbChiStatistic.Text = mlr.ChiSquareTest.Statistic.ToString("N5");
    tbFStatistic.Text = mlr.FTest.Statistic.ToString("N5");
    tbZStatistic.Text = mlr.ZTest.Statistic.ToString("N5");
    cbChiSignificant.Checked = mlr.ChiSquareTest.Significant;
    cbFSignificant.Checked = mlr.FTest.Significant;
    cbZSignificant.Checked = mlr.ZTest.Significant;

    // Populate projection source table (append the dependent column if missing).
    string[] cols = independentNames;
    if (!independentNames.Contains(dependentName))
    {
        cols = independentNames.Concatenate(dependentName);
    }

    DataTable projSource = sourceTable.DefaultView.ToTable(false, cols);
    dgvProjectionSource.DataSource = projSource;
}
/// <summary>
/// Runs a multiple linear regression (with intercept) of
/// <paramref name="_dependentName"/> on the comma-separated
/// <paramref name="_independentName"/> columns of <paramref name="sourceTable"/>,
/// and returns the full result (coefficient statistics, ANOVA table, fit
/// statistics, and model projections over the source rows) serialized as JSON.
/// </summary>
/// <exception cref="Exception">Wraps any failure; the original exception is preserved as InnerException.</exception>
public string ProcessLinearRegression(DataTable sourceTable, string _dependentName, string _independentName)
{
    try
    {
        LinearRegressionOutput output = new LinearRegressionOutput();

        // Gets the column of the dependent variable
        String dependentName = _dependentName;
        DataTable dependent = sourceTable.DefaultView.ToTable(false, dependentName);

        // Independent variables arrive as a comma-separated list of column names.
        String[] independentNames = _independentName.Split(',');
        DataTable independent = sourceTable.DefaultView.ToTable(false, independentNames);

        // Creates the input and output matrices from the source data table
        double[][] inputs = independent.ToJagged();
        double[] outputs = dependent.Columns[dependentName].ToArray();

        // Descriptive analysis of the inputs.
        // NOTE(review): its measures are currently never copied into the output
        // DTO (the original code built an empty DistributionMesure and dropped it).
        var sda = new DescriptiveAnalysis() { ColumnNames = independentNames }.Learn(inputs);

        // Only the Inputs/Output names of this analysis are used further down
        // when building the projection table; it is never trained here.
        this.lra = new LogisticRegressionAnalysis() { Inputs = independentNames, Output = dependentName };

        // Create the Multiple Linear Regression Analysis of the given source
        this.mlr = new MultipleLinearRegressionAnalysis(intercept: true) { Inputs = independentNames, Output = dependentName };

        // Compute the Linear Regression Analysis (results read back from mlr).
        mlr.Learn(inputs, outputs);

        // Copy per-coefficient statistics into the serializable DTO.
        List<LinearCoefficients> lcs = new List<LinearCoefficients>();
        foreach (var rc in mlr.Coefficients)
        {
            lcs.Add(new LinearCoefficients()
            {
                Name = rc.Name,
                Value = rc.Value,
                StandardError = rc.StandardError,
                TStatistic = rc.TTest.Statistic,
                P_ValueofT = rc.TTest.PValue,
                FStatistic = rc.FTest.Statistic,
                P_ValueofF = rc.FTest.PValue,
                ConfidenceUpper = rc.ConfidenceUpper,
                ConfidenceLower = rc.ConfidenceLower
            });
        }
        output.LinearCoefficientsDataSource = lcs;

        // Copy the ANOVA table rows; a missing significance entry maps to p = 0.
        List<RegressionAnova> acs = new List<RegressionAnova>();
        foreach (var RDS in mlr.Table)
        {
            acs.Add(new RegressionAnova()
            {
                Source = RDS.Source,
                DegreesOfFreedom = RDS.DegreesOfFreedom,
                SumOfSquares = RDS.SumOfSquares,
                MeanSquares = RDS.MeanSquares,
                FStatistic = RDS.Statistic,
                PValueSignificance = (RDS.Significance == null) ? 0 : RDS.Significance.PValue
            });
        }
        output.RegressionAnovaDataSource = acs;

        // Overall goodness-of-fit statistics, formatted for display.
        output.RSquared = mlr.RSquared.ToString("N5");
        output.RSquaredAdj = mlr.RSquareAdjusted.ToString("N5");
        output.ChiPValue = mlr.ChiSquareTest.PValue.ToString("N5");
        output.FPValue = mlr.FTest.PValue.ToString("N5");
        output.ZPValue = mlr.ZTest.PValue.ToString("N5");
        output.ChiStatistic = mlr.ChiSquareTest.Statistic.ToString("N5");
        output.FStatistic = mlr.FTest.Statistic.ToString("N5");
        output.ZStatistic = mlr.ZTest.Statistic.ToString("N5");
        output.ChiSignificantChecked = mlr.ChiSquareTest.Significant;
        output.FSignificantChecked = mlr.FTest.Significant;
        output.ZSignificantChecked = mlr.ZTest.Significant;

        // Populate projection source table (append the dependent column if missing).
        string[] cols = independentNames;
        if (!independentNames.Contains(dependentName))
        {
            cols = independentNames.Concatenate(dependentName);
        }
        DataTable projSource = sourceTable.DefaultView.ToTable(false, cols);

        // Project the fitted model back over the source rows.
        DataTable independentProj = projSource.DefaultView.ToTable(false, lra.Inputs);
        double[][] input = independentProj.ToJagged();
        double[] output1 = mlr.Regression.Transform(input);

        DataTable result = projSource.Clone();
        for (int i = 0; i < input.Length; i++)
        {
            DataRow row = result.NewRow();
            for (int j = 0; j < lra.Inputs.Length; j++)
            {
                row[lra.Inputs[j]] = input[i][j];
            }
            row[lra.Output] = output1[i];
            result.Rows.Add(row);
        }
        output.ProjectionResultDataSource = result;

        return JsonConvert.SerializeObject(output, Formatting.Indented);
    }
    catch (Exception ex)
    {
        // BUG FIX: keep the original exception as InnerException instead of
        // flattening it into the message string only.
        throw new Exception("Error:" + ex, ex);
    }
}
// Fits candidate regressions of daily-average usage against daily-average
// heating/cooling degree days for every distinct (heating, cooling) balance
// point pair found in allBalancePointPairs, returning the list of accepted
// models. Per group of readings sharing a balance-point pair:
//   * total usage == 0                       -> empty AccordResult placeholder;
//   * both balance points == 0               -> mean-only model: a no-intercept
//     line fit through a ones vector, so the slope is the average daily usage
//     (stored in AccordResult.Intercept);
//   * both balance points != 0               -> multiple linear regression on
//     [HDD/day, CDD/day]; accepted only when every coefficient's t-test is
//     significant, otherwise routed to rejectedAccords;
//   * only heating (or only cooling) BP > 0  -> simple linear regression on the
//     corresponding degree-day series, accepted only when a hand-built t-test
//     on the slope (std error = s/ssx, df = ReadingsInNormalYear - 2) is
//     significant, otherwise routed to rejectedAccords.
// Exceptions inside a group are logged to the console and the group is skipped.
// NOTE(review): rejectedAccords is accumulated but never returned or read;
// hcddMatrixNonDaily and modelParams[1..2] are likewise written but unused.
// NOTE(review): per-element IndexOf inside the population loop makes that loop
// O(n^2); an index-based loop would be linear — confirm before changing.
private List <AccordResult> CalculateLinearRegression(List <BalancePointPair> allBalancePointPairs, WthNormalParams normalParamsKey) { var allBalancePointGroups = allBalancePointPairs.GroupBy(s => new { s.CoolingBalancePoint, s.HeatingBalancePoint }); List <AccordResult> accordResults = new List <AccordResult>(); List <AccordResult> rejectedAccords = new List <AccordResult>(); foreach (var group in allBalancePointGroups) { try { List <BalancePointPair> IdenticalBalancePointPairsForAllReadings = group.ToList(); BalancePointPair _pointPair = IdenticalBalancePointPairsForAllReadings.First(); int readingsCount = IdenticalBalancePointPairsForAllReadings.Count; double[] fullYData = new double[readingsCount]; double[] fullYDataDailyAvg = new double[readingsCount]; double[][] hcddMatrix = new double[readingsCount][]; double[][] hcddMatrixNonDaily = new double[readingsCount][]; foreach (BalancePointPair balancePointPair in IdenticalBalancePointPairsForAllReadings) { fullYData[IdenticalBalancePointPairsForAllReadings.IndexOf(balancePointPair)] = (balancePointPair.ActualUsage); fullYDataDailyAvg[IdenticalBalancePointPairsForAllReadings.IndexOf(balancePointPair)] = (balancePointPair.ActualUsage / balancePointPair.DaysInReading); hcddMatrix[IdenticalBalancePointPairsForAllReadings.IndexOf(balancePointPair)] = new double[] { (balancePointPair.HeatingDegreeDays / balancePointPair.DaysInReading), (balancePointPair.CoolingDegreeDays / balancePointPair.DaysInReading) }; } double[] avgHddsForEachReadingInYear = new double[readingsCount]; double[] avgCddsForEachReadingInYear = new double[readingsCount]; for (int i = 0; i < readingsCount; i++) { avgHddsForEachReadingInYear[i] = hcddMatrix[i][0]; avgCddsForEachReadingInYear[i] = hcddMatrix[i][1]; } double[] modelParams = new double[3]; modelParams[0] = 0; modelParams[1] = 0; modelParams[2] = 0; if (fullYData.Sum() == 0) { AccordResult empty = new AccordResult { bpPair = _pointPair }; accordResults.Add(empty); } else if 
(_pointPair.HeatingBalancePoint == 0 && _pointPair.CoolingBalancePoint == 0) { double[] onesVector = new double[readingsCount]; for (int i = 0; i < readingsCount; i++) { onesVector[i] = 1; } modelParams[0] = Fit.LineThroughOrigin(onesVector, fullYDataDailyAvg); OrdinaryLeastSquares ols = new OrdinaryLeastSquares() { UseIntercept = false }; SimpleLinearRegression regressionAccord = ols.Learn(onesVector, fullYDataDailyAvg); //double[] predictedAccord = regressionAccord.Transform(onesVector); double r2 = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(onesVector.Select(x => x * modelParams[0]), fullYDataDailyAvg); //double mean = fullYDataDailyAvg.Mean(); //if (mean != modelParams[0] || mean != regressionAccord.Slope) //{ // Console.WriteLine("Hey!"); //} //double r2Accord = regressionAccord.CoefficientOfDetermination(onesVector, fullYDataDailyAvg); //double sxx = onesVector.Subtract(onesVector.Mean()).Pow(2).Sum(); //double hypothesizedValue = 0; //try //{ // TTest test = new TTest( // estimatedValue: regressionAccord.Slope, standardError: sxx, degreesOfFreedom: _pointPair.ReadingsInNormalYear - 2, // hypothesizedValue: hypothesizedValue, alternate: OneSampleHypothesis.ValueIsDifferentFromHypothesis // ); // if (test.Significant) // { AccordResult accordResult = new AccordResult() { SimpleLinearRegression = regressionAccord, R2Accord = r2, IsSimpleSingleRegression = true, HeatingBP = _pointPair.HeatingBalancePoint, CoolingBP = _pointPair.CoolingBalancePoint, Intercept = regressionAccord.Slope, bpPair = _pointPair }; accordResults.Add(accordResult); // } //} //catch (Exception e) //{ // Console.WriteLine(e.Message + e.StackTrace); //} } else if (_pointPair.CoolingBalancePoint != 0 && _pointPair.HeatingBalancePoint != 0) { //modelParams = MultipleRegression.QR(hcddMatrix, fullYDataDailyAvg, intercept: true); //Accord //var ols = new OrdinaryLeastSquares() //{ // UseIntercept = true //}; try { MultipleLinearRegressionAnalysis mlra = new 
MultipleLinearRegressionAnalysis(intercept: true); mlra.Learn(hcddMatrix, fullYDataDailyAvg); // //MultipleLinearRegression regressionAccord = ols.Learn(hcddMatrix, fullYDataDailyAvg); var regressionAccord = mlra.Regression; double[] predicted = regressionAccord.Transform(hcddMatrix); double r2Accord = new RSquaredLoss(numberOfInputs: 2, expected: fullYDataDailyAvg) { Adjust = false }.Loss(predicted); double r2Coeff = regressionAccord.CoefficientOfDetermination(hcddMatrix, fullYDataDailyAvg, adjust: false); //double r2Math = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(hcddMatrix.Select( // x => (x[0] * regressionAccord.Weights[0]) + (x[1] * regressionAccord.Weights[1]) + regressionAccord.Intercept //), fullYDataDailyAvg); //double r2MathPred = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(predicted, fullYDataDailyAvg); AccordResult accordResult = new AccordResult() { //MultipleRegression = regressionAccord, R2Accord = r2Accord, R2Coeff = r2Coeff, HeatingBP = _pointPair.HeatingBalancePoint, CoolingBP = _pointPair.CoolingBalancePoint, IsSimpleSingleRegression = false, MLRA = mlra, Intercept = regressionAccord.Intercept, bpPair = _pointPair, IsMultipleLinearRegression = true }; if (mlra.Coefficients.All(x => x.TTest.Significant)) { accordResults.Add(accordResult); } else { rejectedAccords.Add(accordResult); } } catch (Exception e) { Console.WriteLine(e.Message + " " + e.StackTrace); } } else if (_pointPair.HeatingBalancePoint > 0) { // Tuple<double, double> heatingTuple = Fit.Line(avgHddsForEachReadingInYear, fullYDataDailyAvg); // modelParams[0] = heatingTuple.Item1; // modelParams[1] = heatingTuple.Item2; // double r = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination( // avgHddsForEachReadingInYear.Select(x => heatingTuple.Item1 + heatingTuple.Item2 * x), fullYDataDailyAvg); OrdinaryLeastSquares ols = new OrdinaryLeastSquares() { UseIntercept = true }; SimpleLinearRegression regressionAccord = ols.Learn(avgHddsForEachReadingInYear, 
fullYDataDailyAvg); double[] predictedAccord = regressionAccord.Transform(avgHddsForEachReadingInYear); double rAccord = new RSquaredLoss(1, fullYDataDailyAvg).Loss(predictedAccord); //double rAccord2 = regressionAccord.CoefficientOfDetermination(avgHddsForEachReadingInYear, fullYDataDailyAvg, adjust: false); //double r2Math = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(avgHddsForEachReadingInYear.Select( // x => (x * regressionAccord.Slope) + regressionAccord.Intercept // ), fullYDataDailyAvg); //double r2 = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(predictedAccord, fullYDataDailyAvg); int degreesOfFreedom = _pointPair.ReadingsInNormalYear - 2; double ssx = Math.Sqrt((avgHddsForEachReadingInYear.Subtract(avgHddsForEachReadingInYear.Mean())).Pow(2).Sum()); double s = Math.Sqrt(((fullYDataDailyAvg.Subtract(predictedAccord).Pow(2)).Sum()) / degreesOfFreedom); double error = regressionAccord.GetStandardError(avgHddsForEachReadingInYear, fullYDataDailyAvg); double seSubB = s / ssx; double hypothesizedValue = 0; TTest tTest = new TTest( estimatedValue: regressionAccord.Slope, standardError: seSubB, degreesOfFreedom: degreesOfFreedom, hypothesizedValue: hypothesizedValue, alternate: OneSampleHypothesis.ValueIsDifferentFromHypothesis ); AccordResult accordResult = new AccordResult() { SimpleLinearRegression = regressionAccord, R2Accord = rAccord, IsSimpleSingleRegression = true, HeatingBP = _pointPair.HeatingBalancePoint, CoolingBP = _pointPair.CoolingBalancePoint, TTest = tTest, Intercept = regressionAccord.Intercept, bpPair = _pointPair }; if (tTest.Significant) { accordResults.Add(accordResult); } else { rejectedAccords.Add(accordResult); } } else if (_pointPair.CoolingBalancePoint > 0) { //Tuple<double, double> coolingTuple = Fit.Line(avgCddsForEachReadingInYear, fullYDataDailyAvg); //modelParams[0] = coolingTuple.Item1; //modelParams[2] = coolingTuple.Item2; OrdinaryLeastSquares ols = new OrdinaryLeastSquares() { UseIntercept = true }; 
SimpleLinearRegression regressionAccord = ols.Learn(avgCddsForEachReadingInYear, fullYDataDailyAvg); double[] predictedAccord = regressionAccord.Transform(avgCddsForEachReadingInYear); double rAccord = new RSquaredLoss(1, fullYDataDailyAvg).Loss(predictedAccord); //double r2Math = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(avgCddsForEachReadingInYear.Select( // x => (x * regressionAccord.Slope) + regressionAccord.Intercept // ), fullYDataDailyAvg); //double r2 = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(predictedAccord, fullYDataDailyAvg); int degreesOfFreedom = _pointPair.ReadingsInNormalYear - 2; double ssx = Math.Sqrt(avgCddsForEachReadingInYear.Subtract(avgCddsForEachReadingInYear.Mean()).Pow(2).Sum()); double s = Math.Sqrt(((fullYDataDailyAvg.Subtract(predictedAccord).Pow(2)).Sum()) / degreesOfFreedom); double seSubB = s / ssx; double hypothesizedValue = 0; double myT = seSubB / regressionAccord.Slope; TTest tTest = new TTest( estimatedValue: regressionAccord.Slope, standardError: seSubB, degreesOfFreedom: degreesOfFreedom, hypothesizedValue: hypothesizedValue, alternate: OneSampleHypothesis.ValueIsDifferentFromHypothesis ); AccordResult accordResult = new AccordResult() { SimpleLinearRegression = regressionAccord, R2Accord = rAccord, IsSimpleSingleRegression = true, HeatingBP = _pointPair.HeatingBalancePoint, CoolingBP = _pointPair.CoolingBalancePoint, TTest = tTest, Intercept = regressionAccord.Intercept, bpPair = _pointPair }; if (tTest.Significant) { accordResults.Add(accordResult); } else { rejectedAccords.Add(accordResult); } } ; } catch (Exception e) { Console.WriteLine(normalParamsKey.AccID + " " + normalParamsKey.UtilID + " " + normalParamsKey.UnitID + " " + e.Message + e.StackTrace); } } //rejectedAccords = rejectedAccords.OrderByDescending(s => s.R2Accord).ToList(); //accordResults = accordResults.OrderByDescending(s => s.R2Accord).ToList(); return(accordResults); }
// Exercises MultipleLinearRegressionAnalysis end-to-end on a small two-input
// data set: fits the model with an intercept, then pins coefficient values,
// standard errors, R-squared, Z/F-test p-values, per-coefficient t-tests,
// coefficient confidence intervals, and confidence/prediction intervals for
// a new sample against known-good values (with tolerances for floats).
// NOTE: the #region doc_learn_part1/part2 markers extract these snippets into
// the published documentation, so the code inside them is laid out for
// readability in docs and should not be reformatted casually.
public void learn_Test() { #region doc_learn_part1 // Consider the following data. An experimenter would // like to infer a relationship between two variables // A and B and a corresponding outcome variable R. double[][] example = { // A B R new double[] { 6.41, 10.11, 26.1 }, new double[] { 6.61, 22.61, 33.8 }, new double[] { 8.45, 11.11, 52.7 }, new double[] { 1.22, 18.11, 16.2 }, new double[] { 7.42, 12.81, 87.3 }, new double[] { 4.42, 10.21, 12.5 }, new double[] { 8.61, 11.94, 77.5 }, new double[] { 1.73, 13.13, 12.1 }, new double[] { 7.47, 17.11, 86.5 }, new double[] { 6.11, 15.13, 62.8 }, new double[] { 1.42, 16.11, 17.5 }, }; // For this, we first extract the input and output // pairs. The first two columns have values for the // input variables, and the last for the output: double[][] inputs = example.GetColumns(new[] { 0, 1 }); double[] output = example.GetColumn(2); // We can create a new multiple linear analysis for the variables var mlra = new MultipleLinearRegressionAnalysis(intercept: true); // Compute the analysis and obtain the estimated regression MultipleLinearRegression regression = mlra.Learn(inputs, output); #endregion // We can also show a summary ANOVA // Accord.Controls.DataGridBox.Show(regression.Table); #region doc_learn_part2 // And also extract other useful information, such // as the linear coefficients' values and std errors: double[] coef = mlra.CoefficientValues; double[] stde = mlra.StandardErrors; // Coefficients of performance, such as r² double rsquared = mlra.RSquared; // 0.62879 // Hypothesis tests for the whole model ZTest ztest = mlra.ZTest; // 0.99999 FTest ftest = mlra.FTest; // 0.01898 // and for individual coefficients TTest ttest0 = mlra.Coefficients[0].TTest; // 0.00622 TTest ttest1 = mlra.Coefficients[1].TTest; // 0.53484 // and also extract confidence intervals DoubleRange ci = mlra.Coefficients[0].Confidence; // [3.2616, 14.2193] // We can use the analysis to predict an output for a sample double y = 
mlra.Regression.Transform(new double[] { 10, 15 }); // We can also obtain confidence intervals for the prediction: DoubleRange pci = mlra.GetConfidenceInterval(new double[] { 10, 15 }); // and also prediction intervals for the same prediction: DoubleRange ppi = mlra.GetPredictionInterval(new double[] { 10, 15 }); #endregion Assert.AreEqual(3, coef.Length); Assert.AreEqual(8.7405051051757816, coef[0]); Assert.AreEqual(1.1198079243314365, coef[1], 1e-10); Assert.AreEqual(-19.604474518407862, coef[2], 1e-10); Assert.IsFalse(coef.HasNaN()); Assert.AreEqual(2.375916659234715, stde[0], 1e-10); Assert.AreEqual(1.7268508921418664, stde[1], 1e-10); Assert.AreEqual(30.989640986710953, stde[2], 1e-10); Assert.IsFalse(coef.HasNaN()); Assert.AreEqual(0.62879941171298936, rsquared, 1e-10); Assert.AreEqual(0.99999999999999822, ztest.PValue, 1e-10); Assert.AreEqual(0.018986050133298293, ftest.PValue, 1e-10); Assert.AreEqual(0.0062299844256985537, ttest0.PValue, 1e-10); Assert.AreEqual(0.53484850318449118, ttest1.PValue, 1e-14); Assert.IsFalse(Double.IsNaN(ttest1.PValue)); Assert.AreEqual(3.2616314640800566, ci.Min, 1e-10); Assert.AreEqual(14.219378746271506, ci.Max, 1e-10); double[][] im = mlra.InformationMatrix; double mse = regression.GetStandardError(inputs, output); DoubleRange epci = regression.GetConfidenceInterval(new double[] { 10, 15 }, mse, inputs.Length, im); Assert.AreEqual(epci.Min, pci.Min, 1e-10); Assert.AreEqual(epci.Max, pci.Max, 1e-10); Assert.AreEqual(55.27840511658215, pci.Min, 1e-10); Assert.AreEqual(113.91698568006086, pci.Max, 1e-10); Assert.AreEqual(28.783074454641557, ppi.Min, 1e-10); Assert.AreEqual(140.41231634200145, ppi.Max, 1e-10); }
// Variant of the balance-point regression search that returns only a single
// winning model. For each distinct (heating, cooling) balance point pair it
// fits one of:
//   * both balance points == 0 -> mean-only model (no-intercept line through a
//     ones vector; the fitted slope, stored as Intercept, is average daily usage);
//   * both balance points != 0 -> multiple linear regression on [HDD/day,
//     CDD/day], kept only when every coefficient's t-test is significant;
//   * only heating (or only cooling) BP > 0 -> simple linear regression on the
//     corresponding degree-day series, kept only when a hand-built t-test on
//     the slope (std error = s/ssx, df = normalParamsKey.MoCt - 2) is significant.
// The winner is the accepted model with a non-negative intercept and the
// highest R2Accord; returns null (FirstOrDefault) when none qualifies.
// Exceptions inside a group are logged via Log.Debug and the group is skipped.
// NOTE(review): this overload uses normalParamsKey.MoCt - 2 for degrees of
// freedom, while the sibling overload uses ReadingsInNormalYear - 2 — confirm
// which is intended. The `error`, `myT`, `hcddMatrixNonDaily` and
// `modelParams` values are computed but never used.
private AccordResult CalculateLinearRegression(List <BalancePointPair> allBalancePointPairs, WthNormalParams normalParamsKey) { var allBalancePointGroups = allBalancePointPairs.GroupBy(s => new { s.CoolingBalancePoint, s.HeatingBalancePoint }); List <AccordResult> accordResults = new List <AccordResult>(); foreach (var group in allBalancePointGroups) { try { List <BalancePointPair> IdenticalBalancePointPairsFromAllReadings = group.ToList(); BalancePointPair _pointPair = IdenticalBalancePointPairsFromAllReadings.First(); int readingsCount = IdenticalBalancePointPairsFromAllReadings.Count; double[] fullYData = new double[readingsCount]; double[] fullYDataDailyAvg = new double[readingsCount]; double[][] hcddMatrix = new double[readingsCount][]; double[][] hcddMatrixNonDaily = new double[readingsCount][]; foreach (BalancePointPair balancePointPair in IdenticalBalancePointPairsFromAllReadings) { fullYData[IdenticalBalancePointPairsFromAllReadings.IndexOf(balancePointPair)] = (balancePointPair.ActualUsage); fullYDataDailyAvg[IdenticalBalancePointPairsFromAllReadings.IndexOf(balancePointPair)] = (balancePointPair.ActualUsage / balancePointPair.DaysInReading); hcddMatrix[IdenticalBalancePointPairsFromAllReadings.IndexOf(balancePointPair)] = new double[] { (balancePointPair.HeatingDegreeDays / balancePointPair.DaysInReading), (balancePointPair.CoolingDegreeDays / balancePointPair.DaysInReading) }; } double[] avgHddsForEachReadingInYear = new double[readingsCount]; double[] avgCddsForEachReadingInYear = new double[readingsCount]; for (int i = 0; i < readingsCount; i++) { avgHddsForEachReadingInYear[i] = hcddMatrix[i][0]; avgCddsForEachReadingInYear[i] = hcddMatrix[i][1]; } double[] modelParams = new double[3]; modelParams[0] = 0; modelParams[1] = 0; modelParams[2] = 0; if (_pointPair.HeatingBalancePoint == 0 && _pointPair.CoolingBalancePoint == 0) { double[] onesVector = new double[readingsCount]; for (int i = 0; i < readingsCount; i++) { onesVector[i] = 1; } modelParams[0] 
= Fit.LineThroughOrigin(onesVector, fullYDataDailyAvg); OrdinaryLeastSquares ols = new OrdinaryLeastSquares() { UseIntercept = false }; double r2 = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination( onesVector.Select(x => x * modelParams[0]), fullYDataDailyAvg); AccordResult accordResult = new AccordResult() { IsSimpleSingleRegression = true, HeatingBP = _pointPair.HeatingBalancePoint, CoolingBP = _pointPair.CoolingBalancePoint, Intercept = modelParams[0], R2Accord = r2, }; accordResults.Add(accordResult); } else if (_pointPair.CoolingBalancePoint != 0 && _pointPair.HeatingBalancePoint != 0) { try { MultipleLinearRegressionAnalysis mlra = new MultipleLinearRegressionAnalysis(intercept: true); mlra.Learn(hcddMatrix, fullYDataDailyAvg); var regressionAccord = mlra.Regression; double[] predicted = regressionAccord.Transform(hcddMatrix); double r2Accord = new RSquaredLoss(numberOfInputs: 2, expected: fullYDataDailyAvg) { Adjust = false }.Loss(predicted); double r2Coeff = regressionAccord.CoefficientOfDetermination(hcddMatrix, fullYDataDailyAvg, adjust: false); bool FTestFailed = !mlra.FTest.Significant; AccordResult accordResult = new AccordResult() { IsMultipleLinearRegression = true, HeatingBP = _pointPair.HeatingBalancePoint, CoolingBP = _pointPair.CoolingBalancePoint, Intercept = regressionAccord.Intercept, B2 = regressionAccord.Weights[0], B4 = regressionAccord.Weights[1], R2Accord = r2Accord, FTestFailed = FTestFailed }; if (mlra.Coefficients.All(x => x.TTest.Significant)) { accordResults.Add(accordResult); } } catch (Exception e) { Log.Debug(normalParamsKey.AccID + " " + normalParamsKey.UtilID + " " + normalParamsKey.UnitID + " " + e.Message + " " + e.StackTrace); } } else if (_pointPair.HeatingBalancePoint > 0) { OrdinaryLeastSquares ols = new OrdinaryLeastSquares() { UseIntercept = true }; SimpleLinearRegression regressionAccord = ols.Learn(avgHddsForEachReadingInYear, fullYDataDailyAvg); double[] predictedAccord = 
regressionAccord.Transform(avgHddsForEachReadingInYear); double r2Accord = new RSquaredLoss(1, fullYDataDailyAvg).Loss(predictedAccord); int degreesOfFreedom = normalParamsKey.MoCt - 2; double ssx = Math.Sqrt((avgHddsForEachReadingInYear.Subtract(avgHddsForEachReadingInYear.Mean())).Pow(2).Sum()); double s = Math.Sqrt(((fullYDataDailyAvg.Subtract(predictedAccord).Pow(2)).Sum()) / degreesOfFreedom); double error = regressionAccord.GetStandardError(avgHddsForEachReadingInYear, fullYDataDailyAvg); double seSubB = s / ssx; double hypothesizedValue = 0; TTest tTest = new TTest( estimatedValue: regressionAccord.Slope, standardError: seSubB, degreesOfFreedom: degreesOfFreedom, hypothesizedValue: hypothesizedValue, alternate: OneSampleHypothesis.ValueIsDifferentFromHypothesis ); AccordResult accordResult = new AccordResult() { IsSimpleSingleRegression = true, HeatingBP = _pointPair.HeatingBalancePoint, Intercept = regressionAccord.Intercept, B2 = regressionAccord.Slope, R2Accord = r2Accord }; if (tTest.Significant) { accordResults.Add(accordResult); } } else if (_pointPair.CoolingBalancePoint > 0) { OrdinaryLeastSquares ols = new OrdinaryLeastSquares() { UseIntercept = true }; SimpleLinearRegression regressionAccord = ols.Learn(avgCddsForEachReadingInYear, fullYDataDailyAvg); double[] predictedAccord = regressionAccord.Transform(avgCddsForEachReadingInYear); double rAccord = new RSquaredLoss(1, fullYDataDailyAvg).Loss(predictedAccord); int degreesOfFreedom = normalParamsKey.MoCt - 2; double ssx = Math.Sqrt(avgCddsForEachReadingInYear.Subtract(avgCddsForEachReadingInYear.Mean()).Pow(2).Sum()); double s = Math.Sqrt(((fullYDataDailyAvg.Subtract(predictedAccord).Pow(2)).Sum()) / degreesOfFreedom); double seSubB = s / ssx; double hypothesizedValue = 0; double myT = seSubB / regressionAccord.Slope; TTest tTest = new TTest( estimatedValue: regressionAccord.Slope, standardError: seSubB, degreesOfFreedom: degreesOfFreedom, hypothesizedValue: hypothesizedValue, alternate: 
OneSampleHypothesis.ValueIsDifferentFromHypothesis ); AccordResult accordResult = new AccordResult() { IsSimpleSingleRegression = true, CoolingBP = _pointPair.CoolingBalancePoint, Intercept = regressionAccord.Intercept, B4 = regressionAccord.Slope, R2Accord = rAccord }; if (tTest.Significant) { accordResults.Add(accordResult); } } ; } catch (Exception e) { Log.Debug(normalParamsKey.AccID + " " + normalParamsKey.UtilID + " " + normalParamsKey.UnitID + " " + e.Message + e.StackTrace); } } AccordResult accordWinner = accordResults .Where(s => s.Intercept >= 0) .OrderByDescending(s => s.R2Accord).ToList().FirstOrDefault(); return(accordWinner); }
/// <summary>
/// Fits a candidate regression model for every distinct (heating, cooling)
/// balance-point pair found in <paramref name="allBalancePointPairs"/> and
/// returns the best-fitting acceptable model.
/// </summary>
/// <param name="allBalancePointPairs">All readings, one entry per reading per candidate balance-point pair.</param>
/// <param name="normalParamsKey">Account/utility/unit key, used only for log messages.</param>
/// <returns>
/// The candidate with a non-negative intercept and the highest R²; a default
/// <see cref="AccordResult"/> when a group has zero total usage; null when no
/// candidate qualifies.
/// </returns>
private AccordResult CalculateLinearRegression(List<BalancePointPair> allBalancePointPairs, WthNormalParams normalParamsKey)
{
    // Each group holds every reading evaluated at one identical balance-point pair.
    var balancePointGroups = allBalancePointPairs
        .GroupBy(s => new { s.CoolingBalancePoint, s.HeatingBalancePoint });

    List<AccordResult> accordResults = new List<AccordResult>();

    foreach (var group in balancePointGroups)
    {
        try
        {
            List<BalancePointPair> pairs = group.ToList();
            BalancePointPair pointPair = pairs.First();
            int readingsCount = pairs.Count;

            double totalUsage = 0;
            double[] dailyAvgUsage = new double[readingsCount];
            double[][] hcddMatrix = new double[readingsCount][];

            // Build regressors (daily-average HDD/CDD) and response (daily-average
            // usage) in one indexed pass. The original used List.IndexOf per
            // element, which made this loop O(n^2).
            for (int i = 0; i < readingsCount; i++)
            {
                BalancePointPair p = pairs[i];
                totalUsage += p.ActualUsage;
                dailyAvgUsage[i] = p.ActualUsage / p.DaysInReading;
                hcddMatrix[i] = new double[]
                {
                    p.HeatingDegreeDays / p.DaysInReading,
                    p.CoolingDegreeDays / p.DaysInReading
                };
            }

            // No recorded usage at all: nothing meaningful to regress against.
            if (!(totalUsage > 0))
            {
                return new AccordResult();
            }

            double[] avgHdds = hcddMatrix.Select(row => row[0]).ToArray();
            double[] avgCdds = hcddMatrix.Select(row => row[1]).ToArray();

            AccordResult candidate = null;

            if (pointPair.HeatingBalancePoint == 0 && pointPair.CoolingBalancePoint == 0)
            {
                candidate = FitConstantModel(pointPair, dailyAvgUsage, readingsCount);
            }
            else if (pointPair.CoolingBalancePoint != 0 && pointPair.HeatingBalancePoint != 0)
            {
                candidate = FitMultipleRegression(pointPair, hcddMatrix, dailyAvgUsage,
                    avgHdds, avgCdds, readingsCount, normalParamsKey);
            }
            else if (pointPair.HeatingBalancePoint > 0)
            {
                candidate = FitSimpleRegression(pointPair, avgHdds, dailyAvgUsage, readingsCount, isHeating: true);
            }
            else if (pointPair.CoolingBalancePoint > 0)
            {
                candidate = FitSimpleRegression(pointPair, avgCdds, dailyAvgUsage, readingsCount, isHeating: false);
            }

            if (candidate != null)
            {
                accordResults.Add(candidate);
            }
        }
        catch (Exception e)
        {
            Log.Debug($"AccID/UtilID/UnitID: {normalParamsKey.AccID}/{normalParamsKey.UtilID}/{normalParamsKey.UnitID} >> {e.Message} {e.StackTrace}");
        }
    }

    // Winner: non-negative intercept, highest R². Null when nothing qualifies.
    return accordResults
        .Where(s => s.Intercept >= 0)
        .OrderByDescending(s => s.R2Accord)
        .FirstOrDefault();
}

/// <summary>
/// Constant (intercept-only) model used when both balance points are zero:
/// regresses daily-average usage through the origin against a ones vector.
/// Always accepted as a candidate (mirrors the original behavior).
/// </summary>
private AccordResult FitConstantModel(BalancePointPair pointPair, double[] dailyAvgUsage, int readingsCount)
{
    double[] onesVector = new double[readingsCount];
    for (int i = 0; i < readingsCount; i++)
    {
        onesVector[i] = 1;
    }

    double intercept = Fit.LineThroughOrigin(onesVector, dailyAvgUsage);

    double r2 = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(
        onesVector.Select(x => x * intercept), dailyAvgUsage);

    return new AccordResult()
    {
        IsSimpleSingleRegression = true,
        HeatingBP = pointPair.HeatingBalancePoint,
        CoolingBP = pointPair.CoolingBalancePoint,
        Intercept = intercept,
        R2Accord = r2
    };
}

/// <summary>
/// Two-variable model (usage ~ HDD + CDD) used when both balance points are
/// non-zero. Returns null when the fit fails its significance/positivity
/// screens or when Accord throws (e.g. singular design matrix).
/// </summary>
private AccordResult FitMultipleRegression(BalancePointPair pointPair, double[][] hcddMatrix,
    double[] dailyAvgUsage, double[] avgHdds, double[] avgCdds, int readingsCount, WthNormalParams normalParamsKey)
{
    try
    {
        MultipleLinearRegressionAnalysis mlra = new MultipleLinearRegressionAnalysis(intercept: true);
        mlra.Learn(hcddMatrix, dailyAvgUsage);
        var regression = mlra.Regression;

        double[] predicted = regression.Transform(hcddMatrix);
        double r2 = new RSquaredLoss(numberOfInputs: 2, expected: dailyAvgUsage) { Adjust = false }.Loss(predicted);

        AccordResult result = new AccordResult()
        {
            IsMultipleLinearRegression = true,
            HeatingBP = pointPair.HeatingBalancePoint,
            CoolingBP = pointPair.CoolingBalancePoint,
            Intercept = regression.Intercept,
            B2 = regression.Weights[0],
            B4 = regression.Weights[1],
            R2Accord = r2,
            FTestFailed = !mlra.FTest.Significant
        };

        int degreesOfFreedom = Convert.ToInt32(regression.GetDegreesOfFreedom(readingsCount));

        // Hand-rolled t-statistics for each slope: s = residual standard error,
        // ssx = sqrt of the regressor's sum of squared deviations.
        double s = Math.Sqrt(dailyAvgUsage.Subtract(predicted).Pow(2).Sum() / degreesOfFreedom);
        double ssxHdd = Math.Sqrt(avgHdds.Subtract(avgHdds.Mean()).Pow(2).Sum());
        double ssxCdd = Math.Sqrt(avgCdds.Subtract(avgCdds.Mean()).Pow(2).Sum());
        double tStatisticHdd = regression.Weights[0] / (s / ssxHdd);
        double tStatisticCdd = regression.Weights[1] / (s / ssxCdd);

        // Two-sided 10% critical value (tabulated for the expected DOF).
        double tCriticalTenPercent = 1.833113;
        bool hddSignificant = Math.Abs(tStatisticHdd) >= tCriticalTenPercent;
        bool cddSignificant = Math.Abs(tStatisticCdd) >= tCriticalTenPercent;

        // Accept only when both slopes are significant, all coefficients are
        // positive, and the intercept is positive.
        if (hddSignificant && cddSignificant &&
            mlra.Coefficients.All(x => x.Value > 0) &&
            regression.Intercept > 0)
        {
            return result;
        }

        return null;
    }
    catch (Exception e)
    {
        Log.Debug($"AccID/UtilID/UnitID: {normalParamsKey.AccID}/{normalParamsKey.UtilID}/{normalParamsKey.UnitID} >> " +
            $"MultipleLinearRegressionAnalysis Exception: {e.Message}");
        return null;
    }
}

/// <summary>
/// Single-variable model (usage ~ HDD or usage ~ CDD). The heating branch
/// additionally requires a positive intercept, the cooling branch does not —
/// this asymmetry is preserved from the original implementation.
/// </summary>
private AccordResult FitSimpleRegression(BalancePointPair pointPair, double[] regressor,
    double[] dailyAvgUsage, int readingsCount, bool isHeating)
{
    OrdinaryLeastSquares ols = new OrdinaryLeastSquares() { UseIntercept = true };
    SimpleLinearRegression regression = ols.Learn(regressor, dailyAvgUsage);

    double[] predicted = regression.Transform(regressor);
    double r2 = new RSquaredLoss(1, dailyAvgUsage).Loss(predicted);

    int degreesOfFreedom = Convert.ToInt32(regression.GetDegreesOfFreedom(readingsCount));

    // Hand-rolled slope t-statistic against the two-sided 10% critical value.
    double ssx = Math.Sqrt(regressor.Subtract(regressor.Mean()).Pow(2).Sum());
    double s = Math.Sqrt(dailyAvgUsage.Subtract(predicted).Pow(2).Sum() / degreesOfFreedom);
    double tStatistic = regression.Slope / (s / ssx);
    double tCriticalTenPercent = 1.812461;
    bool slopeSignificant = Math.Abs(tStatistic) >= tCriticalTenPercent;

    AccordResult result = new AccordResult()
    {
        IsSimpleSingleRegression = true,
        Intercept = regression.Intercept,
        R2Accord = r2
    };

    if (isHeating)
    {
        result.HeatingBP = pointPair.HeatingBalancePoint;
        result.B2 = regression.Slope;
        return (slopeSignificant && result.B2 > 0 && result.Intercept > 0) ? result : null;
    }

    result.CoolingBP = pointPair.CoolingBalancePoint;
    result.B4 = regression.Slope;
    return (slopeSignificant && result.B4 > 0) ? result : null;
}
public void gh_937()
{
    #region doc_learn_database
    // This example represents its input with a System.Data.DataTable. That is
    // not required — the data could be expressed as double[][] directly — but
    // a DataTable is the natural container for heterogeneous records such as
    // rows loaded from a database. Before learning, the table is converted to
    // the double[][] representation the regression model expects.
    var table = new DataTable("Customer Revenue Example");
    table.Columns.Add("Day", "CustomerId", "Time (hour)", "Weather", "Revenue");
    table.Rows.Add("D1", 0, 8, "Sunny", 101.2);
    table.Rows.Add("D2", 1, 10, "Sunny", 24.1);
    table.Rows.Add("D3", 2, 10, "Rain", 107);
    table.Rows.Add("D4", 3, 16, "Rain", 223);
    table.Rows.Add("D5", 4, 15, "Rain", 1);
    table.Rows.Add("D6", 5, 20, "Rain", 42);
    table.Rows.Add("D7", 6, 12, "Cloudy", 123);
    table.Rows.Add("D8", 7, 12, "Sunny", 64);

    // A Codification filter performs the conversion: symbolic variables (the
    // weather) are expanded into numeric representations suitable for a real
    // vector-based model, while continuous columns pass through.
    var codification = new Codification()
    {
        { "Weather", CodificationVariable.Categorical },
        { "Time (hour)", CodificationVariable.Continuous },
        { "Revenue", CodificationVariable.Continuous },
    };

    // Learn the codebook from the table
    codification.Learn(table);

    // Codification can produce more output columns than input columns (one per
    // category); the generated names are returned through the out parameters.
    string[] inputNames;
    string outputName;

    // Translate the training table into numeric inputs and outputs. Apply
    // produces a codified DataTable; ToJagged/ToVector turn it into a
    // double[][] matrix and a double[] vector respectively.
    double[][] inputs = codification.Apply(table, "Weather", "Time (hour)").ToJagged(out inputNames);
    double[] outputs = codification.Apply(table, "Revenue").ToVector(out outputName);

    // When the goal is interpretation — hypothesis tests on the coefficients,
    // a full linear regression analysis — use MultipleLinearRegressionAnalysis.
    // When only predictions are needed, MultipleLinearRegression together with
    // OrdinaryLeastSquares is sufficient; see that class's documentation.
    var analysis = new MultipleLinearRegressionAnalysis(intercept: true)
    {
        // The generated variable names help when the analysis is data-bound to
        // a visual control such as a Windows.Forms.DataGridView or WPF DataGrid:
        Inputs = inputNames, // will be { "Weather: Sunny", "Weather: Rain, "Weather: Cloudy", "Time (hours)" }
        Output = outputName  // will be "Revenue"
    };

    // To overcome linear dependency errors
    analysis.OrdinaryLeastSquares.IsRobust = true;

    // Compute the analysis and obtain the estimated regression
    MultipleLinearRegression regression = analysis.Learn(inputs, outputs);

    // And then predict the label using
    double predicted = analysis.Transform(inputs[0]); // result will be ~72.3

    // The analysis exposes much more information than a bare regression:
    int inputCount = analysis.NumberOfInputs;            // should be 4
    int outputCount = analysis.NumberOfOutputs;          // should be 1
    double r2 = analysis.RSquared;                       // should be 0.12801838425195311
    AnovaSourceCollection anova = analysis.Table;        // ANOVA table (bind to a visual control for quick inspection)
    double[][] information = analysis.InformationMatrix; // should contain Fisher's information matrix for the problem
    ZTest zTest = analysis.ZTest;                        // should be 0 (p=0.999, non-significant)
    #endregion

    Assert.AreEqual(72.279574468085144d, predicted, 1e-8);
    Assert.AreEqual(4, inputCount, 1e-8);
    Assert.AreEqual(1, outputCount, 1e-8);
    Assert.AreEqual(0.12801838425195311, r2, 1e-8);
    Assert.AreEqual(0.11010987669344097, anova[0].Statistic, 1e-8);

    string generated = information.ToCSharp();

    double[][] expectedInformation = new double[][]
    {
        new double[] { 0.442293243337911, -0.069833718526197, -0.228692384542512, -0.0141758263063635, 0.143767140269202 },
        new double[] { -0.0698337185261971, 0.717811616891116, -0.112258662892007, -0.0655549422852099, 0.535719235472913 },
        new double[] { -0.228692384542512, -0.112258662892007, 0.717434922237013, -0.0232803210243207, 0.376483874802496 },
        new double[] { -0.0141758263063635, -0.0655549422852099, -0.0232803210243207, 0.0370082984668314, -0.103011089615894 },
        new double[] { 0.143767140269202, 0.535719235472913, 0.376483874802496, -0.103011089615894, 1.05597025054461 }
    };

    Assert.IsTrue(expectedInformation.IsEqual(information, 1e-8));
    Assert.AreEqual(0, zTest.Statistic, 1e-8);
    Assert.AreEqual(1, zTest.PValue, 1e-8);
}
public void learn_Test()
{
    #region doc_learn_part1
    // An experimenter wants to infer how two variables A and B
    // relate to an outcome variable R, given the samples below.
    double[][] samples =
    {
        //            A      B      R
        new double[] { 6.41, 10.11, 26.1 },
        new double[] { 6.61, 22.61, 33.8 },
        new double[] { 8.45, 11.11, 52.7 },
        new double[] { 1.22, 18.11, 16.2 },
        new double[] { 7.42, 12.81, 87.3 },
        new double[] { 4.42, 10.21, 12.5 },
        new double[] { 8.61, 11.94, 77.5 },
        new double[] { 1.73, 13.13, 12.1 },
        new double[] { 7.47, 17.11, 86.5 },
        new double[] { 6.11, 15.13, 62.8 },
        new double[] { 1.42, 16.11, 17.5 },
    };

    // Split the samples into input pairs (the first two
    // columns) and the output values (the last column):
    double[][] inputs = samples.GetColumns(new[] { 0, 1 });
    double[] output = samples.GetColumn(2);

    // Create a multiple linear analysis for the variables
    var analysis = new MultipleLinearRegressionAnalysis(intercept: true);

    // Compute the analysis and obtain the estimated regression
    MultipleLinearRegression regression = analysis.Learn(inputs, output);
    #endregion

    // A summary ANOVA could be shown with:
    // Accord.Controls.DataGridBox.Show(regression.Table);

    #region doc_learn_part2
    // The analysis exposes the linear coefficients and their standard errors:
    double[] coefficients = analysis.CoefficientValues;
    double[] standardErrors = analysis.StandardErrors;

    // Coefficients of performance, such as r²
    double rSquared = analysis.RSquared; // 0.62879

    // Hypothesis tests for the whole model
    ZTest zTest = analysis.ZTest; // 0.99999
    FTest fTest = analysis.FTest; // 0.01898

    // and for individual coefficients
    TTest tTestA = analysis.Coefficients[0].TTest; // 0.00622
    TTest tTestB = analysis.Coefficients[1].TTest; // 0.53484

    // and also confidence intervals
    DoubleRange confidenceA = analysis.Coefficients[0].Confidence; // [3.2616, 14.2193]

    // The analysis can predict an output for a new sample
    double prediction = analysis.Regression.Transform(new double[] { 10, 15 });

    // with a confidence interval for the prediction:
    DoubleRange confidenceInterval = analysis.GetConfidenceInterval(new double[] { 10, 15 });

    // and a prediction interval for the same prediction:
    DoubleRange predictionInterval = analysis.GetPredictionInterval(new double[] { 10, 15 });
    #endregion

    Assert.AreEqual(3, coefficients.Length);
    Assert.AreEqual(8.7405051051757816, coefficients[0]);
    Assert.AreEqual(1.1198079243314365, coefficients[1], 1e-10);
    Assert.AreEqual(-19.604474518407862, coefficients[2], 1e-10);
    Assert.IsFalse(coefficients.HasNaN());

    Assert.AreEqual(2.375916659234715, standardErrors[0], 1e-10);
    Assert.AreEqual(1.7268508921418664, standardErrors[1], 1e-10);
    Assert.AreEqual(30.989640986710953, standardErrors[2], 1e-10);
    Assert.IsFalse(coefficients.HasNaN());

    Assert.AreEqual(0.62879941171298936, rSquared, 1e-10);
    Assert.AreEqual(0.99999999999999822, zTest.PValue, 1e-10);
    Assert.AreEqual(0.018986050133298293, fTest.PValue, 1e-10);
    Assert.AreEqual(0.0062299844256985537, tTestA.PValue, 1e-10);
    Assert.AreEqual(0.53484850318449118, tTestB.PValue, 1e-14);
    Assert.IsFalse(Double.IsNaN(tTestB.PValue));

    Assert.AreEqual(3.2616314640800566, confidenceA.Min, 1e-10);
    Assert.AreEqual(14.219378746271506, confidenceA.Max, 1e-10);

    // Cross-check the analysis intervals against the equivalent
    // low-level computation on the regression object itself:
    double[][] information = analysis.InformationMatrix;
    double standardError = regression.GetStandardError(inputs, output);
    DoubleRange expectedInterval = regression.GetConfidenceInterval(new double[] { 10, 15 }, standardError, inputs.Length, information);
    Assert.AreEqual(expectedInterval.Min, confidenceInterval.Min, 1e-10);
    Assert.AreEqual(expectedInterval.Max, confidenceInterval.Max, 1e-10);

    Assert.AreEqual(55.27840511658215, confidenceInterval.Min, 1e-10);
    Assert.AreEqual(113.91698568006086, confidenceInterval.Max, 1e-10);
    Assert.AreEqual(28.783074454641557, predictionInterval.Min, 1e-10);
    Assert.AreEqual(140.41231634200145, predictionInterval.Max, 1e-10);
}