/// <summary>
/// Computes the adjusted R² between <paramref name="expected"/> and
/// <paramref name="predicted"/> using <see cref="RSquaredLoss"/> with <c>Adjust = true</c>.
/// </summary>
/// <remarks>
/// This overload forwards <c>expected.Length</c> (the number of samples) as the
/// <c>numberOfInputs</c> constructor argument of <see cref="RSquaredLoss"/>. That value is
/// kept so existing callers see unchanged results.
/// NOTE(review): <c>numberOfInputs</c> is meant to be the number of regressors of the model,
/// not the number of samples, so the adjustment computed here is very likely not the one
/// intended. Prefer the overload that takes <c>numberOfInputs</c> explicitly.
/// </remarks>
/// <param name="expected">The observed (ground-truth) values.</param>
/// <param name="predicted">The values predicted by the model.</param>
/// <returns>The value returned by <c>RSquaredLoss.Loss</c> for <paramref name="predicted"/>.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="expected"/> or <paramref name="predicted"/> is <c>null</c>.
/// </exception>
public static double AdjustedRSquared(double[] expected, double[] predicted)
{
    if (expected == null)
    {
        throw new System.ArgumentNullException(nameof(expected));
    }

    // Legacy behaviour: the sample count is used as the number of inputs.
    return AdjustedRSquared(expected, predicted, expected.Length);
}

/// <summary>
/// Computes the adjusted R² between <paramref name="expected"/> and
/// <paramref name="predicted"/> for a model with <paramref name="numberOfInputs"/> regressors.
/// </summary>
/// <param name="expected">The observed (ground-truth) values.</param>
/// <param name="predicted">The values predicted by the model.</param>
/// <param name="numberOfInputs">
/// Value passed as the <c>numberOfInputs</c> constructor argument of <see cref="RSquaredLoss"/>.
/// </param>
/// <returns>The value returned by <c>RSquaredLoss.Loss</c> for <paramref name="predicted"/>.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="expected"/> or <paramref name="predicted"/> is <c>null</c>.
/// </exception>
public static double AdjustedRSquared(double[] expected, double[] predicted, int numberOfInputs)
{
    if (expected == null)
    {
        throw new System.ArgumentNullException(nameof(expected));
    }

    if (predicted == null)
    {
        throw new System.ArgumentNullException(nameof(predicted));
    }

    var loss = new RSquaredLoss(numberOfInputs, expected) { Adjust = true };
    return loss.Loss(predicted);
}
/// <summary>
/// Fits a multiple linear regression with an intercept term to the given data using
/// ordinary least squares, prints the fitted parameters and several goodness-of-fit
/// measures to the console, and returns the fitted model.
/// </summary>
/// <param name="inputs">The input vectors, one row per sample.</param>
/// <param name="outputs">The expected output for each row of <paramref name="inputs"/>.</param>
/// <returns>The regression learned from the data.</returns>
public MultipleLinearRegression Learn(double[][] inputs, double[] outputs)
{
    var ols = new OrdinaryLeastSquares()
    {
        UseIntercept = true
    };

    // Use Ordinary Least Squares to estimate a regression model
    MultipleLinearRegression regression = ols.Learn(inputs, outputs);

    // We can compute the predicted points using
    double[] predicted = regression.Transform(inputs);

    // And the squared error loss using
    double error = new SquareLoss(outputs).Loss(predicted);

    // Take the number of regressors from the fitted model instead of assuming
    // two-dimensional inputs, so the adjusted r² below is right for any input width.
    int numberOfInputs = regression.NumberOfInputs;

    // We can also compute other measures, such as the coefficient of determination r²
    double r2 = new RSquaredLoss(numberOfInputs: numberOfInputs, expected: outputs).Loss(predicted);

    // We can also compute the adjusted or weighted versions of r² using
    var r2loss = new RSquaredLoss(numberOfInputs: numberOfInputs, expected: outputs)
    {
        Adjust = true,
        // Weights = weights; // (if you have a weighted problem)
    };

    double ar2 = r2loss.Loss(predicted);

    // Alternatively, we can also use the less generic, but maybe more user-friendly method directly:
    double ur2 = regression.CoefficientOfDetermination(inputs, outputs, adjust: true);

    Console.WriteLine("Weights:");
    foreach (var w in regression.Weights)
    {
        Console.WriteLine($",{w}");
    }

    Console.WriteLine("Intercept:");
    Console.WriteLine($",{regression.Intercept}");

    Console.WriteLine($"error:{error}");
    Console.WriteLine($"r2:{r2}");

    // NOTE(review): this interpolates the loss object itself (its ToString()),
    // not a score; the adjusted score is printed as "ar2" on the next line.
    Console.WriteLine($"r2loss:{r2loss}");
    Console.WriteLine($"ar2:{ar2}");
    Console.WriteLine($"ur2:{ur2}");

    return regression;
}
/// <summary>
///   Gets the coefficient of determination, also known as R² (r-squared).
/// </summary>
///
/// <remarks>
///   <para>
///   The coefficient of determination is used in the context of statistical models
///   whose main purpose is the prediction of future outcomes on the basis of other
///   related information. It is the proportion of variability in a data set that
///   is accounted for by the statistical model. It provides a measure of how well
///   future outcomes are likely to be predicted by the model.</para>
///   <para>
///   The R² coefficient of determination is a statistical measure of how well the
///   regression line approximates the real data points. An R² of 1.0 indicates
///   that the regression line perfectly fits the data.</para>
/// </remarks>
///
/// <param name="inputs">The inputs that are passed to <c>Transform</c> to obtain predictions.</param>
/// <param name="outputs">The expected outputs the predictions are scored against.</param>
/// <param name="adjust">Value assigned to the loss's <c>Adjust</c> property.</param>
/// <param name="weights">
///   Optional weights assigned to the loss's <c>Weights</c> property; left untouched when <c>null</c>.
/// </param>
///
/// <returns>The R² (r-squared) coefficient for the given data.</returns>
///
public double CoefficientOfDetermination(double[] inputs, double[] outputs, bool adjust, double[] weights = null)
{
    var loss = new RSquaredLoss(NumberOfInputs, outputs)
    {
        Adjust = adjust
    };

    if (weights != null)
    {
        loss.Weights = weights;
    }

    // Predictions are produced only after the loss has been fully configured.
    var predictions = Transform(inputs);
    return loss.Loss(predictions);
}
/// <summary>
/// Scores <c>Predictor.MultipleGeneralRegression</c> on <c>PredictorPoints</c> against
/// <c>FrequencyLabelsDouble</c> and writes the resulting R² to the console.
/// </summary>
/// <remarks>
/// NOTE(review): <c>PredictorPoints.Length</c> is passed as <c>numberOfInputs</c>. If
/// <c>PredictorPoints</c> holds one row per sample this is the sample count rather than
/// the number of regressors — confirm before enabling an adjusted score here.
/// </remarks>
public static void RegressionStatistics()
{
    // Predictions of the fitted model for every predictor point.
    var model = Predictor.MultipleGeneralRegression;
    double[] fitted = model.Transform(PredictorPoints);

    // Coefficient of determination of those predictions.
    var scorer = new RSquaredLoss(numberOfInputs: PredictorPoints.Length, expected: FrequencyLabelsDouble);
    double score = scorer.Loss(fitted);

    Console.WriteLine($"Multiple Linear Regression R^2 VALIDATION: {score}\n");
    Console.Write("Multiple Linear regression fit succesfully!");
}
/// <summary>
/// Prints in-sample and out-of-sample RMSE and R² for a regression model and shows a
/// scatter plot of the actual test targets against the test predictions.
/// </summary>
/// <param name="modelName">Name of the model, used in the scatter plot title.</param>
/// <param name="regInSamplePreds">Model predictions for the training samples.</param>
/// <param name="regOutSamplePreds">Model predictions for the test samples.</param>
/// <param name="trainX">Training feature rows; only the width of the first row is used.</param>
/// <param name="trainY">Training targets the in-sample predictions are scored against.</param>
/// <param name="testX">Test feature rows; only the width of the first row is used.</param>
/// <param name="testY">Test targets the out-of-sample predictions are scored against.</param>
private static void ValidateModelResults(string modelName, double[] regInSamplePreds, double[] regOutSamplePreds, double[][] trainX, double[] trainY, double[][] testX, double[] testY)
{
    // The losses must be built from the targets (trainY / testY). Building them from the
    // feature matrices scores the predictions against the inputs instead of the labels.

    // RMSE for in-sample
    double regInSampleRMSE = Math.Sqrt(new SquareLoss(trainY).Loss(regInSamplePreds));
    // RMSE for out-sample
    double regOutSampleRMSE = Math.Sqrt(new SquareLoss(testY).Loss(regOutSamplePreds));

    Console.WriteLine("RMSE: {0:0.0000} (Train) vs. {1:0.0000} (Test)", regInSampleRMSE, regOutSampleRMSE);

    // R^2 for in-sample (the number of inputs is the width of a feature row)
    double regInSampleR2 = new RSquaredLoss(trainX[0].Length, trainY).Loss(regInSamplePreds);
    // R^2 for out-sample
    double regOutSampleR2 = new RSquaredLoss(testX[0].Length, testY).Loss(regOutSamplePreds);

    Console.WriteLine("R^2: {0:0.0000} (Train) vs. {1:0.0000} (Test)", regInSampleR2, regOutSampleR2);

    // Scatter Plot of expected and actual
    ScatterplotBox.Show(
        String.Format("Actual vs. Prediction ({0})", modelName), testY, regOutSamplePreds
    );
}
/// <summary>
/// Fits a degree-2 polynomial to the key/value pairs of <paramref name="values"/>
/// (keys as inputs, values as outputs), records the fit's R² in <c>r2Values_</c> under
/// <paramref name="result"/>, and returns the fitted polynomial.
/// </summary>
/// <param name="values">Data points: each key is an input, each value the matching output.</param>
/// <param name="result">Key under which the R² score is added to <c>r2Values_</c>.</param>
/// <returns>The learned polynomial regression.</returns>
private PolynomialRegression GenerateRegressionFitting(SortedDictionary<double, double> values, char result)
{
    // Split the dictionary into parallel input / output arrays.
    double[] xs = values.Keys.ToArray();
    double[] ys = values.Values.ToArray();

    // Learn a second-degree polynomial with least squares.
    var learner = new PolynomialLeastSquares { Degree = 2 };
    PolynomialRegression fitted = learner.Learn(xs, ys);

    // Kept for inspection in a debugger; none of these values is used below.
#pragma warning disable IDE0059 // Unnecessary assignment of a value
    string description = fitted.ToString("N1");
    double[] coefficients = fitted.Weights;
    double constantTerm = fitted.Intercept;
#pragma warning restore IDE0059 // Unnecessary assignment of a value

    // Score the polynomial on the same data it was fitted to.
    double[] fittedYs = fitted.Transform(xs);
    var scorer = new RSquaredLoss(ys.Length, ys);
    double score = scorer.Loss(fittedYs);

    // Notes carried over from the original author:
    //   should be > 0.85 (close to 1 is ok)
    //   LastGamesMetric:   0.77 0.81 0.08
    //   GoalsScoredMetric: 0.75 0.85 0.02

    // A score of exactly 1.0 is stored as 0.0.
    r2Values_.Add(result, score == 1.0 ? 0.0 : score);

    return fitted;
}
/// <summary>
/// Learns a multiple linear regression on the one-hot encoded Servo dataset and checks
/// the model dimensions, the squared error and the plain / adjusted r² values.
/// </summary>
public void learn_test_2()
{
    #region doc_learn_2
    // Let's say we would like to predict a continuous number from a set
    // of discrete and continuous input variables. For this, we will
    // be using the Servo dataset from UCI's Machine Learning repository
    // as an example: http://archive.ics.uci.edu/ml/datasets/Servo

    // Create a Servo dataset
    Servo servo = new Servo();
    object[][] instances = servo.Instances; // 167 x 4
    double[] outputs = servo.Output;        // 167 x 1

    // This dataset contains 4 columns, where the first two are
    // symbolic (having possible values A, B, C, D, E), and the
    // last two are continuous.

    // We will use a codification filter to transform the symbolic
    // variables into one-hot vectors, while keeping the other two
    // continuous variables intact:
    var codebook = new Codification<object>()
    {
        { "motor", CodificationVariable.Categorical },
        { "screw", CodificationVariable.Categorical },
        { "pgain", CodificationVariable.Continuous },
        { "vgain", CodificationVariable.Continuous },
    };

    // Learn the codebook
    codebook.Learn(instances);

    // We can gather some info about the problem:
    int numberOfInputs = codebook.NumberOfInputs;   // should be 4 (since there are 4 variables)
    int numberOfOutputs = codebook.NumberOfOutputs; // should be 12 (due to their one-hot encodings)

    // Now we can use it to obtain double[] vectors:
    double[][] inputs = codebook.ToDouble().Transform(instances);

    // We will use Ordinary Least Squares to create a
    // linear regression model with an intercept term
    var ols = new OrdinaryLeastSquares()
    {
        UseIntercept = true
    };

    // Use Ordinary Least Squares to estimate a regression model:
    MultipleLinearRegression regression = ols.Learn(inputs, outputs);

    // We can compute the predicted points using:
    double[] predicted = regression.Transform(inputs);

    // And the squared error using the SquareLoss class:
    double error = new SquareLoss(outputs).Loss(predicted);

    // We can also compute other measures, such as the coefficient of determination r² using:
    double r2 = new RSquaredLoss(numberOfOutputs, outputs).Loss(predicted); // should be 0.55086630162967354

    // Or the adjusted or weighted versions of r² using:
    var r2loss = new RSquaredLoss(numberOfOutputs, outputs)
    {
        Adjust = true,
        // Weights = weights; // (uncomment if you have a weighted problem)
    };

    double ar2 = r2loss.Loss(predicted); // should be 0.51586887058782993

    // Alternatively, we can also use the less generic, but maybe more user-friendly method directly:
    double ur2 = regression.CoefficientOfDetermination(inputs, outputs, adjust: true); // should be 0.51586887058782993
    #endregion

    Assert.AreEqual(4, numberOfInputs);
    Assert.AreEqual(12, numberOfOutputs);
    Assert.AreEqual(12, regression.NumberOfInputs);
    Assert.AreEqual(1, regression.NumberOfOutputs);

    Assert.AreEqual(1.0859586717266123, error, 1e-6);

    double[] expected = regression.Compute(inputs);
    double[] actual = regression.Transform(inputs);
    Assert.IsTrue(expected.IsEqual(actual, 1e-10));

    // Compare with a tolerance: exact equality on computed doubles is brittle
    // across runtimes and hardware.
    Assert.AreEqual(0.55086630162967354, r2, 1e-10);
    Assert.AreEqual(0.51586887058782993, ar2, 1e-10);
    Assert.AreEqual(0.51586887058782993, ur2, 1e-10);
}
/// <summary>
/// Learns a plane ("ax + by + c = z") from four points lying at z = 1 and checks the
/// fitted parameters, the squared error and the plain / adjusted r² values.
/// </summary>
public void learn_test()
{
    #region doc_learn
    // We will try to model a plane as an equation in the form
    // "ax + by + c = z". We have two input variables (x and y)
    // and we will be trying to find two parameters a and b and
    // an intercept term c.

    // We will use Ordinary Least Squares to create a
    // linear regression model with an intercept term
    var ols = new OrdinaryLeastSquares()
    {
        UseIntercept = true
    };

    // Now suppose you have some points
    double[][] inputs =
    {
        new double[] { 1, 1 },
        new double[] { 0, 1 },
        new double[] { 1, 0 },
        new double[] { 0, 0 },
    };

    // located in the same Z (z = 1)
    double[] outputs = { 1, 1, 1, 1 };

    // Use Ordinary Least Squares to estimate a regression model
    MultipleLinearRegression regression = ols.Learn(inputs, outputs);

    // As result, we will be given the following:
    double a = regression.Weights[0]; // a = 0
    double b = regression.Weights[1]; // b = 0
    double c = regression.Intercept;  // c = 1

    // This is the plane described by the equation
    // ax + by + c = z => 0x + 0y + 1 = z => 1 = z.

    // We can compute the predicted points using
    double[] predicted = regression.Transform(inputs);

    // And the squared error loss using
    double error = new SquareLoss(outputs).Loss(predicted);

    // We can also compute other measures, such as the coefficient of determination r²
    double r2 = new RSquaredLoss(numberOfInputs: 2, expected: outputs).Loss(predicted); // should be 1

    // We can also compute the adjusted or weighted versions of r² using
    var r2loss = new RSquaredLoss(numberOfInputs: 2, expected: outputs)
    {
        Adjust = true,
        // Weights = weights; // (if you have a weighted problem)
    };

    double ar2 = r2loss.Loss(predicted); // should be 1

    // Alternatively, we can also use the less generic, but maybe more user-friendly method directly:
    double ur2 = regression.CoefficientOfDetermination(inputs, outputs, adjust: true); // should be 1
    #endregion

    Assert.AreEqual(2, regression.NumberOfInputs);
    Assert.AreEqual(1, regression.NumberOfOutputs);

    Assert.AreEqual(0.0, a, 1e-6);
    Assert.AreEqual(0.0, b, 1e-6);
    Assert.AreEqual(1.0, c, 1e-6);
    Assert.AreEqual(0.0, error, 1e-6);

    double[] expected = regression.Compute(inputs);
    double[] actual = regression.Transform(inputs);
    Assert.IsTrue(expected.IsEqual(actual, 1e-10));

    // Compare with a tolerance: exact equality on computed doubles is brittle
    // across runtimes and hardware.
    Assert.AreEqual(1.0, r2, 1e-10);
    Assert.AreEqual(1.0, ar2, 1e-10);
    Assert.AreEqual(1.0, ur2, 1e-10);
}
/// <summary>
/// Reads <c>msc_appel_data.csv</c>, fits a multiple linear regression of the
/// <c>res_positive</c> column on the remaining columns (with the month name encoded as a
/// number), prints the test-set predictions, squared error and R² values, and shows a
/// scatter plot of the recorded and the predicted values.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    // Row boundaries used to separate the training and test samples:
    // rows [0, traintPos) are used for training, rows [traintPos, testPos) for testing.
    int traintPos = 18;
    int testPos = 22;
    int allData = testPos + (testPos - traintPos);

    // Force "." as the decimal separator so the floating point values in the csv parse correctly.
    System.Globalization.CultureInfo customCulture = (System.Globalization.CultureInfo)System.Threading.Thread.CurrentThread.CurrentCulture.Clone();
    customCulture.NumberFormat.NumberDecimalSeparator = ".";
    System.Threading.Thread.CurrentThread.CurrentCulture = customCulture;

    // Read data
    string CsvFilePath = @"msc_appel_data.csv";
    DataTable mscTable = new CsvReader(CsvFilePath, true).ToTable();

    // Encode the string values of months into numerical values
    Dictionary<string, double> monthNames = new Dictionary<string, double>
    {
        ["January"] = 1,
        ["February"] = 2,
        ["March"] = 3,
        ["April"] = 4,
        ["May"] = 5,
        ["June"] = 6,
        ["July"] = 7,
        ["August"] = 8,
        ["September"] = 9,
        ["October"] = 10,
        ["November"] = 11,
        ["December"] = 12
    };

    string[] months = mscTable.Columns["month"].ToArray<String>();
    double[] dMonths = new double[months.Length];
    for (int i = 0; i < months.Length; i++)
    {
        dMonths[i] = monthNames[months[i]];
    }

    // Select the target column
    double[] OutResPositive = mscTable.Columns["res_positive"].ToArray();

    // Separate the train and test target samples
    double[] OutResPositiveTrain = OutResPositive.Get(0, traintPos);
    double[] OutResPositiveTest = OutResPositive.Get(traintPos, testPos);

    // Delete unneeded columns
    mscTable.Columns.Remove("total_appeals");
    mscTable.Columns.Remove("month");
    mscTable.Columns.Remove("res_positive");
    mscTable.Columns.Remove("year");

    // Add the numerically encoded month as a new double column
    DataColumn newCol = new DataColumn("dMonth", typeof(double));
    newCol.AllowDBNull = true;
    mscTable.Columns.Add(newCol);

    // Fill the new column
    int counter = 0;
    foreach (DataRow row in mscTable.Rows)
    {
        row["dMonth"] = dMonths[counter];
        counter++;
    }

    // Receive input data from the table
    double[][] inputs = mscTable.ToArray();

    // Separate the train and test input samples
    double[][] inputsTrain = inputs.Get(0, traintPos);
    double[][] inputsTest = inputs.Get(traintPos, testPos);

    // Linear regression model for several features, with an intercept term
    var ols = new OrdinaryLeastSquares()
    {
        UseIntercept = true
    };

    MultipleLinearRegression regression = ols.Learn(inputsTrain, OutResPositiveTrain);

    // Make a prediction for the test rows
    double[] predicted = regression.Transform(inputsTest);

    // Console output
    for (int i = 0; i < testPos - traintPos; i++)
    {
        Console.WriteLine("predicted: {0}   real: {1}", predicted[i], OutResPositiveTest[i]);
    }

    // Print the squared error using the SquareLoss class:
    Console.WriteLine("error = {0}", new SquareLoss(OutResPositiveTest).Loss(predicted));

    // Print the coefficient of determination. The number of inputs is taken from the
    // fitted model rather than hard-coded, so it follows the csv's column count.
    double r2 = new RSquaredLoss(numberOfInputs: regression.NumberOfInputs, expected: OutResPositiveTest).Loss(predicted);
    Console.WriteLine("R^2 = {0}", r2);

    // Alternative way of getting the (adjusted) coefficient of determination. It must be
    // evaluated on the test inputs so the rows line up with OutResPositiveTest.
    double ur2 = regression.CoefficientOfDetermination(inputsTest, OutResPositiveTest, adjust: true);
    Console.WriteLine("alternative version of R2 = {0}", ur2);

    Console.WriteLine("Press enter and close chart to exit");

    // Chart data: x positions and a class label per point
    int[] classes = new int[allData];
    double[] mountX = new double[allData];
    for (int i = 0; i < allData; i++)
    {
        if (i < testPos)
        {
            // csv data is class 0
            mountX[i] = i + 1;
            classes[i] = 0;
        }
        else
        {
            // predicted values are class 1 and are plotted over the test positions
            mountX[i] = i - (testPos - traintPos) + 1;
            classes[i] = 1;
        }
    }

    // Points of the chart: all recorded values followed by the predictions
    List<double> OutChart = new List<double>();
    OutChart.AddRange(OutResPositive);
    OutChart.AddRange(predicted);

    // Plot chart
    ScatterplotBox.Show("res_positive from months", mountX, OutChart.ToArray(), classes).Hold();

    // Pause
    Console.ReadLine();
}
/// <summary>
/// Fits one regression of daily-average usage per distinct (cooling, heating) balance
/// point pair and returns the fits that were accepted.
/// </summary>
/// <remarks>
/// For each group of readings that share the same balance points:
/// <list type="bullet">
/// <item>if the summed usage is zero, an <c>AccordResult</c> carrying only the pair is added;</item>
/// <item>if both balance points are 0, usage is fitted against a vector of ones (no intercept)
/// and the result is always added;</item>
/// <item>if both are non-zero, a multiple regression on heating and cooling degree days is
/// fitted and kept only when every coefficient's t-test is significant;</item>
/// <item>otherwise a simple regression on heating (or cooling) degree days is fitted and
/// kept only when the slope's t-test is significant.</item>
/// </list>
/// An exception raised while processing a group is written to the console and that group
/// is skipped.
/// </remarks>
/// <param name="allBalancePointPairs">Readings for every candidate balance point pair.</param>
/// <param name="normalParamsKey">Identifiers (AccID, UtilID, UnitID) used in error output.</param>
/// <returns>The accepted results; rejected fits are not returned.</returns>
private List<AccordResult> CalculateLinearRegression(List<BalancePointPair> allBalancePointPairs, WthNormalParams normalParamsKey)
{
    var allBalancePointGroups = allBalancePointPairs.GroupBy(s => new { s.CoolingBalancePoint, s.HeatingBalancePoint });

    List<AccordResult> accordResults = new List<AccordResult>();

    // Fits that failed their significance test. Collected but not returned.
    List<AccordResult> rejectedAccords = new List<AccordResult>();

    foreach (var group in allBalancePointGroups)
    {
        try
        {
            List<BalancePointPair> IdenticalBalancePointPairsForAllReadings = group.ToList();
            BalancePointPair _pointPair = IdenticalBalancePointPairsForAllReadings.First();
            int readingsCount = IdenticalBalancePointPairsForAllReadings.Count;

            double[] fullYData = new double[readingsCount];
            double[] fullYDataDailyAvg = new double[readingsCount];
            double[][] hcddMatrix = new double[readingsCount][];
            double[] avgHddsForEachReadingInYear = new double[readingsCount];
            double[] avgCddsForEachReadingInYear = new double[readingsCount];

            // Index directly instead of calling IndexOf per element: IndexOf is O(n) per
            // lookup and returns the first match, which fills the wrong slot when two
            // readings compare equal.
            for (int i = 0; i < readingsCount; i++)
            {
                BalancePointPair balancePointPair = IdenticalBalancePointPairsForAllReadings[i];

                fullYData[i] = (balancePointPair.ActualUsage);
                fullYDataDailyAvg[i] = (balancePointPair.ActualUsage / balancePointPair.DaysInReading);

                hcddMatrix[i] = new double[]
                {
                    (balancePointPair.HeatingDegreeDays / balancePointPair.DaysInReading),
                    (balancePointPair.CoolingDegreeDays / balancePointPair.DaysInReading)
                };

                avgHddsForEachReadingInYear[i] = hcddMatrix[i][0];
                avgCddsForEachReadingInYear[i] = hcddMatrix[i][1];
            }

            if (fullYData.Sum() == 0)
            {
                // No usage at all: record the pair without fitting anything.
                AccordResult empty = new AccordResult
                {
                    bpPair = _pointPair
                };

                accordResults.Add(empty);
            }
            else if (_pointPair.HeatingBalancePoint == 0 && _pointPair.CoolingBalancePoint == 0)
            {
                // No weather dependence: fit usage against a constant vector of ones.
                double[] onesVector = new double[readingsCount];

                for (int i = 0; i < readingsCount; i++)
                {
                    onesVector[i] = 1;
                }

                double slopeThroughOrigin = Fit.LineThroughOrigin(onesVector, fullYDataDailyAvg);

                OrdinaryLeastSquares ols = new OrdinaryLeastSquares()
                {
                    UseIntercept = false
                };

                SimpleLinearRegression regressionAccord = ols.Learn(onesVector, fullYDataDailyAvg);

                double r2 = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(
                    onesVector.Select(x => x * slopeThroughOrigin), fullYDataDailyAvg);

                AccordResult accordResult = new AccordResult()
                {
                    SimpleLinearRegression = regressionAccord,
                    R2Accord = r2,
                    IsSimpleSingleRegression = true,
                    HeatingBP = _pointPair.HeatingBalancePoint,
                    CoolingBP = _pointPair.CoolingBalancePoint,
                    // The model has no intercept term; its slope on the ones vector is stored here.
                    Intercept = regressionAccord.Slope,
                    bpPair = _pointPair
                };

                accordResults.Add(accordResult);
            }
            else if (_pointPair.CoolingBalancePoint != 0 && _pointPair.HeatingBalancePoint != 0)
            {
                try
                {
                    MultipleLinearRegressionAnalysis mlra = new MultipleLinearRegressionAnalysis(intercept: true);
                    mlra.Learn(hcddMatrix, fullYDataDailyAvg);

                    var regressionAccord = mlra.Regression;

                    double[] predicted = regressionAccord.Transform(hcddMatrix);

                    double r2Accord = new RSquaredLoss(numberOfInputs: 2, expected: fullYDataDailyAvg) { Adjust = false }.Loss(predicted);

                    double r2Coeff = regressionAccord.CoefficientOfDetermination(hcddMatrix, fullYDataDailyAvg, adjust: false);

                    AccordResult accordResult = new AccordResult()
                    {
                        R2Accord = r2Accord,
                        R2Coeff = r2Coeff,
                        HeatingBP = _pointPair.HeatingBalancePoint,
                        CoolingBP = _pointPair.CoolingBalancePoint,
                        IsSimpleSingleRegression = false,
                        MLRA = mlra,
                        Intercept = regressionAccord.Intercept,
                        bpPair = _pointPair,
                        IsMultipleLinearRegression = true
                    };

                    if (mlra.Coefficients.All(x => x.TTest.Significant))
                    {
                        accordResults.Add(accordResult);
                    }
                    else
                    {
                        rejectedAccords.Add(accordResult);
                    }
                }
                catch (Exception e)
                {
                    Console.WriteLine(e.Message + " " + e.StackTrace);
                }
            }
            else if (_pointPair.HeatingBalancePoint > 0)
            {
                OrdinaryLeastSquares ols = new OrdinaryLeastSquares()
                {
                    UseIntercept = true
                };

                SimpleLinearRegression regressionAccord = ols.Learn(avgHddsForEachReadingInYear, fullYDataDailyAvg);

                double[] predictedAccord = regressionAccord.Transform(avgHddsForEachReadingInYear);

                double rAccord = new RSquaredLoss(1, fullYDataDailyAvg).Loss(predictedAccord);

                // Standard error of the slope: residual standard deviation divided by the
                // square root of the inputs' sum of squared deviations.
                // NOTE(review): degrees of freedom come from ReadingsInNormalYear, not from
                // readingsCount — confirm these always agree.
                int degreesOfFreedom = _pointPair.ReadingsInNormalYear - 2;
                double ssx = Math.Sqrt((avgHddsForEachReadingInYear.Subtract(avgHddsForEachReadingInYear.Mean())).Pow(2).Sum());
                double s = Math.Sqrt(((fullYDataDailyAvg.Subtract(predictedAccord).Pow(2)).Sum()) / degreesOfFreedom);
                double seSubB = s / ssx;

                double hypothesizedValue = 0;

                TTest tTest = new TTest(
                    estimatedValue: regressionAccord.Slope, standardError: seSubB, degreesOfFreedom: degreesOfFreedom,
                    hypothesizedValue: hypothesizedValue, alternate: OneSampleHypothesis.ValueIsDifferentFromHypothesis
                    );

                AccordResult accordResult = new AccordResult()
                {
                    SimpleLinearRegression = regressionAccord,
                    R2Accord = rAccord,
                    IsSimpleSingleRegression = true,
                    HeatingBP = _pointPair.HeatingBalancePoint,
                    CoolingBP = _pointPair.CoolingBalancePoint,
                    TTest = tTest,
                    Intercept = regressionAccord.Intercept,
                    bpPair = _pointPair
                };

                if (tTest.Significant)
                {
                    accordResults.Add(accordResult);
                }
                else
                {
                    rejectedAccords.Add(accordResult);
                }
            }
            else if (_pointPair.CoolingBalancePoint > 0)
            {
                OrdinaryLeastSquares ols = new OrdinaryLeastSquares()
                {
                    UseIntercept = true
                };

                SimpleLinearRegression regressionAccord = ols.Learn(avgCddsForEachReadingInYear, fullYDataDailyAvg);

                double[] predictedAccord = regressionAccord.Transform(avgCddsForEachReadingInYear);
                double rAccord = new RSquaredLoss(1, fullYDataDailyAvg).Loss(predictedAccord);

                // Same slope standard error computation as the heating-only case, on cooling degree days.
                int degreesOfFreedom = _pointPair.ReadingsInNormalYear - 2;
                double ssx = Math.Sqrt(avgCddsForEachReadingInYear.Subtract(avgCddsForEachReadingInYear.Mean()).Pow(2).Sum());
                double s = Math.Sqrt(((fullYDataDailyAvg.Subtract(predictedAccord).Pow(2)).Sum()) / degreesOfFreedom);
                double seSubB = s / ssx;

                double hypothesizedValue = 0;

                TTest tTest = new TTest(
                    estimatedValue: regressionAccord.Slope, standardError: seSubB, degreesOfFreedom: degreesOfFreedom,
                    hypothesizedValue: hypothesizedValue, alternate: OneSampleHypothesis.ValueIsDifferentFromHypothesis
                    );

                AccordResult accordResult = new AccordResult()
                {
                    SimpleLinearRegression = regressionAccord,
                    R2Accord = rAccord,
                    IsSimpleSingleRegression = true,
                    HeatingBP = _pointPair.HeatingBalancePoint,
                    CoolingBP = _pointPair.CoolingBalancePoint,
                    TTest = tTest,
                    Intercept = regressionAccord.Intercept,
                    bpPair = _pointPair
                };

                if (tTest.Significant)
                {
                    accordResults.Add(accordResult);
                }
                else
                {
                    rejectedAccords.Add(accordResult);
                }
            }
        }
        catch (Exception e)
        {
            Console.WriteLine(normalParamsKey.AccID + " " + normalParamsKey.UtilID + " " + normalParamsKey.UnitID + " " + e.Message + e.StackTrace);
        }
    }

    return accordResults;
}
/// <summary>
/// Fits one regression of daily-average usage per distinct (cooling, heating) balance
/// point pair and returns the accepted fit with the highest <c>R2Accord</c> among those
/// whose intercept is non-negative.
/// </summary>
/// <remarks>
/// For each group of readings that share the same balance points:
/// <list type="bullet">
/// <item>if both balance points are 0, usage is fitted against a vector of ones through the
/// origin and the result is always kept;</item>
/// <item>if both are non-zero, a multiple regression on heating and cooling degree days is
/// fitted and kept only when every coefficient's t-test is significant;</item>
/// <item>otherwise a simple regression on heating (or cooling) degree days is fitted and
/// kept only when the slope's t-test is significant.</item>
/// </list>
/// An exception raised while processing a group is logged with <c>Log.Debug</c> and that
/// group is skipped.
/// </remarks>
/// <param name="allBalancePointPairs">Readings for every candidate balance point pair.</param>
/// <param name="normalParamsKey">
/// Supplies <c>MoCt</c> for the degrees of freedom and the identifiers used in log output.
/// </param>
/// <returns>
/// The best accepted result, or <c>null</c> when no accepted result has a non-negative
/// intercept (assuming <c>AccordResult</c> is a reference type).
/// </returns>
private AccordResult CalculateLinearRegression(List<BalancePointPair> allBalancePointPairs, WthNormalParams normalParamsKey)
{
    var allBalancePointGroups = allBalancePointPairs.GroupBy(s => new { s.CoolingBalancePoint, s.HeatingBalancePoint });

    List<AccordResult> accordResults = new List<AccordResult>();

    foreach (var group in allBalancePointGroups)
    {
        try
        {
            List<BalancePointPair> IdenticalBalancePointPairsFromAllReadings = group.ToList();
            BalancePointPair _pointPair = IdenticalBalancePointPairsFromAllReadings.First();
            int readingsCount = IdenticalBalancePointPairsFromAllReadings.Count;

            double[] fullYDataDailyAvg = new double[readingsCount];
            double[][] hcddMatrix = new double[readingsCount][];
            double[] avgHddsForEachReadingInYear = new double[readingsCount];
            double[] avgCddsForEachReadingInYear = new double[readingsCount];

            // Index directly instead of calling IndexOf per element: IndexOf is O(n) per
            // lookup and returns the first match, which fills the wrong slot when two
            // readings compare equal.
            for (int i = 0; i < readingsCount; i++)
            {
                BalancePointPair balancePointPair = IdenticalBalancePointPairsFromAllReadings[i];

                fullYDataDailyAvg[i] = (balancePointPair.ActualUsage / balancePointPair.DaysInReading);

                hcddMatrix[i] = new double[]
                {
                    (balancePointPair.HeatingDegreeDays / balancePointPair.DaysInReading),
                    (balancePointPair.CoolingDegreeDays / balancePointPair.DaysInReading)
                };

                avgHddsForEachReadingInYear[i] = hcddMatrix[i][0];
                avgCddsForEachReadingInYear[i] = hcddMatrix[i][1];
            }

            if (_pointPair.HeatingBalancePoint == 0 && _pointPair.CoolingBalancePoint == 0)
            {
                // No weather dependence: fit usage against a constant vector of ones.
                double[] onesVector = new double[readingsCount];

                for (int i = 0; i < readingsCount; i++)
                {
                    onesVector[i] = 1;
                }

                double slopeThroughOrigin = Fit.LineThroughOrigin(onesVector, fullYDataDailyAvg);

                double r2 = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(
                    onesVector.Select(x => x * slopeThroughOrigin), fullYDataDailyAvg);

                AccordResult accordResult = new AccordResult()
                {
                    IsSimpleSingleRegression = true,
                    HeatingBP = _pointPair.HeatingBalancePoint,
                    CoolingBP = _pointPair.CoolingBalancePoint,
                    // The fitted slope on the ones vector is stored as the intercept.
                    Intercept = slopeThroughOrigin,
                    R2Accord = r2,
                };

                accordResults.Add(accordResult);
            }
            else if (_pointPair.CoolingBalancePoint != 0 && _pointPair.HeatingBalancePoint != 0)
            {
                try
                {
                    MultipleLinearRegressionAnalysis mlra = new MultipleLinearRegressionAnalysis(intercept: true);
                    mlra.Learn(hcddMatrix, fullYDataDailyAvg);

                    var regressionAccord = mlra.Regression;

                    double[] predicted = regressionAccord.Transform(hcddMatrix);

                    double r2Accord = new RSquaredLoss(numberOfInputs: 2, expected: fullYDataDailyAvg) { Adjust = false }.Loss(predicted);

                    bool FTestFailed = !mlra.FTest.Significant;

                    AccordResult accordResult = new AccordResult()
                    {
                        IsMultipleLinearRegression = true,
                        HeatingBP = _pointPair.HeatingBalancePoint,
                        CoolingBP = _pointPair.CoolingBalancePoint,
                        Intercept = regressionAccord.Intercept,
                        B2 = regressionAccord.Weights[0],
                        B4 = regressionAccord.Weights[1],
                        R2Accord = r2Accord,
                        FTestFailed = FTestFailed
                    };

                    if (mlra.Coefficients.All(x => x.TTest.Significant))
                    {
                        accordResults.Add(accordResult);
                    }
                }
                catch (Exception e)
                {
                    Log.Debug(normalParamsKey.AccID + " " + normalParamsKey.UtilID + " " + normalParamsKey.UnitID + " " + e.Message + " " + e.StackTrace);
                }
            }
            else if (_pointPair.HeatingBalancePoint > 0)
            {
                OrdinaryLeastSquares ols = new OrdinaryLeastSquares()
                {
                    UseIntercept = true
                };

                SimpleLinearRegression regressionAccord = ols.Learn(avgHddsForEachReadingInYear, fullYDataDailyAvg);

                double[] predictedAccord = regressionAccord.Transform(avgHddsForEachReadingInYear);

                double r2Accord = new RSquaredLoss(1, fullYDataDailyAvg).Loss(predictedAccord);

                // Standard error of the slope: residual standard deviation divided by the
                // square root of the inputs' sum of squared deviations.
                // NOTE(review): degrees of freedom come from normalParamsKey.MoCt, not from
                // readingsCount — confirm these always agree.
                int degreesOfFreedom = normalParamsKey.MoCt - 2;
                double ssx = Math.Sqrt((avgHddsForEachReadingInYear.Subtract(avgHddsForEachReadingInYear.Mean())).Pow(2).Sum());
                double s = Math.Sqrt(((fullYDataDailyAvg.Subtract(predictedAccord).Pow(2)).Sum()) / degreesOfFreedom);
                double seSubB = s / ssx;

                double hypothesizedValue = 0;

                TTest tTest = new TTest(
                    estimatedValue: regressionAccord.Slope, standardError: seSubB, degreesOfFreedom: degreesOfFreedom,
                    hypothesizedValue: hypothesizedValue, alternate: OneSampleHypothesis.ValueIsDifferentFromHypothesis
                    );

                AccordResult accordResult = new AccordResult()
                {
                    IsSimpleSingleRegression = true,
                    HeatingBP = _pointPair.HeatingBalancePoint,
                    Intercept = regressionAccord.Intercept,
                    B2 = regressionAccord.Slope,
                    R2Accord = r2Accord
                };

                if (tTest.Significant)
                {
                    accordResults.Add(accordResult);
                }
            }
            else if (_pointPair.CoolingBalancePoint > 0)
            {
                OrdinaryLeastSquares ols = new OrdinaryLeastSquares()
                {
                    UseIntercept = true
                };

                SimpleLinearRegression regressionAccord = ols.Learn(avgCddsForEachReadingInYear, fullYDataDailyAvg);

                double[] predictedAccord = regressionAccord.Transform(avgCddsForEachReadingInYear);
                double rAccord = new RSquaredLoss(1, fullYDataDailyAvg).Loss(predictedAccord);

                // Same slope standard error computation as the heating-only case, on cooling degree days.
                int degreesOfFreedom = normalParamsKey.MoCt - 2;
                double ssx = Math.Sqrt(avgCddsForEachReadingInYear.Subtract(avgCddsForEachReadingInYear.Mean()).Pow(2).Sum());
                double s = Math.Sqrt(((fullYDataDailyAvg.Subtract(predictedAccord).Pow(2)).Sum()) / degreesOfFreedom);
                double seSubB = s / ssx;

                double hypothesizedValue = 0;

                TTest tTest = new TTest(
                    estimatedValue: regressionAccord.Slope, standardError: seSubB, degreesOfFreedom: degreesOfFreedom,
                    hypothesizedValue: hypothesizedValue, alternate: OneSampleHypothesis.ValueIsDifferentFromHypothesis
                    );

                AccordResult accordResult = new AccordResult()
                {
                    IsSimpleSingleRegression = true,
                    CoolingBP = _pointPair.CoolingBalancePoint,
                    Intercept = regressionAccord.Intercept,
                    B4 = regressionAccord.Slope,
                    R2Accord = rAccord
                };

                if (tTest.Significant)
                {
                    accordResults.Add(accordResult);
                }
            }
        }
        catch (Exception e)
        {
            Log.Debug(normalParamsKey.AccID + " " + normalParamsKey.UtilID + " " + normalParamsKey.UnitID + " " + e.Message + e.StackTrace);
        }
    }

    // Best fit by R² among the results with a non-negative intercept. OrderByDescending is
    // a stable sort, so ties keep the order in which the results were added.
    AccordResult accordWinner = accordResults
        .Where(s => s.Intercept >= 0)
        .OrderByDescending(s => s.R2Accord)
        .FirstOrDefault();

    return accordWinner;
}
/// <summary>
/// Fits one regression model per distinct (cooling, heating) balance-point combination and
/// returns the accepted candidate with the highest R².
/// </summary>
/// <remarks>
/// The readings are grouped by balance-point pair. For every group the daily-average usage is
/// regressed against the daily-average degree days, with the model chosen from the balance points:
/// <list type="bullet">
/// <item>both zero: a constant model fitted with <c>Fit.LineThroughOrigin</c> against a vector of ones (always kept);</item>
/// <item>both non-zero: a two-variable regression (HDD and CDD) with intercept;</item>
/// <item>heating &gt; 0 only: a single-variable regression on HDD;</item>
/// <item>cooling &gt; 0 only: a single-variable regression on CDD.</item>
/// </list>
/// Regression candidates are kept only when their slope(s) pass a fixed-threshold t check and the
/// sign constraints coded in each branch. No minimum R² is enforced; R² is only used for ranking.
/// Exceptions raised while processing a group are logged at debug level and that group is skipped.
/// </remarks>
/// <param name="allBalancePointPairs">Balance-point pairs for all readings; grouped here by balance points.</param>
/// <param name="normalParamsKey">Only used to identify the account/utility/unit in log messages.</param>
/// <returns>
/// The kept candidate with <c>Intercept &gt;= 0</c> and the highest <c>R2Accord</c>; <c>null</c> when
/// no candidate qualifies; or an empty <see cref="AccordResult"/> as soon as a group's total usage
/// is not greater than zero.
/// </returns>
private AccordResult CalculateLinearRegression(List<BalancePointPair> allBalancePointPairs, WthNormalParams normalParamsKey)
{
    var allBalancePointGroups = allBalancePointPairs.GroupBy(s => new { s.CoolingBalancePoint, s.HeatingBalancePoint });

    List<AccordResult> accordResults = new List<AccordResult>();

    foreach (var group in allBalancePointGroups)
    {
        try
        {
            List<BalancePointPair> readings = group.ToList();
            BalancePointPair pointPair = readings.First();
            int readingsCount = readings.Count;

            double[] fullYData = new double[readingsCount];
            double[] fullYDataDailyAvg = new double[readingsCount];
            double[][] hcddMatrix = new double[readingsCount][];
            double[] avgHddsForEachReadingInYear = new double[readingsCount];
            double[] avgCddsForEachReadingInYear = new double[readingsCount];

            // Fill by position. Looking the slot up with IndexOf would be quadratic and would
            // map duplicate (or value-equal) elements onto the first matching slot.
            for (int i = 0; i < readingsCount; i++)
            {
                BalancePointPair reading = readings[i];

                double avgHdd = reading.HeatingDegreeDays / reading.DaysInReading;
                double avgCdd = reading.CoolingDegreeDays / reading.DaysInReading;

                fullYData[i] = reading.ActualUsage;
                fullYDataDailyAvg[i] = reading.ActualUsage / reading.DaysInReading;
                hcddMatrix[i] = new double[] { avgHdd, avgCdd };
                avgHddsForEachReadingInYear[i] = avgHdd;
                avgCddsForEachReadingInYear[i] = avgCdd;
            }

            // Leaves the whole method (not just this group) with an empty result when the total
            // usage is not greater than zero; the negated form also catches a NaN sum.
            if (!(fullYData.Sum() > 0))
            {
                return new AccordResult();
            }

            if (pointPair.HeatingBalancePoint == 0 && pointPair.CoolingBalancePoint == 0)
            {
                // No balance points: constant model, fitted as a line through the origin
                // against a vector of ones.
                double[] onesVector = Enumerable.Repeat(1.0, readingsCount).ToArray();
                double baseline = Fit.LineThroughOrigin(onesVector, fullYDataDailyAvg);

                double r2 = MathNet.Numerics.GoodnessOfFit.CoefficientOfDetermination(
                    onesVector.Select(x => x * baseline), fullYDataDailyAvg);

                AccordResult accordResult = new AccordResult()
                {
                    IsSimpleSingleRegression = true,
                    HeatingBP = pointPair.HeatingBalancePoint,
                    CoolingBP = pointPair.CoolingBalancePoint,
                    Intercept = baseline,
                    R2Accord = r2,
                };

                accordResults.Add(accordResult);
            }
            else if (pointPair.CoolingBalancePoint != 0 && pointPair.HeatingBalancePoint != 0)
            {
                try
                {
                    MultipleLinearRegressionAnalysis mlra = new MultipleLinearRegressionAnalysis(intercept: true);
                    mlra.Learn(hcddMatrix, fullYDataDailyAvg);

                    var regressionAccord = mlra.Regression;
                    double[] predictedAccord = regressionAccord.Transform(hcddMatrix);

                    double r2Accord = new RSquaredLoss(numberOfInputs: 2, expected: fullYDataDailyAvg) { Adjust = false }
                        .Loss(predictedAccord);

                    AccordResult accordResult = new AccordResult()
                    {
                        IsMultipleLinearRegression = true,
                        HeatingBP = pointPair.HeatingBalancePoint,
                        CoolingBP = pointPair.CoolingBalancePoint,
                        Intercept = regressionAccord.Intercept,
                        B2 = regressionAccord.Weights[0],
                        B4 = regressionAccord.Weights[1],
                        R2Accord = r2Accord,
                        FTestFailed = !mlra.FTest.Significant
                    };

                    int degreesOfFreedom = Convert.ToInt32(mlra.Regression.GetDegreesOfFreedom(readingsCount));

                    double seSubHdd = CoefficientStandardError(
                        avgHddsForEachReadingInYear, fullYDataDailyAvg, predictedAccord, degreesOfFreedom);
                    double seSubCdd = CoefficientStandardError(
                        avgCddsForEachReadingInYear, fullYDataDailyAvg, predictedAccord, degreesOfFreedom);

                    double tStatisticHdd = regressionAccord.Weights[0] / seSubHdd;
                    double tStatisticCdd = regressionAccord.Weights[1] / seSubCdd;

                    // NOTE(review): 1.833113 is the two-tailed 10% Student-t critical value for
                    // 9 degrees of freedom; it is not adjusted when degreesOfFreedom differs.
                    const double tCriticalTenPercent = 1.833113;

                    bool myTestHdd = Math.Abs(tStatisticHdd) >= tCriticalTenPercent;
                    bool myTestCdd = Math.Abs(tStatisticCdd) >= tCriticalTenPercent;

                    if (myTestHdd &&
                        myTestCdd &&
                        mlra.Coefficients.All(x => x.Value > 0) &&
                        mlra.Regression.Intercept > 0)
                    {
                        accordResults.Add(accordResult);
                    }
                }
                catch (Exception e)
                {
                    Log.Debug($"AccID/UtilID/UnitID: {normalParamsKey.AccID}/{normalParamsKey.UtilID}/{normalParamsKey.UnitID} >> " +
                        $"MultipleLinearRegressionAnalysis Exception: {e.Message}");
                }
            }
            else if (pointPair.HeatingBalancePoint > 0)
            {
                OrdinaryLeastSquares ols = new OrdinaryLeastSquares() { UseIntercept = true };
                SimpleLinearRegression regressionAccord = ols.Learn(avgHddsForEachReadingInYear, fullYDataDailyAvg);

                double[] predictedAccord = regressionAccord.Transform(avgHddsForEachReadingInYear);
                double r2Accord = new RSquaredLoss(1, fullYDataDailyAvg).Loss(predictedAccord);

                int degreesOfFreedom = Convert.ToInt32(regressionAccord.GetDegreesOfFreedom(readingsCount));

                double seSubB = CoefficientStandardError(
                    avgHddsForEachReadingInYear, fullYDataDailyAvg, predictedAccord, degreesOfFreedom);
                double tStatistic = regressionAccord.Slope / seSubB;

                // NOTE(review): 1.812461 is the two-tailed 10% Student-t critical value for
                // 10 degrees of freedom; it is not adjusted when degreesOfFreedom differs.
                const double tCriticalTenPercent = 1.812461;

                bool myTest = Math.Abs(tStatistic) >= tCriticalTenPercent;

                AccordResult accordResult = new AccordResult()
                {
                    IsSimpleSingleRegression = true,
                    HeatingBP = pointPair.HeatingBalancePoint,
                    Intercept = regressionAccord.Intercept,
                    B2 = regressionAccord.Slope,
                    R2Accord = r2Accord
                };

                if (myTest && accordResult.B2 > 0 && accordResult.Intercept > 0)
                {
                    accordResults.Add(accordResult);
                }
            }
            else if (pointPair.CoolingBalancePoint > 0)
            {
                OrdinaryLeastSquares ols = new OrdinaryLeastSquares() { UseIntercept = true };
                SimpleLinearRegression regressionAccord = ols.Learn(avgCddsForEachReadingInYear, fullYDataDailyAvg);

                double[] predictedAccord = regressionAccord.Transform(avgCddsForEachReadingInYear);
                double r2Accord = new RSquaredLoss(1, fullYDataDailyAvg).Loss(predictedAccord);

                int degreesOfFreedom = Convert.ToInt32(regressionAccord.GetDegreesOfFreedom(readingsCount));

                double seSubB = CoefficientStandardError(
                    avgCddsForEachReadingInYear, fullYDataDailyAvg, predictedAccord, degreesOfFreedom);
                double tStatistic = regressionAccord.Slope / seSubB;

                // NOTE(review): 1.812461 is the two-tailed 10% Student-t critical value for
                // 10 degrees of freedom; it is not adjusted when degreesOfFreedom differs.
                const double tCriticalTenPercent = 1.812461;

                bool myTest = Math.Abs(tStatistic) >= tCriticalTenPercent;

                AccordResult accordResult = new AccordResult()
                {
                    IsSimpleSingleRegression = true,
                    CoolingBP = pointPair.CoolingBalancePoint,
                    Intercept = regressionAccord.Intercept,
                    B4 = regressionAccord.Slope,
                    R2Accord = r2Accord
                };

                // Unlike the heating-only branch, the intercept is not checked here; the final
                // filter below still removes negative intercepts.
                if (myTest && accordResult.B4 > 0)
                {
                    accordResults.Add(accordResult);
                }
            }
        }
        catch (Exception e)
        {
            Log.Debug($"AccID/UtilID/UnitID: {normalParamsKey.AccID}/{normalParamsKey.UtilID}/{normalParamsKey.UnitID} >> {e.Message} {e.StackTrace}");
        }
    }

    // Highest R² among the kept candidates with a non-negative intercept; null when none remain.
    AccordResult accordWinner = accordResults
        .Where(s => s.Intercept >= 0)
        .OrderByDescending(s => s.R2Accord)
        .FirstOrDefault();

    return accordWinner;
}

/// <summary>
/// Computes the slope standard error used by the t checks in <c>CalculateLinearRegression</c>:
/// sqrt(sum of squared residuals / degreesOfFreedom) divided by sqrt(sum of squared deviations
/// of <paramref name="x"/> from its mean).
/// </summary>
/// <param name="x">Values of the explanatory variable.</param>
/// <param name="observed">Observed response values.</param>
/// <param name="predicted">Model predictions for the same observations.</param>
/// <param name="degreesOfFreedom">Divisor applied to the residual sum of squares.</param>
/// <returns>The residual standard error divided by the root sum of squared deviations of <paramref name="x"/>.</returns>
private static double CoefficientStandardError(double[] x, double[] observed, double[] predicted, int degreesOfFreedom)
{
    double s = Math.Sqrt(observed.Subtract(predicted).Pow(2).Sum() / degreesOfFreedom);
    double ssx = Math.Sqrt(x.Subtract(x.Mean()).Pow(2).Sum());

    return s / ssx;
}