public void InternetSampleDownload () {

    // Download the weight-height data set and drop rows with missing values.
    FrameTable table = DownloadFrameTable(new Uri("https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_udemy/master/data/weight-height.csv"));
    FrameView view = table.WhereNotNull();

    // Add a computed BMI column.
    view.AddComputedColumn("Bmi", (FrameRow r) => {
        double h = (double) r["Height"];
        double w = (double) r["Weight"];
        return w / (h * h);
    });

    // Split the data by gender.
    FrameView males = view.Where("Gender", (string s) => (s == "Male"));
    FrameView females = view.Where("Gender", (string s) => (s == "Female"));

    // Summary statistics for each group.
    SummaryStatistics maleSummary = new SummaryStatistics(males["Height"].As<double>());
    SummaryStatistics femaleSummary = new SummaryStatistics(females["Height"].As<double>());

    // Test each height distribution for normality.
    Meta.Numerics.Statistics.TestResult allNormal = view["Height"].As<double>().ShapiroFranciaTest();
    Meta.Numerics.Statistics.TestResult maleNormal = males["Height"].As<double>().ShapiroFranciaTest();
    Meta.Numerics.Statistics.TestResult femaleNormal = females["Height"].As<double>().ShapiroFranciaTest();

    // Compare male and female heights with parametric and non-parametric tests.
    Meta.Numerics.Statistics.TestResult tTest = Univariate.StudentTTest(males["Height"].As<double>(), females["Height"].As<double>());
    Meta.Numerics.Statistics.TestResult mwTest = Univariate.MannWhitneyTest(males["Height"].As<double>(), females["Height"].As<double>());

    // Regress weight on height, then fit polynomials of increasing degree.
    LinearRegressionResult result0 = males["Weight"].As<double>().LinearRegression(males["Height"].As<double>());
    PolynomialRegressionResult result1 = males["Height"].As<double>().PolynomialRegression(males["Weight"].As<double>(), 1);
    PolynomialRegressionResult result2 = males["Height"].As<double>().PolynomialRegression(males["Weight"].As<double>(), 2);
    PolynomialRegressionResult result3 = males["Height"].As<double>().PolynomialRegression(males["Weight"].As<double>(), 3);

    //MultiLinearRegressionResult multi = view["Weight"].As<double>().MultiLinearRegression(view["Height"].As<double>(), view["Gender"].As<string>().Select(s => (s == "Male") ? 1.0 : 0.0).ToList());

}
// one-parameter constructor
internal FitResult (double p1, double dp1, TestResult test) {
    this.parameters = new ColumnVector(new double[] { p1 }, 0, 1, 1, true);
    this.covarianceMatrix = new SymmetricMatrix(1);
    this.covarianceMatrix[0, 0] = dp1 * dp1;
    this.covarianceMatrix.IsReadOnly = true;
    this.test = test;
}
public void InternetTimeSeriesDownload () {

    // Download the sea-ice data set and analyze the Arctic column as a time series.
    FrameTable table = DownloadFrameTable(new Uri("https://timeseries.weebly.com/uploads/2/1/0/8/21086414/sea_ice.csv"));

    double[] powerSpectrum = table["Arctic"].As<double>().PowerSpectrum();
    double v12 = table["Arctic"].As<double>().Autocovariance(12);
    Meta.Numerics.Statistics.TestResult lbTest = table["Arctic"].As<double>().LjungBoxTest();

}
// n-parameter constructor
internal FitResult (IList<double> parameters, SymmetricMatrix covariance, TestResult test) {
    Debug.Assert(parameters != null);
    Debug.Assert(covariance != null);
    Debug.Assert(parameters.Count == covariance.Dimension);
    // This is a bit of a hack to ensure we store read-only ColumnVector and SymmetricMatrix objects.
    this.parameters = ConvertListToReadOnlyVector(parameters);
    this.covarianceMatrix = covariance;
    this.covarianceMatrix.IsReadOnly = true;
    this.test = test;
}
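// A plausible sketch of the ConvertListToReadOnlyVector helper referenced above; the actual
// library implementation may differ. All the constructor's comment requires is that the list
// be copied into a ColumnVector that is then marked read-only.
private static ColumnVector ConvertListToReadOnlyVector (IList<double> list) {
    ColumnVector v = new ColumnVector(list.Count);
    for (int i = 0; i < list.Count; i++) v[i] = list[i];
    v.IsReadOnly = true;
    return v;
}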
/// <summary>
/// Computes the best-fit linear regression from the data.
/// </summary>
/// <returns>The result of the fit.</returns>
/// <remarks>
/// <para>Linear regression assumes that the data have been generated by a function y = a + b x + e, where e is
/// normally distributed noise, and determines the values of a and b that best fit the data. It also
/// determines an error matrix on the parameters a and b, and does an F-test to assess the significance of the fit.</para>
/// <para>The fit result is two-dimensional. The first parameter is the intercept a, the second is the slope b.
/// The goodness-of-fit test is an F-test comparing the variance accounted for by the model to the remaining,
/// unexplained variance.</para>
/// </remarks>
/// <exception cref="InsufficientDataException">There are fewer than three data points.</exception>
public LinearRegressionResult LinearRegression () {

    int n = this.Count;
    if (n < 3) throw new InsufficientDataException();

    // The means and covariances are the inputs to most of the regression formulas.
    double mx = xData.Mean;
    double my = yData.Mean;
    double cxx = xData.Variance;
    double cyy = yData.Variance;
    double cxy = this.Covariance;

    Debug.Assert(cxx >= 0.0);
    Debug.Assert(cyy >= 0.0);

    // Compute the best-fit parameters.
    // Since cov(x,y) = (n S_xy - S_x S_y) / n^2 and var(x) = (n S_xx - S_x^2) / n^2,
    // these formulas are equivalent to the usual formulas for a and b involving sums,
    // but they are more stable against round-off error.
    double b = cxy / cxx;
    double a = my - b * mx;
    ColumnVector v = new ColumnVector(a, b);
    v.IsReadOnly = true;

    // Compute the Pearson r value and its associated test.
    double r = cxy / Math.Sqrt(cxx * cyy);
    TestResult rTest = new TestResult("r", r, TestType.TwoTailed, new Distributions.PearsonRDistribution(n));

    // Compute residuals and other sums-of-squares.
    double SSR = 0.0;
    double SSF = 0.0;
    Sample residuals = new Sample();
    foreach (XY point in this) {
        double y = a + b * point.X;
        double z = point.Y - y;
        SSR += z * z;
        residuals.Add(z);
        SSF += MoreMath.Sqr(y - my);
    }
    double SST = cyy * n;
    // Note SST = SSF + SSR because \sum_i (y_i - \bar{y})^2 = \sum_i (y_i - f_i)^2 + \sum_i (f_i - \bar{y})^2.

    // Use the sums-of-squares to do ANOVA.
    AnovaRow fit = new AnovaRow(SSF, 1);
    AnovaRow residual = new AnovaRow(SSR, n - 2);
    AnovaRow total = new AnovaRow(SST, n - 1);
    OneWayAnovaResult anova = new OneWayAnovaResult(fit, residual, total);

    // Compute the covariance matrix of the parameters.
    double s2 = SSR / (n - 2);
    double cbb = s2 / cxx / n;
    double cab = -mx * cbb;
    double caa = (cxx + mx * mx) * cbb;

    SymmetricMatrix C = new SymmetricMatrix(2);
    C[0, 0] = caa;
    C[1, 1] = cbb;
    C[0, 1] = cab;
    C.IsReadOnly = true;

    // Package the parameters.
    ParameterCollection parameters = new ParameterCollection(
        new string[] { "Intercept", "Slope" }, v, C
    );

    // Prepare the prediction function.
    Func<double, UncertainValue> predict = (double x) => {
        double y = a + b * x;
        return new UncertainValue(y, Math.Sqrt(s2 * (1.0 + (1.0 + MoreMath.Sqr(x - mx) / cxx) / n)));
    };

    return new LinearRegressionResult(parameters, rTest, anova, residuals, predict);

}
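// A minimal usage sketch, not part of the library source: it mirrors the list-based
// LinearRegression call used in InternetSampleDownload above. The data values and the
// method name LinearRegressionUsageSketch are illustrative only, and the result
// properties mentioned in the comments are assumptions about LinearRegressionResult
// rather than confirmed API.
public void LinearRegressionUsageSketch () {
    double[] x = new double[] { 1.0, 2.0, 3.0, 4.0, 5.0 };
    double[] y = new double[] { 2.1, 3.9, 6.2, 7.8, 10.1 };

    // Fit y = a + b x. As constructed in LinearRegression above, the result packages the
    // "Intercept" and "Slope" parameters with their covariance matrix, a Pearson r test,
    // an ANOVA table, the residuals, and a prediction function.
    LinearRegressionResult fit = y.LinearRegression(x);
}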