Example 1
        public void InternetSampleDownload()
        {
            FrameTable table = DownloadFrameTable(new Uri("https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_udemy/master/data/weight-height.csv"));
            FrameView  view  = table.WhereNotNull();

            view.AddComputedColumn("Bmi", (FrameRow r) => {
                double h = (double)r["Height"];
                double w = (double)r["Weight"];
                return w / (h * h);
            });

            FrameView males   = view.Where("Gender", (string s) => (s == "Male"));
            FrameView females = view.Where("Gender", (string s) => (s == "Female"));

            SummaryStatistics maleSummary   = new SummaryStatistics(males["Height"].As<double>());
            SummaryStatistics femaleSummary = new SummaryStatistics(females["Height"].As<double>());

            Meta.Numerics.Statistics.TestResult allNormal    = view["Height"].As<double>().ShapiroFranciaTest();
            Meta.Numerics.Statistics.TestResult maleNormal   = males["Height"].As<double>().ShapiroFranciaTest();
            Meta.Numerics.Statistics.TestResult femaleNormal = females["Height"].As<double>().ShapiroFranciaTest();

            Meta.Numerics.Statistics.TestResult tTest  = Univariate.StudentTTest(males["Height"].As<double>(), females["Height"].As<double>());
            Meta.Numerics.Statistics.TestResult mwTest = Univariate.MannWhitneyTest(males["Height"].As<double>(), females["Height"].As<double>());

            LinearRegressionResult     result0 = males["Weight"].As<double>().LinearRegression(males["Height"].As<double>());
            PolynomialRegressionResult result1 = males["Height"].As<double>().PolynomialRegression(males["Weight"].As<double>(), 1);
            PolynomialRegressionResult result2 = males["Height"].As<double>().PolynomialRegression(males["Weight"].As<double>(), 2);
            PolynomialRegressionResult result3 = males["Height"].As<double>().PolynomialRegression(males["Weight"].As<double>(), 3);

            //MultiLinearRegressionResult multi = view["Weight"].As<double>().MultiLinearRegression(view["Height"].As<double>(), view["Gender"].As<string>().Select(s => (s == "Male") ? 1.0 : 0.0).ToList());
        }
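
The call above computes the statistics but never reports them. A continuation like the following, placed at the end of InternetSampleDownload(), would print the summaries and p-values. It assumes the Meta.Numerics members SummaryStatistics.Count/Mean/StandardDeviation and TestResult.Probability (the p-value); check those names against the library version you reference, and add using System; for Console.

            // Hypothetical continuation of InternetSampleDownload (property names assumed, see note above).
            Console.WriteLine($"Males:   n = {maleSummary.Count}, height = {maleSummary.Mean:F1} +/- {maleSummary.StandardDeviation:F1}");
            Console.WriteLine($"Females: n = {femaleSummary.Count}, height = {femaleSummary.Mean:F1} +/- {femaleSummary.StandardDeviation:F1}");

            // Small Shapiro-Francia p-values argue against normally distributed heights.
            Console.WriteLine($"Shapiro-Francia p (all/male/female): {allNormal.Probability:F4} / {maleNormal.Probability:F4} / {femaleNormal.Probability:F4}");

            // Both tests ask whether male and female heights differ; the t-test assumes normality,
            // the Mann-Whitney test does not.
            Console.WriteLine($"Student t p = {tTest.Probability:F4}, Mann-Whitney p = {mwTest.Probability:F4}");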
Example 2
 // one-parameter constructor
 internal FitResult(double p1, double dp1, TestResult test)
 {
     this.parameters = new ColumnVector(new double[] {p1}, 0, 1, 1, true);
     this.covarianceMatrix = new SymmetricMatrix(1);
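     // For a single parameter the covariance "matrix" is 1x1; its only entry is the
     // parameter's variance, i.e. the square of the standard error dp1.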
     this.covarianceMatrix[0, 0] = dp1 * dp1;
     this.covarianceMatrix.IsReadOnly = true;
     this.test = test;
 }
Example 3
        public void InternetTimeSeriesDownload()
        {
            FrameTable table = DownloadFrameTable(new Uri("https://timeseries.weebly.com/uploads/2/1/0/8/21086414/sea_ice.csv"));

            double[] powerSpectrum = table["Arctic"].As<double>().PowerSpectrum();
            double   v12           = table["Arctic"].As<double>().Autocovariance(12);

            Meta.Numerics.Statistics.TestResult lbTest = table["Arctic"].As<double>().LjungBoxTest();
        }
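
The lag-12 autocovariance is easier to read once normalized by the lag-0 autocovariance (the series variance), which turns it into the lag-12 autocorrelation. A sketch of such a continuation, usable before the closing brace above, relies only on calls already present in the example plus the assumed TestResult.Probability property:

            // Hypothetical continuation of InternetTimeSeriesDownload.
            // Autocovariance(0) is the variance, so v12 / v0 is the lag-12 autocorrelation (in [-1, 1]).
            double v0  = table["Arctic"].As<double>().Autocovariance(0);
            double r12 = v12 / v0;
            Console.WriteLine($"Lag-12 autocorrelation = {r12:F3}");

            // A small Ljung-Box p-value indicates the series is not white noise.
            Console.WriteLine($"Ljung-Box p = {lbTest.Probability:F4}");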
Example 4
        // n-parameter constructor
        internal FitResult(IList<double> parameters, SymmetricMatrix covariance, TestResult test)
        {
            Debug.Assert(parameters != null);
            Debug.Assert(covariance != null);
            Debug.Assert(parameters.Count == covariance.Dimension);

            // This is a bit of a hack to ensure we store read-only ColumnVectors and SymmetricMatrix objects.
            this.parameters = ConvertListToReadOnlyVector(parameters);
            this.covarianceMatrix = covariance;
            this.covarianceMatrix.IsReadOnly = true;

            this.test = test;
        }
        /// <summary>
        /// Computes the best-fit linear regression from the data.
        /// </summary>
        /// <returns>The result of the fit.</returns>
        /// <remarks>
        /// <para>Linear regression assumes that the data have been generated by a function y = a + b x + e, where e is
        /// normally distributed noise, and determines the values of a and b that best fit the data. It also
        /// determines an error matrix on the parameters a and b, and does an F-test to assess the quality of the fit.</para>
        /// <para>The fit result is two-dimensional. The first parameter is the intercept a, the second is the slope b.
        /// The goodness-of-fit test is an F-test comparing the variance accounted for by the model to the remaining,
        /// unexplained variance.</para>
        /// </remarks>
        /// <exception cref="InsufficientDataException">There are fewer than three data points.</exception>
        public LinearRegressionResult LinearRegression()
        {
            int n = this.Count;

            if (n < 3)
            {
                throw new InsufficientDataException();
            }

            // The means and covariances are the inputs to most of the regression formulas.
            double mx  = xData.Mean;
            double my  = yData.Mean;
            double cxx = xData.Variance;
            double cyy = yData.Variance;
            double cxy = this.Covariance;

            Debug.Assert(cxx >= 0.0);
            Debug.Assert(cyy >= 0.0);

            // Compute the best-fit parameters
            double b = cxy / cxx;
            double a = my - b * mx;
            // Since cov(x,y) = (n S_xy - S_x S_y)/n^2 and var(x) = (n S_xx - S_x^2) / n^2,
            // these formulas are equivalent to the usual formulas for a and b involving sums,
            // but they are more stable against round-off.
            ColumnVector v = new ColumnVector(a, b);

            v.IsReadOnly = true;

            // Compute Pearson r value
            double     r     = cxy / Math.Sqrt(cxx * cyy);
            TestResult rTest = new TestResult("r", r, TestType.TwoTailed, new Distributions.PearsonRDistribution(n));

            // Compute residuals and other sum-of-squares
            double SSR       = 0.0;
            double SSF       = 0.0;
            Sample residuals = new Sample();

            foreach (XY point in this)
            {
                double y = a + b * point.X;
                double z = point.Y - y;
                SSR += z * z;
                residuals.Add(z);
                SSF += MoreMath.Sqr(y - my);
            }
            double SST = cyy * n;
            // Note SST = SSF + SSR because \sum_{i} ( y_i - \bar{y})^2 = \sum_i (y_i - f_i)^2 + \sum_i (f_i - \bar{y})^2

            // Use sums-of-squares to do ANOVA
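            // With these degrees of freedom, the goodness-of-fit F statistic is (SSF / 1) / (SSR / (n - 2)),
            // i.e. explained variance per degree of freedom over unexplained variance per degree of freedom.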
            AnovaRow          fit      = new AnovaRow(SSF, 1);
            AnovaRow          residual = new AnovaRow(SSR, n - 2);
            AnovaRow          total    = new AnovaRow(SST, n - 1);
            OneWayAnovaResult anova    = new OneWayAnovaResult(fit, residual, total);

            // Compute covariance of parameters matrix
            double s2  = SSR / (n - 2);
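            // Standard OLS parameter (co)variances, written with the population variance cxx (S_xx = n * cxx):
            // Var(b) = s^2 / S_xx, Cov(a,b) = -mx * Var(b), Var(a) = s^2 * (1/n + mx^2 / S_xx).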
            double cbb = s2 / cxx / n;
            double cab = -mx * cbb;
            double caa = (cxx + mx * mx) * cbb;

            SymmetricMatrix C = new SymmetricMatrix(2);

            C[0, 0]      = caa;
            C[1, 1]      = cbb;
            C[0, 1]      = cab;
            C.IsReadOnly = true;

            // Package the parameters
            ParameterCollection parameters = new ParameterCollection(
                new string[] { "Intercept", "Slope" }, v, C
                );

            // Prepare the prediction function
            Func<double, UncertainValue> predict = (double x) => {
                double y = a + b * x;
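                // Prediction variance for a new observation at x: s^2 * (1 + 1/n + (x - mx)^2 / S_xx),
                // written below with S_xx = n * cxx.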
                return new UncertainValue(y, Math.Sqrt(s2 * (1.0 + (1.0 + MoreMath.Sqr(x - mx) / cxx) / n)));
            };

            return new LinearRegressionResult(parameters, rTest, anova, residuals, predict);
        }
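
The method above is an instance member of a bivariate sample type (it enumerates XY points and uses xData, yData, and this.Covariance), so a minimal, hypothetical caller might look like the sketch below. BivariateSample, its Add method, and the Intercept, Slope, and Predict members of LinearRegressionResult are assumptions taken from the Meta.Numerics documentation rather than from this excerpt; verify them against your library version.

using System;
using Meta.Numerics.Statistics;

public static class LinearRegressionSketch
{
    // Hypothetical usage of the LinearRegression method shown above.
    // BivariateSample, Add, Intercept, Slope, and Predict are assumed from the
    // Meta.Numerics documentation; they are not defined in this excerpt.
    public static void Run()
    {
        BivariateSample sample = new BivariateSample();

        // Synthetic data close to y = 1 + 2 x, with small deterministic wiggles standing in for noise.
        for (int i = 0; i < 10; i++)
        {
            double x = i;
            double y = 1.0 + 2.0 * x + 0.1 * Math.Sin(i);
            sample.Add(x, y);
        }

        LinearRegressionResult fit = sample.LinearRegression();

        // As the remarks state, the first parameter is the intercept a and the second is the slope b.
        Console.WriteLine($"a = {fit.Intercept}, b = {fit.Slope}");

        // The prediction carries the uncertainty computed by the predict lambda above.
        Console.WriteLine($"y(12) = {fit.Predict(12.0)}");
    }
}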