Exemplo n.º 1
0
        /// <summary>
        /// Downloads a weight/height CSV file, adds a computed "Bmi" column, splits the rows by the
        /// "Gender" column, and runs summary statistics, normality tests, two-sample tests, and
        /// regressions on the "Height" and "Weight" columns.
        /// </summary>
        /// <remarks>
        /// Every result is only stored in a local; nothing is asserted or printed here.
        /// </remarks>
        public void InternetSampleDownload()
        {
            Uri source = new Uri("https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_udemy/master/data/weight-height.csv");
            FrameTable table = DownloadFrameTable(source);
            FrameView view = table.WhereNotNull();

            // The computed column is added before the per-gender views are derived from the view.
            view.AddComputedColumn("Bmi", (FrameRow row) => {
                double height = (double)row["Height"];
                double weight = (double)row["Weight"];
                return weight / (height * height);
            });

            FrameView males = view.Where("Gender", (string gender) => gender == "Male");
            FrameView females = view.Where("Gender", (string gender) => gender == "Female");

            // Typed projections of the columns that are used repeatedly below.
            var allHeights = view["Height"].As<double>();
            var maleHeights = males["Height"].As<double>();
            var femaleHeights = females["Height"].As<double>();
            var maleWeights = males["Weight"].As<double>();

            // Descriptive statistics of height, per gender.
            SummaryStatistics maleSummary = new SummaryStatistics(maleHeights);
            SummaryStatistics femaleSummary = new SummaryStatistics(femaleHeights);

            // Shapiro-Francia tests on the combined and the per-gender heights.
            TestResult allNormal = allHeights.ShapiroFranciaTest();
            TestResult maleNormal = maleHeights.ShapiroFranciaTest();
            TestResult femaleNormal = femaleHeights.ShapiroFranciaTest();

            // Two-sample comparisons of male versus female heights.
            TestResult tTest = Univariate.StudentTTest(maleHeights, femaleHeights);
            TestResult mwTest = Univariate.MannWhitneyTest(maleHeights, femaleHeights);

            // Regressions on the male rows; note that result0 is invoked on the weights,
            // while result1..result3 are invoked on the heights.
            LinearRegressionResult result0 = maleWeights.LinearRegression(maleHeights);
            PolynomialRegressionResult result1 = maleHeights.PolynomialRegression(maleWeights, 1);
            PolynomialRegressionResult result2 = maleHeights.PolynomialRegression(maleWeights, 2);
            PolynomialRegressionResult result3 = maleHeights.PolynomialRegression(maleWeights, 3);

            //MultiLinearRegressionResult multi = view["Weight"].As<double>().MultiLinearRegression(view["Height"].As<double>(), view["Gender"].As<string>().Select(s => (s == "Male") ? 1.0 : 0.0).ToList());
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs a Mann-Whitney test on the stoichiometry values of the two peptide groups and gathers
        /// the test outcome together with each group's median, minimum, and maximum.
        /// </summary>
        /// <returns>
        /// An eight-element array laid out as: [0] test statistic, [1] test probability,
        /// [2] group-one median, [3] group-two median, [4] group-one minimum, [5] group-two minimum,
        /// [6] group-one maximum, [7] group-two maximum.
        /// </returns>
        private double[] calcMWStats()
        {
            // Flatten each group into a plain list of stoichiometry values.
            List<double> groupOneValues = new List<double>();
            foreach (Stoichiometry entry in this.PeptideStoichiometriesGroupOne)
            {
                groupOneValues.Add(entry.StoichiometryVal);
            }

            List<double> groupTwoValues = new List<double>();
            foreach (Stoichiometry entry in this.PeptideStoichiometriesGroupTwo)
            {
                groupTwoValues.Add(entry.StoichiometryVal);
            }

            TestResult mannWhitney = Univariate.MannWhitneyTest(groupOneValues, groupTwoValues);
            double statistic = mannWhitney.Statistic.Value;
            double probability = mannWhitney.Probability;

            // Median, minimum, and maximum for group one, then for group two.
            double medianOne = groupOneValues.Median();
            double minOne = groupOneValues.Min();
            double maxOne = groupOneValues.Max();

            double medianTwo = groupTwoValues.Median();
            double minTwo = groupTwoValues.Min();
            double maxTwo = groupTwoValues.Max();

            // The two groups are interleaved: even slots hold group one, odd slots hold group two.
            return new double[]
            {
                statistic, probability,
                medianOne, medianTwo,
                minOne, minTwo,
                maxOne, maxTwo
            };
        }
Exemplo n.º 3
0
        /// <summary>
        /// Compares two small hard-coded samples with a Student t-test, a Mann-Whitney test, and a
        /// two-sample Kolmogorov-Smirnov test, writing each statistic and probability to the console.
        /// </summary>
        private static void CompareSamples()
        {
            var first = new List<double> { 130.0, 140.0, 150.0, 150.0, 160.0, 190.0 };
            var second = new List<double> { 120.0, 150.0, 180.0, 170.0, 185.0, 175.0, 190.0, 200.0 };

            // Student t-test: report with the test type as returned, then again as left-tailed.
            TestResult tResult = Univariate.StudentTTest(first, second);
            var tStatistic = tResult.Statistic;
            Console.WriteLine($"{tStatistic.Name} = {tStatistic.Value}");
            Console.WriteLine($"{tResult.Type} P = {tResult.Probability}");

            tResult.Type = TestType.LeftTailed;
            Console.WriteLine($"{tResult.Type} P = {tResult.Probability}");

            // Mann-Whitney test.
            TestResult uResult = Univariate.MannWhitneyTest(first, second);
            var uStatistic = uResult.Statistic;
            Console.WriteLine($"{uStatistic.Name} = {uStatistic.Value}");
            Console.WriteLine($"{uResult.Type} P = {uResult.Probability}");

            // Two-sample Kolmogorov-Smirnov test.
            TestResult ksResult = Univariate.KolmogorovSmirnovTest(first, second);
            var ksStatistic = ksResult.Statistic;
            Console.WriteLine($"{ksStatistic.Name} = {ksStatistic.Value}");
            Console.WriteLine($"{ksResult.Type} P = {ksResult.Probability}");
        }
Exemplo n.º 4
0
        /// <summary>
        /// Builds a <c>Univariate</c> from the arguments (1, 6, 7) and checks that its two answers
        /// are -1.58 and -4.41, each within a tolerance of 0.01.
        /// </summary>
        public void UnivariateTest()
        {
            const double tolerance = 0.01;

            var subject = new Univariate(1, 6, 7);

            Assert.AreEqual(-1.58, subject.FirstAnswer, tolerance);
            Assert.AreEqual(-4.41, subject.SecondAnswer, tolerance);
        }
Exemplo n.º 5
0
 /// <summary>
 /// Finds the exponential distribution that best fits a sample.
 /// </summary>
 /// <param name="sample">The sample to fit.</param>
 /// <returns>The result of the fit, as produced by <c>Univariate.FitToExponential</c>.</returns>
 /// <remarks>
 /// <para>The fitted parameter is &#x3BC; (the <see cref="Mean"/>), i.e. the same value that the
 /// <see cref="ExponentialDistribution(double)"/> constructor takes to define a new
 /// exponential distribution.</para>
 /// </remarks>
 /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
 /// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than two values.</exception>
 /// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
 public static ExponentialFitResult FitToSample(Sample sample)
 {
     if (sample == null)
     {
         throw new ArgumentNullException(nameof(sample));
     }

     // The fit itself is delegated to the Univariate helper, which works on the sample's underlying data.
     var values = sample.data;
     return Univariate.FitToExponential(values);
 }
 /// <summary>
 /// Finds the Rayleigh distribution that best fits a sample.
 /// </summary>
 /// <param name="sample">The sample to fit, which must have at least 2 values.</param>
 /// <returns>The fit result. The only parameter is the scale parameter.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
 public static RayleighFitResult FitToSample(Sample sample)
 {
     if (sample == null)
     {
         throw new ArgumentNullException(nameof(sample));
     }

     // The fit itself is delegated to the Univariate helper, which works on the sample's underlying data.
     var values = sample.data;
     return Univariate.FitToRayleigh(values);
 }
Exemplo n.º 7
0
        /// <summary>
        /// For every pairing of the sample sizes 23, 30, and 175, draws 128 pairs of samples from the
        /// same exponential population, collects the two-sample Kolmogorov-Smirnov statistic of each
        /// pair, and checks that the collected statistics agree with the null distribution reported
        /// by the test.
        /// </summary>
        public void TwoSampleKolmogorovNullDistributionTest()
        {
            const int trialCount = 128;

            // Fixed seed, so the generated samples are reproducible from run to run.
            Random rng = new Random(4);
            ContinuousDistribution population = new ExponentialDistribution();

            int[] sampleSizes = { 23, 30, 175 };

            foreach (int firstSize in sampleSizes)
            {
                foreach (int secondSize in sampleSizes)
                {
                    var statistics = new Sample();
                    ContinuousDistribution nullDistribution = null;

                    for (int trial = 0; trial < trialCount; trial++)
                    {
                        List<double> first = TestUtilities.CreateDataSample(rng, population, firstSize).ToList();
                        List<double> second = TestUtilities.CreateDataSample(rng, population, secondSize).ToList();

                        TestResult trialResult = Univariate.KolmogorovSmirnovTest(first, second);
                        statistics.Add(trialResult.Statistic.Value);

                        // The distribution reported by the last trial is the one compared against below.
                        nullDistribution = trialResult.Statistic.Distribution;
                    }

                    // Only do full KS test if the number of bins is larger than the sample size, otherwise we are
                    // going to fail because the KS test detects the granularity of the distribution.
                    TestResult agreement = statistics.KolmogorovSmirnovTest(nullDistribution);
                    if (AdvancedIntegerMath.LCM(firstSize, secondSize) > statistics.Count)
                    {
                        Assert.IsTrue(agreement.Probability > 0.01);
                    }

                    // But always test that mean and standard deviation are as expected.
                    // This test is actually a bit sensitive, probably because the discrete-ness of the underlying
                    // distribution and the inaccuracy of the asymptotic approximation for intermediate sample size
                    // make strict comparisons iffy.
                    Assert.IsTrue(statistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
                    Assert.IsTrue(statistics.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(nullDistribution.StandardDeviation));
                }
            }
        }
Exemplo n.º 8
0
        /// <summary>
        /// Downloads an example CSV file into a frame table and walks through a series of analyses on it:
        /// column access, summary statistics, two-sample and normality tests, a linear regression,
        /// a contingency table, computed columns, a logistic regression, and a Spearman rho test.
        /// Selected results are written to the console.
        /// </summary>
        public static void AnalyzingData()
        {
            Uri url = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
            WebRequest request = WebRequest.Create(url);

            // Read the CSV straight from the response stream; both disposables are released on exit.
            FrameTable table;
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                table = FrameTable.FromCsv(reader);
            }

            FrameView view = table.WhereNotNull();

            // Three ways to reach a column: by zero-based index, by name via Columns, by name via the indexer.
            FrameColumn column4 = view.Columns[4];
            FrameColumn heightsColumn = view.Columns["Height"];
            FrameColumn alsoHeightsColumn = view["Height"];

            // A column exposed as a typed, read-only list.
            IReadOnlyList<double> heights = view["Height"].As<double>();

            // Descriptive statistics of the heights.
            SummaryStatistics heightSummary = new SummaryStatistics(view["Height"].As<double>());

            Console.WriteLine($"Count = {heightSummary.Count}");
            Console.WriteLine($"Mean = {heightSummary.Mean}");
            Console.WriteLine($"Standard Deviation = {heightSummary.StandardDeviation}");
            Console.WriteLine($"Skewness = {heightSummary.Skewness}");
            Console.WriteLine($"Estimated population mean = {heightSummary.PopulationMean}");
            Console.WriteLine($"Estimated population standard deviation = {heightSummary.PopulationStandardDeviation}");

            // Heights split on the "Sex" column, compared with a Student t-test.
            FrameView maleRows = view.Where<string>("Sex", s => s == "M");
            IReadOnlyList<double> maleHeights = maleRows.Columns["Height"].As<double>();
            FrameView femaleRows = view.Where<string>("Sex", s => s == "F");
            IReadOnlyList<double> femaleHeights = femaleRows.Columns["Height"].As<double>();

            TestResult heightTTest = Univariate.StudentTTest(maleHeights, femaleHeights);

            Console.WriteLine($"{heightTTest.Statistic.Name} = {heightTTest.Statistic.Value}");
            Console.WriteLine($"P = {heightTTest.Probability}");

            // Further tests whose results are kept in locals only.
            TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
            TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
            TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

            // Linear regression between the "Weight" and "Height" columns.
            LinearRegressionResult weightFit =
                view["Weight"].As<double>().LinearRegression(view["Height"].As<double>());

            Console.WriteLine($"Model weight = ({weightFit.Slope}) * height + ({weightFit.Intercept}).");
            Console.WriteLine($"Model explains {weightFit.RSquared * 100.0}% of variation.");

            // Cross-tabulation of "Sex" against "Result".
            ContingencyTable<string, bool> crosstab =
                Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());

            var maleIncidence = crosstab.ProbabilityOfColumnConditionalOnRow(true, "M");
            Console.WriteLine($"Male incidence: {maleIncidence}");
            var femaleIncidence = crosstab.ProbabilityOfColumnConditionalOnRow(true, "F");
            Console.WriteLine($"Female incidence: {femaleIncidence}");
            Console.WriteLine($"Log odds ratio = {crosstab.Binary.LogOddsRatio}");

            // Computed columns: "Bmi" divides weight by the square of height / 100;
            // "Age" is the time elapsed since "Birthdate", in days divided by 365.24.
            view.AddComputedColumn("Bmi", row => ((double)row["Weight"]) / MoreMath.Sqr((double)row["Height"] / 100.0));
            view.AddComputedColumn("Age", row => (DateTime.Now - (DateTime)row["Birthdate"]).TotalDays / 365.24);

            // Logistic regression of "Result" on "Bmi" and a 0/1 encoding of "Sex".
            MultiLinearLogisticRegressionResult logistic =
                view["Result"].As<bool>().MultiLinearLogisticRegression(
                    view["Bmi"].As<double>(),
                    view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
                );

            foreach (Parameter coefficient in logistic.Parameters)
            {
                Console.WriteLine($"{coefficient.Name} = {coefficient.Estimate}");
            }

            // Spearman rho test between "Age" and "Result".
            TestResult ageVersusResult = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());

            Console.WriteLine($"{ageVersusResult.Statistic.Name} = {ageVersusResult.Statistic.Value} P = {ageVersusResult.Probability}");
        }