/// <summary>
/// Downloads a weight/height CSV from the internet into a frame table and runs a
/// sequence of summary, hypothesis-test, and regression computations on it.
/// </summary>
/// <remarks>
/// The method makes no assertions; it only exercises the calls end to end.
/// </remarks>
public void InternetSampleDownload()
{
    Uri source = new Uri("https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_udemy/master/data/weight-height.csv");
    FrameTable table = DownloadFrameTable(source);

    // Work only with rows that have no null entries.
    FrameView view = table.WhereNotNull();

    // NOTE(review): the ratio below applies no unit conversion; confirm the units
    // of the data set's Height and Weight columns before relying on "Bmi".
    view.AddComputedColumn("Bmi", (FrameRow row) =>
    {
        double height = (double)row["Height"];
        double weight = (double)row["Weight"];
        return weight / (height * height);
    });

    // Split the data by the value of the Gender column.
    FrameView males = view.Where("Gender", (string gender) => gender == "Male");
    FrameView females = view.Where("Gender", (string gender) => gender == "Female");

    var maleHeights = males["Height"].As<double>();
    var femaleHeights = females["Height"].As<double>();

    // Descriptive statistics of height for each group.
    SummaryStatistics maleSummary = new SummaryStatistics(maleHeights);
    SummaryStatistics femaleSummary = new SummaryStatistics(femaleHeights);

    // Normality tests on the pooled heights and on each group.
    TestResult allNormal = view["Height"].As<double>().ShapiroFranciaTest();
    TestResult maleNormal = maleHeights.ShapiroFranciaTest();
    TestResult femaleNormal = femaleHeights.ShapiroFranciaTest();

    // Two-sample comparisons of male and female heights.
    TestResult tTest = Univariate.StudentTTest(maleHeights, femaleHeights);
    TestResult mwTest = Univariate.MannWhitneyTest(maleHeights, femaleHeights);

    var maleWeights = males["Weight"].As<double>();

    // Weight regressed on height, then height regressed on weight with
    // polynomials of degree 1, 2, and 3.
    LinearRegressionResult result0 = maleWeights.LinearRegression(maleHeights);
    PolynomialRegressionResult result1 = maleHeights.PolynomialRegression(maleWeights, 1);
    PolynomialRegressionResult result2 = maleHeights.PolynomialRegression(maleWeights, 2);
    PolynomialRegressionResult result3 = maleHeights.PolynomialRegression(maleWeights, 3);

    // Disabled in the original:
    //MultiLinearRegressionResult multi = view["Weight"].As<double>().MultiLinearRegression(view["Height"].As<double>(), view["Gender"].As<string>().Select(s => (s == "Male") ? 1.0 : 0.0).ToList());
}
// Runs a Mann-Whitney test on the stoichiometry values of peptide group one versus
// group two and gathers per-group descriptive values.
//
// Layout of the returned 8-element array:
//   [0] Mann-Whitney statistic value   [1] test probability
//   [2] group-one median               [3] group-two median
//   [4] group-one minimum              [5] group-two minimum
//   [6] group-one maximum              [7] group-two maximum
private double[] calcMWStats()
{
    // Pull the raw stoichiometry value out of every entry of group one...
    List<double> groupOneValues = new List<double>();
    foreach (Stoichiometry entry in this.PeptideStoichiometriesGroupOne)
    {
        groupOneValues.Add(entry.StoichiometryVal);
    }

    // ...and of group two.
    List<double> groupTwoValues = new List<double>();
    foreach (Stoichiometry entry in this.PeptideStoichiometriesGroupTwo)
    {
        groupTwoValues.Add(entry.StoichiometryVal);
    }

    // Test statistic and probability.
    TestResult mannWhitney = Univariate.MannWhitneyTest(groupOneValues, groupTwoValues);
    double statistic = mannWhitney.Statistic.Value;
    double probability = mannWhitney.Probability;

    // Median, minimum, and maximum of each group.
    double medianOne = groupOneValues.Median();
    double minOne = groupOneValues.Min();
    double maxOne = groupOneValues.Max();

    double medianTwo = groupTwoValues.Median();
    double minTwo = groupTwoValues.Min();
    double maxTwo = groupTwoValues.Max();

    return new double[]
    {
        statistic,
        probability,
        medianOne,
        medianTwo,
        minOne,
        minTwo,
        maxOne,
        maxTwo
    };
}
/// <summary>
/// Compares two small hard-coded samples with the Student t, Mann-Whitney, and
/// Kolmogorov-Smirnov two-sample tests, writing each statistic and probability
/// to the console.
/// </summary>
private static void CompareSamples()
{
    var first = new List<double> { 130.0, 140.0, 150.0, 150.0, 160.0, 190.0 };
    var second = new List<double> { 120.0, 150.0, 180.0, 170.0, 185.0, 175.0, 190.0, 200.0 };

    // Student t-test: print the probability for the default test type, then
    // switch the same result to left-tailed and print the probability again.
    TestResult tResult = Univariate.StudentTTest(first, second);
    Console.WriteLine($"{tResult.Statistic.Name} = {tResult.Statistic.Value}");
    Console.WriteLine($"{tResult.Type} P = {tResult.Probability}");
    tResult.Type = TestType.LeftTailed;
    Console.WriteLine($"{tResult.Type} P = {tResult.Probability}");

    // Mann-Whitney test.
    TestResult uResult = Univariate.MannWhitneyTest(first, second);
    Console.WriteLine($"{uResult.Statistic.Name} = {uResult.Statistic.Value}");
    Console.WriteLine($"{uResult.Type} P = {uResult.Probability}");

    // Two-sample Kolmogorov-Smirnov test.
    TestResult ksResult = Univariate.KolmogorovSmirnovTest(first, second);
    Console.WriteLine($"{ksResult.Statistic.Name} = {ksResult.Statistic.Value}");
    Console.WriteLine($"{ksResult.Type} P = {ksResult.Probability}");
}
/// <summary>
/// Builds a <c>Univariate</c> from the arguments (1, 6, 7) and checks that its
/// <c>FirstAnswer</c> and <c>SecondAnswer</c> match the expected values to within 0.01.
/// </summary>
/// <remarks>
/// NOTE(review): the expected values look like the two roots of x^2 + 6x + 7;
/// confirm against the <c>Univariate</c> type this test targets.
/// </remarks>
public void UnivariateTest()
{
    const double tolerance = 0.01;

    Univariate solver = new Univariate(1, 6, 7);

    Assert.AreEqual(-1.58, solver.FirstAnswer, tolerance);
    Assert.AreEqual(-4.41, solver.SecondAnswer, tolerance);
}
/// <summary>
/// Finds the exponential distribution that best describes the given sample.
/// </summary>
/// <param name="sample">The sample whose values are to be fitted.</param>
/// <returns>The result of the fit, which carries the best-fit parameter.</returns>
/// <remarks>
/// <para>The fitted parameter is μ (the <see cref="Mean"/>), the same quantity that the
/// <see cref="ExponentialDistribution(double)"/> constructor takes when specifying a new
/// exponential distribution.</para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
/// <exception cref="InsufficientDataException"><paramref name="sample"/> contains fewer than two values.</exception>
/// <exception cref="InvalidOperationException"><paramref name="sample"/> contains non-positive values.</exception>
public static ExponentialFitResult FitToSample(Sample sample)
{
    if (sample == null)
    {
        throw new ArgumentNullException(nameof(sample));
    }

    // The fit itself is delegated to the shared univariate routine, operating on
    // the sample's underlying data.
    ExponentialFitResult fit = Univariate.FitToExponential(sample.data);
    return fit;
}
/// <summary>
/// Finds the Rayleigh distribution that best describes the given sample.
/// </summary>
/// <param name="sample">The sample whose values are to be fitted; it must contain at least 2 values.</param>
/// <returns>The result of the fit. Its only parameter is the scale parameter.</returns>
/// <exception cref="ArgumentNullException"><paramref name="sample"/> is null.</exception>
public static RayleighFitResult FitToSample(Sample sample)
{
    if (sample == null)
    {
        throw new ArgumentNullException(nameof(sample));
    }

    // The fit itself is delegated to the shared univariate routine, operating on
    // the sample's underlying data.
    RayleighFitResult fit = Univariate.FitToRayleigh(sample.data);
    return fit;
}
/// <summary>
/// Checks that the two-sample Kolmogorov-Smirnov statistic, computed repeatedly on pairs
/// of samples drawn from one population, is consistent with the null distribution that
/// the test result reports for it.
/// </summary>
public void TwoSampleKolmogorovNullDistributionTest()
{
    const int trials = 128;

    Random rng = new Random(4);
    ContinuousDistribution population = new ExponentialDistribution();
    int[] sizes = { 23, 30, 175 };

    for (int ia = 0; ia < sizes.Length; ia++)
    {
        int na = sizes[ia];

        for (int ib = 0; ib < sizes.Length; ib++)
        {
            int nb = sizes[ib];

            // Collect the statistic over many independent trials and remember the
            // null distribution reported by the most recent trial.
            Sample statistics = new Sample();
            ContinuousDistribution nullDistribution = null;
            for (int trial = 0; trial < trials; trial++)
            {
                List<double> a = TestUtilities.CreateDataSample(rng, population, na).ToList();
                List<double> b = TestUtilities.CreateDataSample(rng, population, nb).ToList();

                TestResult twoSample = Univariate.KolmogorovSmirnovTest(a, b);
                statistics.Add(twoSample.Statistic.Value);
                nullDistribution = twoSample.Statistic.Distribution;
            }

            // The full KS comparison is only asserted when the number of bins exceeds
            // the number of collected statistics; otherwise it would fail because the
            // KS test detects the granularity of the distribution.
            TestResult agreement = statistics.KolmogorovSmirnovTest(nullDistribution);
            if (AdvancedIntegerMath.LCM(na, nb) > statistics.Count)
            {
                Assert.IsTrue(agreement.Probability > 0.01);
            }

            // The mean and standard deviation are always checked against expectations.
            // This check is somewhat sensitive, probably because the discreteness of
            // the underlying distribution and the inaccuracy of the asymptotic
            // approximation at intermediate sample sizes make strict comparisons iffy.
            Assert.IsTrue(statistics.PopulationMean.ConfidenceInterval(0.99).ClosedContains(nullDistribution.Mean));
            Assert.IsTrue(statistics.PopulationStandardDeviation.ConfidenceInterval(0.99).ClosedContains(nullDistribution.StandardDeviation));
        }
    }
}
/// <summary>
/// Walk-through example: downloads a CSV data set, loads it into a frame table, and
/// prints summary statistics, hypothesis-test results, a linear fit, a contingency
/// analysis, a logistic regression, and a rank-correlation test to the console.
/// </summary>
public static void AnalyzingData()
{
    // Fetch the example CSV over HTTP and parse it into a table.
    Uri url = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
    WebRequest request = WebRequest.Create(url);

    FrameTable table;
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        table = FrameTable.FromCsv(reader);
    }

    // Work only with rows that have no null entries.
    FrameView view = table.WhereNotNull();

    // Three ways to reach a column:
    // by (zero-based) index,
    FrameColumn column4 = view.Columns[4];
    // by name through the Columns collection,
    FrameColumn heightsColumn = view.Columns["Height"];
    // or by name directly on the view.
    FrameColumn alsoHeightsColumn = view["Height"];

    // Typed access to the Height column.
    IReadOnlyList<double> heights = view["Height"].As<double>();

    // Descriptive statistics of all heights.
    SummaryStatistics summary = new SummaryStatistics(heights);
    Console.WriteLine($"Count = {summary.Count}");
    Console.WriteLine($"Mean = {summary.Mean}");
    Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
    Console.WriteLine($"Skewness = {summary.Skewness}");
    Console.WriteLine($"Estimated population mean = {summary.PopulationMean}");
    Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

    // Heights split by the value of the Sex column.
    FrameView males = view.Where<string>("Sex", s => s == "M");
    IReadOnlyList<double> maleHeights = males.Columns["Height"].As<double>();
    FrameView females = view.Where<string>("Sex", s => s == "F");
    IReadOnlyList<double> femaleHeights = females.Columns["Height"].As<double>();

    // Student t-test comparing male and female heights.
    TestResult heightTTest = Univariate.StudentTTest(maleHeights, femaleHeights);
    Console.WriteLine($"{heightTTest.Statistic.Name} = {heightTTest.Statistic.Value}");
    Console.WriteLine($"P = {heightTTest.Probability}");

    // Normality of male heights and of all heights, then a two-sample
    // Kolmogorov-Smirnov comparison of the male and female heights.
    TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
    TestResult totalHeightNormality = heights.ShapiroFranciaTest();
    TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

    // Linear regression of weight on height.
    IReadOnlyList<double> weights = view["Weight"].As<double>();
    LinearRegressionResult fit = weights.LinearRegression(heights);
    Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
    Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

    // Cross-tabulation of Sex against Result.
    ContingencyTable<string, bool> contingency = Bivariate.Crosstabs(
        view["Sex"].As<string>(),
        view["Result"].As<bool>());
    Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
    Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
    Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

    // Derived columns. Bmi divides Weight by the square of Height / 100. Age is
    // measured from the current local time, so its values change from run to run.
    view.AddComputedColumn("Bmi", row => ((double)row["Weight"]) / MoreMath.Sqr((double)row["Height"] / 100.0));
    view.AddComputedColumn("Age", row => (DateTime.Now - (DateTime)row["Birthdate"]).TotalDays / 365.24);

    // Logistic regression of Result on Bmi and a 0/1 encoding of Sex ("M" maps to 1.0).
    MultiLinearLogisticRegressionResult logistic = view["Result"].As<bool>().MultiLinearLogisticRegression(
        view["Bmi"].As<double>(),
        view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0));
    foreach (Parameter parameter in logistic.Parameters)
    {
        Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
    }

    // Spearman rank-correlation test between Age and Result.
    TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
    Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
}