/// <summary>
/// Downloads the weight/height sample from the web and runs a series of
/// statistical routines on it (summaries, normality tests, two-sample tests,
/// and regressions). No assertions are made on the results.
/// </summary>
public void InternetSampleDownload()
{
    Uri source = new Uri("https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_udemy/master/data/weight-height.csv");
    FrameTable table = DownloadFrameTable(source);

    // Work only with rows that have no null entries.
    FrameView view = table.WhereNotNull();

    view.AddComputedColumn("Bmi", (FrameRow row) =>
    {
        double height = (double)row["Height"];
        double weight = (double)row["Weight"];
        return weight / (height * height);
    });

    // Split the rows by the value in the "Gender" column.
    FrameView males = view.Where<string>("Gender", gender => gender == "Male");
    FrameView females = view.Where<string>("Gender", gender => gender == "Female");

    // Typed projections of the columns used repeatedly below.
    var allHeights = view["Height"].As<double>();
    var maleHeights = males["Height"].As<double>();
    var femaleHeights = females["Height"].As<double>();
    var maleWeights = males["Weight"].As<double>();

    // Summaries of the height columns.
    SummaryStatistics maleSummary = new SummaryStatistics(maleHeights);
    SummaryStatistics femaleSummary = new SummaryStatistics(femaleHeights);

    // Normality tests on the combined and per-gender heights.
    TestResult allNormal = allHeights.ShapiroFranciaTest();
    TestResult maleNormal = maleHeights.ShapiroFranciaTest();
    TestResult femaleNormal = femaleHeights.ShapiroFranciaTest();

    // Two-sample comparisons of male and female heights.
    TestResult tTest = Univariate.StudentTTest(maleHeights, femaleHeights);
    TestResult mwTest = Univariate.MannWhitneyTest(maleHeights, femaleHeights);

    // Regressions between male weight and male height.
    LinearRegressionResult result0 = maleWeights.LinearRegression(maleHeights);
    PolynomialRegressionResult result1 = maleHeights.PolynomialRegression(maleWeights, 1);
    PolynomialRegressionResult result2 = maleHeights.PolynomialRegression(maleWeights, 2);
    PolynomialRegressionResult result3 = maleHeights.PolynomialRegression(maleWeights, 3);

    // The multi-linear regression below is commented out in the original source.
    //MultiLinearRegressionResult multi = view["Weight"].As<double>().MultiLinearRegression(view["Height"].As<double>(), view["Gender"].As<string>().Select(s => (s == "Male") ? 1.0 : 0.0).ToList());
}
/// <summary>
/// Groups the test frame by its "male" column and checks that the per-group
/// aggregates agree with statistics computed directly on the matching rows.
/// </summary>
public void FrameViewGroupBy()
{
    FrameView original = GetTestFrame();

    // The distinct keys that the grouping is expected to produce.
    HashSet<bool?> expectedKeys = new HashSet<bool?>(original["male"].As<bool?>().Distinct());

    FrameTable grouped = original.GroupBy("male", members =>
    {
        SummaryStatistics heightSummary = new SummaryStatistics(members["height"].As<double>());
        return new Dictionary<string, object>()
        {
            ["count"] = heightSummary.Count,
            ["heightMean"] = heightSummary.Mean,
            ["heightStandardDeviation"] = heightSummary.StandardDeviation
        };
    });

    // One row per distinct key; one key column plus the three aggregate columns.
    Assert.IsTrue(grouped.Rows.Count == expectedKeys.Count);
    Assert.IsTrue(grouped.Columns.Count == 4);

    var groupKeys = grouped["male"].As<bool?>();
    var groupMeans = grouped["heightMean"].As<double>();
    var groupDeviations = grouped["heightStandardDeviation"].As<double>();

    for (int rowIndex = 0; rowIndex < grouped.Rows.Count; rowIndex++)
    {
        bool? key = groupKeys[rowIndex];
        Assert.IsTrue(expectedKeys.Contains(key));

        // Re-select the rows of this group straight from the original frame.
        FrameView selected = original.Where(r => (bool?)r["male"] == key);
        Assert.IsTrue(selected.Rows.Count > 0);

        double expectedMean = selected["height"].As<double>().Mean();
        Assert.IsTrue(TestUtilities.IsNearlyEqual(groupMeans[rowIndex], expectedMean));

        double expectedDeviation = selected["height"].As<double>().StandardDeviation();
        Assert.IsTrue(TestUtilities.IsNearlyEqual(groupDeviations[rowIndex], expectedDeviation));
    }
}
/// <summary>
/// Example walk-through of data-frame manipulation: loading a CSV from the web,
/// selecting and discarding columns, adding a computed column, filtering,
/// ordering, and grouping.
/// </summary>
public static void ManipulatingData()
{
    // Load the example table from its CSV source.
    FrameTable table;
    Uri address = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
    WebRequest request = WebRequest.Create(address);
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        table = FrameTable.FromCsv(reader);
    }

    // Column selection: keep some columns, or drop some columns.
    FrameView selected = table.Select("Height", "Weight", "Sex");
    FrameView discarded = table.Discard("Name");

    // Add a column computed from other columns of the same row.
    table.AddComputedColumn("Bmi", r => ((double)r["Weight"]) / MoreMath.Sqr((double)r["Height"] / 100.0));
    Console.WriteLine($"Bmi of first subject is {table["Bmi"][0]}.");

    // Null filtering: across all columns, or only the named ones.
    FrameView noNulls = table.WhereNotNull();
    FrameView noNullWeights = table.WhereNotNull("Weight");
    FrameView noNullWeightsOrHeights = table.WhereNotNull("Weight", "Height");

    double meanWeight = table.WhereNotNull("Weight").Columns["Weight"].As<double>().Mean();

    // Row filtering: by a single typed column, or by an arbitrary row predicate.
    FrameView men = table.Where<string>("Sex", s => s == "M");
    FrameView shortMen = table.Where(
        r => ((string)r["Sex"]) == "M" && ((double)r["Height"] < 175.0)
    );

    // Ordering: by column, by column descending, by typed comparison, by row comparison.
    FrameView ordered = table.OrderBy("Height");
    FrameView reversed = table.OrderBy("Height", SortOrder.Descending);
    FrameView alsoOrdered = table.OrderBy<double>("Height", (h1, h2) => h1.CompareTo(h2));
    FrameView sorted = table.OrderBy((r1, r2) =>
    {
        // Both comparisons are evaluated up front; sex decides, height breaks ties.
        int bySex = ((string)r1["Sex"]).CompareTo((string)r2["Sex"]);
        int byHeight = ((double)r1["Height"]).CompareTo((double)r2["Height"]);
        if (bySex != 0)
        {
            return bySex;
        }
        return byHeight;
    });

    List<string> sexes = table["Sex"].As<string>().Distinct().ToList();

    // Grouping: a single aggregate column, or several aggregates per group.
    FrameTable counts = table.GroupBy("Sex", v => v.Rows.Count, "Count");
    FrameTable summarize = table.GroupBy("Sex", v =>
    {
        SummaryStatistics heightSummary = new SummaryStatistics(v["Height"].As<double>());
        return new Dictionary<string, object>()
        {
            ["Count"] = heightSummary.Count,
            ["Mean"] = heightSummary.Mean,
            ["StdDev"] = heightSummary.StandardDeviation
        };
    });
}
/// <summary>
/// Checks that the properties of a SummaryStatistics built from a sample agree
/// with the corresponding list-based methods applied to the same sample.
/// </summary>
public void StreamSampleSummaryAgreement()
{
    // Fixed seed so the generated sample is reproducible.
    Random random = new Random(2);
    UniformDistribution distribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 3.0));
    List<double> data = new List<double>(TestUtilities.CreateDataSample(random, distribution, 32));

    SummaryStatistics streamed = new SummaryStatistics(data);

    Assert.IsTrue(streamed.Count == data.Count);

    // Moment-based quantities are compared approximately.
    Assert.IsTrue(TestUtilities.IsNearlyEqual(data.Mean(), streamed.Mean));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(data.Variance(), streamed.Variance));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(data.PopulationMean(), streamed.PopulationMean));
    Assert.IsTrue(TestUtilities.IsNearlyEqual(data.PopulationVariance(), streamed.PopulationVariance));

    // Extremes are compared exactly.
    Assert.IsTrue(data.Minimum() == streamed.Minimum);
    Assert.IsTrue(data.Maximum() == streamed.Maximum);
}
/// <summary>
/// Computes, for every state, the mean and the standard deviation of the
/// predictions returned by the uncertainty network, evaluating the states in
/// batches of at most <c>Global.TFMAXBATCHSIZE</c>.
/// </summary>
/// <param name="states">States to evaluate; element i of the result corresponds to <c>states[i]</c>.</param>
/// <param name="parameters">Not used by this implementation.</param>
/// <returns>
/// One <see cref="SummaryStatistics"/> per state, constructed from the prediction mean,
/// the prediction standard deviation, and 0 as the third constructor argument.
/// </returns>
public override SummaryStatistics[] GetSummaryStatistics(IState[] states, params object[] parameters)
{
    List<double>[] preds = new List<double>[states.Length];
    SummaryStatistics[] summaryStatistics = new SummaryStatistics[states.Length];

    // Each batch is (start index, end index (exclusive), states in that range).
    List<Tuple<int, int, IState[]>> batches = GetBatches(states, Global.TFMAXBATCHSIZE);
    foreach (var batch in batches)
    {
        List<TrainingInstance> trainingInstances = new List<TrainingInstance>(batch.Item3.Length);
        foreach (IState state in batch.Item3)
        {
            trainingInstances.Add(new TrainingInstance(state, 0));
        }

        var X = representationUncert.BuildData(trainingInstances).Item1;
        List<double>[] batchPreds = nnUncert.Predict(X, null, Global.UNCERTMINSAMPLE).Preds;

        // Copy the batch-local predictions back to their global positions.
        for (int i = batch.Item1; i < batch.Item2; i++)
        {
            preds[i] = batchPreds[i - batch.Item1];
        }
    }

    for (int i = 0; i < preds.Length; i++)
    {
        double mean = preds[i].Average();

        // Variance as the mean squared deviation about the mean. The algebraically
        // equivalent E[x^2] - mean^2 subtracts two nearly equal numbers when the
        // spread is small relative to the mean and loses most of its precision
        // (it can even turn negative). This form is non-negative by construction,
        // so no clamping is needed before the square root.
        double variance = preds[i].Average(x => (x - mean) * (x - mean));
        double sdModel = Math.Sqrt(variance);

        summaryStatistics[i] = new SummaryStatistics(mean, sdModel, 0);
    }

    return summaryStatistics;
}
/// <summary>
/// Accumulates the absolute errors of two double-precision formulas for
/// sqrt(1 - x^2), each measured against a DoubleDouble evaluation of the same
/// quantity. The errors are collected into two summaries; nothing is asserted.
/// </summary>
public void AccuracyTest()
{
    SummaryStatistics s1 = new SummaryStatistics();
    SummaryStatistics s2 = new SummaryStatistics();

    foreach (double oneMinusX in TestUtilities.GenerateRealValues(1.0E-24, 1.0, 1000000))
    {
        double x = 1.0 - oneMinusX;

        // Factored form and direct form of the same expression.
        double factored = Math.Sqrt((1.0 - x) * (1.0 + x));
        double direct = Math.Sqrt(1.0 - x * x);

        // Extended-precision evaluation used as the comparison value.
        DoubleDouble extendedX = (DoubleDouble)x;
        DoubleDouble reference = DoubleDouble.Sqrt(DoubleDouble.One - extendedX * extendedX);

        s1.Add(Math.Abs((double)(factored - reference)));
        s2.Add(Math.Abs((double)(direct - reference)));
    }
}
/// <summary>
/// Checks that combining the summaries of two parts of a sample reproduces the
/// summary of the whole sample, for several randomly chosen split points.
/// </summary>
public void StreamingSampleSummaryCombination()
{
    // Fixed seed so both the sample and the split points are reproducible.
    Random rng = new Random(1);
    UniformDistribution distribution = new UniformDistribution(Interval.FromEndpoints(-4.0, 3.0));
    List<double> data = new List<double>(TestUtilities.CreateDataSample(rng, distribution, 64));

    SummaryStatistics whole = new SummaryStatistics(data);
    Assert.IsTrue(whole.Count == data.Count);

    for (int trial = 0; trial < 4; trial++)
    {
        // Choose where to cut the data.
        int cut = rng.Next(0, data.Count);

        // Summarize everything before the cut.
        SummaryStatistics head = new SummaryStatistics(data.Take(cut));
        Assert.IsTrue(head.Count == cut);

        // Summarize everything from the cut onward.
        SummaryStatistics tail = new SummaryStatistics(data.Skip(cut));
        Assert.IsTrue(tail.Count == data.Count - cut);

        // The merged summary must match the summary of the full sample.
        SummaryStatistics merged = SummaryStatistics.Combine(head, tail);
        Assert.IsTrue(merged.Count == whole.Count);
        Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.Mean, whole.Mean));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.Variance, whole.Variance));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.StandardDeviation, whole.StandardDeviation));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.Skewness, whole.Skewness));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.PopulationMean, whole.PopulationMean));
        Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.PopulationVariance, whole.PopulationVariance));
        Assert.IsTrue(merged.Minimum == whole.Minimum);
        Assert.IsTrue(merged.Maximum == whole.Maximum);
    }
}
/// <summary>
/// Creates an instance whose <c>_statistic</c> field holds a newly constructed
/// <see cref="SummaryStatistics"/>.
/// </summary>
internal MetricStatistics()
{
    this._statistic = new SummaryStatistics();
}
/// <summary>
/// Creates an instance whose <c>_statistics</c> field holds a newly constructed
/// <see cref="SummaryStatistics"/>.
/// </summary>
public Stats()
{
    this._statistics = new SummaryStatistics();
}
/// <summary>
/// Example walk-through of data analysis on a data frame: column access,
/// summary statistics, hypothesis tests, linear regression, contingency
/// tables, computed columns, logistic regression, and rank correlation.
/// Results are written to the console.
/// </summary>
public static void AnalyzingData()
{
    // Load the example table from its CSV source.
    FrameTable table;
    Uri address = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
    WebRequest request = WebRequest.Create(address);
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream()))
    {
        table = FrameTable.FromCsv(reader);
    }

    // Analyze only rows without null entries.
    FrameView view = table.WhereNotNull();

    // Column lookup by (zero-based) index.
    FrameColumn column4 = view.Columns[4];

    // Column lookup by name through the Columns collection.
    FrameColumn heightsColumn = view.Columns["Height"];

    // Column lookup by name through the view's own indexer.
    FrameColumn alsoHeightsColumn = view["Height"];

    // A typed, read-only list of the column's values.
    IReadOnlyList<double> heights = view["Height"].As<double>();

    // Descriptive statistics of the heights.
    SummaryStatistics heightSummary = new SummaryStatistics(view["Height"].As<double>());
    Console.WriteLine($"Count = {heightSummary.Count}");
    Console.WriteLine($"Mean = {heightSummary.Mean}");
    Console.WriteLine($"Standard Deviation = {heightSummary.StandardDeviation}");
    Console.WriteLine($"Skewness = {heightSummary.Skewness}");
    Console.WriteLine($"Estimated population mean = {heightSummary.PopulationMean}");
    Console.WriteLine($"Estimated population standard deviation = {heightSummary.PopulationStandardDeviation}");

    // Heights split by the value in the "Sex" column.
    IReadOnlyList<double> maleHeights = view.Where<string>("Sex", s => s == "M").Columns["Height"].As<double>();
    IReadOnlyList<double> femaleHeights = view.Where<string>("Sex", s => s == "F").Columns["Height"].As<double>();

    // Two-sample t-test on the two groups.
    TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);
    Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
    Console.WriteLine($"P = {test.Probability}");

    // Normality and distribution-compatibility tests.
    TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
    TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
    TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

    // Linear regression of weight on height.
    LinearRegressionResult fit = view["Weight"].As<double>().LinearRegression(view["Height"].As<double>());
    Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
    Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

    // Cross-tabulation of sex against the boolean result.
    ContingencyTable<string, bool> contingency = Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());
    Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
    Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
    Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

    // Derived columns used by the models below.
    view.AddComputedColumn("Bmi", r => ((double)r["Weight"]) / MoreMath.Sqr((double)r["Height"] / 100.0));
    view.AddComputedColumn("Age", r => (DateTime.Now - (DateTime)r["Birthdate"]).TotalDays / 365.24);

    // Logistic regression of the result on BMI and a 0/1 indicator for sex.
    MultiLinearLogisticRegressionResult result = view["Result"].As<bool>().MultiLinearLogisticRegression(
        view["Bmi"].As<double>(),
        view["Sex"].As<string, double>(s => s == "M" ? 1.0 : 0.0)
    );
    foreach (Parameter parameter in result.Parameters)
    {
        Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
    }

    // Spearman rank correlation between age and result.
    TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
    Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
}
/// <summary>
/// Writes the summary values of one data set into the next column of the <see cref="_Worksheet"/>.
/// </summary>
/// <param name="data">The <see cref="Models.Data"/> whose name and range are used.</param>
/// <param name="meanConfidenceLevel">The confidence level passed on for the <see cref="SummaryStatistics.Mean"/>.</param>
/// <param name="standardDeviationConfidenceLevel">The confidence level passed on for the <see cref="SummaryStatistics.StandardDeviation"/>.</param>
private void PrintValues(Models.Data data, int meanConfidenceLevel, int standardDeviationConfidenceLevel)
{
    // Move on to the next column.
    _column++;

    // Build the summary of the data.
    var stats = new SummaryStatistics(data.Range, _doCalculate, meanConfidenceLevel, standardDeviationConfidenceLevel);

    // Write the data set's name to the sheet, then advance to the next row.
    Sheet.Cells[_row, _column] = data.Name;
    _row++;

    // Base values. NOTE(review): WriteFunction presumably emits the value only
    // when its flag is set -- confirm against its definition. The call order
    // below is the row order and must not change.
    WriteFunction(_doCalculate.Mean || _doCalculate.BoxWhiskerPlot, stats.Mean);
    WriteFunction(_doCalculate.Variance, stats.Variance);
    WriteFunction(_doCalculate.StandardDeviation, stats.StandardDeviation);
    WriteFunction(_doCalculate.Minimum, stats.Minimum);
    WriteFunction(_doCalculate.Quartile1 || _doCalculate.BoxWhiskerPlot, stats.Quartile1);
    WriteFunction(_doCalculate.Median, stats.Median);
    WriteFunction(_doCalculate.Quartile3, stats.Quartile3);
    WriteFunction(_doCalculate.Maximum, stats.Maximum);
    WriteFunction(_doCalculate.InterquartileRange, stats.InterquartileRange);
    WriteFunction(_doCalculate.Skewness, stats.Skewness);
    WriteFunction(_doCalculate.Kurtosis, stats.Kurtosis);
    WriteFunction(_doCalculate.MeanAbsoluteDeviation, stats.MeanAbsoluteDeviation);
    WriteFunction(_doCalculate.Mode, stats.Mode);
    HasMode.Add(stats.HasMode);
    WriteFunction(_doCalculate.Range, stats.Range);
    WriteFunction(_doCalculate.Count, stats.Count);
    WriteFunction(_doCalculate.Sum, stats.Sum);

    // Box-whisker plot values.
    WriteFunction(_doCalculate.BoxWhiskerPlot, stats.Quartile1Median);
    WriteFunction(_doCalculate.BoxWhiskerPlot, stats.MedianQuartile3);
    WriteFunction(_doCalculate.BoxWhiskerPlot, stats.MinusWhisker);
    WriteFunction(_doCalculate.BoxWhiskerPlot, stats.PlusWhisker);

    // Outliers are a base value, but are written after the box-whisker values.
    if (_doCalculate.Outliers || _doCalculate.BoxWhiskerPlot)
    {
        foreach (var outlier in stats.Outliers)
        {
            var outlierRow = _row++;
            Sheet.Cells[outlierRow, _column] = "=" + outlier;

            // Record the row only while fewer rows than outliers have been recorded.
            if (OutliersRow.Count < stats.Outliers.Count)
            {
                OutliersRow.Add(outlierRow);
            }
        }
    }

    // Values shared by both confidence intervals.
    WriteFunction(_doCalculate.MeanConfidenceInterval || _doCalculate.StandardDeviationConfidenceInterval, stats.Count);
    WriteFunction(_doCalculate.MeanConfidenceInterval || _doCalculate.StandardDeviationConfidenceInterval, stats.Mean);
    WriteFunction(_doCalculate.MeanConfidenceInterval || _doCalculate.StandardDeviationConfidenceInterval, stats.StandardDeviation);

    // Confidence interval of the mean.
    if (_doCalculate.MeanConfidenceInterval)
    {
        Sheet.Cells[_row, _column] = stats.MeanConfidenceLevel;
        MeanConfidenceLevelRow = _row;

        // Format the level cell as a percentage and step past it.
        var meanLevelCell = (Range)Sheet.Cells[_row, _column];
        _row++;
        meanLevelCell.NumberFormat = "0.00%";

        stats.SetMeanAlpha(MeanConfidenceLevelRow, _column);
    }

    WriteFunction(_doCalculate.MeanConfidenceInterval, stats.MeanAlpha);
    WriteFunction(_doCalculate.MeanConfidenceInterval, stats.DegreesOfFreedom);
    WriteFunction(_doCalculate.MeanConfidenceInterval, stats.MeanConfidenceInterval);
    WriteFunction(_doCalculate.MeanConfidenceInterval, stats.MeanLowerLimit);
    WriteFunction(_doCalculate.MeanConfidenceInterval, stats.MeanUpperLimit);

    // Confidence interval of the standard deviation.
    if (_doCalculate.StandardDeviationConfidenceInterval)
    {
        Sheet.Cells[_row, _column] = stats.StandardDeviationConfidenceLevel;
        StandardDeviationConfidenceLevelRow = _row;

        // Format the level cell as a percentage and step past it.
        var deviationLevelCell = (Range)Sheet.Cells[_row, _column];
        _row++;
        deviationLevelCell.NumberFormat = "0.00%";

        stats.SetStandardDeviationAlpha(StandardDeviationConfidenceLevelRow, _column);
    }

    WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, stats.StandardDeviationAlpha);
    WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, stats.DegreesOfFreedom);
    WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, stats.StandardDeviationConfidenceIntervalLowerLimit);
    WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, stats.StandardDeviationConfidenceIntervalUpperLimit);
    WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, stats.StandardDeviationLowerLimit);
    WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, stats.StandardDeviationUpperLimit);
}