Esempio n. 1
0
        /// <summary>
        /// Smoke test: downloads a public weight/height CSV, derives a "Bmi" column,
        /// splits the rows by gender and runs summary, normality, two-sample and
        /// regression analyses on the result.
        /// </summary>
        /// <remarks>
        /// Requires network access. None of the computed results are asserted on, so
        /// the test can only fail through an exception thrown by one of the calls.
        /// </remarks>
        public void InternetSampleDownload()
        {
            Uri source = new Uri("https://raw.githubusercontent.com/Dataweekends/zero_to_deep_learning_udemy/master/data/weight-height.csv");
            FrameTable downloaded = DownloadFrameTable(source);
            FrameView complete = downloaded.WhereNotNull();

            // Bmi is computed as weight / height^2 directly from the raw column values.
            complete.AddComputedColumn("Bmi", (FrameRow row) =>
            {
                double height = (double)row["Height"];
                double weight = (double)row["Weight"];
                return weight / (height * height);
            });

            FrameView men   = complete.Where("Gender", (string gender) => gender == "Male");
            FrameView women = complete.Where("Gender", (string gender) => gender == "Female");

            // Typed column views are looked up once and reused by the analyses below.
            var menHeights   = men["Height"].As<double>();
            var womenHeights = women["Height"].As<double>();

            SummaryStatistics menSummary   = new SummaryStatistics(menHeights);
            SummaryStatistics womenSummary = new SummaryStatistics(womenHeights);

            // Normality of heights: whole sample, then each gender separately.
            var everyHeight = complete["Height"].As<double>();
            TestResult overallNormality = everyHeight.ShapiroFranciaTest();
            TestResult menNormality     = menHeights.ShapiroFranciaTest();
            TestResult womenNormality   = womenHeights.ShapiroFranciaTest();

            // Two-sample comparisons of male vs. female heights.
            TestResult studentT    = Univariate.StudentTTest(menHeights, womenHeights);
            TestResult mannWhitney = Univariate.MannWhitneyTest(menHeights, womenHeights);

            // Regressions on the male rows. Note the linear fit is invoked on the weight
            // column with height as argument, while the polynomial fits are invoked on
            // the height column with weight as argument.
            var menWeights = men["Weight"].As<double>();
            LinearRegressionResult     linearFit    = menWeights.LinearRegression(menHeights);
            PolynomialRegressionResult degreeOneFit   = menHeights.PolynomialRegression(menWeights, 1);
            PolynomialRegressionResult degreeTwoFit   = menHeights.PolynomialRegression(menWeights, 2);
            PolynomialRegressionResult degreeThreeFit = menHeights.PolynomialRegression(menWeights, 3);

            // Disabled in the original source:
            //MultiLinearRegressionResult multi = view["Weight"].As<double>().MultiLinearRegression(view["Height"].As<double>(), view["Gender"].As<string>().Select(s => (s == "Male") ? 1.0 : 0.0).ToList());
        }
Esempio n. 2
0
        /// <summary>
        /// Checks that grouping the test frame by its "male" column yields one row per
        /// distinct key, and that each group's aggregated height mean and standard
        /// deviation agree with the values computed directly from the matching rows.
        /// </summary>
        public void FrameViewGroupBy()
        {
            FrameView source = GetTestFrame();

            // The distinct key values (including null) the grouping is expected to produce.
            HashSet<bool?> expectedKeys = new HashSet<bool?>(source["male"].As<bool?>().Distinct());

            FrameTable aggregated = source.GroupBy("male", group =>
            {
                SummaryStatistics stats = new SummaryStatistics(group["height"].As<double>());

                Dictionary<string, object> outputs = new Dictionary<string, object>();
                outputs.Add("count", stats.Count);
                outputs.Add("heightMean", stats.Mean);
                outputs.Add("heightStandardDeviation", stats.StandardDeviation);
                return outputs;
            });

            // One row per key; the key column plus the three aggregate columns.
            Assert.IsTrue(aggregated.Rows.Count == expectedKeys.Count);
            Assert.IsTrue(aggregated.Columns.Count == 4);

            var keyColumn       = aggregated["male"].As<bool?>();
            var meanColumn      = aggregated["heightMean"].As<double>();
            var deviationColumn = aggregated["heightStandardDeviation"].As<double>();

            for (int row = 0; row < aggregated.Rows.Count; row++)
            {
                bool? key = keyColumn[row];
                Assert.IsTrue(expectedKeys.Contains(key));

                // Re-select the rows of this group straight from the source frame.
                FrameView members = source.Where(r => (bool?)r["male"] == key);
                Assert.IsTrue(members.Rows.Count > 0);

                var memberHeights = members["height"].As<double>();

                double expectedMean = memberHeights.Mean();
                Assert.IsTrue(TestUtilities.IsNearlyEqual(meanColumn[row], expectedMean));

                double expectedDeviation = memberHeights.StandardDeviation();
                Assert.IsTrue(TestUtilities.IsNearlyEqual(deviationColumn[row], expectedDeviation));
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Example walk-through of frame manipulation: downloads a CSV into a
        /// <see cref="FrameTable"/>, then demonstrates column selection, a computed
        /// column, null filtering, row filtering, ordering and grouping.
        /// </summary>
        /// <remarks>
        /// Requires network access. Only the Bmi of the first row is printed; the other
        /// results are assigned to locals purely for illustration.
        /// </remarks>
        public static void ManipulatingData()
        {
            Uri dataLocation = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");

            // Fetch the CSV and parse it; both the response and the reader are disposed.
            FrameTable table;
            using (WebResponse response = WebRequest.Create(dataLocation).GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                table = FrameTable.FromCsv(reader);
            }

            // Column projection: keep a named subset, or drop a named column.
            FrameView physicalColumns = table.Select("Height", "Weight", "Sex");
            FrameView withoutNames    = table.Discard("Name");

            // Computed column: weight divided by the square of (height / 100).
            table.AddComputedColumn("Bmi", row =>
            {
                double scaledHeight = (double)row["Height"] / 100.0;
                return ((double)row["Weight"]) / MoreMath.Sqr(scaledHeight);
            });
            Console.WriteLine($"Bmi of first subject is {table["Bmi"][0]}.");

            // Null filtering: across all columns, or restricted to the named columns.
            FrameView completeRows        = table.WhereNotNull();
            FrameView rowsWithWeight      = table.WhereNotNull("Weight");
            FrameView rowsWithWeightAndHeight = table.WhereNotNull("Weight", "Height");

            FrameView weighed = table.WhereNotNull("Weight");
            double averageWeight = weighed.Columns["Weight"].As<double>().Mean();

            // Row filtering: by a single typed column, or by a whole-row predicate.
            FrameView males = table.Where<string>("Sex", sex => sex == "M");

            FrameView shorterMales = table.Where(
                row => ((string)row["Sex"]) == "M" && ((double)row["Height"] < 175.0));

            // Ordering: by column name, with a direction, with a typed comparison,
            // and with a comparison over whole rows.
            FrameView ascending  = table.OrderBy("Height");
            FrameView descending = table.OrderBy("Height", SortOrder.Descending);
            FrameView viaComparison = table.OrderBy<double>("Height", (a, b) => a.CompareTo(b));

            FrameView bySexThenHeight = table.OrderBy((left, right) =>
            {
                // Both comparisons are evaluated up front; height only breaks ties.
                int bySex    = ((string)left["Sex"]).CompareTo((string)right["Sex"]);
                int byHeight = ((double)left["Height"]).CompareTo((double)right["Height"]);
                if (bySex != 0)
                {
                    return bySex;
                }
                return byHeight;
            });

            List<string> distinctSexes = table["Sex"].As<string>().Distinct().ToList();

            // Grouping: a single aggregate with a named output column...
            FrameTable rowCounts = table.GroupBy("Sex", group => group.Rows.Count, "Count");

            // ...or several aggregates returned as a name/value dictionary.
            FrameTable heightSummaries = table.GroupBy("Sex", group =>
            {
                SummaryStatistics stats = new SummaryStatistics(group["Height"].As<double>());

                Dictionary<string, object> outputs = new Dictionary<string, object>();
                outputs.Add("Count", stats.Count);
                outputs.Add("Mean", stats.Mean);
                outputs.Add("StdDev", stats.StandardDeviation);
                return outputs;
            });
        }
Esempio n. 4
0
        /// <summary>
        /// Checks that the properties of a <see cref="SummaryStatistics"/> built from a
        /// sample agree with the corresponding list extension methods applied to the
        /// same sample.
        /// </summary>
        public void StreamSampleSummaryAgreement()
        {
            // Fixed seed so the generated sample is reproducible.
            Random random = new Random(2);
            UniformDistribution source = new UniformDistribution(Interval.FromEndpoints(-4.0, 3.0));
            List<double> values = new List<double>(TestUtilities.CreateDataSample(random, source, 32));

            SummaryStatistics streamed = new SummaryStatistics(values);

            Assert.IsTrue(streamed.Count == values.Count);

            // Moment-based quantities are compared with a tolerance.
            Assert.IsTrue(TestUtilities.IsNearlyEqual(values.Mean(), streamed.Mean));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(values.Variance(), streamed.Variance));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(values.PopulationMean(), streamed.PopulationMean));
            Assert.IsTrue(TestUtilities.IsNearlyEqual(values.PopulationVariance(), streamed.PopulationVariance));

            // Extremes are compared for exact equality.
            Assert.IsTrue(values.Minimum() == streamed.Minimum);
            Assert.IsTrue(values.Maximum() == streamed.Maximum);
        }
Esempio n. 5
0
        /// <summary>
        /// Produces one <see cref="SummaryStatistics"/> per input state from the
        /// prediction samples returned by the uncertainty network.
        /// </summary>
        /// <param name="states">The states to evaluate; the result array is index-aligned with it.</param>
        /// <param name="parameters">Not read by this implementation.</param>
        /// <returns>
        /// An array with, for each state, the mean of its prediction samples and their
        /// standard deviation computed as sqrt(max(0, mean(x^2) - mean(x)^2)).
        /// </returns>
        public override SummaryStatistics[] GetSummaryStatistics(IState[] states, params object[] parameters)
        {
            int stateCount = states.Length;
            List<double>[] samplesPerState = new List<double>[stateCount];
            SummaryStatistics[] results = new SummaryStatistics[stateCount];

            // Phase 1: run the network batch by batch and scatter each batch's
            // prediction samples back to the positions of the states they belong to.
            var slices = GetBatches(states, Global.TFMAXBATCHSIZE);
            foreach (var slice in slices)
            {
                // Each state is wrapped in a training instance with a placeholder second argument of 0.
                List<TrainingInstance> instances = slice.Item3
                    .Select(state => new TrainingInstance(state, 0))
                    .ToList();

                var features = representationUncert.BuildData(instances).Item1;
                List<double>[] slicePredictions = nnUncert.Predict(features, null, Global.UNCERTMINSAMPLE).Preds;

                // Item1/Item2 are used as the [start, end) positions of this batch in `states`.
                int start = slice.Item1;
                for (int target = start; target < slice.Item2; target++)
                {
                    samplesPerState[target] = slicePredictions[target - start];
                }
            }

            // Phase 2: reduce every state's samples to a mean and a standard deviation.
            for (int k = 0; k < samplesPerState.Length; k++)
            {
                List<double> draws = samplesPerState[k];

                double average       = draws.Average();
                double meanOfSquares = draws.Select(v => Math.Pow(v, 2)).Average();

                // Clamp at zero so rounding error cannot make the variance negative.
                double variance = Math.Max(0, meanOfSquares - Math.Pow(average, 2));
                double spread   = Math.Sqrt(variance);

                results[k] = new SummaryStatistics(average, spread, 0);
            }

            return results;
        }
        /// <summary>
        /// Accumulates the absolute errors of two double-precision formulas for
        /// sqrt(1 - x^2), the factored form sqrt((1 - x)(1 + x)) and the direct form
        /// sqrt(1 - x * x), measured against a <see cref="DoubleDouble"/> reference.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the two accumulated summaries are never asserted on, so this
        /// test can only fail by throwing. Confirm whether a comparison was intended.
        /// </remarks>
        public void AccuracyTest()
        {
            SummaryStatistics factoredErrors = new SummaryStatistics();
            SummaryStatistics directErrors   = new SummaryStatistics();

            foreach (double oneMinusX in TestUtilities.GenerateRealValues(1.0E-24, 1.0, 1000000))
            {
                // The generated value is 1 - x, so x lies at or just below 1.
                double x = 1.0 - oneMinusX;

                // Reference value evaluated in extended precision.
                DoubleDouble extendedX = (DoubleDouble)x;
                DoubleDouble reference = DoubleDouble.Sqrt(DoubleDouble.One - extendedX * extendedX);

                double factored = Math.Sqrt((1.0 - x) * (1.0 + x));
                double direct   = Math.Sqrt(1.0 - x * x);

                factoredErrors.Add(Math.Abs((double)(factored - reference)));
                directErrors.Add(Math.Abs((double)(direct - reference)));
            }
        }
        /// <summary>
        /// Checks that combining the summaries of two complementary parts of a sample
        /// reproduces the summary computed over the whole sample.
        /// </summary>
        public void StreamingSampleSummaryCombination()
        {
            // Fixed seed: the same generator drives both the sample and the split points.
            Random random = new Random(1);
            UniformDistribution source = new UniformDistribution(Interval.FromEndpoints(-4.0, 3.0));
            List<double> values = new List<double>(TestUtilities.CreateDataSample(random, source, 64));

            SummaryStatistics whole = new SummaryStatistics(values);
            Assert.IsTrue(whole.Count == values.Count);

            for (int trial = 0; trial < 4; trial++)
            {
                // Split index in [0, Count): the head may be empty, the tail never is.
                int split = random.Next(0, values.Count);

                SummaryStatistics head = new SummaryStatistics(values.Take(split));
                Assert.IsTrue(head.Count == split);

                SummaryStatistics tail = new SummaryStatistics(values.Skip(split));
                Assert.IsTrue(tail.Count == values.Count - split);

                // The merged summary must match the one built from the full sample.
                SummaryStatistics merged = SummaryStatistics.Combine(head, tail);
                Assert.IsTrue(merged.Count == whole.Count);
                Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.Mean, whole.Mean));
                Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.Variance, whole.Variance));
                Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.StandardDeviation, whole.StandardDeviation));
                Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.Skewness, whole.Skewness));
                Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.PopulationMean, whole.PopulationMean));
                Assert.IsTrue(TestUtilities.IsNearlyEqual(merged.PopulationVariance, whole.PopulationVariance));

                // Extremes are compared for exact equality.
                Assert.IsTrue(merged.Minimum == whole.Minimum);
                Assert.IsTrue(merged.Maximum == whole.Maximum);
            }
        }
 /// <summary>
 /// Creates an instance backed by a new, empty <see cref="SummaryStatistics"/>.
 /// </summary>
 internal MetricStatistics()
 {
     this._statistic = new SummaryStatistics();
 }
 /// <summary>
 /// Creates an instance backed by a new, empty <see cref="SummaryStatistics"/>.
 /// </summary>
 public Stats()
 {
     this._statistics = new SummaryStatistics();
 }
Esempio n. 10
0
        /// <summary>
        /// Example walk-through of data analysis: downloads a CSV, drops rows with
        /// nulls, then prints summary statistics, a two-sample t-test, a linear
        /// regression, a contingency table, a logistic regression and a Spearman test.
        /// </summary>
        /// <remarks>
        /// Requires network access. The "Age" column is derived from
        /// <see cref="DateTime.Now"/>, so its values depend on when the example runs.
        /// </remarks>
        public static void AnalyzingData()
        {
            Uri dataLocation = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");

            // Fetch the CSV and parse it; both the response and the reader are disposed.
            FrameTable table;
            using (WebResponse response = WebRequest.Create(dataLocation).GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                table = FrameTable.FromCsv(reader);
            }
            FrameView view = table.WhereNotNull();

            // Three ways to reach a column: by zero-based index, by name through
            // Columns, and by name through the view's own indexer.
            FrameColumn fifthColumn  = view.Columns[4];
            FrameColumn heightByName = view.Columns["Height"];
            FrameColumn heightDirect = view["Height"];

            // Typed access to the height values, reused by the analyses below.
            IReadOnlyList<double> heights = view["Height"].As<double>();

            SummaryStatistics summary = new SummaryStatistics(heights);

            Console.WriteLine($"Count = {summary.Count}");
            Console.WriteLine($"Mean = {summary.Mean}");
            Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
            Console.WriteLine($"Skewness = {summary.Skewness}");
            Console.WriteLine($"Estimated population mean = {summary.PopulationMean}");
            Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

            // Compare male and female heights.
            FrameView males = view.Where<string>("Sex", sex => sex == "M");
            IReadOnlyList<double> maleHeights = males.Columns["Height"].As<double>();
            FrameView females = view.Where<string>("Sex", sex => sex == "F");
            IReadOnlyList<double> femaleHeights = females.Columns["Height"].As<double>();

            TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);

            Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
            Console.WriteLine($"P = {test.Probability}");

            // Further tests whose results are not printed.
            TestResult maleNormality    = maleHeights.ShapiroFranciaTest();
            TestResult overallNormality = heights.ShapiroFranciaTest();
            TestResult distributionMatch = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

            // Linear regression invoked on the weight column with height as argument.
            IReadOnlyList<double> weights = view["Weight"].As<double>();
            LinearRegressionResult fit = weights.LinearRegression(heights);

            Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
            Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

            // Cross-tabulate sex against the boolean "Result" column.
            ContingencyTable<string, bool> contingency =
                Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());

            Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
            Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
            Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

            // Derived columns: Bmi from weight and (height / 100), Age in years from the birthdate.
            view.AddComputedColumn("Bmi", row =>
            {
                double scaledHeight = (double)row["Height"] / 100.0;
                return ((double)row["Weight"]) / MoreMath.Sqr(scaledHeight);
            });
            view.AddComputedColumn("Age", row => (DateTime.Now - (DateTime)row["Birthdate"]).TotalDays / 365.24);

            // Logistic regression of Result on Bmi and a 1.0/0.0 indicator for sex == "M".
            var outcomes      = view["Result"].As<bool>();
            var bmiValues     = view["Bmi"].As<double>();
            var maleIndicator = view["Sex"].As<string, double>(sex => sex == "M" ? 1.0 : 0.0);
            MultiLinearLogisticRegressionResult result =
                outcomes.MultiLinearLogisticRegression(bmiValues, maleIndicator);

            foreach (Parameter parameter in result.Parameters)
            {
                Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
            }

            // Rank correlation between age and the Result column read as doubles.
            TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());

            Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
        }
Esempio n. 11
0
        /// <summary>
        /// Writes the summary of one data set into the next column of the <see cref="_Worksheet"/>:
        /// the data set's name first, then each statistic in a fixed top-to-bottom order.
        /// </summary>
        /// <remarks>
        /// The method advances <c>_column</c> once on entry and <c>_row</c> as cells are written, so the
        /// order of the statements below determines the layout of the sheet.
        /// NOTE(review): <c>WriteFunction</c> is defined elsewhere; it presumably writes its second argument
        /// to the current cell and advances <c>_row</c> only when its first argument is true — confirm.
        /// </remarks>
        /// <param name="data">The <see cref="Models.Data"/> to summarize; its <c>Range</c> is passed to <see cref="SummaryStatistics"/> and its <c>Name</c> is written as the first cell.</param>
        /// <param name="meanConfidenceLevel">The confidence level for the <see cref="SummaryStatistics.Mean"/>, forwarded to the <see cref="SummaryStatistics"/> constructor. This signature declares no default value.</param>
        /// <param name="standardDeviationConfidenceLevel">The confidence level for the <see cref="SummaryStatistics.StandardDeviation"/>, forwarded to the <see cref="SummaryStatistics"/> constructor. This signature declares no default value.</param>
        private void PrintValues(Models.Data data, int meanConfidenceLevel, int standardDeviationConfidenceLevel)
        {
            // Move to the next column; each data set gets its own column.
            _column++;

            // Build the summary of the data for the requested calculations and confidence levels.
            var summary = new SummaryStatistics(data.Range, _doCalculate, meanConfidenceLevel, standardDeviationConfidenceLevel);

            // Write the data set's name into the current cell, then advance to the next row.
            Sheet.Cells[_row++, _column] = data.Name;

            // Base statistics. Mean and Quartile1 are also requested when the Box-Whisker Plot is enabled.
            WriteFunction(_doCalculate.Mean || _doCalculate.BoxWhiskerPlot, summary.Mean);
            WriteFunction(_doCalculate.Variance, summary.Variance);
            WriteFunction(_doCalculate.StandardDeviation, summary.StandardDeviation);
            WriteFunction(_doCalculate.Minimum, summary.Minimum);
            WriteFunction(_doCalculate.Quartile1 || _doCalculate.BoxWhiskerPlot, summary.Quartile1);
            WriteFunction(_doCalculate.Median, summary.Median);
            WriteFunction(_doCalculate.Quartile3, summary.Quartile3);
            WriteFunction(_doCalculate.Maximum, summary.Maximum);
            WriteFunction(_doCalculate.InterquartileRange, summary.InterquartileRange);
            WriteFunction(_doCalculate.Skewness, summary.Skewness);
            WriteFunction(_doCalculate.Kurtosis, summary.Kurtosis);
            WriteFunction(_doCalculate.MeanAbsoluteDeviation, summary.MeanAbsoluteDeviation);
            WriteFunction(_doCalculate.Mode, summary.Mode);
            // HasMode is appended for every data set, regardless of whether Mode was requested.
            HasMode.Add(summary.HasMode);
            WriteFunction(_doCalculate.Range, summary.Range);
            WriteFunction(_doCalculate.Count, summary.Count);
            WriteFunction(_doCalculate.Sum, summary.Sum);

            // Box-Whisker Plot helper values, all gated on the same flag.
            WriteFunction(_doCalculate.BoxWhiskerPlot, summary.Quartile1Median);
            WriteFunction(_doCalculate.BoxWhiskerPlot, summary.MedianQuartile3);
            WriteFunction(_doCalculate.BoxWhiskerPlot, summary.MinusWhisker);
            WriteFunction(_doCalculate.BoxWhiskerPlot, summary.PlusWhisker);

            // Outliers: a base statistic, but written after the Box-Whisker Plot values
            // because the plot needs them as well.
            if (_doCalculate.Outliers || _doCalculate.BoxWhiskerPlot)
            {
                foreach (var outlier in summary.Outliers)
                {
                    // Each outlier is written as a formula ("=" prefix), one per row.
                    Sheet.Cells[_row++, _column] = "=" + outlier;
                    // Record the row just written, until OutliersRow holds as many entries as this
                    // data set has outliers. OutliersRow persists across calls, so rows recorded for
                    // an earlier data set are not recorded again.
                    if (OutliersRow.Count < summary.Outliers.Count)
                    {
                        OutliersRow.Add(_row - 1);
                    }
                }
            }

            // Confidence intervals: Count, Mean and StandardDeviation are written once
            // when either interval is requested.
            WriteFunction(_doCalculate.MeanConfidenceInterval || _doCalculate.StandardDeviationConfidenceInterval, summary.Count);
            WriteFunction(_doCalculate.MeanConfidenceInterval || _doCalculate.StandardDeviationConfidenceInterval, summary.Mean);
            WriteFunction(_doCalculate.MeanConfidenceInterval || _doCalculate.StandardDeviationConfidenceInterval, summary.StandardDeviation);
            if (_doCalculate.MeanConfidenceInterval)
            {
                // Write the confidence level, remember its row, format the cell as a percentage,
                // then hand the cell position to the summary before MeanAlpha is read below.
                Sheet.Cells[_row, _column] = summary.MeanConfidenceLevel;
                MeanConfidenceLevelRow     = _row;
                ((Range)Sheet.Cells[_row++, _column]).NumberFormat = "0.00%";
                summary.SetMeanAlpha(MeanConfidenceLevelRow, _column);
            }
            WriteFunction(_doCalculate.MeanConfidenceInterval, summary.MeanAlpha);
            WriteFunction(_doCalculate.MeanConfidenceInterval, summary.DegreesOfFreedom);
            WriteFunction(_doCalculate.MeanConfidenceInterval, summary.MeanConfidenceInterval);
            WriteFunction(_doCalculate.MeanConfidenceInterval, summary.MeanLowerLimit);
            WriteFunction(_doCalculate.MeanConfidenceInterval, summary.MeanUpperLimit);
            if (_doCalculate.StandardDeviationConfidenceInterval)
            {
                // Same steps as for the mean: write the level, remember its row, format it as a
                // percentage, then hand the cell position to the summary before the alpha is read.
                Sheet.Cells[_row, _column]          = summary.StandardDeviationConfidenceLevel;
                StandardDeviationConfidenceLevelRow = _row;
                ((Range)Sheet.Cells[_row++, _column]).NumberFormat = "0.00%";
                summary.SetStandardDeviationAlpha(StandardDeviationConfidenceLevelRow, _column);
            }
            WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, summary.StandardDeviationAlpha);
            WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, summary.DegreesOfFreedom);
            WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, summary.StandardDeviationConfidenceIntervalLowerLimit);
            WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, summary.StandardDeviationConfidenceIntervalUpperLimit);
            WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, summary.StandardDeviationLowerLimit);
            WriteFunction(_doCalculate.StandardDeviationConfidenceInterval, summary.StandardDeviationUpperLimit);
        }