public void MultivariateLinearLogisticRegressionSimple()
        {
            // define the model: log-odds t = a + b0 * x0 + b1 * x1, p = 1 / (1 + exp(-t))
            double a  = 1.0;
            double b0 = -1.0 / 2.0;
            double b1 = 1.0 / 3.0;
            ContinuousDistribution x0distribution = new LaplaceDistribution();
            ContinuousDistribution x1distribution = new NormalDistribution();

            // draw a sample from the model
            Random             rng   = new Random(1);
            MultivariateSample old   = new MultivariateSample("y", "x0", "x1");
            FrameTable         table = new FrameTable();

            table.AddColumn <double>("x0");
            table.AddColumn <double>("x1");
            table.AddColumn <bool>("y");

            for (int i = 0; i < 100; i++)
            {
                double x0 = x0distribution.GetRandomValue(rng);
                double x1 = x1distribution.GetRandomValue(rng);
                double t  = a + b0 * x0 + b1 * x1;
                double p  = 1.0 / (1.0 + Math.Exp(-t));
                bool   y  = (rng.NextDouble() < p);
                old.Add(y ? 1.0 : 0.0, x0, x1);
                table.AddRow(x0, x1, y);
            }

            // do a multi-linear logistic regression fit on the sample, using both the old and the new API
            MultiLinearLogisticRegressionResult oldResult = old.LogisticLinearRegression(0);
            MultiLinearLogisticRegressionResult newResult = table["y"].As <bool>().MultiLinearLogisticRegression(
                table["x0"].As <double>(), table["x1"].As <double>()
                );

            // the result should have the appropriate dimension
            Assert.IsTrue(newResult.Parameters.Count == 3);

            // The parameters should match the model
            Assert.IsTrue(newResult.CoefficientOf(0).ConfidenceInterval(0.99).ClosedContains(b0));
            Assert.IsTrue(newResult.CoefficientOf("x1").ConfidenceInterval(0.99).ClosedContains(b1));
            Assert.IsTrue(newResult.Intercept.ConfidenceInterval(0.99).ClosedContains(a));

            // Our predictions should be better than chance.
            int correct = 0;

            for (int i = 0; i < table.Rows.Count; i++)
            {
                FrameRow row = table.Rows[i];
                double   x0  = (double)row["x0"];
                double   x1  = (double)row["x1"];
                double   p   = newResult.Predict(x0, x1).Value;
                bool     y   = (bool)row["y"];
                if ((y && p > 0.5) || (!y && p < 0.5))
                {
                    correct++;
                }
            }
            Assert.IsTrue(correct > 0.5 * table.Rows.Count);
        }
Example 2
        public void FrameTableDictionariesRoundtrip()
        {
            FrameTable frame = new FrameTable();

            frame.AddColumn <string>("name");
            frame.AddColumn <double>("height");
            frame.AddColumn <bool?>("male");

            frame.AddRow("a", 5.0, false);
            frame.AddRow("b", 6.0, true);
            frame.AddRow("c", 5.5, null);

            List <Dictionary <string, object> > dictionaries = frame.ToDictionaries().ToList();

            Assert.IsTrue(dictionaries.Count == frame.Rows.Count);
            Assert.IsTrue(dictionaries[0].Count == frame.Columns.Count);

            FrameTable frame2 = FrameTable.FromDictionaries(dictionaries);

            Assert.IsTrue(frame2.Rows.Count == frame.Rows.Count);
            Assert.IsTrue(frame2.Columns.Count == frame.Columns.Count);
            Assert.IsTrue(frame2.Columns[0].Name == frame.Columns[0].Name);
            Assert.IsTrue(frame2.Columns[1].StorageType == frame.Columns[1].StorageType);
            Assert.IsTrue(frame2.Rows[2]["male"] == frame.Rows[2]["male"]);
        }
Example 3
        public void LinearLogisticRegressionSimple()
        {
            Polynomial m = Polynomial.FromCoefficients(-1.0, 2.0);

            FrameTable table = new FrameTable();

            table.AddColumn <double>("x");
            table.AddColumn <string>("z");

            Random rng = new Random(2);
            ContinuousDistribution xDistribution = new CauchyDistribution(4.0, 2.0);

            for (int i = 0; i < 24; i++)
            {
                double x = xDistribution.GetRandomValue(rng);
                double y = m.Evaluate(x);
                double p = 1.0 / (1.0 + Math.Exp(-y));
                bool   z = (rng.NextDouble() < p);
                table.AddRow(x, z.ToString());
            }

            LinearLogisticRegressionResult fit = table["z"].As((string s) => Boolean.Parse(s)).LinearLogisticRegression(table["x"].As <double>());

            Assert.IsTrue(fit.Intercept.ConfidenceInterval(0.99).ClosedContains(m.Coefficient(0)));
            Assert.IsTrue(fit.Slope.ConfidenceInterval(0.99).ClosedContains(m.Coefficient(1)));
        }
Example 4
        public void FrameViewColumnCoercion()
        {
            // Create nullable double and integer columns.
            FrameTable table = new FrameTable();

            table.AddColumn <double?>("one");
            table.AddColumn <int>("two");

            table.AddRow(1.1, 2);
            table.AddRow(null, 3);

            // Coerce the nullable double into a non-nullable double
            // Should work when the value is non-null, and fail when the value is null
            IReadOnlyList <double> one = table["one"].As <double>();

            Assert.IsTrue(one[0] == 1.1);
            try {
                double v = one[1];
                Assert.Fail();
            } catch (Exception) { }

            // Coerce the integer to a double.
            IReadOnlyList <double> two = table.Columns[1].As <double>();

            Assert.IsTrue(two[0] == 2.0);
        }
Example 5
        public void TimeSeriesFitAR1()
        {
            double alpha = 0.3;
            double mu    = 0.2;
            double sigma = 0.4;
            int    n     = 24;

            // For our fit to AR(1), we have incorporated bias correction (at least
            // for the most important parameter alpha), so we can do a small-n test.

            FrameTable data = new FrameTable();

            data.AddColumn <UncertainValue>("mu");
            data.AddColumn <UncertainValue>("alpha");
            data.AddColumn <UncertainValue>("sigma");
            data.AddColumn <SymmetricMatrix>("covariance");
            data.AddColumn <double>("p");

            for (int i = 0; i < 128; i++)
            {
                TimeSeries series = GenerateAR1TimeSeries(alpha, mu, sigma, n, n * i + 271828);

                AR1FitResult result = series.FitToAR1();

                data.AddRow(
                    result.Mu, result.Alpha, result.Sigma,
                    result.Parameters.CovarianceMatrix, result.GoodnessOfFit.Probability
                    );
            }

            data.AddComputedColumn("alphaValue", r => ((UncertainValue)r["alpha"]).Value);
            data.AddComputedColumn("muValue", r => ((UncertainValue)r["mu"]).Value);

            // Check that fit parameters agree with inputs
            Assert.IsTrue(data["mu"].As((UncertainValue v) => v.Value).PopulationMean().ConfidenceInterval(0.99).ClosedContains(mu));
            Assert.IsTrue(data["alpha"].As((UncertainValue v) => v.Value).PopulationMean().ConfidenceInterval(0.99).ClosedContains(alpha));
            Assert.IsTrue(data["sigma"].As((UncertainValue v) => v.Value).PopulationMean().ConfidenceInterval(0.99).ClosedContains(sigma));

            // Check that reported variances agree with actual variances
            Assert.IsTrue(data["mu"].As((UncertainValue v) => v.Value).PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["mu"].As((UncertainValue v) => v.Uncertainty).Median()));
            Assert.IsTrue(data["alpha"].As((UncertainValue v) => v.Value).PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["alpha"].As((UncertainValue v) => v.Uncertainty).Median()));
            Assert.IsTrue(data["sigma"].As((UncertainValue v) => v.Value).PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["sigma"].As((UncertainValue v) => v.Uncertainty).Median()));

            // Check that reported co-variances agree with actual co-variances
            Assert.IsTrue(data["mu"].As((UncertainValue v) => v.Value).PopulationCovariance(data["alpha"].As((UncertainValue v) => v.Value)).ConfidenceInterval(0.99).ClosedContains(data["covariance"].As((SymmetricMatrix c) => c[0, 1]).Median()));

            // For small n, the fitted alpha can vary considerably, and the formula for var(m) varies
            // quite strongly with alpha, so the computed var(m) have a very long tail. This pushes the
            // mean computed var(m) quite a bit higher than a typical value, so we use medians instead
            // of means for our best guess for the predicted variance.

            TestResult ks = data["p"].As <double>().KolmogorovSmirnovTest(new UniformDistribution());

            Assert.IsTrue(ks.Probability > 0.05);

            // This is an onerous way to store values, but it does let us test how the data-frame machinery deals with
            // non-trivial storage types.
        }
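The GenerateAR1TimeSeries helper called in the loop above is not part of this listing. The sketch below is a hypothetical reconstruction, assuming the standard AR(1) recursion x_t = mu + alpha * (x_{t-1} - mu) + e_t with normally distributed innovations of standard deviation sigma and a seeded generator; the helper actually used by the test suite may differ.

        private static TimeSeries GenerateAR1TimeSeries(double alpha, double mu, double sigma, int count, int seed)
        {
            // Hypothetical sketch: iterate the AR(1) recursion starting from the mean,
            // adding Gaussian innovations e_t ~ N(0, sigma) drawn from a seeded generator.
            Random rng = new Random(seed);
            ContinuousDistribution noise = new NormalDistribution(0.0, sigma);
            TimeSeries series = new TimeSeries();
            double previous = mu;
            for (int i = 0; i < count; i++)
            {
                double current = mu + alpha * (previous - mu) + noise.GetRandomValue(rng);
                series.Add(current);
                previous = current;
            }
            return(series);
        }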
        public void BivariateNullAssociation()
        {
            Random rng = new Random(31415926);

            // Create a data structure to hold the results of Pearson, Spearman, and Kendall tests.
            FrameTable data = new FrameTable();

            data.AddColumn <double>("r");
            data.AddColumn <double>("ρ");
            data.AddColumn <double>("τ");

            // Create variables to hold the claimed distribution of each test statistic.
            ContinuousDistribution PRD = null;
            ContinuousDistribution SRD = null;
            ContinuousDistribution KTD = null;

            // Generate a large number of bivariate samples and conduct our three tests on each.
            ContinuousDistribution xDistribution = new LognormalDistribution();
            ContinuousDistribution yDistribution = new CauchyDistribution();

            for (int j = 0; j < 100; j++)
            {
                List <double> x = new List <double>();
                List <double> y = new List <double>();
                for (int i = 0; i < 100; i++)
                {
                    x.Add(xDistribution.GetRandomValue(rng));
                    y.Add(yDistribution.GetRandomValue(rng));
                }

                TestResult PR = Bivariate.PearsonRTest(x, y);
                TestResult SR = Bivariate.SpearmanRhoTest(x, y);
                TestResult KT = Bivariate.KendallTauTest(x, y);

                PRD = PR.Statistic.Distribution;
                SRD = SR.Statistic.Distribution;
                KTD = KT.Statistic.Distribution;

                data.AddRow(new Dictionary <string, object>()
                {
                    { "r", PR.Statistic.Value }, { "ρ", SR.Statistic.Value }, { "τ", KT.Statistic.Value }
                });
            }

            Assert.IsTrue(data["r"].As <double>().KolmogorovSmirnovTest(PRD).Probability > 0.05);
            Assert.IsTrue(data["ρ"].As <double>().KolmogorovSmirnovTest(SRD).Probability > 0.05);
            Assert.IsTrue(data["τ"].As <double>().KolmogorovSmirnovTest(KTD).Probability > 0.05);
        }
Example 7
        public void FrameTableColumnManipulations()
        {
            FrameTable frame = new FrameTable();

            frame.AddColumn <int>("Integer");
            frame.AddColumn <double>("Double");
            Assert.IsTrue(frame.Columns.Count == 2);

            Assert.IsTrue(frame.Columns[0].Name == frame[frame.Columns[0].Name].Name);
            Assert.IsTrue(frame.Columns[0].StorageType == frame[frame.Columns[0].Name].StorageType);

            frame.AddColumn <DateTime>("Timestamp");
            Assert.IsTrue(frame.Columns.Count == 3);

            frame.RemoveColumn(frame.Columns[0].Name);
            Assert.IsTrue(frame.Columns.Count == 2);
        }
Example 8
        public static void ImportingData()
        {
            FrameTable data;

            using (TextReader reader = File.OpenText("test.csv")) {
                data = FrameTable.FromCsv(reader);
            }

            Console.WriteLine($"Imported CSV file with {data.Rows.Count} rows.");
            Console.WriteLine("The names and types of the columns are:");
            foreach (FrameColumn column in data.Columns)
            {
                Console.WriteLine($"  {column.Name} of type {column.StorageType}");
            }

            FrameTable titanic;
            Uri        url     = new Uri("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv");
            WebRequest request = WebRequest.Create(url);

            using (WebResponse response = request.GetResponse()) {
                using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
                    titanic = FrameTable.FromCsv(reader);
                }
            }

            Uri       jsonUrl = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.json");
            WebClient client  = new WebClient();
            string    input   = client.DownloadString(jsonUrl);
            List <Dictionary <string, object> > output = JsonConvert.DeserializeObject <List <Dictionary <string, object> > >(input);
            FrameTable jsonExample = FrameTable.FromDictionaries(output);

            // Define the schema.
            FrameTable table = new FrameTable();

            table.AddColumn <int>("Id");
            table.AddColumn <string>("Name");
            table.AddColumn <string>("Sex");
            table.AddColumn <DateTime>("Birthdate");
            table.AddColumn <double>("Height");
            table.AddColumn <double?>("Weight");
            table.AddColumn <bool>("Result");

            // Add rows as arrays of objects.
            table.AddRow(1, "John", "M", DateTime.Parse("1970-01-02"), 190.0, 75.0, true);
            table.AddRow(2, "Mary", "F", DateTime.Parse("1980-02-03"), 155.0, null, true);

            // Add a row using a dictionary. This is more verbose, but very clear.
            table.AddRow(new Dictionary <string, object>()
            {
                { "Id", 3 },
                { "Name", null },
                { "Sex", "M" },
                { "Birthdate", DateTime.Parse("1990-03-04") },
                { "Height", 180.0 },
                { "Weight", 60.0 },
                { "Result", false }
            });
        }
Example 9
        public void FrameTableRowManipulations()
        {
            // In future, test with computed columns
            FrameTable frame = new FrameTable();

            frame.AddColumn <double>("Height");
            frame.AddColumn <string>("Name");

            Assert.IsTrue(frame.Columns.Count == 2);
            Assert.IsTrue(frame.Rows.Count == 0);

            // Insert a row
            Dictionary <string, object> row = new Dictionary <string, object>()
            {
                { "Name", "John" }, { "Height", 1.1 }
            };

            frame.AddRow(row);
            Assert.IsTrue(frame.Rows.Count == 1);

            // Try to insert a row with missing values
            Dictionary <string, object> smallRow = new Dictionary <string, object>()
            {
                { "Name", "Mark" }
            };

            try {
                frame.AddRow(smallRow);
                Assert.Fail();
            } catch (Exception) { }

            // Try to insert a row with too many values
            Dictionary <string, object> bigRow = new Dictionary <string, object>()
            {
                { "Name", "Luke" }, { "Height", 1.2 }, { "Weight", 60.0 }
            };

            try {
                frame.AddRow(bigRow);
                Assert.Fail();
            } catch (Exception) { }
        }
Example 10
        private FrameTable GetTestFrame()
        {
            FrameTable frame = new FrameTable();

            frame.AddColumn <string>("name");
            frame.AddColumn <double>("height");
            frame.AddColumn <double>("weight");
            frame.AddColumn <bool?>("male");

            frame.AddRow("a", 7.0, 10.0, false);
            frame.AddRow(null, 6.5, 11.0, true);
            frame.AddRow("c", 6.0, 12.0, false);
            frame.AddRow("d", 5.5, 11.0, true);
            frame.AddRow("e", 5.0, 12.0, null);
            frame.AddRow("f", 4.5, 13.0, true);
            frame.AddRow(null, 4.0, 12.0, false);

            frame.AddComputedColumn("bmi", r => ((double)r["weight"]) / MoreMath.Sqr((double)r["height"]));

            return(frame);
        }
Example 11
        public static void ConstructExampleData()
        {
            FrameTable table = new FrameTable();

            table.AddColumn <int>("Id");
            table.AddColumn <string>("Name");
            table.AddColumn <string>("Sex");
            table.AddColumn <DateTime>("Birthdate");
            table.AddColumn <double>("Height");
            table.AddColumn <double?>("Weight");
            table.AddColumn <bool>("Result");

            Random rng = new Random(1000001);

            string[] maleNames = new string[] { "Alex", "Chris", "David", "Eric", "Frederic", "George", "Hans", "Igor", "John", "Kevin", "Luke", "Mark", "Oscar", "Peter", "Richard", "Stephan", "Thomas", "Vincent" };
            AddRows(table, maleNames, "M", 175.0, 12.0, 24.0, 3.0, 1, rng);

            string[] femaleNames = new string[] { "Anne", "Belle", "Dorothy", "Elizabeth", "Fiona", "Helen", "Julia", "Kate", "Louise", "Mary", "Natalie", "Olivia", "Ruth", "Sarah", "Theresa", "Viola" };
            AddRows(table, femaleNames, "F", 160.0, 10.0, 24.0, 3.0, 0, rng);

            // add rows with nulls
            table.AddRow(table.Rows.Count, null, "M", DateTime.Parse("1970-07-27"), 183.0, 74.0, false);
            table.AddRow(table.Rows.Count, "Zoey", "F", DateTime.Parse("2007-09-17"), 138.0, null, false);

            string path = @"example.csv";

            using (StreamWriter writer = new StreamWriter(File.OpenWrite(path))) {
                table.ToCsv(writer);
            }
            Console.WriteLine(File.Exists(path));

            string json = JsonConvert.SerializeObject(table.ToDictionaries(), Formatting.Indented);

            File.WriteAllText("example.json", json);
        }
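The AddRows helper invoked above is likewise not shown in this listing. The sketch below is a hypothetical reconstruction, assuming the numeric arguments are the mean and standard deviation of height, the mean and standard deviation of BMI, and an integer that biases the Result column; the actual helper may differ.

        private static void AddRows(FrameTable table, string[] names, string sex,
                                    double meanHeight, double stddevHeight, double meanBmi, double stddevBmi,
                                    int resultBias, Random rng)
        {
            // Hypothetical sketch: draw height and BMI from normal distributions, derive
            // weight from BMI and height (converted to meters), spread birthdates over a
            // few decades, and bias the Result flag by the resultBias argument.
            ContinuousDistribution heightDistribution = new NormalDistribution(meanHeight, stddevHeight);
            ContinuousDistribution bmiDistribution    = new NormalDistribution(meanBmi, stddevBmi);
            foreach (string name in names)
            {
                double   height    = heightDistribution.GetRandomValue(rng);
                double   bmi       = bmiDistribution.GetRandomValue(rng);
                double   weight    = bmi * MoreMath.Sqr(height / 100.0);
                DateTime birthdate = new DateTime(1960, 1, 1).AddDays(rng.Next(0, 40 * 365));
                bool     result    = (rng.Next(0, 2 + resultBias) > 0);
                table.AddRow(table.Rows.Count, name, sex, birthdate, height, weight, result);
            }
        }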
Example 12
        public void FrameTableCsvRoundtrip2()
        {
            // Let's exercise all our data adaptors
            FrameTable original = new FrameTable();

            original.AddColumn <string>("String");
            original.AddColumn <double?>("Double?");
            original.AddColumn <int>("Int");
            original.AddColumn <DateTime?>("DateTime?");
            original.AddColumn <TimeSpan>("TimeSpan");
            original.AddColumn <Boolean?>("Boolean?");

            original.AddRow("z", null, 1, DateTime.Today, TimeSpan.FromMinutes(5.0), true);
            original.AddRow("y", 4.3, 2, null, TimeSpan.FromHours(4.0), null);
            original.AddRow("x", 2.0, 3, DateTime.UtcNow.Date, TimeSpan.FromDays(3.0), false);

            TextWriter storage = new StringWriter();

            original.ToCsv(storage);

            FrameTable copy = FrameTable.FromCsv(new StringReader(storage.ToString()));

            for (int i = 0; i < original.Columns.Count; i++)
            {
                Assert.IsTrue(original.Columns[i].Name == copy.Columns[i].Name);
                Assert.IsTrue(original.Columns[i].StorageType == copy.Columns[i].StorageType);
            }

            for (int i = 0; i < original.Rows.Count; i++)
            {
                for (int j = 0; j < original.Columns.Count; j++)
                {
                    // This awkwardness is necessary because == resolves to a static method,
                    // so object == object does a reference check which will fail even if
                    // both sides are equal structures. Equals, on the other hand, is a
                    // virtual method, so it will do the appropriate comparison, but will
                    // fail if the instance is null.
                    if (original.Rows[i][j] == null)
                    {
                        Assert.IsTrue(copy.Rows[i][j] == null);
                    }
                    else
                    {
                        Assert.IsTrue(original.Rows[i][j].Equals(copy.Rows[i][j]));
                    }
                }
            }
        }
Example 13
        public void MeansClustering2()
        {
            ColumnVector[] centers = new ColumnVector[] {
                new ColumnVector(0.0, 0.0, 0.0),
                new ColumnVector(2.0, 0.0, 0.0),
                new ColumnVector(0.0, 2.0, 0.0),
                new ColumnVector(0.0, 0.0, 2.0)
            };

            FrameTable table    = new FrameTable();
            string     alphabet = "abcdefghijklmnopqrstuvwxyz";

            for (int j = 0; j < 3; j++)
            {
                table.AddColumn <double>(alphabet[j].ToString());
            }


            List <int>          inputAssignments = new List <int>();
            List <ColumnVector> inputVectors     = new List <ColumnVector>();
            Random rng = new Random(2);
            ContinuousDistribution dist = new NormalDistribution(0.0, 1.0);

            for (int i = 0; i < 100; i++)
            {
                int inputAssignment = rng.Next(0, centers.Length);
                inputAssignments.Add(inputAssignment);
                ColumnVector inputVector = centers[inputAssignment].Copy();
                for (int k = 0; k < inputVector.Dimension; k++)
                {
                    inputVector[k] += dist.GetRandomValue(rng);
                }
                inputVectors.Add(inputVector);
                table.AddRow <double>(inputVector);
            }

            MeansClusteringResult result = table.AsColumns <double>().MeansClustering(centers.Length);

            //MultivariateSample s = new MultivariateSample(3);
            //foreach (ColumnVector v in inputVectors) { s.Add(v); }
            //MeansClusteringResult result = s.MeansClustering(centers.Length);

            List <int> outputAssignments = new List <int>();

            for (int i = 0; i < inputVectors.Count; i++)
            {
                int assignment = result.Classify(inputVectors[i]);
                outputAssignments.Add(assignment);
            }

            // Map the output centroids to the original centroids
            Dictionary <int, int> map = new Dictionary <int, int>();

            for (int outputIndex = 0; outputIndex < result.Count; outputIndex++)
            {
                ColumnVector centroid            = result.Centroid(outputIndex);
                int          mappedInputIndex    = -1;
                double       mappedInputDistance = Double.MaxValue;
                for (int inputIndex = 0; inputIndex < centers.Length; inputIndex++)
                {
                    double distance = (centroid - centers[inputIndex]).Norm();
                    if (distance < mappedInputDistance)
                    {
                        mappedInputIndex    = inputIndex;
                        mappedInputDistance = distance;
                    }
                }
                Assert.IsTrue(mappedInputIndex >= 0);
                Assert.IsTrue(mappedInputDistance < 1.0);
                map.Add(outputIndex, mappedInputIndex);
            }

            int correctCount = 0;

            for (int i = 0; i < outputAssignments.Count; i++)
            {
                if (map[outputAssignments[i]] == inputAssignments[i])
                {
                    correctCount++;
                }
            }
            Assert.IsTrue(correctCount >= 0.50 * outputAssignments.Count);
        }
Example 14
        public void MultivariateLinearLogisticRegressionVariances()
        {
            // define the model: log-odds t = a + b0 * x0 + b1 * x1, p = 1 / (1 + exp(-t))
            double a  = -3.0;
            double b0 = 2.0;
            double b1 = 1.0;
            ContinuousDistribution x0distribution = new ExponentialDistribution();
            ContinuousDistribution x1distribution = new LognormalDistribution();

            FrameTable data = new FrameTable();

            data.AddColumns <double>("a", "da", "b0", "db0", "b1", "db1", "p", "dp");

            // draw a sample from the model
            Random rng = new Random(2);

            for (int j = 0; j < 32; j++)
            {
                List <double> x0s = new List <double>();
                List <double> x1s = new List <double>();
                List <bool>   ys  = new List <bool>();

                FrameTable table = new FrameTable();
                table.AddColumn <double>("x0");
                table.AddColumn <double>("x1");
                table.AddColumn <bool>("y");

                for (int i = 0; i < 32; i++)
                {
                    double x0 = x0distribution.GetRandomValue(rng);
                    double x1 = x1distribution.GetRandomValue(rng);
                    double t  = a + b0 * x0 + b1 * x1;
                    double p  = 1.0 / (1.0 + Math.Exp(-t));
                    bool   y  = (rng.NextDouble() < p);
                    x0s.Add(x0);
                    x1s.Add(x1);
                    ys.Add(y);
                }

                // do a multi-linear logistic regression fit on the sample
                MultiLinearLogisticRegressionResult result = ys.MultiLinearLogisticRegression(
                    new Dictionary <string, IReadOnlyList <double> > {
                    { "x0", x0s }, { "x1", x1s }
                }
                    );
                UncertainValue pp = result.Predict(0.0, 1.0);

                data.AddRow(
                    result.Intercept.Value, result.Intercept.Uncertainty,
                    result.CoefficientOf("x0").Value, result.CoefficientOf("x0").Uncertainty,
                    result.CoefficientOf("x1").Value, result.CoefficientOf("x1").Uncertainty,
                    pp.Value, pp.Uncertainty
                    );
            }

            // The estimated parameters should agree with the model that generated the data.

            // The variances of the estimates should agree with the claimed variances
            Assert.IsTrue(data["a"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["da"].As <double>().Mean()));
            Assert.IsTrue(data["b0"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["db0"].As <double>().Mean()));
            Assert.IsTrue(data["b1"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["db1"].As <double>().Mean()));
            Assert.IsTrue(data["p"].As <double>().PopulationStandardDeviation().ConfidenceInterval(0.99).ClosedContains(data["dp"].As <double>().Mean()));
        }
Example 15
        public void FrameTableManipulation()
        {
            FrameTable table = new FrameTable();

            table.AddColumn <int>("Id");
            table.AddColumn <DateTime?>("Birthdate");
            table.AddColumns <string>("FirstName", "LastName");
            Assert.IsTrue(table.Columns.Count == 4);

            // Index lookup should work
            Assert.IsTrue(table.GetColumnIndex("Birthdate") >= 0);
            Assert.IsTrue(table.GetColumnIndex("None") < 0);

            // Add rows
            Assert.IsTrue(table.Rows.Count == 0);
            table.AddRow(1, DateTime.Parse("1990-01-01"), "a", "p");
            table.AddRow(2, DateTime.Parse("2000-02-02"), null, null);
            table.AddRow(new Dictionary <string, object>()
            {
                { "Id", 3 }, { "Birthdate", null }, { "FirstName", "c" }, { "LastName", "r" }
            });
            Assert.IsTrue(table.Rows.Count == 3);

            // Adding rows with the wrong types and/or entries should fail
            // Careful, some of these will leave the table in a bad state
            //try {
            //    table.AddRow(4, DateTime.Parse("2010-04-04"), 1.0, "s");
            //    Assert.Fail();
            //} catch (Exception) { }
            try {
                table.AddRow(4, DateTime.Parse("2010-04-04"));
                Assert.Fail();
            } catch (Exception) { }
            //try {
            //    table.AddRow(new Dictionary<string, object>() {
            //        {"Id", 4}, { "FirstName", "d" }, { "LastName", "r" }
            //    });
            //    Assert.Fail();
            //} catch (Exception) { }
            //try {
            //    table.AddRow(new Dictionary<string, object>() {
            //        {"Id", 4}, { "Birthdate", null }, { "FirstName", "d" }, { "LastName", "r" }, { "MiddleName", "u" }
            //    });
            //    Assert.Fail();
            //} catch (Exception) { }

            // Adding a new column with the wrong length should fail
            try {
                table.AddColumn <double>("Score");
                Assert.Fail();
            } catch (Exception) { }
            Assert.IsTrue(table.GetColumnIndex("Score") < 0);

            // Adding a new column with the right length should work
            List <double> scores = new List <double>()
            {
                1.1, 1.2, 1.3
            };

            table.AddColumn("Score", scores);
            Assert.IsTrue(table.GetColumnIndex("Score") >= 0);

            // Adding a new computed column should work
            table.AddComputedColumn <TimeSpan?>("Age", r => {
                DateTime? b = (DateTime?)r["Birthdate"];
                if (b.HasValue)
                {
                    return(DateTime.Now - b.Value);
                }
                else
                {
                    return(null);
                }
            });
            Assert.IsTrue(table.GetColumnIndex("Age") >= 0);

            // Changing a value should change the result of the computed column that depends on it
            int      birthdateIndex = table.GetColumnIndex("Birthdate");
            int      ageIndex       = table.GetColumnIndex("Age");
            TimeSpan age1           = (TimeSpan)table[0, ageIndex];

            table[0, birthdateIndex] = DateTime.Parse("2010-01-01");
            TimeSpan age2 = (TimeSpan)table[0, ageIndex];

            Assert.IsTrue(age2 != age1);

            // Clearing a table should work
            table.Clear();
            Assert.IsTrue(table.Columns.Count > 0);
            Assert.IsTrue(table.Rows.Count == 0);
        }
Example 16
        public void ContingencyTableProbabilities()
        {
            // Construct data where (i) there are both reference-nulls and nullable-struct-nulls,
            // (ii) all values of one column are equally likely, and (iii) the values of the other column depend on the value of the first column
            List <string> groups = new List <string>()
            {
                "A", "B", "C", null
            };

            FrameTable data = new FrameTable();

            data.AddColumn <string>("Group");
            data.AddColumn <bool?>("Outcome");

            int                n            = 512;
            double             pOutcomeNull = 0.05;
            Func <int, double> pOutcome     = groupIndex => 0.8 - 0.2 * groupIndex;
            Random             rng          = new Random(10101010);

            for (int i = 0; i < n; i++)
            {
                int    groupIndex = rng.Next(0, groups.Count);
                string group      = groups[groupIndex];
                bool?  outcome    = (rng.NextDouble() < pOutcome(groupIndex));
                if (rng.NextDouble() < pOutcomeNull)
                {
                    outcome = null;
                }
                data.AddRow(group, outcome);
            }

            // Form a contingency table.
            ContingencyTable <string, bool?> table = Bivariate.Crosstabs(data["Group"].As <string>(), data["Outcome"].As <bool?>());

            // Total counts should match
            Assert.IsTrue(table.Total == n);

            // All values should be represented
            foreach (string row in table.Rows)
            {
                Assert.IsTrue(groups.Contains(row));
            }

            // Counts in each cell and marginal totals should match
            foreach (string group in table.Rows)
            {
                int rowTotal = 0;
                foreach (bool? outcome in table.Columns)
                {
                    FrameView view = data.Where(r => ((string)r["Group"] == group) && ((bool?)r["Outcome"] == outcome));
                    Assert.IsTrue(table[group, outcome] == view.Rows.Count);
                    rowTotal += view.Rows.Count;
                }
                Assert.IsTrue(rowTotal == table.RowTotal(group));
            }

            // Inferred probabilities should agree with model
            Assert.IsTrue(table.ProbabilityOfColumn(null).ConfidenceInterval(0.99).ClosedContains(pOutcomeNull));
            for (int groupIndex = 0; groupIndex < groups.Count; groupIndex++)
            {
                string group = groups[groupIndex];
                Assert.IsTrue(table.ProbabilityOfRow(group).ConfidenceInterval(0.99).ClosedContains(0.25));
                Assert.IsTrue(table.ProbabilityOfColumnConditionalOnRow(true, group).ConfidenceInterval(0.99).ClosedContains(pOutcome(groupIndex) * (1.0 - pOutcomeNull)));
            }
            Assert.IsTrue(table.ProbabilityOfColumn(null).ConfidenceInterval(0.99).ClosedContains(pOutcomeNull));

            // Pearson test should catch that rows and columns are correlated
            Assert.IsTrue(table.PearsonChiSquaredTest().Probability < 0.05);
        }