Ejemplo n.º 1
0
        // Checks how FrameTable.FromCsv handles whitespace around and inside cells.
        // The expectations encoded below are: header names come back without their
        // surrounding padding, unquoted cells holding nothing but spaces/tabs come
        // back as null, and anything inside double quotes (spaces, tabs, commas,
        // and a doubled quote) is preserved exactly.
        public void CsvWhitespaceParsing()
        {
            string csv = new StringBuilder()
                .AppendLine(" c0 ,\tc1  ")
                .AppendLine(",")
                .AppendLine(" , ")
                .AppendLine("  ,\t")
                .AppendLine("\t\" \" ,  \"\t\" ")
                .AppendLine(" \" a\t\"\t,\t\"\t a \"")
                .AppendLine("\t \" \"\"\",  \" , \" ")
                .ToString();

            FrameTable parsed = FrameTable.FromCsv(new StringReader(csv));

            // Header row.
            string[] expectedNames = { "c0", "c1" };
            for (int c = 0; c < expectedNames.Length; c++)
            {
                Assert.IsTrue(parsed.Columns[c].Name == expectedNames[c]);
            }

            // Data rows, one entry per CSV line after the header, checked in row-major order.
            string[,] expectedCells =
            {
                { null, null },
                { null, null },
                { null, null },
                { " ", "\t" },
                { " a\t", "\t a " },
                { " \"", " , " }
            };

            for (int r = 0; r < expectedCells.GetLength(0); r++)
            {
                for (int c = 0; c < expectedCells.GetLength(1); c++)
                {
                    Assert.IsTrue((string)parsed.Rows[r][c] == expectedCells[r, c]);
                }
            }
        }
Ejemplo n.º 2
0
        // Reads the CSV at csvFileName into a FrameTable, writes the table back out
        // to a temporary file, and asserts that the MD5 hashes of the input and
        // output files are equal. The temporary file is always deleted.
        public void FrameTableCsvRoundtrip()
        {
            FrameTable loaded;
            using (TextReader source = File.OpenText(csvFileName))
            {
                loaded = FrameTable.FromCsv(source);
            }

            // Sanity checks: something was actually parsed.
            Assert.IsTrue(loaded != null);
            Assert.IsTrue(loaded.Columns.Count > 0);
            Assert.IsTrue(loaded.Rows.Count > 0);

            string tempPath = Path.GetTempFileName();
            try
            {
                // Both the stream and the writer are closed before hashing,
                // so all output has been flushed to disk.
                using (FileStream output = File.OpenWrite(tempPath))
                using (TextWriter csvWriter = new StreamWriter(output))
                {
                    loaded.ToCsv(csvWriter);
                }

                Assert.IsTrue(ComputeMD5Hash(csvFileName) == ComputeMD5Hash(tempPath));
            }
            finally
            {
                File.Delete(tempPath);
            }
        }
Ejemplo n.º 3
0
        // Demonstrates four ways of getting data into a FrameTable:
        //   1. from a local CSV file ("test.csv"),
        //   2. from a CSV file downloaded over HTTP,
        //   3. from JSON downloaded over HTTP and deserialized into dictionaries,
        //   4. by defining a schema and adding rows programmatically.
        // Requires network access and a "test.csv" file in the working directory.
        public static void ImportingData()
        {
            // 1. Local CSV file.
            FrameTable data;

            using (TextReader reader = File.OpenText("test.csv")) {
                data = FrameTable.FromCsv(reader);
            }

            Console.WriteLine($"Imported CSV file with {data.Rows.Count} rows.");
            Console.WriteLine("The names and types of the columns are:");
            foreach (FrameColumn column in data.Columns)
            {
                Console.WriteLine($"  {column.Name} of type {column.StorageType}");
            }

            // 2. CSV file fetched from the web.
            FrameTable titanic;
            Uri        url     = new Uri("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv");
            WebRequest request = WebRequest.Create(url);

            using (WebResponse response = request.GetResponse()) {
                using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
                    titanic = FrameTable.FromCsv(reader);
                }
            }

            // 3. JSON fetched from the web. WebClient is IDisposable, so it is
            // wrapped in a using block to release it once the download is done.
            Uri    jsonUrl = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.json");
            string input;

            using (WebClient client = new WebClient()) {
                input = client.DownloadString(jsonUrl);
            }

            List <Dictionary <string, object> > output = JsonConvert.DeserializeObject <List <Dictionary <string, object> > >(input);
            FrameTable jsonExample = FrameTable.FromDictionaries(output);

            // 4. Define the schema.
            FrameTable table = new FrameTable();

            table.AddColumn <int>("Id");
            table.AddColumn <string>("Name");
            table.AddColumn <string>("Sex");
            table.AddColumn <DateTime>("Birthdate");
            table.AddColumn <double>("Height");
            table.AddColumn <double?>("Weight");
            table.AddColumn <bool>("Result");

            // Add rows as arrays of objects, in column order.
            table.AddRow(1, "John", "M", DateTime.Parse("1970-01-02"), 190.0, 75.0, true);
            table.AddRow(2, "Mary", "F", DateTime.Parse("1980-02-03"), 155.0, null, true);

            // Add a row using a dictionary. This is more verbose, but very clear.
            table.AddRow(new Dictionary <string, object>()
            {
                { "Id", 3 },
                { "Name", null },
                { "Sex", "M" },
                { "Birthdate", DateTime.Parse("1990-03-04") },
                { "Height", 180.0 },
                { "Weight", 60.0 },
                { "Result", false }
            });
        }
Ejemplo n.º 4
0
        // Demonstrates the FrameTable/FrameView manipulation calls: column
        // selection, computed columns, null filtering, row filtering, ordering,
        // distinct values and grouping. The input is an example CSV downloaded
        // over HTTP, so network access is required. Most results are assigned to
        // locals purely for illustration.
        public static void ManipulatingData()
        {
            Uri        source      = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
            WebRequest httpRequest = WebRequest.Create(source);
            FrameTable table;

            using (WebResponse httpResponse = httpRequest.GetResponse())
            using (StreamReader body = new StreamReader(httpResponse.GetResponseStream()))
            {
                table = FrameTable.FromCsv(body);
            }

            // Choosing columns: keep a named subset, or drop named columns.
            FrameView selected  = table.Select("Height", "Weight", "Sex");
            FrameView discarded = table.Discard("Name");

            // A column whose values are derived from other columns of the same row.
            table.AddComputedColumn("Bmi", row => ((double)row["Weight"]) / MoreMath.Sqr((double)row["Height"] / 100.0));
            Console.WriteLine($"Bmi of first subject is {table["Bmi"][0]}.");

            // Null filtering: across all columns, or only the named ones.
            FrameView noNulls                = table.WhereNotNull();
            FrameView noNullWeights          = table.WhereNotNull("Weight");
            FrameView noNullWeightsOrHeights = table.WhereNotNull("Weight", "Height");

            double meanWeight = table.WhereNotNull("Weight").Columns["Weight"].As<double>().Mean();

            // Row filtering: by a single typed column, or by a whole-row predicate.
            FrameView men      = table.Where<string>("Sex", sex => sex == "M");
            FrameView shortMen = table.Where(row => ((string)row["Sex"]) == "M" && ((double)row["Height"] < 175.0));

            // Ordering: by column, by column with a direction, by a typed
            // comparison, or by a comparison of whole rows.
            FrameView ordered     = table.OrderBy("Height");
            FrameView reversed    = table.OrderBy("Height", SortOrder.Descending);
            FrameView alsoOrdered = table.OrderBy<double>("Height", (a, b) => a.CompareTo(b));

            FrameView sorted = table.OrderBy((left, right) => {
                // Sex is the primary key; height breaks ties.
                int bySex    = ((string)left["Sex"]).CompareTo((string)right["Sex"]);
                int byHeight = ((double)left["Height"]).CompareTo((double)right["Height"]);
                return bySex != 0 ? bySex : byHeight;
            });

            List<string> sexes = table["Sex"].As<string>().Distinct().ToList();

            // Grouping: one aggregate per group, or several named aggregates.
            FrameTable counts = table.GroupBy("Sex", group => group.Rows.Count, "Count");

            FrameTable summarize = table.GroupBy("Sex", group => {
                SummaryStatistics stats = new SummaryStatistics(group["Height"].As<double>());
                return new Dictionary<string, object>()
                {
                    { "Count", stats.Count },
                    { "Mean", stats.Mean },
                    { "StdDev", stats.StandardDeviation }
                };
            });
        }
Ejemplo n.º 5
0
        // Smoke test: loads a CSV file from disk and groups it by the "carrier"
        // column, counting rows per group. Nothing is asserted; the test passes
        // as long as no exception is thrown.
        public void SmokeTest2()
        {
            // NOTE(review): this is an absolute path on one developer's machine,
            // so the test can only run where that file exists.
            string dataFile = @"C:\Users\dcw-b\Desktop\DataSets\551184489_52017_210_airline_delay_causes\551184489_52017_210_airline_delay_causes.csv";

            FrameTable delays;
            using (StreamReader input = File.OpenText(dataFile))
            {
                delays = FrameTable.FromCsv(input);
            }

            FrameView perCarrier = delays.GroupBy("carrier", (FrameView group) => group.Rows.Count, "count");
        }
Ejemplo n.º 6
0
        // Downloads the resource at the given URL and parses the response body
        // as CSV into a FrameTable. The response, its stream and the reader are
        // all disposed before the method returns.
        public FrameTable DownloadFrameTable(Uri url)
        {
            WebRequest httpRequest = WebRequest.Create(url);

            using (WebResponse httpResponse = httpRequest.GetResponse())
            using (Stream body = httpResponse.GetResponseStream())
            using (TextReader csv = new StreamReader(body))
            {
                return FrameTable.FromCsv(csv);
            }
        }
Ejemplo n.º 7
0
        // Builds a table with one column per supported storage type (including
        // nullable ones and null cells), writes it to CSV in memory, reads it
        // back, and asserts that column names, column storage types and every
        // cell value survive the round trip.
        public void FrameTableCsvRoundtrip2()
        {
            // Let's exercise all our data adaptors
            FrameTable original = new FrameTable();

            original.AddColumn <string>("String");
            original.AddColumn <double?>("Double?");
            original.AddColumn <int>("Int");
            original.AddColumn <DateTime?>("DateTime?");
            original.AddColumn <TimeSpan>("TimeSpan");
            original.AddColumn <Boolean?>("Boolean?");

            original.AddRow("z", null, 1, DateTime.Today, TimeSpan.FromMinutes(5.0), true);
            original.AddRow("y", 4.3, 2, null, TimeSpan.FromHours(4.0), null);
            original.AddRow("x", 2.0, 3, DateTime.UtcNow.Date, TimeSpan.FromDays(3.0), false);

            string csv;

            using (TextWriter storage = new StringWriter())
            {
                original.ToCsv(storage);
                csv = storage.ToString();
            }

            FrameTable copy = FrameTable.FromCsv(new StringReader(csv));

            // Check the shape first, so a truncated copy produces an assertion
            // failure rather than an index-out-of-range error in the loops below.
            Assert.IsTrue(original.Columns.Count == copy.Columns.Count);
            Assert.IsTrue(original.Rows.Count == copy.Rows.Count);

            for (int i = 0; i < original.Columns.Count; i++)
            {
                Assert.IsTrue(original.Columns[i].Name == copy.Columns[i].Name);
                Assert.IsTrue(original.Columns[i].StorageType == copy.Columns[i].StorageType);
            }

            for (int i = 0; i < original.Rows.Count; i++)
            {
                for (int j = 0; j < original.Columns.Count; j++)
                {
                    object expected = original.Rows[i][j];
                    object actual   = copy.Rows[i][j];

                    // This awkwardness is necessary because == resolves to a static method,
                    // so object == object does a reference check which will fail even if
                    // both sides are equal structures. Equals, on the other hand, is a
                    // virtual method, so it will do the appropriate comparison, but will
                    // fail if the instance is null.
                    if (expected == null)
                    {
                        // A null cell must come back as null. The check has to be
                        // made on the copy; testing the original again proves nothing.
                        Assert.IsTrue(actual == null);
                    }
                    else
                    {
                        Assert.IsTrue(expected.Equals(actual));
                    }
                }
            }
        }
Ejemplo n.º 8
0
        // Smoke test: downloads a CSV over HTTP, adds a computed column, and runs
        // two group-by aggregations (one followed by an ordering). Nothing is
        // asserted; the test passes as long as no exception is thrown.
        // Requires network access.
        public void Smoketest()
        {
            string     address     = "https://raw.githubusercontent.com/pandas-dev/pandas/master/doc/data/tips.csv";
            WebRequest httpRequest = WebRequest.Create(address);
            FrameTable tips;

            using (WebResponse httpResponse = httpRequest.GetResponse())
            using (Stream body = httpResponse.GetResponseStream())
            using (TextReader csv = new StreamReader(body))
            {
                tips = FrameTable.FromCsv(csv);
            }

            // Tip expressed as a fraction of the bill, computed per row.
            tips.AddComputedColumn("tip_fraction", row => ((double)row["tip"]) / ((double)row["total_bill"]));

            FrameView counts = tips.GroupBy("day", group => group.Rows.Count, "total").OrderBy("day");
            FrameView means  = tips.GroupBy("sex", group => group["tip_fraction"].As<double>().Mean(), "mean_tip_fraction");
        }
Ejemplo n.º 9
0
        // Demonstrates statistical analysis on data held in a FrameView: column
        // access, summary statistics, two-sample and normality tests, linear
        // regression, a contingency table, computed columns, logistic
        // regression and a rank-correlation test. Results are written to the
        // console. The input is an example CSV downloaded over HTTP, so network
        // access is required.
        public static void AnalyzingData()
        {
            Uri        source      = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
            WebRequest httpRequest = WebRequest.Create(source);
            FrameTable table;

            using (WebResponse httpResponse = httpRequest.GetResponse())
            using (StreamReader body = new StreamReader(httpResponse.GetResponseStream()))
            {
                table = FrameTable.FromCsv(body);
            }

            // All analysis below runs on the view returned by WhereNotNull().
            FrameView view = table.WhereNotNull();

            // Three ways to reach a column: by zero-based index, by name through
            // the Columns collection, and by name through the view's indexer.
            FrameColumn column4           = view.Columns[4];
            FrameColumn heightsColumn     = view.Columns["Height"];
            FrameColumn alsoHeightsColumn = view["Height"];

            // A typed, read-only list over the column's values.
            IReadOnlyList<double> heights = view["Height"].As<double>();

            // Descriptive statistics of the heights.
            SummaryStatistics heightStats = new SummaryStatistics(view["Height"].As<double>());

            Console.WriteLine($"Count = {heightStats.Count}");
            Console.WriteLine($"Mean = {heightStats.Mean}");
            Console.WriteLine($"Standard Deviation = {heightStats.StandardDeviation}");
            Console.WriteLine($"Skewness = {heightStats.Skewness}");
            Console.WriteLine($"Estimated population mean = {heightStats.PopulationMean}");
            Console.WriteLine($"Estimated population standard deviation = {heightStats.PopulationStandardDeviation}");

            // Compare male and female heights with a Student t-test.
            IReadOnlyList<double> maleHeights =
                view.Where<string>("Sex", sex => sex == "M").Columns["Height"].As<double>();
            IReadOnlyList<double> femaleHeights =
                view.Where<string>("Sex", sex => sex == "F").Columns["Height"].As<double>();
            TestResult tTest = Univariate.StudentTTest(maleHeights, femaleHeights);

            Console.WriteLine($"{tTest.Statistic.Name} = {tTest.Statistic.Value}");
            Console.WriteLine($"P = {tTest.Probability}");

            // Further tests on the same samples; results are kept in locals for illustration.
            TestResult maleHeightNormality  = maleHeights.ShapiroFranciaTest();
            TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
            TestResult heightCompatibility  = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

            // Linear regression between the weight and height columns.
            LinearRegressionResult line =
                view["Weight"].As<double>().LinearRegression(view["Height"].As<double>());

            Console.WriteLine($"Model weight = ({line.Slope}) * height + ({line.Intercept}).");
            Console.WriteLine($"Model explains {line.RSquared * 100.0}% of variation.");

            // Cross-tabulate sex against the boolean result column.
            ContingencyTable<string, bool> crosstab =
                Bivariate.Crosstabs(view["Sex"].As<string>(), view["Result"].As<bool>());

            Console.WriteLine($"Male incidence: {crosstab.ProbabilityOfColumnConditionalOnRow(true, "M")}");
            Console.WriteLine($"Female incidence: {crosstab.ProbabilityOfColumnConditionalOnRow(true, "F")}");
            Console.WriteLine($"Log odds ratio = {crosstab.Binary.LogOddsRatio}");

            // Derived columns used by the models below.
            view.AddComputedColumn("Bmi", row => ((double)row["Weight"]) / MoreMath.Sqr((double)row["Height"] / 100.0));
            view.AddComputedColumn("Age", row => (DateTime.Now - (DateTime)row["Birthdate"]).TotalDays / 365.24);

            // Logistic regression of the result on BMI and sex (sex encoded as 1.0 for "M", 0.0 otherwise).
            MultiLinearLogisticRegressionResult logistic =
                view["Result"].As<bool>().MultiLinearLogisticRegression(
                    view["Bmi"].As<double>(),
                    view["Sex"].As<string, double>(sex => sex == "M" ? 1.0 : 0.0));

            foreach (Parameter coefficient in logistic.Parameters)
            {
                Console.WriteLine($"{coefficient.Name} = {coefficient.Estimate}");
            }

            // Rank correlation between age and result.
            TestResult rankTest = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());

            Console.WriteLine($"{rankTest.Statistic.Name} = {rankTest.Statistic.Value} P = {rankTest.Probability}");
        }