/// <summary>
/// Parses a small CSV document whose cells are padded with spaces and tabs, both outside
/// and inside double quotes, and asserts the header names and cell values that
/// FrameTable.FromCsv is expected to produce for it.
/// </summary>
public void CsvWhitespaceParsing() {
    // The first line is the header; each following line is one data row.
    string[] csvLines = {
        " c0 ,\tc1 ",
        ",",
        " , ",
        " ,\t",
        "\t\" \" , \"\t\" ",
        " \" a\t\"\t,\t\"\t a \"",
        "\t \" \"\"\", \" , \" "
    };
    StringBuilder csv = new StringBuilder();
    foreach (string csvLine in csvLines) {
        csv.AppendLine(csvLine);
    }

    FrameTable table = FrameTable.FromCsv(new StringReader(csv.ToString()));

    // Header names are expected without their surrounding whitespace.
    string[] expectedNames = { "c0", "c1" };
    for (int c = 0; c < expectedNames.Length; c++) {
        Assert.IsTrue(table.Columns[c].Name == expectedNames[c]);
    }

    // Expected cell values, row by row: unquoted cells that are empty or contain only
    // whitespace are expected to be null, while quoted cells are expected to keep the
    // whitespace (and the escaped quote) found between their quotes.
    string[][] expectedCells = {
        new string[] { null, null },
        new string[] { null, null },
        new string[] { null, null },
        new string[] { " ", "\t" },
        new string[] { " a\t", "\t a " },
        new string[] { " \"", " , " }
    };
    for (int r = 0; r < expectedCells.Length; r++) {
        for (int c = 0; c < expectedCells[r].Length; c++) {
            Assert.IsTrue((string)table.Rows[r][c] == expectedCells[r][c]);
        }
    }
}
/// <summary>
/// Reads the CSV file named by csvFileName into a FrameTable, writes the table back out
/// to a temporary file, and asserts that the hashes computed by ComputeMD5Hash for the
/// input file and the output file are equal.
/// </summary>
public void FrameTableCsvRoundtrip() {
    FrameTable table;
    using (TextReader source = File.OpenText(csvFileName)) {
        table = FrameTable.FromCsv(source);
    }

    // Sanity checks: the file must have produced a non-empty table.
    Assert.IsTrue(table != null);
    Assert.IsTrue(table.Columns.Count > 0);
    Assert.IsTrue(table.Rows.Count > 0);

    string tempPath = Path.GetTempFileName();
    try {
        using (FileStream target = File.OpenWrite(tempPath))
        using (TextWriter sink = new StreamWriter(target)) {
            table.ToCsv(sink);
        }

        Guid expectedHash = ComputeMD5Hash(csvFileName);
        Guid actualHash = ComputeMD5Hash(tempPath);
        Assert.IsTrue(expectedHash == actualHash);
    } finally {
        // Remove the temporary file whether or not the assertions passed.
        File.Delete(tempPath);
    }
}
/// <summary>
/// Example code showing several ways to obtain a FrameTable: from a local CSV file, from
/// CSV text downloaded over HTTP, from JSON deserialized into a list of dictionaries, and
/// by defining columns and adding rows in code.
/// </summary>
public static void ImportingData() {
    // Import from a CSV file on disk and report what was read.
    FrameTable data;
    using (TextReader reader = File.OpenText("test.csv")) {
        data = FrameTable.FromCsv(reader);
    }

    Console.WriteLine($"Imported CSV file with {data.Rows.Count} rows.");
    Console.WriteLine("The names and types of the columns are:");
    foreach (FrameColumn column in data.Columns) {
        Console.WriteLine($" {column.Name} of type {column.StorageType}");
    }

    // Import CSV text directly from a web response stream.
    FrameTable titanic;
    Uri url = new Uri("https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv");
    WebRequest request = WebRequest.Create(url);
    using (WebResponse response = request.GetResponse()) {
        using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
            titanic = FrameTable.FromCsv(reader);
        }
    }

    // Import from JSON: download the text, deserialize it into dictionaries, and build
    // the table from those. WebClient is disposable, so it is released as soon as the
    // download has completed.
    Uri jsonUrl = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.json");
    string input;
    using (WebClient client = new WebClient()) {
        input = client.DownloadString(jsonUrl);
    }
    List<Dictionary<string, object>> output = JsonConvert.DeserializeObject<List<Dictionary<string, object>>>(input);
    FrameTable jsonExample = FrameTable.FromDictionaries(output);

    // Define the schema.
    FrameTable table = new FrameTable();
    table.AddColumn<int>("Id");
    table.AddColumn<string>("Name");
    table.AddColumn<string>("Sex");
    table.AddColumn<DateTime>("Birthdate");
    table.AddColumn<double>("Height");
    table.AddColumn<double?>("Weight");
    table.AddColumn<bool>("Result");

    // Add rows as arrays of objects.
    table.AddRow(1, "John", "M", DateTime.Parse("1970-01-02"), 190.0, 75.0, true);
    table.AddRow(2, "Mary", "F", DateTime.Parse("1980-02-03"), 155.0, null, true);

    // Add a row using a dictionary. This is more verbose, but very clear.
    table.AddRow(new Dictionary<string, object>() {
        { "Id", 3 },
        { "Name", null },
        { "Sex", "M" },
        { "Birthdate", DateTime.Parse("1990-03-04") },
        { "Height", 180.0 },
        { "Weight", 60.0 },
        { "Result", false }
    });
}
/// <summary>
/// Example code that downloads a sample CSV into a FrameTable and then demonstrates
/// column selection, computed columns, null filtering, row filtering, ordering, and
/// grouping on it.
/// </summary>
public static void ManipulatingData() {
    Uri url = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
    WebRequest request = WebRequest.Create(url);

    FrameTable table;
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
        table = FrameTable.FromCsv(reader);
    }

    // Choose columns, either by naming the ones to keep or the ones to drop.
    FrameView selected = table.Select("Height", "Weight", "Sex");
    FrameView discarded = table.Discard("Name");

    // Add a column whose value is computed from other columns of the same row.
    table.AddComputedColumn(
        "Bmi",
        row => ((double)row["Weight"]) / MoreMath.Sqr((double)row["Height"] / 100.0)
    );
    Console.WriteLine($"Bmi of first subject is {table["Bmi"][0]}.");

    // Filter on nulls: across all columns, or only in the named columns.
    FrameView noNulls = table.WhereNotNull();
    FrameView noNullWeights = table.WhereNotNull("Weight");
    FrameView noNullWeightsOrHeights = table.WhereNotNull("Weight", "Height");

    FrameColumn knownWeights = table.WhereNotNull("Weight").Columns["Weight"];
    double meanWeight = knownWeights.As<double>().Mean();

    // Filter rows with a predicate on one typed column, or on the whole row.
    FrameView men = table.Where<string>("Sex", sex => sex == "M");
    FrameView shortMen = table.Where(
        row => ((string)row["Sex"]) == "M" && ((double)row["Height"] < 175.0)
    );

    // Order rows by a column, optionally descending or with a custom comparison.
    FrameView ordered = table.OrderBy("Height");
    FrameView reversed = table.OrderBy("Height", SortOrder.Descending);
    FrameView alsoOrdered = table.OrderBy<double>("Height", (left, right) => left.CompareTo(right));

    // Order rows with a comparison on whole rows: by sex first, then by height.
    FrameView sorted = table.OrderBy((rowA, rowB) => {
        int bySex = ((string)rowA["Sex"]).CompareTo((string)rowB["Sex"]);
        int byHeight = ((double)rowA["Height"]).CompareTo((double)rowB["Height"]);
        if (bySex != 0) {
            return bySex;
        } else {
            return byHeight;
        }
    });

    List<string> sexes = table["Sex"].As<string>().Distinct().ToList();

    // Group by a column, producing either a single named aggregate per group...
    FrameTable counts = table.GroupBy("Sex", group => group.Rows.Count, "Count");

    // ...or several aggregates per group, returned as a dictionary keyed by name.
    FrameTable summarize = table.GroupBy("Sex", group => {
        SummaryStatistics stats = new SummaryStatistics(group["Height"].As<double>());
        return (new Dictionary<string, object>() {
            { "Count", stats.Count },
            { "Mean", stats.Mean },
            { "StdDev", stats.StandardDeviation }
        });
    });
}
/// <summary>
/// Loads a CSV file from a fixed local path into a FrameTable and groups it by the
/// "carrier" column, counting the rows in each group.
/// </summary>
/// <remarks>
/// NOTE(review): the path below is an absolute path on one developer's machine, so this
/// method can only run where that file exists.
/// </remarks>
public void SmokeTest2() {
    string csvPath = @"C:\Users\dcw-b\Desktop\DataSets\551184489_52017_210_airline_delay_causes\551184489_52017_210_airline_delay_causes.csv";

    FrameTable table;
    using (StreamReader reader = File.OpenText(csvPath)) {
        table = FrameTable.FromCsv(reader);
    }

    FrameView countsByCarrier = table.GroupBy(
        "carrier",
        (FrameView carrierRows) => carrierRows.Rows.Count,
        "count"
    );
}
/// <summary>
/// Requests the given URL and parses the response body as CSV into a FrameTable.
/// </summary>
/// <param name="url">The address of the CSV resource to download.</param>
/// <returns>The table produced by FrameTable.FromCsv from the response text.</returns>
public FrameTable DownloadFrameTable(Uri url) {
    WebRequest request = WebRequest.Create(url);

    // The response, its stream, and the reader are all disposed when the block exits.
    using (WebResponse response = request.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (TextReader text = new StreamReader(body)) {
        return FrameTable.FromCsv(text);
    }
}
/// <summary>
/// Builds a FrameTable with nullable and non-nullable columns of several types, writes it
/// to CSV text, reads that text back, and asserts that the column names, the column
/// storage types, and every cell value of the copy match the original.
/// </summary>
public void FrameTableCsvRoundtrip2() {
    // Let's exercise all our data adaptors
    FrameTable original = new FrameTable();
    original.AddColumn<string>("String");
    original.AddColumn<double?>("Double?");
    original.AddColumn<int>("Int");
    original.AddColumn<DateTime?>("DateTime?");
    original.AddColumn<TimeSpan>("TimeSpan");
    original.AddColumn<Boolean?>("Boolean?");

    original.AddRow("z", null, 1, DateTime.Today, TimeSpan.FromMinutes(5.0), true);
    original.AddRow("y", 4.3, 2, null, TimeSpan.FromHours(4.0), null);
    original.AddRow("x", 2.0, 3, DateTime.UtcNow.Date, TimeSpan.FromDays(3.0), false);

    TextWriter storage = new StringWriter();
    original.ToCsv(storage);

    FrameTable copy = FrameTable.FromCsv(new StringReader(storage.ToString()));

    for (int i = 0; i < original.Columns.Count; i++) {
        Assert.IsTrue(original.Columns[i].Name == copy.Columns[i].Name);
        Assert.IsTrue(original.Columns[i].StorageType == copy.Columns[i].StorageType);
    }

    for (int i = 0; i < original.Rows.Count; i++) {
        for (int j = 0; j < original.Columns.Count; j++) {
            // Compare through the static object.Equals rather than ==. On object
            // operands == is a reference check, which fails even when both sides are
            // equal boxed structures. The static method dispatches to the virtual
            // Equals when both cells are non-null, and is null-safe: two nulls are
            // equal, while a null on only one side is a mismatch. A null cell in the
            // original must therefore come back as null in the copy.
            Assert.IsTrue(object.Equals(original.Rows[i][j], copy.Rows[i][j]));
        }
    }
}
/// <summary>
/// Downloads the pandas "tips" sample CSV into a FrameTable, adds a computed
/// "tip_fraction" column, and runs two group-by aggregations over the result.
/// </summary>
public void Smoketest() {
    string url = "https://raw.githubusercontent.com/pandas-dev/pandas/master/doc/data/tips.csv";
    WebRequest request = WebRequest.Create(url);

    FrameTable tips;
    using (WebResponse response = request.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (TextReader text = new StreamReader(body)) {
        tips = FrameTable.FromCsv(text);
    }

    // Tip expressed as a fraction of the total bill, computed per row.
    tips.AddComputedColumn(
        "tip_fraction",
        row => ((double)row["tip"]) / ((double)row["total_bill"])
    );

    // Number of rows per day, ordered by day.
    FrameView counts = tips.GroupBy("day", group => group.Rows.Count, "total").OrderBy("day");

    // Mean tip fraction per sex.
    FrameView means = tips.GroupBy(
        "sex",
        group => group["tip_fraction"].As<double>().Mean(),
        "mean_tip_fraction"
    );
}
/// <summary>
/// Example code that downloads a sample CSV into a FrameTable, takes the view of it
/// returned by WhereNotNull, and runs a series of statistical analyses on that view,
/// writing the results to the console.
/// </summary>
public static void AnalyzingData() {
    Uri url = new Uri("https://raw.githubusercontent.com/dcwuser/metanumerics/master/Examples/Data/example.csv");
    WebRequest request = WebRequest.Create(url);

    FrameTable table;
    using (WebResponse response = request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream())) {
        table = FrameTable.FromCsv(reader);
    }

    FrameView view = table.WhereNotNull();

    // Get the column with (zero-based) index 4.
    FrameColumn column4 = view.Columns[4];
    // Get the column named "Height".
    FrameColumn heightsColumn = view.Columns["Height"];
    // Even easier way to get the column named "Height".
    FrameColumn alsoHeightsColumn = view["Height"];

    // Expose a column as a typed, read-only list.
    IReadOnlyList<double> heights = view["Height"].As<double>();

    // Descriptive statistics of the heights.
    SummaryStatistics summary = new SummaryStatistics(view["Height"].As<double>());
    Console.WriteLine($"Count = {summary.Count}");
    Console.WriteLine($"Mean = {summary.Mean}");
    Console.WriteLine($"Standard Deviation = {summary.StandardDeviation}");
    Console.WriteLine($"Skewness = {summary.Skewness}");
    Console.WriteLine($"Estimated population mean = {summary.PopulationMean}");
    Console.WriteLine($"Estimated population standard deviation = {summary.PopulationStandardDeviation}");

    // Split the heights by sex and compare the two samples.
    FrameView males = view.Where<string>("Sex", sex => sex == "M");
    IReadOnlyList<double> maleHeights = males.Columns["Height"].As<double>();
    FrameView females = view.Where<string>("Sex", sex => sex == "F");
    IReadOnlyList<double> femaleHeights = females.Columns["Height"].As<double>();

    TestResult test = Univariate.StudentTTest(maleHeights, femaleHeights);
    Console.WriteLine($"{test.Statistic.Name} = {test.Statistic.Value}");
    Console.WriteLine($"P = {test.Probability}");

    TestResult maleHeightNormality = maleHeights.ShapiroFranciaTest();
    TestResult totalHeightNormality = view["Height"].As<double>().ShapiroFranciaTest();
    TestResult heightCompatibility = Univariate.KolmogorovSmirnovTest(maleHeights, femaleHeights);

    // Linear regression between the weight and height columns.
    LinearRegressionResult fit = view["Weight"].As<double>().LinearRegression(view["Height"].As<double>());
    Console.WriteLine($"Model weight = ({fit.Slope}) * height + ({fit.Intercept}).");
    Console.WriteLine($"Model explains {fit.RSquared * 100.0}% of variation.");

    // Cross-tabulate sex against the boolean result.
    ContingencyTable<string, bool> contingency = Bivariate.Crosstabs(
        view["Sex"].As<string>(),
        view["Result"].As<bool>()
    );
    Console.WriteLine($"Male incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "M")}");
    Console.WriteLine($"Female incidence: {contingency.ProbabilityOfColumnConditionalOnRow(true, "F")}");
    Console.WriteLine($"Log odds ratio = {contingency.Binary.LogOddsRatio}");

    // Derived columns used by the analyses below.
    view.AddComputedColumn(
        "Bmi",
        row => ((double)row["Weight"]) / MoreMath.Sqr((double)row["Height"] / 100.0)
    );
    view.AddComputedColumn(
        "Age",
        row => (DateTime.Now - (DateTime)row["Birthdate"]).TotalDays / 365.24
    );

    // Logistic regression of the result on Bmi and on sex encoded as 1.0 (M) or 0.0.
    MultiLinearLogisticRegressionResult result = view["Result"].As<bool>().MultiLinearLogisticRegression(
        view["Bmi"].As<double>(),
        view["Sex"].As<string, double>(sex => sex == "M" ? 1.0 : 0.0)
    );
    foreach (Parameter parameter in result.Parameters) {
        Console.WriteLine($"{parameter.Name} = {parameter.Estimate}");
    }

    // Spearman rho test between age and result.
    TestResult spearman = Bivariate.SpearmanRhoTest(view["Age"].As<double>(), view["Result"].As<double>());
    Console.WriteLine($"{spearman.Statistic.Name} = {spearman.Statistic.Value} P = {spearman.Probability}");
}