/// <summary>
/// Ad-hoc timing harness against a large local CSV file. Prints the elapsed time,
/// the record count and the throughput in MB/s to the console.
/// </summary>
/// <remarks>
/// The parsing loop below is commented out, so at present only the creation of the
/// settings and the parser is timed, and <c>count</c> is always reported as 0.
/// </remarks>
public void ParseHonkingBigFile()
{
    var sw = new Stopwatch();
    using (var stream = File.OpenRead(@"c:\temp\test1.csv"))
    {
        sw.Restart();
        long count = 0;
        var settings = this.GetDelimitedParserSettings(new[] { "field1", "field2" }, ',');
        var parser = DelimitedParser.Create(settings, stream);

        //foreach (var record in parser.Parse(stream))
        //{
        //    count += record.LongCount();
        //}
        //var a = parser.Parse().Last();

        sw.Stop();
        var elapsed = sw.Elapsed;

        // Convert to megabytes in floating point. Dividing the long length by the
        // int constant (1024 * 1024) truncates to whole megabytes, which reports
        // 0 MB/s for any file smaller than 1 MiB and loses precision otherwise.
        double megabytes = stream.Length / (1024.0 * 1024.0);
        var s = $"elapsed={elapsed}, count={count}, throughput={megabytes / elapsed.TotalSeconds} MB/s";
        Console.WriteLine(s);
    }
}
/// <summary>
/// Expects a tab-delimited parser to split only on the tab, leaving the comma
/// inside the second value untouched.
/// </summary>
public void Tab_delimited()
{
    var parser = new DelimitedParser("\t");

    var fields = parser.Parse("123\t4,567");

    Assert.IsNotNull(fields);
    Assert.AreEqual(2, fields.Length);
    Assert.AreEqual("123", fields[0]);
    Assert.AreEqual("4,567", fields[1]);
}
/// <summary>
/// Expects a comma-delimited parser to split a single input into its two values.
/// </summary>
public void Comma_delimited()
{
    var parser = new DelimitedParser(",");

    var fields = parser.Parse("123,456");

    Assert.IsNotNull(fields);
    Assert.AreEqual(2, fields.Length);
    Assert.AreEqual("123", fields[0]);
    Assert.AreEqual("456", fields[1]);
}
/// <summary>
/// Expects the values of several inputs passed to one Parse call to be returned
/// as a single flat sequence, in input order.
/// </summary>
public void Multiple_comma_delimited()
{
    var parser = new DelimitedParser(",");

    var fields = parser.Parse("123,456", "abc");

    Assert.IsNotNull(fields);
    Assert.AreEqual(3, fields.Length);
    Assert.AreEqual("123", fields[0]);
    Assert.AreEqual("456", fields[1]);
    Assert.AreEqual("abc", fields[2]);
}
/// <summary>
/// Expects a parser configured with both a comma and a tab delimiter to split
/// on either one within the same input.
/// </summary>
public void Comma_tab_delimited()
{
    var parser = new DelimitedParser(",", "\t");

    var fields = parser.Parse("123,456\t789");

    Assert.IsNotNull(fields);
    Assert.AreEqual(3, fields.Length);
    Assert.AreEqual("123", fields[0]);
    Assert.AreEqual("456", fields[1]);
    Assert.AreEqual("789", fields[2]);
}
/// <summary>
/// Expects surrounding spaces, tabs and newlines to be stripped from every value
/// when parsing comma-delimited inputs.
/// </summary>
public void Delimited_with_whitespace()
{
    var parser = new DelimitedParser(",");

    var fields = parser.Parse(" 123 ,\n 456 ", " \tabc, def");

    Assert.IsNotNull(fields);
    Assert.AreEqual(4, fields.Length);
    Assert.AreEqual("123", fields[0]);
    Assert.AreEqual("456", fields[1]);
    Assert.AreEqual("abc", fields[2]);
    Assert.AreEqual("def", fields[3]);
}
/// <summary>
/// Builds a <see cref="DelimitedParser"/> that reads from the given text.
/// </summary>
/// <param name="input">Text to expose to the parser as a stream.</param>
/// <param name="delimitedColumnStrings">Column definitions forwarded to GetDelimitedParserSettings.</param>
/// <param name="delimiter">Delimiter character forwarded to GetDelimitedParserSettings.</param>
/// <returns>The parser created by DelimitedParser.Create for the settings and stream.</returns>
private DelimitedParser CreateDelimitedParser(
    string input,
    IEnumerable<string> delimitedColumnStrings,
    char delimiter)
{
    // The stream is created first, before the settings are built.
    var source = input.ToStream();

    return DelimitedParser.Create(
        this.GetDelimitedParserSettings(delimitedColumnStrings, delimiter),
        source);
}
/// <summary>
/// Advances a comma-delimited parser once over a two-line input and expects the
/// first line's fields as the current record, with empty fields reported as null.
/// </summary>
public void Test()
{
    const string text = "ABC,123,DEFG,,H,\r\n1,2,3,,5,7";
    string[] expectedFirstRecord = { "ABC", "123", "DEFG", null, "H", null };

    var input = new MemoryStream(Encoding.UTF8.GetBytes(text));
    input.Position = 0;
    var parser = new DelimitedParser(new Reader(input), ',');

    Assert.True(parser.Parse());
    Assert.Equal(expectedFirstRecord, parser.Current);
}
/// <summary>
/// Round-trip smoke test: writes a DummyData instance through a Master into a
/// string, then reads it back through a '*'-delimited DelimitedParser.
/// </summary>
public void BasicTest()
{
    var package = CreatePackage();
    var master = new Master(typeof(DummyData), package);

    // Write
    var original = new DummyData
    {
        Int32 = 123,
        String = "ABC",
        DateTime = new DateTime(1976, 4, 11)
    };
    var written = new StringBuilder();
    TextWriter output = new StringWriter(written);
    master.Write(original, output);

    // Read
    byte[] payload = Encoding.UTF8.GetBytes(written.ToString());
    var parser = new DelimitedParser(new Reader(new MemoryStream(payload)), '*');

    // NOTE(review): the value read back is never compared with the original, so
    // this test only fails if Write or Read throws.
    DummyData roundTripped = master.Read<DummyData>(parser);
}
/// <summary>
/// End-to-end check: feeds seven CSV rows from a DelimitedParser to SqlBulkCopy,
/// loading them into the WebMetrics table of a LocalDB database, then reads the
/// rows back through the Context and verifies them.
/// </summary>
public void SqlBulkCopy_DelimitedParser()
{
    // Use LocalDB as the temporary database for the test. Substitute with a
    // connection string more appropriate to your environment.
    // NOTE(review): nothing in this test removes the randomly named .mdf file afterwards.
    string databaseFile = Path.ChangeExtension(
        Path.Combine(Directory.GetCurrentDirectory(), Path.GetRandomFileName()),
        "mdf");
    string connectionString = $@"Server=(localdb)\MSSQLLocalDB;AttachDbFilename={databaseFile};Trusted_Connection=True";

    // Build up some fake Cosmos output.
    string[] rows =
    {
        "1,http://support.microsoft.com/kb/100,100,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/101,500,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/200,700,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/321,902,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/732,199,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/376,112,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/546,414,0.01,0.25,0.5,0.75,2.1,5.01"
    };
    var csv = new StringBuilder();
    foreach (string row in rows)
    {
        csv.AppendLine(row);
    }

    // Read in the fake Cosmos output. Instead of using this stream, why not use
    // -> new CosmosStream("http://cosmos05.osdinfra.net:88/cosmos/0365exp.adhoc/my/stream.csv");
    var header = new DelimitedHeader(
        new DelimitedColumnFactory().Create(new[]
        {
            "ID:long",
            "Uri",
            "Count:long",
            "Min:double",
            "P25:double",
            "P50:double",
            "P75:double",
            "P99:double",
            "Max:double"
        }));
    var settings = new DelimitedParserSettings
    {
        DelimitedHeader = header,
        Delimiter = ','
    };
    var parser = DelimitedParser.Create(settings, csv.ToStream());

    // Ensure the database is created before bulk loading the data.
    using (var context = new Context(connectionString))
    {
        Assert.False(context.WebMetrics.Any());
    }

    // Bulk load the data.
    var transactionOptions = new TransactionOptions
    {
        IsolationLevel = IsolationLevel.ReadCommitted
    };
    using (var scope = new TransactionScope(TransactionScopeOption.Required, transactionOptions))
    using (var bulkCopy = new SqlBulkCopy(connectionString))
    {
        bulkCopy.DestinationTableName = "WebMetrics";
        bulkCopy.BatchSize = 10000;
        bulkCopy.WriteToServer(parser);
        scope.Complete();
    }

    // Read data back out of the DB...party on it.
    using (var context = new Context(connectionString))
    {
        Assert.Equal(7, context.WebMetrics.Count());

        // There are seven unique IDs.
        // NOTE(review): every input row carries ID 1, so this presumably relies on
        // the database assigning the ID values itself — confirm against the schema.
        var distinctIds = new HashSet<long>(context.WebMetrics.Select(x => x.ID));
        Assert.Equal(7, distinctIds.Count);

        Assert.True(context.WebMetrics.All(x => x.Min == 0.01));
        Assert.True(context.WebMetrics.All(x => x.Max == 5.01));
    }
}