/// <summary>
/// Manual benchmark: measures parsing throughput of <see cref="DelimitedParser"/>
/// against a large local CSV file. Requires c:\temp\test1.csv to exist; intended
/// for ad-hoc performance runs, not automated CI.
/// </summary>
public void ParseHonkingBigFile()
{
    Stopwatch sw = new Stopwatch();
    using (var stream = File.OpenRead(@"c:\temp\test1.csv"))
    {
        sw.Restart();
        long count = 0;

        var settings = this.GetDelimitedParserSettings(
            new[] { "field1", "field2" },
            ',');
        var parser = DelimitedParser.Create(settings, stream);

        // Enumerate every record so the benchmark actually exercises the parser;
        // previously this loop was commented out, leaving count permanently 0 and
        // timing only parser construction.
        foreach (var record in parser.Parse(stream))
        {
            count += record.LongCount();
        }

        sw.Stop();
        var elapsed = sw.Elapsed;

        // Use floating-point division: the original integer division truncated
        // the megabyte count and reported a misleading throughput figure.
        double megabytes = stream.Length / (1024.0 * 1024.0);
        var s = $"elapsed={elapsed}, count={count}, throughput={megabytes / elapsed.TotalSeconds} MB/s";
        Console.WriteLine(s);
    }
}
/// <summary>
/// Builds a <see cref="DelimitedParser"/> over the given input text using the
/// supplied column definitions and delimiter.
/// </summary>
private DelimitedParser CreateDelimitedParser(
    string input,
    IEnumerable<string> delimitedColumnStrings,
    char delimiter)
{
    var sourceStream = input.ToStream();
    var parserSettings = this.GetDelimitedParserSettings(delimitedColumnStrings, delimiter);
    return DelimitedParser.Create(parserSettings, sourceStream);
}
/// <summary>
/// End-to-end test: parses fake Cosmos CSV output with <see cref="DelimitedParser"/>,
/// bulk loads it into a LocalDB table via <see cref="SqlBulkCopy"/>, then reads the
/// rows back and verifies them.
/// </summary>
public void SqlBulkCopy_DelimitedParser()
{
    // Attach a throwaway .mdf database file for the duration of the test.
    string databasePath = Path.Combine(
        Directory.GetCurrentDirectory(),
        Path.GetRandomFileName());
    databasePath = Path.ChangeExtension(databasePath, "mdf");

    // Use LocalDB as the temporary database for the test. Substitute with a
    // connection string more appropriate to your environment.
    string connectionString =
        $@"Server=(localdb)\MSSQLLocalDB;AttachDbFilename={databasePath};Trusted_Connection=True";

    // Build up some fake Cosmos output.
    string[] fakeCosmosRows =
    {
        "1,http://support.microsoft.com/kb/100,100,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/101,500,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/200,700,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/321,902,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/732,199,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/376,112,0.01,0.25,0.5,0.75,2.1,5.01",
        "1,http://support.microsoft.com/kb/546,414,0.01,0.25,0.5,0.75,2.1,5.01",
    };

    var builder = new StringBuilder();
    foreach (string row in fakeCosmosRows)
    {
        builder.AppendLine(row);
    }

    // Read in the fake Cosmos output. Instead of using this stream, why not use
    // -> new CosmosStream("http://cosmos05.osdinfra.net:88/cosmos/0365exp.adhoc/my/stream.csv");
    var columnFactory = new DelimitedColumnFactory();
    var header = new DelimitedHeader(
        columnFactory.Create(
            new[]
            {
                "ID:long", "Uri", "Count:long", "Min:double", "P25:double",
                "P50:double", "P75:double", "P99:double", "Max:double"
            }));

    var settings = new DelimitedParserSettings
    {
        DelimitedHeader = header,
        Delimiter = ',',
    };

    var parser = DelimitedParser.Create(settings, builder.ToStream());

    // Ensure the database is created before bulk loading the data.
    using (var context = new Context(connectionString))
    {
        Assert.False(context.WebMetrics.Any());
    }

    // Bulk load the data inside a single read-committed transaction.
    using (var transactionScope = new TransactionScope(
        TransactionScopeOption.Required,
        new TransactionOptions { IsolationLevel = IsolationLevel.ReadCommitted }))
    using (var bulkCopy = new SqlBulkCopy(connectionString))
    {
        bulkCopy.DestinationTableName = "WebMetrics";
        bulkCopy.BatchSize = 10000;
        bulkCopy.WriteToServer(parser);
        transactionScope.Complete();
    }

    // Read data back out of the DB...party on it.
    using (var context = new Context(connectionString))
    {
        Assert.Equal(7, context.WebMetrics.Count());

        // There are seven unique IDs.
        // NOTE(review): every source row carries ID=1, so this presumably relies on
        // the table generating its own identity values — confirm against the schema.
        var distinctIds = new HashSet<long>(context.WebMetrics.Select(x => x.ID));
        Assert.Equal(7, distinctIds.Count);

        Assert.True(context.WebMetrics.All(x => x.Min == 0.01));
        Assert.True(context.WebMetrics.All(x => x.Max == 5.01));
    }
}