Beispiel #1
0
        /// <summary>
        /// Micro-benchmark: parses a large CSV file with <c>DelimitedParser</c> and
        /// prints elapsed time, record-field count, and throughput in MB/s.
        /// </summary>
        public void ParseHonkingBigFile()
        {
            using (var stream = File.OpenRead(@"c:\temp\test1.csv"))
            {
                var settings = this.GetDelimitedParserSettings(
                    new[] { "field1", "field2" },
                    ',');

                var parser = DelimitedParser.Create(settings, stream);

                // Start timing AFTER setup so only the parse itself is measured.
                Stopwatch sw = Stopwatch.StartNew();
                long count = 0;

                // Consume every record so the benchmark actually exercises the parser.
                // Previously this loop was commented out, which left count at 0 and
                // made the reported throughput meaningless.
                // NOTE(review): Parse() with no argument matches the other call site
                // in this file; confirm against the DelimitedParser API.
                foreach (var record in parser.Parse())
                {
                    count += record.LongCount();
                }

                sw.Stop();
                var elapsed = sw.Elapsed;

                // Use floating-point division; the original integer division
                // (stream.Length / (1024 * 1024)) truncated the MB figure.
                double megabytes = stream.Length / (1024.0 * 1024.0);
                var s = $"elapsed={elapsed}, count={count}, throughput={megabytes / elapsed.TotalSeconds} MB/s";
                Console.WriteLine(s);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Builds a <c>DelimitedParser</c> over the supplied input text.
        /// </summary>
        /// <param name="input">Raw delimited text to be parsed.</param>
        /// <param name="delimitedColumnStrings">Column definition strings for the header.</param>
        /// <param name="delimiter">Field delimiter character.</param>
        /// <returns>A parser reading from a stream backed by <paramref name="input"/>.</returns>
        private DelimitedParser CreateDelimitedParser(
            string input,
            IEnumerable <string> delimitedColumnStrings,
            char delimiter)
        {
            // The stream is intentionally left open: the returned parser owns it.
            var sourceStream   = input.ToStream();
            var parserSettings = this.GetDelimitedParserSettings(delimitedColumnStrings, delimiter);

            return DelimitedParser.Create(parserSettings, sourceStream);
        }
Beispiel #3
0
        /// <summary>
        /// End-to-end test: parses fake Cosmos CSV output with <c>DelimitedParser</c>,
        /// bulk-loads it into a LocalDB table via <c>SqlBulkCopy</c> inside a
        /// transaction, then re-reads the table and verifies the loaded rows.
        /// </summary>
        public void SqlBulkCopy_DelimitedParser()
        {
            // Random .mdf path in the current directory for an attach-on-connect DB.
            // NOTE(review): this file is never deleted, so each run leaves an .mdf
            // (and .ldf) behind — consider cleanup in a finally block.
            string path = Path.Combine(
                Directory.GetCurrentDirectory(),
                Path.GetRandomFileName());

            path = Path.ChangeExtension(path, "mdf");

            // Use LocalDB as the temporary database for the test.  Substitute with a
            // connection string more appropriate to your environment.
            string connectionString = $@"Server=(localdb)\MSSQLLocalDB;AttachDbFilename={path};Trusted_Connection=True";

            // Build up some fake Cosmos output.
            // Format: ID, Uri, Count, Min, P25, P50, P75, P99, Max.
            StringBuilder sb = new StringBuilder();

            sb.AppendLine("1,http://support.microsoft.com/kb/100,100,0.01,0.25,0.5,0.75,2.1,5.01");
            sb.AppendLine("1,http://support.microsoft.com/kb/101,500,0.01,0.25,0.5,0.75,2.1,5.01");
            sb.AppendLine("1,http://support.microsoft.com/kb/200,700,0.01,0.25,0.5,0.75,2.1,5.01");
            sb.AppendLine("1,http://support.microsoft.com/kb/321,902,0.01,0.25,0.5,0.75,2.1,5.01");
            sb.AppendLine("1,http://support.microsoft.com/kb/732,199,0.01,0.25,0.5,0.75,2.1,5.01");
            sb.AppendLine("1,http://support.microsoft.com/kb/376,112,0.01,0.25,0.5,0.75,2.1,5.01");
            sb.AppendLine("1,http://support.microsoft.com/kb/546,414,0.01,0.25,0.5,0.75,2.1,5.01");

            // Read in the fake Cosmos output.  Instead of using this stream, why not use
            //  -> new CosmosStream("http://cosmos05.osdinfra.net:88/cosmos/0365exp.adhoc/my/stream.csv");

            // Header columns use "name:type" strings; a bare name defaults to string.
            var factory         = new DelimitedColumnFactory();
            var delimitedHeader = new DelimitedHeader(
                factory.Create(new[] { "ID:long", "Uri", "Count:long", "Min:double", "P25:double", "P50:double", "P75:double", "P99:double", "Max:double" }));

            var settings = new DelimitedParserSettings();

            settings.DelimitedHeader = delimitedHeader;
            settings.Delimiter       = ',';

            var parser = DelimitedParser.Create(settings, sb.ToStream());

            // Ensure the database is created before bulk loading the data.
            using (var context = new Context(connectionString))
            {
                Assert.False(context.WebMetrics.Any());
            }

            // Bulk load the data.
            // NOTE(review): parser is handed straight to WriteToServer, so
            // DelimitedParser presumably implements IDataReader — confirm.
            using (var transactionScope = new TransactionScope(
                       TransactionScopeOption.Required,
                       new TransactionOptions {
                IsolationLevel = IsolationLevel.ReadCommitted
            }))
                using (SqlBulkCopy sqlBulkCopy = new SqlBulkCopy(connectionString))
                {
                    sqlBulkCopy.DestinationTableName = "WebMetrics";
                    sqlBulkCopy.BatchSize            = 10000;

                    sqlBulkCopy.WriteToServer(parser);
                    transactionScope.Complete();
                }

            // Read data back out of the DB...party on it.
            using (var context = new Context(connectionString))
            {
                Assert.Equal(7, context.WebMetrics.Count());

                // There are seven unique IDs.
                // NOTE(review): every parsed row has ID=1, so this assertion
                // presumably relies on ID being an identity column that the server
                // repopulates (SqlBulkCopyOptions.KeepIdentity is not set) —
                // confirm against the WebMetrics schema.
                var set = new HashSet <long>(context.WebMetrics.Select(x => x.ID));
                Assert.Equal(7, set.Count);

                // Min/Max are identical on every input row, so exact == on these
                // doubles is safe here (values round-trip unchanged).
                Assert.True(context.WebMetrics.All(x => x.Min == 0.01));
                Assert.True(context.WebMetrics.All(x => x.Max == 5.01));
            }
        }