Example #1
0
        /// <summary>
        /// Serializes two records whose "Value" field is a nullable Avro string
        /// (one "asdf", one null), extracts them into a row produced by
        /// <c>SingleColumnRowGenerator&lt;int&gt;</c> with
        /// <c>ignoreColumnMismatches</c> enabled, and expects both extracted rows
        /// to report 0 for the "Value" column.
        /// </summary>
        public void AvroExtractor_IgnoreColumnMismatch_Ignore()
        {
            var avroSchema = @"{""type"":""record"",""name"":""SingleColumnPoco"",""namespace"":""Microsoft.Analytics.Samples.Formats.Tests"",""fields"":[{""name"":""Value"",""type"":[""null"",""string""]}]}";

            // Source records are strings, while the target row column is an int.
            var records = new List<SingleColumnPoco<string>>
            {
                new SingleColumnPoco<string>() { Value = "asdf" },
                new SingleColumnPoco<string>() { Value = null }
            };

            var targetRow = SingleColumnRowGenerator<int>().AsUpdatable();

            using (var buffer = new MemoryStream())
            {
                serializeAvro(buffer, records, avroSchema);

                var streamReader  = new USqlStreamReader(buffer);
                var avroExtractor = new AvroExtractor(avroSchema, ignoreColumnMismatches: true);

                var rows = avroExtractor.Extract(streamReader, targetRow).ToList();

                // Both records (the non-null and the null one) must surface as 0.
                for (var index = 0; index < 2; index++)
                {
                    Assert.AreEqual(0, rows[index].Get<int>("Value"));
                }
            }
        }
        /// <summary>
        /// Writes <paramref name="data"/> to an in-memory stream via
        /// <c>serializeAvro</c>, then runs an <c>AvroExtractor</c> built from
        /// <paramref name="schema"/> over that stream and returns the extracted rows.
        /// </summary>
        /// <typeparam name="T">Type of the single column, used for both the input records and the output row.</typeparam>
        /// <param name="data">Records to serialize and then extract.</param>
        /// <param name="schema">Avro schema passed to both the serializer and the extractor.</param>
        /// <returns>The extracted rows, materialized into a list before the stream is disposed.</returns>
        private IList<IRow> ExecuteExtract<T>(List<SingleColumnPoco<T>> data, string schema)
        {
            var targetRow = SingleColumnRowGenerator<T>().AsUpdatable();
            IList<IRow> extractedRows;

            using (var buffer = new MemoryStream())
            {
                serializeAvro(buffer, data, schema);

                var streamReader  = new USqlStreamReader(buffer);
                var avroExtractor = new AvroExtractor(schema);

                // Materialize while the stream is still open.
                extractedRows = avroExtractor.Extract(streamReader, targetRow).ToList();
            }

            return extractedRows;
        }
        // Manual timing run, not an automated test: the TestMethod attribute below is
        // commented out and the input is a hard-coded local file path. Opens the file,
        // runs AvroExtractor over it, and prints the row count and elapsed seconds.
        //[TestMethod]
        public void RealBigFileTest()
        {
            var avroFilePath = "C:\\code\\notebooks\\data\\avro\\enriched-20171009-04\\-1824167840_0d754003a7694c4caf4267a0d3764150_1.avro";

            using (var fileStream = File.OpenRead(avroFilePath))
            {
                var reader    = new UnstructuredReaderMock(fileStream);
                var extractor = new AvroExtractor("", true);
                var targetRow = new UpdatableRowMock();

                // The timer starts only after setup, so it covers Extract plus the enumeration below.
                var timer     = Stopwatch.StartNew();
                var extracted = extractor.Extract(reader, targetRow);

                // Count first, then read the clock: same evaluation order as a single interpolated string.
                var rowCount       = extracted.Count();
                var elapsedSeconds = timer.Elapsed.TotalSeconds;

                Console.WriteLine($"Done. Rows: {rowCount} in {elapsedSeconds}s");
            }
        }