Example #1
0
        private void Init()
        {
            // Reset block bookkeeping and (re)create the encoders for the
            // output stream and the in-memory block buffer.
            _blockCount = 0;

            _encoder      = new BinaryEncoder(_stream);
            _blockStream  = new MemoryStream();
            _blockEncoder = new BinaryEncoder(_blockStream);

            // Fall back to the no-op (null) codec when none was supplied.
            _codec = _codec ?? Codec.CreateCodec(Codec.Type.Null);

            _isOpen = true;
        }
Example #2
0
        public Encoder(Schema schema, Stream outStream)
        {
            // Destination stream and the schema describing the data to write.
            _stream = outStream;
            _schema = schema;

            // Header defaults: no compression, empty metadata, standard sync interval.
            _codec        = Codec.CreateCodec(Codec.Type.Null);
            _metadata     = new Metadata();
            _syncInterval = DataFileConstants.DefaultSyncInterval;

            // Writers for the main stream and for the per-block buffer.
            _encoder      = new Writer(_stream);
            _blockStream  = new MemoryStream();
            _blockEncoder = new Writer(_blockStream);
            _blockCount   = 0;

            // Resolve the datum writer for this schema up front.
            _writer = Resolver.ResolveWriter(schema);

            _isOpen = true;
        }
Example #3
0
        /// <summary>
        /// Creates the shared <c>_dataFileWriter</c> for <paramref name="outputFilePath"/>,
        /// writing deflate-compressed generic records using <c>_avroSchema</c>.
        /// </summary>
        /// <param name="outputFilePath">Path of the Avro file to create or append to.</param>
        /// <param name="mode">Whether to overwrite or append to an existing file.</param>
        /// <exception cref="ArgumentOutOfRangeException">Thrown for an unknown <paramref name="mode"/>.</exception>
        private static void BuildDataFileWriter(string outputFilePath, Mode mode)
        {
            GenericDatumWriter <GenericRecord> datumWriter = new GenericDatumWriter <GenericRecord>(_avroSchema);
            Codec codec = Codec.CreateCodec(Codec.Type.Deflate);

            // The two modes differ only in how the file is opened, so map the
            // mode to a FileMode once instead of duplicating the OpenWriter call.
            FileMode fileMode;
            switch (mode)
            {
            case Mode.Overwrite:
                fileMode = FileMode.Create;
                break;

            case Mode.Append:
                // NOTE(review): OpenWriter on a FileMode.Append stream presumably
                // writes a fresh Avro header at the append position — confirm this
                // is intended rather than an append-writer API (if available).
                fileMode = FileMode.Append;
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(mode));
            }

            _dataFileWriter = (DataFileWriter <GenericRecord>) DataFileWriter <GenericRecord> .OpenWriter(
                datumWriter, new FileStream(outputFilePath, fileMode), codec);
        }
Example #4
0
        public void TestSpecificData(string schemaStr, object[] recs, Codec.Type codecType)
        {
            // Build the specific-record fixtures once; every reader/writer
            // factory round-trips the same data.
            IList <Foo> expected = MakeRecords(recs);

            foreach (var rwFactory in SpecificOptions <Foo>())
            {
                var    outputStream = new MemoryStream();
                Schema parsedSchema = Schema.Parse(schemaStr);

                // Write every record through this factory's writer.
                using (IFileWriter <Foo> fileWriter = rwFactory.CreateWriter(outputStream, parsedSchema, Codec.CreateCodec(codecType)))
                {
                    foreach (Foo record in expected)
                    {
                        fileWriter.Append(record);
                    }
                }

                // Read everything back from a copy of the written bytes.
                var inputStream = new MemoryStream(outputStream.ToArray());

                IList <Foo> actual = new List <Foo>();

                using (IFileReader <Foo> fileReader = rwFactory.CreateReader(inputStream, null))
                {
                    foreach (Foo record in fileReader.NextEntries)
                    {
                        actual.Add(record);
                    }
                }

                // Compare record-by-record via their string (Json) representation.
                Assert.AreEqual(expected.Count, actual.Count);
                for (int i = 0; i < expected.Count; i++)
                {
                    Assert.AreEqual(expected[i].ToString(), actual[i].ToString());
                }
            }
        }
Example #5
0
        public void TestDifferentReaderSchema()
        {
            // Writer schema declares fields f1 and f2; the reader schema drops f2
            // and adds f3 with a default, exercising schema resolution on read.
            RecordSchema writerSchema = Schema.Parse("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"},"
                                                     + "{\"name\":\"f2\", \"type\":\"string\"}]}") as RecordSchema;
            Schema readerSchema = Schema.Parse("{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"},"
                                               + "{\"name\":\"f3\", \"type\":\"string\", \"default\":\"test\"}]}");

            foreach (var rwFactory in GenericOptions <GenericRecord>())
            {
                var outputStream = new MemoryStream();

                // Write a single record under the writer schema.
                using (var fileWriter = rwFactory.CreateWriter(outputStream, writerSchema, Codec.CreateCodec(Codec.Type.Null)))
                {
                    fileWriter.Append(mkRecord(new [] { "f1", "f1val", "f2", "f2val" }, writerSchema));
                }

                var inputStream = new MemoryStream(outputStream.ToArray());

                // Read it back under the reader schema and verify the resolution rules.
                using (IFileReader <GenericRecord> fileReader = rwFactory.CreateReader(inputStream, readerSchema))
                {
                    GenericRecord resolved = fileReader.Next();

                    object dropped;
                    Assert.IsFalse(resolved.TryGetValue("f2", out dropped)); // f2 is absent from the reader schema
                    Assert.AreEqual("f1val", resolved["f1"]);                // carried through unchanged
                    Assert.AreEqual("test", resolved["f3"]);                 // filled from the schema default
                }
            }
        }
Example #6
0
        // Disabled due to long runtime [TestCase(specificSchema, Codec.Type.Deflate, 1000, 588, 998)]
        // Writes `iterations` batches of records, forcing an explicit Sync() at two
        // chosen iterations, then verifies that the sync positions observed while
        // reading can be used with Sync()/Seek() to reposition the reader.
        public void TestSyncAndSeekPositions(string schemaStr, Codec.Type codecType, int iterations, int firstSyncPosition, int secondSyncPosition)
        {
            // create and write out
            IList <Foo>  records = MakeRecords(GetTestFooObject());
            MemoryStream dataFileOutputStream = new MemoryStream();

            Schema            schema = Schema.Parse(schemaStr);
            DatumWriter <Foo> writer = new SpecificWriter <Foo>(schema);

            using (IFileWriter <Foo> dataFileWriter = DataFileWriter <Foo> .OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
            {
                for (int i = 0; i < iterations; ++i)
                {
                    foreach (Foo foo in records)
                    {
                        dataFileWriter.Append(foo);
                    }

                    // write out block
                    if (i == firstSyncPosition || i == secondSyncPosition)
                    {
                        dataFileWriter.Sync();
                    }
                }
            }

            MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());

            // read syncs
            IList <long> syncs = new List <long>();

            using (IFileReader <Foo> reader = DataFileReader <Foo> .OpenReader(dataFileInputStream))
            {
                long previousSync = -1;

                // Record each distinct PreviousSync() value seen while iterating.
                // The Tell() comparison presumably filters out the marker at
                // end-of-file — TODO confirm against the reader implementation.
                foreach (Foo foo in reader.NextEntries)
                {
                    if (reader.PreviousSync() != previousSync &&
                        reader.Tell() != reader.PreviousSync()) // EOF
                    {
                        previousSync = reader.PreviousSync();
                        syncs.Add(previousSync);
                    }
                }

                // verify syncs wth seeks
                reader.Sync(0); // first sync
                Assert.AreEqual(reader.PreviousSync(), syncs[0],
                                string.Format("Error syncing reader to position: {0}", syncs[0]));

                // Seeking directly to each recorded sync position must yield a
                // readable record.
                foreach (long sync in syncs) // the rest
                {
                    reader.Seek(sync);
                    Foo foo = reader.Next();
                    Assert.IsNotNull(foo, string.Format("Error seeking to sync position: {0}", sync));
                }
            }
        }
Example #7
0
        // Writes records while logging every position at which the writer emitted
        // a block (detected by the output stream position advancing), then verifies
        // that syncing the reader to each logged position yields exactly the number
        // of records remaining after that point.
        public void TestPartialReadAll(string schemaStr, Codec.Type codecType)
        {
            // create and write out
            IList <Foo> records = MakeRecords(GetTestFooObject());

            MemoryStream dataFileOutputStream = new MemoryStream();

            Schema            schema  = Schema.Parse(schemaStr);
            DatumWriter <Foo> writer  = new SpecificWriter <Foo>(schema);
            int            numRecords = 0;
            List <SyncLog> syncLogs   = new List <SyncLog>();

            using (IFileWriter <Foo> dataFileWriter = DataFileWriter <Foo> .OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
            {
                // Log the position right after the header is flushed.
                // NOTE(review): "Position - SyncSize + 1" presumably points just
                // inside the sync marker that was written — confirm against
                // DataFileConstants.SyncSize and the reader's Sync() semantics.
                dataFileWriter.Flush();
                syncLogs.Add(new SyncLog {
                    Position = dataFileOutputStream.Position - DataFileConstants.SyncSize + 1, RemainingRecords = numRecords
                });
                long lastPosition = dataFileOutputStream.Position;
                for (int i = 0; i < 10; ++i)
                {
                    foreach (Foo foo in records)
                    {
                        dataFileWriter.Append(foo);
                        // A change in stream position means the writer flushed a
                        // block; RemainingRecords is the count BEFORE this append.
                        if (dataFileOutputStream.Position != lastPosition)
                        {
                            syncLogs.Add(new SyncLog {
                                Position = dataFileOutputStream.Position - DataFileConstants.SyncSize + 1, RemainingRecords = numRecords
                            });
                            lastPosition = dataFileOutputStream.Position;
                        }
                        numRecords++;
                    }

                    // write out block
                    if (i == 1 || i == 4)
                    {
                        dataFileWriter.Sync();
                        syncLogs.Add(new SyncLog {
                            Position = dataFileOutputStream.Position - DataFileConstants.SyncSize + 1, RemainingRecords = numRecords
                        });
                        lastPosition = dataFileOutputStream.Position;
                    }
                }
                // Final entry: end of file, all records already written.
                dataFileWriter.Flush();
                syncLogs.Add(new SyncLog {
                    Position = dataFileOutputStream.Position, RemainingRecords = numRecords
                });
            }

            MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());

            // read back
            using (IFileReader <Foo> reader = DataFileReader <Foo> .OpenReader(dataFileInputStream))
            {
                long curPosition = 0;

                // For each logged sync point, syncing from the previous position
                // and from one byte before the sync point must both find the same
                // number of remaining records.
                foreach (SyncLog syncLog in syncLogs)
                {
                    int  expectedRecords = numRecords - syncLog.RemainingRecords;
                    long nextSyncPoint   = syncLog.Position;
                    AssertNumRecordsFromPosition(reader, curPosition, expectedRecords);
                    AssertNumRecordsFromPosition(reader, nextSyncPoint - 1, expectedRecords);
                    curPosition = nextSyncPoint;
                }
            }
        }
Example #8
0
        [TestCase(specificSchema, Codec.Type.Null, 0, 330)]   // 330
        public void TestPartialRead(string schemaStr, Codec.Type codecType, int position, int expectedRecords)
        {
            // Write ten batches of records, forcing explicit sync markers after
            // the second and fifth batches.
            IList <Foo> records = MakeRecords(GetTestFooObject());

            var outputStream = new MemoryStream();

            Schema            schema      = Schema.Parse(schemaStr);
            DatumWriter <Foo> datumWriter = new SpecificWriter <Foo>(schema);

            using (IFileWriter <Foo> fileWriter = DataFileWriter <Foo> .OpenWriter(datumWriter, outputStream, Codec.CreateCodec(codecType)))
            {
                for (int batch = 0; batch < 10; ++batch)
                {
                    foreach (Foo record in records)
                    {
                        fileWriter.Append(record);
                    }

                    // write out block
                    if (batch == 1 || batch == 4)
                    {
                        fileWriter.Sync();
                    }
                }
            }

            var inputStream = new MemoryStream(outputStream.ToArray());

            // Sync to the next block after `position`, then count the records
            // remaining from there to the end of the file.
            IList <Foo> readRecords = new List <Foo>();

            using (IFileReader <Foo> fileReader = DataFileReader <Foo> .OpenReader(inputStream))
            {
                fileReader.Sync(position);

                foreach (Foo record in fileReader.NextEntries)
                {
                    readRecords.Add(record);
                }
            }

            Assert.IsTrue((readRecords != null && readRecords.Count == expectedRecords),
                          string.Format("Error performing partial read after position: {0}", position));
        }
Example #9
0
        public void TestMetaData(string key, object value, Codec.Type codecType, bool useTypeGetter)
        {
            // Write a small file carrying one custom metadata entry in its header.
            object[]    data    = new object[] { new object[] { "John", 23 } };
            IList <Foo> records = MakeRecords(data);

            var outputStream = new MemoryStream();

            Schema            schema      = Schema.Parse(specificSchema);
            DatumWriter <Foo> datumWriter = new SpecificWriter <Foo>(schema);

            using (IFileWriter <Foo> fileWriter = DataFileWriter <Foo> .OpenWriter(datumWriter, outputStream, Codec.CreateCodec(codecType)))
            {
                SetMetaData(fileWriter, key, value);

                foreach (Foo record in records)
                {
                    fileWriter.Append(record);
                }
            }

            var inputStream = new MemoryStream(outputStream.ToArray());

            // Read the header back and confirm the metadata survived the round trip.
            using (IFileReader <Foo> fileReader = DataFileReader <Foo> .OpenReader(inputStream))
            {
                Assert.IsTrue(ValidateMetaData(fileReader, key, value, useTypeGetter),
                              string.Format("Error validating header meta data for key: {0}, expected value: {1}", key, value));
            }
        }
Example #10
0
        public AvroWriter(IAvroFileValueDef <V> valueDef, FileStream stream, Codec.Type codec) : base(valueDef, stream)
        {
            // The underlying stream must be writable before attaching an Avro writer.
            Preconditions.CheckArgument(Stream.CanWrite);

            var recordWriter = new GenericDatumWriter <GenericRecord>(ValueDef.Schema);
            mWriter = DataFileWriter <GenericRecord> .OpenWriter(recordWriter, Stream, Codec.CreateCodec(codec));

            // Move to the end of the stream so subsequent writes go after any
            // existing content.
            Stream.Position = Stream.Length;
        }
Example #11
0
        // Round-trips a single generic record through each reader/writer factory
        // and asserts that at least one record is read back.
        public void TestGenericData(string schemaStr, object[] value, Codec.Type codecType)
        {
            // Parse the schema once; the original parsed it twice per iteration.
            Schema schema = Schema.Parse(schemaStr);

            foreach (var rwFactory in GenericOptions <GenericRecord>())
            {
                // Create and write out
                MemoryStream dataFileOutputStream = new MemoryStream();
                using (var writer = rwFactory.CreateWriter(dataFileOutputStream, schema, Codec.CreateCodec(codecType)))
                {
                    writer.Append(mkRecord(value, schema as RecordSchema));
                }

                MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());

                // Read back
                IList <GenericRecord> readFoos = new List <GenericRecord>();
                using (IFileReader <GenericRecord> reader = rwFactory.CreateReader(dataFileInputStream, null))
                {
                    foreach (GenericRecord foo in reader.NextEntries)
                    {
                        readFoos.Add(foo);
                    }
                }

                Assert.IsTrue((readFoos != null && readFoos.Count > 0),
                              string.Format(@"Generic object: {0} did not serialise/deserialise correctly", readFoos));
            }
        }
Example #12
0
        /// <summary>
        /// Runs a SQL query and exports the result set to a deflate-compressed Avro
        /// file, inferring the record schema from the result set's columns.
        /// Returns 0 on success, 1 when arguments are missing or no rows were returned.
        /// </summary>
        public static int Main(string[] args)
        {
            RecordSchema schema = null;
            IFileWriter <GenericRecord> writer = null;

            // Parse "--key=value" style arguments; values may be wrapped in quotes.
            var options = args.ToDictionary(arg => arg.TrimStart('-').Split('=').FirstOrDefault(), arg => arg.Split('=').LastOrDefault().Trim(new[] { '\'', '"' }));

            if (string.IsNullOrEmpty(options.GetOrDefault("output")))
            {
                PrintHelpMessage();
                return(1);
            }

            var builder = new SqlConnectionStringBuilder
            {
                DataSource     = options.GetOrDefault("server", "localhost"),
                InitialCatalog = options.GetOrDefault("database", "RabotaUA2")
            };

            // SQL authentication only when a password was supplied; otherwise
            // fall back to integrated security.
            if (!string.IsNullOrEmpty(options.GetOrDefault("password")))
            {
                builder.UserID   = options.GetOrDefault("username", "sa");
                builder.Password = options.GetOrDefault("password", "");
            }
            else
            {
                builder.IntegratedSecurity = true;
            }

            // Query comes from --query, or else from the file named by --input.
            var query    = options.GetOrDefault("query", null) ?? File.ReadAllText(options.GetOrDefault("input"));
            var provider = new CSharpCodeProvider();

            // Dispose the connection, command and reader deterministically; the
            // original code left them open until process exit.
            using (var connection = new SqlConnection(builder.ConnectionString))
            using (var command = new SqlCommand(query, connection) { CommandTimeout = 0 })
            {
                connection.Open();

                using (var reader = command.ExecuteReader())
                {
                    if (!reader.HasRows)
                    {
                        return(1);
                    }

                    try
                    {
                        while (reader.Read())
                        {
                            if (schema == null)
                            {
                                // Build an Avro record schema from the result set's
                                // column names and CLR type names; every field is a
                                // union with "null" so DBNull values can be written.
                                schema = Schema.Parse(JsonConvert.SerializeObject(new
                                {
                                    type   = "record",
                                    name   = "row",
                                    fields = Enumerable.Range(0, reader.FieldCount).Select(index => new
                                    {
                                        name = reader.GetName(index),
                                        type = new[] {
                                            provider.GetTypeOutput(new CodeTypeReference(reader.GetFieldType(index))),
                                            "null"
                                        }
                                    })
                                })) as RecordSchema;

                                writer = DataFileWriter <GenericRecord> .OpenWriter(new GenericDatumWriter <GenericRecord>(schema), options.GetOrDefault("output"), Codec.CreateCodec(Codec.Type.Deflate));
                            }

                            var r = new GenericRecord(schema);
                            for (var i = 0; i < reader.FieldCount; i++)
                            {
                                r.Add(reader.GetName(i), reader.IsDBNull(i) ? null : reader[i]);
                            }
                            writer.Append(r);
                        }
                    }
                    finally
                    {
                        // Close even on failure so the Avro file is flushed for the
                        // rows that were already appended.
                        writer?.Close();
                    }

                    return(0);
                }
            }
        }
Example #13
0
 public static Writer OpenWriter(GenericDatumWriter writer, Stream outStream)
 {
     // Convenience overload: delegate to the full overload with the no-op codec.
     return OpenWriter(writer, outStream, Codec.CreateCodec(Codec.Type.Null));
 }
Example #14
0
 public static Writer OpenWriter(GenericDatumWriter writer, string path)
 {
     // Convenience overload: create (or truncate) the file at `path` and use
     // the no-op codec.
     var fileStream = new FileStream(path, FileMode.Create);
     return OpenWriter(writer, fileStream, Codec.CreateCodec(Codec.Type.Null));
 }
Example #15
0
        // Verifies that writing and reading work over a non-seekable stream, and
        // that the seek-dependent operations (writer Sync, reader Seek/PreviousSync)
        // throw as documented.
        public void TestNonSeekableStream(string schemaStr, object[] value, Codec.Type codecType)
        {
            foreach (var rwFactory in GenericOptions <GenericRecord>())
            {
                // Create and write out
                MemoryStream compressedStream = new MemoryStream();
                // using here a DeflateStream as it is a standard non-seekable stream, so if it works for this one,
                // it should also works with any standard non-seekable stream (ie: NetworkStreams)
                // NOTE(review): the DeflateStream itself is never explicitly disposed;
                // presumably disposing the writer flushes/closes it so that
                // compressedStream holds a complete deflate payload — verify against
                // the writer's ownership of its output stream.
                DeflateStream dataFileOutputStream = new DeflateStream(compressedStream, CompressionMode.Compress);
                using (var writer = rwFactory.CreateWriter(dataFileOutputStream, Schema.Parse(schemaStr), Codec.CreateCodec(codecType)))
                {
                    writer.Append(mkRecord(value, Schema.Parse(schemaStr) as RecordSchema));

                    // The Sync method is not supported for non-seekable streams.
                    Assert.Throws <NotSupportedException>(() => writer.Sync());
                }

                DeflateStream dataFileInputStream = new DeflateStream(new MemoryStream(compressedStream.ToArray()), CompressionMode.Decompress);

                // Read back
                IList <GenericRecord> readFoos = new List <GenericRecord>();
                using (IFileReader <GenericRecord> reader = rwFactory.CreateReader(dataFileInputStream, null))
                {
                    foreach (GenericRecord foo in reader.NextEntries)
                    {
                        readFoos.Add(foo);
                    }

                    // These methods are not supported for non-seekable streams.
                    Assert.Throws <AvroRuntimeException>(() => reader.Seek(0));
                    Assert.Throws <AvroRuntimeException>(() => reader.PreviousSync());
                }

                Assert.IsTrue((readFoos != null && readFoos.Count > 0),
                              string.Format(@"Generic object: {0} did not serialise/deserialise correctly", readFoos));
            }
        }
Example #16
0
        // Round-trips a single primitive value through each reader/writer factory
        // and validates it with CheckPrimitive.
        public void TestPrimitiveData(string schemaStr, object value, Codec.Type codecType)
        {
            // Parse the schema once; the original re-parsed it on every iteration.
            Schema schema = Schema.Parse(schemaStr);

            foreach (var rwFactory in GenericOptions <object>())
            {
                MemoryStream dataFileOutputStream = new MemoryStream();
                using (var writer = rwFactory.CreateWriter(dataFileOutputStream, schema, Codec.CreateCodec(codecType)))
                {
                    writer.Append(value);
                }

                MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());

                Assert.IsTrue(CheckPrimitive(dataFileInputStream, value, rwFactory.CreateReader),
                              string.Format("Error reading generic data for object: {0}", value));
            }
        }
Example #17
0
        private void WriteGeneric <T>(Stream output, Schema schema, T value, Codec.Type codecType)
        {
            // Serialize a single datum to `output` using the requested codec.
            var datumWriter = new GenericWriter <T>(schema);
            var codec       = Codec.CreateCodec(codecType);

            using (IFileWriter <T> fileWriter = DataFileWriter <T> .OpenWriter(datumWriter, output, codec))
            {
                fileWriter.Append(value);
            }
        }