// Resets writer state before use: zeroes the block counter, builds binary
// encoders for both the output stream and the in-memory block buffer,
// defaults the codec to Null when none was configured, and marks the
// writer open.
private void Init()
{
    _blockCount = 0;
    _encoder = new BinaryEncoder(_stream);
    _blockStream = new MemoryStream();
    _blockEncoder = new BinaryEncoder(_blockStream);
    if (_codec is null)
    {
        _codec = Codec.CreateCodec(Codec.Type.Null);
    }
    _isOpen = true;
}
/// <summary>
/// Creates an encoder that writes an Avro data file for <paramref name="schema"/>
/// to <paramref name="outStream"/>, using the Null codec and the default sync
/// interval. The writer resolver is bound to the schema up front.
/// </summary>
public Encoder(Schema schema, Stream outStream)
{
    _stream = outStream;
    _schema = schema;
    _codec = Codec.CreateCodec(Codec.Type.Null);
    _metadata = new Metadata();
    _syncInterval = DataFileConstants.DefaultSyncInterval;
    _blockCount = 0;
    _encoder = new Writer(_stream);
    _blockStream = new MemoryStream();
    _blockEncoder = new Writer(_blockStream);
    _writer = Resolver.ResolveWriter(schema);
    _isOpen = true;
}
// Opens the shared data-file writer for the given path with Deflate
// compression. Overwrite truncates any existing file (FileMode.Create);
// Append opens the file positioned at its end (FileMode.Append).
private static void BuildDataFileWriter(string outputFilePath, Mode mode)
{
    var datumWriter = new GenericDatumWriter<GenericRecord>(_avroSchema);
    Codec codec = Codec.CreateCodec(Codec.Type.Deflate);

    // Map the requested mode onto a FileMode so a single OpenWriter call
    // serves both branches.
    FileMode fileMode;
    switch (mode)
    {
        case Mode.Overwrite:
            fileMode = FileMode.Create;
            break;
        case Mode.Append:
            fileMode = FileMode.Append;
            break;
        default:
            throw new ArgumentOutOfRangeException(nameof(mode));
    }

    _dataFileWriter = (DataFileWriter<GenericRecord>)DataFileWriter<GenericRecord>.OpenWriter(
        datumWriter, new FileStream(outputFilePath, fileMode), codec);
}
/// <summary>
/// Round-trips specific (generated-class) records through every
/// reader/writer factory combination and verifies each record survives
/// serialization, comparing via the records' string representations.
/// </summary>
public void TestSpecificData(string schemaStr, object[] recs, Codec.Type codecType)
{
    IList<Foo> records = MakeRecords(recs);
    foreach (var rwFactory in SpecificOptions<Foo>())
    {
        // Serialize every record into an in-memory data file.
        var outputStream = new MemoryStream();
        Schema schema = Schema.Parse(schemaStr);
        using (IFileWriter<Foo> fileWriter = rwFactory.CreateWriter(outputStream, schema, Codec.CreateCodec(codecType)))
        {
            foreach (Foo record in records)
            {
                fileWriter.Append(record);
            }
        }

        // Read everything back from a copy of the written bytes.
        var inputStream = new MemoryStream(outputStream.ToArray());
        IList<Foo> readRecords = new List<Foo>();
        using (IFileReader<Foo> fileReader = rwFactory.CreateReader(inputStream, null))
        {
            foreach (Foo record in fileReader.NextEntries)
            {
                readRecords.Add(record);
            }
        }

        // Compare objects via their Json/ToString form, pairwise.
        Assert.AreEqual(records.Count, readRecords.Count);
        for (int idx = 0; idx < records.Count; idx++)
        {
            Assert.AreEqual(records[idx].ToString(), readRecords[idx].ToString());
        }
    }
}
/// <summary>
/// Verifies schema resolution: data written with a schema containing f1/f2
/// is read back with a schema containing f1/f3 (f3 has a default). The
/// reader must drop f2, keep f1, and fill f3 from its default value.
/// </summary>
public void TestDifferentReaderSchema()
{
    var writerSchema = Schema.Parse(
        "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}," +
        "{\"name\":\"f2\", \"type\":\"string\"}]}") as RecordSchema;
    Schema readerSchema = Schema.Parse(
        "{\"type\":\"record\", \"name\":\"n\", \"fields\":[{\"name\":\"f1\", \"type\":\"string\"}," +
        "{\"name\":\"f3\", \"type\":\"string\", \"default\":\"test\"}]}");

    foreach (var rwFactory in GenericOptions<GenericRecord>())
    {
        // Write a single record under the writer schema.
        var outputStream = new MemoryStream();
        using (var fileWriter = rwFactory.CreateWriter(outputStream, writerSchema, Codec.CreateCodec(Codec.Type.Null)))
        {
            fileWriter.Append(mkRecord(new[] { "f1", "f1val", "f2", "f2val" }, writerSchema));
        }

        // Read it back under the (different) reader schema.
        var inputStream = new MemoryStream(outputStream.ToArray());
        using (IFileReader<GenericRecord> fileReader = rwFactory.CreateReader(inputStream, readerSchema))
        {
            GenericRecord result = fileReader.Next();

            // f2 is absent from the reader schema and must not be projected.
            object ignore;
            Assert.IsFalse(result.TryGetValue("f2", out ignore));
            // f1 is common to both schemas; f3 comes from its default.
            Assert.AreEqual("f1val", result["f1"]);
            Assert.AreEqual("test", result["f3"]);
        }
    }
}
// Disabled due to long runtime
[TestCase(specificSchema, Codec.Type.Deflate, 1000, 588, 998)]
// Writes many records with explicit Sync() calls at two iteration indices,
// then collects the sync-marker positions observed while reading and
// verifies that both Sync(0) and Seek(sync) land the reader on valid data.
public void TestSyncAndSeekPositions(string schemaStr, Codec.Type codecType, int iterations, int firstSyncPosition, int secondSyncPosition)
{
    // create and write out
    IList <Foo> records = MakeRecords(GetTestFooObject());
    MemoryStream dataFileOutputStream = new MemoryStream();
    Schema schema = Schema.Parse(schemaStr);
    DatumWriter <Foo> writer = new SpecificWriter <Foo>(schema);
    using (IFileWriter <Foo> dataFileWriter = DataFileWriter <Foo> .OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
    {
        for (int i = 0; i < iterations; ++i)
        {
            foreach (Foo foo in records)
            {
                dataFileWriter.Append(foo);
            }
            // write out block at the two designated iterations so the file
            // contains known sync points beyond the automatic ones
            if (i == firstSyncPosition || i == secondSyncPosition)
            {
                dataFileWriter.Sync();
            }
        }
    }
    MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());
    // read syncs: record each distinct PreviousSync() position encountered
    // while enumerating, skipping the case where Tell() equals the sync
    // position (which indicates EOF rather than a real block boundary)
    IList <long> syncs = new List <long>();
    using (IFileReader <Foo> reader = DataFileReader <Foo> .OpenReader(dataFileInputStream))
    {
        long previousSync = -1;
        foreach (Foo foo in reader.NextEntries)
        {
            if (reader.PreviousSync() != previousSync &&
                reader.Tell() != reader.PreviousSync()) // EOF
            {
                previousSync = reader.PreviousSync();
                syncs.Add(previousSync);
            }
        }
        // verify syncs with seeks
        reader.Sync(0); // first sync: syncing from 0 must land on the first marker
        Assert.AreEqual(reader.PreviousSync(), syncs[0],
            string.Format("Error syncing reader to position: {0}", syncs[0]));
        foreach (long sync in syncs) // the rest: every recorded marker must be seekable
        {
            reader.Seek(sync);
            Foo foo = reader.Next();
            Assert.IsNotNull(foo, string.Format("Error seeking to sync position: {0}", sync));
        }
    }
}
// Writes records while logging every stream position at which a sync marker
// was emitted (detected by the output position advancing after an Append,
// after an explicit Sync(), and after Flush()), together with how many
// records preceded that marker. Then, for each logged marker, verifies the
// reader sees exactly the expected number of remaining records both when
// started at the previous marker and when started one byte before this one.
public void TestPartialReadAll(string schemaStr, Codec.Type codecType)
{
    // create and write out
    IList <Foo> records = MakeRecords(GetTestFooObject());
    MemoryStream dataFileOutputStream = new MemoryStream();
    Schema schema = Schema.Parse(schemaStr);
    DatumWriter <Foo> writer = new SpecificWriter <Foo>(schema);
    int numRecords = 0;
    List <SyncLog> syncLogs = new List <SyncLog>();
    using (IFileWriter <Foo> dataFileWriter = DataFileWriter <Foo> .OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
    {
        // log the header's sync position (no records written yet)
        dataFileWriter.Flush();
        syncLogs.Add(new SyncLog { Position = dataFileOutputStream.Position - DataFileConstants.SyncSize + 1, RemainingRecords = numRecords });
        long lastPosition = dataFileOutputStream.Position;
        for (int i = 0; i < 10; ++i)
        {
            foreach (Foo foo in records)
            {
                dataFileWriter.Append(foo);
                // a change in stream position means a block (ending in a
                // sync marker) was flushed by the writer automatically
                if (dataFileOutputStream.Position != lastPosition)
                {
                    syncLogs.Add(new SyncLog { Position = dataFileOutputStream.Position - DataFileConstants.SyncSize + 1, RemainingRecords = numRecords });
                    lastPosition = dataFileOutputStream.Position;
                }
                numRecords++;
            }
            // write out block explicitly at iterations 1 and 4
            if (i == 1 || i == 4)
            {
                dataFileWriter.Sync();
                syncLogs.Add(new SyncLog { Position = dataFileOutputStream.Position - DataFileConstants.SyncSize + 1, RemainingRecords = numRecords });
                lastPosition = dataFileOutputStream.Position;
            }
        }
        // final flush: log the end-of-file position with all records written
        dataFileWriter.Flush();
        syncLogs.Add(new SyncLog { Position = dataFileOutputStream.Position, RemainingRecords = numRecords });
    }
    MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());
    // read back: for each marker, records readable after syncing must equal
    // total records minus those written before the marker
    using (IFileReader <Foo> reader = DataFileReader <Foo> .OpenReader(dataFileInputStream))
    {
        long curPosition = 0;
        foreach (SyncLog syncLog in syncLogs)
        {
            int expectedRecords = numRecords - syncLog.RemainingRecords;
            long nextSyncPoint = syncLog.Position;
            // starting at the previous marker and starting just before this
            // marker must both yield the same record count
            AssertNumRecordsFromPosition(reader, curPosition, expectedRecords);
            AssertNumRecordsFromPosition(reader, nextSyncPoint - 1, expectedRecords);
            curPosition = nextSyncPoint;
        }
    }
}
[TestCase(specificSchema, Codec.Type.Null, 0, 330)] // 330
/// <summary>
/// Writes ten batches of records (with explicit syncs after batches 1 and 4),
/// then syncs the reader to <paramref name="position"/> and checks that
/// exactly <paramref name="expectedRecords"/> records remain readable.
/// </summary>
public void TestPartialRead(string schemaStr, Codec.Type codecType, int position, int expectedRecords)
{
    // Build the test data file in memory.
    IList<Foo> records = MakeRecords(GetTestFooObject());
    var outputStream = new MemoryStream();
    Schema schema = Schema.Parse(schemaStr);
    DatumWriter<Foo> datumWriter = new SpecificWriter<Foo>(schema);
    using (IFileWriter<Foo> fileWriter = DataFileWriter<Foo>.OpenWriter(datumWriter, outputStream, Codec.CreateCodec(codecType)))
    {
        for (int batch = 0; batch < 10; ++batch)
        {
            foreach (Foo record in records)
            {
                fileWriter.Append(record);
            }
            // Force a block boundary after batches 1 and 4.
            if (batch == 1 || batch == 4)
            {
                fileWriter.Sync();
            }
        }
    }

    // Sync to the requested position and count the remaining records.
    var inputStream = new MemoryStream(outputStream.ToArray());
    IList<Foo> readRecords = new List<Foo>();
    using (IFileReader<Foo> fileReader = DataFileReader<Foo>.OpenReader(inputStream))
    {
        // move to next block from position
        fileReader.Sync(position);
        // read records from synced position
        foreach (Foo record in fileReader.NextEntries)
        {
            readRecords.Add(record);
        }
    }

    Assert.IsTrue(
        (readRecords != null && readRecords.Count == expectedRecords),
        string.Format("Error performing partial read after position: {0}", position));
}
/// <summary>
/// Writes a data file carrying a custom header metadata entry and verifies
/// the entry round-trips when the file is read back.
/// </summary>
public void TestMetaData(string key, object value, Codec.Type codecType, bool useTypeGetter)
{
    // Build a single-record file with the metadata entry attached.
    object[] source = new object[] { new object[] { "John", 23 } };
    IList<Foo> records = MakeRecords(source);
    var outputStream = new MemoryStream();
    Schema schema = Schema.Parse(specificSchema);
    DatumWriter<Foo> datumWriter = new SpecificWriter<Foo>(schema);
    using (IFileWriter<Foo> fileWriter = DataFileWriter<Foo>.OpenWriter(datumWriter, outputStream, Codec.CreateCodec(codecType)))
    {
        SetMetaData(fileWriter, key, value);
        foreach (Foo record in records)
        {
            fileWriter.Append(record);
        }
    }

    // Reopen the file and validate the header metadata.
    var inputStream = new MemoryStream(outputStream.ToArray());
    using (IFileReader<Foo> fileReader = DataFileReader<Foo>.OpenReader(inputStream))
    {
        Assert.IsTrue(
            ValidateMetaData(fileReader, key, value, useTypeGetter),
            string.Format("Error validating header meta data for key: {0}, expected value: {1}", key, value));
    }
}
/// <summary>
/// Wraps an Avro data-file writer over the given writable file stream and
/// moves the stream position to its end so subsequent records are appended.
/// </summary>
public AvroWriter(IAvroFileValueDef<V> valueDef, FileStream stream, Codec.Type codec)
    : base(valueDef, stream)
{
    Preconditions.CheckArgument(Stream.CanWrite);
    mWriter = DataFileWriter<GenericRecord>.OpenWriter(
        new GenericDatumWriter<GenericRecord>(ValueDef.Schema),
        Stream,
        Codec.CreateCodec(codec));
    Stream.Position = Stream.Length;
}
/// <summary>
/// Round-trips a generic record through every generic reader/writer factory
/// combination and asserts at least one record is read back.
/// </summary>
public void TestGenericData(string schemaStr, object[] value, Codec.Type codecType)
{
    // Parse the schema once; the original parsed the identical string twice
    // per factory iteration (once for the writer, once for mkRecord).
    Schema schema = Schema.Parse(schemaStr);
    foreach (var rwFactory in GenericOptions<GenericRecord>())
    {
        // Create and write out
        MemoryStream dataFileOutputStream = new MemoryStream();
        using (var writer = rwFactory.CreateWriter(dataFileOutputStream, schema, Codec.CreateCodec(codecType)))
        {
            writer.Append(mkRecord(value, schema as RecordSchema));
        }

        // Read back
        MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());
        IList<GenericRecord> readFoos = new List<GenericRecord>();
        using (IFileReader<GenericRecord> reader = rwFactory.CreateReader(dataFileInputStream, null))
        {
            foreach (GenericRecord foo in reader.NextEntries)
            {
                readFoos.Add(foo);
            }
        }
        Assert.IsTrue((readFoos != null && readFoos.Count > 0),
            string.Format(@"Generic object: {0} did not serialise/deserialise correctly", readFoos));
    }
}
/// <summary>
/// Exports the result of a SQL query to an Avro data file (Deflate codec).
/// Arguments are "--key=value" pairs: output (required), server, database,
/// username, password, query or input (a file containing the query).
/// Returns 0 on success, 1 on missing arguments or an empty result set.
/// </summary>
public static int Main(string[] args)
{
    RecordSchema schema = null;
    var options = args.ToDictionary(
        arg => arg.TrimStart('-').Split('=').FirstOrDefault(),
        arg => arg.Split('=').LastOrDefault().Trim(new[] { '\'', '"' }));

    if (string.IsNullOrEmpty(options.GetOrDefault("output")))
    {
        PrintHelpMessage();
        return 1;
    }

    var builder = new SqlConnectionStringBuilder
    {
        DataSource = options.GetOrDefault("server", "localhost"),
        InitialCatalog = options.GetOrDefault("database", "RabotaUA2")
    };
    if (!string.IsNullOrEmpty(options.GetOrDefault("password")))
    {
        builder.UserID = options.GetOrDefault("username", "sa");
        builder.Password = options.GetOrDefault("password", "");
    }
    else
    {
        builder.IntegratedSecurity = true;
    }

    var query = options.GetOrDefault("query", null) ?? File.ReadAllText(options.GetOrDefault("input"));

    // The original leaked the connection, command, data reader, code
    // provider, and (on any exception) the Avro writer. All are now
    // disposed deterministically; the writer is closed in a finally so the
    // output file is flushed even if a later row fails.
    IFileWriter<GenericRecord> writer = null;
    try
    {
        using (var provider = new CSharpCodeProvider())
        using (var connection = new SqlConnection(builder.ConnectionString))
        using (var command = new SqlCommand(query, connection) { CommandTimeout = 0 })
        {
            connection.Open();
            using (var reader = command.ExecuteReader())
            {
                if (!reader.HasRows)
                {
                    return 1;
                }
                while (reader.Read())
                {
                    if (schema == null)
                    {
                        // Derive the Avro record schema from the first row's
                        // column names and CLR types; every field is nullable.
                        schema = Schema.Parse(JsonConvert.SerializeObject(new
                        {
                            type = "record",
                            name = "row",
                            fields = Enumerable.Range(0, reader.FieldCount).Select(index => new
                            {
                                name = reader.GetName(index),
                                type = new[] { provider.GetTypeOutput(new CodeTypeReference(reader.GetFieldType(index))), "null" }
                            })
                        })) as RecordSchema;
                        writer = DataFileWriter<GenericRecord>.OpenWriter(
                            new GenericDatumWriter<GenericRecord>(schema),
                            options.GetOrDefault("output"),
                            Codec.CreateCodec(Codec.Type.Deflate));
                    }
                    var record = new GenericRecord(schema);
                    for (var i = 0; i < reader.FieldCount; i++)
                    {
                        record.Add(reader.GetName(i), reader.IsDBNull(i) ? null : reader[i]);
                    }
                    writer.Append(record);
                }
            }
        }
        return 0;
    }
    finally
    {
        writer?.Close();
    }
}
// Convenience overload: opens a writer over the stream with no compression
// (Null codec).
public static Writer OpenWriter(GenericDatumWriter writer, Stream outStream)
{
    return OpenWriter(writer, outStream, Codec.CreateCodec(Codec.Type.Null));
}
// Convenience overload: creates (or truncates) the file at the given path
// and opens a writer over it with no compression (Null codec).
public static Writer OpenWriter(GenericDatumWriter writer, string path)
{
    var fileStream = new FileStream(path, FileMode.Create);
    return OpenWriter(writer, fileStream, Codec.CreateCodec(Codec.Type.Null));
}
// Verifies the writer and reader work over non-seekable streams, and that
// the seek-dependent operations (Sync on write; Seek/PreviousSync on read)
// throw the expected exceptions. NOTE(review): the DeflateStream must be
// fully written before compressedStream.ToArray() is read back; the inner
// using block's disposal appears to be relied on for that flush — confirm.
public void TestNonSeekableStream(string schemaStr, object[] value, Codec.Type codecType)
{
    foreach (var rwFactory in GenericOptions <GenericRecord>())
    {
        // Create and write out
        MemoryStream compressedStream = new MemoryStream();
        // using here a DeflateStream as it is a standard non-seekable stream, so if it works for this one,
        // it should also works with any standard non-seekable stream (ie: NetworkStreams)
        DeflateStream dataFileOutputStream = new DeflateStream(compressedStream, CompressionMode.Compress);
        using (var writer = rwFactory.CreateWriter(dataFileOutputStream, Schema.Parse(schemaStr), Codec.CreateCodec(codecType)))
        {
            writer.Append(mkRecord(value, Schema.Parse(schemaStr) as RecordSchema));
            // The Sync method is not supported for non-seekable streams.
            Assert.Throws <NotSupportedException>(() => writer.Sync());
        }
        DeflateStream dataFileInputStream = new DeflateStream(new MemoryStream(compressedStream.ToArray()), CompressionMode.Decompress);
        // Read back
        IList <GenericRecord> readFoos = new List <GenericRecord>();
        using (IFileReader <GenericRecord> reader = rwFactory.CreateReader(dataFileInputStream, null))
        {
            foreach (GenericRecord foo in reader.NextEntries)
            {
                readFoos.Add(foo);
            }
            // These methods are not supported for non-seekable streams.
            Assert.Throws <AvroRuntimeException>(() => reader.Seek(0));
            Assert.Throws <AvroRuntimeException>(() => reader.PreviousSync());
        }
        Assert.IsTrue((readFoos != null && readFoos.Count > 0),
            string.Format(@"Generic object: {0} did not serialise/deserialise correctly", readFoos));
    }
}
/// <summary>
/// Round-trips a single primitive value through every generic reader/writer
/// factory combination, delegating verification to CheckPrimitive.
/// </summary>
public void TestPrimitiveData(string schemaStr, object value, Codec.Type codecType)
{
    foreach (var rwFactory in GenericOptions<object>())
    {
        // Write the value as a one-entry data file in memory.
        var outputStream = new MemoryStream();
        using (var fileWriter = rwFactory.CreateWriter(outputStream, Schema.Parse(schemaStr), Codec.CreateCodec(codecType)))
        {
            fileWriter.Append(value);
        }

        // Read it back and verify against the original value.
        var inputStream = new MemoryStream(outputStream.ToArray());
        Assert.IsTrue(
            CheckPrimitive(inputStream, value, rwFactory.CreateReader),
            string.Format("Error reading generic data for object: {0}", value));
    }
}
// Serializes a single value of type T to the given stream as an Avro data
// file, compressed with the supplied codec.
private void WriteGeneric<T>(Stream output, Schema schema, T value, Codec.Type codecType)
{
    DatumWriter<T> datumWriter = new GenericWriter<T>(schema);
    using (IFileWriter<T> fileWriter = DataFileWriter<T>.OpenWriter(datumWriter, output, Codec.CreateCodec(codecType)))
    {
        fileWriter.Append(value);
    }
}