public void TestMetaData(string key, object value, Codec.Type codecType, bool useTypeGetter)
{
    // Build the records to write.
    object[] payload = new object[] { new object[] { "John", 23 } };
    IList<Foo> written = MakeRecords(payload);

    // Write the records, stamping the custom metadata pair into the file header.
    MemoryStream outputStream = new MemoryStream();
    Schema schema = Schema.Parse(specificSchema);
    DatumWriter<Foo> datumWriter = new SpecificWriter<Foo>(schema);
    using (IFileWriter<Foo> fileWriter = DataFileWriter<Foo>.OpenWriter(datumWriter, outputStream, Codec.CreateCodec(codecType)))
    {
        SetMetaData(fileWriter, key, value);
        foreach (Foo record in written)
        {
            fileWriter.Append(record);
        }
    }

    // Re-open the serialized bytes and verify the header metadata round-tripped.
    MemoryStream inputStream = new MemoryStream(outputStream.ToArray());
    using (IFileReader<Foo> fileReader = DataFileReader<Foo>.OpenReader(inputStream))
    {
        Assert.IsTrue(
            ValidateMetaData(fileReader, key, value, useTypeGetter),
            string.Format("Error validating header meta data for key: {0}, expected value: {1}", key, value));
    }
}
public void TestEnumResolution()
{
    // Writer schema has symbols [FIRST, SECOND]; the reader schema prepends THIRD,
    // so enum resolution must match symbols by name, not by ordinal position.
    Schema writerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," +
        "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\": \"EnumType\", \"symbols\": [\"FIRST\", \"SECOND\"]} }]}");
    Schema readerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," +
        "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\": \"EnumType\", \"symbols\": [\"THIRD\", \"FIRST\", \"SECOND\"]} }]}");

    var record = new EnumRecord();
    record.enumType = EnumType.SECOND;

    // Serialize with the writer schema.
    var buffer = new MemoryStream();
    var encoder = new BinaryEncoder(buffer);
    new SpecificWriter<EnumRecord>(writerSchema).Write(record, encoder);

    // Deserialize with the (writer, reader) schema pair and check the symbol survived.
    buffer.Position = 0;
    var decoder = new BinaryDecoder(buffer);
    var specificReader = new SpecificReader<EnumRecord>(writerSchema, readerSchema);
    var roundTripped = specificReader.Read(null, decoder);
    Assert.AreEqual(EnumType.SECOND, roundTripped.enumType);
}
// Disabled due to long runtime [TestCase(specificSchema, Codec.Type.Deflate, 1000, 588, 998)]
// Verifies that explicit sync markers written by the writer can be discovered
// by the reader and then used as Seek targets to resume reading mid-file.
public void TestSyncAndSeekPositions(string schemaStr, Codec.Type codecType, int iterations, int firstSyncPosition, int secondSyncPosition)
{
    // create and write out: append `iterations` batches of records, forcing
    // explicit sync markers after the two requested batch indices.
    IList<Foo> records = MakeRecords(GetTestFooObject());
    MemoryStream dataFileOutputStream = new MemoryStream();
    Schema schema = Schema.Parse(schemaStr);
    DatumWriter<Foo> writer = new SpecificWriter<Foo>(schema);
    using (IFileWriter<Foo> dataFileWriter = DataFileWriter<Foo>.OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
    {
        for (int i = 0; i < iterations; ++i)
        {
            foreach (Foo foo in records)
            {
                dataFileWriter.Append(foo);
            }
            // write out block
            if (i == firstSyncPosition || i == secondSyncPosition)
            {
                dataFileWriter.Sync();
            }
        }
    }
    MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());
    // read syncs: collect each distinct sync position seen while iterating.
    IList<long> syncs = new List<long>();
    using (IFileReader<Foo> reader = DataFileReader<Foo>.OpenReader(dataFileInputStream))
    {
        long previousSync = -1;
        foreach (Foo foo in reader.NextEntries)
        {
            // Record a sync point only when it changes, skipping the case where
            // the current position equals the sync position (EOF boundary).
            if (reader.PreviousSync() != previousSync
                && reader.Tell() != reader.PreviousSync()) // EOF
            {
                previousSync = reader.PreviousSync();
                syncs.Add(previousSync);
            }
        }
        // verify syncs wth seeks
        reader.Sync(0); // first sync
        Assert.AreEqual(reader.PreviousSync(), syncs[0],
            string.Format("Error syncing reader to position: {0}", syncs[0]));
        foreach (long sync in syncs) // the rest: Seek directly to each recorded sync
        {
            reader.Seek(sync);
            Foo foo = reader.Next();
            Assert.IsNotNull(foo, string.Format("Error seeking to sync position: {0}", sync));
        }
    }
}
/// <summary>
/// Creates a reusable delegate that Avro-serializes a <typeparamref name="T"/>
/// instance onto the supplied stream. The writer is built once and captured.
/// </summary>
public Action<Stream, T> BuildSerializer()
{
    var writer = new SpecificWriter<T>(_schema);
    return (stream, value) =>
    {
        writer.Write(value, new BinaryEncoder(stream));
    };
}
/// <summary>
/// Resolves the Avro writer schema for <typeparamref name="T"/> and constructs
/// the writer. Generated specific types supply their schema via the public
/// static _SCHEMA field; a fixed set of primitive types map to primitive schemas.
/// </summary>
/// <exception cref="ArgumentException">
/// Thrown when <typeparamref name="T"/> is not a supported type, or when a
/// specific type does not expose the expected _SCHEMA field.
/// </exception>
private void Initialize()
{
    Type writerType = typeof(T);
    if (typeof(ISpecificRecord).IsAssignableFrom(writerType) || writerType.IsSubclassOf(typeof(SpecificFixed)))
    {
        // Fix: GetField may return null for a hand-written type that implements
        // ISpecificRecord without the generated _SCHEMA field; fail with a clear
        // message instead of a NullReferenceException.
        var schemaField = writerType.GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static);
        if (schemaField == null)
        {
            throw new ArgumentException(
                $"{writerType.FullName} does not expose the public static _SCHEMA field " +
                "expected on Avro-generated specific types.");
        }
        WriterSchema = (Avro.Schema)schemaField.GetValue(null);
    }
    else if (writerType.Equals(typeof(int)))
    {
        WriterSchema = Avro.Schema.Parse("int");
    }
    else if (writerType.Equals(typeof(bool)))
    {
        WriterSchema = Avro.Schema.Parse("boolean");
    }
    else if (writerType.Equals(typeof(double)))
    {
        WriterSchema = Avro.Schema.Parse("double");
    }
    else if (writerType.Equals(typeof(string)))
    {
        // Note: It would arguably be better to make this a union with null, to
        // exactly match the .NET string type, however we don't for consistency
        // with the Java avro serializer.
        WriterSchema = Avro.Schema.Parse("string");
    }
    else if (writerType.Equals(typeof(float)))
    {
        WriterSchema = Avro.Schema.Parse("float");
    }
    else if (writerType.Equals(typeof(long)))
    {
        WriterSchema = Avro.Schema.Parse("long");
    }
    else if (writerType.Equals(typeof(byte[])))
    {
        // Note: It would arguably be better to make this a union with null, to
        // exactly match the .NET byte[] type, however we don't for consistency
        // with the Java avro serializer.
        WriterSchema = Avro.Schema.Parse("bytes");
    }
    else
    {
        throw new ArgumentException(
            $"{nameof(AvroSerializer<T>)} " +
            "only accepts type parameters of int, bool, double, string, float, " +
            "long, byte[], instances of ISpecificRecord and subclasses of SpecificFixed."
        );
    }
    avroWriter = new SpecificWriter<T>(WriterSchema);
    writerSchemaString = WriterSchema.ToString();
}
/// <summary>
/// Serializes <paramref name="actual"/> with the given writer schema and
/// returns a rewound stream holding the binary encoding. Also feeds the raw
/// bytes through the alternate-serializer cross-check.
/// </summary>
private static Stream serialize<T>(Schema ws, T actual)
{
    var buffer = new MemoryStream();
    Encoder encoder = new BinaryEncoder(buffer);
    new SpecificWriter<T>(ws).Write(actual, encoder);
    buffer.Flush();
    buffer.Position = 0;
    checkAlternateSerializers(buffer.ToArray(), actual, ws);
    return buffer;
}
/// <summary>
/// Converts a specific record to its generic representation by serializing it
/// to a buffer and reading the bytes back with a GenericReader.
/// </summary>
private static GenericRecord ConvertSpecificToGeneric<T>(T obj, Schema schema)
{
    var buffer = new MemoryStream();
    var encoder = new BinaryEncoder(buffer);
    var decoder = new BinaryDecoder(buffer);

    new SpecificWriter<T>(schema).Write(obj, encoder);
    encoder.Flush();
    buffer.Position = 0;

    var genericReader = new GenericReader<GenericRecord>(schema, schema);
    return genericReader.Read(null, decoder);
}
[TestCase(specificSchema, Codec.Type.Null, 0, 330)] // 330
public void TestPartialRead(string schemaStr, Codec.Type codecType, int position, int expectedRecords)
{
    // Write ten batches of records, with explicit sync markers after batches
    // 1 and 4 so the file contains several independently readable blocks.
    IList<Foo> source = MakeRecords(GetTestFooObject());
    MemoryStream outputStream = new MemoryStream();
    Schema schema = Schema.Parse(schemaStr);
    DatumWriter<Foo> datumWriter = new SpecificWriter<Foo>(schema);
    using (IFileWriter<Foo> fileWriter = DataFileWriter<Foo>.OpenWriter(datumWriter, outputStream, Codec.CreateCodec(codecType)))
    {
        for (int batch = 0; batch < 10; ++batch)
        {
            foreach (Foo record in source)
            {
                fileWriter.Append(record);
            }
            // write out block
            if (batch == 1 || batch == 4)
            {
                fileWriter.Sync();
            }
        }
    }

    // Sync the reader to the requested position, then count what remains.
    IList<Foo> consumed = new List<Foo>();
    MemoryStream inputStream = new MemoryStream(outputStream.ToArray());
    using (IFileReader<Foo> fileReader = DataFileReader<Foo>.OpenReader(inputStream))
    {
        // move to next block from position
        fileReader.Sync(position);
        // read records from synced position
        foreach (Foo record in fileReader.NextEntries)
        {
            consumed.Add(record);
        }
    }
    Assert.IsTrue(
        consumed != null && consumed.Count == expectedRecords,
        string.Format("Error performing partial read after position: {0}", position));
}
/// <summary>
/// Encodes the message header followed by the message body into a single
/// byte array, using one binary encoder over a shared buffer.
/// </summary>
private byte[] Encode<T>(T body, MessageHeader header) where T : ISpecificRecord
{
    using (var buffer = new MemoryStream())
    {
        // Header and body are written back-to-back with the same encoder.
        var encoder = new BinaryEncoder(buffer);
        new SpecificWriter<MessageHeader>(header.Schema).Write(header, encoder);
        new SpecificWriter<T>(body.Schema).Write(body, encoder);
        return buffer.ToArray();
    }
}
/// <summary>
/// Encodes the specified message header and body.
/// </summary>
/// <typeparam name="T">The type of the message.</typeparam>
/// <param name="body">The message body.</param>
/// <param name="header">The message header.</param>
/// <param name="compression">The compression type.</param>
/// <returns>The encoded byte array containing the message data.</returns>
public static byte[] Encode<T>(this T body, IMessageHeader header, string compression) where T : ISpecificRecord
{
    using (var stream = new MemoryStream())
    {
        // create avro binary encoder to write to memory stream
        var headerEncoder = new BinaryEncoder(stream);
        var bodyEncoder = headerEncoder;
        Stream gzip = null;
        try
        {
            // compress message body if compression has been negotiated
            if (header.CanCompressMessageBody())
            {
                if (GzipEncoding.Equals(compression, StringComparison.InvariantCultureIgnoreCase))
                {
                    // add Compressed flag to message flags before writing header
                    header.SetBodyCompressed();
                    // leaveOpen: true — the underlying stream must survive the
                    // GZipStream disposal so ToArray() below can read it.
                    gzip = new GZipStream(stream, CompressionMode.Compress, true);
                    // Only the body encoder targets the gzip stream; the header
                    // stays uncompressed so receivers can read the flags first.
                    bodyEncoder = new BinaryEncoder(gzip);
                }
            }
            // serialize header
            var headerWriter = new SpecificWriter<IMessageHeader>(header.Schema);
            headerWriter.Write(header, headerEncoder);
            // serialize body
            var bodyWriter = new SpecificWriter<T>(body.Schema);
            bodyWriter.Write(body, bodyEncoder);
        }
        finally
        {
            // Disposing the gzip stream flushes any buffered compressed bytes
            // into the memory stream before ToArray() snapshots it.
            gzip?.Dispose();
        }
        return stream.ToArray();
    }
}
public void TestSpecificData(string schemaStr, object[] recs, Codec.Type codecType)
{
    // Serialize the supplied records into an in-memory Avro data file.
    IList<Foo> expected = MakeRecords(recs);
    MemoryStream outputStream = new MemoryStream();
    Schema schema = Schema.Parse(schemaStr);
    DatumWriter<Foo> datumWriter = new SpecificWriter<Foo>(schema);
    using (IFileWriter<Foo> fileWriter = DataFileWriter<Foo>.OpenWriter(datumWriter, outputStream, Codec.CreateCodec(codecType)))
    {
        foreach (Foo record in expected)
        {
            fileWriter.Append(record);
        }
    }

    // Read every record back out.
    IList<Foo> actual = new List<Foo>();
    MemoryStream inputStream = new MemoryStream(outputStream.ToArray());
    using (IFileReader<Foo> fileReader = DataFileReader<Foo>.OpenReader(inputStream))
    {
        foreach (Foo record in fileReader.NextEntries)
        {
            actual.Add(record);
        }
    }

    // Compare record-by-record via their string (Json) representations.
    Assert.AreEqual(expected.Count, actual.Count);
    for (int i = 0; i < expected.Count; i++)
    {
        Assert.AreEqual(expected[i].ToString(), actual[i].ToString());
    }
}
/// <summary>
/// Serializes <paramref name="data"/> in the Confluent wire format:
/// magic byte, then the big-endian schema id, then the Avro-encoded payload.
/// Registers (or looks up) the schema in the schema registry on first use
/// of each subject.
/// </summary>
/// <param name="topic">The topic the message is produced to; needed to derive the subject name.</param>
/// <param name="data">The value to serialize.</param>
/// <param name="isKey">True when serializing the message key, false for the value.</param>
/// <returns>The framed, serialized message bytes.</returns>
public async Task<byte[]> Serialize(string topic, T data, bool isKey)
{
    try
    {
        // Lazily resolve the writer schema and writer from the generated
        // _SCHEMA field on first call. NOTE(review): this check runs outside
        // the mutex below — confirm callers serialize first use, or this
        // lazy init may race.
        if (_writerSchema == null)
        {
            _writerSchema = (global::Avro.Schema) typeof(T).GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static).GetValue(null);
            _writerSchemaString = _writerSchema.ToString();
            _avroWriter = new SpecificWriter<T>(_writerSchema);
        }
        // Guard the per-subject registration bookkeeping.
        await _serializeMutex.WaitAsync().ConfigureAwait(continueOnCapturedContext: false);
        try
        {
            // The subject name depends on the topic and on key/value role.
            var subject = isKey
                ? SubjectNameFactory.KeySubjectNameFrom<T>(topic)
                : SubjectNameFactory.ValueSubjectNameFrom<T>(topic);
            if (!_subjectsRegistered.Contains(subject))
            {
                // first usage: register/get schema to check compatibility
                _writerSchemaId = _autoRegisterSchema
                    ? await _schemaRegistryClient.RegisterSchemaAsync(subject, _writerSchemaString).ConfigureAwait(continueOnCapturedContext: false)
                    : await _schemaRegistryClient.GetSchemaIdAsync(subject, _writerSchemaString).ConfigureAwait(continueOnCapturedContext: false);
                _subjectsRegistered.Add(subject);
            }
        }
        finally
        {
            _serializeMutex.Release();
        }
        if (_writerSchemaId.HasValue == false)
        {
            throw new Exception("Not SchemaId Available For Message");
        }
        using (var stream = new MemoryStream(_initialBufferSize))
        using (var writer = new BinaryWriter(stream))
        {
            // Confluent framing: magic byte + schema id in network byte order.
            stream.WriteByte(ConfluentConstants.MagicByte);
            writer.Write(IPAddress.HostToNetworkOrder(_writerSchemaId.Value));
            _avroWriter.Write(data, new BinaryEncoder(stream));
            // TODO: maybe change the ISerializer interface so that this copy isn't necessary.
            return stream.ToArray();
        }
    }
    catch (AggregateException e)
    {
        // Unwrap single-cause aggregates so callers see the real failure.
        if (e.InnerException == null)
        {
            throw;
        }
        throw e.InnerException;
    }
}
// Writes records while logging every sync-marker position together with how
// many records had been written when it was recorded, then verifies that
// reading from each logged position yields exactly the remaining records.
public void TestPartialReadAll(string schemaStr, Codec.Type codecType)
{
    // create and write out
    IList<Foo> records = MakeRecords(GetTestFooObject());
    MemoryStream dataFileOutputStream = new MemoryStream();
    Schema schema = Schema.Parse(schemaStr);
    DatumWriter<Foo> writer = new SpecificWriter<Foo>(schema);
    int numRecords = 0;
    List<SyncLog> syncLogs = new List<SyncLog>();
    using (IFileWriter<Foo> dataFileWriter = DataFileWriter<Foo>.OpenWriter(writer, dataFileOutputStream, Codec.CreateCodec(codecType)))
    {
        // Log the position just past the header's sync marker.
        dataFileWriter.Flush();
        syncLogs.Add(new SyncLog { Position = dataFileOutputStream.Position - DataFileConstants.SyncSize + 1, RemainingRecords = numRecords });
        long lastPosition = dataFileOutputStream.Position;
        for (int i = 0; i < 10; ++i)
        {
            foreach (Foo foo in records)
            {
                dataFileWriter.Append(foo);
                // A position change means a block (ending in a sync marker)
                // was flushed; log where that marker starts.
                if (dataFileOutputStream.Position != lastPosition)
                {
                    syncLogs.Add(new SyncLog { Position = dataFileOutputStream.Position - DataFileConstants.SyncSize + 1, RemainingRecords = numRecords });
                    lastPosition = dataFileOutputStream.Position;
                }
                numRecords++;
            }
            // write out block
            if (i == 1 || i == 4)
            {
                dataFileWriter.Sync();
                syncLogs.Add(new SyncLog { Position = dataFileOutputStream.Position - DataFileConstants.SyncSize + 1, RemainingRecords = numRecords });
                lastPosition = dataFileOutputStream.Position;
            }
        }
        // Final flush: log end-of-file with all records written.
        dataFileWriter.Flush();
        syncLogs.Add(new SyncLog { Position = dataFileOutputStream.Position, RemainingRecords = numRecords });
    }
    MemoryStream dataFileInputStream = new MemoryStream(dataFileOutputStream.ToArray());
    // read back
    using (IFileReader<Foo> reader = DataFileReader<Foo>.OpenReader(dataFileInputStream))
    {
        long curPosition = 0;
        foreach (SyncLog syncLog in syncLogs)
        {
            // Records remaining past this sync point.
            int expectedRecords = numRecords - syncLog.RemainingRecords;
            long nextSyncPoint = syncLog.Position;
            // Syncing anywhere inside the preceding block — from its start or
            // from one byte before the marker — must land on the same block.
            AssertNumRecordsFromPosition(reader, curPosition, expectedRecords);
            AssertNumRecordsFromPosition(reader, nextSyncPoint - 1, expectedRecords);
            curPosition = nextSyncPoint;
        }
    }
}
public void TestEnumResolution()
{
    // The reader schema declares an extra symbol (THIRD) ahead of the writer's
    // symbols, so correct resolution must be by symbol name, not index.
    Schema writerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," +
        "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\": \"EnumType\", \"symbols\": [\"FIRST\", \"SECOND\"]} }]}");
    Schema readerSchema = Schema.Parse("{\"type\":\"record\",\"name\":\"EnumRecord\",\"namespace\":\"Avro.Test\"," +
        "\"fields\":[{\"name\":\"enumType\",\"type\": { \"type\": \"enum\", \"name\": \"EnumType\", \"symbols\": [\"THIRD\", \"FIRST\", \"SECOND\"]} }]}");

    var input = new EnumRecord { enumType = EnumType.SECOND };

    // serialize
    var ms = new MemoryStream();
    new SpecificWriter<EnumRecord>(writerSchema).Write(input, new BinaryEncoder(ms));

    // deserialize
    ms.Position = 0;
    var output = new SpecificReader<EnumRecord>(writerSchema, readerSchema)
        .Read(null, new BinaryDecoder(ms));

    Assert.AreEqual(EnumType.SECOND, output.enumType);
}
/// <summary>
/// Constructs the serializer, resolving the Avro writer schema for
/// <typeparamref name="T"/>. Specific record/fixed types supply their schema
/// via the generated public static _SCHEMA field; a fixed set of primitive
/// types (plus Null) map to primitive schemas.
/// </summary>
/// <param name="schemaRegistryClient">Client used to register/look up schemas.</param>
/// <param name="autoRegisterSchema">Whether to auto-register the schema on first use.</param>
/// <param name="initialBufferSize">Initial size of the serialization buffer.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <typeparamref name="T"/> is not a supported type, or a specific
/// type lacks the expected _SCHEMA field.
/// </exception>
public SpecificSerializerImpl(
    ISchemaRegistryClient schemaRegistryClient,
    bool autoRegisterSchema,
    int initialBufferSize)
{
    this.schemaRegistryClient = schemaRegistryClient;
    this.autoRegisterSchema = autoRegisterSchema;
    this.initialBufferSize = initialBufferSize;

    Type writerType = typeof(T);
    // Fix: the error message below promises support for subclasses of
    // SpecificFixed, but only ISpecificRecord was checked; accept both,
    // matching the generated-type conventions.
    if (typeof(ISpecificRecord).IsAssignableFrom(writerType) || writerType.IsSubclassOf(typeof(SpecificFixed)))
    {
        // Fix: guard against a missing _SCHEMA field so a hand-written type
        // fails with a clear message instead of a NullReferenceException.
        var schemaField = writerType.GetField("_SCHEMA", BindingFlags.Public | BindingFlags.Static);
        if (schemaField == null)
        {
            throw new InvalidOperationException(
                $"{writerType.FullName} does not expose the public static _SCHEMA field " +
                "expected on Avro-generated specific types.");
        }
        writerSchema = (global::Avro.Schema)schemaField.GetValue(null);
    }
    else if (writerType.Equals(typeof(int)))
    {
        writerSchema = global::Avro.Schema.Parse("int");
    }
    else if (writerType.Equals(typeof(bool)))
    {
        writerSchema = global::Avro.Schema.Parse("boolean");
    }
    else if (writerType.Equals(typeof(double)))
    {
        writerSchema = global::Avro.Schema.Parse("double");
    }
    else if (writerType.Equals(typeof(string)))
    {
        // Note: It would arguably be better to make this a union with null, to
        // exactly match the .NET string type, however we don't for consistency
        // with the Java Avro serializer.
        writerSchema = global::Avro.Schema.Parse("string");
    }
    else if (writerType.Equals(typeof(float)))
    {
        writerSchema = global::Avro.Schema.Parse("float");
    }
    else if (writerType.Equals(typeof(long)))
    {
        writerSchema = global::Avro.Schema.Parse("long");
    }
    else if (writerType.Equals(typeof(byte[])))
    {
        // Note: It would arguably be better to make this a union with null, to
        // exactly match the .NET byte[] type, however we don't for consistency
        // with the Java Avro serializer.
        writerSchema = global::Avro.Schema.Parse("bytes");
    }
    else if (writerType.Equals(typeof(Null)))
    {
        writerSchema = global::Avro.Schema.Parse("null");
    }
    else
    {
        throw new InvalidOperationException(
            $"AvroSerializer only accepts type parameters of int, bool, double, string, float, " +
            "long, byte[], instances of ISpecificRecord and subclasses of SpecificFixed."
        );
    }
    avroWriter = new SpecificWriter<T>(writerSchema);
    writerSchemaString = writerSchema.ToString();
}