/// <summary>
/// Reads the data blocks available on <paramref name="reader"/> and resolves the items they contain.
/// </summary>
/// <param name="reader">Container reader; each block is read as an item count followed by a data block.</param>
/// <param name="header">Header whose <c>SyncData</c> is handed to <c>ReadDataBlock</c> for every block.</param>
/// <param name="codec">Codec handed to <c>ReadDataBlock</c>.</param>
/// <param name="resolver">Resolver used to materialize each item from the block payload.</param>
/// <returns>
/// <see cref="string.Empty"/> when <paramref name="reader"/> is already at its end; the single resolved
/// object when the first block declares at most one item; otherwise a list with the items of all blocks.
/// </returns>
internal object Read(Reader reader, Header header, AbstractCodec codec, Resolver resolver)
{
    if (reader.IsReadToEnd())
    {
        return string.Empty;
    }

    var result = new List<object>();

    do
    {
        long itemsCount = reader.ReadLong();
        var data = reader.ReadDataBlock(header.SyncData, codec);

        // Use a dedicated reader for the block payload. Overwriting `reader` here would make the
        // loop condition inspect the block buffer instead of the container stream, so every block
        // after the first one would be silently ignored.
        var blockReader = new Reader(new MemoryStream(data));

        // A first block with at most one item is returned as a bare object (original contract).
        if (itemsCount <= 1 && result.Count == 0)
        {
            return resolver.Resolve(blockReader);
        }

        // Otherwise accumulate, so a later single-item block does not discard earlier items.
        for (long i = 0; i < itemsCount; i++)
        {
            result.Add(resolver.Resolve(blockReader));
        }
    } while (!reader.IsReadToEnd());

    return result;
}
/// <summary>
/// Reads one data block from <paramref name="reader"/>, verifies its sync marker against
/// <paramref name="header"/>, decompresses it and resolves its content.
/// </summary>
/// <param name="reader">Source positioned at a block: a long count, a long byte size, the payload, then the sync marker.</param>
/// <param name="header">Header providing the expected <c>SyncData</c>.</param>
/// <param name="codec">Codec used to decompress the payload.</param>
/// <param name="resolver">Resolver used to materialize the items.</param>
/// <returns>A list of resolved items when the count is greater than one; otherwise a single resolved object.</returns>
/// <exception cref="AvroRuntimeException">The sync marker read after the payload differs from the header's.</exception>
internal object Read(IReader reader, Header header, AbstractCodec codec, Resolver resolver)
{
    var itemCount = reader.ReadLong();
    var payloadSize = reader.ReadLong();

    var marker = new byte[DataFileConstants.SyncSize];
    var payload = new byte[payloadSize];

    reader.ReadFixed(payload, 0, (int)payloadSize);
    reader.ReadFixed(marker);

    bool markerMatches = marker.SequenceEqual(header.SyncData);
    if (!markerMatches)
    {
        throw new AvroRuntimeException("Invalid sync!");
    }

    payload = codec.Decompress(payload);

    // Items are resolved from the decompressed payload, not from the original source.
    IReader blockReader = new Reader(new MemoryStream(payload));

    if (!(itemCount > 1))
    {
        return resolver.Resolve(blockReader);
    }

    var items = new List<object>();
    for (int index = 0; index < itemCount; index++)
    {
        items.Add(resolver.Resolve(blockReader));
    }

    return items;
}
/// <summary>
/// Generates sync data, stores the codec name and schema in the header metadata and writes the header.
/// </summary>
/// <param name="schema">Schema text stored under <c>DataFileConstants.SchemaMetadataKey</c>.</param>
/// <param name="codecType">Codec type whose name is stored under <c>DataFileConstants.CodecMetadataKey</c>.</param>
internal void WriteHeader(string schema, CodecType codecType)
{
    GenerateSyncData();

    var codecName = AbstractCodec.CreateCodec(codecType).Name;

    _header.AddMetadata(DataFileConstants.CodecMetadataKey, codecName);
    _header.AddMetadata(DataFileConstants.SchemaMetadataKey, schema);

    _writer.WriteHeader(_header);
}
/// <summary>
/// Reads a raw block, validates the sync data that follows it and returns the decompressed block.
/// </summary>
/// <param name="syncData">Sync data passed to <c>ReadAndValidateSync</c>.</param>
/// <param name="codec">Codec used to decompress the raw block.</param>
/// <returns>The result of <c>codec.Decompress</c> applied to the raw block.</returns>
internal byte[] ReadDataBlock(byte[] syncData, AbstractCodec codec)
{
    var rawBlock = ReadRawBlock();

    // Sync validation happens before decompression.
    ReadAndValidateSync(syncData);

    return codec.Decompress(rawBlock);
}
/// <summary>
/// Decodes Avro data from <paramref name="stream"/>, with or without a leading Avro header.
/// </summary>
/// <param name="stream">Input stream; it is rewound to the beginning when no header is found.</param>
/// <param name="schema">
/// Schema to resolve with. Required when the data has no header; when a header is present and this
/// is null, the schema stored in the header metadata is parsed instead.
/// </param>
/// <returns>The object produced by the resolver (headerless data) or by <c>Read</c> (data with header).</returns>
/// <exception cref="MissingSchemaException">The data has no header and <paramref name="schema"/> is null.</exception>
internal object Decode(Stream stream, Schema.Schema schema)
{
    var reader = new Reader(stream);
    var header = new Header();

    // Probe the beginning of the stream for the Avro header marker.
    var leadingBytes = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(leadingBytes);
    }
    catch (EndOfStreamException)
    {
        // Stream is shorter than the Avro header; the marker comparison below decides what happens.
    }

    bool hasHeader = leadingBytes.SequenceEqual(DataFileConstants.AvroHeader);
    if (!hasHeader)
    {
        if (schema == null)
        {
            throw new MissingSchemaException("Provide valid schema for the Avro data");
        }

        var headlessResolver = new Resolver(schema);
        stream.Seek(0, SeekOrigin.Begin);
        return headlessResolver.Resolve(reader);
    }

    // Read the metadata map, chunk by chunk, until a chunk count of zero is reported.
    long entryCount = reader.ReadMapStart();
    if (entryCount > 0)
    {
        while (entryCount != 0)
        {
            for (long index = 0; index < entryCount; index++)
            {
                string key = reader.ReadString();
                byte[] value = reader.ReadBytes();
                header.AddMetadata(key, value);
            }

            entryCount = reader.ReadMapNext();
        }
    }

    Schema.Schema effectiveSchema =
        schema ?? Schema.Schema.Parse(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
    var resolver = new Resolver(effectiveSchema);

    // The sync data follows the metadata map.
    reader.ReadFixed(header.SyncData);

    var codecName = header.GetMetadata(DataFileConstants.CodecMetadataKey);
    var codec = AbstractCodec.CreateCodecFromString(codecName);

    return Read(reader, header, codec, resolver);
}
/// <summary>
/// Splits a serialized Avro object into its header, codec and data blocks.
/// </summary>
/// <param name="avroObject">Bytes of the Avro object, starting with the Avro header marker.</param>
/// <returns>
/// An <c>AvroObjectContent</c> with <c>Codec</c>, <c>Header</c> (including its <c>Schema</c>) and one
/// <c>DataBlock</c> per block read until the reader reports its end.
/// </returns>
/// <exception cref="InvalidAvroObjectException">The bytes do not start with the Avro header marker.</exception>
internal AvroObjectContent ExtractAvroObjectContent(byte[] avroObject)
{
    using (var stream = new MemoryStream(avroObject))
    {
        var reader = new Reader(stream);

        // Probe the beginning of the buffer for the Avro header marker.
        var leadingBytes = new byte[DataFileConstants.AvroHeader.Length];
        try
        {
            reader.ReadFixed(leadingBytes);
        }
        catch (EndOfStreamException)
        {
            // Input is shorter than the Avro header; the marker comparison below decides what happens.
        }

        bool hasHeader = leadingBytes.SequenceEqual(DataFileConstants.AvroHeader);
        if (!hasHeader)
        {
            throw new InvalidAvroObjectException("Object does not contain Avro Header");
        }

        AvroObjectContent content = new AvroObjectContent();
        var header = reader.ReadHeader();

        var codecName = header.GetMetadata(DataFileConstants.CodecMetadataKey);
        content.Codec = AbstractCodec.CreateCodecFromString(codecName);

        reader.ReadFixed(header.SyncData);

        content.Header = header;
        content.Header.Schema = Schema.Create(content.Header.GetMetadata(DataFileConstants.SchemaMetadataKey));

        // Collect blocks until the reader reports that everything has been consumed.
        while (!reader.IsReadToEnd())
        {
            var block = new DataBlock();
            block.ItemsCount = reader.ReadLong();
            block.Data = reader.ReadDataBlock(header.SyncData, content.Codec);

            content.DataBlocks.Add(block);
        }

        return content;
    }
}
/// <summary>
/// Verifies that <c>AbstractCodec.CreateCodecFromString</c> returns a <c>NullCodec</c> for an unknown codec name.
/// </summary>
public void CreateCodecFromString_NonExistingString_DefaultCodecIsReturned()
{
    // Arrange
    const string unknownCodecName = "NonExistingCodec";

    // Act
    var codec = AbstractCodec.CreateCodecFromString(unknownCodecName);

    // Assert
    Assert.IsType<NullCodec>(codec);
}
/// <summary>
/// Decodes an Avro object with header from <paramref name="stream"/> into <typeparamref name="T"/>.
/// </summary>
/// <param name="stream">Input stream that must start with the Avro header marker.</param>
/// <param name="readSchema">Reader schema; when null, the writer schema from the header metadata is used.</param>
/// <returns>The value produced by <c>Read&lt;T&gt;</c> for the data that follows the header.</returns>
/// <exception cref="InvalidAvroObjectException">The stream does not start with the Avro header marker.</exception>
internal T Decode<T>(Stream stream, Schema.Schema readSchema)
{
    var reader = new Reader(stream);
    var header = new Header();

    // Probe the beginning of the stream for the Avro header marker.
    byte[] firstBytes = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(firstBytes);
    }
    catch (EndOfStreamException)
    {
        // Stream is shorter than the Avro header; rejected by the marker comparison below.
    }

    if (!firstBytes.SequenceEqual(DataFileConstants.AvroHeader))
    {
        throw new InvalidAvroObjectException("Object does not contain Avro Header");
    }

    // Read the metadata map, chunk by chunk, until a chunk count of zero is reported.
    long len = reader.ReadMapStart();
    if (len > 0)
    {
        do
        {
            for (long i = 0; i < len; i++)
            {
                string key = reader.ReadString();
                byte[] val = reader.ReadBytes();
                header.AddMetadata(key, val);
            }
        } while ((len = reader.ReadMapNext()) != 0);
    }

    // Parse the writer schema once and reuse it as the reader schema when none was supplied,
    // instead of parsing the same metadata entry twice.
    Schema.Schema writeSchema = Schema.Schema.Parse(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
    readSchema = readSchema ?? writeSchema;
    var resolver = new Resolver(writeSchema, readSchema);

    // The sync data follows the metadata map.
    reader.ReadFixed(header.SyncData);

    var codec = AbstractCodec.CreateCodecFromString(header.GetMetadata(DataFileConstants.CodecMetadataKey));

    return Read<T>(reader, header, codec, resolver);
}
/// <summary>
/// Stores the reader, sync data, codec and schemas, then calls <c>LoadNextDataBlock</c>
/// unless the reader is already at its end.
/// </summary>
/// <param name="reader">Reader the blocks are loaded from.</param>
/// <param name="syncDate">Sync data kept for later use.</param>
/// <param name="codec">Codec kept for later use.</param>
/// <param name="writeSchema">Writer schema.</param>
/// <param name="readSchema">Reader schema.</param>
internal BaseLineReader(Reader reader, byte[] syncDate, AbstractCodec codec, TypeSchema writeSchema, TypeSchema readSchema)
{
    _reader = reader;
    _syncDate = syncDate;
    _codec = codec;

    _writeSchema = writeSchema;
    _readSchema = readSchema;

    // Nothing to preload when the reader has no more data.
    if (!_reader.IsReadToEnd())
    {
        LoadNextDataBlock();
    }
}
/// <summary>
/// Creates a merge encoder over <paramref name="outStream"/> with a <c>NullCodec</c>,
/// the default sync interval, an in-memory temporary buffer and a fresh header.
/// </summary>
/// <param name="outStream">Stream the main writer is created on.</param>
internal MergeEncoder(Stream outStream)
{
    // Codec and output target.
    _codec = new NullCodec();
    _stream = outStream;

    // Block bookkeeping.
    _syncInterval = DataFileConstants.DefaultSyncInterval;
    _blockCount = 0;

    // Main writer over the output stream.
    _writer = new Writer(_stream);

    // Temporary in-memory buffer with its own writer.
    _tempBuffer = new MemoryStream();
    _tempWriter = new Writer(_tempBuffer);

    _isOpen = true;
    _header = new Header();
}
/// <summary>
/// Creates a decoder over <paramref name="stream"/>: validates the Avro header marker, reads the
/// metadata map and sync data, parses the schema and selects the codec.
/// </summary>
/// <param name="stream">Input stream that must start with the Avro header marker.</param>
/// <exception cref="InvalidAvroObjectException">
/// Reading the marker bytes fails, or the bytes read differ from the Avro header marker.
/// </exception>
private Decoder(Stream stream)
{
    _stream = stream;
    _header = new Header();
    _reader = new Reader(stream);
    _syncBuffer = new byte[DataFileConstants.SyncSize];

    // Validate the header marker.
    var leadingBytes = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        _reader.ReadFixed(leadingBytes);
    }
    catch (Exception)
    {
        throw new InvalidAvroObjectException("Cannot read length of Avro Header");
    }

    bool hasHeader = leadingBytes.SequenceEqual(DataFileConstants.AvroHeader);
    if (!hasHeader)
    {
        throw new InvalidAvroObjectException("Cannot read Avro Header");
    }

    // Read the metadata map, chunk by chunk, until a chunk count of zero is reported.
    long entryCount = _reader.ReadMapStart();
    if (entryCount > 0)
    {
        while (entryCount != 0)
        {
            for (long index = 0; index < entryCount; index++)
            {
                string key = _reader.ReadString();
                byte[] value = _reader.ReadBytes();
                _header.MetaData.Add(key, value);
            }

            entryCount = _reader.ReadMapNext();
        }
    }

    // The sync data follows the metadata map.
    _reader.ReadFixed(_header.SyncData);

    // Parse the schema stored in the metadata and build the resolver from it.
    var schemaText = GetMetaString(DataFileConstants.SchemaMetadataKey);
    _header.Schema = Schema.Schema.Parse(schemaText);
    _resolver = new Resolver(_header.Schema, _readerSchema ?? _header.Schema);

    // Select the codec named in the metadata.
    var codecName = GetMetaString(DataFileConstants.CodecMetadataKey);
    _codec = AbstractCodec.CreateCodecFromString(codecName);
}
/// <summary>
/// Creates an encoder that writes to <paramref name="outStream"/> using the codec for
/// <paramref name="codecType"/> and the item writer resolved for <paramref name="schema"/>.
/// </summary>
/// <param name="schema">Schema kept by the encoder and used to resolve the item writer.</param>
/// <param name="outStream">Stream the main writer is created on.</param>
/// <param name="codecType">Codec type passed to <c>AbstractCodec.CreateCodec</c>.</param>
internal Encoder(Schema.Schema schema, Stream outStream, CodecType codecType)
{
    // Codec, output target, metadata and schema.
    _codec = AbstractCodec.CreateCodec(codecType);
    _stream = outStream;
    _metadata = new Metadata();
    _schema = schema;

    // Block bookkeeping.
    _syncInterval = DataFileConstants.DefaultSyncInterval;
    _blockCount = 0;

    // Main writer over the output stream.
    _encoder = new Writer(_stream);

    // In-memory block buffer with its own writer.
    _blockStream = new MemoryStream();
    _blockEncoder = new Writer(_blockStream);

    _writer = Resolver.ResolveWriter(schema);
    _isOpen = true;
}
/// <summary>
/// Opens a line reader over Avro data in <paramref name="stream"/>, with or without a leading Avro header.
/// </summary>
/// <param name="stream">Input stream; it is rewound to the beginning when no header is found.</param>
/// <param name="readSchema">
/// Reader schema. Required when the data has no header; when a header is present and this is null,
/// the writer schema from the header metadata is used.
/// </param>
/// <returns>
/// A <c>ListLineReader&lt;T&gt;</c> for headerless data, otherwise a <c>BaseLineReader&lt;T&gt;</c>.
/// </returns>
/// <exception cref="MissingSchemaException">The data has no header and <paramref name="readSchema"/> is null.</exception>
internal static ILineReader<T> OpenReader<T>(Stream stream, TypeSchema readSchema)
{
    var reader = new Reader(stream);

    // Probe the beginning of the stream for the Avro header marker.
    byte[] firstBytes = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(firstBytes);
    }
    catch (EndOfStreamException)
    {
        // Stream is shorter than the Avro header; treated as headless by the comparison below.
    }

    // Headless data: the caller must supply the schema.
    if (!firstBytes.SequenceEqual(DataFileConstants.AvroHeader))
    {
        if (readSchema == null)
        {
            throw new MissingSchemaException("Provide valid schema for the Avro data");
        }

        var headlessResolver = new Resolver(readSchema, readSchema);
        stream.Seek(0, SeekOrigin.Begin);
        return new ListLineReader<T>(reader, headlessResolver);
    }

    var header = reader.ReadHeader();

    // Build the writer schema once and reuse it as the reader schema when none was supplied,
    // instead of creating it twice from the same metadata entry.
    TypeSchema writeSchema = Schema.Create(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
    readSchema = readSchema ?? writeSchema;

    // The sync data follows the header.
    reader.ReadFixed(header.SyncData);

    var codec = AbstractCodec.CreateCodecFromString(header.GetMetadata(DataFileConstants.CodecMetadataKey));

    return new BaseLineReader<T>(reader, header.SyncData, codec, writeSchema, readSchema);
}
/// <summary>
/// Reads one data block from <paramref name="reader"/>, verifies its sync marker against
/// <paramref name="header"/>, decompresses it and resolves it as <typeparamref name="T"/>.
/// </summary>
/// <param name="reader">Source positioned at a block: a long count, a long byte size, the payload, then the sync marker.</param>
/// <param name="header">Header providing the expected <c>SyncData</c>.</param>
/// <param name="codec">Codec used to decompress the payload.</param>
/// <param name="resolver">Resolver that receives the payload reader and the count read from the block.</param>
/// <returns>The value returned by <c>resolver.Resolve&lt;T&gt;</c>.</returns>
/// <exception cref="AvroRuntimeException">The sync marker read after the payload differs from the header's.</exception>
internal T Read<T>(IReader reader, Header header, AbstractCodec codec, Resolver resolver)
{
    var itemCount = reader.ReadLong();
    var payloadSize = reader.ReadLong();

    var marker = new byte[DataFileConstants.SyncSize];
    var payload = new byte[payloadSize];

    reader.ReadFixed(payload, 0, (int)payloadSize);
    reader.ReadFixed(marker);

    bool markerMatches = marker.SequenceEqual(header.SyncData);
    if (!markerMatches)
    {
        throw new AvroRuntimeException("Invalid sync!");
    }

    payload = codec.Decompress(payload);

    // Items are resolved from the decompressed payload, not from the original source.
    IReader blockReader = new Reader(new MemoryStream(payload));

    return resolver.Resolve<T>(blockReader, itemCount);
}
/// <summary>
/// Creates an encoder that writes to <paramref name="outStream"/>: selects the codec, generates sync
/// data, records codec name and schema in the header metadata and resolves the item writer.
/// </summary>
/// <param name="schema">Schema kept by the encoder, stored as text in the header and used to resolve the item writer.</param>
/// <param name="outStream">Stream the main writer is created on.</param>
/// <param name="codecType">Codec type passed to <c>AbstractCodec.CreateCodec</c>.</param>
internal Encoder(TypeSchema schema, Stream outStream, CodecType codecType)
{
    // Codec, output target, header and schema.
    _codec = AbstractCodec.CreateCodec(codecType);
    _stream = outStream;
    _header = new Header();
    _schema = schema;

    // Block bookkeeping.
    _syncInterval = DataFileConstants.DefaultSyncInterval;
    _blockCount = 0;

    // Main writer over the output stream.
    _writer = new Writer(_stream);

    // Temporary in-memory buffer with its own writer.
    _tempBuffer = new MemoryStream();
    _tempWriter = new Writer(_tempBuffer);

    GenerateSyncData();

    // Record the codec name and the schema text in the header metadata.
    var codecName = _codec.Name;
    _header.AddMetadata(DataFileConstants.CodecMetadataKey, codecName);

    var schemaText = _schema.ToString();
    _header.AddMetadata(DataFileConstants.SchemaMetadataKey, schemaText);

    _writeItem = Resolver.ResolveWriter(schema);
    _isOpen = true;
}
/// <summary>
/// Decodes Avro data from <paramref name="stream"/>, with or without a leading Avro header.
/// </summary>
/// <param name="stream">Input stream; it is rewound to the beginning when no header is found.</param>
/// <param name="schema">
/// Schema to resolve with. Required when the data has no header; when a header is present and this
/// is null, the schema stored in the header metadata is used instead.
/// </param>
/// <returns>The object produced by the resolver (headerless data) or by <c>Read</c> (data with header).</returns>
/// <exception cref="MissingSchemaException">The data has no header and <paramref name="schema"/> is null.</exception>
internal object Decode(Stream stream, TypeSchema schema)
{
    var reader = new Reader(stream);

    // Probe the beginning of the stream for the Avro header marker.
    var leadingBytes = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(leadingBytes);
    }
    catch (EndOfStreamException)
    {
        // Stream is shorter than the Avro header; the marker comparison below decides what happens.
    }

    bool hasHeader = leadingBytes.SequenceEqual(DataFileConstants.AvroHeader);
    if (hasHeader)
    {
        var header = reader.ReadHeader();

        TypeSchema effectiveSchema =
            schema ?? Schema.Create(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
        var resolver = new Resolver(effectiveSchema);

        reader.ReadFixed(header.SyncData);

        var codecName = header.GetMetadata(DataFileConstants.CodecMetadataKey);
        var codec = AbstractCodec.CreateCodecFromString(codecName);

        return Read(reader, header, codec, resolver);
    }

    // Headerless data: the caller must supply the schema.
    if (schema == null)
    {
        throw new MissingSchemaException("Provide valid schema for the Avro data");
    }

    var headlessResolver = new Resolver(schema);
    stream.Seek(0, SeekOrigin.Begin);
    return headlessResolver.Resolve(reader);
}
/// <summary>
/// Decodes an Avro object with header from <paramref name="stream"/> into <typeparamref name="T"/>.
/// </summary>
/// <param name="stream">Input stream that must start with the Avro header marker.</param>
/// <param name="readSchema">Reader schema; when null, the writer schema from the header metadata is used.</param>
/// <returns>The value produced by <c>Read&lt;T&gt;</c> for the data that follows the header.</returns>
/// <exception cref="InvalidAvroObjectException">The stream does not start with the Avro header marker.</exception>
internal T Decode<T>(Stream stream, TypeSchema readSchema)
{
    var reader = new Reader(stream);

    // Probe the beginning of the stream for the Avro header marker.
    var leadingBytes = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(leadingBytes);
    }
    catch (EndOfStreamException)
    {
        // Stream is shorter than the Avro header; rejected by the marker comparison below.
    }

    bool hasHeader = leadingBytes.SequenceEqual(DataFileConstants.AvroHeader);
    if (!hasHeader)
    {
        throw new InvalidAvroObjectException("Object does not contain Avro Header");
    }

    var header = reader.ReadHeader();

    // The writer schema comes from the header; it doubles as the reader schema when none is given.
    TypeSchema writerSchema = Schema.Create(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
    TypeSchema effectiveReadSchema = readSchema ?? writerSchema;
    var resolver = new Resolver(writerSchema, effectiveReadSchema);

    // The sync data follows the header.
    reader.ReadFixed(header.SyncData);

    var codecName = header.GetMetadata(DataFileConstants.CodecMetadataKey);
    var codec = AbstractCodec.CreateCodecFromString(codecName);

    return Read<T>(reader, header, codec, resolver);
}
internal T Read <T>(Reader reader, Header header, AbstractCodec codec, Resolver resolver) { if (reader.IsReadToEnd()) { return(default);
/// <summary>
/// Opens a line reader over Avro data in <paramref name="stream"/>, with or without a leading Avro header.
/// </summary>
/// <param name="stream">Input stream; it is rewound to the beginning when no header is found.</param>
/// <param name="readSchema">
/// Reader schema. Required when the data has no header; when a header is present and this is null,
/// the writer schema from the header metadata is used.
/// </param>
/// <returns>
/// A <c>ListLineReader&lt;T&gt;</c> for headerless data. For data with a header: a
/// <c>BlockLineReader&lt;T&gt;</c> when the block's count is greater than one, otherwise a
/// <c>ListLineReader&lt;T&gt;</c> (resolving against the item schema when the writer schema is an array).
/// </returns>
/// <exception cref="MissingSchemaException">The data has no header and <paramref name="readSchema"/> is null.</exception>
/// <exception cref="AvroRuntimeException">The sync marker read after the block differs from the header's.</exception>
internal static ILineReader<T> OpenReader<T>(Stream stream, Schema.Schema readSchema)
{
    var reader = new Reader(stream);
    var header = new Header();

    // Probe the beginning of the stream for the Avro header marker.
    byte[] firstBytes = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(firstBytes);
    }
    catch (EndOfStreamException)
    {
        // Stream is shorter than the Avro header; treated as headless by the comparison below.
    }

    // Headless data: the caller must supply the schema.
    if (!firstBytes.SequenceEqual(DataFileConstants.AvroHeader))
    {
        if (readSchema == null)
        {
            throw new MissingSchemaException("Provide valid schema for the Avro data");
        }

        var headlessResolver = new Resolver(readSchema, readSchema);
        stream.Seek(0, SeekOrigin.Begin);
        return new ListLineReader<T>(reader, headlessResolver);
    }

    // Read the metadata map, chunk by chunk, until a chunk count of zero is reported.
    long len = reader.ReadMapStart();
    if (len > 0)
    {
        do
        {
            for (long i = 0; i < len; i++)
            {
                string key = reader.ReadString();
                byte[] val = reader.ReadBytes();
                header.AddMetadata(key, val);
            }
        } while ((len = reader.ReadMapNext()) != 0);
    }

    // Parse the writer schema once and reuse it as the reader schema when none was supplied,
    // instead of parsing the same metadata entry twice.
    Schema.Schema writeSchema = Schema.Schema.Parse(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
    readSchema = readSchema ?? writeSchema;
    var resolver = new Resolver(writeSchema, readSchema);

    // The sync data follows the metadata map.
    reader.ReadFixed(header.SyncData);

    var codec = AbstractCodec.CreateCodecFromString(header.GetMetadata(DataFileConstants.CodecMetadataKey));

    // Block layout as read here: count, byte size, payload, sync marker.
    var remainingBlocks = reader.ReadLong();
    var blockSize = reader.ReadLong();
    var syncBuffer = new byte[DataFileConstants.SyncSize];
    var dataBlock = new byte[blockSize];

    reader.ReadFixed(dataBlock, 0, (int)blockSize);
    reader.ReadFixed(syncBuffer);

    if (!syncBuffer.SequenceEqual(header.SyncData))
    {
        throw new AvroRuntimeException("Invalid sync!");
    }

    dataBlock = codec.Decompress(dataBlock);

    // From here on, items are read from the decompressed block.
    reader = new Reader(new MemoryStream(dataBlock));

    if (remainingBlocks > 1)
    {
        return new BlockLineReader<T>(reader, resolver, remainingBlocks);
    }

    if (writeSchema.Tag == Schema.Schema.Type.Array)
    {
        return new ListLineReader<T>(reader, new Resolver(((ArraySchema)writeSchema).ItemSchema, readSchema));
    }

    return new ListLineReader<T>(reader, new Resolver(writeSchema, readSchema));
}