/// <summary>
/// Decodes Avro data from <paramref name="stream"/>. The stream may or may not begin with
/// <c>DataFileConstants.AvroHeader</c>; both layouts are handled.
/// </summary>
/// <param name="stream">
/// Source of the Avro bytes. When the marker is absent the stream is rewound with
/// <c>Seek(0, SeekOrigin.Begin)</c> before resolving.
/// </param>
/// <param name="schema">
/// Schema handed to the resolver. Mandatory when the marker is absent; when the marker is present and
/// this is null, the value stored under <c>DataFileConstants.SchemaMetadataKey</c> is parsed instead.
/// </param>
/// <returns>
/// The result of <c>Resolver.Resolve</c> for marker-less input, otherwise the result of <c>Read</c>.
/// </returns>
/// <exception cref="MissingSchemaException">
/// The marker is absent and <paramref name="schema"/> is null.
/// </exception>
internal object Decode(Stream stream, Schema.Schema schema)
{
    var reader = new Reader(stream);
    var header = new Header();

    // Probe the leading bytes for the Avro marker.
    var marker = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(marker);
    }
    catch (EndOfStreamException)
    {
        // The stream ended before the marker could be read in full;
        // the comparison below decides how the input is treated.
    }

    bool startsWithMarker = marker.SequenceEqual(DataFileConstants.AvroHeader);
    if (!startsWithMarker)
    {
        // No marker: the caller has to supply the schema.
        if (schema == null)
        {
            throw new MissingSchemaException("Provide valid schema for the Avro data");
        }

        var headlessResolver = new Resolver(schema);
        stream.Seek(0, SeekOrigin.Begin);
        return headlessResolver.Resolve(reader);
    }

    // Collect the metadata map, one block of entries at a time.
    long entryCount = reader.ReadMapStart();
    bool moreEntries = entryCount > 0;
    while (moreEntries)
    {
        for (long left = entryCount; left > 0; left--)
        {
            string name = reader.ReadString();
            byte[] payload = reader.ReadBytes();
            header.AddMetadata(name, payload);
        }

        entryCount = reader.ReadMapNext();
        moreEntries = entryCount != 0;
    }

    // A schema supplied by the caller takes precedence over the one in the metadata.
    schema = schema ?? Schema.Schema.Parse(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
    var resolver = new Resolver(schema);

    // Sync data follows the metadata map.
    reader.ReadFixed(header.SyncData);

    var codec = AbstractCodec.CreateCodecFromString(header.GetMetadata(DataFileConstants.CodecMetadataKey));
    return Read(reader, header, codec, resolver);
}
/// <summary>
/// Splits an in-memory Avro object into its header, codec and data blocks.
/// </summary>
/// <param name="avroObject">Bytes of the Avro object; they are wrapped in a <c>MemoryStream</c>.</param>
/// <returns>
/// An <c>AvroObjectContent</c> carrying the header (with its schema set), the codec named in the
/// metadata, and one <c>DataBlock</c> per block found until the reader reports the end.
/// </returns>
/// <exception cref="InvalidAvroObjectException">
/// The leading bytes do not equal <c>DataFileConstants.AvroHeader</c>.
/// </exception>
internal AvroObjectContent ExtractAvroObjectContent(byte[] avroObject)
{
    using (var memory = new MemoryStream(avroObject))
    {
        var reader = new Reader(memory);

        // Probe the leading bytes for the Avro marker.
        var marker = new byte[DataFileConstants.AvroHeader.Length];
        try
        {
            reader.ReadFixed(marker);
        }
        catch (EndOfStreamException)
        {
            // Input is shorter than the marker; the comparison below rejects it.
        }

        bool startsWithMarker = marker.SequenceEqual(DataFileConstants.AvroHeader);
        if (!startsWithMarker)
        {
            throw new InvalidAvroObjectException("Object does not contain Avro Header");
        }

        var content = new AvroObjectContent();
        var header = reader.ReadHeader();

        content.Codec = AbstractCodec.CreateCodecFromString(header.GetMetadata(DataFileConstants.CodecMetadataKey));
        reader.ReadFixed(header.SyncData);

        content.Header = header;
        content.Header.Schema = Schema.Create(content.Header.GetMetadata(DataFileConstants.SchemaMetadataKey));

        // Each iteration consumes one block: its item count followed by its data.
        while (!reader.IsReadToEnd())
        {
            var itemsCount = reader.ReadLong();
            var data = reader.ReadDataBlock(header.SyncData, content.Codec);

            content.DataBlocks.Add(new DataBlock
            {
                ItemsCount = itemsCount,
                Data = data
            });
        }

        return content;
    }
}
/// <summary>
/// Checks that <c>AbstractCodec.CreateCodecFromString</c> returns a <c>NullCodec</c> when given
/// the name "NonExistingCodec".
/// </summary>
public void CreateCodecFromString_NonExistingString_DefaultCodecIsReturned()
{
    // Arrange
    const string unknownCodecName = "NonExistingCodec";

    // Act
    var codec = AbstractCodec.CreateCodecFromString(unknownCodecName);

    // Assert
    Assert.IsType<NullCodec>(codec);
}
/// <summary>
/// Decodes Avro data that begins with <c>DataFileConstants.AvroHeader</c> into
/// <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type passed on to <c>Read&lt;T&gt;</c>.</typeparam>
/// <param name="stream">Source of the Avro bytes.</param>
/// <param name="readSchema">
/// Reader schema. When null, the writer schema parsed from the metadata is used for reading as well.
/// </param>
/// <returns>The value produced by <c>Read&lt;T&gt;</c>.</returns>
/// <exception cref="InvalidAvroObjectException">
/// The leading bytes do not equal <c>DataFileConstants.AvroHeader</c>.
/// </exception>
internal T Decode<T>(Stream stream, Schema.Schema readSchema)
{
    var reader = new Reader(stream);
    var header = new Header();

    // validate header
    byte[] firstBytes = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(firstBytes);
    }
    catch (EndOfStreamException)
    {
        // stream shorter than AvroHeader; the comparison below rejects it
    }

    // does not contain header
    if (!firstBytes.SequenceEqual(DataFileConstants.AvroHeader))
    {
        throw new InvalidAvroObjectException("Object does not contain Avro Header");
    }

    // read meta data
    long len = reader.ReadMapStart();
    if (len > 0)
    {
        do
        {
            for (long i = 0; i < len; i++)
            {
                string key = reader.ReadString();
                byte[] val = reader.ReadBytes();
                header.AddMetadata(key, val);
            }
        } while ((len = reader.ReadMapNext()) != 0);
    }

    // Parse the embedded schema exactly once and reuse it as the reader schema when the caller
    // did not supply one (previously the same metadata was parsed twice in that case).
    Schema.Schema writeSchema = Schema.Schema.Parse(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
    readSchema = readSchema ?? writeSchema;
    var resolver = new Resolver(writeSchema, readSchema);

    // read in sync data
    reader.ReadFixed(header.SyncData);

    var codec = AbstractCodec.CreateCodecFromString(header.GetMetadata(DataFileConstants.CodecMetadataKey));
    return Read<T>(reader, header, codec, resolver);
}
/// <summary>
/// Sets up the decoder from <paramref name="stream"/>: checks the Avro marker, loads the metadata map
/// and the sync data into the header, then initializes the header schema, the resolver and the codec.
/// </summary>
/// <param name="stream">Source of the Avro bytes; also stored in <c>_stream</c>.</param>
/// <exception cref="InvalidAvroObjectException">
/// Reading the marker bytes failed, or they do not equal <c>DataFileConstants.AvroHeader</c>.
/// </exception>
private Decoder(Stream stream)
{
    _stream = stream;
    _header = new Header();
    _reader = new Reader(stream);
    _syncBuffer = new byte[DataFileConstants.SyncSize];

    // Probe the leading bytes for the Avro marker; any read failure is reported as an invalid object.
    var marker = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        _reader.ReadFixed(marker);
    }
    catch (Exception)
    {
        throw new InvalidAvroObjectException("Cannot read length of Avro Header");
    }

    bool markerMatches = marker.SequenceEqual(DataFileConstants.AvroHeader);
    if (!markerMatches)
    {
        throw new InvalidAvroObjectException("Cannot read Avro Header");
    }

    // Collect the metadata map, one block of entries at a time.
    long entryCount = _reader.ReadMapStart();
    bool moreEntries = entryCount > 0;
    while (moreEntries)
    {
        for (long left = entryCount; left > 0; left--)
        {
            string name = _reader.ReadString();
            byte[] payload = _reader.ReadBytes();
            _header.MetaData.Add(name, payload);
        }

        entryCount = _reader.ReadMapNext();
        moreEntries = entryCount != 0;
    }

    // Sync data follows the metadata map.
    _reader.ReadFixed(_header.SyncData);

    // Schema comes from the metadata; it also serves as reader schema when _readerSchema is null.
    var schemaText = GetMetaString(DataFileConstants.SchemaMetadataKey);
    _header.Schema = Schema.Schema.Parse(schemaText);
    _resolver = new Resolver(_header.Schema, _readerSchema ?? _header.Schema);

    var codecName = GetMetaString(DataFileConstants.CodecMetadataKey);
    _codec = AbstractCodec.CreateCodecFromString(codecName);
}
/// <summary>
/// Opens a line reader over Avro data in <paramref name="stream"/>. The stream may or may not begin
/// with <c>DataFileConstants.AvroHeader</c>.
/// </summary>
/// <typeparam name="T">Item type of the returned reader.</typeparam>
/// <param name="stream">
/// Source of the Avro bytes. When the marker is absent the stream is rewound with
/// <c>Seek(0, SeekOrigin.Begin)</c>.
/// </param>
/// <param name="readSchema">
/// Reader schema. Mandatory when the marker is absent (it is then used as both writer and reader
/// schema); when the marker is present and this is null, the schema from the metadata is used.
/// </param>
/// <returns>
/// A <c>ListLineReader&lt;T&gt;</c> for marker-less input, otherwise a <c>BaseLineReader&lt;T&gt;</c>.
/// </returns>
/// <exception cref="MissingSchemaException">
/// The marker is absent and <paramref name="readSchema"/> is null.
/// </exception>
internal static ILineReader<T> OpenReader<T>(Stream stream, TypeSchema readSchema)
{
    var reader = new Reader(stream);

    // validate header
    byte[] firstBytes = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(firstBytes);
    }
    catch (EndOfStreamException)
    {
        // stream shorter than AvroHeader; the comparison below decides how it is treated
    }

    // headless
    if (!firstBytes.SequenceEqual(DataFileConstants.AvroHeader))
    {
        if (readSchema == null)
        {
            throw new MissingSchemaException("Provide valid schema for the Avro data");
        }

        var headlessResolver = new Resolver(readSchema, readSchema);
        stream.Seek(0, SeekOrigin.Begin);
        return new ListLineReader<T>(reader, headlessResolver);
    }

    var header = reader.ReadHeader();

    // Build the writer schema exactly once and reuse it as the reader schema when the caller did
    // not supply one (previously the same metadata was turned into a schema twice in that case).
    TypeSchema writeSchema = Schema.Create(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
    readSchema = readSchema ?? writeSchema;

    // read in sync data
    reader.ReadFixed(header.SyncData);

    var codec = AbstractCodec.CreateCodecFromString(header.GetMetadata(DataFileConstants.CodecMetadataKey));
    return new BaseLineReader<T>(reader, header.SyncData, codec, writeSchema, readSchema);
}
/// <summary>
/// Decodes Avro data from <paramref name="stream"/>. The stream may or may not begin with
/// <c>DataFileConstants.AvroHeader</c>; both layouts are handled.
/// </summary>
/// <param name="stream">
/// Source of the Avro bytes. When the marker is absent the stream is rewound with
/// <c>Seek(0, SeekOrigin.Begin)</c> before resolving.
/// </param>
/// <param name="schema">
/// Schema handed to the resolver. Mandatory when the marker is absent; when the marker is present and
/// this is null, the schema is created from the <c>DataFileConstants.SchemaMetadataKey</c> metadata.
/// </param>
/// <returns>
/// The result of <c>Resolver.Resolve</c> for marker-less input, otherwise the result of <c>Read</c>.
/// </returns>
/// <exception cref="MissingSchemaException">
/// The marker is absent and <paramref name="schema"/> is null.
/// </exception>
internal object Decode(Stream stream, TypeSchema schema)
{
    var reader = new Reader(stream);

    // Probe the leading bytes for the Avro marker.
    var marker = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(marker);
    }
    catch (EndOfStreamException)
    {
        // The stream ended before the marker could be read in full;
        // the comparison below decides how the input is treated.
    }

    bool startsWithMarker = marker.SequenceEqual(DataFileConstants.AvroHeader);
    if (!startsWithMarker)
    {
        // No marker: the caller has to supply the schema.
        if (schema == null)
        {
            throw new MissingSchemaException("Provide valid schema for the Avro data");
        }

        var headlessResolver = new Resolver(schema);
        stream.Seek(0, SeekOrigin.Begin);
        return headlessResolver.Resolve(reader);
    }

    var header = reader.ReadHeader();

    // A schema supplied by the caller takes precedence over the one in the metadata.
    schema = schema ?? Schema.Create(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
    var resolver = new Resolver(schema);

    // Sync data is read right after the header.
    reader.ReadFixed(header.SyncData);

    var codecName = header.GetMetadata(DataFileConstants.CodecMetadataKey);
    var codec = AbstractCodec.CreateCodecFromString(codecName);
    return Read(reader, header, codec, resolver);
}
/// <summary>
/// Decodes Avro data that begins with <c>DataFileConstants.AvroHeader</c> into
/// <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type passed on to <c>Read&lt;T&gt;</c>.</typeparam>
/// <param name="stream">Source of the Avro bytes.</param>
/// <param name="readSchema">
/// Reader schema. When null, the writer schema created from the metadata is used for reading as well.
/// </param>
/// <returns>The value produced by <c>Read&lt;T&gt;</c>.</returns>
/// <exception cref="InvalidAvroObjectException">
/// The leading bytes do not equal <c>DataFileConstants.AvroHeader</c>.
/// </exception>
internal T Decode<T>(Stream stream, TypeSchema readSchema)
{
    var reader = new Reader(stream);

    // Probe the leading bytes for the Avro marker.
    var marker = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(marker);
    }
    catch (EndOfStreamException)
    {
        // Input is shorter than the marker; the comparison below rejects it.
    }

    bool startsWithMarker = marker.SequenceEqual(DataFileConstants.AvroHeader);
    if (!startsWithMarker)
    {
        throw new InvalidAvroObjectException("Object does not contain Avro Header");
    }

    var header = reader.ReadHeader();

    // The writer schema always comes from the metadata; it doubles as reader schema if none was given.
    var schemaText = header.GetMetadata(DataFileConstants.SchemaMetadataKey);
    TypeSchema writeSchema = Schema.Create(schemaText);
    readSchema = readSchema ?? writeSchema;
    var resolver = new Resolver(writeSchema, readSchema);

    // Sync data is read right after the header.
    reader.ReadFixed(header.SyncData);

    var codecName = header.GetMetadata(DataFileConstants.CodecMetadataKey);
    var codec = AbstractCodec.CreateCodecFromString(codecName);
    return Read<T>(reader, header, codec, resolver);
}
/// <summary>
/// Opens a line reader over Avro data in <paramref name="stream"/>. The stream may or may not begin
/// with <c>DataFileConstants.AvroHeader</c>.
/// </summary>
/// <remarks>
/// When the marker is present, this method reads one data block after the header: it loads the block
/// into memory, checks the sync bytes that follow it against the header's sync data, decompresses it
/// with the codec named in the metadata, and returns a reader over that in-memory copy.
/// </remarks>
/// <typeparam name="T">Item type of the returned reader.</typeparam>
/// <param name="stream">
/// Source of the Avro bytes. When the marker is absent the stream is rewound with
/// <c>Seek(0, SeekOrigin.Begin)</c>.
/// </param>
/// <param name="readSchema">
/// Reader schema. Mandatory when the marker is absent (it is then used as both writer and reader
/// schema); when the marker is present and this is null, the schema from the metadata is used.
/// </param>
/// <returns>
/// A <c>ListLineReader&lt;T&gt;</c> for marker-less input. With a marker: a
/// <c>BlockLineReader&lt;T&gt;</c> when the count preceding the block is greater than one, otherwise a
/// <c>ListLineReader&lt;T&gt;</c> (resolving against the item schema when the writer schema is an array).
/// </returns>
/// <exception cref="MissingSchemaException">
/// The marker is absent and <paramref name="readSchema"/> is null.
/// </exception>
/// <exception cref="AvroRuntimeException">
/// The block size read from the stream is negative or larger than <c>int.MaxValue</c>, or the sync
/// bytes after the block do not match the header's sync data.
/// </exception>
internal static ILineReader<T> OpenReader<T>(Stream stream, Schema.Schema readSchema)
{
    var reader = new Reader(stream);
    var header = new Header();

    // validate header
    byte[] firstBytes = new byte[DataFileConstants.AvroHeader.Length];
    try
    {
        reader.ReadFixed(firstBytes);
    }
    catch (EndOfStreamException)
    {
        // stream shorter than AvroHeader; the comparison below decides how it is treated
    }

    // does not contain header
    if (!firstBytes.SequenceEqual(DataFileConstants.AvroHeader))
    {
        if (readSchema == null)
        {
            throw new MissingSchemaException("Provide valid schema for the Avro data");
        }

        var headlessResolver = new Resolver(readSchema, readSchema);
        stream.Seek(0, SeekOrigin.Begin);
        return new ListLineReader<T>(reader, headlessResolver);
    }

    // read meta data
    long len = reader.ReadMapStart();
    if (len > 0)
    {
        do
        {
            for (long i = 0; i < len; i++)
            {
                string key = reader.ReadString();
                byte[] val = reader.ReadBytes();
                header.AddMetadata(key, val);
            }
        } while ((len = reader.ReadMapNext()) != 0);
    }

    // Parse the embedded schema exactly once and reuse it as the reader schema when the caller
    // did not supply one (previously the same metadata was parsed twice in that case).
    Schema.Schema writeSchema = Schema.Schema.Parse(header.GetMetadata(DataFileConstants.SchemaMetadataKey));
    readSchema = readSchema ?? writeSchema;
    var resolver = new Resolver(writeSchema, readSchema);

    // read in sync data
    reader.ReadFixed(header.SyncData);

    var codec = AbstractCodec.CreateCodecFromString(header.GetMetadata(DataFileConstants.CodecMetadataKey));

    // Two longs precede the block payload: a count, then the payload size in bytes.
    var itemsCount = reader.ReadLong();
    var blockSize = reader.ReadLong();

    // The size comes straight from the stream; reject values that cannot be used as an array
    // length instead of failing inside the allocation or silently truncating in the int cast.
    if (blockSize < 0 || blockSize > int.MaxValue)
    {
        throw new AvroRuntimeException("Invalid block size: " + blockSize);
    }

    var syncBuffer = new byte[DataFileConstants.SyncSize];
    var dataBlock = new byte[blockSize];

    reader.ReadFixed(dataBlock, 0, (int)blockSize);
    reader.ReadFixed(syncBuffer);

    if (!syncBuffer.SequenceEqual(header.SyncData))
    {
        throw new AvroRuntimeException("Invalid sync!");
    }

    // From here on, items are read from the decompressed in-memory block.
    dataBlock = codec.Decompress(dataBlock);
    var blockReader = new Reader(new MemoryStream(dataBlock));

    if (itemsCount > 1)
    {
        return new BlockLineReader<T>(blockReader, resolver, itemsCount);
    }

    if (writeSchema.Tag == Schema.Schema.Type.Array)
    {
        // A single array value: resolve its elements against the array's item schema.
        var itemResolver = new Resolver(((ArraySchema)writeSchema).ItemSchema, readSchema);
        return new ListLineReader<T>(blockReader, itemResolver);
    }

    // Same writer/reader pair as the resolver built above, so reuse it.
    return new ListLineReader<T>(blockReader, resolver);
}