예제 #1
0
        /// <summary>
        /// Reads and validates the container header from <c>_headerStream</c>, then reads the
        /// item count and byte length of the block at the current position of
        /// <c>_dataStream</c> and reads past <c>ObjectIndex</c> items.
        /// </summary>
        /// <param name="async">Forwarded unchanged to every parser call made here.</param>
        /// <param name="cancellationToken">Forwarded unchanged to every parser call made here.</param>
        /// <exception cref="ArgumentException">
        /// The header bytes differ from <c>AvroConstants.InitBytes</c>, or the metadata names
        /// a codec other than "null".
        /// </exception>
        private async Task Initalize(bool async, CancellationToken cancellationToken = default)
        {
            // The header stream must open with the bytes held in AvroConstants.InitBytes.
            byte[] magic = await AvroParser.ReadFixedBytesAsync(
                _headerStream,
                AvroConstants.InitBytes.Length,
                async,
                cancellationToken).ConfigureAwait(false);

            bool hasAvroMagic = magic.SequenceEqual(AvroConstants.InitBytes);
            if (!hasAvroMagic)
            {
                throw new ArgumentException("Stream is not an Avro file.");
            }

            // The metadata map comes next; its values are read here as strings.
            _metadata = await AvroParser.ReadMapAsync(
                _headerStream,
                AvroParser.ReadStringAsync,
                async,
                cancellationToken).ConfigureAwait(false);

            // Only an absent codec entry or the literal "null" codec is accepted.
            _metadata.TryGetValue(AvroConstants.CodecKey, out string codecName);
            if (codecName != null && codecName != "null")
            {
                throw new ArgumentException("Codecs are not supported");
            }

            // The file's sync marker closes the header.
            _syncMarker = await AvroParser.ReadFixedBytesAsync(
                _headerStream,
                AvroConstants.SyncMarkerSize,
                async,
                cancellationToken).ConfigureAwait(false);

            // Build the item type from the JSON schema stored in the metadata.
            using JsonDocument schemaDocument = JsonDocument.Parse(_metadata[AvroConstants.SchemaKey]);
            _itemType = AvroType.FromSchema(schemaDocument.RootElement);

            // A zero BlockOffset is replaced by the data stream's current position.
            if (BlockOffset == 0)
            {
                BlockOffset = _dataStream.Position;
            }

            // Item count of the block at the current data-stream position.
            _itemsRemainingInBlock = await AvroParser.ReadLongAsync(_dataStream, async, cancellationToken).ConfigureAwait(false);

            // The block's byte length follows; it is read and discarded.
            _ = await AvroParser.ReadLongAsync(_dataStream, async, cancellationToken).ConfigureAwait(false);

            _initalized = true;

            // Read and drop ObjectIndex items, keeping the remaining-item count in step.
            // The loop does nothing when ObjectIndex is zero or negative.
            for (int skipped = 0; skipped < ObjectIndex; skipped++)
            {
                await _itemType.ReadAsync(_dataStream, async, cancellationToken).ConfigureAwait(false);

                _itemsRemainingInBlock -= 1;
            }
        }
예제 #2
0
        /// <summary>
        /// Reads the next object from <c>_dataStream</c>, running <c>Initalize</c> first when
        /// the reader has not been initialized yet. When the read exhausts the current block,
        /// the trailing sync marker is verified and the next block's header is consumed.
        /// </summary>
        /// <param name="async">Forwarded unchanged to every parser call made here.</param>
        /// <param name="cancellationToken">Forwarded unchanged to every parser call made here.</param>
        /// <returns>The value produced by <c>_itemType.ReadAsync</c>.</returns>
        /// <exception cref="ArgumentException">
        /// <c>HasNext()</c> returned false, or the marker read after a completed block differs
        /// from the sync marker stored in <c>_syncMarker</c>.
        /// </exception>
        public virtual async Task<object> Next(bool async, CancellationToken cancellationToken = default)
        {
            // Initialize AvroReader, if necessary.
            if (!_initalized)
            {
                await Initalize(async, cancellationToken).ConfigureAwait(false);
            }

            if (!HasNext())
            {
                throw new ArgumentException("There are no more items in the stream");
            }

            object result = await _itemType.ReadAsync(_dataStream, async, cancellationToken).ConfigureAwait(false);

            _itemsRemainingInBlock--;
            ObjectIndex++;

            if (_itemsRemainingInBlock == 0)
            {
                // Read the marker with the same length constant used to read _syncMarker
                // during initialization, so the two arrays are always comparable.
                byte[] marker = await AvroParser.ReadFixedBytesAsync(_dataStream, AvroConstants.SyncMarkerSize, async, cancellationToken).ConfigureAwait(false);

                // The position just past the marker becomes the new block offset.
                BlockOffset = _dataStream.Position;
                ObjectIndex = 0;

                if (!_syncMarker.SequenceEqual(marker))
                {
                    throw new ArgumentException("Stream is not a valid Avro file.");
                }

                try
                {
                    _itemsRemainingInBlock = await AvroParser.ReadLongAsync(_dataStream, async, cancellationToken).ConfigureAwait(false);
                }
                catch (InvalidOperationException)
                {
                    // We hit the end of the stream. Deliberately swallowed:
                    // _itemsRemainingInBlock stays at 0 and the block-size read below is skipped.
                }

                if (_itemsRemainingInBlock > 0)
                {
                    // Ignore block size
                    await AvroParser.ReadLongAsync(_dataStream, async, cancellationToken).ConfigureAwait(false);
                }
            }

            return result;
        }