Esempio n. 1
0
        /// <summary>
        /// Writes the content of every existing input file to <paramref name="reader"/>,
        /// wrapping files whose name ends in <c>.gz</c> in a decompressing
        /// <see cref="GZipStream"/>, and finally calls <c>CompleteWriting</c> on the reader.
        /// </summary>
        /// <param name="inputs">Paths of the input files, processed in enumeration order.</param>
        /// <param name="reader">Reader that receives the stream of each input file.</param>
        /// <returns>
        /// A task that completes after all inputs have been handed to the reader and
        /// <c>CompleteWriting</c> has been awaited.
        /// </returns>
        private async Task ReadInputs(IEnumerable<string> inputs, WarcReader reader)
        {
            foreach (var input in inputs)
            {
                await _writer.Info($"Considering input file '{input}'.");

                // Missing files are reported and skipped instead of aborting the whole run.
                if (!File.Exists(input))
                {
                    await _writer.Warn($"Input file '{input}' does not exist and will be ignored.");
                    continue;
                }

                // Disposed at the end of this iteration, i.e. after the "buffered" message below.
                await using var fileStream = File.OpenRead(input);

                // File extensions are identifiers, not linguistic text: compare ordinally so the
                // result does not depend on the current culture.
                if (input.EndsWith(".gz", StringComparison.Ordinal))
                {
                    await using var compressionStream = new GZipStream(fileStream, CompressionMode.Decompress);
                    await reader.WriteAsync(compressionStream);
                }
                else
                {
                    await reader.WriteAsync(fileStream);
                }

                await _writer.Info($"Input file '{input}' has been completely buffered.");
            }

            // Called once, after the last input, so every file goes through the same reader.
            await reader.CompleteWriting();
        }
Esempio n. 2
0
        /// <summary>
        /// Writes the "wikipedia-1-0.warc" example into a <c>WarcReader</c> while reading from it
        /// concurrently, and expects 24 records to be produced.
        /// </summary>
        public async Task Can_read_mixed_records()
        {
            // Arrange
            var inputStr = Example.Get("wikipedia-1-0.warc");

            var reader = new WarcReader();

            // Runs the write side concurrently with the read below.
            // A plain ContinueWith(_ => reader.CompleteWriting()) would produce a nested task:
            // awaiting it neither awaits CompleteWriting nor surfaces a WriteAsync failure.
            // The finally block still completes writing when WriteAsync throws, so the
            // read side is not left waiting for more input.
            async Task WriteAllAsync()
            {
                try
                {
                    await reader.WriteAsync(inputStr);
                }
                finally
                {
                    await reader.CompleteWriting();
                }
            }

            var writingTask = WriteAllAsync();

            // Act
            var result = await ToList(reader.ReadAllAsync());

            // Observe the writer before asserting so a write failure is reported as the
            // root cause instead of a misleading record-count mismatch.
            await writingTask;

            // Assert
            result.Should().HaveCount(24);
        }
Esempio n. 3
0
        /// <summary>
        /// Writes the "warcinfo.warc" example into a <c>WarcReader</c>, completes writing,
        /// and expects exactly one record to be read back.
        /// </summary>
        public async Task Can_read_single_record()
        {
            // Arrange: load the example and push it through the reader before reading starts.
            var exampleInput = Example.Get("warcinfo.warc");
            WarcReader warcReader = new WarcReader();

            await warcReader.WriteAsync(exampleInput);
            await warcReader.CompleteWriting();

            // Act
            var recordSequence = warcReader.ReadAllAsync();
            var records = await ToList(recordSequence);

            // Assert
            records.Should().HaveCount(1);
        }