/// <summary>
/// Writes the content of every existing input file into <paramref name="reader"/>
/// and then signals that no more data will be written.
/// </summary>
/// <remarks>
/// Paths that do not exist are reported through a warning and skipped. Files whose
/// name ends in <c>.gz</c> (ordinal comparison, ignoring case) are decompressed with a
/// <see cref="GZipStream"/> while being written; all other files are written as-is.
/// </remarks>
/// <param name="inputs">Paths of the files to process, handled in enumeration order.</param>
/// <param name="reader">Reader that receives the content of each file.</param>
private async Task ReadInputs(IEnumerable<string> inputs, WarcReader reader)
{
    foreach (var input in inputs)
    {
        await _writer.Info($"Considering input file '{input}'.");

        if (!File.Exists(input))
        {
            await _writer.Warn($"Input file '{input}' does not exist and will be ignored.");
            continue;
        }

        // Disposed at the end of this iteration, after the completion message is logged.
        await using var fileStream = File.OpenRead(input);

        // File extensions are identifiers, not linguistic text: compare ordinally
        // (culture-independent) and ignore case so that ".GZ" is recognised too.
        if (input.EndsWith(".gz", System.StringComparison.OrdinalIgnoreCase))
        {
            await using var compressionStream = new GZipStream(fileStream, CompressionMode.Decompress);
            await reader.WriteAsync(compressionStream);
        }
        else
        {
            await reader.WriteAsync(fileStream);
        }

        await _writer.Info($"Input file '{input}' has been completely buffered.");
    }

    // Called once, after all inputs have been handed to the reader.
    // NOTE(review): this is skipped if a write above throws — confirm that no consumer
    // of the reader can be left waiting for completion in that case.
    await reader.CompleteWriting();
}
/// <summary>
/// Reads the "wikipedia-1-0.warc" example while the writing side is started
/// concurrently, and expects 24 records to be produced.
/// </summary>
public async Task Can_read_mixed_records()
{
    // Arrange
    var inputStr = Example.Get("wikipedia-1-0.warc");
    var reader = new WarcReader();

    // A local async function instead of ContinueWith: ContinueWith with a task-returning
    // delegate yields a nested task, so awaiting it would not await CompleteWriting and
    // would hide any exception thrown by WriteAsync.
    async Task WriteThenCompleteAsync()
    {
        try
        {
            await reader.WriteAsync(inputStr);
        }
        finally
        {
            // Always complete (as the original continuation did), so the read
            // below cannot wait forever when writing fails.
            await reader.CompleteWriting();
        }
    }

    // Not awaited here, so reading can begin before writing has finished.
    var writingTask = WriteThenCompleteAsync();

    // Act
    var result = await ToList(reader.ReadAllAsync());

    // Observe the writer before asserting: a write failure is then reported as the
    // root cause instead of being masked by a wrong record count.
    await writingTask;

    // Assert
    result.Should().HaveCount(24);
}
/// <summary>
/// Writes the "warcinfo.warc" example into a reader, completes the writing side,
/// and expects exactly one record to be read back.
/// </summary>
public async Task Can_read_single_record()
{
    // Arrange: the whole input is written and writing is completed before any read starts.
    var source = Example.Get("warcinfo.warc");
    var warcReader = new WarcReader();

    await warcReader.WriteAsync(source);
    await warcReader.CompleteWriting();

    // Act
    var records = await ToList(warcReader.ReadAllAsync());

    // Assert
    records.Should().HaveCount(1);
}