/// <summary>
/// Integration test: loads the page at a hard-coded URL through a browsing context
/// configured with the default loader and hands the resulting document to
/// <c>ContentExtractor.Extract</c>. The test makes no check on the extraction
/// result; it ends with <c>Assert.Pass()</c>, so it only fails if a step throws.
/// </summary>
public async Task Extract_IntegrationTest()
{
    // Arrange
    var loaderConfig = Configuration.Default.WithDefaultLoader();
    var articleUrl = "https://lenta.ru/articles/2020/05/13/coronausa/";
    var browsingContext = BrowsingContext.New(loaderConfig);
    var openedDocument = await browsingContext.OpenAsync(articleUrl);
    // Direct cast (not `as`): a non-HTML document must surface as an InvalidCastException.
    var htmlDocument = (IHtmlDocument)openedDocument;
    var sut = new ContentExtractor();

    // Act
    sut.Extract(htmlDocument);

    // Assert
    Assert.Pass();
}
/// <summary>
/// Program entry point. Resolves one input path and two output paths from
/// <paramref name="args"/> (positions 0, 1 and 2, each with a default constant as
/// fallback argument), reads all lines of the input file, runs them through
/// <c>ContentExtractor.Extract</c>, and writes two reports produced by
/// <c>OutputGenerator</c>, printing a "Created: ..." line after each file is written.
/// </summary>
/// <param name="args">Command-line arguments passed to <c>ArgsUtil.GetArgument</c>.</param>
public static void Main(string[] args)
{
    var sourceFile = ArgsUtil.GetArgument(args, 0, DefaultFileInput);
    var frequencyReportFile = ArgsUtil.GetArgument(args, 1, DefaultFileOutput1);
    var streetReportFile = ArgsUtil.GetArgument(args, 2, DefaultFileOutput2);

    // Read the input before constructing the extractor, so a missing file fails first.
    var sourceLines = File.ReadAllLines(sourceFile);
    var extractor = new ContentExtractor();
    var parsedContent = extractor.Extract(sourceLines);
    var reports = new OutputGenerator(parsedContent);

    // First report: frequency / alphabetic lines.
    var frequencyLines = reports.GetFrequencyWithAlphabeticLines();
    File.WriteAllLines(frequencyReportFile, frequencyLines);
    Console.WriteLine("Created: " + frequencyReportFile);

    // Second report: lines by street name.
    var streetLines = reports.GetByStreetNameLines();
    File.WriteAllLines(streetReportFile, streetLines);
    Console.WriteLine("Created: " + streetReportFile);
}
/// <summary>
/// Feeds <c>ContentExtractor.Extract</c> five comma-separated lines (a column-name
/// line followed by four data lines) and checks that <c>AllContentItems</c> holds
/// four items, with exactly one item matching each probed first name, last name,
/// address and phone number.
/// </summary>
public void ExtractPopulatesCorrectly()
{
    // Arrange
    string[] csvLines =
    {
        "FirstName,LastName,Address,PhoneNumber",
        "TestName1,LastName1,1 Address,11111111",
        "TestName2,LastName2,2 Address,22222222",
        "TestName3,LastName3,3 Address,33333333",
        "TestName4,LastName4,4 Address,4444444"
    };
    var sut = new ContentExtractor();

    // Act
    var result = sut.Extract(csvLines);
    var items = result.AllContentItems;

    // Assert: one item per data line.
    Assert.That(items.Count, Is.EqualTo(4));

    // Each probe value must match exactly one item; one field is probed per data line.
    var firstNameMatches = items.Count(entry => entry.FirstName.Equals("TestName1"));
    Assert.That(firstNameMatches, Is.EqualTo(1));

    var lastNameMatches = items.Count(entry => entry.LastName.Equals("LastName2"));
    Assert.That(lastNameMatches, Is.EqualTo(1));

    var addressMatches = items.Count(entry => entry.Address.Equals("3 Address"));
    Assert.That(addressMatches, Is.EqualTo(1));

    var phoneMatches = items.Count(entry => entry.PhoneNumber.Equals("4444444"));
    Assert.That(phoneMatches, Is.EqualTo(1));
}