/// <summary>
/// Console entry point. Registers the <c>ArticleMap</c> mapping, parses articles using
/// parameters built from the command-line arguments, prints a per-status summary,
/// saves any results still pending for save, and finally hands every parsed result
/// to <c>SaveParsedArticlesToCsv</c>.
/// </summary>
/// <param name="args">Command-line arguments forwarded to <c>OnvistaArticlesParsingParameters</c>.</param>
static void Main(string[] args)
{
    // The mapping must be registered before the repository is used.
    FluentMapper.Initialize(cfg => cfg.AddMap(new ArticleMap()));

    var parsingParameters = new OnvistaArticlesParsingParameters(args);
    var articlesRepository = new ArticlesRepository();
    var articlesParser = new ArticlesParser(articlesRepository);

    var parsedArticles = articlesParser.ParseArticles(parsingParameters);

    // Results that were parsed but not yet persisted.
    var unsavedResults = parsedArticles
        .Where(result => result.ResultType == ParsingResultType.PendingForSave)
        .ToList();

    int savedCount = parsedArticles.Count(result => result.ResultType == ParsingResultType.Saved);
    int existingCount = parsedArticles.Count(result => result.ResultType == ParsingResultType.AlreadyExists);

    Console.WriteLine(
        $"{parsedArticles.Count} Articles were parsed.{Environment.NewLine}" +
        $"PendingForSave: {unsavedResults.Count} | " +
        $"Saved: {savedCount} | " +
        $"Already exists: {existingCount} ");

    if (unsavedResults.Any())
    {
        Console.WriteLine("Saving parser results...");
        articlesParser.SaveParsingResults(unsavedResults);
        Console.WriteLine("Done");
    }

    // The CSV export receives every result, regardless of its status.
    SaveParsedArticlesToCsv(parsedArticles);
}
/// <summary>
/// Walks the paginated news listing at <c>parameters.NewsUrl</c> and collects the
/// parsing result of every article found on the pages that are not skipped.
/// </summary>
/// <remarks>
/// Any exception thrown while paging is logged and swallowed; the results gathered
/// up to that point are still returned (best-effort behavior).
/// </remarks>
/// <param name="parameters">
/// Parsing options: listing URL, optional page limit, number of pages to skip,
/// request delay, whether to save while parsing, and whether to stop once an
/// already existing record is encountered.
/// </param>
/// <returns>The results collected from all parsed pages (possibly partial on error).</returns>
public List<ParsingResult<Article>> ParseArticles(OnvistaArticlesParsingParameters parameters)
{
    string url = parameters.NewsUrl;
    int? pagesToParse = parameters.PagesToParse;
    bool stopParsingOnExistingRecord = parameters.StopParsingOnExisting;
    int skipPages = parameters.SkipPages;
    int requestDelay = parameters.RequestDelayMs;

    var resultArticles = new List<ParsingResult<Article>>();

    // NOTE(review): presumably reloads the set of known articles used to detect
    // "already exists" results - confirm against RefreshExistingArticles.
    RefreshExistingArticles();

    try
    {
        // -1 means the total page count has not been read yet.
        int pagesCount = -1;

        // Starts at 1 so the first page is always requested; the real upper bound
        // is computed from that first document.
        int pagesCountToParse = 1;

        for (int page = 1; page <= pagesCountToParse; page++)
        {
            // Once the page count is known, skipped pages are not downloaded at all.
            if (pagesCount != -1 && skipPages > 0)
            {
                skipPages--;
                continue;
            }

            string pageUrl = GetPageUrl(url, page);
            var document = GetDocumentFromUrl(pageUrl, requestDelay);

            if (pagesCount == -1)
            {
                pagesCount = GetPagesCount(document);
                pagesCountToParse = pagesToParse.HasValue
                    ? Math.Min(pagesCount, pagesToParse.Value)
                    : pagesCount;
            }

            // The first page had to be downloaded to learn the page count, but it
            // may still be one of the pages the caller asked to skip.
            if (skipPages > 0)
            {
                skipPages--;
                continue;
            }

            _logger.LogInformation($"Parsing of page {page}/{pagesCountToParse} started.");

            var articles = GetArticlesFromDocument(document, requestDelay);
            resultArticles.AddRange(articles);

            // The logged count is cumulative across all pages parsed so far.
            _logger.LogInformation($"Page {page} was parsed. Articles count: {resultArticles.Count}");

            var articlesToSave = articles
                .Where(x => x.ResultType == ParsingResultType.PendingForSave)
                .ToList();

            if (articlesToSave.Any() && parameters.SaveWithParsing)
            {
                _logger.LogInformation("Saving...");
                SaveParsingResults(articlesToSave);
            }

            if (stopParsingOnExistingRecord &&
                articles.Any(x => x.ResultType == ParsingResultType.AlreadyExists))
            {
                _logger.LogInformation("Stopping parser: already existing records were parsed");
                break;
            }
        }
    }
    catch (Exception ex)
    {
        // Pass the exception as the first argument so that it is recorded together
        // with its stack trace; when passed after the message it is treated as an
        // unused format argument and silently dropped.
        _logger.LogError(ex, $"Unexpected error while parsing articles: {resultArticles.Count} articles were parsed");
    }

    return resultArticles;
}