Exemple #1
0
        /// <summary>
        /// Walks the provider's listing pages, scrapes every recipe link found on each page and
        /// passes each scraped recipe to <paramref name="recipeManager"/>.
        /// </summary>
        /// <remarks>
        /// A failure while resolving, scraping or storing a single recipe is reported through
        /// <c>ReportFailedRecipeAsync</c> and does not stop the run. Failures while fetching or
        /// parsing a listing page are not caught here and propagate to the caller.
        /// </remarks>
        /// <param name="recipeManager">Receives the scraped recipes and the failure reports.</param>
        /// <returns>A task that completes once every listing page has been processed.</returns>
        public async Task ScrapeRecipesAsync(IRecipeManager recipeManager)
        {
            var stopWatch = Stopwatch.StartNew();
            var document  = new HtmlDocument();

            // NOTE(review): if both operands are integers this division truncates, so a trailing
            // partial page would never be visited — confirm that this is intended.
            var pages = Config.RecipeCount / Config.PageItemCount;

            Debug.WriteLine(pages);

            for (var page = 0; page < pages; page++)
            {
                string html;

                // Release the response as soon as its body has been read
                // (presumably an HttpResponseMessage — confirm against Config.NextPage).
                using (var httpResponse = await Config.NextPage(page, document, _client))
                {
                    html = await httpResponse.Content.ReadAsStringAsync();
                }

                document.LoadHtml(html);

                var list = document.DocumentNode.SelectNodes(Config.RecipeItemXPath);

                if (list == null)
                {
                    // Nothing matched the recipe XPath on this page; move on to the next one.
                    continue;
                }

                // Skip nodes without an href: dereferencing the missing attribute would throw
                // here, outside the per-link try/catch, and abort the whole run.
                var links = list
                    .Where(x => x.Attributes["href"] != null)
                    .Select(x => x.Attributes["href"].Value)
                    .ToList();

                foreach (var link in links)
                {
                    Uri uri = null;

                    try
                    {
                        // Relative links are resolved against the provider's root page.
                        uri = Uri.IsWellFormedUriString(link, UriKind.Absolute)
                            ? new Uri(link)
                            : new Uri(Config.RootPage + link);

                        var recipe = await ScrapeRecipeAsync(uri.ToString());

                        await recipeManager.UpdateRecipeMetaAsync(recipe);
                    }
                    catch (Exception ex)
                    {
                        var trace = ex.StackTrace ?? string.Empty;

                        // uri is still null when constructing it was what failed; fall back to
                        // the raw link so the report still identifies the offending entry.
                        var failingLink = uri != null ? uri.ToString() : (link ?? string.Empty);

                        await recipeManager.ReportFailedRecipeAsync(failingLink, trace);
                    }
                }
            }

            stopWatch.Stop();

            // TotalMinutes, not Minutes: Minutes is only the 0-59 component and wraps every hour.
            Logger.Information("{0} took {1} min to rescrape.", Config.ProviderName, (int)stopWatch.Elapsed.TotalMinutes);
        }