Ejemplo n.º 1
0
        /// <summary>
        /// Runs a scraping session for <paramref name="instruction"/> and collects the
        /// parsed results, keeping at most one result per <c>Key</c>.
        /// </summary>
        /// <remarks>
        /// Content is pulled from the session one piece at a time. The loop ends as soon as
        /// the session yields <c>null</c>, a piece of content has no entry nodes, or a piece
        /// of content contributes no result with a previously unseen key. When two results
        /// share a key, the one encountered first is kept.
        /// </remarks>
        /// <typeparam name="T">Result type produced for each entry node.</typeparam>
        /// <param name="instruction">Instruction used to start the session and to locate and parse entry nodes.</param>
        /// <returns>The distinct results gathered across all processed content.</returns>
        public async Task <ICollection <T> > Scrape <T>(HtmlScrapeInstruction <T> instruction) where T : IScrapeResult, new()
        {
            var session = instruction.StartScrapingSession(_configuration.HtmlFetcher);
            var collected = new Dictionary <string, T>();

            while (true)
            {
                var content = await session.GetNextContent();
                if (content == null)
                {
                    // The session has nothing more to offer.
                    break;
                }

                var document = new HtmlDocument();
                document.LoadHtml(content);

                var nodes = GetEntryNodes(instruction, document).ToList();
                if (nodes.Count == 0)
                {
                    break;
                }

                // Remember the size before this batch so we can tell whether it added anything.
                int countBeforeBatch = collected.Count;

                foreach (var node in nodes)
                {
                    if (node == null)
                    {
                        // TODO: Log
                        continue;
                    }

                    if (!TryParseNode(node, instruction, out var parsed))
                    {
                        continue;
                    }

                    if (collected.ContainsKey(parsed.Key))
                    {
                        // First result seen for a key wins; later duplicates are ignored.
                        continue;
                    }

                    collected.Add(parsed.Key, parsed);
                }

                if (collected.Count == countBeforeBatch)
                {
                    // Nothing new in this batch: stop instead of fetching further content.
                    break;
                }
            }

            return collected.Values;
        }