示例#1
0
        /// <summary>
        /// Collects the character terms linked from the "Characters" row of a Goodreads book page.
        /// </summary>
        /// <param name="dataUrl">
        /// URL of the Goodreads page. It is downloaded only when no page has been cached in
        /// <c>sourceHtmlDoc</c> yet, and it is also passed through to <c>GetTerm</c> for each character.
        /// </param>
        /// <param name="progress">Optional progress reporter; reset to the number of character links found and advanced once per processed link.</param>
        /// <param name="token">Cancellation token handed to <c>ParallelForEachAsync</c>.</param>
        /// <returns>
        /// The non-null terms returned by <c>GetTerm</c>, in no particular order. An empty list is returned
        /// when the page has no character links. Characters whose lookup fails are logged and skipped.
        /// </returns>
        public override async Task<List<XRay.Term>> GetTerms(string dataUrl, IProgressBar progress, CancellationToken token)
        {
            if (sourceHtmlDoc == null)
            {
                Logger.Log("Downloading Goodreads page...");
                // Build the document in a local first and only publish it once loading succeeded.
                // Assigning the field before the download completes would leave an empty document
                // cached after a failed request, making every later call silently return no terms.
                var downloadedDoc = new HtmlDocument();
                downloadedDoc.LoadHtml(await HttpDownloader.GetPageHtmlAsync(dataUrl));
                sourceHtmlDoc = downloadedDoc;
            }

            // Both queries share the same "Characters" info-box row; only the tail differs.
            const string characterRowXPath = "//div[@class='infoBoxRowTitle' and text()='Characters']/../div[@class='infoBoxRowItem']";

            var charNodes = sourceHtmlDoc.DocumentNode.SelectNodes(characterRowXPath + "/a");
            if (charNodes == null)
            {
                return new List<XRay.Term>();
            }

            // Check if ...more link exists on Goodreads page
            var moreCharNodes = sourceHtmlDoc.DocumentNode.SelectNodes(characterRowXPath + "/span[@class='toggleContent']/a");
            var allChars = moreCharNodes == null ? charNodes : charNodes.Concat(moreCharNodes);
            var termCount = charNodes.Count + (moreCharNodes?.Count ?? 0);

            Logger.Log($"Gathering term information from Goodreads... ({termCount})");
            progress?.Set(0, termCount);
            if (termCount > 20)
            {
                Logger.Log("More than 20 characters found. Consider using the 'download to XML' option if you need to build repeatedly.");
            }

            var terms = new ConcurrentBag<XRay.Term>();
            await allChars.ParallelForEachAsync(async charNode =>
            {
                var href = charNode.GetAttributeValue("href", "");
                try
                {
                    terms.AddNotNull(await GetTerm(dataUrl, href).ConfigureAwait(false));
                }
                catch (Exception ex)
                {
                    // Best effort: one failing character must not abort the whole run,
                    // but every failure is logged so it does not vanish silently.
                    if (ex.Message.Contains("(404)"))
                    {
                        Logger.Log("Error getting page for character. URL: " + "https://www.goodreads.com" + href
                                   + "\r\nMessage: " + ex.Message + "\r\n" + ex.StackTrace);
                    }
                    else
                    {
                        Logger.Log("Skipping character after unexpected error. URL: " + "https://www.goodreads.com" + href
                                   + "\r\nMessage: " + ex.Message + "\r\n" + ex.StackTrace);
                    }
                }
                finally
                {
                    // Count failed lookups too, otherwise the progress bar never reaches termCount.
                    progress?.Add(1);
                }
            }, MaxConcurrentRequests, token);

            return terms.ToList();
        }