/// <summary>
/// Collects character terms from the "Characters" row of a Goodreads book page.
/// </summary>
/// <param name="dataUrl">
/// URL of the Goodreads book page. It is downloaded only when no page is already cached in
/// <c>sourceHtmlDoc</c>, and is forwarded to <c>GetTerm</c> for every character link.
/// </param>
/// <param name="progress">
/// Optional progress sink (may be null). It is set to 0 of N before the lookups start and
/// advanced by one for every character link that has been processed, whether or not it succeeded.
/// </param>
/// <param name="token">Cancellation token handed to the parallel loop.</param>
/// <returns>
/// The terms gathered from the character pages, in no particular order (they are collected in a
/// <see cref="ConcurrentBag{T}"/>). An empty list is returned when the page has no character links.
/// </returns>
public override async Task<List<XRay.Term>> GetTerms(string dataUrl, IProgressBar progress, CancellationToken token)
{
    if (sourceHtmlDoc == null)
    {
        Logger.Log("Downloading Goodreads page...");
        // Parse into a local first and publish it only after the download succeeded. Assigning the
        // field up front would leave an empty, non-null document cached if the download threw, and
        // every later call would then skip the download and report zero characters.
        var downloadedDoc = new HtmlDocument();
        downloadedDoc.LoadHtml(await HttpDownloader.GetPageHtmlAsync(dataUrl));
        sourceHtmlDoc = downloadedDoc;
    }

    const string characterRowXPath = "//div[@class='infoBoxRowTitle' and text()='Characters']/../div[@class='infoBoxRowItem']";

    var charNodes = sourceHtmlDoc.DocumentNode.SelectNodes(characterRowXPath + "/a");
    if (charNodes == null)
    {
        return new List<XRay.Term>();
    }

    // Links nested in the 'toggleContent' span (the "...more" section of the row) are not matched
    // by the query above, so they are fetched separately and appended when present.
    var moreCharNodes = sourceHtmlDoc.DocumentNode.SelectNodes(characterRowXPath + "/span[@class='toggleContent']/a");
    var allChars = moreCharNodes == null ? charNodes : charNodes.Concat(moreCharNodes);
    var termCount = moreCharNodes == null ? charNodes.Count : charNodes.Count + moreCharNodes.Count;

    Logger.Log($"Gathering term information from Goodreads... ({termCount})");
    progress?.Set(0, termCount);
    if (termCount > 20)
    {
        Logger.Log("More than 20 characters found. Consider using the 'download to XML' option if you need to build repeatedly.");
    }

    var terms = new ConcurrentBag<XRay.Term>();
    await allChars.ParallelForEachAsync(async charNode =>
    {
        var href = charNode.GetAttributeValue("href", "");
        try
        {
            terms.AddNotNull(await GetTerm(dataUrl, href).ConfigureAwait(false));
        }
        catch (Exception ex)
        {
            // Best effort: one failing character page must not abort the whole run, but every
            // failure is logged (not only 404s) so that missing terms can be diagnosed.
            Logger.Log("Error getting page for character. URL: " + "https://www.goodreads.com" + href
                + "\r\nMessage: " + ex.Message + "\r\n" + ex.StackTrace);
        }
        finally
        {
            // Count failed lookups as processed too, so the progress bar can reach termCount.
            progress?.Add(1);
        }
    }, MaxConcurrentRequests, token);

    return terms.ToList();
}