/// <summary>
/// Requests the search page for <paramref name="query"/> and reports what it
/// finds through <paramref name="context"/>.
/// </summary>
/// <param name="query">Search text; passed to GetSearchUrl to build the request URL.</param>
/// <param name="context">Receives the results, the progress maximum and the completion signal.</param>
/// <exception cref="FormatException">
/// Raised from the response callback when the result count cannot be found in the page.
/// </exception>
public void DoSearch(string query, LoadingContext<BookSummary> context) {
    // No total is known until the first response has been parsed.
    // NOTE(review): null presumably means "indeterminate" to the progress UI - confirm.
    context.Progress.Progress = null;

    Http.RequestHtml(GetSearchUrl(query), doc => {
        // The response can be an item details page instead of a result list
        // (presumably when the search has exactly one match - confirm).
        // FirstOrDefault: a page without a <title> is simply not a details page.
        // Ordinal: this is a fixed marker string, not linguistic text.
        var titleNode = doc.DocumentNode.Descendants("title").FirstOrDefault();
        if (titleNode != null
         && titleNode.InnerText.StartsWith("The Library Catalog : Item #", StringComparison.Ordinal)) {
            context.AddResult(new DetailsPageSummary(doc));
            context.SetCompleted();
            return;
        }

        // The result count is read from the text of the node immediately
        // preceding the <input name="server"> element.
        var serverInput = doc.DocumentNode.Descendants("input")
                                          .First(n => n.GetAttributeValue("name", "") == "server");
        var countNode = serverInput.PreviousSibling;
        var countString = countNode == null ? "" : countNode.InnerText;

        var countMatch = countParser.Match(countString);
        if (!countMatch.Success)
            throw new FormatException("Could not find the result count in: \"" + countString + "\"");

        // Scraped, machine-generated number: parse it independently of the user's culture.
        var count = int.Parse(countMatch.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture);

        if (count == 0) {
            context.SetCompleted();
        } else {
            context.Progress.Maximum = count;
            ParseResultList(doc, context);
        }
    });
}
/// <summary>
/// Adds every result row found in <paramref name="doc"/> to <paramref name="context"/>,
/// then requests the page behind the "Next" link (if there is one) and parses it the same way.
/// </summary>
/// <param name="doc">A result-list page.</param>
/// <param name="context">Receives the results, progress increments and the completion signal.</param>
static void ParseResultList(HtmlDocument doc, LoadingContext<BookSummary> context) {
    // NOTE(review): this path assumes the document's first node is the root element
    // and that it contains body > center > table; any other layout fails here.
    var mainTable = doc.DocumentNode.FirstChild.Element("body").Element("center").Element("table");

    foreach (var row in mainTable.Elements("tr")) {
        if (context.Progress.WasCanceled) {
            context.SetCompleted();
            return;
        }

        //Rows without exactly two <th> cells are <hr> rows; ignore them
        if (row.Elements("th").Count() != 2)
            continue;

        context.AddResult(new SearchResult(row));
        context.Progress.Progress++;
    }

    //Their next link is supposed to be in a <form> tag,
    //but the <form> tag is malformed.  I do not want to
    //rely entirely on text matching for this, so I look
    //in the <form>'s parent.
    var nextLink = doc.DocumentNode.Descendants("form")
                                   .SelectMany(f => f.ParentNode.Descendants("a"))
                                   .FirstOrDefault(a => a.InnerText.Contains("Next"));

    if (nextLink == null) {
        // Last page: every counted book should have been reported by now.
        Debug.Assert(context.Progress.Progress == context.Progress.Maximum, "Book count mismatch");
        context.SetCompleted();
        return;
    }

    // The loop only checks for cancellation before each row, so a cancel that
    // arrives after the last row must be honored here rather than costing
    // another page request.
    if (context.Progress.WasCanceled) {
        context.SetCompleted();
        return;
    }

    // The href is resolved against baseUri, so relative links work.
    Http.RequestHtml(
        new Uri(baseUri, nextLink.GetAttributeValue("href", "")),
        newDoc => ParseResultList(newDoc, context)
    );
}