/// <summary>
/// Walks every <c>tr</c> of every <c>.sectiontable</c> element in a list page and
/// passes one <see cref="Game"/> per not-yet-known row to the sink.
/// </summary>
/// <remarks>
/// Cell 0 decides whether the row links to a detail page (it has child elements);
/// cells 1, 2 and 3 are read as SKU, title and language respectively.
/// For linked rows the detail page is fetched through <c>cdx</c> and scraped with
/// <c>ScrapeGameData</c>; when that yields nothing, the row's own cells are used instead.
/// Platform and region are detected once per document and stamped on every game.
/// </remarks>
/// <param name="document">Parsed list page to extract games from.</param>
private void ParseList(Document document)
{
    // Cells 0..3 are all indexed below, so a usable row needs at least this many.
    const int RequiredCellCount = 4;

    string platformName = DetectPlatform(document);
    string regionName = DetectRegion(document);

    foreach (Element table in document.Body.Select(".sectiontable"))
    {
        foreach (Element tr in table.Select("tr"))
        {
            Elements tdElements = tr.Select("td");

            // Skips rows without data cells as well as short rows that would
            // otherwise fail on the indexed cell reads below.
            if (tdElements.Count < RequiredCellCount)
            {
                continue;
            }

            if (sink.GameKnown(tdElements[1].Text))
            {
                continue;
            }

            Game child;
            bool scrapable = tdElements[0].Children.Count > 0;
            if (scrapable)
            {
                // Prefer the detail page; fall back to the list row when it cannot be scraped.
                child = TryScrapeLinkedGame(tdElements[0]) ?? CreateGameFromRow(tdElements);

                // NOTE(review): DateTime.MaxValue is presumably a "do not import" sentinel
                // set by ScrapeGameData (or Game's defaults) - confirm.
                if (child.DateReleased == DateTime.MaxValue)
                {
                    continue;
                }
            }
            else
            {
                child = CreateGameFromRow(tdElements);
            }

            child.Platform = platformName;
            child.Region = regionName;
            sink.HandleGame(child);
            Console.WriteLine(child.Title);
        }
    }
}

/// <summary>
/// Builds a <see cref="Game"/> from the list row itself: SKU, title and language
/// come from cells 1, 2 and 3.
/// </summary>
/// <param name="cells">The row's <c>td</c> cells; the caller guarantees at least four.</param>
/// <returns>A new game carrying only the data visible in the list row.</returns>
private Game CreateGameFromRow(Elements cells)
{
    Game game = new Game();
    game.SKU = cells[1].Text;
    game.Title = cells[2].Text;
    game.Language = cells[3].Text;
    return game;
}

/// <summary>
/// Follows the first anchor in <paramref name="linkCell"/> to its detail page via
/// <c>cdx</c> and scrapes it with <c>ScrapeGameData</c>.
/// </summary>
/// <param name="linkCell">The row's first cell, expected to contain the detail link.</param>
/// <returns>
/// The scraped game, or <c>null</c> when the cell has no usable link, no <c>cdx</c>
/// entry matches the link, or <c>ScrapeGameData</c> itself returns <c>null</c>.
/// </returns>
private Game TryScrapeLinkedGame(Element linkCell)
{
    // The cell may have children without any of them being an anchor.
    Elements anchors = linkCell.Select("a");
    if (anchors.Count == 0)
    {
        return null;
    }

    // An empty link would make EndsWith match every entry and fetch an unrelated page.
    string link = anchors[0].Attributes["href"];
    if (string.IsNullOrEmpty(link))
    {
        return null;
    }

    // URLs are identifiers, not linguistic text, so compare ordinally.
    CdxEntry request = cdx.Entries.FirstOrDefault(x => x.Url.EndsWith(link, StringComparison.Ordinal));
    if (request == null)
    {
        return null;
    }

    CdxWebResponse resp = cdx.GetResponse(request);
    Document respDoc = Dcsoup.Parse(resp.GetResponseStream(), null, link);
    return ScrapeGameData(respDoc);
}
/// <summary>
/// Fetches every URL in <c>LIST_URLS</c> through <c>cdx</c>, parses each response
/// into a document, and feeds it to <c>ParseList</c>.
/// </summary>
/// <remarks>
/// The sink is disposed once all lists have been processed. Disposal happens in a
/// <c>finally</c> block so it also runs when fetching or parsing a list throws;
/// the exception still propagates to the caller.
/// </remarks>
private void Run()
{
    try
    {
        foreach (string listUrl in LIST_URLS)
        {
            CdxWebResponse response = cdx.GetResponse(listUrl);
            Document document = Dcsoup.Parse(response.GetResponseStream(), null, listUrl);
            ParseList(document);
        }
    }
    finally
    {
        sink.Dispose();
    }
}