/// <summary>
/// Downloads the Google Play page for an application and copies the
/// "updated", "installs" and "current version" values found on it onto the
/// supplied entry.
/// </summary>
/// <param name="entry">
/// Entry whose <c>AppId</c> holds the application's path on play.google.com.
/// Its <c>Updated</c>, <c>Installations</c> and <c>CurrentVersion</c> members
/// are overwritten when the matching element is present on the page.
/// </param>
/// <returns>
/// The same <paramref name="entry"/> instance. It is returned untouched when
/// the page contains no description block or no rating block.
/// </returns>
static GooglePlayEntry ParseGooglePlayApplicationPage(GooglePlayEntry entry)
{
    string response;
    using (var wc = new WebClient())
    {
        wc.Encoding = Encoding.UTF8;
        wc.Headers = new WebHeaderCollection
        {
            [HttpRequestHeader.UserAgent] = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/534.24 (KHTML, like Gecko) Ubuntu/10.04 Chromium/11.0.696.0 Chrome/11.0.696.0 Safari/534.24"
        };

        // AppId is taken from an anchor href by the search-results parser, so it
        // presumably starts with '/'. Strip leading slashes to avoid requesting
        // "https://play.google.com//store/...".
        var path = (entry.AppId ?? string.Empty).TrimStart('/');
        response = wc.DownloadString($"https://play.google.com/{path}");
    }

    Console.WriteLine($"Parsing - {entry.AppId}");

    var doc = new HtmlDocument();
    doc.LoadHtml(response);

    // The description and rating blocks are only used as a presence check:
    // when either is missing the entry is returned without further parsing.
    // NOTE(review): the description text and rating value themselves are not
    // stored on the entry - confirm whether they should be.
    if (doc.DocumentNode.SelectSingleNode(@"//div[@class='show-more-content text-body']") == null)
    {
        return entry;
    }

    if (doc.DocumentNode.SelectSingleNode(@"//div[@class='rating-box']/div/meta") == null)
    {
        return entry;
    }

    // Each itemprop element is optional; a missing one leaves the
    // corresponding member of the entry as it was.
    var updatedNode = doc.DocumentNode.SelectSingleNode("//div[@itemprop='datePublished']");
    if (updatedNode != null)
    {
        entry.Updated = updatedNode.InnerText;
    }

    var installsNode = doc.DocumentNode.SelectSingleNode("//div[@itemprop='numDownloads']");
    if (installsNode != null)
    {
        entry.Installations = installsNode.InnerText;
    }

    var versionNode = doc.DocumentNode.SelectSingleNode("//div[@itemprop='softwareVersion']");
    if (versionNode != null)
    {
        entry.CurrentVersion = versionNode.InnerText;
    }

    return entry;
}
/// <summary>
/// Extracts application entries from the HTML of a Google Play search
/// results page.
/// </summary>
/// <param name="response">HTML markup of the search results page.</param>
/// <param name="q">
/// Search query that produced the page; stored in <c>SearchQuery</c> of every
/// entry and included in diagnostic console output.
/// </param>
/// <returns>
/// One entry per result card that contains a title link, a description and a
/// developer link. Cards missing any of these are skipped with a console
/// message. The result is empty when no cards are found or
/// <paramref name="response"/> is null or empty.
/// </returns>
static IEnumerable<GooglePlayEntry> ParseGooglePlaySearchResults(string response, string q)
{
    var entries = new List<GooglePlayEntry>();
    if (string.IsNullOrEmpty(response))
    {
        return entries;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(response);

    // contains() rather than an exact class match, so cards carrying extra
    // class names after this prefix are still selected.
    var nodes = doc.DocumentNode.SelectNodes(@"//div[contains(@class, 'card no-rationale square-cover apps')]");
    if (nodes == null)
    {
        return entries;
    }

    foreach (HtmlNode node in nodes)
    {
        try
        {
            var detailsNode = node.SelectSingleNode(@"div/div[@class='details']");
            var titleNode = detailsNode?.SelectSingleNode("a[@class='title']");
            var descriptionNode = detailsNode?.SelectSingleNode(@"div[@class='description']");
            var developerNode = detailsNode?.SelectSingleNode(@"div[@class='subtitle-container']/a");

            // Check for missing elements explicitly instead of relying on a
            // NullReferenceException to skip incomplete cards.
            if (titleNode == null || descriptionNode == null || developerNode == null)
            {
                Console.WriteLine($"Result card is missing title, description or developer link while parsing {q}");
                continue;
            }

            var entry = new GooglePlayEntry();
            entry.SearchQuery = q;

            // The title link's href is stored as the application id.
            entry.AppId = titleNode.GetAttributeValue("href", string.Empty);
            entry.AppName = titleNode.InnerText.Trim();
            Console.WriteLine(entry.AppId);

            entry.Desc = descriptionNode.InnerText.Trim();

            // The developer link's href is stored as the developer id.
            entry.DevId = developerNode.GetAttributeValue("href", string.Empty);

            entries.Add(entry);
        }
        catch (Exception ex)
        {
            // Best effort: one malformed card must not abort the whole page.
            Console.WriteLine($"{ex.Message} while parsing {q}");
        }
    }

    return entries;
}