/// <summary>
/// Parses all data about the app that is available on the app's landing page.
/// Fields whose markup is not found are either set to an explicit fallback
/// (empty string, -1, "NO_CATEGORY_FOUND") or left at the <c>AppModel</c> default,
/// as noted inline; a missing node or attribute never throws.
/// </summary>
/// <param name="response">HTML response of the app's landing page</param>
/// <param name="pageUrl">URL of the app's landing page</param>
/// <returns>Parsed app data structure</returns>
public AppModel ParseAppPage(string response, string pageUrl)
{
    AppModel parsedApp = new AppModel();

    // Updating App Url
    parsedApp.Url = pageUrl;

    // Updating Reference Date
    parsedApp.ReferenceDate = DateTime.Now;

    // Loading HTML Document
    HtmlDocument map = new HtmlDocument();
    map.LoadHtml(response);

    // Parsing App Name
    HtmlNode currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_NAME);
    parsedApp.Name = currentNode == null ? String.Empty : HttpUtility.HtmlDecode(currentNode.InnerText.Trim());

    // Parsing Cover Img Url (empty when the node or its "src" attribute is missing)
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_COVER_IMG);
    parsedApp.CoverImgUrl = AttributeValueOrNull(currentNode, "src") ?? String.Empty;

    // Parsing App Screenshots
    HtmlNodeCollection nodesCollection = map.DocumentNode.SelectNodes(Consts.APP_SCREENSHOTS);

    // SelectNodes yields null (not an empty collection) when nothing matches;
    // in that case Screenshots keeps its default value.
    if (nodesCollection != null)
    {
        // Dumping the "src" attribute of each node to an array, skipping nodes without one
        parsedApp.Screenshots = nodesCollection
            .Select(t => AttributeValueOrNull(t, "src"))
            .Where(src => src != null)
            .Distinct()
            .ToArray();
    }

    // Parsing App Category: the last path segment of the category link
    // (Split returns the whole string when it contains no '/')
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_CATEGORY);
    string catLink = AttributeValueOrNull(currentNode, "href");
    parsedApp.Category = catLink == null ? "NO_CATEGORY_FOUND" : catLink.Split('/').Last();

    // Parsing App Developer/Author
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_DEV);
    parsedApp.Developer = currentNode == null ? String.Empty : currentNode.InnerText.Trim();

    // Parsing If the Developer is a Top Developer (presence of the node is the flag)
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_TOP_DEV);
    parsedApp.IsTopDeveloper = currentNode != null;

    // Parsing App Developer Url
    currentNode = map.DocumentNode.SelectSingleNode(Consts.DEV_URL);
    parsedApp.DeveloperURL = AttributeValueOrNull(currentNode, "content") ?? String.Empty;

    // Parsing Publishing Date (left at its default when the node is missing)
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_PUBLISH_DATE);
    if (currentNode != null)
    {
        parsedApp.PublicationDate = ParseDate(currentNode.InnerText.Replace("-", String.Empty).Trim());
    }

    // Parsing Free x Paid App: a "content" value of "0" means free. A missing node
    // is treated like a missing attribute (free) instead of throwing.
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_FREE_PAID);
    string priceText = AttributeValueOrNull(currentNode, "content");
    parsedApp.IsFree = priceText == null || priceText.Equals("0");

    // Parsing App Price
    if (parsedApp.IsFree)
    {
        parsedApp.Price = 0.0;
    }
    else
    {
        double appPrice;

        if (TryParsePriceAmount(priceText, out appPrice))
        {
            parsedApp.Price = appPrice;

            // Parsing Currency: whatever is left once digits and separators are removed
            parsedApp.Currency = String.Join("", priceText.Where(t => (!Char.IsDigit(t) && !(t.Equals('.') || t.Equals(',')))));

            if (parsedApp.Currency == "$")
            {
                parsedApp.Currency = "US$";
            }
        }
        else
        {
            parsedApp.Price = 0.0;
        }
    }

    // Parsing number of app reviewers (-1 when missing or not numeric)
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_REVIEWERS);
    string reviewers = currentNode == null ? String.Empty : currentNode.InnerText.Trim().Trim('(').Trim(')');

    double parsedReviewers;
    parsedApp.Reviewers = TryParseInvariantDouble(reviewers, out parsedReviewers) ? parsedReviewers : -1;

    // Parsing App Description
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_DESCRIPTION);
    parsedApp.Description = currentNode == null ? String.Empty : HttpUtility.HtmlDecode(currentNode.InnerText.Trim());

    // Parsing App "What's new" section (left at its default when nothing matches)
    nodesCollection = map.DocumentNode.SelectNodes(Consts.WHATS_NEW);
    if (nodesCollection != null)
    {
        parsedApp.WhatsNew = String.Join("\n", nodesCollection.Select(t => HttpUtility.HtmlDecode(t.InnerText)).ToArray());
    }

    // Checking for In app Purchases
    parsedApp.HaveInAppPurchases = map.DocumentNode.SelectSingleNode(Consts.IN_APP_PURCHASE) != null;

    // Parsing App's Score
    Score score = new Score();

    // Total Score
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_SCORE_VALUE);
    score.Total = ParseDouble(currentNode, "content");

    // Rating Count
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_SCORE_COUNT);
    score.Count = ParseDouble(currentNode, "content");

    // Parsing Five Stars Count
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_FIVE_STARS);
    score.FiveStars = ParseDouble(currentNode);

    // Parsing Four Stars Count
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_FOUR_STARS);
    score.FourStars = ParseDouble(currentNode);

    // Parsing Three Stars Count
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_THREE_STARS);
    score.ThreeStars = ParseDouble(currentNode);

    // Parsing Two Stars Count
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_TWO_STARS);
    score.TwoStars = ParseDouble(currentNode);

    // Parsing One Stars Count
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_ONE_STARS);
    score.OneStars = ParseDouble(currentNode);

    // Updating Parsed App's Score
    parsedApp.Score = score;

    // Parsing Last Update Date (left at its default when the node is missing)
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_UPDATE_DATE);
    if (currentNode != null)
    {
        parsedApp.LastUpdateDate = ParseDate(currentNode.InnerText.Replace("-", String.Empty).Trim());
    }

    // Parsing App Size, normalized to megabytes
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_SIZE);
    if (currentNode != null)
    {
        string stringSize = currentNode.InnerText.Trim();
        Double appSize;

        // Checking if the app size is measured in MBs, GBs or KBs.
        // When the numeric part fails to parse, AppSize keeps its default value.
        if (stringSize.EndsWith("M", StringComparison.InvariantCultureIgnoreCase)) // Megabytes
        {
            if (TryParseInvariantDouble(stringSize.Replace("M", "").Replace("m", ""), out appSize))
            {
                parsedApp.AppSize = appSize;
            }
        }
        else if (stringSize.EndsWith("G", StringComparison.InvariantCultureIgnoreCase)) // Gigabytes
        {
            if (TryParseInvariantDouble(stringSize.Replace("G", "").Replace("g", ""), out appSize))
            {
                parsedApp.AppSize = appSize * 1024; // Normalizing gigabytes to megabytes
            }
        }
        else if (stringSize.EndsWith("K", StringComparison.InvariantCultureIgnoreCase)) // Kilobytes
        {
            if (TryParseInvariantDouble(stringSize.Replace("K", "").Replace("k", ""), out appSize))
            {
                parsedApp.AppSize = appSize / 1024; // Normalizing kilobytes to megabytes
            }
        }
        else
        {
            parsedApp.AppSize = -1; // Meaning that the size has no M/G/K unit (e.g. it varies)
        }
    }

    // Parsing App's Current Version
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_VERSION);
    parsedApp.CurrentVersion = currentNode == null ? String.Empty : currentNode.InnerText.Trim();

    // Parsing App's Instalation Count
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_INSTALLS);
    parsedApp.Instalations = currentNode == null ? String.Empty : currentNode.InnerText.Trim();

    // Parsing App's Content Rating
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_CONTENT_RATING);
    parsedApp.ContentRating = currentNode == null ? String.Empty : currentNode.InnerText.Trim();

    // Parsing App's OS Version Required
    currentNode = map.DocumentNode.SelectSingleNode(Consts.APP_OS_REQUIRED);
    parsedApp.MinimumOSVersion = currentNode == null ? String.Empty : currentNode.InnerText.Trim();

    // Parsing Developer Links (e-mail / website / privacy policy).
    // SelectNodes yields null when the page has no such links, so guard before iterating.
    HtmlNodeCollection developerLinks = map.DocumentNode.SelectNodes(Consts.DEVELOPER_URLS);
    if (developerLinks != null)
    {
        foreach (HtmlNode devLink in developerLinks)
        {
            string linkTarget = AttributeValueOrNull(devLink, "href");
            if (linkTarget == null)
            {
                continue; // Nothing to record for a link without a target
            }

            // The comparisons below already ignore case, so no culture-sensitive
            // upper-casing is applied to the text first.
            string tagText = devLink.InnerText.Trim();

            if (tagText.IndexOf("EMAIL", StringComparison.InvariantCultureIgnoreCase) >= 0) // Developer E-mail
            {
                parsedApp.DeveloperEmail = linkTarget.Replace("mailto:", String.Empty).Trim();
            }
            else if (tagText.IndexOf("WEBSITE", StringComparison.InvariantCultureIgnoreCase) >= 0) // Developer Website
            {
                parsedApp.DeveloperWebsite = HttpUtility.HtmlDecode(linkTarget.Trim());
            }
            else // Anything else is recorded as the Privacy Policy link
            {
                parsedApp.DeveloperPrivacyPolicy = HttpUtility.HtmlDecode(linkTarget.Trim());
            }
        }
    }

    // Parsing Physical Address (if available)
    currentNode = map.DocumentNode.SelectSingleNode(Consts.PHYSICAL_ADDRESS);
    parsedApp.PhysicalAddress = currentNode == null ? String.Empty : currentNode.InnerText.Replace("\n", " ").Trim();

    parsedApp.FillMinAndMaxInstalls();

    return parsedApp;
}

/// <summary>
/// Returns the value of the named attribute, or null when the node itself
/// or the attribute is missing.
/// </summary>
private static string AttributeValueOrNull(HtmlNode node, string attributeName)
{
    if (node == null || node.Attributes[attributeName] == null)
    {
        return null;
    }

    return node.Attributes[attributeName].Value;
}

/// <summary>
/// Parses a number with the invariant culture (same number styles as the default
/// <c>Double.TryParse</c> overload), so the result does not depend on the
/// machine's regional settings.
/// </summary>
private static bool TryParseInvariantDouble(string text, out double value)
{
    return Double.TryParse(
        text,
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
        System.Globalization.CultureInfo.InvariantCulture,
        out value);
}

/// <summary>
/// Extracts the numeric amount from a price text such as "$1.99" or "R$ 4,99",
/// keeping the fractional part.
/// </summary>
/// <remarks>
/// Heuristic (the page's number format is not known here): the last '.' or ','
/// is taken as the decimal separator unless it is followed by exactly three
/// digits, in which case every separator is treated as digit grouping.
/// </remarks>
private static bool TryParsePriceAmount(string priceText, out double amount)
{
    amount = 0.0;

    if (String.IsNullOrEmpty(priceText))
    {
        return false;
    }

    char[] separators = { '.', ',' };

    // Keep digits and separators only; separators at either end belong to the
    // currency text (e.g. "Rs. 50"), not to the number.
    string numeric = new string(priceText.Where(c => Char.IsDigit(c) || c == '.' || c == ',').ToArray()).Trim(separators);

    if (numeric.Length == 0)
    {
        return false;
    }

    int lastSeparator = numeric.LastIndexOfAny(separators);
    string integerPart = numeric;
    string fractionPart = String.Empty;

    if (lastSeparator >= 0 && numeric.Length - lastSeparator - 1 != 3)
    {
        integerPart = numeric.Substring(0, lastSeparator);
        fractionPart = numeric.Substring(lastSeparator + 1);
    }

    // Whatever separators remain in the integer part are digit grouping
    integerPart = integerPart.Replace(".", String.Empty).Replace(",", String.Empty);

    string normalized = fractionPart.Length == 0 ? integerPart : integerPart + "." + fractionPart;

    return Double.TryParse(
        normalized,
        System.Globalization.NumberStyles.Float,
        System.Globalization.CultureInfo.InvariantCulture,
        out amount);
}