// Checks that GetItemsCount reports a non-zero number of items for the
// item tables found on the page at `url`.
public void TestItemsCount()
{
    ParseTTDb parse = new ParseTTDb();
    HtmlNode root = PageData.GetPageRootNode(url);

    // tables with items urls
    var itemsTables = root.Descendants()
        .Where(x => x.GetAttributeValue("class", "").Equals("specscompare no-side-border-xxs"))
        .ToList();

    // items count
    int itemsCount = parse.GetItemsCount(itemsTables);

    // The not-expected value goes first and the actual value second,
    // so that a failure message reports the two the right way round.
    Assert.AreNotEqual(0, itemsCount);
}
// Checks that calling InitializeProducer for every producer div found on
// the page at `url` leaves ProducerList non-empty.
public void TestInitializeProducers()
{
    ParseTTDb parse = new ParseTTDb();
    var root = PageData.GetPageRootNode(url);

    // divs with the names of producers
    var producersDivs = root.Descendants()
        .Where(x => x.GetAttributeValue("class", "").Equals("dbcat max-xxs no-radius-xxs no-side-border-xxs"))
        .ToList();

    foreach (var producersDiv in producersDivs)
    {
        parse.InitializeProducer(producersDiv);
    }

    // The not-expected value goes first and the actual value second,
    // so that a failure message reports the two the right way round.
    Assert.AreNotEqual(0, parse.ProducerList.Count);
}
// Loads the item page at `url` and returns, in this order: the item's name
// (text of the first <h1>), its ratings (every "cell_rating" / "average" cell
// of the first table, converted with GetRating), the review count, and the
// root node of the loaded page.
private Tuple<string, double[], int, HtmlNode> ParseItemMainData(string url)
{
    // get html document of the current item's page
    var root = PageData.GetPageRootNode(url);

    // get name and ratings count of the item
    string name = root.Descendants("h1").First().InnerText;
    int ratingsCount = Convert.ToInt32(root.SelectSingleNode(".//span[@itemprop='reviewCount']").InnerText);

    // get first table
    var ratingsTable = root.Descendants("table").FirstOrDefault();

    // get all ratings including the overall (average) rating
    var ratingsList = ratingsTable.Descendants()
        .Where(x => x.GetAttributeValue("class", "").Equals("cell_rating")
                 || x.GetAttributeValue("class", "").Equals("average"))
        .ToList();

    // When parsing pips and the page does not have exactly 10 rating cells,
    // a null placeholder is inserted at index 4; it is stored as 0 below.
    // (Presumably the missing "reversal" rating, per the original comment.)
    if (pipsFlag && ratingsList.Count != 10)
    {
        ratingsList.Insert(4, null);
    }

    // get item's ratings
    double[] ratings = new double[ratingsList.Count];
    for (int i = 0; i < ratingsList.Count; i++)
    {
        var ratingNode = ratingsList[i];

        // Explicit check for the placeholder instead of catching
        // NullReferenceException, which would also hide unrelated bugs.
        ratings[i] = ratingNode == null ? 0 : GetRating(ratingNode.InnerText);
    }

    return new Tuple<string, double[], int, HtmlNode>(name, ratings, ratingsCount, root);
}
// Parses the items page at `site + page`.
//
// page      - path appended to `site` to form the page url.
// itemType  - type of the items being parsed; its parameterless constructor and
//             its double-typed properties are cached in fields for later use.
// itemList  - already known items keyed by name; used to decide between insert and update.
// bw        - worker that receives a progress report after every processed item.
// itemCount - set to the value returned by GetItemsCount for the page's item tables.
//
// Returns a DataToSave whose ItemsToInsert holds items whose name is not in
// itemList and whose ItemsToUpdate holds known items whose Ratings compare
// as different from the stored ones.
public DataToSave ParseItems(string page, Type itemType, Dictionary<string, dynamic> itemList, BackgroundWorker bw, out int itemCount)
{
    var root = PageData.GetPageRootNode(site + page);

    // divs with the names of producers
    var producersDivs = root.Descendants()
        .Where(x => x.GetAttributeValue("class", "").Equals("dbcat max-xxs no-radius-xxs no-side-border-xxs"))
        .ToList();

    // tables with items urls
    var itemsTables = root.Descendants()
        .Where(x => x.GetAttributeValue("class", "").Equals("specscompare no-side-border-xxs"))
        .ToList();

    // items count
    itemCount = GetItemsCount(itemsTables);

    // initialize constructor, type and rating properties of the current items
    constructor = itemType.GetConstructor(new Type[0]);
    currentItemType = itemType;
    itemRatingsProperties.Clear();
    foreach (PropertyInfo property in itemType.GetProperties())
    {
        if (property.PropertyType == typeof(double))
        {
            itemRatingsProperties.Add(property);
        }
    }

    // Assign the flag on every call so that it does not stay set from an
    // earlier "Pips" run when a different item type is parsed afterwards.
    pipsFlag = currentItemType.Name == "Pips";

    int counter = 0;
    currentItemId = itemList.Count;
    DataToSave dataToSave = new DataToSave();

    for (int i = 0; i < producersDivs.Count; i++)
    {
        var producerId = InitializeProducer(producersDivs[i]);

        // get items "a" tags of the producer
        var itemsUrls = itemsTables[i].Descendants("a").ToList();

        Parallel.ForEach(itemsUrls, (itemUrl) =>
        {
            // The url must be local to the lambda: a variable shared between
            // iterations could be overwritten by another worker before it is
            // passed to ParseItem or reported as progress.
            string itemPageUrl = site + itemUrl.Attributes["href"].Value;
            dynamic item = ParseItem(itemPageUrl, producerId);

            // id counters, result lists and progress reporting are shared state
            lock (locker)
            {
                try
                {
                    if (!itemList.ContainsKey(item.Name))
                    {
                        item.Id = ++currentItemId;
                        item.ItemId = ++allItemId;
                        dataToSave.ItemsToInsert.Add(item);
                    }
                    // NOTE(review): `!=` is resolved dynamically; if Ratings is an
                    // array this compares references, not contents - confirm.
                    else if (itemList[item.Name].Ratings != item.Ratings)
                    {
                        item.Id = itemList[item.Name].Id;
                        item.ItemId = itemList[item.Name].ItemId;
                        dataToSave.ItemsToUpdate.Add(item);
                    }

                    bw.ReportProgress(++counter, itemPageUrl);
                }
                catch (Exception e)
                {
                    Console.WriteLine(e);
                    throw;
                }
            }
        });
    }

    return dataToSave;
}