/// <summary>
/// Rebuilds <c>website.Cities</c> from the option markup returned by <c>SouqApi.GetCities()</c>.
/// </summary>
/// <param name="website">Website whose <c>Cities</c> collection is replaced with the parsed list.</param>
public void Process(Website website)
{
    Console.WriteLine($"{nameof(CityParser)} Start");

    // The API response exposes the option elements as an HTML fragment in sOptions.
    var citiesResponse = SouqApi.GetCities();
    var optionsHtml = new HtmlDocument();
    optionsHtml.LoadHtml(citiesResponse.sOptions);

    // The first option element is skipped (presumably a placeholder entry - confirm).
    var cityOptions = optionsHtml.DocumentNode
        .Descendants()
        .FindByName("option")
        .Skip(1);

    website.Cities = new List<City>();
    foreach (var cityOption in cityOptions)
    {
        // Name comes from the option text, Code from its "value" attribute.
        website.Cities.Add(new City
        {
            Id = Guid.NewGuid(),
            Name = cityOption.InnerText,
            Code = cityOption.Attributes["value"].Value
        });
    }
}
/// <summary>
/// Builds the category tree from the document returned by <c>SouqApi.GetCategories()</c> and stores both the
/// top-level groups and the flat category list on <paramref name="website"/>.
/// </summary>
/// <param name="website">Website whose <c>CategoryGroups</c> and <c>Categories</c> are replaced.</param>
public void Process(Website website)
{
    Console.WriteLine($"{nameof(CategoryParser)} Start");
    sw.Start();

    var topLevelGroups = new List<CategoryGroup>();
    var flatCategories = new List<Category>();

    var categoriesPage = SouqApi.GetCategories();
    var groupNodes = categoriesPage.DocumentNode
        .Descendants()
        .FindByNameNClass("div", "grouped-list");

    foreach (var groupNode in groupNodes)
    {
        // The group's title is taken from the node immediately preceding the "grouped-list" div.
        var topLevelGroup = new CategoryGroup
        {
            Id = Guid.NewGuid(),
            Name = groupNode.PreviousSibling.InnerText.Cleanify(),
            Parent = null,
            Categories = new List<Category>(),
            CategoryGroups = new List<CategoryGroup>()
        };

        // ParseRecursively receives the shared flat list together with the group and its node.
        ParseRecursively(flatCategories, topLevelGroup, groupNode);
        topLevelGroups.Add(topLevelGroup);
    }

    website.CategoryGroups = topLevelGroups;
    website.Categories = flatCategories;

    sw.Stop();
    Console.WriteLine($"{nameof(CategoryParser)} ElapseTime: {sw.Elapsed.ToString()}");
}
/// <summary>
/// Fills <c>Products</c> for every category of <paramref name="website"/> with the product links found in the
/// grids returned by <c>SouqApi.GetProductsFromCategory</c>, then removes the categories flagged as faulty.
/// </summary>
/// <remarks>
/// Categories are processed in shuffled order with up to 50 parallel workers. Each worker only touches its own
/// category, so the per-category product list needs no locking.
/// </remarks>
/// <param name="website">Website whose <c>Categories</c> are populated and then filtered.</param>
public void Process(Website website)
{
    Console.WriteLine($"{nameof(CategoryProductsParser)} Start");
    Stopwatch sw = Stopwatch.StartNew();

    var categories = website.Categories.Shuffle().ToList();

    Parallel.ForEach(categories, new ParallelOptions() { MaxDegreeOfParallelism = 50 }, (category) =>
    {
        try
        {
            Console.WriteLine(category.Name);
            category.Products = new List<Product>();

            string categoryId = category.Url.ExtractCategoryId();
            var gridNativeObjs = SouqApi.GetProductsFromCategory(categoryId);
            if (gridNativeObjs == null)
            {
                // No grids for this category: flag it so it is filtered out after the loop.
                category.IsFaulty = true;
                return;
            }

            category.ExpectedProductCount = gridNativeObjs.Count() * SouqApiConstants.ProductSectionListLimit;

            // URLs already added to this category. Replaces a linear scan of category.Products per product
            // (quadratic overall) with a constant-time membership test; string equality is ordinal either way.
            var seenUrls = new HashSet<string>();

            foreach (var gridObj in gridNativeObjs)
            {
                var document = new HtmlDocument();
                document.LoadHtml(gridObj.body);

                var productsNodes = document.DocumentNode.Descendants()
                    .FindByNameNAttribute("div", "data-category-name");

                foreach (var productNode in productsNodes)
                {
                    var quickViewNode = productNode.Descendants()
                        .SingleByNameNContainClass("a", new[] { "sPrimaryLink", "img-link" });

                    Product product = new Product()
                    {
                        Id = Guid.NewGuid(),
                        Url = quickViewNode.Attributes["href"].Value
                    };

                    // Add returns false when the URL was seen before, i.e. the product is a duplicate.
                    if (seenUrls.Add(product.Url))
                    {
                        product.Category = category;
                        category.Products.Add(product);
                    }
                }
            }

            category.ActualProductCount = category.Products.Count;
        }
        catch (Exception e)
        {
            // Failures are logged and the category keeps whatever was collected so far;
            // it is not flagged as faulty by this path.
            Console.WriteLine(category.Name);
            Console.WriteLine(e);
            Debugger.Break();
        }
    });

    website.Categories = categories.Where(l => !l.IsFaulty).ToList();

    sw.Stop();
    Console.WriteLine($"{nameof(CategoryProductsParser)} ElapseTime: {sw.Elapsed.ToString()}");
}
/// <summary>
/// Loads the seller profile page via <c>SouqApi.GetSellerProfile</c> and copies the average rate, join date and
/// the per-tab rating statistics onto <paramref name="seller"/>.
/// </summary>
/// <remarks>
/// Outcomes:
/// <list type="bullet">
/// <item>No "grouped-list rating-tabs" div on the page: <c>IsFaulty</c> and <c>HasNoData</c> are set.</item>
/// <item>The page contains "This seller has no ratings yet": only <c>HasNoData</c> is set.</item>
/// <item>Otherwise the rating fields are populated.</item>
/// </list>
/// </remarks>
/// <param name="seller">Seller to update; its <c>UserId</c> selects the profile to load.</param>
private void ProcessSeller(Seller seller)
{
    var sellerNative = SouqApi.GetSellerProfile(seller.UserId);

    // Checked first so the full-text scan below is skipped for pages without the rating container.
    var sellerInfoError = !sellerNative.DocumentNode.Descendants()
        .AnyByNameNContainClass("div", new[] { "grouped-list", "rating-tabs" });
    if (sellerInfoError)
    {
        seller.IsFaulty = true;
        seller.HasNoData = true;
        return;
    }

    var sellerInfoAvailable = !sellerNative.DocumentNode.Descendants()
        .Any(l => l.Name == "div" && l.InnerText.Contains("This seller has no ratings yet"));
    if (!sellerInfoAvailable)
    {
        seller.HasNoData = true;
        return;
    }

    var sellerDetailsNode = sellerNative.DocumentNode.Descendants()
        .SingleByNameNContainClass("div", new[] { "grouped-list", "rating-tabs" });

    // First child div holds the seller info; the last child div (statistics) was never read, so it is not fetched.
    var sellerInfoNode = sellerDetailsNode.ChildNodes.FindByName("div").First();

    var sellerNameUrlNode = sellerInfoNode.Descendants().SingleByName("h6").ChildNodes.SingleByName("a");
    var sellerRateNode = sellerInfoNode.Descendants().SingleByNameNClass("i", "star-rating-svg")
        .ChildNodes.SingleByName("i");
    var sellerDateNode = sellerNameUrlNode.ParentNode.ParentNode.ParentNode.ChildNodes.FindByName("div")
        .Last();
    var sellerRatingTabs = sellerDetailsNode.Descendants().SingleByNameNClass("div", "tabs-content")
        .ChildNodes.FindByName("section");

    // The rate is encoded as a CSS width percentage ("width:NN%") and scaled to a 0-5 range.
    // CSS always uses '.' as the decimal separator, so parse with the invariant culture; under a
    // comma-decimal current culture "85.5" would otherwise be read as 855.
    var rateWidthText = sellerRateNode.Attributes["style"].Value.Replace("width:", "").Replace("%", "");
    var sellerRate = float.Parse(rateWidthText, System.Globalization.CultureInfo.InvariantCulture) / 100f * 5f;

    // NOTE(review): the date is still parsed with the current culture, as before - confirm the page's date format.
    var sellerDate = DateTime.Parse(sellerDateNode.InnerText.Trim().Replace("Member since: ", ""));

    // Slots map, in page order, to LastYearRate, LastQuarterRate and LastMonthRate below.
    SellerRate[] rates = new SellerRate[3];
    int index = 0;
    foreach (var sellerRatingTab in sellerRatingTabs)
    {
        if (index >= rates.Length)
        {
            // More tabs than known periods: ignore the extras instead of overflowing the array.
            break;
        }

        var ratingLists = sellerRatingTab.Descendants().FindByName("ul").ToList();
        var positiveNode = ratingLists.FirstOrDefault(l => l.InnerText.Contains("Positive"));
        var negativeNode = ratingLists.FirstOrDefault(l => l.InnerText.Contains("Negative"));
        var totalRatingNode = sellerRatingTab.ChildNodes.SingleOrDefaultByName("div");

        // The slot stays null only when the tab carries none of the three pieces of data.
        SellerRate rate = null;

        if (totalRatingNode != null)
        {
            rate = new SellerRate();
            rate.TotalRating = int.Parse(totalRatingNode.InnerText.Trim().Replace("Total Ratings: ", ""));
        }

        if (positiveNode != null)
        {
            // Previously dereferenced a null rate when the "Total Ratings" div was missing.
            if (rate == null)
            {
                rate = new SellerRate();
            }

            var positivePercentageNode = positiveNode.ChildNodes.FindByName("li").Last();
            rate.PositiveRatePercentage = int.Parse(positivePercentageNode.InnerText.Trim().Replace("%", ""));
        }

        if (negativeNode != null)
        {
            if (rate == null)
            {
                rate = new SellerRate();
            }

            var negativePercentageNode = negativeNode.ChildNodes.FindByName("li").Last();
            rate.NegativeRatePercentage = int.Parse(negativePercentageNode.InnerText.Trim().Replace("%", ""));
        }

        rates[index++] = rate;
    }

    seller.AverageRate = sellerRate;
    seller.JoinDate = sellerDate;
    seller.LastYearRate = rates[0];
    seller.LastQuarterRate = rates[1];
    seller.LastMonthRate = rates[2];
}
/// <summary>
/// Runs <c>SouqApi.SearchProducts</c> for every category flattened from <c>website.CategoryGroups</c> and fills
/// each category's redirect/query metadata and de-duplicated product list from the returned grids.
/// </summary>
/// <remarks>
/// Categories are processed with <c>Parallel.ForEach</c> using the default degree of parallelism. The method
/// ends by waiting on <c>Console.ReadLine()</c>.
/// </remarks>
/// <param name="website">Website whose category groups supply the categories to populate.</param>
public void Process(Website website)
{
    Console.WriteLine($"{nameof(CategoryProductsSearchParser)} Start");
    sw.Start();

    var categories = website.CategoryGroups.FlattenCategories();

    Parallel.ForEach(categories, (category) =>
    {
        category.Products = new List<Product>();
        Console.WriteLine(category.Name);

        string categoryId = category.Url.ExtractCategoryId();
        var gridNativeObjs = SouqApi.SearchProducts(categoryId);

        // Redirect and query metadata are read from the first grid only; fetch it once instead of
        // re-evaluating First() for every field. As before, this throws when no grid is returned.
        var firstGrid = gridNativeObjs.First();
        bool hasRedirection = !string.IsNullOrEmpty(firstGrid.redirect_url);

        category.HasRedirection = hasRedirection;
        category.Query = !hasRedirection ? firstGrid.jsonData.meta_data.query : null;
        category.RedirectQuery = hasRedirection ? firstGrid.jsonData.meta_data.query : null;
        category.RedirectUrl = hasRedirection ? firstGrid.redirect_url : null;
        category.ExpectedProductCount = gridNativeObjs.Count() * SouqApiConstants.ProductSectionListLimit;

        // URLs already added to this category; replaces a linear scan of category.Products per product.
        var seenUrls = new HashSet<string>();

        foreach (var gridObj in gridNativeObjs)
        {
            var productsNative = gridObj.jsonData.units;
            foreach (var productNative in productsNative)
            {
                // NOTE(review): if price is an integral type this is integer division - confirm.
                var price = productNative.price / 100;

                // Market price, discount flags, sales rank, revisioning and page/section are not populated here.
                Product product = new Product()
                {
                    Category = category,
                    Id = Guid.NewGuid(),
                    UnitId = productNative.unit_id.ToString(),
                    Title = productNative.title,
                    ProductId = productNative.item_id.ToString(),
                    Url = productNative.primary_link,
                    ImageUrl = productNative.image_url,
                    Manufacturer = productNative.manufacturer.ToTitleCase(),
                    Ean = productNative.ean.FlattenString(),
                    IsFreeShipping = productNative.free_shipping_eligiblity,
                    CurrentPrice = price,
                    IsSouqFulfiled = productNative.is_fbs,
                    IsBundled = productNative.bundle_label != "false",
                    Currency = gridObj.jsonData.meta_data.currency
                };

                // Add returns false for a URL that is already present, so duplicates are skipped.
                if (seenUrls.Add(product.Url))
                {
                    category.Products.Add(product);
                }
            }
        }

        category.ActualProductCount = category.Products.Count;
    });

    sw.Stop();
    Console.WriteLine($"{nameof(CategoryProductsSearchParser)} ElapseTime: {sw.Elapsed.ToString()}");

    // NOTE(review): blocks until a line is entered on stdin; looks like a debugging pause - confirm before removing.
    Console.ReadLine();
}
/// <summary>
/// Queries <c>SouqApi.GetDeliveryInfo</c> for every entry in <c>cities</c> in parallel and stores the results in
/// <c>product.Deliveries</c>. Products whose availability is <c>OutStock</c> are left untouched.
/// </summary>
/// <remarks>
/// All workers share one access token. When a request throws, the worker refreshes the token and retries the
/// same city. The refresh happens under a lock and only if nobody else has replaced the token since this
/// worker read it, so a batch of failures caused by the same stale token triggers a single refresh.
/// This replaces a wait-handle rendezvous that updated shared counters without synchronisation and could block
/// forever when some cities succeeded while another one failed.
/// NOTE(review): as before, a city is retried until it succeeds; there is no upper bound on attempts.
/// </remarks>
/// <param name="product">Product whose delivery options are looked up and whose <c>Deliveries</c> is set.</param>
private void ParseProductDeliveryInfo(Product product)
{
    if (product.Availability == ProductAvailability.OutStock)
    {
        return;
    }

    ConcurrentBag<ProductDelivery> productDeliveries = new ConcurrentBag<ProductDelivery>();

    // Shared token state: both variables are only written while holding tokenGate.
    object tokenGate = new object();
    var token = SouqApi.GetProductAccessTokens(product.Url.ExtractProductFullId())["searchForm"];
    int tokenGeneration = 0;

    Parallel.ForEach(cities, (city) =>
    {
        SouqDeliveryInfo deliveryInfo;

        while (true)
        {
            // The initializer only gives `var` its type; the value actually used is the consistent
            // (token, generation) pair read under the lock.
            var attemptToken = token;
            int attemptGeneration;
            lock (tokenGate)
            {
                attemptToken = token;
                attemptGeneration = tokenGeneration;
            }

            try
            {
                deliveryInfo = SouqApi.GetDeliveryInfo(city.Code, city.Name, product.UnitId,
                    attemptToken.hitsCfs, attemptToken.hitsCfsMeta);
                break;
            }
            catch (Exception)
            {
                lock (tokenGate)
                {
                    // Refresh only if the token that just failed is still the current one; otherwise
                    // another worker already replaced it and this worker simply retries with the new one.
                    if (attemptGeneration == tokenGeneration)
                    {
                        token = SouqApi.GetProductAccessTokens(product.Url.ExtractProductFullId())["searchForm"];
                        tokenGeneration++;
                    }
                }
            }
        }

        // A null estimate means no delivery to this city.
        var daysNo = deliveryInfo.estimate_by_days;
        var canDeliver = daysNo != null;

        ProductDelivery delivery = new ProductDelivery()
        {
            City = city,
            Id = Guid.NewGuid(),
            EstimatedDays = daysNo,
            CanDeliver = canDeliver
        };
        productDeliveries.Add(delivery);
    });

    product.Deliveries = productDeliveries.ToList();
}
/// <summary>
/// Parses one product: bucket, body, bundles, reviews and delivery info, then parses the product URLs
/// discovered in the body. Prints per-task and overall timing to the console.
/// </summary>
/// <remarks>
/// Returns early, without updating the <c>finish</c> counter, when the bucket or the product cannot be loaded
/// or when the body parser reports "Inconsist Product Body &amp; Bucket" (the product is then flagged faulty).
/// </remarks>
/// <param name="product">Product to populate.</param>
/// <param name="mainProduct">
/// Null when <paramref name="product"/> is itself the main product. Otherwise discovered URLs that match the
/// main product or one of its configurations are dropped, and the remainder is parsed against the main product.
/// </param>
private void ParseLogic(Product product, Product mainProduct)
{
    Stopwatch taskTimer = Stopwatch.StartNew();

    // The task counter is shared between concurrent invocations; take the same lock that guards
    // `finish` so that two tasks cannot obtain the same id.
    int id;
    lock (lockObj)
    {
        id = index++;
    }
    Console.WriteLine(id);

    List<string> discoveriedProducts = new List<string>();

    // Computed once; it is needed for both the bucket and the product request.
    var productFullId = product.Url.ExtractProductFullId();

    var productBucket = SouqApi.GetProductBucket(productFullId);
    if (productBucket == null)
    {
        return;
    }

    var productNative = SouqApi.GetProduct(productFullId);
    if (productNative == null)
    {
        return;
    }

    ParseProductBucket(product, productBucket);

    try
    {
        ParseProductBody(product, productNative.body, discoveriedProducts);
        if (mainProduct != null)
        {
            discoveriedProducts = discoveriedProducts
                .Where(l => l != mainProduct.Url)
                .Where(l => mainProduct.ProductConfigurations.All(c => c.Url != l))
                .ToList();
        }
    }
    catch (Exception ex)
    {
        if (ex.Message == "Inconsist Product Body & Bucket")
        {
            product.IsFaulty = true;
            return;
        }

        // Any other failure is still tolerated and parsing continues with what was collected,
        // but it is now reported instead of being silently discarded.
        Console.WriteLine(ex);
    }

    if (productNative.bundles != null)
    {
        ParseProductBundles(product, productNative.bundles);
    }

    var productReviewsNative = SouqApi.GetProductReview(product.ProductId);
    ParseProductReviews(product, productReviewsNative);
    ParseProductDeliveryInfo(product);

    // Discovered products are parsed against the main product when there is one.
    ParseDiscoveredProducts(mainProduct ?? product, discoveriedProducts);

    taskTimer.Stop();
    Console.WriteLine($"Task({id}) ElapseTime: {taskTimer.Elapsed.ToString()}");

    lock (lockObj)
    {
        finish++;

        // One clock read so the elapsed time and the per-task average in the message agree.
        var elapsed = DateTime.Now - start;
        Console.WriteLine(
            $@"{nameof(ProductParser)} ElapseTime: {elapsed} + Count: {finish} + Task Time: {elapsed.TotalSeconds / finish} [Sec]");
    }
}