コード例 #1
0
        /// <summary>
        /// Loads the city list returned by <c>SouqApi.GetCities</c> and stores it on
        /// <paramref name="website"/> as a freshly created <c>Cities</c> collection.
        /// </summary>
        /// <param name="website">Site model whose <c>Cities</c> list is replaced and then filled.</param>
        public void Process(Website website)
        {
            Console.WriteLine($"{nameof(CityParser)} Start");

            var citiesResponse = SouqApi.GetCities();

            // The response carries the option markup as a string; parse it into a DOM first.
            var optionsHtml = new HtmlDocument();
            optionsHtml.LoadHtml(citiesResponse.sOptions);

            // The first <option> is skipped (presumably a placeholder entry - TODO confirm against the markup).
            var cityOptions = optionsHtml.DocumentNode
                                         .Descendants()
                                         .FindByName("option")
                                         .Skip(1);

            website.Cities = new List<City>();

            foreach (var cityOption in cityOptions)
            {
                // One City per <option>: the text is the display name, the "value" attribute is the code.
                var city = new City
                {
                    Id = Guid.NewGuid(),
                    Name = cityOption.InnerText,
                    Code = cityOption.Attributes["value"].Value
                };

                website.Cities.Add(city);
            }
        }
コード例 #2
0
        /// <summary>
        /// Builds the category tree from the document returned by <c>SouqApi.GetCategories</c> and stores
        /// the top-level groups and the flat category list on <paramref name="website"/>.
        /// </summary>
        /// <param name="website">Site model whose <c>CategoryGroups</c> and <c>Categories</c> are replaced.</param>
        public void Process(Website website)
        {
            Console.WriteLine($"{nameof(CategoryParser)} Start");
            sw.Start();

            var rootGroups = new List<CategoryGroup>();
            var allCategories = new List<Category>();

            var categoriesPage = SouqApi.GetCategories();
            var groupContainers = categoriesPage.DocumentNode
                                                .Descendants()
                                                .FindByNameNClass("div", "grouped-list");

            foreach (var groupContainer in groupContainers)
            {
                // The group's title is taken from the node immediately preceding its container div.
                var rootGroup = new CategoryGroup
                {
                    Id = Guid.NewGuid(),
                    Name = groupContainer.PreviousSibling.InnerText.Cleanify(),
                    Parent = null,
                    Categories = new List<Category>(),
                    CategoryGroups = new List<CategoryGroup>()
                };

                // Fills the group from the container and receives the shared flat category list.
                ParseRecursively(allCategories, rootGroup, groupContainer);

                rootGroups.Add(rootGroup);
            }

            website.CategoryGroups = rootGroups;
            website.Categories = allCategories;

            sw.Stop();
            Console.WriteLine($"{nameof(CategoryParser)} ElapseTime: {sw.Elapsed.ToString()}");
        }
コード例 #3
0
        /// <summary>
        /// Downloads the product listing of every category of <paramref name="website"/> in parallel and
        /// fills each category's <c>Products</c> with one entry per distinct product URL.
        /// </summary>
        /// <remarks>
        /// Categories for which <c>SouqApi.GetProductsFromCategory</c> returns <c>null</c> are flagged
        /// <c>IsFaulty</c> and are dropped from <c>website.Categories</c> at the end. An exception raised
        /// while processing one category is written to the console and does not stop the other categories;
        /// that category keeps whatever products were collected before the failure.
        /// </remarks>
        /// <param name="website">Site model whose <c>Categories</c> are read and then replaced.</param>
        public void Process(Website website)
        {
            Console.WriteLine($"{nameof(CategoryProductsParser)} Start");
            Stopwatch sw = new Stopwatch();

            sw.Start();
            var categories = website.Categories.Shuffle().ToList();

            var parallelOptions = new ParallelOptions()
            {
                MaxDegreeOfParallelism = 50
            };

            Parallel.ForEach(categories, parallelOptions, (category) =>
            {
                try
                {
                    Console.WriteLine(category.Name);
                    category.Products = new List<Product>();
                    string categoryId = category.Url.ExtractCategoryId();

                    var gridNativeObjs = SouqApi.GetProductsFromCategory(categoryId);

                    if (gridNativeObjs == null)
                    {
                        category.IsFaulty = true;
                        return;
                    }

                    category.ExpectedProductCount =
                        gridNativeObjs.Count() * SouqApiConstants.ProductSectionListLimit;

                    // URLs already added to this category. A set keeps the duplicate check O(1) per
                    // product instead of rescanning category.Products for every node.
                    var seenUrls = new HashSet<string>();

                    foreach (var gridObj in gridNativeObjs)
                    {
                        var document = new HtmlDocument();
                        document.LoadHtml(gridObj.body);

                        var productsNodes = document.DocumentNode.Descendants()
                                            .FindByNameNAttribute("div", "data-category-name");

                        foreach (var productNode in productsNodes)
                        {
                            var quickViewNode = productNode.Descendants()
                                                .SingleByNameNContainClass("a", new[] { "sPrimaryLink", "img-link" });

                            string productUrl = quickViewNode.Attributes["href"].Value;

                            // Add returns false when the URL was already recorded: skip the duplicate.
                            if (!seenUrls.Add(productUrl))
                            {
                                continue;
                            }

                            category.Products.Add(new Product()
                            {
                                Id       = Guid.NewGuid(),
                                Url      = productUrl,
                                Category = category
                            });
                        }
                    }

                    category.ActualProductCount = category.Products.Count;
                }
                catch (Exception e)
                {
                    Console.WriteLine(category.Name);
                    Console.WriteLine(e);

                    // Only break when a debugger is really attached; an unconditional Break() without
                    // one can raise an attach prompt or an error report for each failing category.
                    if (Debugger.IsAttached)
                    {
                        Debugger.Break();
                    }
                }
            });

            website.Categories = categories.Where(l => !l.IsFaulty).ToList();
            sw.Stop();
            Console.WriteLine($"{nameof(CategoryProductsParser)} ElapseTime: {sw.Elapsed.ToString()}");
        }
コード例 #4
0
        /// <summary>
        /// Downloads the profile page of <paramref name="seller"/> and copies the average rating, the
        /// join date and the per-period rating statistics from it onto the seller.
        /// </summary>
        /// <remarks>
        /// When the page lacks the "grouped-list rating-tabs" container the seller is flagged both
        /// <c>IsFaulty</c> and <c>HasNoData</c>. When the page contains the text
        /// "This seller has no ratings yet" only <c>HasNoData</c> is set. At most three rating tabs are
        /// read; they are stored, in document order, as last-year, last-quarter and last-month rates.
        /// </remarks>
        /// <param name="seller">Seller whose <c>UserId</c> is read and whose rating fields are written.</param>
        private void ProcessSeller(Seller seller)
        {
            var sellerNative        = SouqApi.GetSellerProfile(seller.UserId);
            var sellerInfoAvailable = !sellerNative.DocumentNode.Descendants().Any(l => l.Name == "div" && l.InnerText.Contains("This seller has no ratings yet"));
            var sellerInfoError     = !sellerNative.DocumentNode.Descendants()
                                      .AnyByNameNContainClass("div", new[] { "grouped-list", "rating-tabs" });

            if (sellerInfoError)
            {
                seller.IsFaulty  = true;
                seller.HasNoData = true;
                return;
            }

            if (!sellerInfoAvailable)
            {
                seller.HasNoData = true;
                return;
            }

            var sellerDetailsNode = sellerNative.DocumentNode.Descendants()
                                    .SingleByNameNContainClass("div", new[] { "grouped-list", "rating-tabs" });
            var sellerInfoNode    = sellerDetailsNode.ChildNodes.FindByName("div").First();
            var sellerNameUrlNode =
                sellerInfoNode.Descendants().SingleByName("h6").ChildNodes.SingleByName("a");
            var sellerRateNode = sellerInfoNode.Descendants().SingleByNameNClass("i", "star-rating-svg")
                                 .ChildNodes.SingleByName("i");
            var sellerDateNode = sellerNameUrlNode.ParentNode.ParentNode.ParentNode.ChildNodes.FindByName("div")
                                 .Last();
            var sellerRatingTabs = sellerDetailsNode.Descendants().SingleByNameNClass("div", "tabs-content")
                                   .ChildNodes.FindByName("section");

            // The star width is a CSS percentage ("width:NN.N%"), so it always uses '.' as the decimal
            // separator; parse it with the invariant culture and scale it to a 0-5 rating.
            var sellerRate =
                float.Parse(sellerRateNode.Attributes["style"].Value.Replace("width:", "").Replace("%", ""),
                            System.Globalization.CultureInfo.InvariantCulture) /
                100f * 5f;

            // NOTE(review): still parsed with the current culture because the page's date format is not
            // visible here - confirm and pin a culture/format.
            var sellerDate =
                DateTime.Parse(sellerDateNode.InnerText.Trim().Replace("Member since: ", ""));

            SellerRate[] rates = new SellerRate[3];
            int          index = 0;

            foreach (var sellerRatingTab in sellerRatingTabs)
            {
                // Only three periods are stored; ignore further tabs instead of overrunning the array.
                if (index >= rates.Length)
                {
                    break;
                }

                var positiveNode        = sellerRatingTab.Descendants().FindByName("ul").FirstOrDefault(l => l.InnerText.Contains("Positive"));
                var negativeNode        = sellerRatingTab.Descendants().FindByName("ul").FirstOrDefault(l => l.InnerText.Contains("Negative"));
                var totalRatingNodeNode = sellerRatingTab.ChildNodes.SingleOrDefaultByName("div");

                // Stays null when the tab carries none of the three pieces of information.
                SellerRate rate = null;

                if (totalRatingNodeNode != null)
                {
                    rate             = new SellerRate();
                    rate.TotalRating = int.Parse(totalRatingNodeNode.InnerText.Trim()
                                                 .Replace("Total Ratings: ", ""));
                }

                if (positiveNode != null)
                {
                    // The total-ratings div may be absent while the percentage lists exist; create the
                    // rate on demand rather than dereferencing null.
                    if (rate == null)
                    {
                        rate = new SellerRate();
                    }

                    var postivePercentageNode = positiveNode.ChildNodes.FindByName("li").Last();
                    rate.PositiveRatePercentage =
                        int.Parse(postivePercentageNode.InnerText.Trim().Replace("%", ""));
                }

                if (negativeNode != null)
                {
                    if (rate == null)
                    {
                        rate = new SellerRate();
                    }

                    var negativePercentageNode = negativeNode.ChildNodes.FindByName("li").Last();
                    rate.NegativeRatePercentage =
                        int.Parse(negativePercentageNode.InnerText.Trim().Replace("%", ""));
                }

                rates[index++] = rate;
            }

            seller.AverageRate     = sellerRate;
            seller.JoinDate        = sellerDate;
            seller.LastYearRate    = rates[0];
            seller.LastQuarterRate = rates[1];
            seller.LastMonthRate   = rates[2];
        }
コード例 #5
0
        /// <summary>
        /// Runs a product search for every category reachable from <c>website.CategoryGroups</c> in
        /// parallel and fills each category's <c>Products</c>, redirect/query metadata and product counts.
        /// </summary>
        /// <remarks>
        /// Products are de-duplicated per category by URL. Exceptions thrown for a category are not caught
        /// here and therefore surface from <c>Parallel.ForEach</c>. The method waits on
        /// <c>Console.ReadLine()</c> before returning.
        /// </remarks>
        /// <param name="website">Site model whose <c>CategoryGroups</c> are read.</param>
        public void Process(Website website)
        {
            Console.WriteLine($"{nameof(CategoryProductsSearchParser)} Start");
            sw.Start();
            var categories = website.CategoryGroups.FlattenCategories();

            Parallel.ForEach(categories, (category) =>
            {
                category.Products = new List<Product>();
                Console.WriteLine(category.Name);
                string categoryId = category.Url.ExtractCategoryId();

                var gridNativeObjs = SouqApi.SearchProducts(categoryId);

                // The first result section carries the redirect/query metadata; fetch it once instead of
                // re-running First() for every field.
                var firstGrid = gridNativeObjs.First();

                category.HasRedirection = !string.IsNullOrEmpty(firstGrid.redirect_url);

                var firstQuery = firstGrid.jsonData.meta_data.query;

                // The same query value is stored as Query or as RedirectQuery depending on redirection.
                category.Query                = !category.HasRedirection ? firstQuery : null;
                category.RedirectQuery        = category.HasRedirection ? firstQuery : null;
                category.RedirectUrl          = category.HasRedirection ? firstGrid.redirect_url : null;
                category.ExpectedProductCount = gridNativeObjs.Count() * SouqApiConstants.ProductSectionListLimit;

                // URLs already added to this category. A set keeps the duplicate check O(1) per product
                // instead of rescanning category.Products for every unit.
                var seenUrls = new HashSet<string>();

                foreach (var gridObj in gridNativeObjs)
                {
                    var productsNative = gridObj.jsonData.units;

                    foreach (var productNative in productsNative)
                    {
                        // NOTE(review): divides the native price by 100 (looks like a minor-unit to
                        // major-unit conversion - confirm; if price is an integer type this truncates).
                        var price = productNative.price / 100;

                        // Market price, revisioning, sales rank, discount flag and page/section values of
                        // the native unit are not mapped onto Product here.
                        Product product = new Product()
                        {
                            Category       = category,
                            Id             = Guid.NewGuid(),
                            UnitId         = productNative.unit_id.ToString(),
                            Title          = productNative.title,
                            ProductId      = productNative.item_id.ToString(),
                            Url            = productNative.primary_link,
                            ImageUrl       = productNative.image_url,
                            Manufacturer   = productNative.manufacturer.ToTitleCase(),
                            Ean            = productNative.ean.FlattenString(),
                            IsFreeShipping = productNative.free_shipping_eligiblity,
                            CurrentPrice   = price,
                            IsSouqFulfiled = productNative.is_fbs,
                            IsBundled      = productNative.bundle_label != "false",
                            Currency       = gridObj.jsonData.meta_data.currency
                        };

                        // Add returns false when the URL was already recorded: skip the duplicate.
                        if (seenUrls.Add(product.Url))
                        {
                            category.Products.Add(product);
                        }
                    }
                }

                category.ActualProductCount = category.Products.Count;
            });

            sw.Stop();
            Console.WriteLine($"{nameof(CategoryProductsSearchParser)} ElapseTime: {sw.Elapsed.ToString()}");

            // Blocks until a line is read from standard input.
            // NOTE(review): looks like a debugging pause - confirm whether callers rely on it.
            Console.ReadLine();
        }
コード例 #6
0
        /// <summary>
        /// Queries the delivery estimate of <paramref name="product"/> for every entry of <c>cities</c> in
        /// parallel and stores the results in <c>product.Deliveries</c>.
        /// </summary>
        /// <remarks>
        /// Returns immediately when the product's availability is <c>ProductAvailability.OutStock</c>.
        /// Any exception thrown by <c>SouqApi.GetDeliveryInfo</c> sends the worker through the event-based
        /// hand-off in the catch block, after which the request is retried via <c>goto retoken</c>; the
        /// retry has no upper bound.
        /// </remarks>
        /// <param name="product">
        /// Product whose <c>Availability</c>, <c>Url</c> and <c>UnitId</c> are read and whose
        /// <c>Deliveries</c> is written.
        /// </param>
        private void ParseProductDeliveryInfo(Product product)
        {
            // NOTE(review): lockObject is never referenced again in this method.
            object lockObject = new object();

            if (product.Availability == ProductAvailability.OutStock)
            {
                return;
            }

            // Results are collected in a concurrent bag because they are added from the parallel body.
            ConcurrentBag <ProductDelivery> productDeliveries = new ConcurrentBag <ProductDelivery>();

            // Access token for the product page; its hitsCfs / hitsCfsMeta values are sent with each request.
            var token = SouqApi.GetProductAccessTokens(product.Url.ExtractProductFullId())["searchForm"];

            // The locals below are captured and mutated by the parallel body without a lock or Interlocked.
            // NOTE(review): concurrent ++/-- and flag updates may be lost or observed stale - confirm.
            int errorCount = 0;
            int tryCount   = cities.Count;
            // NOTE(review): neither event is disposed in this method.
            ManualResetEvent manualResetEvent = new ManualResetEvent(false);
            AutoResetEvent   autoResetEvent   = new AutoResetEvent(false);
            bool             validToken       = true;

            //foreach(var city in cities)
            Parallel.ForEach(cities, (city) =>
            {
                // Retry target: a failed request jumps back here from the end of the catch block.
                retoken:
                SouqDeliveryInfo deliveryInfo;
                try
                {
                    deliveryInfo = SouqApi.GetDeliveryInfo(city.Code, city.Name, product.UnitId, token.hitsCfs,
                                                           token.hitsCfsMeta);
                    // One fewer request outstanding after a success.
                    --tryCount;
                }
                catch (Exception ex)
                {
                    // Every exception type is handled the same way; ex itself is not inspected.
                    ++errorCount;
                    validToken = false;

                    // While the number of failures differs from the number of outstanding requests, wait
                    // on the manual event; once they match, release both events.
                    // NOTE(review): appears intended to hold failed workers until every outstanding
                    // request has failed, then let them through one at a time - TODO confirm.
                    if (errorCount > 0 && tryCount != errorCount)
                    {
                        manualResetEvent.WaitOne();
                    }
                    else if (errorCount > 0 && tryCount == errorCount)
                    {
                        autoResetEvent.Set();
                        manualResetEvent.Set();
                    }

                    // Passes one waiting worker per Set(); paired with the Set() below.
                    autoResetEvent.WaitOne();

                    // The first worker through sees validToken == false, refreshes the token and resets
                    // the shared state; later workers see validToken == true and skip the refresh.
                    if (!validToken)
                    {
                        errorCount = 0;
                        manualResetEvent.Reset();
                        token      = SouqApi.GetProductAccessTokens(product.Url.ExtractProductFullId())["searchForm"];
                        validToken = true;
                    }

                    // Let the next waiting worker proceed, then retry this city's request.
                    autoResetEvent.Set();
                    goto retoken;
                }

                // A null estimate is recorded as "cannot deliver" for this city.
                var canDeliver = deliveryInfo.estimate_by_days != null;
                var daysNo     = deliveryInfo.estimate_by_days;

                ProductDelivery delivery = new ProductDelivery()
                {
                    City          = city,
                    Id            = Guid.NewGuid(),
                    EstimatedDays = daysNo,
                    CanDeliver    = canDeliver
                };

                productDeliveries.Add(delivery);
            });

            product.Deliveries = productDeliveries.ToList();
        }
コード例 #7
0
        /// <summary>
        /// Fetches and parses everything known about <paramref name="product"/>: bucket, body, bundles,
        /// reviews, delivery info and the further products discovered while parsing the body.
        /// </summary>
        /// <remarks>
        /// Returns without further work when either the bucket or the product page cannot be fetched
        /// (<c>null</c>). A body/bucket inconsistency marks the product <c>IsFaulty</c> and stops; any
        /// other exception from body parsing is logged to the console and processing continues.
        /// </remarks>
        /// <param name="product">Product to fill in.</param>
        /// <param name="mainProduct">
        /// Parent product when <paramref name="product"/> is one of its configurations, otherwise
        /// <c>null</c>. When set, discovered products are filtered against it and attached to it.
        /// </param>
        private void ParseLogic(Product product, Product mainProduct)
        {
            Stopwatch sw_tmp = new Stopwatch();

            sw_tmp.Start();

            // NOTE(review): index is incremented outside lockObj while finish (below) is incremented
            // inside it - if this method runs concurrently, ids may repeat; confirm.
            var id = index++;

            Console.WriteLine(id);
            List<string> discoveriedProducts = new List<string>();

            // Both API calls take the same id, so extract it once.
            var productFullId = product.Url.ExtractProductFullId();

            var productBucket = SouqApi.GetProductBucket(productFullId);

            if (productBucket == null)
            {
                return;
            }

            var productNative = SouqApi.GetProduct(productFullId);

            if (productNative == null)
            {
                return;
            }

            ParseProductBucket(product, productBucket);

            try
            {
                ParseProductBody(product, productNative.body, discoveriedProducts);

                if (mainProduct != null)
                {
                    // Drop the main product itself and configurations it already knows about.
                    discoveriedProducts = discoveriedProducts
                                          .Where(l => l != mainProduct.Url)
                                          .Where(l => mainProduct.ProductConfigurations.All(c => c.Url != l))
                                          .ToList();
                }
            }
            catch (Exception ex)
            {
                if (ex.Message == "Inconsist Product Body & Bucket")
                {
                    product.IsFaulty = true;
                    return;
                }

                // Still best-effort (processing continues), but no longer silent: report what went
                // wrong so partially parsed products can be traced.
                Console.WriteLine($"{nameof(ProductParser)} Task({id}) body parsing failed: {ex}");
            }

            if (productNative.bundles != null)
            {
                ParseProductBundles(product, productNative.bundles);
            }

            var productReviewsNative = SouqApi.GetProductReview(product.ProductId);

            ParseProductReviews(product, productReviewsNative);

            ParseProductDeliveryInfo(product);

            // Discovered products always hang off the top-level product.
            ParseDiscoveredProducts(mainProduct ?? product, discoveriedProducts);

            sw_tmp.Stop();
            Console.WriteLine($"Task({id}) ElapseTime: {sw_tmp.Elapsed.ToString()}");

            lock (lockObj)
            {
                finish++;
                Console.WriteLine(
                    $@"{nameof(ProductParser)} ElapseTime: {DateTime.Now - start} + Count: {finish} +
                                    Task Time: {(DateTime.Now - start).TotalSeconds / finish} [Sec]");
            }
        }