示例#1
0
        /// <summary>
        /// Scrapes the category menu found at <c>PageUrl</c>, downloads every category's product
        /// list from the paged JSON endpoint (<c>PageUrlJson</c>), enriches each product from the
        /// attribute table on its detail page, and writes one JSON document per category plus a
        /// combined document for all categories through <c>WriteJson</c>.
        /// </summary>
        /// <remarks>
        /// Scraping is best effort: a failure inside one brand, category or product is traced and
        /// skipped so the rest of the run continues. Failures while loading the landing page or
        /// inside <c>WriteJson</c> are not caught and propagate to the caller.
        /// </remarks>
        /// <returns>A task that completes once all output has been written.</returns>
        private static async Task Parse()
        {
            var html = await GetData(PageUrl);

            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            var result = CollectCategories(doc);

            foreach (var category in result)
            {
                // Assigned up front so the detail pass and the serialization below always see a list,
                // even when the product download fails.
                category.Data = new List<Data>();

                try
                {
                    // The JSON endpoint is addressed by the last path segment of the category URL.
                    var products = await DownloadProductsAsync(category.URL.Split('/').Last());

                    category.Data = products
                        .Select(item => new Data
                        {
                            Title      = item.productName,
                            LoosePrice = item.price1,
                            CIBPrice   = item.price3,
                            NewPrice   = item.price2
                        })
                        .ToList();
                }
                catch (Exception ex)
                {
                    System.Diagnostics.Trace.TraceWarning("Parse: product list skipped for '" + category.URL + "': " + ex.Message);
                }

                // details
                foreach (var data in category.Data)
                {
                    try
                    {
                        await PopulateDetailsAsync(category.URL, data);
                    }
                    catch (Exception ex)
                    {
                        // One broken detail page must not cost us the rest of the category.
                        System.Diagnostics.Trace.TraceWarning("Parse: details skipped for '" + data.Title + "': " + ex.Message);
                    }
                }

                await WriteJson(JsonConvert.SerializeObject(category, Formatting.Indented), category.CategoryName);

                await Task.Delay(500);
            }

            await WriteJson(JsonConvert.SerializeObject(result, Formatting.Indented));
        }

        /// <summary>
        /// Builds the category list (region, name, URL) from the brand menus of the landing page.
        /// </summary>
        /// <param name="doc">The parsed landing page.</param>
        /// <returns>One model per category link found; <c>Data</c> is not populated here.</returns>
        private static List<PriceChartingModel> CollectCategories(HtmlDocument doc)
        {
            var result = new List<PriceChartingModel>();

            // Only <li class="brand"> elements that carry an id other than "tools" are brand menus.
            var brands = HtmlDocumentHelper
                .GetNodesByParams(doc.DocumentNode, HtmlTag.li, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "brand")
                .Where(x =>
                {
                    var id = x.GetAttributeValue("id", null);
                    return id != null && id != "tools";
                });

            foreach (var brand in brands)
            {
                try
                {
                    var dropDownMenu = HtmlDocumentHelper.GetNodeByParamsUseXpathStartsWith(brand, HtmlTag.ul, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "menu-dropdown");
                    if (dropDownMenu == null)
                    {
                        continue;
                    }

                    var regions = HtmlDocumentHelper.GetNodesByParams(dropDownMenu, HtmlTag.ul, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "region");
                    if (regions != null)
                    {
                        foreach (var region in regions)
                        {
                            var regionName = HtmlDocumentHelper.GetNodeByParams(region, HtmlTag.li, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "title")?.InnerText;

                            // SelectNodes yields null (not an empty collection) when nothing matches.
                            var entries = region.SelectNodes(".//li");
                            if (entries == null)
                            {
                                continue;
                            }

                            foreach (var category in entries.Where(x => x.InnerHtml != string.Empty))
                            {
                                // The category link is expected to be the first child of the <li>.
                                var link = category.ChildNodes[0];
                                var url  = link?.GetAttributeValue("href", null);
                                if (url == null)
                                {
                                    continue;
                                }

                                result.Add(new PriceChartingModel
                                {
                                    Region       = regionName,
                                    CategoryName = link.InnerText,
                                    URL          = url
                                });
                            }
                        }
                    }
                    else
                    {
                        // No region grouping in this menu: every entry is filed under the fixed region below.
                        var entries = brand.SelectNodes(".//li");
                        if (entries == null)
                        {
                            continue;
                        }

                        foreach (var category in entries.Where(x => x.InnerHtml != string.Empty))
                        {
                            var link = category.ChildNodes[0];
                            var url  = link?.GetAttributeValue("href", null);
                            if (url == null)
                            {
                                continue;
                            }

                            result.Add(new PriceChartingModel
                            {
                                Region       = "NTSC (USA)",
                                CategoryName = link.InnerText,
                                URL          = url
                            });
                        }
                    }
                }
                catch (Exception ex)
                {
                    // Best effort: an unexpected menu layout only costs us this brand.
                    System.Diagnostics.Trace.TraceWarning("Parse: brand menu skipped: " + ex.Message);
                }
            }

            return result;
        }

        /// <summary>
        /// Downloads all products of one category by walking the paged JSON endpoint in steps of 50.
        /// </summary>
        /// <param name="categorySlug">Value substituted for <c>{category}</c> in <c>PageUrlJson</c>.</param>
        /// <returns>
        /// The products collected so far. Paging stops at the first page that is not full, at a
        /// response without a product array, or after three consecutive failed requests.
        /// </returns>
        private static async Task<List<Product>> DownloadProductsAsync(string categorySlug)
        {
            const int pageSize               = 50;
            const int maxConsecutiveFailures = 3;

            var products = new List<Product>();
            var offset   = 0;
            var failures = 0;

            while (true)
            {
                try
                {
                    var jUrl     = PageUrlJson.Replace("{category}", categorySlug).Replace("{count}", offset.ToString());
                    var dataJson = await GetData(jUrl);

                    var page = JsonConvert.DeserializeObject<Response>(dataJson)?.products;
                    if (page == null)
                    {
                        // Empty or unexpected payload: treat it as the end of the listing.
                        break;
                    }

                    products.AddRange(page);
                    failures = 0;

                    // A page that is not full is the last one.
                    if (page.Length != pageSize)
                    {
                        break;
                    }
                    offset += pageSize;
                }
                catch (Exception ex)
                {
                    // Retry the same offset, but never forever: a permanently failing request
                    // would otherwise spin this loop without end.
                    failures++;
                    if (failures >= maxConsecutiveFailures)
                    {
                        System.Diagnostics.Trace.TraceWarning("Parse: giving up on '" + categorySlug + "' at offset " + offset + ": " + ex.Message);
                        break;
                    }
                }

                // Throttle between requests (both next page and retry) without blocking the thread.
                await Task.Delay(100);
            }

            return products;
        }

        /// <summary>
        /// Loads the detail page of one product and copies the rows of its attribute table into
        /// <paramref name="data"/>. Properties whose row is absent are left untouched.
        /// </summary>
        /// <param name="categoryUrl">URL of the category the product belongs to.</param>
        /// <param name="data">The product to enrich; its <c>Title</c> is used to build the detail URL.</param>
        private static async Task PopulateDetailsAsync(string categoryUrl, Data data)
        {
            // Detail URL = category URL with "console" replaced by "game", plus the title with
            // punctuation blanked out and spaces turned into dashes.
            var slug = ExtensionMethods.Replace(data.Title, new char[] { '[', ']', '(', ')', '/', '\\', '.', ':', ',', '?' }, " ").Replace(' ', '-');
            var gameUrl = categoryUrl.Replace("console", "game");
            var url = $"{gameUrl}/{slug}";

            // Throttle before every detail request.
            await Task.Delay(200);
            var html = await GetData(url);

            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            var attributeNode = HtmlDocumentHelper.GetNodeByParams(doc.DocumentNode, HtmlTag.table, Scrapping.AllPossibilities.Enums.HtmlAttribute.id, "attribute");
            if (attributeNode == null)
            {
                return;
            }

            // SelectNodes yields null when the table has no rows.
            var detailNodes = attributeNode.SelectNodes(".//tr");
            if (detailNodes == null)
            {
                return;
            }

            // Finds the row whose first cell equals the label and reads its "details" cell.
            // Returns false when there is no such row, so the caller leaves the property alone.
            bool TryReadDetail(string label, out string value)
            {
                var row = detailNodes.FirstOrDefault(x => x.SelectSingleNode(".//td")?.InnerText == label);
                if (row == null)
                {
                    value = null;
                    return false;
                }

                var cell = HtmlDocumentHelper.GetNodeByParams(row, HtmlTag.td, Scrapping.AllPossibilities.Enums.HtmlAttribute._class, "details");
                value = WebUtility.HtmlDecode(cell?.InnerText?.Trim(' ', '\n'));
                return true;
            }

            if (TryReadDetail("Genre:", out var genre))
            {
                data.Genre = genre;
            }
            if (TryReadDetail("Release Date:", out var releaseDate))
            {
                data.ReleaseDate = releaseDate;
            }
            if (TryReadDetail("ESRB Rating:", out var rating))
            {
                data.Rating = rating;
            }
            if (TryReadDetail("Publisher:", out var publisher))
            {
                data.Publisher = publisher;
            }
            if (TryReadDetail("Developer:", out var developer))
            {
                data.Developer = developer;
            }
            if (TryReadDetail("Player Count:", out var playerCount))
            {
                data.PlayerCount = playerCount;
            }
            if (TryReadDetail("UPC:", out var upc))
            {
                data.UPC = upc;
            }
            if (TryReadDetail("ASIN (Amazon):", out var asin))
            {
                data.Amazon_ASIN = asin;
            }
            if (TryReadDetail("ePID (eBay):", out var epid))
            {
                data.Ebay_ePID = epid;
            }
            if (TryReadDetail("PriceCharting ID:", out var priceChartingId))
            {
                data.PriceChartingId = priceChartingId;
            }
        }
示例#2
0
        /// <summary>
        /// Scrapes the platform links found at <c>PageUrl</c>, then loads every platform page to
        /// read its summary statistics and its item list, and finally hands the collected models
        /// to <c>SaveToJsonFile</c>.
        /// </summary>
        /// <remarks>
        /// Scraping is best effort: a failure on one link, platform page, stats block or item row
        /// is traced and skipped. Failures while loading the landing page or inside
        /// <c>SaveToJsonFile</c> are not caught and propagate to the caller.
        /// </remarks>
        /// <returns>A task that completes once the result has been saved.</returns>
        private static async Task Parse()
        {
            var requestHelper = new RequestHelper();
            var header        = HeaderBuilder.BuildOwnHeaders(new HeaderModel()
            {
                Accept     = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
                Host       = "gamevaluenow.com",
                Referer    = "https://gamevaluenow.com/",
                User_Agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"
            });

            // The landing page is requested with the default headers; the custom ones above are
            // used for the per-platform requests below.
            var html = await requestHelper.SendRequestAsync(PageUrl, headers : HeaderBuilder.GetDefaultHeaders());

            var doc = new HtmlDocument();
            doc.LoadHtml(html);

            var result        = new List<GameValueNowModel>();
            var seenPlatforms = new HashSet<string>();
            var nodes         = HtmlDocumentHelper.GetNodesByParamsUseXpathStartsWith(doc.DocumentNode, HtmlTag.a, HtmlAttribute._class, "brand-");

            foreach (var node in nodes)
            {
                try
                {
                    var href = node.GetAttributeValue("href", null);
                    if (href is null || href == "#")
                    {
                        continue;
                    }

                    // Keep only the first link per platform name; Add returns false for repeats.
                    var platformName = WebUtility.HtmlDecode(node.InnerText);
                    if (!seenPlatforms.Add(platformName))
                    {
                        continue;
                    }

                    result.Add(new GameValueNowModel
                    {
                        URL          = $"{PageUrl}{href}",
                        PlatformName = platformName
                    });
                }
                catch (Exception e)
                {
                    System.Diagnostics.Trace.TraceWarning("Parse: platform link skipped: " + e.Message);
                }
            }

            foreach (var item in result)
            {
                // Assigned up front so every saved model carries a list, even if its page fails.
                item.Data = new List<Data>();

                try
                {
                    var dataHtml = await requestHelper.SendRequestAsync(item.URL, headers : header);

                    var document = new HtmlDocument();
                    document.LoadHtml(dataHtml);

                    // stats
                    try
                    {
                        ReadPlatformStats(document, item);
                    }
                    catch (Exception e)
                    {
                        // A broken stats block must not prevent the item list from being read.
                        System.Diagnostics.Trace.TraceWarning("Parse: stats skipped for '" + item.PlatformName + "': " + e.Message);
                    }

                    // items
                    ReadPlatformItems(document, item);
                }
                catch (Exception e)
                {
                    System.Diagnostics.Trace.TraceWarning("Parse: platform '" + item.PlatformName + "' skipped: " + e.Message);
                }

                // Throttle between platform pages.
                await Task.Delay(500);
            }

            await SaveToJsonFile(result);

            // SQL persistence is currently disabled:
            //await SaveToSql(result);
        }

        /// <summary>
        /// Copies the summary statistics from the "stats" block of a platform page into
        /// <paramref name="item"/>. A statistic whose tile is absent is left untouched.
        /// </summary>
        /// <param name="document">The parsed platform page.</param>
        /// <param name="item">The platform model to fill.</param>
        private static void ReadPlatformStats(HtmlDocument document, GameValueNowModel item)
        {
            var statsNode = HtmlDocumentHelper.GetNodeByParams(document.DocumentNode, HtmlTag.div, HtmlAttribute.id, "stats");
            if (statsNode is null)
            {
                return;
            }

            var statListNode = HtmlDocumentHelper.GetNodesByParams(statsNode, HtmlTag.div, HtmlAttribute._class, "col-100 stat");
            if (statListNode is null)
            {
                return;
            }

            // Finds the first stat tile whose text contains the caption and reads its "stat-value".
            // Returns false when there is no such tile, so the caller leaves the property alone.
            bool TryReadStat(string caption, out string value)
            {
                var stat = statListNode.FirstOrDefault(x => x.InnerText.Contains(caption));
                if (stat is null)
                {
                    value = null;
                    return false;
                }

                value = HtmlDocumentHelper.GetNodeByParamsUseXpathContains(stat, HtmlTag.div, HtmlAttribute._class, "stat-value")?.InnerText?.Trim();
                return true;
            }

            if (TryReadStat("Avg Loose", out var avgLoosePrice))
            {
                item.AvgLoosePrice = avgLoosePrice;
            }
            if (TryReadStat("Avg Complete", out var avgCompletePrice))
            {
                item.AvgCompletePrice = avgCompletePrice;
            }
            if (TryReadStat("Loose Set", out var looseSetValue))
            {
                item.LooseSetValue = looseSetValue;
            }
            if (TryReadStat("Complete Set", out var completeSetValue))
            {
                item.CompleteSetValue = completeSetValue;
            }
            if (TryReadStat("#", out var sharpOfGames))
            {
                item.SharpOfGames = sharpOfGames;
            }
        }

        /// <summary>
        /// Reads the rows of the "item-list" block of a platform page and appends one
        /// <c>Data</c> entry per usable row to <c>item.Data</c>.
        /// </summary>
        /// <param name="document">The parsed platform page.</param>
        /// <param name="item">The platform model whose <c>Data</c> list receives the rows; the list must already exist.</param>
        private static void ReadPlatformItems(HtmlDocument document, GameValueNowModel item)
        {
            var listNode = HtmlDocumentHelper.GetNodeByParams(document.DocumentNode, HtmlTag.div, HtmlAttribute.id, "item-list");
            if (listNode is null)
            {
                return;
            }

            var collectionItemNodes = HtmlDocumentHelper.GetNodesByParamsUseXpathStartsWith(listNode, HtmlTag.div, HtmlAttribute._class, "item-row desktop all");
            if (collectionItemNodes is null)
            {
                return;
            }

            foreach (var collectionItemNode in collectionItemNodes)
            {
                try
                {
                    var nameNode = HtmlDocumentHelper.GetNodeByParams(collectionItemNode, HtmlTag.a, HtmlAttribute._class, "game-link");
                    var idNode   = HtmlDocumentHelper.GetNodeByParams(collectionItemNode, HtmlTag.div, HtmlAttribute._class, "item-number");
                    if (nameNode is null || idNode is null)
                    {
                        // A row without a title link or an item number is not usable.
                        continue;
                    }

                    var priceContainer = HtmlDocumentHelper.GetNodeByParamsUseXpathContains(collectionItemNode, HtmlTag.div, HtmlAttribute._class, "price-col-container");
                    if (priceContainer is null)
                    {
                        continue;
                    }

                    var prices = HtmlDocumentHelper.GetNodesByParamsUseXpathStartsWith(priceContainer, HtmlTag.a, HtmlAttribute._class, "game-link");
                    if (prices is null)
                    {
                        continue;
                    }

                    var data = new Data
                    {
                        Title = nameNode.InnerText,
                        Id    = idNode.InnerText.Replace("\n", "").Replace(" ", "")
                    };

                    // Price links are read positionally: loose, complete, new, graded.
                    var priceCount = prices.Count;
                    if (priceCount > 0)
                    {
                        data.Loose = prices[0].InnerText;
                    }
                    if (priceCount > 1)
                    {
                        data.Complete = prices[1].InnerText;
                    }
                    if (priceCount > 2)
                    {
                        data.New = prices[2].InnerText;
                    }
                    if (priceCount > 3)
                    {
                        data.Graded = prices[3].InnerText;
                    }

                    data.PlatformName = item.PlatformName;
                    item.Data.Add(data);
                }
                catch (Exception e)
                {
                    // Best effort: one malformed row must not cost us the rest of the list.
                    System.Diagnostics.Trace.TraceWarning("Parse: item row skipped for '" + item.PlatformName + "': " + e.Message);
                }
            }
        }