Пример #1
0
        /// <summary>
        /// Downloads the csgostash.com front page and gathers, from its dropdown list items,
        /// the URL lists that the later parsing stages consume.
        /// </summary>
        /// <returns>
        /// A <see cref="SiteData"/> whose case, knife and collection URLs come from
        /// <c>GetDropdownOption</c> and whose souvenir, sticker and tournament-sticker URLs
        /// come from <c>GetPaginationUrls</c>.
        /// </returns>
        private static async Task <SiteData> GetSiteData()
        {
            string frontPageHtml = await HtmlFetcher.RetrieveFromUrl("https://csgostash.com/");

            var frontPage = new HtmlDocument();
            frontPage.LoadHtml(frontPageHtml);

            // Every <li class="dropdown"> element of the page.
            // NOTE(review): HtmlAgilityPack returns null here when nothing matches; that null is
            // handed to the helpers unchanged - confirm they cope with it.
            IEnumerable <HtmlNode> dropdownItems = frontPage.DocumentNode.SelectNodes("//li[@class='dropdown']");

            var collected = new SiteData();

            // URLs read directly from the dropdown entries.
            collected.CaseUrLs       = GetDropdownOption(dropdownItems, "newest cases", "/case/");
            collected.KnifeUrLs      = GetDropdownOption(dropdownItems, "newest knives", "/weapon/");
            collected.CollectionUrLs = GetDropdownOption(dropdownItems, "newest collections", "/collection/");

            // URLs for souvenirs, stickers and tournament stickers; awaited one after another.
            collected.SouvenirUrLs          = await GetPaginationUrls(dropdownItems, "newest cases", "souvenir-package");
            collected.StickerUrLs           = await GetPaginationUrls(dropdownItems, "tournament stickers", "/stickers/regular");
            collected.TournamentStickerUrLs = await GetPaginationUrls(dropdownItems, "tournament stickers", "/stickers/tournament");

            return collected;
        }
Пример #2
0
        /// <summary>
        /// Downloads a listing page and determines how many pages its pagination control spans.
        /// </summary>
        /// <param name="url">Address of the page to download and inspect.</param>
        /// <returns>
        /// The largest integer found among the <c>li</c> entries of the first
        /// <c>ul</c> element with class <c>pagination</c>; 1 when the page has no such element,
        /// the element has no <c>li</c> entries, or no entry is a number greater than 1.
        /// </returns>
        private static async Task <int> GetPaginationPagesCount(string url)
        {
            string page = await HtmlFetcher.RetrieveFromUrl(url);

            var doc = new HtmlDocument();
            doc.LoadHtml(page);

            // HtmlAgilityPack yields null (not an empty result) when an XPath matches nothing,
            // so a page without pagination has to be treated explicitly as a single page.
            var pagination = doc.DocumentNode.SelectSingleNode("//ul[@class='pagination']");
            var entries    = pagination?.SelectNodes(".//li"); // li entries of that first control only

            if (entries == null)
            {
                return 1;
            }

            // Keep the highest numeric label; non-numeric entries are ignored.
            int highestVal = 1;

            foreach (var entry in entries)
            {
                if (int.TryParse(entry.InnerText, out int val) && val > highestVal)
                {
                    highestVal = val;
                }
            }

            return highestVal;
        }
Пример #3
0
        /// <summary>
        /// Downloads every given listing page and converts each grid tile on it (a div whose class
        /// contains "well result-box nomargin") into a <see cref="DataCollection"/>.
        /// </summary>
        /// <param name="souvenirUrLs">Pages to download and parse, processed in order.</param>
        /// <param name="nameXpath">Selector passed to <c>ExtractStringFromTags</c> to read the item name from a tile.</param>
        /// <param name="collectionXpath">Selector passed to <c>ExtractStringFromTags</c> to read the collection text from a tile.</param>
        /// <returns>One entry per tile found, in page and document order.</returns>
        private static async Task <List <DataCollection> > ParseGridItems(IEnumerable <string> souvenirUrLs, string nameXpath, string collectionXpath)
        {
            Logger.Log("Fetching/parsing Souvenirs...");

            var souvenirCollections = new List <DataCollection>();

            foreach (string souvenirUrL in souvenirUrLs)
            {
                string page = await HtmlFetcher.RetrieveFromUrl(souvenirUrL);

                var doc = new HtmlDocument();
                doc.LoadHtml(page);

                // SelectNodes returns null when the path matches nothing; such a page has no tiles.
                var candidateDivs = doc.DocumentNode.SelectNodes("html/body/div/div/div/div");
                if (candidateDivs == null)
                {
                    continue;
                }

                foreach (var candidateDiv in candidateDivs)
                {
                    // Divs without a class attribute cannot be tiles; skip them instead of dereferencing null.
                    string cssClass = candidateDiv.Attributes["class"]?.Value;
                    if (cssClass == null || !cssClass.Contains("well result-box nomargin"))
                    {
                        continue;
                    }

                    // Re-parse the tile's markup on its own so the selectors are evaluated inside the tile only.
                    var tileDoc = new HtmlDocument();
                    tileDoc.LoadHtml(candidateDiv.InnerHtml);

                    string name = ExtractStringFromTags(tileDoc, nameXpath);

                    // Collection text, with line feeds stripped.
                    string itemCollection = ExtractStringFromTags(tileDoc, collectionXpath).Replace("\n", "");

                    // NOTE(review): the icon is looked up in the whole page document, not in the tile,
                    // so every tile of a page presumably receives the same icon - confirm whether a
                    // per-tile lookup was intended before changing it.
                    string iconUrL = ExtractImgSrc(doc, "img-responsive center-block");

                    souvenirCollections.Add(new DataCollection
                    {
                        Name           = name,
                        CaseCollection = itemCollection,
                        IconUrL        = iconUrL
                    });
                }
            }

            return souvenirCollections;
        }
Пример #4
0
        /// <summary>
        /// Resolves knife overview pages into individual skin pages and reads, for each skin,
        /// its title and the contents of its "collection-text-label" paragraphs.
        /// </summary>
        /// <remarks>
        /// Works in three passes: derive a knife name from the last path segment of every overview
        /// URL, collect the skin links on each overview page that mention that name, then download
        /// every skin page.
        /// </remarks>
        /// <param name="masterKnifeUrLs">Overview page URLs, one per knife type.</param>
        /// <returns>
        /// A dictionary keyed by the skin page title (with the " - CS:GO Stash" suffix removed) whose
        /// value is the inner HTML of every <c>p</c> element with class <c>collection-text-label</c>
        /// on that page. Pages without a title are left out; when two pages share a title the first wins.
        /// </returns>
        private static async Task <Dictionary <string, List <string> > > ParseKnives(IEnumerable <string> masterKnifeUrLs)
        {
            // Materialise once so the incoming sequence is enumerated a single time.
            List <string> masterUrLs = masterKnifeUrLs.ToList();

            Logger.Log("Extracting knife names from URL...");

            // Last path segment of each URL with '+' turned into '-' (e.g. Navaja+Knife -> Navaja-Knife).
            // LastIndexOf yields -1 when there is no '/', so the whole URL is used and the list
            // always stays index-aligned with masterUrLs.
            List <string> knifeNames = masterUrLs
                                       .Select(url => url.Substring(url.LastIndexOf('/') + 1).Replace("+", "-"))
                                       .ToList();

            Logger.Log("Fetching/extracting individual knife URLs from HTML...");

            var knifeUrLs = new List <string>();
            var seenUrLs  = new HashSet <string>(); // de-duplicates links across all overview pages

            for (int pageIndex = 0; pageIndex < masterUrLs.Count; pageIndex++)
            {
                string page = await HtmlFetcher.RetrieveFromUrl(masterUrLs[pageIndex]);

                var doc = new HtmlDocument();
                doc.LoadHtml(page);

                // SelectNodes returns null when the body holds no anchors at all.
                var aTags = doc.DocumentNode.SelectNodes("/html/body//a");
                if (aTags == null)
                {
                    continue;
                }

                string knifeName = knifeNames[pageIndex];

                foreach (var aTag in aTags)
                {
                    string href = aTag.Attributes["href"]?.Value;
                    if (string.IsNullOrWhiteSpace(href))
                    {
                        continue;
                    }

                    // Keep only absolute links to skin pages of the current knife, first occurrence only.
                    if (href.Contains("http") && href.Contains("/skin/") && href.Contains(knifeName) &&
                        seenUrLs.Add(href))
                    {
                        knifeUrLs.Add(href);
                    }
                }
            }

            // Follow the links gathered above to read each skin's title and case labels.
            Logger.Log("Fetching/extracting individual knife HTML...");

            var knifeCaseData = new Dictionary <string, List <string> >();

            foreach (string knifeUrL in knifeUrLs)
            {
                string page = await HtmlFetcher.RetrieveFromUrl(knifeUrL);

                var doc = new HtmlDocument();
                doc.LoadHtml(page);

                // Paragraphs carrying the class "collection-text-label"; none at all yields an empty list.
                var paragraphs = doc.DocumentNode.SelectNodes("html/body//p");
                List <string> knifeCases = paragraphs == null
                    ? new List <string>()
                    : paragraphs.Where(n => n.HasClass("collection-text-label"))
                                .Select(n => n.InnerHtml)
                                .ToList();

                // Page title without the site suffix; null when the page has no <title>.
                string knifeName = doc.DocumentNode.SelectSingleNode("html/head/title")?.InnerHtml
                                   ?.Replace(" - CS:GO Stash", "");

                // Dictionary.Add throws on a repeated key, so a title seen before is skipped.
                if (knifeName != null && !knifeCaseData.ContainsKey(knifeName))
                {
                    knifeCaseData.Add(knifeName, knifeCases);
                }
            }

            return knifeCaseData;
        }
Пример #5
0
        /// <summary>
        /// Downloads each case page and reads its name, collection, icon and item list.
        /// </summary>
        /// <remarks>
        /// Items are found by scanning the raw page text line by line: every line containing " | "
        /// is parsed as an HTML fragment and the inner HTML of its top-level <c>h3/a</c> anchors is
        /// joined with " | ".
        /// </remarks>
        /// <param name="caseUrLs">Case page URLs, processed in order.</param>
        /// <returns>
        /// A dictionary keyed by case name. Pages without the header block or without a name are
        /// skipped, and when a name occurs more than once only the first page is kept.
        /// </returns>
        private static async Task <Dictionary <string, DataCollection> > ParseGridBlocks(IEnumerable <string> caseUrLs)
        {
            var csgoData = new Dictionary <string, DataCollection>();
            string[] lineBreaks = { "\r\n", "\r", "\n" };

            foreach (string caseUrL in caseUrLs)
            {
                string page = await HtmlFetcher.RetrieveFromUrl(caseUrL);

                var htmlDoc = new HtmlDocument();
                htmlDoc.LoadHtml(page);

                // Header block holding the case name (h1) and collection (h4).
                // SelectSingleNode returns null when the block is missing; nothing can be keyed then.
                HtmlNode caseNodeData =
                    htmlDoc.DocumentNode.SelectSingleNode("//div[@class='inline-middle collapsed-top-margin']");
                if (caseNodeData == null)
                {
                    continue;
                }

                string caseName = ExtractStringFromTags(caseNodeData, "h1");

                // A null name cannot be used as a dictionary key; a name already present is skipped
                // before any further work is done for the page.
                if (caseName == null || csgoData.ContainsKey(caseName))
                {
                    continue;
                }

                string caseCollection = ExtractStringFromTags(caseNodeData, "h4");
                string iconUrL        = ExtractImgSrc(htmlDoc, "img-responsive center-block content-header-img-margin");

                var caseData = new DataCollection
                {
                    Name           = caseName,
                    CaseCollection = caseCollection,
                    IconUrL        = iconUrL
                };

                foreach (string line in page.Split(lineBreaks, StringSplitOptions.None))
                {
                    if (!line.Contains(" | "))
                    {
                        continue;
                    }

                    // Parse the single line as a fragment and take the anchors of a root-level <h3>.
                    var lineDoc = new HtmlDocument();
                    lineDoc.LoadHtml(line);

                    HtmlNodeCollection anchors = lineDoc.DocumentNode.SelectNodes("/h3/a");
                    if (anchors == null || !anchors.Any())
                    {
                        continue;
                    }

                    // Weapon name | skin name
                    caseData.Items.Add(string.Join(" | ", anchors.Select(a => a.InnerHtml)));
                }

                csgoData.Add(caseName, caseData);
            }

            return csgoData;
        }