/// <summary>
/// Scrapes Roblox profile pages while <c>ID</c> is below <c>To</c>, recording every
/// valid username that is not already present in <c>Extension.ScrapeExt.profiles</c>.
/// </summary>
/// <remarks>
/// The shared counter <c>Extension.ScrapeExt.ProgID</c> is seeded from <c>ID</c> on the
/// first pass and drives <c>ID</c> on every later pass. It is advanced after EVERY
/// attempt (success, invalid name, or failure); previously a page whose heading did not
/// look like a username left the counter untouched and the same profile was requested
/// forever.
/// NOTE(review): <c>ID = ProgID + 1</c> combined with the post-increment skips the ID
/// right after the starting one; kept as-is because the intended contract of the shared
/// counter is not visible here.
/// </remarks>
public override void Perform()
        {
            int totalTimes = 0;

            while (ID < To)
            {
                try
                {
                    if (totalTimes == 0)
                    {
                        Extension.ScrapeExt.ProgID = ID;
                        Console.WriteLine("test");
                    }
                    else
                    {
                        ID = Extension.ScrapeExt.ProgID + 1;
                    }

                    string       url = $"https://www.roblox.com/users/{ID}/profile";
                    HtmlDocument doc = new HtmlWeb().Load(url);

                    // SelectSingleNode returns null (instead of throwing) when the heading is absent.
                    HtmlNode heading  = doc.DocumentNode.SelectSingleNode("//div[@class='header-title']//h2");
                    string   username = heading?.InnerText;

                    // The character class already excludes spaces, so no separate space check is needed.
                    if (username != null && Regex.IsMatch(username, @"^[a-zA-Z0-9_]+$"))
                    {
                        Profile user = new Profile()
                        {
                            Username    = username,
                            JoinDate    = null,
                            PlaceVisits = null,
                        };

                        if (Extension.ScrapeExt.profiles.Any(x => x.Username == user.Username))
                        {
                            Colorful.Console.WriteLine("Duplicate!", System.Drawing.Color.Red);
                        }
                        else
                        {
                            Extension.ScrapeExt.profiles.Add(user);

                            Colorful.Console.WriteLine(user.Username, System.Drawing.Color.Orange);
                        }
                    }
                }
                catch (System.Exception ex)
                {
                    // Best effort: a profile that cannot be loaded or parsed is reported and skipped.
                    Colorful.Console.WriteLine($"Failed to scrape profile {ID}: {ex.Message}", System.Drawing.Color.Red);
                }
                finally
                {
                    // Always move on, otherwise a non-matching page would be retried indefinitely.
                    Extension.ScrapeExt.ProgID++;
                    totalTimes++;
                }
                Console.Title = ID.ToString();
            }
            Colorful.Console.WriteLine("Thread Finished!", System.Drawing.Color.Lime);
        }
        /// <summary>
        /// Extracts the key/value parameter pairs from the element carrying
        /// <c>data-prototype-id="allegro.showoffer.parameters"</c>.
        /// </summary>
        /// <param name="document">Parsed offer page.</param>
        /// <returns>
        /// The parameters found; empty when the container, its list items or the expected
        /// <c>div</c> structure are missing. The first key wins when a key repeats.
        /// </returns>
        private Dictionary <string, string> GetAttributes(HtmlDocument document)
        {
            Dictionary <string, string> parameters = new Dictionary <string, string>();

            // HtmlAgilityPack returns null, not an empty collection, when nothing matches.
            HtmlNode container =
                document.DocumentNode.SelectSingleNode("//*[@data-prototype-id=\"allegro.showoffer.parameters\"]");

            if (container == null)
            {
                return(parameters);
            }

            // Re-parse the container so the "//li" query only sees its own items.
            HtmlDocument parameterDocument = new HtmlDocument();
            parameterDocument.LoadHtml(container.OuterHtml);
            HtmlNodeCollection nodes = parameterDocument.DocumentNode.SelectNodes("//li");

            if (nodes == null)
            {
                return(parameters);
            }

            // The first list item is skipped, exactly as before.
            foreach (HtmlNode listElement in nodes.Skip(1))
            {
                HtmlDocument parameterInnerDocument = new HtmlDocument();
                parameterInnerDocument.LoadHtml(listElement.InnerHtml);
                HtmlNodeCollection divs = parameterInnerDocument.DocumentNode.SelectNodes("//div");

                // divs[1] holds the key and divs[2] the value; skip items without that shape.
                if (divs == null || divs.Count < 3)
                {
                    continue;
                }

                string keyArg = divs[1].InnerText;
                string valArg = divs[2].InnerText;

                // More than one colon means this is not a plain "name:" label.
                if (keyArg.Count(c => c == ':') > 1)
                {
                    continue;
                }

                if (keyArg.Length > 0 && keyArg[keyArg.Length - 1] == ':')
                {
                    keyArg = keyArg.Substring(0, keyArg.Length - 1);
                }
                parameters.TryAdd(keyArg, valArg);
            }

            return(parameters);
        }
示例#3
0
        /// <summary>
        /// Maps the text of the first <c>td</c> with class <c>smwndcap</c> to a
        /// <c>Sport</c> value by looking for a known sport name inside it.
        /// </summary>
        /// <param name="doc">Parsed match page.</param>
        /// <returns>
        /// The recognised sport, or <c>Sport.NotSupported</c> when no known name is
        /// present or the caption cannot be read.
        /// </returns>
        Sport GetSport(HtmlDocument doc)
        {
            try
            {
                string caption = doc.DocumentNode
                                 .SelectNodes("//td[@class='smwndcap']")
                                 .First()
                                 .InnerText;

                // Checked in the same order as before; the first hit wins.
                return caption.Contains("Soccer")     ? Sport.Football
                     : caption.Contains("Basketball") ? Sport.Basketball
                     : caption.Contains("Tennis")     ? Sport.Tennis
                     : caption.Contains("Volleyball") ? Sport.Volleyball
                     : Sport.NotSupported;
            }
            catch
            {
                // A missing caption (or any other failure) is reported as "not supported".
                return Sport.NotSupported;
            }
        }
示例#4
0
        /// <summary>
        /// Builds a <c>MatchName</c> from the first and last
        /// <c>div.live-today-member-name</c> elements of the page.
        /// </summary>
        /// <param name="doc">Parsed match page.</param>
        /// <returns>The two team names, or <c>null</c> when anything goes wrong.</returns>
        MatchName GetFullMatchName(HtmlDocument doc)
        {
            try
            {
                HtmlNodeCollection members = doc.DocumentNode.SelectNodes("//div[@class='live-today-member-name nowrap ']");

                // Each member block is re-parsed so that "//span" is scoped to it alone.
                HtmlDocument homeDoc = new HtmlDocument();
                HtmlDocument awayDoc = new HtmlDocument();
                homeDoc.LoadHtml(members.First().InnerHtml);
                awayDoc.LoadHtml(members.Last().InnerHtml);

                string home = homeDoc.DocumentNode.SelectNodes("//span").First().InnerText;
                string away = awayDoc.DocumentNode.SelectNodes("//span").First().InnerText;

                // The joined name is split again on every known separator, as before.
                string[] separators = { " vs ", " @ ", " - " };
                string[] sides      = (home + " vs " + away).Split(separators, StringSplitOptions.RemoveEmptyEntries);

                // Drop a trailing " (...)" annotation from both sides.
                for (int i = 0; i < 2; i++)
                {
                    if (sides[i].Contains("("))
                    {
                        sides[i] = sides[i].Split(new string[] { " (" }, StringSplitOptions.RemoveEmptyEntries)[0];
                    }
                }

                return new MatchName(sides[0], sides[1]);
            }
            catch
            {
                return null;
            }
        }
示例#5
0
        /// <summary>
        /// Parses <c>pageContent</c> and appends one <c>MakerObj</c> to <c>MakersList</c>
        /// for every table cell found inside the element with id <c>marke</c>.
        /// </summary>
        /// <remarks>
        /// The maker URL path is the text between the first and the last single quote of
        /// the cell's <c>onclick</c> attribute. Cells without such an attribute or without
        /// a quoted value are skipped instead of aborting the whole run. Nothing is added
        /// when the page has parse errors or the container is missing.
        /// </remarks>
        private void GetMakers()
        {
            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(this.pageContent);

            if (htmlDoc.DocumentNode == null || htmlDoc.ParseErrors == null || htmlDoc.ParseErrors.Any())
            {
                return;
            }

            HtmlNode makerContainer = htmlDoc.DocumentNode.SelectSingleNode("//*[@id=\"marke\"]");

            if (makerContainer == null)
            {
                return;
            }

            // Re-parse the container so the cell query cannot match anything outside it.
            var containerDoc = new HtmlDocument();
            containerDoc.LoadHtml(makerContainer.InnerHtml);
            HtmlNodeCollection makerCells = containerDoc.DocumentNode.SelectNodes("//*//td");

            // SelectNodes returns null when the container holds no cells.
            if (makerCells == null)
            {
                return;
            }

            foreach (HtmlNode cell in makerCells)
            {
                var onclick = cell.Attributes["onclick"];
                if (onclick == null)
                {
                    continue;
                }

                string value      = onclick.Value;
                int    openQuote  = value.IndexOf('\'');
                int    closeQuote = value.LastIndexOf('\'');

                // Both quotes must exist and be distinct, otherwise there is no path to extract.
                if (openQuote < 0 || closeQuote <= openQuote)
                {
                    continue;
                }

                MakerObj maker = new MakerObj();
                maker.MakerName    = cell.InnerText;
                maker.MakerUrlPath = value.Substring(openQuote + 1, closeQuote - openQuote - 1);

                this.MakersList.Add(maker);
            }
        }
示例#6
0
        /// <summary>
        /// Reads the first <c>span</c> with class <c>preInfo type2</c>, takes the text in
        /// front of the first " -" and maps it to a <c>Sport</c> value.
        /// </summary>
        /// <param name="doc">Parsed match page.</param>
        /// <returns>
        /// The matching sport, or <c>Sport.NotSupported</c> for unknown names and for any
        /// failure while reading the page.
        /// </returns>
        Sport GetSport(HtmlDocument doc)
        {
            try
            {
                string info      = doc.DocumentNode.SelectNodes("//span[@class='preInfo type2']").First().InnerText;
                string sportName = info.Split(new string[] { " -" }, StringSplitOptions.RemoveEmptyEntries)[0];

                // Exact (ordinal) comparison, just like the former switch statement.
                if (sportName == "Soccer")
                {
                    return Sport.Football;
                }
                if (sportName == "Basketball")
                {
                    return Sport.Basketball;
                }
                if (sportName == "Tennis")
                {
                    return Sport.Tennis;
                }
                if (sportName == "Volleyball")
                {
                    return Sport.Volleyball;
                }

                return Sport.NotSupported;
            }
            catch
            {
                return Sport.NotSupported;
            }
        }
        /// <summary>
        /// Downloads a model page and builds a <c>Mini</c> (with its creator and source)
        /// from the creator link, the <c>h1</c> title, the <c>og:image</c> meta tag and
        /// the price button.
        /// </summary>
        /// <param name="url">Address of the model page.</param>
        /// <returns>The populated <c>Mini</c>; <c>Cost</c> is 0 when no price is shown or it reads "Free".</returns>
        /// <exception cref="InvalidOperationException">
        /// The page lacks the creator link, the title or the <c>og:image</c> meta tag.
        /// Previously these cases surfaced as null-reference / argument-null errors.
        /// </exception>
        public async Task <Mini> ParseFromUrl(Uri url)
        {
            HtmlWeb      web     = new HtmlWeb();
            HtmlDocument htmlDoc = await web.LoadFromWebAsync(url, null, null);
            HtmlNode     root    = htmlDoc.DocumentNode;

            // SelectSingleNode yields null when nothing matches; SelectNodes(...).FirstOrDefault()
            // would throw on that null collection.
            HtmlNode creatorLink = root.SelectSingleNode("//a[@class='stats']");
            string   creatorUrl  = creatorLink?.GetAttributeValue("href", null);
            if (creatorUrl == null)
            {
                throw new InvalidOperationException($"No creator link found on {url}.");
            }

            // The creator name is the fourth '/'-separated segment of the link.
            string[] creatorUrlParts = creatorUrl.Split('/');
            if (creatorUrlParts.Length < 4)
            {
                throw new InvalidOperationException($"Unexpected creator link '{creatorUrl}' on {url}.");
            }
            string creatorName = Uri.UnescapeDataString(creatorUrlParts[3]);

            HtmlNode titleNode = root.SelectSingleNode("//h1");
            if (titleNode == null)
            {
                throw new InvalidOperationException($"No title found on {url}.");
            }

            // The thumbnail is the "content" attribute of the meta tag that has an attribute valued "og:image".
            HtmlNode imageMeta = root.SelectNodes("//meta")
                                 ?.FirstOrDefault(n => n.Attributes.Any(a => a.Value == "og:image"));
            string thumbnail = imageMeta?.Attributes.FirstOrDefault(a => a.Name == "content")?.Value;
            if (thumbnail == null)
            {
                throw new InvalidOperationException($"No og:image meta tag found on {url}.");
            }

            Creator creator = new Creator
            {
                Name = creatorName
            };
            Cults3dSource source = new Cults3dSource(creator, creatorName);

            creator.Sites.Add(source);

            Mini mini = new Mini()
            {
                Creator   = creator,
                Name      = System.Web.HttpUtility.HtmlDecode(titleNode.InnerText.Trim()),
                Thumbnail = thumbnail,
                Link      = url.ToString()
            };

            int      cost      = 0;
            HtmlNode priceNode = root.SelectSingleNode("//span[@class='btn-group-end btn-third']");

            if (priceNode != null && priceNode.InnerText != "Free")
            {
                // Skip the three-character prefix and keep only the whole-number part.
                cost = Int32.Parse(priceNode.InnerText.Remove(0, 3).Split('.').First());
            }
            mini.Cost = cost;

            mini.Sources.Add(new MiniSourceSite(mini, source, url));

            return(mini);
        }
示例#8
0
        /// <summary>
        /// Returns the first node that <c>FindNodesByClass</c> reports for the given class name.
        /// </summary>
        /// <param name="node">Node handed to <c>FindNodesByClass</c>.</param>
        /// <param name="className">Class name to look for.</param>
        /// <returns>The first match, or <c>null</c> when there is none.</returns>
        public static HtmlNode GetNode(HtmlNode node, string className)
        {
            HtmlNodeCollection matches = FindNodesByClass(node, className);

            if (matches == null || matches.Count == 0)
            {
                return null;
            }

            return matches.First();
        }
示例#9
0
 /// <summary>
 /// Fills the statistics of one team from the given rows. For every statistic the
 /// first row whose text contains the label is used, and the value is the integer in
 /// its <c>td</c> cell at position <paramref name="index"/>.
 /// </summary>
 /// <param name="nodes">Rows to search.</param>
 /// <param name="team">Object that receives the values.</param>
 /// <param name="index">Zero-based position of the team's cell within a row.</param>
 /// <returns>The same <paramref name="team"/> instance.</returns>
 private static MatchStatistics FillStatistics(HtmlNodeCollection nodes, MatchStatistics team, int index)
 {
     // Looks up a single statistic by its label.
     Func<string, int> readStat = label =>
     {
         HtmlNode row = nodes.First(n => n.InnerText.Contains(label));
         return Int32.Parse(row.SelectNodes("td")[index].InnerText);
     };

     team.Kicks        = readStat("Kicks");
     team.Handballs    = readStat("Handballs");
     team.Marks        = readStat("Marks");
     team.HitOuts      = readStat("Hit Outs");
     team.Tackles      = readStat("Tackles");
     team.FreesFor     = readStat("Frees For");
     team.FreesAgainst = readStat("Frees Against");

     return team;
 }
示例#10
0
        /// <summary>
        /// Loads the fixture <c>Data/CvBankasPost.txt</c> from the current directory,
        /// HTML-decodes it, parses it and keeps the first top-level node in <c>_html</c>.
        /// </summary>
        public SelectorTests()
        {
            // Path.Combine instead of hard-coded "\\" so the fixture is also found on non-Windows hosts.
            string path = Path.Combine(Directory.GetCurrentDirectory(), "Data", "CvBankasPost.txt");

            string decodedData = WebUtility.HtmlDecode(File.ReadAllText(path));

            HtmlDocument doc = new HtmlDocument();

            doc.LoadHtml(decodedData);
            HtmlNodeCollection nodes = doc.DocumentNode.ChildNodes;

            _html = nodes.First();
        }
示例#11
0
        /// <summary>
        /// Loads the match list page and adds the live matches of the sport
        /// <c>type_of_sport[index]</c> to <c>MatchDict</c>, keyed by match name with the
        /// match URL as value. Stops once <c>MatchDict</c> holds <c>MaximumMatches</c> entries.
        /// </summary>
        /// <param name="index">Index into <c>type_of_sport</c>.</param>
        /// <remarks>
        /// Rows without a link, without the expected attributes, or whose text is not of
        /// the form "A - B" are skipped. Previously a row without any link caused a
        /// null-reference error because the link was dereferenced before the null check.
        /// </remarks>
        public void ParseMatchList(int index)
        {
            HtmlDocument doc = LoadWithTimeout(MatchListUrl);

            if (doc == null)
            {
                return;
            }

            HtmlNodeCollection matchList = doc.DocumentNode.SelectNodes(string.Format("//tr[@data-sport='{0}']", type_of_sport[index]));

            if (matchList == null)
            {
                return;
            }
            foreach (HtmlNode row in matchList)
            {
                // First link of the row; null when the row has none.
                HtmlNode link = row.SelectSingleNode(".//a");
                if (link == null)
                {
                    continue;
                }

                var idNode   = link.Attributes["id"];
                var hrefNode = link.Attributes["href"];
                if (idNode == null || hrefNode == null)
                {
                    continue;
                }

                string id = idNode.Value;
                if (!id.Contains("match_live_name"))
                {
                    continue;
                }

                string url = "https://www.olimpkz.com/" + hrefNode.Value;
                // Turns the still-encoded "&amp;" of the raw attribute into "&".
                url = url.Replace("amp;", "");

                string[] name = link.InnerText.Split(new string[] { " - " }, StringSplitOptions.RemoveEmptyEntries);
                if (name.Length < 2)
                {
                    // Not a "home - away" caption; nothing sensible to store.
                    continue;
                }

                MatchName matchname = new MatchName(name[0], name[1]);
                MatchDict.Add(matchname, url);

                if (MatchDict.Count == MaximumMatches)
                {
                    break;
                }
            }
        }
示例#12
0
        /// <summary>
        /// Builds a member from a set of form nodes whose <c>name</c> attributes follow
        /// the pattern <c>{encodedEmail}_{property}</c>.
        /// </summary>
        /// <param name="nodes">Form nodes of one member; the first one supplies the encoded e-mail.</param>
        /// <remarks>
        /// String properties receive the node's <c>value</c> attribute, boolean properties
        /// are true when that value is "on". Field names are lower-cased with the invariant
        /// culture so the lookup does not depend on the current culture. Nodes without a
        /// <c>name</c> or <c>value</c> attribute are ignored instead of raising a
        /// null-reference error.
        /// </remarks>
        internal Member(HtmlNodeCollection nodes)
        {
            var firstNode = nodes.First();

            // Everything from the last "_suffix" on is the field name; the rest is the encoded e-mail.
            _encEmail  = Regex.Replace(firstNode.Attributes["name"].Value, "_\\w*$", "");
            this.Email = HttpUtility.UrlDecode(_encEmail);

            foreach (var prop in _props.Unignored())
            {
                var name     = String.Format("{0}_{1}", _encEmail, prop.Name.ToLowerInvariant());
                var thisNode = nodes.SingleOrDefault(n => n.GetAttributeValue("name", null) == name);
                if (thisNode == null)
                {
                    continue;
                }

                var val = thisNode.GetAttributeValue("value", null);
                if (val == null)
                {
                    // No value attribute: leave the property at its default.
                    continue;
                }

                if (prop.PropertyType == typeof(string))
                {
                    prop.SetValue(this, val);
                }
                else if (prop.PropertyType == typeof(bool))
                {
                    prop.SetValue(this, val == "on");
                }
            }

            if (this.NoMail)
            {
                this.NoMailReason = NoMailReason.Unknown;
                string name = _encEmail + "_nomail";
                var    node = nodes.SingleOrDefault(n => n.GetAttributeValue("name", null) == name);
                if (node != null && node.NextSibling != null)
                {
                    // The reason code is the text that directly follows the nomail node.
                    string reason = node.NextSibling.InnerText;
                    switch (reason)
                    {
                    case "[A]":
                        this.NoMailReason = NoMailReason.Administrator;
                        break;

                    case "[B]":
                        this.NoMailReason = NoMailReason.Bounce;
                        break;

                    case "[U]":
                        this.NoMailReason = NoMailReason.User;
                        break;
                    }
                }
            }
        }
示例#13
0
        /// <summary>
        /// Downloads the page at <c>Url</c>, takes the object literal embedded in its
        /// first <c>script</c> element, rewrites every <c>new Date(ms)</c> expression into
        /// a quoted <c>yyyyMMdd</c> string and deserializes the result.
        /// </summary>
        /// <returns>The deserialized <c>WeeklyMenu</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// The page has no <c>script</c> element or the script holds no object literal.
        /// </exception>
        public async Task <WeeklyMenu> GetMenu()
        {
            HtmlDocument doc = new HtmlDocument();

            // Client, response and stream are released as soon as the document is loaded.
            using (var client = new HttpClient())
            using (HttpResponseMessage result = await client.GetAsync(Url))
            using (Stream stream = await result.Content.ReadAsStreamAsync())
            {
                doc.Load(stream);
            }

            // SelectSingleNode returns null when the page has no script element.
            HtmlNode script = doc.DocumentNode.SelectSingleNode("//script");
            if (script == null)
            {
                throw new InvalidOperationException("The menu page contains no <script> element.");
            }

            string lunchData = script.InnerHtml;

            // Keep everything from the first '{' up to, but excluding, the last two characters.
            int start  = lunchData.IndexOf('{');
            int length = lunchData.Length - 2 - start;
            if (start < 0 || length < 0)
            {
                throw new InvalidOperationException("The menu script does not contain an object literal.");
            }

            string newData = WebUtility.HtmlDecode(lunchData.Substring(start, length));

            // "new Date(123)" -> "\"yyyyMMdd\"": milliseconds since 1970-01-01 plus a fixed two hours.
            // NOTE(review): the +2h offset is hard-coded; confirm it is the intended time zone handling.
            newData = Regex.Replace(newData, @"new Date\(([0-9]{1,})\)", match =>
            {
                long     milliseconds = long.Parse(match.Groups[1].Value);
                DateTime date         = new DateTime(1970, 1, 1).AddMilliseconds(milliseconds).AddHours(2);

                // Invariant culture keeps the digits Gregorian regardless of the host's calendar.
                return "\"" + date.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture) + "\"";
            });

            return(JsonConvert.DeserializeObject <WeeklyMenu>(newData));
        }
示例#14
0
        /// <summary>
        /// Tries to find the touch icon of a site: the <c>href</c> of the first
        /// <c>link</c> element whose <c>rel</c> is <c>apple-touch-icon-precomposed</c> or
        /// <c>apple-touch-icon</c>.
        /// </summary>
        /// <param name="url">Address of the page to inspect.</param>
        /// <returns>
        /// The icon address exactly as written in the page, or an empty string when the
        /// page cannot be fetched within one second, cannot be parsed, or declares no such icon.
        /// </returns>
        /// <remarks>
        /// The lookup is best effort and never throws; download and parsing now share one
        /// guard, so a failure while reading the response body no longer escapes.
        /// NOTE(review): the request is awaited synchronously via <c>.Result</c> because
        /// the method signature is synchronous.
        /// </remarks>
        public string GetSiteIconUrl(string url)
        {
            try
            {
                using (var client = new HttpClient())
                {
                    client.Timeout = new TimeSpan(0, 0, 1);

                    using (HttpResponseMessage response = client.GetAsync(url).Result)
                    {
                        HtmlDocument htmlDoc = new HtmlDocument();
                        htmlDoc.Load(response.Content.ReadAsStreamAsync().Result);

                        if (htmlDoc.DocumentNode != null)
                        {
                            // Example: <link rel="apple-touch-icon" href="http://example.com/icon.png" />
                            HtmlNode iconLink = htmlDoc.DocumentNode.SelectSingleNode("//link[(@rel='apple-touch-icon-precomposed' or @rel='apple-touch-icon')]");

                            if (iconLink != null)
                            {
                                return(iconLink.GetAttributeValue("href", ""));
                            }
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Deliberately ignored: an unreachable or malformed page simply has no icon.
            }

            return("");
        }
示例#15
0
        /// <summary>
        /// Loads the page addressed by <c>makeUrlTKB(id)</c> and returns a one-element
        /// list holding a user whose name is the trimmed text of the element with id
        /// <c>ctl00_ContentPlaceHolder1_ctl00_lblContentTenSV</c>.
        /// </summary>
        /// <param name="id">Identifier stored in the user and used to build the page address.</param>
        /// <returns>A list with exactly one user; the password is an empty string.</returns>
        public static List <User> getUser(string id)
        {
            HtmlDocument page     = new HtmlWeb().Load(makeUrlTKB(id));
            HtmlNode     nameNode = page.DocumentNode
                                    .SelectNodes("//*[@id='ctl00_ContentPlaceHolder1_ctl00_lblContentTenSV']")
                                    .First();

            User user = new User
            {
                Id       = id,
                Name     = nameNode.InnerText.Trim(),
                Password = ""
            };

            return new List <User> { user };
        }
示例#16
0
        /// <summary>
        /// Walks the rows of an info box, adds every "title: value" pair to
        /// <paramref name="vehicleAttributes"/> and echoes it to the console.
        /// </summary>
        /// <param name="vehicleAttributes">Dictionary that receives the pairs.</param>
        /// <param name="rows">Rows of the info box; <c>null</c> is treated as "no rows".</param>
        /// <remarks>
        /// The title is the bold text of the first cell, the value the text of the last
        /// cell. Rows without <c>td</c> cells or without a bold title are skipped; they
        /// previously aborted the traversal with a null-related exception.
        /// </remarks>
        public void GetAttributesFromInfoBox(Dictionary <string, string> vehicleAttributes, HtmlNodeCollection rows)
        {
            // A query that matched nothing yields null rather than an empty collection.
            if (rows == null)
            {
                return;
            }

            foreach (HtmlNode row in rows)
            {
                HtmlNodeCollection cells = row.SelectNodes("td");
                if (cells == null)
                {
                    continue;
                }

                HtmlNode titleNode = cells.First().SelectSingleNode("b");
                if (titleNode == null)
                {
                    continue;
                }

                string rowTitle = titleNode.InnerText.Trim();
                string rowValue = cells.Last().InnerText.Trim();

                vehicleAttributes.Add(rowTitle, rowValue);

                _consoleManager.WriteLineInColour(ConsoleColor.DarkGreen, $"{rowTitle}: {rowValue}");
            }
        }
示例#17
0
        /// <summary>
        /// Downloads the blog's search page and prints, for every article, its titles,
        /// its name/link pairs and its image address, followed by a separator line.
        /// Waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Unused.</param>
        static void Main(string[] args)
        {
            var url = "https://meczreplay.blogspot.com/search?max-results=125";
            var web = new HtmlWeb();
            var doc = web.Load(url);

            HtmlNodeCollection articles = doc.DocumentNode.SelectNodes("/html/body/div[1]/div[3]/div/div[2]/main/div/div/div[1]/article");

            // SelectNodes returns null when the page layout does not match the path.
            if (articles != null)
            {
                string[] separators = new string[] { "<br>", "\n", "<b>", "</b>" };

                foreach (HtmlNode article in articles)
                {
                    HtmlNodeCollection textBlocks = article.SelectNodes("div/div/div[3]/div[2]/div");
                    if (textBlocks != null)
                    {
                        string[] lines = textBlocks.First().InnerText.Split(separators, StringSplitOptions.RemoveEmptyEntries);
                        foreach (string line in lines)
                        {
                            // Turn the non-breaking-space entity into a space and drop stray semicolons.
                            string lineNormalized = line.Replace("&#160", " ").Replace(";", "");
                            int    indexHttp      = lineNormalized.IndexOf("http");

                            if (indexHttp == -1)
                            {
                                // No link on this line: it is a title.
                                Console.WriteLine(lineNormalized);
                            }
                            else
                            {
                                // Text before the link is the name, the rest is the link itself.
                                Console.WriteLine(lineNormalized.Remove(indexHttp));
                                Console.WriteLine(lineNormalized.Remove(0, indexHttp));
                            }
                        }
                    }

                    HtmlNode nodeImg = article.SelectSingleNode("div/div/div[3]/div[1]/img");
                    if (nodeImg != null)
                    {
                        Console.WriteLine(nodeImg.GetAttributeValue("src", ""));
                    }
                    Console.WriteLine("=====================================================");
                }
            }
            Console.ReadKey();
        }
示例#18
0
        /// <summary>
        /// Reads the current page source of <c>matchListBrowser</c> and adds the matches
        /// of every group whose heading equals <c>type_of_sport[index]</c> to
        /// <c>MatchDict</c>, keyed by match name with the event URL as value.
        /// </summary>
        /// <param name="index">Index into <c>type_of_sport</c>.</param>
        /// <remarks>
        /// Parsing ends as soon as <c>MatchDict</c> holds <c>MaximumMatches</c> entries.
        /// The former <c>break</c> only left the inner loop, so later groups kept adding
        /// matches beyond the limit. Groups without a heading or without matches and
        /// items without a usable <c>id</c> are skipped.
        /// </remarks>
        public void ParseMatchList(int index)
        {
            string       html = matchListBrowser.GetSourceAsync().Result;
            HtmlDocument doc  = new HtmlDocument();

            doc.LoadHtml(html);
            HtmlNodeCollection matchList = doc.DocumentNode.SelectNodes("//li[@class='groupedListItem first' or @class='groupedListItem' or @class='groupedListItem last' or @class='groupedListItem first last']");

            if (matchList == null)
            {
                return;
            }
            foreach (var node in matchList)
            {
                // Re-parse the group so the "//" queries below are confined to it.
                HtmlDocument groupDoc = new HtmlDocument();
                groupDoc.LoadHtml(node.InnerHtml);

                HtmlNode heading = groupDoc.DocumentNode.SelectSingleNode("//h3");
                if (heading == null)
                {
                    continue;
                }

                string sportName = heading.InnerText;
                if (sportName == type_of_sport[index])
                {
                    HtmlNodeCollection matchNodes = groupDoc.DocumentNode.SelectNodes("//li[@class='groupedListSubItem first' or @class='groupedListSubItem last' or @class='groupedListSubItem' or @class='groupedListSubItem first last']");
                    if (matchNodes == null)
                    {
                        continue;
                    }
                    foreach (var node2 in matchNodes)
                    {
                        string id = node2.GetAttributeValue("id", null);
                        if (id == null || id.Length < 2)
                        {
                            continue;
                        }

                        // The first two characters of the id are a prefix, not part of the event id.
                        id = id.Remove(0, 2);
                        MatchName Name = GetMatchName(node2);

                        string url = "https://mobile.leonbets.net/mobile/#eventDetails/:" + id;

                        MatchDict.Add(Name, url);
                        if (MatchDict.Count == MaximumMatches)
                        {
                            // Limit reached: stop altogether, not just for this group.
                            return;
                        }
                    }
                }
            }
        }
示例#19
0
        /// <summary>
        /// Creates an <c>IScraper</c> mock whose <c>GetHtml</c> returns, for any argument,
        /// the first top-level node of the HTML-decoded fixture file.
        /// </summary>
        /// <param name="filePath">Fixture path that is appended verbatim to the current directory.</param>
        /// <returns>The mocked scraper.</returns>
        private IScraper SetupMockScraper(string filePath)
        {
            var scraperMock = new Mock <IScraper>();

            string fullPath = Directory.GetCurrentDirectory() + filePath;
            string markup   = WebUtility.HtmlDecode(File.ReadAllText(fullPath));

            HtmlDocument fixture = new HtmlDocument();
            fixture.LoadHtml(markup);

            // Resolved once, up front; every GetHtml call yields this same node.
            HtmlNode firstNode = fixture.DocumentNode.ChildNodes.First();

            scraperMock.Setup(s => s.GetHtml(It.IsAny <string>())).Returns(firstNode);

            return scraperMock.Object;
        }
示例#20
0
        /// <summary>
        /// Builds the addresses of the result pages of a category.
        /// </summary>
        /// <param name="category_url">Address of the category page.</param>
        /// <param name="pages_to_get">Number of pages to list; 0 means every page up to the last one found in the pagination.</param>
        /// <returns>One address per page, starting with page 1.</returns>
        /// <remarks>
        /// The link of the element with id <c>pg2</c> serves as the template: it is split
        /// at <c>page=</c>, and the page number that leads the second part is replaced.
        /// The last page number comes from the second-to-last child of the <c>ul</c>
        /// inside the last <c>searchResultsPagination</c> element.
        /// </remarks>
        public static List <string> GetCategoryPaginationUrls(string category_url, int pages_to_get)
        {
            HtmlDocument doc = DataGetter.GetHtmlpage(new Uri(category_url));

            List <KeyValuePair <string, HtmlNode> > paginationElements =
                DataGetter.GetDataByID(doc, "div", new List <string> { "searchResultsPagination" });

            // The link of page 2 is the template for all other pages.
            List <KeyValuePair <string, HtmlNode> > page2 =
                DataGetter.GetDataByID(doc, "li", new List <string> { "pg2" });
            string page2Link = ExtractLinkFromHtml(page2[0].Value, "href", "'>");

            HtmlNode paginationDiv  = paginationElements[paginationElements.Count - 1].Value;
            HtmlNode pageList       = paginationDiv.ChildNodes.First(x => x.Name == "ul");
            HtmlNode lastPage       = pageList.ChildNodes[pageList.ChildNodes.Count - 2];
            int      lastPageNumber = ExtractNumberFromString(ExtractLinkFromHtml(lastPage, "href", "'>"), "page=");

            string[] pageParts = page2Link.Split(new string[] { "page=" }, StringSplitOptions.None);
            string   suffix    = RemoveNumberFromStartOfString(pageParts[1]);

            // Zero requests all pages.
            int pageCount = pages_to_get == 0 ? lastPageNumber : pages_to_get;

            List <string> paginationLinks = new List <string>();
            for (int page = 1; page <= pageCount; page++)
            {
                paginationLinks.Add(SigmaAldrichConstants.SigmaAldrichMain + pageParts[0] + "page=" + page + suffix);
            }

            return paginationLinks;
        }
        /// <summary>
        /// Downloads a product page and builds a <c>Mini</c> (with its creator and source)
        /// from the vendor link, the <c>h1</c> title, the <c>og:image</c> meta tag and the
        /// first price wrapper.
        /// </summary>
        /// <param name="url">Address of the product page.</param>
        /// <returns>
        /// The populated <c>Mini</c>. <c>Cost</c> is 0 when no price is shown or the price
        /// is zero, otherwise 1 (the exact amount is not parsed).
        /// </returns>
        /// <exception cref="InvalidOperationException">
        /// The page lacks the vendor link, the title or the <c>og:image</c> meta tag.
        /// Previously these cases surfaced as null-reference / argument-null errors.
        /// </exception>
        public async Task <Mini> ParseFromUrl(Uri url)
        {
            HtmlWeb      web     = new HtmlWeb();
            HtmlDocument htmlDoc = await web.LoadFromWebAsync(url, null, null);
            HtmlNode     root    = htmlDoc.DocumentNode;

            // SelectSingleNode yields null when nothing matches; SelectNodes(...).FirstOrDefault()
            // would throw on that null collection.
            HtmlNode creatorLink = root.SelectSingleNode("//a[@class='by-vendor-name-link']");
            string   creatorUrl  = creatorLink?.GetAttributeValue("href", null);
            if (creatorUrl == null)
            {
                throw new InvalidOperationException($"No vendor link found on {url}.");
            }

            // The creator name is the fifth '/'-separated segment of the link.
            string[] creatorUrlParts = creatorUrl.Split('/');
            if (creatorUrlParts.Length < 5)
            {
                throw new InvalidOperationException($"Unexpected vendor link '{creatorUrl}' on {url}.");
            }
            string creatorName = Uri.UnescapeDataString(creatorUrlParts[4]);

            HtmlNode titleNode = root.SelectSingleNode("//h1");
            if (titleNode == null)
            {
                throw new InvalidOperationException($"No title found on {url}.");
            }

            // The thumbnail is the "content" attribute of the meta tag that has an attribute valued "og:image".
            HtmlNode imageMeta = root.SelectNodes("//meta")
                                 ?.FirstOrDefault(n => n.Attributes.Any(a => a.Value == "og:image"));
            string thumbnail = imageMeta?.Attributes.FirstOrDefault(a => a.Name == "content")?.Value;
            if (thumbnail == null)
            {
                throw new InvalidOperationException($"No og:image meta tag found on {url}.");
            }

            Creator creator = new Creator
            {
                Name = creatorName
            };
            Wargaming3dSource source = new Wargaming3dSource(creator, creatorName);

            creator.Sites.Add(source);

            Mini mini = new Mini()
            {
                Creator   = creator,
                Name      = System.Web.HttpUtility.HtmlDecode(titleNode.InnerText.Trim()),
                Thumbnail = thumbnail,
                Link      = url.ToString()
            };

            int      cost      = 0;
            HtmlNode priceNode = root.SelectSingleNode("//div[@class='price-wrapper']");

            if (priceNode != null)
            {
                string priceText = priceNode.InnerText;

                // "0.00" alone is not enough: "10.00" contains it too. A price is free only
                // when it shows "0.00" and contains no non-zero digit.
                bool isFree = priceText.Contains("0.00") && !priceText.Any(c => c >= '1' && c <= '9');

                if (!isFree)
                {
                    //TODO (GitHub #167) - Parsing cost here is a bit hard, so just setting it to be 1 for now since we only have a boolean for cost.
                    cost = 1;
                }
            }
            mini.Cost = cost;

            mini.Sources.Add(new MiniSourceSite(mini, source, url));

            return(mini);
        }
示例#22
0
        /// <summary>
        /// Builds a <c>MatchName</c> from the first <c>h1</c> inside the given node. The
        /// heading is split on " vs ", " @ " or " - ", and a trailing " (...)" annotation
        /// is removed from each of the two sides.
        /// </summary>
        /// <param name="node">Node whose inner HTML contains the heading.</param>
        /// <returns>The two sides of the match.</returns>
        MatchName GetMatchName(HtmlNode node)
        {
            // Parse the inner HTML on its own so "//h1" cannot match outside the node.
            HtmlDocument fragment = new HtmlDocument();
            fragment.LoadHtml(node.InnerHtml);

            string heading = fragment.DocumentNode.SelectNodes("//h1").First().InnerText;

            string[] separators = { " vs ", " @ ", " - " };
            string[] sides      = heading.Split(separators, StringSplitOptions.RemoveEmptyEntries);

            for (int i = 0; i < 2; i++)
            {
                if (sides[i].Contains("("))
                {
                    sides[i] = sides[i].Split(new string[] { " (" }, StringSplitOptions.RemoveEmptyEntries)[0];
                }
            }

            return new MatchName(sides[0], sides[1]);
        }
示例#23
0
        /// <summary>
        /// Downloads the type list of a model and appends one <c>ModelTypeObj</c> to
        /// <c>modelTypesList</c> per row of the first table under the element with id
        /// <c>modelis</c>.
        /// </summary>
        /// <param name="modelObj">Model whose <c>ModelUrlPath</c> and <c>ModelID</c> are used.</param>
        /// <remarks>
        /// A row contributes its first seven children: name, cubic capacity (text in front
        /// of "cm3"), fuel, power, tank, from-year and to-year. The details URL is the text
        /// between the first and last single quote of the first child's <c>onclick</c>
        /// attribute. Children of the table with fewer than seven children (for example
        /// whitespace text nodes) and rows without a usable <c>onclick</c> are skipped
        /// instead of raising an exception. Nothing is added when the page has parse errors.
        /// </remarks>
        private void GetModelTypes(ModelObj modelObj)
        {
            string modelTypesUrl = string.Format("{0}{1}", this.baseUrl, modelObj.ModelUrlPath);
            string modelsContent = GetContent(modelTypesUrl);

            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(modelsContent);

            if (htmlDoc.DocumentNode == null || htmlDoc.ParseErrors == null || htmlDoc.ParseErrors.Any())
            {
                return;
            }

            HtmlNode typesTable = htmlDoc.DocumentNode.SelectSingleNode("//*[@id=\"modelis\"]/table");

            if (typesTable == null)
            {
                return;
            }

            foreach (HtmlNode row in typesTable.ChildNodes)
            {
                HtmlNodeCollection cells = row.ChildNodes;

                // Seven cells are read below; anything shorter is not a data row.
                if (cells.Count < 7)
                {
                    continue;
                }

                var onclick = cells[0].Attributes["onclick"];
                if (onclick == null)
                {
                    continue;
                }

                string value      = onclick.Value;
                int    openQuote  = value.IndexOf('\'');
                int    closeQuote = value.LastIndexOf('\'');
                if (openQuote < 0 || closeQuote <= openQuote)
                {
                    continue;
                }

                // Keep only the number in front of the unit; fall back to the full text when the unit is absent.
                string cubicText = cells[1].InnerText;
                int    unitIndex = cubicText.IndexOf("cm3");

                ModelTypeObj typeObj = new ModelTypeObj();

                typeObj.ModelTypeID         = modelTypesList.Count + 1;
                typeObj.ModelID             = modelObj.ModelID;
                typeObj.ModelTypeName       = cells[0].InnerText;
                typeObj.ModelTypeCubic      = unitIndex >= 0 ? cubicText.Substring(0, unitIndex) : cubicText;
                typeObj.ModelTypeFuel       = cells[2].InnerText;
                typeObj.ModelTypePower      = cells[3].InnerText;
                typeObj.ModelTypeTank       = cells[4].InnerText;
                typeObj.ModelTypeFromYear   = cells[5].InnerText;
                typeObj.ModelTypeToYear     = cells[6].InnerText;
                typeObj.ModelTypeDetailsUrl = value.Substring(openQuote + 1, closeQuote - openQuote - 1);

                modelTypesList.Add(typeObj);
            }
        }
        /// <summary>
        /// Returns the inner HTML of the single <c>td</c> node with class <c>summary_timestamp</c>
        /// that <c>GetHtmlNodes</c> finds for <paramref name="url"/>.
        /// </summary>
        /// <param name="url">Address of the page to inspect.</param>
        /// <returns>
        /// The node's inner HTML when exactly one node is found; otherwise <see cref="string.Empty"/>
        /// (including when no node collection is returned).
        /// </returns>
        private static string GetCurrentCoditions(string url)
        {
            const string className = "summary_timestamp";
            const string htmlTag   = "td";

            // NOTE(review): the original downloaded the page here and parsed it into an HtmlDocument
            // that was never read. The unused parse is removed; the download call is kept because
            // GetHtmlCode is not visible from here and may have side effects - confirm and drop it
            // if GetHtmlNodes fetches the page on its own.
            GetHtmlCode(url);

            HtmlNodeCollection htmlNodes = GetHtmlNodes(url, htmlTag, className);

            // A missing collection is treated like "no unique match" instead of dereferencing null.
            if (htmlNodes == null || htmlNodes.Count != 1)
            {
                return(string.Empty);
            }

            return(htmlNodes.First().InnerHtml);
        }
示例#25
0
        /// <summary>
        /// Requests the eastlines.ru cost-calculation page for the given route, weight and volume and
        /// returns the number assigned to <c>selected_car_id</c> in the page's inline script.
        /// The site automatically selects the vehicle size depending on the cargo weight.
        /// </summary>
        /// <param name="inCity">Destination, sent as the <c>where</c> query parameter.</param>
        /// <param name="outCity">Origin, sent as the <c>from</c> query parameter.</param>
        /// <param name="weight">Cargo weight, sent as the <c>weight</c> query parameter.</param>
        /// <param name="volume">Cargo volume, sent as the <c>volume</c> query parameter.</param>
        /// <returns>
        /// The parsed <c>selected_car_id</c>, or 0 when the script block or the assignment is missing
        /// or the value is not an integer.
        /// </returns>
        private int GetCar(string inCity, string outCity, int weight, int volume)
        {
            // NOTE(review): the city names are interpolated into the query string as-is; callers
            // presumably pass values that are safe to embed in a URL - confirm.
            string url = $@"https://www.eastlines.ru/raschet-stoimosti/?from={outCity}&from-place-id=&from-reference=&where={inCity}&where-place-id=&where-reference=&weight={weight}&volume={volume}";

            var pageContent = LoadPage(url);
            var document    = new HtmlDocument();

            document.LoadHtml(pageContent);

            HtmlNodeCollection collection = document.DocumentNode.SelectNodes("//script[@type='text/javascript' and contains(.,'var selected_car_id = ')]");

            // SelectNodes yields null when nothing matches; previously this crashed on First().
            if (collection == null)
            {
                return(0);
            }

            string assignment = collection.First().InnerText
                                .Split('\n')
                                .FirstOrDefault(r => r.Contains("selected_car_id"));

            if (assignment == null)
            {
                return(0);
            }

            // Strip the statement terminator together with surrounding blanks and a possible '\r'
            // left over from CRLF line endings, so "... = 5;\r" still parses.
            string neededRow = assignment.Split('=').Last().Trim(';', ' ', '\r', '\t');
            int    result;

            int.TryParse(neededRow, out result);
            Console.WriteLine(result.ToString());

            return(result);
        }
示例#26
0
        /// <summary>
        /// Requests the eastlines.ru cost-calculation page for the given route and weight and returns
        /// the text assigned to <c>selected_car_id</c> in the page's inline script.
        /// The site automatically selects the vehicle size depending on the cargo weight.
        /// </summary>
        /// <param name="inCity">Destination, sent as the <c>where</c> query parameter.</param>
        /// <param name="outCity">Origin, sent as the <c>from</c> query parameter.</param>
        /// <param name="weight">Cargo weight; must be positive.</param>
        /// <returns>
        /// A fixed error message when <paramref name="weight"/> is not positive, <c>null</c> when the
        /// script block is not found, otherwise the right-hand side of the assignment with ';' and
        /// spaces trimmed from its ends.
        /// </returns>
        private string GetCar(string inCity, string outCity, int weight)
        {
            if (weight <= 0)
            {
                return("Неправильно указан вес");
            }

            string requestUrl = $@"https://www.eastlines.ru/raschet-stoimosti/?from={outCity}&from-place-id=&from-reference=&where={inCity}&where-place-id=&where-reference=&weight={weight}";

            // Blocks on the task returned by LoadPage until the page body is available.
            var html = LoadPage(requestUrl).Result;
            var page = new HtmlDocument();

            page.LoadHtml(html);

            HtmlNodeCollection scripts = page.DocumentNode.SelectNodes("//script[@type='text/javascript' and contains(.,'var selected_car_id = ')]");

            if (scripts != null)
            {
                string[] scriptLines = scripts.First().InnerText.Split('\n');
                string   assignment  = scriptLines.First(line => line.Contains("selected_car_id"));
                string   rawValue    = assignment.Split('=').Last();

                return(rawValue.Trim(';').Trim(' '));
            }

            return(null);
        }
示例#27
0
        /// <summary>
        /// Parses <c>pageContent</c>, locates the maker list and appends one <c>MakerObj</c> to
        /// <c>MakersList</c> for every list item that passes the configured maker-name filter.
        /// </summary>
        /// <remarks>
        /// The filter is <c>Core.Conf.MakerName</c>: blank or "*" imports every maker, anything else
        /// must equal the cleaned, upper-cased item text. When <c>Core</c> is null no filter is applied.
        /// For each imported maker the logo is downloaded through <c>DownloadMakerImage</c>.
        /// A failure on a single item is logged through <c>Core.Log</c> when available, otherwise it is
        /// rethrown wrapped in an <see cref="Exception"/>.
        /// </remarks>
        private void GetMakers()
        {
            var htmlDoc = new HtmlDocument();

            htmlDoc.LoadHtml(pageContent);

            HtmlNodeCollection Maker_div = null;

            if (htmlDoc.DocumentNode != null)
            {
                Maker_div = htmlDoc.DocumentNode.SelectNodes(
                    "//*[@id=\"ctl00_ctl00_cphContentRow_cphContent_wucNFBAutokatalogMarken1_updatePanelMarken\"]/div[2]/div[2]/ul");
            }

            if (Maker_div == null)
            {
                return;
            }

            // Re-parse the list's inner HTML on its own so "//li" only sees items of this list.
            htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(Maker_div.First().InnerHtml);
            var MakerNodes = htmlDoc.DocumentNode.SelectNodes("//li");

            // SelectNodes yields null for a list without items; iterating it would throw.
            if (MakerNodes != null)
            {
                // The filter does not change while iterating, so evaluate it once.
                string makerFilter = Core != null ? Core.Conf.MakerName : null;
                bool   importAll   = string.IsNullOrWhiteSpace(makerFilter) || makerFilter.Trim().Equals("*");
                string upperFilter = importAll ? null : makerFilter.ToUpper();

                foreach (var node in MakerNodes)
                {
                    if (!importAll && !upperFilter.Equals(CleanNameString(node.InnerText.Trim().ToUpper())))
                    {
                        continue;
                    }

                    try
                    {
                        MakerObj maker = new MakerObj();

                        maker.MakerName = CleanNameString(node.InnerText.Trim());

                        // Logo: "src" attribute of the first child of the link's second child node.
                        maker.MakerLogoUrl = node.FirstChild.ChildNodes[1].FirstChild.ChildAttributes("src")
                                             .First().Value;

                        maker.MakerLogoLocalFile = DownloadMakerImage(maker.MakerLogoUrl);

                        // Maker page: "href" attribute of the item's first child.
                        maker.MakerUrlPath = node.FirstChild.Attributes.AttributesWithName("href")
                                             .First().Value;

                        MakersList.Add(maker);

                        // Short pause between items (presumably to throttle the image downloads).
                        Thread.Sleep(100);
                    }
                    catch (Exception ex)
                    {
                        if (Core != null && Core.Log != null)
                        {
                            Core.Log.Error(string.Format("ADACImporter::GetMakers : {0}", ex.Message));
                        }
                        else
                        {
                            throw new Exception("ADACImporter::GetMakers", ex);
                        }
                    }
                }
            }

            if (Core != null && Core.Log != null)
            {
                Core.Log.Info(string.Format("{0} Maker Records imported.", MakersList.Count));
            }
        }
示例#28
0
        /// <summary>
        /// For every entry of <c>modelTypesList</c>, downloads its details page
        /// (<c>baseUrl + ModelTypeDetailsUrl</c>), reads the detail tables and appends a populated
        /// <c>CarDetailsObj</c> to <c>carDetailsList</c>.
        /// </summary>
        /// <remarks>
        /// A page without the general-data table is skipped. The engine, dimensions, chassis and
        /// measured-values tables are each optional. Any exception while processing one page is logged
        /// through <c>Core.Log</c> when available, otherwise rethrown wrapped in an <see cref="Exception"/>.
        /// </remarks>
        private void GetCarDetails()
        {
            foreach (ModelTypeObj type in modelTypesList)
            {
                string modelDetailsUrl = string.Format("{0}{1}", baseUrl, type.ModelTypeDetailsUrl);

                try
                {
                    string carContent = GetContent(modelDetailsUrl);
                    var    htmlDoc    = new HtmlDocument();

                    htmlDoc.LoadHtml(carContent);

                    HtmlNode           root          = htmlDoc.DocumentNode;
                    HtmlNodeCollection generalTables = root == null
                        ? null
                        : root.SelectNodes(
                            "//*[@id=\"ctl00_ctl00_cphContentRow_cphContent_wucNFBAutokatalogDetail1_ctl01_updatePanelDetail\"]/div[2]/div[3]/div[1]/table/tbody");

                    if (generalTables == null)
                    {
                        continue;
                    }

                    // General data (first table)
                    HtmlNode      general = generalTables.First();
                    CarDetailsObj carObj  = new CarDetailsObj();

                    carObj.ModelTypeID       = type.ModelTypeID;
                    carObj.ModelID           = type.ModelID;
                    carObj.Maker             = DetailCell(general, 1);
                    carObj.Model             = DetailCell(general, 2);
                    carObj.Type              = DetailCell(general, 3);
                    carObj.Series            = DetailCell(general, 4);
                    carObj.ModelTypeName     = type.ModelTypeName;
                    carObj.InternalClassName = DetailCell(general, 5);
                    carObj.ModelStart        = DetailCell(general, 6);
                    carObj.ModelEnd          = DetailCell(general, 7);
                    carObj.SeriesStart       = DetailCell(general, 8);
                    carObj.SeriesEnd         = DetailCell(general, 9);
                    carObj.HSN               = DetailCell(general, 10);
                    carObj.TSN               = DetailCell(general, 11);
                    carObj.TSN2              = DetailCell(general, 12);
                    carObj.CarTax            = DetailCell(general, 13);
                    carObj.CO2Class          = DetailCell(general, 14);
                    carObj.BasePrice         = DetailCell(general, 15);

                    // Engine & drive ("Motor & Antrieb")
                    HtmlNodeCollection engineTables = root.SelectNodes(
                        "//*[@id=\"ctl00_ctl00_cphContentRow_cphContent_wucNFBAutokatalogDetail1_ctl01_updatePanelDetail\"]/div[2]/div[3]/div[2]/table/tbody");

                    if (engineTables != null)
                    {
                        HtmlNode engine = engineTables.First();

                        carObj.EngineType         = DetailCell(engine, 0);
                        carObj.Fuel               = DetailCell(engine, 1);
                        carObj.Fuel2              = DetailCell(engine, 2);
                        carObj.EmissionControl    = DetailCell(engine, 3);
                        carObj.EngineDesign       = DetailCell(engine, 4);
                        carObj.Cylinder           = DetailCellRaw(engine, 5).ToInt32OrDefault(0);
                        carObj.FuelType           = DetailCell(engine, 6);
                        carObj.Charge             = DetailCell(engine, 7);
                        carObj.Valves             = DetailCellRaw(engine, 8).ToInt32OrDefault(0);
                        carObj.Cubic              = DetailCell(engine, 9);
                        carObj.PowerKW            = DetailCellRaw(engine, 10).ToInt32OrDefault(0);
                        carObj.PowerPS            = DetailCellRaw(engine, 11).ToInt32OrDefault(0);
                        carObj.MaxPower           = DetailCell(engine, 12);
                        carObj.TurningMoment      = DetailCell(engine, 13);
                        carObj.MaxTurningMoment   = DetailCell(engine, 14);
                        carObj.TypeOfDrive        = DetailCell(engine, 15);
                        carObj.Gearing            = DetailCell(engine, 16);
                        carObj.Gears              = DetailCellRaw(engine, 17).ToInt32OrDefault(0);
                        carObj.StartStopAutomatic = DetailCell(engine, 18);
                        // Row 19 is not read; the emission class is taken from row 20.
                        carObj.EmissionClass      = DetailCell(engine, 20);
                    }

                    // Dimensions & weight ("Maße & Gewicht")
                    HtmlNodeCollection dimensionTables = root.SelectNodes(
                        "//*[@id=\"ctl00_ctl00_cphContentRow_cphContent_wucNFBAutokatalogDetail1_ctl01_updatePanelDetail\"]/div[2]/div[3]/div[3]/table/tbody");

                    if (dimensionTables != null)
                    {
                        HtmlNode dimensions = dimensionTables.First();

                        carObj.Length = DetailCell(dimensions, 0);
                        carObj.Width  = DetailCell(dimensions, 1);
                        carObj.Height = DetailCell(dimensions, 2);
                    }

                    // Body & chassis ("Karosserie & Fahrwerk")
                    HtmlNodeCollection chassisTables = root.SelectNodes(
                        "//*[@id=\"ctl00_ctl00_cphContentRow_cphContent_wucNFBAutokatalogDetail1_ctl01_updatePanelDetail\"]/div[2]/div[3]/div[4]/table/tbody");

                    if (chassisTables != null)
                    {
                        HtmlNode chassis = chassisTables.First();

                        carObj.Chassis  = DetailCell(chassis, 0);
                        carObj.Doors    = DetailCellRaw(chassis, 1).ToInt32OrDefault(0);
                        // Row 2 is not read.
                        carObj.CarClass = DetailCell(chassis, 3);
                        carObj.Seats    = DetailCellRaw(chassis, 4).ToInt32OrDefault(0);
                    }

                    // Manufacturer's measured values ("Messwerte Hersteller")
                    HtmlNodeCollection measuredTables = root.SelectNodes(
                        "//*[@id=\"ctl00_ctl00_cphContentRow_cphContent_wucNFBAutokatalogDetail1_ctl01_updatePanelDetail\"]/div[2]/div[3]/div[5]/table/tbody");

                    if (measuredTables != null)
                    {
                        HtmlNode measured = measuredTables.First();

                        carObj.SpeedUp  = DetailCell(measured, 0);
                        carObj.MaxSpeed = DetailCell(measured, 1);
                        carObj.Tank     = DetailCell(measured, 26);
                        carObj.Tank2    = DetailCell(measured, 27);
                    }

                    carDetailsList.Add(carObj);
                }
                catch (Exception ex)
                {
                    if (Core != null && Core.Log != null)
                    {
                        Core.Log.Error(string.Format("ADACImporter::GetCarDetails : {0} ({1})", ex.Message, modelDetailsUrl));
                    }
                    else
                    {
                        throw new Exception("ADACImporter::GetCarDetails", ex);
                    }
                }
            }

            if (Core != null && Core.Log != null)
            {
                Core.Log.Info(string.Format("{0} Car Detail Records imported.", carDetailsList.Count));
            }
        }

        /// <summary>
        /// Returns the raw inner text of the second child node of the child at index <paramref name="row"/>.
        /// </summary>
        private static string DetailCellRaw(HtmlNode table, int row)
        {
            return table.ChildNodes[row].ChildNodes[1].InnerText;
        }

        /// <summary>
        /// Same as <c>DetailCellRaw</c>, with HTML entities decoded.
        /// </summary>
        private static string DetailCell(HtmlNode table, int row)
        {
            return HttpUtility.HtmlDecode(DetailCellRaw(table, row));
        }
示例#29
0
        /// <summary>
        /// Downloads the page at <c>baseUrl + modelObj.ModelUrlPath</c> and appends one
        /// <c>ModelTypeObj</c> to <c>modelTypesList</c> for every <c>tr</c> child of the first node
        /// matching <c>//*[@id="car_db_select_hits"]/tbody</c>.
        /// </summary>
        /// <remarks>
        /// Rows are counted as they are visited and the loop stops once <c>IsLimited</c> returns true
        /// for that count. A failing row is logged through <c>Core.Log</c> when available, otherwise
        /// the exception is rethrown wrapped in an <see cref="Exception"/>.
        /// </remarks>
        /// <param name="modelObj">Model whose id, maker name and model name are copied to each type.</param>
        private void GetModelTypes(ModelObj modelObj)
        {
            string pageUrl  = string.Format("{0}{1}", baseUrl, modelObj.ModelUrlPath);
            string pageHtml = GetContent(pageUrl);

            var page = new HtmlDocument();

            page.LoadHtml(pageHtml);

            if (page.DocumentNode == null)
            {
                return;
            }

            HtmlNodeCollection hitTables = page.DocumentNode.SelectNodes("//*[@id=\"car_db_select_hits\"]/tbody");

            if (hitTables == null)
            {
                return;
            }

            int visitedRows = 0;

            foreach (var row in hitTables.First().ChildNodes)
            {
                try
                {
                    if (!row.Name.ToLower().Equals("tr"))
                    {
                        continue;
                    }

                    visitedRows++;

                    // The odd cell indices are the ones the original reads (presumably the even
                    // ones are whitespace text nodes between the cells - confirm against the markup).
                    ModelTypeObj typeObj = new ModelTypeObj
                    {
                        ModelTypeID         = modelTypesList.Count + 1,
                        ModelID             = modelObj.ModelID,
                        MakerName           = modelObj.MakerName,
                        ModelName           = modelObj.ModelName,
                        // Link to the details page
                        ModelTypeDetailsUrl = row.ChildNodes[3].ChildNodes[1].Attributes
                                              .AttributesWithName("href")
                                              .First().Value,
                        ModelTypeName       = row.ChildNodes[5].InnerText.Trim(),
                        ModelTypeChassis    = row.ChildNodes[7].InnerText.Trim(),
                        ModelTypeDoors      = row.ChildNodes[9].InnerText.Trim().ToInt32OrDefault(0),
                        ModelTypeFuel       = row.ChildNodes[11].InnerText.Trim(),
                        ModelTypePower      = row.ChildNodes[13].InnerText.Trim(),
                        ModelTypeCubic      = row.ChildNodes[17].InnerText.Trim(),
                    };

                    modelTypesList.Add(typeObj);

                    // DEBUG: stop after the configured number of model types.
                    if (IsLimited(visitedRows))
                    {
                        break;
                    }
                }
                catch (Exception ex)
                {
                    if (Core != null && Core.Log != null)
                    {
                        Core.Log.Error(string.Format("ADACImporter::GetModelTypes : {0}", ex.Message));
                    }
                    else
                    {
                        throw new Exception("ADACImporter::GetModelTypes", ex);
                    }
                }
            }
        }
示例#30
0
        /// <summary>
        /// Scans the HTML in <paramref name="text"/> for links whose href starts with "http" and
        /// returns those reported as threats by <c>SafeBrowsingLookup</c>, together with any
        /// previously cached results for the same hrefs.
        /// </summary>
        /// <param name="text">HTML markup to scan for links.</param>
        /// <param name="apiKey">Key passed to <c>SafeBrowsingLookup</c>; without a value nothing is checked.</param>
        /// <returns>
        /// Newly detected unsafe links followed by cached entries; empty when there is no API key,
        /// no markup, or no links. New detections are cached for 24 hours.
        /// </returns>
        public List<BrokenLinkModel> Check(string text, string apiKey)
        {
            List<BrokenLinkModel> response = new List<BrokenLinkModel>();

            if (!apiKey.HasValue() || string.IsNullOrEmpty(text))
            {
                return(response);
            }

            var doc = new HtmlDocument();

            doc.LoadHtml(text);

            HtmlNodeCollection links = doc.DocumentNode.SelectNodes(KnownStrings.HrefXPath);

            if (links == null || !links.Any())
            {
                return(response);
            }

            string[] hrefs = links.Select(l => l.GetAttributeValue("href", string.Empty))
                             .Where(l => l.StartsWith("http", StringComparison.Ordinal)).ToArray();

            // Split hrefs into cached results and distinct hrefs that still need a lookup -
            // this avoids a request when the page is being resaved.
            List<BrokenLinkModel> fromCache = new List<BrokenLinkModel>();
            List<string>          uncached  = new List<string>();
            HashSet<string>       queued    = new HashSet<string>();

            foreach (string href in hrefs)
            {
                var cacheItem = Current.AppCaches.RuntimeCache.GetCacheItem<BrokenLinkModel>(KnownStrings.CacheKey + href);

                if (cacheItem != null)
                {
                    fromCache.Add(cacheItem);
                    continue;
                }

                if (queued.Add(href))
                {
                    uncached.Add(href);
                }
            }

            // Only call the lookup service when something is left to check.
            if (uncached.Count > 0)
            {
                SafeBrowsingResponseModel safeBrowsingResult = SafeBrowsingLookup(uncached.ToArray(), apiKey);

                if (safeBrowsingResult != null && safeBrowsingResult.Matches != null)
                {
                    foreach (var match in safeBrowsingResult.Matches)
                    {
                        var threatUrl = match.Threat.Url;

                        // The service may report a URL that differs from the href as written in
                        // the markup; fall back to an empty link text rather than throwing.
                        var link = links.FirstOrDefault(l => l.GetAttributeValue("href", string.Empty) == threatUrl);

                        BrokenLinkModel item = new BrokenLinkModel
                        {
                            Href   = threatUrl,
                            Status = match.ThreatType,
                            Unsafe = true,
                            Text   = link != null ? link.InnerText : string.Empty
                        };

                        response.Add(item);
                        Current.AppCaches.RuntimeCache.InsertCacheItem(KnownStrings.CacheKey + item.Href, () => item, new TimeSpan(24, 0, 0), false);
                    }
                }
            }

            // add cached results
            response.AddRange(fromCache);

            return(response);
        }