/// <summary>
/// Scrapes Roblox profile pages while <c>ID</c> is below <c>To</c> and stores every
/// valid, not-yet-seen username in <c>Extension.ScrapeExt.profiles</c>.
/// </summary>
/// <remarks>
/// The first iteration seeds <c>Extension.ScrapeExt.ProgID</c> from <c>ID</c>; later
/// iterations derive <c>ID</c> from <c>ProgID + 1</c>. The counter is advanced exactly
/// once per iteration regardless of outcome. Previously a page whose username failed
/// validation did not advance it, so the same ID was requested forever.
/// NOTE(review): ProgID and profiles look like state shared between workers
/// ("Thread Finished!") but are accessed without synchronization here - confirm.
/// </remarks>
public override void Perform()
{
    int totalTimes = 0;
    while (ID < To)
    {
        try
        {
            if (totalTimes == 0)
            {
                Extension.ScrapeExt.ProgID = ID;
                Console.WriteLine("test");
            }
            else
            {
                ID = Extension.ScrapeExt.ProgID + 1;
            }

            string url = $"https://www.roblox.com/users/{ID}/profile";
            HtmlDocument doc = new HtmlWeb().Load(url);

            // SelectSingleNode yields null when the profile header is missing
            // (e.g. a deleted or non-existent account).
            HtmlNode header = doc.DocumentNode.SelectSingleNode("//div[@class='header-title']//h2");
            string username = header == null ? null : header.InnerText;

            // The pattern already rejects spaces, so no separate whitespace check is needed.
            if (username != null && Regex.IsMatch(username, @"^[a-zA-Z0-9_]+$"))
            {
                Profile user = new Profile()
                {
                    Username = username,
                    JoinDate = null,
                    PlaceVisits = null,
                };

                if (Extension.ScrapeExt.profiles.Any(x => x.Username == user.Username))
                {
                    Colorful.Console.WriteLine("Duplicate!", System.Drawing.Color.Red);
                }
                else
                {
                    Extension.ScrapeExt.profiles.Add(user);
                    Colorful.Console.WriteLine(user.Username, System.Drawing.Color.Orange);
                }
            }
        }
        catch (Exception)
        {
            // Best effort: a failed download or parse just skips this ID.
        }
        finally
        {
            // Always move on to the next ID, whether the page was stored, rejected or failed.
            Extension.ScrapeExt.ProgID++;
            totalTimes++;
        }

        Console.Title = ID.ToString();
    }

    Colorful.Console.WriteLine("Thread Finished!", System.Drawing.Color.Lime);
}
/// <summary>
/// Reads the name/value pairs listed under the element marked
/// <c>data-prototype-id="allegro.showoffer.parameters"</c>.
/// </summary>
/// <param name="document">Parsed offer page.</param>
/// <returns>
/// Parameter names (without a trailing colon) mapped to their values. Empty when the
/// container or its list items are missing. For duplicate names the first value wins.
/// </returns>
private Dictionary<string, string> GetAttributes(HtmlDocument document)
{
    Dictionary<string, string> parameters = new Dictionary<string, string>();

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection containers = document.DocumentNode.SelectNodes("//*[@data-prototype-id=\"allegro.showoffer.parameters\"]");
    if (containers == null || containers.Count == 0)
    {
        return parameters;
    }

    HtmlDocument parameterDocument = new HtmlDocument();
    parameterDocument.LoadHtml(containers.First().OuterHtml);
    HtmlNodeCollection listItems = parameterDocument.DocumentNode.SelectNodes("//li");
    if (listItems == null)
    {
        return parameters;
    }

    // The first <li> is skipped, as in the original implementation.
    foreach (HtmlNode listElement in listItems.Skip(1))
    {
        HtmlDocument parameterInnerDocument = new HtmlDocument();
        parameterInnerDocument.LoadHtml(listElement.InnerHtml);
        HtmlNodeCollection divs = parameterInnerDocument.DocumentNode.SelectNodes("//div");

        // The key is read from the second div and the value from the third.
        if (divs == null || divs.Count < 3)
        {
            continue;
        }

        string keyArg = divs[1].InnerText;
        string valArg = divs[2].InnerText;

        // Entries whose key text holds more than one colon are not simple "name:" pairs.
        if (keyArg.Count(c => c == ':') > 1)
        {
            continue;
        }

        if (keyArg.Length > 0 && keyArg[keyArg.Length - 1] == ':')
        {
            keyArg = keyArg.Substring(0, keyArg.Length - 1);
        }

        parameters.TryAdd(keyArg, valArg);
    }

    return parameters;
}
/// <summary>
/// Determines the sport from the text of the first <c>td.smwndcap</c> cell.
/// </summary>
/// <param name="doc">Parsed match page.</param>
/// <returns>
/// The sport whose keyword appears in the caption, or <c>Sport.NotSupported</c> when no
/// keyword matches or the caption cannot be read.
/// </returns>
Sport GetSport(HtmlDocument doc)
{
    // Keywords are tested in this order; the first one contained in the caption wins.
    string[] keywords = { "Soccer", "Basketball", "Tennis", "Volleyball" };
    Sport[] sports = { Sport.Football, Sport.Basketball, Sport.Tennis, Sport.Volleyball };

    try
    {
        string caption = doc.DocumentNode.SelectNodes("//td[@class='smwndcap']").First().InnerText;
        for (int i = 0; i < keywords.Length; i++)
        {
            if (caption.Contains(keywords[i]))
            {
                return sports[i];
            }
        }
    }
    catch
    {
        // Any failure while reading the caption falls through to NotSupported.
    }

    return Sport.NotSupported;
}
/// <summary>
/// Builds a <c>MatchName</c> from the first and last team-name blocks on the page.
/// </summary>
/// <param name="doc">Parsed match page.</param>
/// <returns>
/// The two team names with any " (...)" suffix removed, or <c>null</c> when the page
/// cannot be parsed.
/// </returns>
MatchName GetFullMatchName(HtmlDocument doc)
{
    try
    {
        HtmlNodeCollection teamBlocks = doc.DocumentNode.SelectNodes("//div[@class='live-today-member-name nowrap ']");

        // Each block is re-parsed on its own so "//span" only sees that block's markup.
        HtmlDocument homeDoc = new HtmlDocument();
        HtmlDocument awayDoc = new HtmlDocument();
        homeDoc.LoadHtml(teamBlocks.First().InnerHtml);
        awayDoc.LoadHtml(teamBlocks.Last().InnerHtml);

        string home = homeDoc.DocumentNode.SelectNodes("//span").First().InnerText;
        string away = awayDoc.DocumentNode.SelectNodes("//span").First().InnerText;

        // NOTE(review): the names are joined and split again, so a team name that itself
        // contains " vs ", " @ " or " - " shifts the parts - confirm this is intended.
        string[] separators = { " vs ", " @ ", " - " };
        string[] parts = (home + " vs " + away).Split(separators, StringSplitOptions.RemoveEmptyEntries);

        // Strip a trailing " (...)" annotation from each of the two names.
        for (int i = 0; i < 2; i++)
        {
            if (parts[i].Contains("("))
            {
                parts[i] = parts[i].Split(new string[] { " (" }, StringSplitOptions.RemoveEmptyEntries)[0];
            }
        }

        return new MatchName(parts[0], parts[1]);
    }
    catch
    {
        return null;
    }
}
/// <summary>
/// Parses <c>pageContent</c> and appends one <c>MakerObj</c> to <c>MakersList</c> for every
/// table cell under the element with id "marke" that carries a quoted path in its
/// <c>onclick</c> attribute.
/// </summary>
/// <remarks>
/// Nothing is imported when the page has parse errors or the "marke" element is missing.
/// Cells without an <c>onclick</c> attribute, or whose value holds no '...'-quoted
/// section, are skipped; they used to abort the whole import with an exception.
/// </remarks>
private void GetMakers()
{
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(this.pageContent);

    HtmlNodeCollection makerDiv = null;
    if (htmlDoc.DocumentNode != null && htmlDoc.ParseErrors != null && !htmlDoc.ParseErrors.Any())
    {
        makerDiv = htmlDoc.DocumentNode.SelectNodes("//*[@id=\"marke\"]");
    }

    if (makerDiv == null)
    {
        return;
    }

    htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(makerDiv.First().InnerHtml);

    // SelectNodes returns null when the container holds no cells.
    HtmlNodeCollection makerNodes = htmlDoc.DocumentNode.SelectNodes("//*//td");
    if (makerNodes == null)
    {
        return;
    }

    foreach (HtmlNode node in makerNodes)
    {
        // The URL path is the text between the first and the last single quote of onclick.
        string onclick = node.GetAttributeValue("onclick", string.Empty);
        int firstQuote = onclick.IndexOf('\'');
        int lastQuote = onclick.LastIndexOf('\'');
        if (firstQuote < 0 || lastQuote <= firstQuote)
        {
            continue;
        }

        MakerObj maker = new MakerObj();
        maker.MakerName = node.InnerText;
        maker.MakerUrlPath = onclick.Substring(firstQuote + 1, lastQuote - firstQuote - 1);
        this.MakersList.Add(maker);
    }
}
/// <summary>
/// Determines the sport from the first <c>span.preInfo.type2</c> label, using the text
/// in front of the first " -" separator.
/// </summary>
/// <param name="doc">Parsed match page.</param>
/// <returns>
/// The matching sport, or <c>Sport.NotSupported</c> when the label is unknown or cannot
/// be read.
/// </returns>
Sport GetSport(HtmlDocument doc)
{
    try
    {
        string label = doc.DocumentNode.SelectNodes("//span[@class='preInfo type2']").First().InnerText;
        string[] pieces = label.Split(new string[] { " -" }, StringSplitOptions.RemoveEmptyEntries);
        string sport = pieces[0];

        if (sport == "Soccer")
        {
            return Sport.Football;
        }
        if (sport == "Basketball")
        {
            return Sport.Basketball;
        }
        if (sport == "Tennis")
        {
            return Sport.Tennis;
        }
        if (sport == "Volleyball")
        {
            return Sport.Volleyball;
        }

        return Sport.NotSupported;
    }
    catch
    {
        // Missing label or empty text: treat as an unsupported sport.
        return Sport.NotSupported;
    }
}
/// <summary>
/// Downloads a model page and converts it into a <see cref="Mini"/> with its creator,
/// title, thumbnail, link, cost and source site.
/// </summary>
/// <param name="url">Address of the model page.</param>
/// <returns>The populated <see cref="Mini"/>.</returns>
/// <exception cref="InvalidOperationException">
/// The page lacks the creator link, the title or the og:image thumbnail.
/// </exception>
/// <exception cref="FormatException">A non-free price could not be parsed.</exception>
public async Task<Mini> ParseFromUrl(Uri url)
{
    HtmlWeb web = new HtmlWeb();
    HtmlDocument htmlDoc = await web.LoadFromWebAsync(url, null, null);
    HtmlNode root = htmlDoc.DocumentNode;

    // Validate required nodes up front so a changed page layout produces a clear error
    // instead of a NullReferenceException deep inside an initializer.
    HtmlNode creatorLink = root.SelectSingleNode("//a[@class='stats']");
    if (creatorLink == null)
    {
        throw new InvalidOperationException("Creator link (a.stats) not found on " + url);
    }

    // The creator name is the fourth '/'-separated segment of the link's href.
    string creatorUrl = creatorLink.GetAttributeValue("href", null);
    string[] creatorSegments = (creatorUrl ?? string.Empty).Split('/');
    if (creatorSegments.Length < 4)
    {
        throw new InvalidOperationException("Unexpected creator link format '" + creatorUrl + "' on " + url);
    }
    string creatorName = Uri.UnescapeDataString(creatorSegments[3]);

    HtmlNode titleNode = root.SelectSingleNode("//h1");
    if (titleNode == null)
    {
        throw new InvalidOperationException("Title (h1) not found on " + url);
    }

    // The thumbnail is the "content" attribute of the <meta> tag that has an attribute
    // whose value is "og:image".
    HtmlNodeCollection metaNodes = root.SelectNodes("//meta");
    HtmlNode imageMeta = metaNodes == null
        ? null
        : metaNodes.FirstOrDefault(n => n.Attributes.Any(a => a.Value == "og:image"));
    HtmlAttribute imageContent = imageMeta == null
        ? null
        : imageMeta.Attributes.FirstOrDefault(a => a.Name == "content");
    if (imageContent == null)
    {
        throw new InvalidOperationException("og:image meta tag not found on " + url);
    }

    Creator creator = new Creator { Name = creatorName };
    Cults3dSource source = new Cults3dSource(creator, creatorName);
    creator.Sites.Add(source);

    Mini mini = new Mini()
    {
        Creator = creator,
        Name = System.Web.HttpUtility.HtmlDecode(titleNode.InnerText.Trim()),
        Thumbnail = imageContent.Value,
        Link = url.ToString()
    };

    int cost = 0;
    HtmlNode priceNode = root.SelectSingleNode("//span[@class='btn-group-end btn-third']");
    if (priceNode != null)
    {
        string priceText = priceNode.InnerText;

        // Compare after trimming so surrounding whitespace does not turn "Free" into a
        // parse failure.
        if (priceText.Trim() != "Free")
        {
            // The first three characters are dropped before parsing (presumably a
            // currency prefix - confirm); only the part before the decimal point is kept.
            string wholeUnits = priceText.Length >= 3
                ? priceText.Remove(0, 3).Split('.')[0]
                : string.Empty;

            if (!int.TryParse(
                    wholeUnits,
                    System.Globalization.NumberStyles.Integer,
                    System.Globalization.CultureInfo.InvariantCulture,
                    out cost))
            {
                throw new FormatException("Unrecognized price '" + priceText + "' on " + url);
            }
        }
    }

    mini.Cost = cost;
    mini.Sources.Add(new MiniSourceSite(mini, source, url));
    return mini;
}
/// <summary>
/// Returns the first node that <c>FindNodesByClass</c> reports for the given class name.
/// </summary>
/// <param name="node">Node handed to <c>FindNodesByClass</c>.</param>
/// <param name="className">Class name handed to <c>FindNodesByClass</c>.</param>
/// <returns>The first match, or <c>null</c> when there is none.</returns>
public static HtmlNode GetNode(HtmlNode node, string className)
{
    HtmlNodeCollection matches = FindNodesByClass(node, className);
    if (matches == null || matches.Count <= 0)
    {
        return null;
    }

    return matches.First();
}
/// <summary>
/// Fills the counting statistics of one team from the statistics table rows.
/// </summary>
/// <param name="nodes">Rows to search; each statistic uses the first row whose text contains its label.</param>
/// <param name="team">Statistics object to populate.</param>
/// <param name="index">Index of the <c>td</c> cell holding this team's value in each row.</param>
/// <returns>The same <paramref name="team"/> instance, populated.</returns>
private static MatchStatistics FillStatistics(HtmlNodeCollection nodes, MatchStatistics team, int index)
{
    // Finds the first row mentioning the label and parses the team's cell as an integer.
    Func<string, int> readStat = label =>
    {
        HtmlNode row = nodes.First(n => n.InnerText.Contains(label));
        return Int32.Parse(row.SelectNodes("td")[index].InnerText);
    };

    team.Kicks = readStat("Kicks");
    team.Handballs = readStat("Handballs");
    team.Marks = readStat("Marks");
    team.HitOuts = readStat("Hit Outs");
    team.Tackles = readStat("Tackles");
    team.FreesFor = readStat("Frees For");
    team.FreesAgainst = readStat("Frees Against");

    return team;
}
/// <summary>
/// Loads the fixture <c>Data/CvBankasPost.txt</c> from the current directory, HTML-decodes
/// it, parses it and keeps the first top-level node in <c>_html</c>.
/// </summary>
public SelectorTests()
{
    // Path.Combine uses the platform's separator; the previous hard-coded "\\" only
    // resolved on Windows.
    string path = Path.Combine(Directory.GetCurrentDirectory(), "Data", "CvBankasPost.txt");
    string decodedData = WebUtility.HtmlDecode(File.ReadAllText(path));

    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(decodedData);

    HtmlNodeCollection nodes = doc.DocumentNode.ChildNodes;
    _html = nodes.First();
}
/// <summary>
/// Loads the match list page and adds live matches of the selected sport to
/// <c>MatchDict</c>, stopping once <c>MaximumMatches</c> entries are stored.
/// </summary>
/// <param name="index">Index into <c>type_of_sport</c> selecting the sport to collect.</param>
/// <remarks>
/// Rows are skipped when they contain no link, when the first link lacks an id or href,
/// when the id does not contain "match_live_name", or when the link text does not split
/// into two names on " - ".
/// </remarks>
public void ParseMatchList(int index)
{
    HtmlDocument doc = LoadWithTimeout(MatchListUrl);
    if (doc == null)
    {
        return;
    }

    HtmlNodeCollection matchList = doc.DocumentNode.SelectNodes(string.Format("//tr[@data-sport='{0}']", type_of_sport[index]));
    if (matchList == null)
    {
        return;
    }

    foreach (HtmlNode node in matchList)
    {
        // The null check has to come before the first use; previously First() ran first
        // and threw on rows without any link.
        HtmlNodeCollection matchNodes = node.SelectNodes(".//a");
        if (matchNodes == null)
        {
            continue;
        }

        HtmlNode link = matchNodes.First();
        var idNode = link.Attributes["id"];
        var hrefNode = link.Attributes["href"];
        if (idNode == null || hrefNode == null)
        {
            continue;
        }

        string id = idNode.Value;
        if (!id.Contains("match_live_name"))
        {
            continue;
        }

        string url = "https://www.olimpkz.com/" + hrefNode.Value;
        url = url.Replace("amp;", ""); // turns the "&amp;" left in the raw href into "&"

        string[] name = link.InnerText.Split(new string[] { " - " }, StringSplitOptions.RemoveEmptyEntries);
        if (name.Length < 2)
        {
            // Not a "home - away" title; nothing usable to key the match by.
            continue;
        }

        MatchName matchname = new MatchName(name[0], name[1]);
        MatchDict.Add(matchname, url);

        if (MatchDict.Count == MaximumMatches)
        {
            break;
        }
    }
}
/// <summary>
/// Builds a member from the form inputs that belong to one subscriber.
/// </summary>
/// <param name="nodes">
/// Input nodes of one member. The first node's <c>name</c> attribute, minus its trailing
/// "_suffix", is the URL-encoded e-mail address; the remaining inputs are named
/// "{encodedEmail}_{property name in lower case}".
/// </param>
/// <remarks>
/// String properties receive the input's <c>value</c>; bool properties are true when the
/// value is "on". When <c>NoMail</c> is set, the reason is read from the text following
/// the "_nomail" input ("[A]", "[B]" or "[U]").
/// </remarks>
internal Member(HtmlNodeCollection nodes)
{
    var firstNode = nodes.First();
    _encEmail = Regex.Replace(firstNode.Attributes["name"].Value, "_\\w*$", "");
    this.Email = HttpUtility.UrlDecode(_encEmail);

    foreach (var prop in _props.Unignored())
    {
        // ToLowerInvariant keeps the generated input name stable on cultures with
        // special casing rules (e.g. Turkish 'I').
        string inputName = String.Format("{0}_{1}", _encEmail, prop.Name.ToLowerInvariant());

        // GetAttributeValue tolerates nodes without a name attribute.
        var input = nodes.SingleOrDefault(n => n.GetAttributeValue("name", string.Empty) == inputName);
        if (input == null)
        {
            continue;
        }

        string val = input.GetAttributeValue("value", string.Empty);
        if (prop.PropertyType == typeof(string))
        {
            prop.SetValue(this, val);
        }
        else if (prop.PropertyType == typeof(bool))
        {
            prop.SetValue(this, val == "on");
        }
    }

    if (this.NoMail)
    {
        this.NoMailReason = NoMailReason.Unknown;

        string noMailName = _encEmail + "_nomail";
        var noMailNode = nodes.SingleOrDefault(n => n.GetAttributeValue("name", string.Empty) == noMailName);

        // The reason code is the text right after the checkbox; keep Unknown when absent.
        if (noMailNode != null && noMailNode.NextSibling != null)
        {
            switch (noMailNode.NextSibling.InnerText)
            {
                case "[A]":
                    this.NoMailReason = NoMailReason.Administrator;
                    break;
                case "[B]":
                    this.NoMailReason = NoMailReason.Bounce;
                    break;
                case "[U]":
                    this.NoMailReason = NoMailReason.User;
                    break;
            }
        }
    }
}
/// <summary>
/// Downloads the page at <c>Url</c>, extracts the JSON object embedded in its first
/// <c>script</c> element and deserializes it into a <see cref="WeeklyMenu"/>.
/// </summary>
/// <returns>The deserialized weekly menu.</returns>
/// <exception cref="InvalidOperationException">
/// The page contains no script element or the script holds no '{'.
/// </exception>
/// <remarks>
/// The script text is cut from the first '{' up to two characters before its end, and
/// every <c>new Date(ms)</c> expression is replaced by a quoted "yyyyMMdd" string so the
/// text becomes valid JSON.
/// </remarks>
public async Task<WeeklyMenu> GetMenu()
{
    HtmlDocument doc = new HtmlDocument();

    // Dispose the client, response and stream once the document has been parsed.
    using (var client = new HttpClient())
    using (HttpResponseMessage result = await client.GetAsync(Url))
    using (var stream = await result.Content.ReadAsStreamAsync())
    {
        doc.Load(stream);
    }

    HtmlNode script = doc.DocumentNode.SelectSingleNode("//script");
    if (script == null)
    {
        throw new InvalidOperationException("No <script> element found in the menu page.");
    }

    string lunchData = script.InnerHtml;
    int indexOfFirstWing = lunchData.IndexOf('{');
    int length = lunchData.Length - 2 - indexOfFirstWing;
    if (indexOfFirstWing < 0 || length < 0)
    {
        throw new InvalidOperationException("The menu script does not contain a JSON object.");
    }

    string newData = WebUtility.HtmlDecode(lunchData.Substring(indexOfFirstWing, length));

    // Milliseconds since 1970-01-01, shifted by a fixed two hours before formatting.
    // NOTE(review): the +2h looks like a hard-coded time-zone offset - confirm.
    DateTime epoch = new DateTime(1970, 1, 1);
    newData = Regex.Replace(newData, @"new Date\(([0-9]{1,})\)", match =>
    {
        long milliseconds = long.Parse(match.Groups[1].Value, System.Globalization.CultureInfo.InvariantCulture);
        DateTime date = epoch.AddMilliseconds(milliseconds).AddHours(2);

        // Invariant culture keeps the Gregorian calendar regardless of the host's culture.
        return "\"" + date.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture) + "\"";
    });

    return JsonConvert.DeserializeObject<WeeklyMenu>(newData);
}
/// <summary>
/// Fetches a page and returns the <c>href</c> of its first apple-touch-icon link.
/// </summary>
/// <param name="url">Address of the page to inspect.</param>
/// <returns>
/// The icon URL as written in the page, or an empty string when the page cannot be
/// loaded within one second, cannot be parsed, or declares no such link.
/// </returns>
/// <remarks>
/// This is a best-effort lookup: every failure is swallowed and reported as "".
/// The request is awaited synchronously because the method's signature is synchronous.
/// </remarks>
public string GetSiteIconUrl(string url)
{
    try
    {
        using (var client = new HttpClient())
        {
            client.Timeout = new TimeSpan(0, 0, 1);

            // Reading and parsing now happen inside the try block too, so a broken
            // response body can no longer escape as an exception.
            using (HttpResponseMessage response = client.GetAsync(url).Result)
            using (var body = response.Content.ReadAsStreamAsync().Result)
            {
                HtmlDocument htmlDoc = new HtmlDocument();
                htmlDoc.Load(body);

                if (htmlDoc.DocumentNode != null)
                {
                    // e.g. <link rel="apple-touch-icon" href="..."> or
                    //      <link rel="apple-touch-icon-precomposed" href="...">
                    var iconLink = htmlDoc.DocumentNode.SelectSingleNode("//link[(@rel='apple-touch-icon-precomposed' or @rel='apple-touch-icon')]");
                    if (iconLink != null)
                    {
                        return iconLink.GetAttributeValue("href", "");
                    }
                }
            }
        }
    }
    catch (Exception)
    {
        // Deliberately ignored: a missing icon is not an error for callers.
    }

    return "";
}
/// <summary>
/// Looks up a user by id on the page addressed by <c>makeUrlTKB(id)</c>.
/// </summary>
/// <param name="id">Identifier used to build the page URL and stored as the user's id.</param>
/// <returns>
/// A list with one user whose name is the trimmed text of the
/// <c>ctl00_ContentPlaceHolder1_ctl00_lblContentTenSV</c> element and whose password is
/// empty; an empty list when the page does not contain that element.
/// </returns>
public static List<User> getUser(string id)
{
    List<User> list = new List<User>();

    HtmlWeb htmlWeb = new HtmlWeb();
    HtmlDocument document = htmlWeb.Load(makeUrlTKB(id));

    // SelectSingleNode returns null when the label is missing; previously this case
    // crashed on First().
    HtmlNode nameNode = document.DocumentNode.SelectSingleNode("//*[@id='ctl00_ContentPlaceHolder1_ctl00_lblContentTenSV']");
    if (nameNode == null)
    {
        return list;
    }

    User user = new User();
    user.Id = id;
    user.Name = nameNode.InnerText.Trim();
    user.Password = "";
    list.Add(user);

    return list;
}
/// <summary>
/// Copies the title/value pair of every info-box row into the dictionary and echoes it
/// to the console in dark green.
/// </summary>
/// <param name="vehicleAttributes">Dictionary receiving one entry per row.</param>
/// <param name="rows">Info-box rows; each must hold <c>td</c> cells, the first with exactly one <c>b</c> child.</param>
public void GetAttributesFromInfoBox(Dictionary<string, string> vehicleAttributes, HtmlNodeCollection rows)
{
    foreach (HtmlNode row in rows)
    {
        HtmlNodeCollection cells = row.SelectNodes("td");
        HtmlNode titleCell = cells.First();
        HtmlNode valueCell = cells.Last();

        // The title is the bold text of the first cell; the value is the last cell's text.
        string rowTitle = titleCell.SelectNodes("b").Single().InnerText.Trim();
        string rowValue = valueCell.InnerText.Trim();

        vehicleAttributes.Add(rowTitle, rowValue);
        _consoleManager.WriteLineInColour(ConsoleColor.DarkGreen, $"{rowTitle}: {rowValue}");
    }
}
/// <summary>
/// Downloads the blog's search page and prints, for every article, its title lines,
/// name/link pairs and image URL, followed by a separator line.
/// </summary>
/// <param name="args">Unused.</param>
static void Main(string[] args)
{
    var url = "https://meczreplay.blogspot.com/search?max-results=125";
    var web = new HtmlWeb();
    var doc = web.Load(url);

    // SelectNodes returns null when the layout changed and no article matches.
    HtmlNodeCollection nodes = doc.DocumentNode.SelectNodes("/html/body/div[1]/div[3]/div/div[2]/main/div/div/div[1]/article");
    if (nodes == null)
    {
        Console.WriteLine("No articles found.");
        Console.ReadKey();
        return;
    }

    string[] separators = new string[] { "<br>", "\n", "<b>", "</b>" };

    foreach (HtmlNode node in nodes)
    {
        HtmlNodeCollection nodesTitleOrNameLinks = node.SelectNodes("div/div/div[3]/div[2]/div");
        if (nodesTitleOrNameLinks != null)
        {
            string[] linesNodesA = nodesTitleOrNameLinks.First().InnerText.Split(separators, StringSplitOptions.RemoveEmptyEntries);
            foreach (string line in linesNodesA)
            {
                string lineNormalized = line.Replace(" ", " ").Replace(";", "");

                // Lines without "http" are titles; otherwise the text before "http" is the
                // name and the rest is the link.
                int indexHttp = lineNormalized.IndexOf("http");
                if (indexHttp == -1)
                {
                    string Title = lineNormalized;
                    Console.WriteLine(Title);
                }
                else
                {
                    string Name = lineNormalized.Remove(indexHttp);
                    string Link = lineNormalized.Remove(0, indexHttp);
                    Console.WriteLine(Name);
                    Console.WriteLine(Link);
                }
            }
        }

        HtmlNode nodeImg = node.SelectSingleNode("div/div/div[3]/div[1]/img");
        if (nodeImg != null)
        {
            string Img = nodeImg.GetAttributeValue("src", "");
            Console.WriteLine(Img);
        }

        Console.WriteLine("=====================================================");
    }

    Console.ReadKey();
}
/// <summary>
/// Reads the current page source from <c>matchListBrowser</c> and adds the matches
/// listed under the selected sport's heading to <c>MatchDict</c>.
/// </summary>
/// <param name="index">Index into <c>type_of_sport</c> selecting the sport heading to match.</param>
/// <remarks>
/// Parsing stops as soon as <c>MaximumMatches</c> entries are stored. The previous
/// <c>break</c> only left the inner loop, so later groups could push the dictionary past
/// the limit. Groups without a heading and items without a usable id are skipped.
/// </remarks>
public void ParseMatchList(int index)
{
    string html = matchListBrowser.GetSourceAsync().Result;
    HtmlDocument doc = new HtmlDocument();
    doc.LoadHtml(html);

    HtmlNodeCollection matchList = doc.DocumentNode.SelectNodes("//li[@class='groupedListItem first' or @class='groupedListItem' or @class='groupedListItem last' or @class='groupedListItem first last']");
    if (matchList == null)
    {
        return;
    }

    foreach (HtmlNode node in matchList)
    {
        // Each group is re-parsed on its own so the "//" queries below stay inside it.
        HtmlDocument groupDoc = new HtmlDocument();
        groupDoc.LoadHtml(node.InnerHtml);

        HtmlNode heading = groupDoc.DocumentNode.SelectSingleNode("//h3");
        if (heading == null || heading.InnerText != type_of_sport[index])
        {
            continue;
        }

        HtmlNodeCollection matchNodes = groupDoc.DocumentNode.SelectNodes("//li[@class='groupedListSubItem first' or @class='groupedListSubItem last' or @class='groupedListSubItem' or @class='groupedListSubItem first last']");
        if (matchNodes == null)
        {
            return;
        }

        foreach (HtmlNode node2 in matchNodes)
        {
            // The event id is the element id without its first two characters; ids that
            // are missing or too short cannot form an event URL.
            string id = node2.GetAttributeValue("id", string.Empty);
            if (id.Length <= 2)
            {
                continue;
            }
            id = id.Remove(0, 2);

            MatchName Name = GetMatchName(node2);
            string url = "https://mobile.leonbets.net/mobile/#eventDetails/:" + id;
            MatchDict.Add(Name, url);

            if (MatchDict.Count >= MaximumMatches)
            {
                // Leave both loops: the limit applies to the whole list.
                return;
            }
        }
    }
}
/// <summary>
/// Creates an <c>IScraper</c> mock whose <c>GetHtml</c> returns, for any input, the first
/// top-level node of the HTML-decoded fixture file.
/// </summary>
/// <param name="filePath">Path appended as-is to the current directory.</param>
/// <returns>The configured mock object.</returns>
private IScraper SetupMockScraper(string filePath)
{
    string fixturePath = Directory.GetCurrentDirectory() + filePath;
    string fixtureHtml = WebUtility.HtmlDecode(File.ReadAllText(fixturePath));

    HtmlDocument fixtureDoc = new HtmlDocument();
    fixtureDoc.LoadHtml(fixtureHtml);
    HtmlNode firstNode = fixtureDoc.DocumentNode.ChildNodes.First();

    var scraperMock = new Mock<IScraper>();
    scraperMock
        .Setup(s => s.GetHtml(It.IsAny<string>()))
        .Returns(firstNode);

    return scraperMock.Object;
}
/// <summary>
/// Builds the list of page URLs for a product category.
/// </summary>
/// <param name="category_url">Address of the category's first page.</param>
/// <param name="pages_to_get">Number of pages to generate; 0 means every page up to the last page link found.</param>
/// <returns>
/// One URL per page, starting at page 1, built from the "pg2" link with its page number
/// substituted and prefixed with <c>SigmaAldrichConstants.SigmaAldrichMain</c>.
/// </returns>
public static List<string> GetCategoryPaginationUrls(string category_url, int pages_to_get)
{
    HtmlDocument doc = DataGetter.GetHtmlpage(new Uri(category_url));

    // The link of page 2 serves as the template for all page URLs.
    List<KeyValuePair<string, HtmlNode>> paginationElements =
        DataGetter.GetDataByID(doc, "div", new List<string> { "searchResultsPagination" });
    List<KeyValuePair<string, HtmlNode>> page2 =
        DataGetter.GetDataByID(doc, "li", new List<string> { "pg2" });
    string page2Link = ExtractLinkFromHtml(page2[0].Value, "href", "'>");

    // The last page number comes from the second-to-last child of the pagination <ul>.
    HtmlNode paginationDiv = paginationElements[paginationElements.Count - 1].Value;
    HtmlNode pageList = paginationDiv.ChildNodes.First(x => x.Name == "ul");
    HtmlNode lastPage = pageList.ChildNodes[pageList.ChildNodes.Count - 2];
    string lastPageLink = ExtractLinkFromHtml(lastPage, "href", "'>");
    int lastPageNumber = ExtractNumberFromString(lastPageLink, "page=");

    // Split the template around "page=" and drop the page number that follows it.
    string[] pageParts = page2Link.Split(new string[] { "page=" }, StringSplitOptions.None);
    pageParts[1] = RemoveNumberFromStartOfString(pageParts[1]);

    int pageCount = pages_to_get == 0 ? lastPageNumber : pages_to_get;

    List<string> paginationLinks = new List<string>();
    for (int page = 1; page <= pageCount; page++)
    {
        paginationLinks.Add(SigmaAldrichConstants.SigmaAldrichMain + pageParts[0] + "page=" + page + pageParts[1]);
    }

    return paginationLinks;
}
/// <summary>
/// Downloads a product page and converts it into a <see cref="Mini"/> with its creator,
/// title, thumbnail, link, cost flag and source site.
/// </summary>
/// <param name="url">Address of the product page.</param>
/// <returns>The populated <see cref="Mini"/>.</returns>
/// <remarks>
/// NOTE(review): a page missing the vendor link, the h1 or the og:image meta tag makes
/// this method throw - confirm callers expect that.
/// </remarks>
public async Task<Mini> ParseFromUrl(Uri url)
{
    HtmlWeb web = new HtmlWeb();
    HtmlDocument htmlDoc = await web.LoadFromWebAsync(url, null, null);
    HtmlNode root = htmlDoc.DocumentNode;

    // The creator name is the fifth '/'-separated segment of the vendor link's href.
    HtmlNode vendorLink = root.SelectNodes("//a[@class='by-vendor-name-link']").FirstOrDefault();
    string vendorHref = vendorLink.GetAttributeValue("href", null);
    string[] hrefSegments = vendorHref.Split('/');
    string creatorName = Uri.UnescapeDataString(hrefSegments[4]);

    Creator creator = new Creator { Name = creatorName };
    Wargaming3dSource source = new Wargaming3dSource(creator, creatorName);
    creator.Sites.Add(source);

    Mini mini = new Mini();
    mini.Creator = creator;
    mini.Name = System.Web.HttpUtility.HtmlDecode(root.SelectNodes("//h1").FirstOrDefault().InnerText.Trim());

    // Thumbnail: "content" of the <meta> tag that has an attribute valued "og:image".
    HtmlNode imageMeta = root.SelectNodes("//meta")
        .Where(n => n.Attributes.Any(a => a.Value == "og:image"))
        .First();
    mini.Thumbnail = imageMeta.Attributes.Where(a => a.Name == "content").First().Value;
    mini.Link = url.ToString();

    //TODO (GitHub #167) - Parsing cost here is a bit hard, so just setting it to be 1 for now since we only have a boolean for cost.
    HtmlNodeCollection priceNodes = root.SelectNodes("//div[@class='price-wrapper']");
    bool isPaid = priceNodes != null && !priceNodes.First().InnerText.Contains("0.00");
    mini.Cost = isPaid ? 1 : 0;

    mini.Sources.Add(new MiniSourceSite(mini, source, url));
    return mini;
}
/// <summary>
/// Builds a <c>MatchName</c> from the first <c>h1</c> inside the given node.
/// </summary>
/// <param name="node">Node whose inner HTML contains the match heading.</param>
/// <returns>
/// The two names obtained by splitting the heading on " vs ", " @ " or " - ", each
/// without any " (...)" suffix.
/// </returns>
MatchName GetMatchName(HtmlNode node)
{
    // Re-parse the inner HTML so "//h1" only searches inside this node.
    HtmlDocument innerDoc = new HtmlDocument();
    innerDoc.LoadHtml(node.InnerHtml);
    string heading = innerDoc.DocumentNode.SelectNodes("//h1").First().InnerText;

    string[] separators = { " vs ", " @ ", " - " };
    string[] parts = heading.Split(separators, StringSplitOptions.RemoveEmptyEntries);

    // Strip a trailing " (...)" annotation from each of the two names.
    for (int i = 0; i < 2; i++)
    {
        if (parts[i].Contains("("))
        {
            parts[i] = parts[i].Split(new string[] { " (" }, StringSplitOptions.RemoveEmptyEntries)[0];
        }
    }

    return new MatchName(parts[0], parts[1]);
}
/// <summary>
/// Downloads the model's type table and appends one <c>ModelTypeObj</c> per usable row
/// to <c>modelTypesList</c>.
/// </summary>
/// <param name="modelObj">Model whose <c>ModelUrlPath</c> addresses the type table and whose <c>ModelID</c> is copied to each type.</param>
/// <remarks>
/// Nothing is imported when the page has parse errors or no table exists under the
/// element with id "modelis". Child nodes with fewer than seven children, and rows whose
/// first cell has no quoted path in <c>onclick</c>, are skipped; they used to abort the
/// import with an exception.
/// </remarks>
private void GetModelTypes(ModelObj modelObj)
{
    string modelTypesUrl = string.Format("{0}{1}", this.baseUrl, modelObj.ModelUrlPath);
    string modelsContent = GetContent(modelTypesUrl);

    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(modelsContent);

    HtmlNodeCollection typesTables = null;
    if (htmlDoc.DocumentNode != null && htmlDoc.ParseErrors != null && !htmlDoc.ParseErrors.Any())
    {
        typesTables = htmlDoc.DocumentNode.SelectNodes("//*[@id=\"modelis\"]/table");
    }

    if (typesTables == null)
    {
        return;
    }

    foreach (HtmlNode node in typesTables.First().ChildNodes)
    {
        // Seven cells are read below (indexes 0..6).
        if (node.ChildNodes.Count < 7)
        {
            continue;
        }

        // The details URL is the text between the first and last single quote of onclick.
        string onclick = node.ChildNodes[0].GetAttributeValue("onclick", string.Empty);
        int firstQuote = onclick.IndexOf('\'');
        int lastQuote = onclick.LastIndexOf('\'');
        if (firstQuote < 0 || lastQuote <= firstQuote)
        {
            continue;
        }

        // Keep only the part in front of the "cm3" unit; use the whole text if it is absent.
        string cubicText = node.ChildNodes[1].InnerText;
        int unitIndex = cubicText.IndexOf("cm3", System.StringComparison.Ordinal);

        ModelTypeObj typeObj = new ModelTypeObj();
        typeObj.ModelTypeID = modelTypesList.Count + 1;
        typeObj.ModelID = modelObj.ModelID;
        typeObj.ModelTypeName = node.ChildNodes[0].InnerText;
        typeObj.ModelTypeCubic = unitIndex >= 0 ? cubicText.Substring(0, unitIndex) : cubicText;
        typeObj.ModelTypeFuel = node.ChildNodes[2].InnerText;
        typeObj.ModelTypePower = node.ChildNodes[3].InnerText;
        typeObj.ModelTypeTank = node.ChildNodes[4].InnerText;
        typeObj.ModelTypeFromYear = node.ChildNodes[5].InnerText;
        typeObj.ModelTypeToYear = node.ChildNodes[6].InnerText;
        typeObj.ModelTypeDetailsUrl = onclick.Substring(firstQuote + 1, lastQuote - firstQuote - 1);

        modelTypesList.Add(typeObj);
    }
}
/// <summary>
/// Returns the inner HTML of the single <c>td</c> element with class
/// "summary_timestamp" on the given page.
/// </summary>
/// <param name="url">Address of the page to inspect.</param>
/// <returns>
/// The element's inner HTML, or an empty string when <c>GetHtmlNodes</c> yields no nodes
/// or more than one.
/// </returns>
private static string GetCurrentCoditions(string url)
{
    string className = "summary_timestamp";
    string htmlTag = "td";

    // NOTE(review): the fetched text was only parsed into a document that nothing read,
    // so that parse is gone. The call itself is kept in case GetHtmlCode has side
    // effects - confirm and remove if it is a plain download.
    GetHtmlCode(url);

    // Search the page for the specific element.
    HtmlNodeCollection htmlNodes = GetHtmlNodes(url, htmlTag, className);
    if (htmlNodes != null && htmlNodes.Count == 1)
    {
        return htmlNodes.First().InnerHtml;
    }

    return string.Empty;
}
/* The site automatically picks the vehicle size based on the cargo weight. */
/// <summary>
/// Requests the price-calculation page and reads the vehicle id the site pre-selected.
/// </summary>
/// <param name="inCity">Destination, sent as the <c>where</c> query parameter.</param>
/// <param name="outCity">Origin, sent as the <c>from</c> query parameter.</param>
/// <param name="weight">Cargo weight, sent as the <c>weight</c> query parameter.</param>
/// <param name="volume">Cargo volume, sent as the <c>volume</c> query parameter.</param>
/// <returns>
/// The value assigned to <c>selected_car_id</c> in the page's script, or 0 when the
/// script or the assignment is missing or the value is not an integer.
/// </returns>
private int GetCar(string inCity, string outCity, int weight, int volume)
{
    // tag span.number
    // NOTE(review): the city names are inserted into the query string unescaped - confirm
    // callers pass URL-safe values.
    string url = $@"https://www.eastlines.ru/raschet-stoimosti/?from={outCity}&from-place-id=&from-reference=&where={inCity}&where-place-id=&where-reference=&weight={weight}&volume={volume}";
    var pageContent = LoadPage(url);

    var document = new HtmlDocument();
    document.LoadHtml(pageContent);

    int result = 0;

    // SelectNodes returns null when no script declares selected_car_id.
    HtmlNodeCollection collection = document.DocumentNode.SelectNodes("//script[@type='text/javascript' and contains(.,'var selected_car_id = ')]");
    if (collection != null)
    {
        string declaration = collection.First().InnerText
            .Split('\n')
            .FirstOrDefault(r => r.Contains("selected_car_id"));

        if (declaration != null)
        {
            // Right-hand side of the assignment without the trailing ';' and spaces.
            string neededRow = declaration.Split('=').Last().Trim(';').Trim(' ');
            int.TryParse(
                neededRow,
                System.Globalization.NumberStyles.Integer,
                System.Globalization.CultureInfo.InvariantCulture,
                out result);
        }
    }

    Console.WriteLine(result.ToString());
    return result;
}
/* The site automatically picks the vehicle size based on the cargo weight. */
/// <summary>
/// Requests the price-calculation page and reads the vehicle id the site pre-selected.
/// </summary>
/// <param name="inCity">Destination, sent as the <c>where</c> query parameter.</param>
/// <param name="outCity">Origin, sent as the <c>from</c> query parameter.</param>
/// <param name="weight">Cargo weight; must be positive.</param>
/// <returns>
/// The text assigned to <c>selected_car_id</c> in the page's script; an error message when
/// <paramref name="weight"/> is not positive; <c>null</c> when no such script exists.
/// </returns>
private string GetCar(string inCity, string outCity, int weight)
{
    if (weight <= 0)
    {
        return "Неправильно указан вес";
    }

    string url = $@"https://www.eastlines.ru/raschet-stoimosti/?from={outCity}&from-place-id=&from-reference=&where={inCity}&where-place-id=&where-reference=&weight={weight}";
    var pageContent = LoadPage(url).Result;

    var document = new HtmlDocument();
    document.LoadHtml(pageContent);

    HtmlNodeCollection scripts = document.DocumentNode.SelectNodes("//script[@type='text/javascript' and contains(.,'var selected_car_id = ')]");
    if (scripts == null)
    {
        return null;
    }

    // Take the script line mentioning selected_car_id and return the right-hand side of
    // the assignment without the trailing ';' and spaces.
    string scriptText = scripts.First().InnerText;
    string declaration = scriptText.Split('\n').First(r => r.Contains("selected_car_id"));
    string assignedValue = declaration.Split('=').Last();

    return assignedValue.Trim(';').Trim(' ');
}
/// <summary>
/// Parses <c>pageContent</c> and imports the makers listed in the maker panel into
/// <c>MakersList</c>, downloading each maker's logo on the way.
/// </summary>
/// <remarks>
/// Only makers matching <c>Core.Conf.MakerName</c> are imported; an empty value or "*"
/// imports all of them. A failure on a single maker is logged when a logger is
/// available; otherwise it is rethrown wrapped in an <see cref="Exception"/>.
/// A panel without list items no longer crashes the import.
/// </remarks>
private void GetMakers()
{
    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(pageContent);

    HtmlNodeCollection makerDiv = null;
    if (htmlDoc.DocumentNode != null)
    {
        makerDiv = htmlDoc.DocumentNode.SelectNodes(
            "//*[@id=\"ctl00_ctl00_cphContentRow_cphContent_wucNFBAutokatalogMarken1_updatePanelMarken\"]/div[2]/div[2]/ul");
    }

    if (makerDiv == null)
    {
        return;
    }

    htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(makerDiv.First().InnerHtml);

    // SelectNodes returns null when the list is empty.
    HtmlNodeCollection makerNodes = htmlDoc.DocumentNode.SelectNodes("//li");
    if (makerNodes != null)
    {
        // The configured filter does not change during the loop, so evaluate it once.
        string configuredMaker = Core.Conf.MakerName;
        string configuredTrimmed = configuredMaker.Trim();
        bool importAll = configuredTrimmed.Equals(string.Empty) || configuredTrimmed.Equals("*");

        // Invariant casing keeps the comparison independent of the host's culture.
        string configuredUpper = configuredMaker.ToUpperInvariant();

        foreach (HtmlNode node in makerNodes)
        {
            if (!importAll && !configuredUpper.Equals(CleanNameString(node.InnerText.Trim().ToUpperInvariant())))
            {
                continue;
            }

            try
            {
                MakerObj maker = new MakerObj();
                maker.MakerName = CleanNameString(node.InnerText.Trim());

                // Logo: "src" of the first child of the link's second child node.
                maker.MakerLogoUrl = node.FirstChild.ChildNodes[1].FirstChild.ChildAttributes("src")
                    .First().Value;
                maker.MakerLogoLocalFile = DownloadMakerImage(maker.MakerLogoUrl);

                // The maker's page is the href of the list item's first child.
                maker.MakerUrlPath = node.FirstChild.Attributes.AttributesWithName("href")
                    .First().Value;

                MakersList.Add(maker);

                // Short pause between makers (each one triggers a logo download).
                Thread.Sleep(100);
            }
            catch (Exception ex)
            {
                if (Core != null && Core.Log != null)
                {
                    Core.Log.Error(string.Format("ADACImporter::GetMakers : {0}", ex.Message));
                }
                else
                {
                    throw new Exception("ADACImporter::GetMakers", ex);
                }
            }
        }
    }

    if (Core != null && Core.Log != null)
    {
        Core.Log.Info(string.Format("{0} Maker Records imported.", MakersList.Count));
    }
}
/// <summary>
/// Downloads the details page of every entry in <c>modelTypesList</c> and appends the
/// parsed <c>CarDetailsObj</c> to <c>carDetailsList</c>.
/// </summary>
/// <remarks>
/// Values are read positionally: row N of a detail table, second child node of that row.
/// A page without the first (general) table is skipped. A failure on one page is logged
/// together with its URL when a logger is available; otherwise it is rethrown wrapped in
/// an <see cref="Exception"/>.
/// </remarks>
private void GetCarDetails()
{
    const string panelXPath =
        "//*[@id=\"ctl00_ctl00_cphContentRow_cphContent_wucNFBAutokatalogDetail1_ctl01_updatePanelDetail\"]/div[2]/div[3]";

    // Selects the tbody of the n-th detail table inside the panel.
    Func<HtmlDocument, string, HtmlNodeCollection> selectTable =
        (document, position) => document.DocumentNode.SelectNodes(panelXPath + "/div[" + position + "]/table/tbody");

    // Raw text of the value cell (second child) in the given row.
    Func<HtmlNode, int, string> raw = (table, row) => table.ChildNodes[row].ChildNodes[1].InnerText;
    // Same cell, HTML-decoded.
    Func<HtmlNode, int, string> text = (table, row) => HttpUtility.HtmlDecode(raw(table, row));
    // Same cell, converted to an int with 0 as fallback.
    Func<HtmlNode, int, int> number = (table, row) => raw(table, row).ToInt32OrDefault(0);

    foreach (ModelTypeObj type in modelTypesList)
    {
        string modelDetailsUrl = string.Format("{0}{1}", baseUrl, type.ModelTypeDetailsUrl);
        try
        {
            string carContent = GetContent(modelDetailsUrl);
            var htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(carContent);

            HtmlNodeCollection generalTables = null;
            if (htmlDoc.DocumentNode != null)
            {
                generalTables = selectTable(htmlDoc, "1");
            }

            if (generalTables == null)
            {
                continue;
            }

            // General data
            HtmlNode general = generalTables.First();
            CarDetailsObj carObj = new CarDetailsObj();
            carObj.ModelTypeID = type.ModelTypeID;
            carObj.ModelID = type.ModelID;
            carObj.Maker = text(general, 1);
            carObj.Model = text(general, 2);
            carObj.Type = text(general, 3);
            carObj.Series = text(general, 4);
            carObj.ModelTypeName = type.ModelTypeName;
            carObj.InternalClassName = text(general, 5);
            carObj.ModelStart = text(general, 6);
            carObj.ModelEnd = text(general, 7);
            carObj.SeriesStart = text(general, 8);
            carObj.SeriesEnd = text(general, 9);
            carObj.HSN = text(general, 10);
            carObj.TSN = text(general, 11);
            carObj.TSN2 = text(general, 12);
            carObj.CarTax = text(general, 13);
            carObj.CO2Class = text(general, 14);
            carObj.BasePrice = text(general, 15);

            // Engine & drivetrain
            HtmlNodeCollection engineTables = selectTable(htmlDoc, "2");
            if (engineTables != null)
            {
                HtmlNode engine = engineTables.First();
                carObj.EngineType = text(engine, 0);
                carObj.Fuel = text(engine, 1);
                carObj.Fuel2 = text(engine, 2);
                carObj.EmissionControl = text(engine, 3);
                carObj.EngineDesign = text(engine, 4);
                carObj.Cylinder = number(engine, 5);
                carObj.FuelType = text(engine, 6);
                carObj.Charge = text(engine, 7);
                carObj.Valves = number(engine, 8);
                carObj.Cubic = text(engine, 9);
                carObj.PowerKW = number(engine, 10);
                carObj.PowerPS = number(engine, 11);
                carObj.MaxPower = text(engine, 12);
                carObj.TurningMoment = text(engine, 13);
                carObj.MaxTurningMoment = text(engine, 14);
                carObj.TypeOfDrive = text(engine, 15);
                carObj.Gearing = text(engine, 16);
                carObj.Gears = number(engine, 17);
                carObj.StartStopAutomatic = text(engine, 18);
                // Row 19 is not read.
                carObj.EmissionClass = text(engine, 20);
            }

            // Dimensions & weight
            HtmlNodeCollection dimensionTables = selectTable(htmlDoc, "3");
            if (dimensionTables != null)
            {
                HtmlNode dimensions = dimensionTables.First();
                carObj.Length = text(dimensions, 0);
                carObj.Width = text(dimensions, 1);
                carObj.Height = text(dimensions, 2);
            }

            // Body & chassis
            HtmlNodeCollection chassisTables = selectTable(htmlDoc, "4");
            if (chassisTables != null)
            {
                HtmlNode chassis = chassisTables.First();
                carObj.Chassis = text(chassis, 0);
                carObj.Doors = number(chassis, 1);
                // Row 2 is not read.
                carObj.CarClass = text(chassis, 3);
                carObj.Seats = number(chassis, 4);
            }

            // Manufacturer's measured values
            HtmlNodeCollection measuredTables = selectTable(htmlDoc, "5");
            if (measuredTables != null)
            {
                HtmlNode measured = measuredTables.First();
                carObj.SpeedUp = text(measured, 0);
                carObj.MaxSpeed = text(measured, 1);
                carObj.Tank = text(measured, 26);
                carObj.Tank2 = text(measured, 27);
            }

            carDetailsList.Add(carObj);
        }
        catch (Exception ex)
        {
            if (Core != null && Core.Log != null)
            {
                Core.Log.Error(string.Format("ADACImporter::GetCarDetails : {0} ({1})", ex.Message, modelDetailsUrl));
            }
            else
            {
                throw new Exception("ADACImporter::GetCarDetails", ex);
            }
        }
    }

    if (Core != null && Core.Log != null)
    {
        Core.Log.Info(string.Format("{0} Car Detail Records imported.", carDetailsList.Count));
    }
}
/// <summary>
/// Loads the model-type listing for <paramref name="modelObj"/> (base URL + the model's URL path,
/// fetched through GetContent) and appends one <c>ModelTypeObj</c> per table row of the
/// <c>car_db_select_hits</c> table to <c>modelTypesList</c>.
/// </summary>
/// <param name="modelObj">Model whose ID, maker name, model name and URL path are copied/used for each row.</param>
/// <remarks>
/// A failure while reading a single row is logged through <c>Core.Log</c> when a logger is available;
/// otherwise it is rethrown wrapped in an <see cref="Exception"/>.
/// </remarks>
private void GetModelTypes(ModelObj modelObj)
{
    string listingUrl = string.Format("{0}{1}", baseUrl, modelObj.ModelUrlPath);
    string listingHtml = GetContent(listingUrl);

    var htmlDoc = new HtmlDocument();
    htmlDoc.LoadHtml(listingHtml);

    HtmlNodeCollection tableBodies = null;
    if (htmlDoc.DocumentNode != null)
    {
        tableBodies = htmlDoc.DocumentNode.SelectNodes("//*[@id=\"car_db_select_hits\"]/tbody");
    }

    // Nothing to import when the hits table is missing from the page.
    if (tableBodies == null)
    {
        return;
    }

    int processedRows = 0;
    foreach (var row in tableBodies.First().ChildNodes)
    {
        try
        {
            // Only <tr> children carry data; everything else is skipped.
            if (!row.Name.ToLower().Equals("tr"))
            {
                continue;
            }

            // Counted before parsing, so a row that fails to parse still counts towards the limit.
            processedRows++;

            var cells = row.ChildNodes;
            ModelTypeObj modelType = new ModelTypeObj
            {
                ModelTypeID = modelTypesList.Count + 1,
                ModelID = modelObj.ModelID,
                MakerName = modelObj.MakerName,
                ModelName = modelObj.ModelName,
                // link to details page
                ModelTypeDetailsUrl = cells[3].ChildNodes[1].Attributes
                    .AttributesWithName("href")
                    .First().Value,
                // Type name
                ModelTypeName = cells[5].InnerText.Trim(),
                // Chassis
                ModelTypeChassis = cells[7].InnerText.Trim(),
                // Doors
                ModelTypeDoors = cells[9].InnerText.Trim().ToInt32OrDefault(0),
                // Fuel
                ModelTypeFuel = cells[11].InnerText.Trim(),
                // KW
                ModelTypePower = cells[13].InnerText.Trim(),
                ModelTypeCubic = cells[17].InnerText.Trim()
            };

            modelTypesList.Add(modelType);

            // DEBUG: break after x number of model types
            if (IsLimited(processedRows))
            {
                break;
            }
        }
        catch (Exception ex)
        {
            // Without a logger the failure must not go unnoticed, so it is propagated instead.
            if (Core == null || Core.Log == null)
            {
                throw new Exception("ADACImporter::GetModelTypes", ex);
            }

            Core.Log.Error(string.Format("ADACImporter::GetModelTypes : {0}", ex.Message));
        }
    }
}
/// <summary>
/// Scans the anchors in an HTML fragment, looks the absolute ("http"-prefixed) link targets up via
/// <c>SafeBrowsingLookup</c>, and returns the links reported as threats. Results are cached in the
/// runtime cache for 24 hours so re-saving the same content does not repeat the lookup for them.
/// </summary>
/// <param name="text">HTML markup to scan. Null or empty markup yields an empty result.</param>
/// <param name="apiKey">API key forwarded to the lookup. When it has no value nothing is checked.</param>
/// <returns>
/// The unsafe links found by the lookup followed by any previously cached results; never null.
/// </returns>
public List<BrokenLinkModel> Check(string text, string apiKey)
{
    List<BrokenLinkModel> response = new List<BrokenLinkModel>();

    // LoadHtml rejects null input, so treat missing markup like markup without links.
    if (!apiKey.HasValue() || string.IsNullOrEmpty(text))
    {
        return response;
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(text);

    HtmlNodeCollection links = doc.DocumentNode.SelectNodes(KnownStrings.HrefXPath);
    if (links == null || !links.Any())
    {
        return response;
    }

    // Distinct: a link repeated in the markup only needs one cache probe / one lookup entry.
    // Ordinal: URL schemes are not linguistic text, so avoid culture-sensitive matching.
    List<string> hrefs = links.Select(l => l.GetAttributeValue("href", string.Empty))
        .Where(l => l.StartsWith("http", StringComparison.Ordinal))
        .Distinct()
        .ToList();

    // check for cached responses - avoids request when page is being resaved
    List<BrokenLinkModel> fromCache = new List<BrokenLinkModel>();
    List<string> uncached = new List<string>();
    foreach (string href in hrefs)
    {
        var cacheItem = Current.AppCaches.RuntimeCache.GetCacheItem<BrokenLinkModel>(KnownStrings.CacheKey + href);
        if (cacheItem == null)
        {
            uncached.Add(href);
        }
        else
        {
            fromCache.Add(cacheItem);
        }
    }

    // Only call the lookup when there is something left to ask about.
    if (uncached.Count > 0)
    {
        SafeBrowsingResponseModel safeBrowsingResult = SafeBrowsingLookup(uncached.ToArray(), apiKey);

        if (safeBrowsingResult != null && safeBrowsingResult.Matches != null && safeBrowsingResult.Matches.Any())
        {
            List<BrokenLinkModel> unsafeLinks = safeBrowsingResult.Matches.Select(m => new BrokenLinkModel
            {
                Href = m.Threat.Url,
                Status = m.ThreatType,
                Unsafe = true,
                // The reported URL may not match any href character-for-character;
                // fall back to empty text instead of throwing from First().
                Text = links.FirstOrDefault(l => l.GetAttributeValue("href", string.Empty) == m.Threat.Url)
                           ?.InnerText ?? string.Empty
            }).ToList();

            foreach (BrokenLinkModel item in unsafeLinks)
            {
                Current.AppCaches.RuntimeCache.InsertCacheItem(KnownStrings.CacheKey + item.Href, () => item,
                    new TimeSpan(24, 0, 0), false);
            }

            response.AddRange(unsafeLinks);
        }
    }

    // add cached results
    response.AddRange(fromCache);

    return response;
}