/// <summary>
/// Builds a <see cref="ScrapingXpath"/> from the posted form and saves it to the
/// "scrapingxpath" index.
/// </summary>
/// <remarks>
/// For every form field whose key contains "cmb", the field's value is used as a lookup
/// name and the value of the companion field (same key with "cmb" replaced by "chck") is
/// stored under that name. Every <see cref="ScrapingXpath"/> property that carries a
/// <see cref="DisplayAttribute"/> whose <c>Name</c> equals one of the lookup names is then
/// set to the stored value.
/// </remarks>
/// <param name="form">The posted form; it also supplies the "SiteUrl" field.</param>
/// <returns>An empty 200 OK result after the model has been passed to the store.</returns>
public IActionResult SaveXpath(IFormCollection form)
{
    var keyValues = new SortedList<string, string>();
    foreach (var contain in form)
    {
        if (!contain.Key.Contains("cmb"))
        {
            continue;
        }

        string name = contain.Value;

        // A field without a value converts to null, and the same name may be posted by
        // several fields. Both cases used to be hidden by an empty catch; they are now
        // skipped explicitly, keeping the first mapping for a given name.
        if (name == null || keyValues.ContainsKey(name))
        {
            continue;
        }

        keyValues.Add(name, form[contain.Key.Replace("cmb", "chck")]);
    }

    var model = new ScrapingXpath();
    foreach (var property in typeof(ScrapingXpath).GetProperties())
    {
        // Properties without a DisplayAttribute (or without a Name on it) cannot be
        // matched against the form, so they are left untouched instead of throwing.
        var display = property.GetCustomAttributes(typeof(DisplayAttribute))
                              .OfType<DisplayAttribute>()
                              .FirstOrDefault();
        if (display?.Name == null)
        {
            continue;
        }

        if (keyValues.TryGetValue(display.Name, out var value))
        {
            property.SetValue(model, value);
        }
    }

    model.SiteUrl = form["SiteUrl"];
    model.Id = Guid.NewGuid().ToString();

    var elasticSearchManager = new ElasticSearchManager();
    elasticSearchManager.Save(model, "scrapingxpath");

    // ASP.NET Core MVC does not accept a null IActionResult from an action, so report
    // success explicitly.
    return new OkResult();
}
/// <summary>
/// Opens a listing detail page in headless Chrome, extracts the listing fields with fixed
/// XPath expressions, fetches the location report for the listing and saves everything to
/// the "scrapingmodeldata" index.
/// </summary>
/// <param name="pageUrl">Address of the detail page to scrape.</param>
/// <exception cref="InvalidOperationException">
/// A required element or attribute is not present in the loaded page.
/// </exception>
private void GetDetailPageUrl(string pageUrl)
{
    Console.WriteLine(pageUrl);

    var chromeOptions = new ChromeOptions();
    chromeOptions.AddArgument("--headless");
    chromeOptions.AddArgument("--no-sandbox");
    chromeOptions.AddArgument("--disable-dev-shm-usage");
    chromeOptions.AddArgument("--ignore-certificate-errors");

    var data = new ScrapingModelData();
    var doc = new HtmlDocument();

    // Returns the single node matching the XPath. A missing node is reported together
    // with the XPath that failed instead of surfacing as an anonymous null dereference.
    HtmlNode Require(string xpath)
    {
        var node = doc.DocumentNode.SelectSingleNode(xpath);
        if (node == null)
        {
            throw new InvalidOperationException($"No element matches XPath '{xpath}' on '{pageUrl}'.");
        }

        return node;
    }

    // SelectNodes yields null rather than an empty collection when nothing matches;
    // normalise that so optional lists (phones, pictures, features) may be empty.
    IEnumerable<HtmlNode> All(string xpath)
    {
        return doc.DocumentNode.SelectNodes(xpath) ?? Enumerable.Empty<HtmlNode>();
    }

    using (var driver = new ChromeDriver("/bin", chromeOptions, TimeSpan.FromMinutes(1)))
    {
        driver.Navigate().GoToUrl(pageUrl);
        doc.LoadHtml(driver.PageSource);

        data.SiteUrl = pageUrl;
        data.Title = Require("//*[@id='details']/div/div[4]/div/div/div/div/h1").InnerText.Trim();
        data.Price = Require("//*[@id='details']/div/div[4]/div/div/div/div[2]/strong[2]").InnerText.Trim();
        data.Adres = Require("//*[@id='details']/div/div[4]/div/div/div/div/div/h2").InnerText.Trim();
        data.Owner = Require("//*[@id='details']/div/div[5]/div/div[2]/div/div/div/a").InnerText.Trim();
        data.Firm = Require("//*[@id='details']/div/div[5]/div/div[2]/div/div/div/a[2]").InnerText.Trim();

        // Anchors without an href are ignored.
        data.Phone = string.Join(';', All("//*[@class='contact-number-area number-area']/a")
            .Select(x => x.Attributes["href"]?.Value)
            .Where(href => href != null));

        data.Property = Require("//*[@id='details']/div/div[5]/div/div/div[2]/div/div[2]").InnerHtml.Trim();
        foreach (var propertyItem in All("//*[@id='details']/div/div[5]/div/div/div[2]/div/div[2]/ul/li"))
        {
            var label = propertyItem.SelectSingleNode("./strong");
            var value = propertyItem.SelectSingleNode("./span");

            // Items lacking either part are skipped (previously via a swallowed exception).
            if (label == null || value == null)
            {
                continue;
            }

            data.Propertystr += $"{label.InnerText.Trim()}:{value.InnerText.Trim()},";
        }

        data.Description = Require("//*[@id='detailDescription']/div/p").InnerText.Trim();

        data.Feature = Require("//*[@id='otherFacilities']/div").InnerHtml.Trim();
        foreach (var featureNode in All("//*[@id='otherFacilities']/div/div/div"))
        {
            var cssClass = featureNode.Attributes["class"]?.Value;

            // Nodes without a class attribute are skipped (previously via a swallowed exception).
            if (cssClass == null)
            {
                continue;
            }

            var state = cssClass.Contains("passive") ? "passive" : "active";
            data.Featurestr += $"{state}:{featureNode.InnerText.Trim()},";
        }

        var breadcrumb = Require("//*[@id='breadcrumbContainer']/div/div/ol");
        data.Category = breadcrumb.InnerHtml.Trim();
        data.Categorystr = breadcrumb.InnerText.Trim();

        // Gallery items without a data-lg attribute are ignored.
        data.Picture = string.Join(',', All("//div[@class='gallery-container']/a[@class='gallery-item zoon-in-image']")
            .Select(x => x.Attributes["data-lg"]?.Value)
            .Where(source => source != null));

        data.IsTransfer = false;

        const string locationXpath = "//*[@data-locationapi='/api/locationReport']";
        var locationId = Require(locationXpath).Attributes["data-id"]?.Value;
        if (locationId == null)
        {
            throw new InvalidOperationException($"Element '{locationXpath}' on '{pageUrl}' has no data-id attribute.");
        }

        // The id comes from scraped markup, so escape it before placing it in a query string.
        driver.Navigate().GoToUrl("https://www.zingat.com/api/locationReport?type=all&locId=" + Uri.EscapeDataString(locationId));
        doc.LoadHtml(driver.PageSource);
        data.Column1 = doc.DocumentNode.InnerText;
    }

    var elasticSearchManager = new ElasticSearchManager();
    elasticSearchManager.Save(data, "scrapingmodeldata");
}