Exemplo n.º 1
0
        /// <summary>
        /// Builds a <see cref="ScrapingXpath"/> from the posted form and saves it to the
        /// "scrapingxpath" index.
        /// </summary>
        /// <remarks>
        /// Every form field whose key contains "cmb" is treated as a selector: its value is a
        /// <see cref="DisplayAttribute"/> name, and the field whose key is the same text with
        /// "cmb" replaced by "chck" supplies the value written to the property carrying that
        /// display name. When the same display name is posted more than once, the first one wins.
        /// </remarks>
        /// <param name="form">The posted form fields; "SiteUrl" is read from it as well.</param>
        /// <returns>An empty 200 OK result after the model has been handed to the store.</returns>
        public IActionResult SaveXpath(IFormCollection form)
        {
            var keyValues = new SortedList<string, string>();

            foreach (var contain in form)
            {
                if (!contain.Key.Contains("cmb"))
                {
                    continue;
                }

                // A field posted without a value converts to a null string, which SortedList
                // rejects as a key, and a repeated display name would be a duplicate key.
                // Both cases used to be hidden by an empty catch block; skip them explicitly.
                string displayName = contain.Value;
                if (displayName == null || keyValues.ContainsKey(displayName))
                {
                    continue;
                }

                keyValues.Add(displayName, form[contain.Key.Replace("cmb", "chck")]);
            }

            var model = new ScrapingXpath();

            foreach (var property in typeof(ScrapingXpath).GetProperties())
            {
                // Properties without a [Display] attribute (or without a Name) cannot be matched
                // to a form entry; skipping them avoids the crash that First() caused.
                var display = property.GetCustomAttributes(typeof(DisplayAttribute))
                                      .OfType<DisplayAttribute>()
                                      .FirstOrDefault();
                if (display == null || display.Name == null)
                {
                    continue;
                }

                string postedValue;
                if (keyValues.TryGetValue(display.Name, out postedValue))
                {
                    property.SetValue(model, postedValue);
                }
            }

            model.SiteUrl = form["SiteUrl"];
            model.Id = Guid.NewGuid().ToString();

            ElasticSearchManager elasticSearchManager = new ElasticSearchManager();
            elasticSearchManager.Save(model, "scrapingxpath");

            // MVC rejects a null IActionResult with an InvalidOperationException, which turned
            // every successful save into a server error. Return an explicit empty success result.
            return new OkResult();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Loads a detail page in headless Chrome, scrapes its fields into a
        /// <see cref="ScrapingModelData"/>, fetches the location report for the page's location id
        /// and saves the result to the "scrapingmodeldata" index.
        /// </summary>
        /// <remarks>
        /// Single-valued lookups (title, price, address, owner, firm, property block, description,
        /// feature block, breadcrumb, location id) are required: if one of them matches nothing the
        /// method fails with a <see cref="NullReferenceException"/> and nothing is saved.
        /// List-valued lookups (phone links, property rows, feature entries, gallery images)
        /// tolerate zero matches.
        /// </remarks>
        /// <param name="pageUrl">URL of the detail page to load.</param>
        private void GetDetailPageUrl(string pageUrl)
        {
            Console.WriteLine(pageUrl);

            ChromeOptions chromeOptions = new ChromeOptions();
            chromeOptions.AddArgument("--headless");
            chromeOptions.AddArgument("--no-sandbox");
            chromeOptions.AddArgument("--disable-dev-shm-usage");
            chromeOptions.AddArgument("--ignore-certificate-errors");

            ScrapingModelData data = new ScrapingModelData();

            using (var driver = new ChromeDriver("/bin", chromeOptions, TimeSpan.FromMinutes(1)))
            {
                driver.Navigate().GoToUrl(pageUrl);

                var doc = new HtmlDocument();
                doc.LoadHtml(driver.PageSource);
                var root = doc.DocumentNode;

                data.SiteUrl  = pageUrl;
                data.Title    = root.SelectSingleNode("//*[@id='details']/div/div[4]/div/div/div/div/h1").InnerText.Trim();
                data.Price    = root.SelectSingleNode("//*[@id='details']/div/div[4]/div/div/div/div[2]/strong[2]").InnerText.Trim();
                data.Adres    = root.SelectSingleNode("//*[@id='details']/div/div[4]/div/div/div/div/div/h2").InnerText.Trim();
                data.Owner    = root.SelectSingleNode("//*[@id='details']/div/div[5]/div/div[2]/div/div/div/a").InnerText.Trim();
                data.Firm     = root.SelectSingleNode("//*[@id='details']/div/div[5]/div/div[2]/div/div/div/a[2]").InnerText.Trim();

                // SelectNodes returns null (not an empty collection) when nothing matches,
                // so a page without phone links must not be enumerated blindly.
                var phoneLinks = root.SelectNodes("//*[@class='contact-number-area number-area']/a");
                data.Phone = phoneLinks == null
                    ? string.Empty
                    : string.Join(';', phoneLinks.Where(a => a.Attributes["href"] != null)
                                                 .Select(a => a.Attributes["href"].Value));

                data.Property = root.SelectSingleNode("//*[@id='details']/div/div[5]/div/div/div[2]/div/div[2]").InnerHtml.Trim();

                var propertyRows = root.SelectNodes("//*[@id='details']/div/div[5]/div/div/div[2]/div/div[2]/ul/li");
                if (propertyRows != null)
                {
                    foreach (var row in propertyRows)
                    {
                        var label = row.SelectSingleNode("./strong");
                        var value = row.SelectSingleNode("./span");

                        // Rows missing either part are skipped, as the former empty catch did.
                        if (label == null || value == null)
                        {
                            continue;
                        }

                        data.Propertystr += $"{label.InnerText.Trim()}:{value.InnerText.Trim()},";
                    }
                }

                data.Description = root.SelectSingleNode("//*[@id='detailDescription']/div/p").InnerText.Trim();
                data.Feature     = root.SelectSingleNode("//*[@id='otherFacilities']/div").InnerHtml.Trim();

                var featureEntries = root.SelectNodes("//*[@id='otherFacilities']/div/div/div");
                if (featureEntries != null)
                {
                    foreach (var entry in featureEntries)
                    {
                        // Entries without a class attribute are skipped, as the former empty catch did.
                        var cssClass = entry.Attributes["class"];
                        if (cssClass == null)
                        {
                            continue;
                        }

                        var state = cssClass.Value.Contains("passive") ? "passive" : "active";
                        data.Featurestr += $"{state}:{entry.InnerText.Trim()},";
                    }
                }

                // One lookup feeds both the HTML and the plain-text form of the breadcrumb.
                var breadcrumb = root.SelectSingleNode("//*[@id='breadcrumbContainer']/div/div/ol");
                data.Category    = breadcrumb.InnerHtml.Trim();
                data.Categorystr = breadcrumb.InnerText.Trim();

                var galleryItems = root.SelectNodes("//div[@class='gallery-container']/a[@class='gallery-item zoon-in-image']");
                data.Picture = galleryItems == null
                    ? string.Empty
                    : string.Join(',', galleryItems.Where(a => a.Attributes["data-lg"] != null)
                                                   .Select(a => a.Attributes["data-lg"].Value));

                data.IsTransfer = false;

                var lot = root.SelectSingleNode("//*[@data-locationapi='/api/locationReport']").Attributes["data-id"].Value;

                // The id comes from scraped markup, so escape it before placing it in a query string.
                driver.Navigate().GoToUrl("https://www.zingat.com/api/locationReport?type=all&locId=" + Uri.EscapeDataString(lot));

                // LoadHtml replaces the document tree, so read DocumentNode again rather than reusing root.
                doc.LoadHtml(driver.PageSource);
                data.Column1 = doc.DocumentNode.InnerText;
            }

            ElasticSearchManager elasticSearchManager = new ElasticSearchManager();
            elasticSearchManager.Save(data, "scrapingmodeldata");
        }