/// <summary>
/// Builds a new <see cref="BulletinTemplateCache"/> from the given <see cref="BulletinTemplate"/>.
/// </summary>
/// <param name="entity">Entity to read from. It is not modified.</param>
/// <returns>
/// A new cache object holding the entity's values. In <c>Description</c> every <c>&lt;br&gt;</c> is replaced
/// with <see cref="Environment.NewLine"/>, and in <c>Images</c> every <c>;</c> is replaced with
/// <see cref="Environment.NewLine"/>.
/// </returns>
internal static BulletinTemplateCache ToCache(BulletinTemplate entity)
{
    var description = entity.Description;
    if (!string.IsNullOrEmpty(description))
    {
        description = description.Replace("<br>", Environment.NewLine);
    }

    return new BulletinTemplateCache
    {
        URL = entity.URL,
        Title = entity.Title,
        Description = description,
        Price = entity.Price,
        Count = entity.Count,
        // Null-safe, like Description above: an entity without images must not throw.
        Images = entity.Images?.Replace(";", Environment.NewLine),
        Category1 = entity.Category1,
        Category2 = entity.Category2,
        Category3 = entity.Category3,
        Category4 = entity.Category4,
        Category5 = entity.Category5,
        Region1 = entity.Region1,
        Region2 = entity.Region2,
        Region3 = entity.Region3,
        // These two used to be assigned in the opposite direction (cache -> entity), which
        // overwrote the entity's flags and left the returned cache with default values.
        IsIndividualSeller = entity.IsIndividualSeller,
        IsHandled = entity.IsHandled,
    };
}
/// <summary>
/// Copies the mapped fields of <paramref name="cache"/> onto <paramref name="entity"/>.
/// </summary>
/// <param name="cache">Object the values are read from.</param>
/// <param name="entity">Object that is overwritten in place.</param>
/// <returns>The same <paramref name="entity"/> instance, after the copy.</returns>
/// <remarks>All values are copied as they are; no text replacement is applied to any field.</remarks>
private BulletinTemplate ToEntity(BulletinTemplateCache cache, BulletinTemplate entity)
{
    // Main bulletin data.
    entity.URL = cache.URL;
    entity.Title = cache.Title;
    entity.Description = cache.Description;
    entity.Price = cache.Price;
    entity.Count = cache.Count;
    entity.Images = cache.Images;

    // Category path.
    entity.Category1 = cache.Category1;
    entity.Category2 = cache.Category2;
    entity.Category3 = cache.Category3;
    entity.Category4 = cache.Category4;
    entity.Category5 = cache.Category5;

    // Region path.
    entity.Region1 = cache.Region1;
    entity.Region2 = cache.Region2;
    entity.Region3 = cache.Region3;

    // Flags.
    entity.IsIndividualSeller = cache.IsIndividualSeller;
    entity.IsHandled = cache.IsHandled;

    return entity;
}
/// <summary>
/// Collects bulletin templates from Avito search results: loads the first search-result pages for every
/// query of <paramref name="taskModel"/>, extracts the listed bulletins with a regular expression, then
/// opens each bulletin page and scrapes its price, title, view count, description, seller type, region
/// and categories.
/// </summary>
/// <param name="driver">Browser driver used to load the search pages and the bulletin pages.</param>
/// <param name="taskModel">Task whose <c>Queries</c> are searched for.</param>
/// <returns>
/// The templates that were processed successfully. A bulletin whose page cannot be processed is skipped;
/// if the collection as a whole fails, the items gathered up to that point (possibly none) are returned.
/// Failures are written to the trace output instead of being thrown.
/// </returns>
public override IEnumerable<BulletinTemplateCache> BulletinTemplateCollector(FirefoxDriver driver, TaskBulletinTemplateCollectorCache taskModel)
{
    // Number of search-result pages loaded for each query.
    const int pageCount = 2;
    // The cache object has the slots Category1..Category5.
    const int maxCategories = 5;

    var result = new List<BulletinTemplateCache>();
    try
    {
        // Fetch the data: concatenate the HTML of all search-result pages.
        var searchHtml = new System.Text.StringBuilder();
        foreach (var query in taskModel.Queries)
        {
            for (var page = 1; page <= pageCount; page++)
            {
                // NOTE(review): the query is put into the URL without escaping. If queries can contain
                // '&', '#' or similar characters, consider Uri.EscapeDataString - confirm with the caller
                // that queries are not already encoded.
                var url = $"https://www.avito.ru/rossiya?p={page}&q={query}";
                WaitExecute(driver);
                driver.Navigate().GoToUrl(url);
                searchHtml.Append(driver.PageSource);
            }
        }

        // Find the occurrences. Capture groups: 1 = id, 2 = image block, 3 = relative link, 4 = title.
        var pattern = "id=\"([\\s,\\S,\\n].*?)\" data-type[\\s,\\S,\\n]*?" + // ID
                      "class=\"item-photo item-photo_large\">([\\s,\\S,\\n]*?)<div class=\"favorites[\\s,\\S,\\n]*?" + // Images
                      "class=\"item-description-title-link\"[\\s,\\S,\\n].*?href=\"([\\s,\\S,\\n].*?)\"[\\s,\\S,\\n]*?" + // Links
                      "title=\"([\\s,\\S,\\n].*?)\"[\\s,\\S,\\n]*?<div class=\"about"; // Title
        var patternLinks = "//([\\s,\\S,\\n].*?)\"";

        var matches = RegexHelper.Execute(pattern, searchHtml.ToString());
        foreach (var m in matches)
        {
            string bulletinUrl = null;
            try
            {
                var images = RegexHelper.Execute(patternLinks, m.Groups[2].Value)
                    .Select(link => "https://" + link.Groups[1].Value);
                bulletinUrl = "https://avito.ru" + m.Groups[3].Value;

                var temp = new BulletinTemplateCache();
                temp.URL = bulletinUrl;
                // Title from the search listing; replaced below by the title of the bulletin page.
                temp.Title = m.Groups[4].Value;
                temp.Images = string.Join(";", images);

                WaitExecute(driver);
                driver.Navigate().GoToUrl(bulletinUrl);

                // Take one snapshot of the page so that every field below is parsed from the same HTML.
                var pageSource = driver.PageSource;

                // Price
                // NOTE(review): in the reviewed source this literal was split by a line break right after
                // the space that follows "*?)". It is reproduced here with that single trailing space -
                // confirm the intended terminator (it may have been a non-breaking space or "&nbsp;").
                var price = RegexHelper.GetValue("js-price-value-string\">([\\s,\\S,\\n,\\r]*?) ", pageSource);
                if (string.IsNullOrWhiteSpace(price) || price.Contains("Цена не указана") || price.Contains("Договорная"))
                {
                    // No price, "price not specified" or "negotiable".
                    temp.Price = 0;
                }
                else
                {
                    // Drop the digit-group spaces ("1 500" -> "1500"). An unparsable value leaves Price untouched.
                    int parsedPrice;
                    if (int.TryParse(price.Trim().Replace(" ", ""), out parsedPrice))
                    {
                        temp.Price = parsedPrice;
                    }
                }

                // Title
                temp.Title = RegexHelper.GetValue("title-info-title-text\">([\\s,\\S,\\n].*?)</span>", pageSource);

                // Count: the first run of digits inside the statistics link.
                var count = RegexHelper.GetValue("<a href=\"#\" class=\"js-show-stat pseudo-link\"[\\s,\\S,\\n]*?>([\\s,\\S,\\n]*?)</a>", pageSource);
                if (string.IsNullOrEmpty(count))
                {
                    temp.Count = 0;
                }
                else
                {
                    // An unparsable value leaves Count untouched.
                    int parsedCount;
                    if (int.TryParse(Regex.Match(count, "\\d+").Value, out parsedCount))
                    {
                        temp.Count = parsedCount;
                    }
                }

                // Description
                temp.Description = RegexHelper.GetValue("itemprop=\"description\">([\\s,\\S,\\n]*?)</div>", pageSource);

                // Owner type
                var seller = RegexHelper.GetValue("seller-info-name([\\s,\\S,\\n]*?)seller-info-value", pageSource);
                temp.IsIndividualSeller = seller.Contains("Продавец");

                // City: "<first part>, <second part>, ..." - only the first two parts are used.
                var city = RegexHelper.GetValue("Адрес</div> <div class=\"seller-info-value\">([\\s,\\S,\\n]*?)</div>", pageSource);
                var cityParts = city.Split(',');
                temp.Region1 = cityParts[0].Trim();
                temp.Region2 = cityParts.Length > 1 ? cityParts[1].Trim() : null;

                // Category: the first breadcrumb link is skipped, the following ones fill Category1..Category5.
                var categoryText = RegexHelper.GetValue("<div class=\"breadcrumbs js-breadcrumbs\">([\\s,\\S,\\n]*?)</div>", pageSource);
                var categories = RegexHelper.Execute("title=\".*?>([\\s,\\S,\\n]*?)</a>", categoryText).ToArray();
                var rawCategories = new string[maxCategories];
                // The upper bound keeps a breadcrumb with more than maxCategories + 1 links from
                // overflowing the array (which used to discard the whole bulletin).
                for (var i = 1; i < categories.Length && i <= maxCategories; i++)
                {
                    rawCategories[i - 1] = categories[i].Groups[1].Value;
                }
                temp.Category1 = rawCategories[0];
                temp.Category2 = rawCategories[1];
                temp.Category3 = rawCategories[2];
                temp.Category4 = rawCategories[3];
                temp.Category5 = rawCategories[4];

                temp.IsHandled = true;
                result.Add(temp);
            }
            catch (Exception ex)
            {
                // Best effort: one broken bulletin must not abort the whole collection, but the
                // failure is recorded instead of being silently dropped.
                System.Diagnostics.Trace.TraceError("BulletinTemplateCollector: skipped bulletin '{0}': {1}", bulletinUrl, ex);
            }
        }
    }
    catch (Exception ex)
    {
        // Callers still receive whatever was collected so far.
        System.Diagnostics.Trace.TraceError("BulletinTemplateCollector: collection aborted: {0}", ex);
    }
    return result;
}
/// <summary>
/// Hands <paramref name="model"/> to <c>HubClient.Save</c> inside <c>DCT.Execute</c>,
/// passing a callback that does nothing.
/// </summary>
/// <param name="model">Template to save.</param>
public static void Save(BulletinTemplateCache model)
    => DCT.Execute(context => context.HubClient.Save<BulletinTemplateCache>(ignored => { }, model));