/// <summary>
/// Builds the template-collector cache entry for a task: copies the task's bulletin id and,
/// when a bulletin with that id is found in the bulletin database, uses the bulletin title
/// as the single search query.
/// </summary>
/// <param name="arg2">Task whose <c>BulletinId</c> identifies the source bulletin.</param>
/// <returns>
/// A new cache object. <c>Queries</c> is not assigned when no bulletin with that id is found.
/// </returns>
private TaskBulletinTemplateCollectorCache ToCache5(Task arg2)
{
    var cache = new TaskBulletinTemplateCollectorCache();

    // NOTE(review): BulletinId looks like a nullable id; reading .Value would throw when it
    // is empty — confirm that callers only pass tasks with a bulletin attached.
    cache.BulletinId = arg2.BulletinId.Value;

    var bulletin = BCT.Context.BulletinDb.Bulletins.Find(cache.BulletinId);
    if (bulletin != null)
    {
        cache.Queries = new string[] { bulletin.Title };
    }

    return cache;
}
/// <summary>
/// Loads the first avito.ru search-result pages for every query of <paramref name="taskModel"/>,
/// extracts the listings found there, then opens each listing page and scrapes it into a
/// <see cref="BulletinTemplateCache"/>.
/// </summary>
/// <param name="driver">Firefox WebDriver session used for every page load.</param>
/// <param name="taskModel">Task data; its <c>Queries</c> are the search phrases.</param>
/// <returns>
/// The templates that were scraped successfully. A listing that fails is skipped; an unexpected
/// failure outside the per-listing handling ends the run and returns what was collected so far.
/// </returns>
public override IEnumerable<BulletinTemplateCache> BulletinTemplateCollector(FirefoxDriver driver, TaskBulletinTemplateCollectorCache taskModel)
{
    var result = new List<BulletinTemplateCache>();

    // Nothing to search for: previously this case only "worked" through a swallowed exception.
    if (taskModel == null || taskModel.Queries == null)
    {
        return result;
    }

    try
    {
        // Fetch the data: the search-result pages of every query, concatenated into one document.
        const int pageCount = 2;
        var html = new System.Text.StringBuilder();
        foreach (var query in taskModel.Queries)
        {
            // Escape the query so characters such as '&', '#' or '+' cannot break the URL.
            var encodedQuery = Uri.EscapeDataString(query ?? string.Empty);
            for (var page = 1; page <= pageCount; page++)
            {
                var url = $"https://www.avito.ru/rossiya?p={page}&q={encodedQuery}";
                WaitExecute(driver);
                driver.Navigate().GoToUrl(url);
                html.Append(driver.PageSource);
            }
        }

        // Find the listings. Capture groups: 1 = id, 2 = image block, 3 = relative link, 4 = title.
        var pattern = "id=\"([\\s,\\S,\\n].*?)\" data-type[\\s,\\S,\\n]*?" + // ID
                      "class=\"item-photo item-photo_large\">([\\s,\\S,\\n]*?)<div class=\"favorites[\\s,\\S,\\n]*?" + // Images
                      "class=\"item-description-title-link\"[\\s,\\S,\\n].*?href=\"([\\s,\\S,\\n].*?)\"[\\s,\\S,\\n]*?" + // Links
                      "title=\"([\\s,\\S,\\n].*?)\"[\\s,\\S,\\n]*?<div class=\"about"; // Title
        var patternLinks = "//([\\s,\\S,\\n].*?)\"";

        var matches = RegexHelper.Execute(pattern, html.ToString());
        foreach (var m in matches)
        {
            string itemUrl = null;
            try
            {
                // Protocol-relative image links inside the photo block, joined with ';'.
                var imgMatches = RegexHelper.Execute(patternLinks, m.Groups[2].Value);
                var images = imgMatches.Select(q => "https://" + q.Groups[1].Value);

                var temp = new BulletinTemplateCache();
                itemUrl = "https://avito.ru" + m.Groups[3].Value;
                temp.URL = itemUrl;
                temp.Title = m.Groups[4].Value;
                temp.Images = string.Join(";", images);

                WaitExecute(driver);
                driver.Navigate().GoToUrl(temp.URL);

                // Read the page once: every PageSource access is a separate WebDriver call,
                // and all fields should come from the same snapshot of the page.
                var pageSource = driver.PageSource;
                FillTemplateFromItemPage(temp, pageSource);

                temp.IsHandled = true;
                result.Add(temp);
            }
            catch (Exception ex)
            {
                // Best effort: one broken listing must not stop the others, but leave a trace.
                System.Diagnostics.Trace.TraceWarning("Skipping listing '{0}': {1}", itemUrl, ex);
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: return what has been collected so far, but do not hide the failure.
        System.Diagnostics.Trace.TraceError("Bulletin template collection failed: {0}", ex);
    }

    return result;
}

/// <summary>
/// Fills price, title, view count, description, seller type, region and categories of
/// <paramref name="template"/> from the HTML of its listing page.
/// </summary>
/// <param name="template">Template to fill; its <c>URL</c> is only used in diagnostics.</param>
/// <param name="pageSource">HTML of the listing page.</param>
private static void FillTemplateFromItemPage(BulletinTemplateCache template, string pageSource)
{
    // Price.
    // NOTE(review): the original pattern ended with a space followed by a raw line-break
    // character embedded in the literal, which cannot be expressed in a regular C# string.
    // The capture now runs to the next tag and whitespace is stripped while parsing —
    // confirm against a live listing page.
    var priceText = RegexHelper.GetValue("js-price-value-string\">([\\s,\\S,\\n,\\r]*?)<", pageSource);
    int price;
    if (TryParseAvitoPrice(priceText, out price))
    {
        template.Price = price;
    }
    else
    {
        // As before, an unparsable price leaves Price untouched.
        System.Diagnostics.Trace.TraceWarning("Unrecognised price '{0}' on '{1}'", priceText, template.URL);
    }

    // Title: the listing page title replaces the one taken from the search results.
    template.Title = RegexHelper.GetValue("title-info-title-text\">([\\s,\\S,\\n].*?)</span>", pageSource);

    // Count: the first run of digits inside the statistics link.
    var countText = RegexHelper.GetValue("<a href=\"#\" class=\"js-show-stat pseudo-link\"[\\s,\\S,\\n]*?>([\\s,\\S,\\n]*?)</a>", pageSource);
    if (string.IsNullOrEmpty(countText))
    {
        template.Count = 0;
    }
    else
    {
        int count;
        var digits = Regex.Match(countText, "\\d+").Value;
        if (int.TryParse(digits, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out count))
        {
            template.Count = count;
        }
        else
        {
            // As before, an unparsable counter leaves Count untouched.
            System.Diagnostics.Trace.TraceWarning("Unrecognised counter '{0}' on '{1}'", countText, template.URL);
        }
    }

    // Description.
    template.Description = RegexHelper.GetValue("itemprop=\"description\">([\\s,\\S,\\n]*?)</div>", pageSource);

    // Owner type: flagged as an individual seller when the seller block contains "Продавец".
    var seller = RegexHelper.GetValue("seller-info-name([\\s,\\S,\\n]*?)seller-info-value", pageSource);
    template.IsIndividualSeller = seller != null && seller.Contains("Продавец");

    // City: "<region1>, <region2>"; the second part is optional.
    var city = RegexHelper.GetValue("Адрес</div> <div class=\"seller-info-value\">([\\s,\\S,\\n]*?)</div>", pageSource);
    var cityParts = (city ?? string.Empty).Split(',');
    template.Region1 = cityParts[0].Trim();
    template.Region2 = cityParts.Length > 1 ? cityParts[1].Trim() : null;

    // Category: breadcrumb links. The first breadcrumb is skipped and at most five levels are
    // kept; the bound prevents an overrun (and a dropped listing) on deeper breadcrumbs.
    var categoryText = RegexHelper.GetValue("<div class=\"breadcrumbs js-breadcrumbs\">([\\s,\\S,\\n]*?)</div>", pageSource);
    var categories = RegexHelper.Execute("title=\".*?>([\\s,\\S,\\n]*?)</a>", categoryText).ToArray();
    var rawCategories = new string[5];
    for (var i = 1; i < categories.Length && i <= rawCategories.Length; i++)
    {
        rawCategories[i - 1] = categories[i].Groups[1].Value;
    }

    template.Category1 = rawCategories[0];
    template.Category2 = rawCategories[1];
    template.Category3 = rawCategories[2];
    template.Category4 = rawCategories[3];
    template.Category5 = rawCategories[4];
}

/// <summary>
/// Converts the price text of a listing into a number.
/// </summary>
/// <param name="priceText">Raw text captured from the price element; may be null.</param>
/// <param name="price">
/// Parsed price; 0 when the text is blank or says the price is not specified or negotiable.
/// </param>
/// <returns><c>false</c> only when the text is present but is not a whole number.</returns>
private static bool TryParseAvitoPrice(string priceText, out int price)
{
    price = 0;

    // The blank check comes first so a missing value cannot fail on Contains.
    if (string.IsNullOrWhiteSpace(priceText)
        || priceText.Contains("Цена не указана")
        || priceText.Contains("Договорная"))
    {
        return true;
    }

    // Drop every whitespace character (padding and thousands separators, including
    // non-breaking spaces) before parsing.
    var compact = new string(priceText.Where(c => !char.IsWhiteSpace(c)).ToArray());
    return int.TryParse(compact, System.Globalization.NumberStyles.Integer, System.Globalization.CultureInfo.InvariantCulture, out price);
}