/// <summary>
/// Downloads every listing page in <paramref name="refs"/>, extracts its fields from the HTML
/// and stores each one as a new <c>Articles</c> row in <c>AvitoDb</c>.
/// </summary>
/// <remarks>
/// URLs that already exist in <c>AvitoDb.Articles</c> are skipped. Any exception raised while
/// handling a single URL (network failure, missing HTML node, unexpected text length, database
/// error) is passed to <c>Logger</c> and processing continues with the next URL.
/// NOTE(review): the method is <c>async void</c>, so callers cannot await it; the signature is
/// kept unchanged for compatibility with existing call sites.
/// </remarks>
/// <param name="refs">URLs of the listing pages to parse. A null array is treated as empty.</param>
public async void ParseArticles(params string[] refs)
{
    // An exception thrown outside the try block of an async void method cannot be observed
    // by the caller, so a null argument is handled up front.
    if (refs == null)
    {
        return;
    }

    foreach (var url in refs)
    {
        try
        {
            // Skip listings that are already stored.
            if (AvitoDb.Articles.Any(art => art.Url == url))
            {
                continue;
            }

            // Load the page off the calling thread (HtmlWeb.Load is a blocking call).
            HtmlDocument doc = await Task.Run(() => new HtmlWeb().Load(url)).ConfigureAwait(false);
            HtmlNode root = doc.DocumentNode;

            // Title: drop the 5 leading and 2 trailing characters of the header text.
            var title = root.SelectSingleNode("//header[@class='single-item-header b-with-padding']").InnerText.Remove(0, 5);
            title = title.Remove(title.Length - 2, 2);

            // Price: drop the 3 leading characters, keep the part before the first '&'
            // and strip spaces and line breaks.
            var price = root.SelectSingleNode("//span[@class='price-value']").InnerText.Remove(0, 3);
            price = price.Split('&')[0].Replace(" ", "").Replace("\n", "");

            // Owner name parsing is currently disabled:
            // var _person = _doc.DocumentNode.SelectSingleNode("//div[@class='person-name person-contact-name']").InnerText.Remove(0, 2).Replace("\n","");
            // _person = _person.Remove(_person.Length - 21, 21);

            // Address: drop the 10 leading characters and strip line breaks, tabs and spaces.
            var address = root.SelectSingleNode("//div[@class='person-address padding-bottom']").InnerText
                .Remove(0, 10)
                .Replace("\n", "")
                .Replace("\t", "")
                .Replace(" ", "");

            // Description: try the plain wrapper first; if it is absent, fall back to the
            // HTML variant and additionally pick up the optional shop description.
            HtmlNode info = root.SelectSingleNode("//div[@class='description-preview-wrapper']");
            HtmlNode shopInfo = null;
            if (info == null)
            {
                info = root.SelectSingleNode("//div[@class='description-preview-wrapper description-with-html']");
                shopInfo = root.SelectSingleNode("//div[@class='shop-description']");
            }

            string infoText = info.InnerText.Replace("\n", "");
            if (shopInfo != null)
            {
                infoText += shopInfo.InnerText.Replace("\n", "");
            }

            // Publication date: replace the relative words "сегодня" (today) and
            // "вчера" (yesterday) with a concrete "dd MMMM" date.
            var publicDate = root.SelectSingleNode("//div[@class='item-add-date']").InnerText;
            DateTime now = DateTime.Now;
            if (publicDate.Contains("сегодня"))
            {
                publicDate = publicDate.Replace("сегодня", now.ToString("dd MMMM"));
            }

            if (publicDate.Contains("вчера"))
            {
                // AddDays handles month and year boundaries; constructing a DateTime with
                // "Day - 1" throws ArgumentOutOfRangeException on the first day of a month.
                DateTime yesterday = now.Date.AddDays(-1);
                publicDate = publicDate.Replace("вчера", yesterday.ToString("dd MMMM"));
            }

            // Listing number: the item-id markup without its "Объявление №" label.
            var number = root.SelectSingleNode("//div[@class='item-id']").InnerHtml.Replace("Объявление №", "");

            // Phone number: resolved by _parsePhone from the seller-phone link
            // (the listing URL is passed along as the second argument).
            var phoneHref = root.SelectSingleNode("//a[@title='Телефон продавца']").Attributes["href"].Value;
            var phone = await _parsePhone(AvitoUrl + phoneHref + "?async", url);

            var article = new Articles
            {
                Url = url,
                Numder = number,
                Title = title,
                Info = infoText,
                Price = price,
                Phone = phone,
                Address = address,
                PublicDate = publicDate
            };

            AvitoDb.Articles.Add(article);
            await AvitoDb.SaveChangesAsync();
        }
        catch (Exception ex)
        {
            // Best effort: one broken listing must not stop the remaining ones.
            Logger.Invoke(ex.ToString());
        }
    }
}
/// <summary>
/// Parses the listing pages given in <paramref name="refs"/> and saves every listing that is
/// not yet stored as a new <c>Articles</c> row in <c>AvitoDb</c>.
/// </summary>
/// <remarks>
/// A URL already present in <c>AvitoDb.Articles</c> is skipped. An exception raised while
/// processing one URL is handed to <c>Logger</c>, after which the loop moves on to the next URL.
/// NOTE(review): <c>async void</c> prevents callers from awaiting completion; the signature is
/// left as it was so existing call sites keep compiling.
/// </remarks>
/// <param name="refs">URLs of the listing pages to parse. A null array is treated as empty.</param>
public async void ParseArticles(params string[] refs)
{
    // Guard here because exceptions escaping an async void method cannot be caught by callers.
    if (refs == null)
    {
        return;
    }

    foreach (var url in refs)
    {
        try
        {
            // Nothing to do when the listing is already in the database.
            if (AvitoDb.Articles.Any(art => art.Url == url))
            {
                continue;
            }

            // HtmlWeb.Load blocks, so it is run as a separate task.
            HtmlDocument doc = await Task.Run(() => new HtmlWeb().Load(url)).ConfigureAwait(false);
            HtmlNode root = doc.DocumentNode;

            // Title: header text without its first 5 and last 2 characters.
            var title = root.SelectSingleNode("//header[@class='single-item-header b-with-padding']").InnerText.Remove(0, 5);
            title = title.Remove(title.Length - 2, 2);

            // Price: text after the first 3 characters, cut at the first '&',
            // with spaces and line breaks removed.
            var price = root.SelectSingleNode("//span[@class='price-value']").InnerText.Remove(0, 3);
            price = price.Split('&')[0].Replace(" ", "").Replace("\n", "");

            // Owner name parsing is currently disabled:
            // var _person = _doc.DocumentNode.SelectSingleNode("//div[@class='person-name person-contact-name']").InnerText.Remove(0, 2).Replace("\n","");
            // _person = _person.Remove(_person.Length - 21, 21);

            // Address: text after the first 10 characters, without line breaks, tabs or spaces.
            var address = root.SelectSingleNode("//div[@class='person-address padding-bottom']").InnerText
                .Remove(0, 10)
                .Replace("\n", "")
                .Replace("\t", "")
                .Replace(" ", "");

            // Description: plain wrapper if present, otherwise the HTML variant plus the
            // optional shop description block.
            HtmlNode info = root.SelectSingleNode("//div[@class='description-preview-wrapper']");
            HtmlNode shopInfo = null;
            if (info == null)
            {
                info = root.SelectSingleNode("//div[@class='description-preview-wrapper description-with-html']");
                shopInfo = root.SelectSingleNode("//div[@class='shop-description']");
            }

            string infoText = info.InnerText.Replace("\n", "");
            if (shopInfo != null)
            {
                infoText += shopInfo.InnerText.Replace("\n", "");
            }

            // Publication date: the words "сегодня" (today) and "вчера" (yesterday)
            // are substituted with the matching "dd MMMM" date.
            var publicDate = root.SelectSingleNode("//div[@class='item-add-date']").InnerText;
            DateTime now = DateTime.Now;
            if (publicDate.Contains("сегодня"))
            {
                publicDate = publicDate.Replace("сегодня", now.ToString("dd MMMM"));
            }

            if (publicDate.Contains("вчера"))
            {
                // Use AddDays instead of "Day - 1": the latter yields day 0 on the first of a
                // month, which makes the DateTime constructor throw ArgumentOutOfRangeException.
                DateTime yesterday = now.Date.AddDays(-1);
                publicDate = publicDate.Replace("вчера", yesterday.ToString("dd MMMM"));
            }

            // Listing number: item-id markup with the "Объявление №" label removed.
            var number = root.SelectSingleNode("//div[@class='item-id']").InnerHtml.Replace("Объявление №", "");

            // Phone number: obtained via _parsePhone from the seller-phone link; the listing
            // URL is supplied as the second argument.
            var phoneHref = root.SelectSingleNode("//a[@title='Телефон продавца']").Attributes["href"].Value;
            var phone = await _parsePhone(AvitoUrl + phoneHref + "?async", url);

            var article = new Articles
            {
                Url = url,
                Numder = number,
                Title = title,
                Info = infoText,
                Price = price,
                Phone = phone,
                Address = address,
                PublicDate = publicDate
            };

            AvitoDb.Articles.Add(article);
            await AvitoDb.SaveChangesAsync();
        }
        catch (Exception ex)
        {
            // Log and continue so that a single failing page does not abort the batch.
            Logger.Invoke(ex.ToString());
        }
    }
}