Esempio n. 1
0
        /// <summary>
        /// Downloads each of the given ad pages, extracts the ad fields from the HTML
        /// and stores every ad whose URL is not yet present in <c>AvitoDb.Articles</c>.
        /// </summary>
        /// <param name="refs">URLs of the ad pages to parse. A null array is treated as empty.</param>
        /// <remarks>
        /// URLs are processed one after another. Any exception raised while handling a
        /// URL (including a missing HTML node) is handed to <c>Logger</c> and the loop
        /// continues with the next URL.
        /// </remarks>
        public async void ParseArticles(params string[] refs)
        {
            // A null array would make the foreach throw outside the try block, and an
            // exception escaping an async void method cannot be observed by the caller.
            if (refs == null)
            {
                return;
            }

            foreach (var url in refs)
            {
                try
                {
                    // Skip ads that are already stored.
                    Articles _article = AvitoDb.Articles.FirstOrDefault(art => art.Url == url);

                    if (_article != null)
                    {
                        continue;
                    }

                    // Load the page content off the calling thread (HtmlWeb.Load is synchronous).
                    HtmlDocument _doc = await Task.Run(() => new HtmlWeb().Load(url)).ConfigureAwait(false);

                    // Title: drop the first 5 and the last 2 characters of the header text
                    // (presumably markup whitespace around the title - verify against the page).
                    var _title = _doc.DocumentNode.SelectSingleNode("//header[@class='single-item-header b-with-padding']").InnerText.Remove(0, 5);
                    _title = _title.Remove(_title.Length - 2, 2);

                    // Price: drop the first 3 characters, keep the part before the first '&'
                    // and strip spaces and line breaks.
                    var _price = _doc.DocumentNode.SelectSingleNode("//span[@class='price-value']").InnerText.Remove(0, 3);
                    _price = _price.Split('&')[0].Replace(" ", "").Replace("\n", "");

                    // Owner name parsing is currently disabled.
                    // var _person = _doc.DocumentNode.SelectSingleNode("//div[@class='person-name person-contact-name']").InnerText.Remove(0, 2).Replace("\n","");
                    // _person = _person.Remove(_person.Length - 21, 21);

                    // Address: drop the first 10 characters and strip line breaks, tabs and 5-space runs.
                    var _address = _doc.DocumentNode.SelectSingleNode("//div[@class='person-address padding-bottom']").InnerText.Remove(0, 10).Replace("\n", "").Replace("\t", "").Replace("     ", "");

                    // Description: fall back to the HTML variant of the wrapper (plus the
                    // shop description block) when the plain wrapper is absent.
                    var      _info  = _doc.DocumentNode.SelectSingleNode("//div[@class='description-preview-wrapper']");
                    HtmlNode _info2 = null;
                    if (_info == null)
                    {
                        _info  = _doc.DocumentNode.SelectSingleNode("//div[@class='description-preview-wrapper description-with-html']");
                        _info2 = _doc.DocumentNode.SelectSingleNode("//div[@class='shop-description']");
                    }

                    string _infoStr = _info.InnerText.Replace("\n", "");
                    if (_info2 != null)
                    {
                        _infoStr += _info2.InnerText.Replace("\n", "");
                    }

                    // Publication date: replace the relative words "сегодня" (today) and
                    // "вчера" (yesterday) with a concrete "dd MMMM" date.
                    var      _publicData = _doc.DocumentNode.SelectSingleNode("//div[@class='item-add-date']").InnerText;
                    DateTime _now        = DateTime.Now;
                    if (_publicData.Contains("сегодня"))
                    {
                        _publicData = _publicData.Replace("сегодня", _now.ToString("dd MMMM"));
                    }
                    if (_publicData.Contains("вчера"))
                    {
                        // AddDays handles month/year boundaries; building a DateTime with
                        // "Day - 1" throws ArgumentOutOfRangeException on the 1st of a month.
                        _publicData = _publicData.Replace("вчера", _now.AddDays(-1).ToString("dd MMMM"));
                    }

                    // Ad number: the item-id block without its "Объявление №" label.
                    var _number = _doc.DocumentNode.SelectSingleNode("//div[@class='item-id']").InnerHtml.Replace("Объявление №", "");

                    // Phone number: resolved by _parsePhone from the seller-phone link.
                    var _phone = await _parsePhone(AvitoUrl + _doc.DocumentNode.SelectSingleNode("//a[@title='Телефон продавца']").Attributes["href"].Value + "?async", url);

                    _article = new Articles
                    {
                        Url        = url,
                        Numder     = _number,
                        Title      = _title,
                        Info       = _infoStr,
                        Price      = _price,
                        Phone      = _phone,
                        Address    = _address,
                        PublicDate = _publicData
                    };

                    AvitoDb.Articles.Add(_article);

                    await AvitoDb.SaveChangesAsync();
                }
                catch (Exception ex)
                {
                    // Best effort: report the failure and go on with the next URL.
                    Logger.Invoke(ex.ToString());
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// For every URL in <paramref name="refs"/> that is not already stored in
        /// <c>AvitoDb.Articles</c>: loads the page, pulls the ad fields out of the HTML
        /// via XPath, resolves the seller phone through <c>_parsePhone</c> and saves a
        /// new <c>Articles</c> record.
        /// </summary>
        /// <param name="refs">URLs of the ad pages to parse. A null array is treated as empty.</param>
        /// <remarks>
        /// Failures are handled per URL: the exception text is passed to <c>Logger</c>
        /// and processing continues with the next URL.
        /// </remarks>
        public async void ParseArticles(params string[] refs)
        {
            // Guard against a null array: the foreach below sits outside the try block,
            // and exceptions escaping an async void method cannot be caught by callers.
            if (refs == null)
            {
                return;
            }

            foreach (var url in refs)
            {
                try
                {
                    // Nothing to do when this URL has already been saved.
                    Articles _article = AvitoDb.Articles.FirstOrDefault(art => art.Url == url);
                    if (_article != null)
                    {
                        continue;
                    }

                    // Run the synchronous HtmlWeb.Load on a pool thread and await the result.
                    Task<HtmlDocument> _task = Task.Run(() => new HtmlWeb().Load(url));
                    HtmlDocument _doc = await _task.ConfigureAwait(false);
                    HtmlNode _root = _doc.DocumentNode;

                    // Title: header text without its first 5 and last 2 characters
                    // (presumably surrounding markup whitespace - verify against the page).
                    var _title = _root.SelectSingleNode("//header[@class='single-item-header b-with-padding']").InnerText.Remove(0, 5);
                    _title = _title.Remove(_title.Length - 2, 2);

                    // Price: skip 3 leading characters, cut at the first '&', remove spaces/newlines.
                    var _price = _root.SelectSingleNode("//span[@class='price-value']").InnerText.Remove(0, 3);
                    _price = _price.Split('&')[0].Replace(" ", "").Replace("\n", "");

                    // Owner name parsing is currently disabled.
                    // var _person = _doc.DocumentNode.SelectSingleNode("//div[@class='person-name person-contact-name']").InnerText.Remove(0, 2).Replace("\n","");
                    // _person = _person.Remove(_person.Length - 21, 21);

                    // Address: skip 10 leading characters, remove newlines, tabs and 5-space runs.
                    var _address = _root.SelectSingleNode("//div[@class='person-address padding-bottom']").InnerText.Remove(0, 10).Replace("\n", "").Replace("\t", "").Replace("     ", "");

                    // Description: use the plain wrapper when present, otherwise the HTML
                    // wrapper followed by the optional shop description.
                    var _info = _root.SelectSingleNode("//div[@class='description-preview-wrapper']");
                    HtmlNode _info2 = null;
                    if (_info == null)
                    {
                        _info = _root.SelectSingleNode("//div[@class='description-preview-wrapper description-with-html']");
                        _info2 = _root.SelectSingleNode("//div[@class='shop-description']");
                    }

                    string _infoStr = _info.InnerText.Replace("\n", "");
                    if (_info2 != null)
                    {
                        _infoStr += _info2.InnerText.Replace("\n", "");
                    }

                    // Publication date: turn "сегодня" (today) / "вчера" (yesterday) into "dd MMMM".
                    var _publicData = _root.SelectSingleNode("//div[@class='item-add-date']").InnerText;
                    DateTime _today = DateTime.Today;
                    if (_publicData.Contains("сегодня"))
                    {
                        _publicData = _publicData.Replace("сегодня", _today.ToString("dd MMMM"));
                    }
                    if (_publicData.Contains("вчера"))
                    {
                        // Date arithmetic instead of "Day - 1": the latter yields day 0 on the
                        // first of a month and makes the DateTime constructor throw.
                        DateTime _yesterday = _today.AddDays(-1);
                        _publicData = _publicData.Replace("вчера", _yesterday.ToString("dd MMMM"));
                    }

                    // Ad number: item-id block with the "Объявление №" label removed.
                    var _number = _root.SelectSingleNode("//div[@class='item-id']").InnerHtml.Replace("Объявление №", "");

                    // Phone number: built from the seller-phone link and resolved by _parsePhone.
                    var _phoneHref = _root.SelectSingleNode("//a[@title='Телефон продавца']").Attributes["href"].Value;
                    var _phone = await _parsePhone(AvitoUrl + _phoneHref + "?async", url);

                    _article = new Articles();
                    _article.Url = url;
                    _article.Numder = _number;
                    _article.Title = _title;
                    _article.Info = _infoStr;
                    _article.Price = _price;
                    _article.Phone = _phone;
                    _article.Address = _address;
                    _article.PublicDate = _publicData;

                    AvitoDb.Articles.Add(_article);

                    await AvitoDb.SaveChangesAsync();
                }
                catch (Exception ex)
                {
                    // Best effort: log and move on to the next URL.
                    Logger.Invoke(ex.ToString());
                }
            }
        }