/// <summary>
/// Downloads the page addressed by the item's <c>Id</c> and cuts out the fragment that starts at
/// the <c>&lt;div id="article_body"</c> marker and ends just before <c>!--POLL--</c> (or, when that
/// marker is absent, before the first <c>&lt;div class</c> found from the start marker onwards).
/// </summary>
/// <param name="syndicationItem">Feed item whose <c>Id</c> is passed to <c>HttpClient.GetAsync</c> as the request URI.</param>
/// <returns>
/// A <c>NewStrings</c> whose <c>ImageUrl</c> carries the cleaned fragment, or <c>null</c> when the
/// start marker or both end markers are missing from the response.
/// </returns>
public async Task<NewStrings> Parse(SyndicationItem syndicationItem)
{
    string response;

    // Dispose the client and the response message instead of leaving them to the finalizer.
    // NOTE(review): a client per call is still wasteful; consider a shared HttpClient owned by the class.
    using (var httpClient = new HttpClient())
    using (var httpResponse = await httpClient.GetAsync(syndicationItem.Id))
    {
        response = await httpResponse.Content.ReadAsStringAsync();
    }

    // Markers are markup, not linguistic text, so compare ordinally.
    int start = response.IndexOf("<div id=\"article_body\"", System.StringComparison.Ordinal);
    if (start < 0)
    {
        // Previously Substring(-1) threw ArgumentOutOfRangeException here.
        return null;
    }

    string fromArticle = response.Substring(start);

    int end = fromArticle.IndexOf("!--POLL--", System.StringComparison.Ordinal);
    if (end < 0)
    {
        end = fromArticle.IndexOf("<div class", System.StringComparison.Ordinal);
    }

    if (end < 0)
    {
        // Neither end marker exists; Substring(0, -1) used to throw.
        return null;
    }

    string listGroup = fromArticle.Substring(0, end);

    // The search strings are reproduced verbatim from the original.
    // NOTE(review): they look like decoded HTML entities (non-breaking space, em dash, ampersand,
    // guillemets) - confirm the literals match what the site actually emits.
    var lastText = listGroup
        .Replace(" ", " ")
        .Replace("—", " ")
        .Replace("&", " ")
        .Replace(" ", " ")
        .Replace("«", " ")
        .Replace("»", " ");

    // NOTE(review): the cleaned article text is stored in ImageUrl, not NewsText. Kept as-is because
    // callers may rely on it - confirm this is intended.
    var fullNewsText = new NewStrings
    {
        ImageUrl = lastText,
    };

    return fullNewsText;
}
/// <summary>
/// Downloads the page addressed by the item's <c>Id</c>, keeps only the
/// <c>div.main-container.container</c> element, empties a fixed set of blocks inside it and
/// extracts the URL of the first <c>img.img-responsive</c> from the cleaned markup.
/// </summary>
/// <param name="syndicationItem">Feed item whose <c>Id</c> is passed to <c>HttpClient.GetAsync</c> as the request URI.</param>
/// <returns>
/// A <c>NewStrings</c> with <c>ImageUrl</c> and <c>NewsText</c> (the cleaned markup), or <c>null</c>
/// when the main container, the top-right column, the image element, or a <c>"https:</c> ...
/// <c>.jpg"</c> URL inside the image markup cannot be found.
/// </returns>
public async Task<NewStrings> Parse(SyndicationItem syndicationItem)
{
    string response;

    // Dispose the client and the response message instead of leaving them to the finalizer.
    // NOTE(review): a client per call is still wasteful; consider a shared HttpClient owned by the class.
    using (var httpClient = new HttpClient())
    using (var httpResponse = await httpClient.GetAsync(syndicationItem.Id))
    {
        response = await httpResponse.Content.ReadAsStringAsync();
    }

    var page = new HtmlDocument();
    page.LoadHtml(response);

    var container = page.DocumentNode.SelectSingleNode("//div[@class='main-container container']");
    if (container is null)
    {
        // Previously dereferenced unconditionally, which threw NullReferenceException.
        return null;
    }

    // Work on a document holding only the container so the "//" queries below cannot match
    // anything outside of it.
    var article = new HtmlDocument();
    article.LoadHtml(container.OuterHtml);

    // NOTE(review): a missing top-right column aborts the whole parse while every other block is
    // optional. Kept as in the original - confirm this is intended.
    if (!ClearChildren("//div[@class='col-md-4 col-xs-12 top-right-column']"))
    {
        return null;
    }

    // Order matters: emptying one block can remove nodes a later query would otherwise match.
    // NOTE(review): only the first match of each query is emptied; the element itself stays.
    string[] optionalBlocks =
    {
        "//aside[@class='col-sm-3 sidebar sidebar-second']",
        "//div[@class='panel panel-info']",
        "//div[@class='field-items']",
        "//div[@class='field-item even']",
        "//section[@id='block-views-story-story-tags-block']",
    };

    foreach (var xpath in optionalBlocks)
    {
        ClearChildren(xpath);
    }

    var text = article.DocumentNode.OuterHtml;

    // As in the original, the photo is looked up in a fresh parse of the cleaned markup, so the
    // image tag is searched in its re-serialized form.
    var photoDocument = new HtmlDocument();
    photoDocument.LoadHtml(text);

    var photoNode = photoDocument.DocumentNode.SelectSingleNode("//img[@class='img-responsive']");
    if (photoNode is null)
    {
        return null;
    }

    string imageUrl = ExtractImageUrl(photoNode.OuterHtml);
    if (imageUrl is null)
    {
        // The original fed -1 offsets into Substring here and threw or returned garbage.
        return null;
    }

    return new NewStrings
    {
        ImageUrl = imageUrl,
        NewsText = text
    };

    // Empties the first node matching xpath; returns false when nothing matches.
    bool ClearChildren(string xpath)
    {
        var node = article.DocumentNode.SelectSingleNode(xpath);
        if (node is null)
        {
            return false;
        }

        node.RemoveAllChildren();
        return true;
    }

    // Returns the text between the quote preceding "https:" and the end of ".jpg" (followed by a
    // closing quote), or null when either marker is missing.
    string ExtractImageUrl(string html)
    {
        int openingQuote = html.IndexOf("\"https:", System.StringComparison.Ordinal);
        if (openingQuote < 0)
        {
            return null;
        }

        // Search from the URL start so an earlier ".jpg\"" cannot yield a negative length.
        int extensionStart = html.IndexOf(".jpg\"", openingQuote, System.StringComparison.Ordinal);
        if (extensionStart < 0)
        {
            return null;
        }

        int urlStart = openingQuote + 1;
        return html.Substring(urlStart, extensionStart + ".jpg".Length - urlStart);
    }
}
/// <summary>
/// Downloads the page addressed by the item's <c>Id</c>, keeps only the
/// <c>div.page_news.noselect</c> element, empties a fixed set of blocks inside it and extracts the
/// image URL from the <c>div.main_pic_container</c> element of the cleaned markup.
/// </summary>
/// <param name="syndicationItem">Feed item whose <c>Id</c> is passed to <c>HttpClient.GetAsync</c> as the request URI.</param>
/// <returns>
/// A <c>NewStrings</c> with <c>ImageUrl</c> and <c>NewsText</c> (the cleaned markup), or <c>null</c>
/// when the news container, the share block, the <c>uninote console</c> block, the picture
/// container, or a <c>"https:</c> ... <c>.jpg</c> URL inside the picture markup cannot be found.
/// </returns>
public async Task<NewStrings> Parse(SyndicationItem syndicationItem)
{
    string response;

    // Dispose the client and the response message instead of leaving them to the finalizer.
    // NOTE(review): a client per call is still wasteful; consider a shared HttpClient owned by the class.
    using (var httpClient = new HttpClient())
    using (var httpResponse = await httpClient.GetAsync(syndicationItem.Id))
    {
        response = await httpResponse.Content.ReadAsStringAsync();
    }

    var page = new HtmlDocument();
    page.LoadHtml(response);

    var container = page.DocumentNode.SelectSingleNode("//div[@class='page_news noselect']");
    if (container is null)
    {
        return null;
    }

    // Work on a document holding only the container so the "//" queries below cannot match
    // anything outside of it.
    var article = new HtmlDocument();
    article.LoadHtml(container.OuterHtml);

    // These two blocks are mandatory in the original: a missing one aborts the parse.
    // NOTE(review): confirm that is intended, since the remaining blocks are optional.
    if (!ClearChildren("//div[@class='share_block']"))
    {
        return null;
    }

    if (!ClearChildren("//div[@class='uninote console']"))
    {
        return null;
    }

    // Order matters: emptying one block can remove nodes a later query would otherwise match.
    // That is why the second share_block lookup is kept - if the first share block sat inside
    // the console block it is gone by now and this lookup reaches the next one.
    // NOTE(review): only the first match of each query is emptied; the element itself stays.
    string[] optionalBlocks =
    {
        "//div[@class='share_block']",
        "//div[@class='favorite_block']",
        "//div[@class='page_news_info clearfix']",
        "//div[@class='nepncont']",
    };

    foreach (var xpath in optionalBlocks)
    {
        ClearChildren(xpath);
    }

    var text = article.DocumentNode.OuterHtml;

    // As in the original, the photo is looked up in a fresh parse of the cleaned markup, so the
    // picture container is searched in its re-serialized form.
    var photoDocument = new HtmlDocument();
    photoDocument.LoadHtml(text);

    var photoNode = photoDocument.DocumentNode.SelectSingleNode("//div[@class='main_pic_container']");
    if (photoNode is null)
    {
        return null;
    }

    string imageUrl = ExtractImageUrl(photoNode.OuterHtml);
    if (imageUrl is null)
    {
        // The original fed -1 offsets into Substring here and threw or returned garbage.
        return null;
    }

    return new NewStrings
    {
        ImageUrl = imageUrl,
        NewsText = text
    };

    // Empties the first node matching xpath; returns false when nothing matches.
    bool ClearChildren(string xpath)
    {
        var node = article.DocumentNode.SelectSingleNode(xpath);
        if (node is null)
        {
            return false;
        }

        node.RemoveAllChildren();
        return true;
    }

    // Returns the text between the quote preceding "https:" and the end of the first ".jpg"
    // after it, or null when either marker is missing.
    string ExtractImageUrl(string html)
    {
        int openingQuote = html.IndexOf("\"https:", System.StringComparison.Ordinal);
        if (openingQuote < 0)
        {
            return null;
        }

        // Search from the URL start so an earlier ".jpg" cannot yield a negative length.
        int extensionStart = html.IndexOf(".jpg", openingQuote, System.StringComparison.Ordinal);
        if (extensionStart < 0)
        {
            return null;
        }

        int urlStart = openingQuote + 1;
        return html.Substring(urlStart, extensionStart + ".jpg".Length - urlStart);
    }
}
/// <summary>
/// Downloads the page addressed by the item's <c>Id</c>, keeps only the <c>div.news-posts</c>
/// element, empties a fixed set of blocks inside it and extracts the image URL from the
/// <c>div.news-header__image</c> element of the cleaned markup.
/// </summary>
/// <param name="syndicationItem">Feed item whose <c>Id</c> is passed to <c>HttpClient.GetAsync</c> as the request URI.</param>
/// <returns>
/// A <c>NewStrings</c> with <c>ImageUrl</c> and <c>NewsText</c> (the cleaned markup), or <c>null</c>
/// when the posts container, the social-likes control, the header image element, or a
/// <c>https:</c> ... <c>.jpeg</c>/<c>.jpg</c> URL inside the header image markup cannot be found.
/// </returns>
public async Task<NewStrings> Parse(SyndicationItem syndicationItem)
{
    string response;

    // Dispose the client and the response message instead of leaving them to the finalizer.
    // NOTE(review): a client per call is still wasteful; consider a shared HttpClient owned by the class.
    using (var httpClient = new HttpClient())
    using (var httpResponse = await httpClient.GetAsync(syndicationItem.Id))
    {
        response = await httpResponse.Content.ReadAsStringAsync();
    }

    var page = new HtmlDocument();
    page.LoadHtml(response);

    var container = page.DocumentNode.SelectSingleNode("//div[@class='news-posts']");
    if (container is null)
    {
        // Previously dereferenced unconditionally, which threw NullReferenceException.
        return null;
    }

    // Work on a document holding only the container so the "//" queries below cannot match
    // anything outside of it.
    var article = new HtmlDocument();
    article.LoadHtml(container.OuterHtml);

    // NOTE(review): a missing social-likes control aborts the whole parse while every other block
    // is optional. Kept as in the original - confirm this is intended.
    if (!ClearChildren("//div[@class='social-likes news-reference__control']"))
    {
        return null;
    }

    // Same queries, same order as the original. Order matters: emptying one block can remove
    // nodes a later query would otherwise match, which is why 'news-reference' still appears
    // several times. Only immediately repeated lookups were dropped: the query returns the first
    // match, which the preceding call had just emptied, so the repeat did nothing.
    // NOTE(review): only the first match of each query is emptied (for example just the first
    // <strong> or <h2>); confirm whether all matches were meant.
    string[] optionalBlocks =
    {
        "//a[@href='https://catalog.onliner.by/furnituresafes?safetype%5B0%5D=seifbook&safetype%5Boperation%5D=union']",
        "//div[@class='news-widget']",
        "//p[@style='text-align: right;']",
        "//div[@class='news-reference']",
        "//div[@class='news-reference__list']",
        "//section[@id='news-reference__list']",
        "//div[@class='news-header__control']",
        "//div[@class='news-grid__part news-grid__part_2 news-helpers_hide_tablet js-banner-container']",
        "//div[@class='news-reference']",
        "//div[@class='news-incut news-incut_extended news-incut_position_right news-incut_shift_top news-helpers_hide_tablet']",
        "//div[@class='news-media news-media_condensed']",
        "//div[@class='news-reference']",
        "//h2",
        "//hr",
        "//strong",
        "//div[@class='news-reference__author news-helpers_show_mobile']",
        "//div[@class='news-reference']",
    };

    foreach (var xpath in optionalBlocks)
    {
        ClearChildren(xpath);
    }

    var text = article.DocumentNode.OuterHtml;

    // As in the original, the photo is looked up in a fresh parse of the cleaned markup, so the
    // header image block is searched in its re-serialized form.
    var photoDocument = new HtmlDocument();
    photoDocument.LoadHtml(text);

    var photoNode = photoDocument.DocumentNode.SelectSingleNode("//div[@class='news-header__image']");
    if (photoNode is null)
    {
        return null;
    }

    string imageUrl = ExtractImageUrl(photoNode.OuterHtml);
    if (imageUrl is null)
    {
        // The original fed -1 offsets into Substring here and threw.
        return null;
    }

    return new NewStrings
    {
        ImageUrl = imageUrl,
        NewsText = text
    };

    // Empties the first node matching xpath; returns false when nothing matches.
    bool ClearChildren(string xpath)
    {
        var node = article.DocumentNode.SelectSingleNode(xpath);
        if (node is null)
        {
            return false;
        }

        node.RemoveAllChildren();
        return true;
    }

    // Returns the text from "https:" through the end of the first ".jpeg" after it, falling back
    // to ".jpg" when no ".jpeg" follows; null when "https:" or both extensions are missing.
    string ExtractImageUrl(string html)
    {
        int urlStart = html.IndexOf("https:", System.StringComparison.Ordinal);
        if (urlStart < 0)
        {
            return null;
        }

        // ".jpeg" is tried first, matching the precedence of the original code.
        foreach (var extension in new[] { ".jpeg", ".jpg" })
        {
            // Search from the URL start so an earlier extension cannot yield a negative length.
            int extensionStart = html.IndexOf(extension, urlStart, System.StringComparison.Ordinal);
            if (extensionStart >= 0)
            {
                return html.Substring(urlStart, extensionStart + extension.Length - urlStart);
            }
        }

        return null;
    }
}