コード例 #1
0
        /// <summary>
        /// Downloads the page addressed by the feed item's <c>Id</c> and cuts out the markup that starts at
        /// the <c>article_body</c> div and ends just before the poll marker or, when there is no poll
        /// marker, before the next <c>&lt;div class</c>.
        /// </summary>
        /// <param name="syndicationItem">Feed item whose <c>Id</c> is used as the request URL.</param>
        /// <returns>
        /// A <see cref="NewStrings"/> carrying the extracted fragment with a handful of HTML entities
        /// replaced by spaces, or <c>null</c> when the expected markers are not present in the page.
        /// </returns>
        public async Task <NewStrings> Parse(SyndicationItem syndicationItem)
        {
            string html;

            // A shared HttpClient would be preferable, but none is visible from this method;
            // at least release the client and the response deterministically.
            using (var httpClient = new HttpClient())
            using (var httpResponse = await httpClient.GetAsync(syndicationItem.Id))
            {
                html = await httpResponse.Content.ReadAsStringAsync();
            }

            // Ordinal searches: these are markup tokens, not linguistic text.
            int start = html.IndexOf("<div id=\"article_body\"", System.StringComparison.Ordinal);
            if (start < 0)
            {
                // Page layout is not the expected one; Substring would throw on -1.
                return null;
            }

            // Prefer the poll marker as the end of the article, otherwise fall back to the next div.
            int end = html.IndexOf("!--POLL--", start, System.StringComparison.Ordinal);
            if (end < 0)
            {
                end = html.IndexOf("<div class", start, System.StringComparison.Ordinal);
            }

            if (end < 0)
            {
                return null;
            }

            string articleMarkup = html.Substring(start, end - start);
            string cleaned = articleMarkup
                             .Replace("&nbsp;", " ")
                             .Replace("&mdash;", " ")
                             .Replace("&amp;", " ")
                             .Replace("&laquo;", " ")
                             .Replace("&raquo;", " ");

            // NOTE(review): the extracted article markup is stored in ImageUrl, exactly as before;
            // this looks like it may have been meant for NewsText - confirm against callers.
            return new NewStrings
            {
                ImageUrl = cleaned,
            };
        }
コード例 #2
0
        /// <summary>
        /// Downloads the page addressed by the feed item's <c>Id</c>, isolates the
        /// <c>main-container container</c> div, empties a fixed set of unwanted blocks inside it and
        /// extracts the first <c>https</c> <c>.jpg</c> URL found in the <c>img-responsive</c> image tag.
        /// </summary>
        /// <param name="syndicationItem">Feed item whose <c>Id</c> is used as the request URL.</param>
        /// <returns>
        /// A <see cref="NewStrings"/> with the image URL and the cleaned markup, or <c>null</c> when the
        /// main container, the top-right column, the image tag or a usable image URL cannot be found.
        /// </returns>
        public async Task <NewStrings> Parse(SyndicationItem syndicationItem)
        {
            string pageHtml;

            // A shared HttpClient would be preferable, but none is visible from this method;
            // at least release the client and the response deterministically.
            using (var httpClient = new HttpClient())
            using (var httpResponse = await httpClient.GetAsync(syndicationItem.Id))
            {
                pageHtml = await httpResponse.Content.ReadAsStringAsync();
            }

            var pageDoc = new HtmlDocument();
            pageDoc.LoadHtml(pageHtml);

            var container = pageDoc.DocumentNode.SelectSingleNode("//div[@class='main-container container']");
            if (container is null)
            {
                // Previously dereferenced unconditionally, which threw on unexpected layouts.
                return null;
            }

            // Work on a document that holds only the container's markup.
            var articleDoc = new HtmlDocument();
            articleDoc.LoadHtml(container.OuterHtml);

            // This block is mandatory: without it the page is treated as unparseable.
            var topRightColumn = articleDoc.DocumentNode.SelectSingleNode("//div[@class='col-md-4 col-xs-12 top-right-column']");
            if (topRightColumn is null)
            {
                return null;
            }

            topRightColumn.RemoveAllChildren();

            // Optional blocks: the first match of each is emptied when present, in this order.
            string[] optionalBlocks =
            {
                "//aside[@class='col-sm-3 sidebar sidebar-second']",
                "//div[@class='panel panel-info']",
                "//div[@class='field-items']",
                "//div[@class='field-item even']",
                "//section[@id='block-views-story-story-tags-block']",
            };

            foreach (string xpath in optionalBlocks)
            {
                articleDoc.DocumentNode.SelectSingleNode(xpath)?.RemoveAllChildren();
            }

            var text = articleDoc.DocumentNode.OuterHtml;

            #region PhotoUrl

            // The cleaned markup is parsed again before looking up the image, as in the original flow.
            var photoDoc = new HtmlDocument();
            photoDoc.LoadHtml(text);

            var photoNode = photoDoc.DocumentNode.SelectSingleNode("//img[@class='img-responsive']");
            if (photoNode is null)
            {
                return null;
            }

            string photoMarkup = photoNode.OuterHtml;

            // The URL starts right after the opening quote and ends with ".jpg" followed by a quote.
            int quoteIndex = photoMarkup.IndexOf("\"https:", System.StringComparison.Ordinal);
            if (quoteIndex < 0)
            {
                return null;
            }

            int urlStart = quoteIndex + 1;

            // Search from the URL start so an earlier ".jpg" cannot yield a negative length.
            int extensionIndex = photoMarkup.IndexOf(".jpg\"", urlStart, System.StringComparison.Ordinal);
            if (extensionIndex < 0)
            {
                return null;
            }

            string imageUrl = photoMarkup.Substring(urlStart, extensionIndex + ".jpg".Length - urlStart);

            #endregion

            return new NewStrings
            {
                ImageUrl = imageUrl,
                NewsText = text
            };
        }
コード例 #3
0
        /// <summary>
        /// Downloads the page addressed by the feed item's <c>Id</c>, isolates the
        /// <c>page_news noselect</c> div, empties a fixed set of unwanted blocks inside it and extracts
        /// the first <c>https</c> <c>.jpg</c> URL found in the <c>main_pic_container</c> div.
        /// </summary>
        /// <param name="syndicationItem">Feed item whose <c>Id</c> is used as the request URL.</param>
        /// <returns>
        /// A <see cref="NewStrings"/> with the image URL and the cleaned markup, or <c>null</c> when the
        /// news container, the share block, the <c>uninote console</c> block, the picture container or a
        /// usable image URL cannot be found.
        /// </returns>
        public async Task <NewStrings> Parse(SyndicationItem syndicationItem)
        {
            string pageHtml;

            // A shared HttpClient would be preferable, but none is visible from this method;
            // at least release the client and the response deterministically.
            using (var httpClient = new HttpClient())
            using (var httpResponse = await httpClient.GetAsync(syndicationItem.Id))
            {
                pageHtml = await httpResponse.Content.ReadAsStringAsync();
            }

            var pageDoc = new HtmlDocument();
            pageDoc.LoadHtml(pageHtml);

            var container = pageDoc.DocumentNode.SelectSingleNode("//div[@class='page_news noselect']");
            if (container is null)
            {
                return null;
            }

            // Work on a document that holds only the container's markup.
            var articleDoc = new HtmlDocument();
            articleDoc.LoadHtml(container.OuterHtml);

            // Mandatory blocks: if either is missing the page is treated as unparseable.
            string[] requiredBlocks =
            {
                "//div[@class='share_block']",
                "//div[@class='uninote console']",
            };

            foreach (string xpath in requiredBlocks)
            {
                var requiredNode = articleDoc.DocumentNode.SelectSingleNode(xpath);
                if (requiredNode is null)
                {
                    return null;
                }

                requiredNode.RemoveAllChildren();
            }

            // Optional blocks: the first match of each is emptied when present, in this order.
            // share_block is looked up again on purpose: the removal above may have changed
            // which element is the first match.
            string[] optionalBlocks =
            {
                "//div[@class='share_block']",
                "//div[@class='favorite_block']",
                "//div[@class='page_news_info clearfix']",
                "//div[@class='nepncont']",
            };

            foreach (string xpath in optionalBlocks)
            {
                articleDoc.DocumentNode.SelectSingleNode(xpath)?.RemoveAllChildren();
            }

            var text = articleDoc.DocumentNode.OuterHtml;

            #region PhotoUrl

            // The cleaned markup is parsed again before looking up the picture, as in the original flow.
            var photoDoc = new HtmlDocument();
            photoDoc.LoadHtml(text);

            var photoNode = photoDoc.DocumentNode.SelectSingleNode("//div[@class='main_pic_container']");
            if (photoNode is null)
            {
                return null;
            }

            string photoMarkup = photoNode.OuterHtml;

            // The URL starts right after the opening quote and runs through the first ".jpg".
            int quoteIndex = photoMarkup.IndexOf("\"https:", System.StringComparison.Ordinal);
            if (quoteIndex < 0)
            {
                return null;
            }

            int urlStart = quoteIndex + 1;

            // Search from the URL start so an earlier ".jpg" cannot yield a negative length.
            int extensionIndex = photoMarkup.IndexOf(".jpg", urlStart, System.StringComparison.Ordinal);
            if (extensionIndex < 0)
            {
                return null;
            }

            string imageUrl = photoMarkup.Substring(urlStart, extensionIndex + ".jpg".Length - urlStart);

            #endregion

            return new NewStrings
            {
                ImageUrl = imageUrl,
                NewsText = text
            };
        }
コード例 #4
0
        /// <summary>
        /// Downloads the page addressed by the feed item's <c>Id</c>, isolates the <c>news-posts</c> div,
        /// empties a fixed set of unwanted blocks inside it and extracts the first <c>https</c> URL ending
        /// in <c>.jpeg</c> (or, failing that, <c>.jpg</c>) from the <c>news-header__image</c> div.
        /// </summary>
        /// <param name="syndicationItem">Feed item whose <c>Id</c> is used as the request URL.</param>
        /// <returns>
        /// A <see cref="NewStrings"/> with the image URL and the cleaned markup, or <c>null</c> when the
        /// posts container, the social-likes block, the header image block or a usable image URL cannot
        /// be found.
        /// </returns>
        public async Task <NewStrings> Parse(SyndicationItem syndicationItem)
        {
            string pageHtml;

            // A shared HttpClient would be preferable, but none is visible from this method;
            // at least release the client and the response deterministically.
            using (var httpClient = new HttpClient())
            using (var httpResponse = await httpClient.GetAsync(syndicationItem.Id))
            {
                pageHtml = await httpResponse.Content.ReadAsStringAsync();
            }

            var pageDoc = new HtmlDocument();
            pageDoc.LoadHtml(pageHtml);

            var container = pageDoc.DocumentNode.SelectSingleNode("//div[@class='news-posts']");
            if (container is null)
            {
                // Previously dereferenced unconditionally, which threw on unexpected layouts.
                return null;
            }

            // Work on a document that holds only the container's markup.
            var articleDoc = new HtmlDocument();
            articleDoc.LoadHtml(container.OuterHtml);

            // This block is mandatory: without it the page is treated as unparseable.
            var socialLikes = articleDoc.DocumentNode.SelectSingleNode("//div[@class='social-likes news-reference__control']");
            if (socialLikes is null)
            {
                return null;
            }

            socialLikes.RemoveAllChildren();

            // Optional blocks: the first match of each is emptied when present, in this order.
            // Back-to-back repeats of the same selector were dropped because the second lookup
            // returns the node that was just emptied. Non-adjacent repeats of news-reference are
            // kept: an intervening removal may change which element is the first match.
            string[] optionalBlocks =
            {
                "//a[@href='https://catalog.onliner.by/furnituresafes?safetype%5B0%5D=seifbook&safetype%5Boperation%5D=union']",
                "//div[@class='news-widget']",
                "//p[@style='text-align: right;']",
                "//div[@class='news-reference']",
                "//div[@class='news-reference__list']",
                "//section[@id='news-reference__list']",
                "//div[@class='news-header__control']",
                "//div[@class='news-grid__part news-grid__part_2 news-helpers_hide_tablet js-banner-container']",
                "//div[@class='news-reference']",
                "//div[@class='news-incut news-incut_extended news-incut_position_right news-incut_shift_top news-helpers_hide_tablet']",
                "//div[@class='news-media news-media_condensed']",
                "//div[@class='news-reference']",
                "//h2",
                "//hr",
                "//strong",
                "//div[@class='news-reference__author news-helpers_show_mobile']",
                "//div[@class='news-reference']",
            };

            foreach (string xpath in optionalBlocks)
            {
                articleDoc.DocumentNode.SelectSingleNode(xpath)?.RemoveAllChildren();
            }

            var text = articleDoc.DocumentNode.OuterHtml;

            #region PhotoUrl

            // The cleaned markup is parsed again before looking up the image, as in the original flow.
            var photoDoc = new HtmlDocument();
            photoDoc.LoadHtml(text);

            var photoNode = photoDoc.DocumentNode.SelectSingleNode("//div[@class='news-header__image']");
            if (photoNode is null)
            {
                return null;
            }

            string photoMarkup = photoNode.OuterHtml;

            int urlStart = photoMarkup.IndexOf("https:", System.StringComparison.Ordinal);
            if (urlStart < 0)
            {
                // No absolute https URL in the image block; Substring would throw on -1.
                return null;
            }

            // Prefer ".jpeg", fall back to ".jpg". Searching from the URL start keeps the
            // computed length non-negative.
            string extension = ".jpeg";
            int extensionIndex = photoMarkup.IndexOf(extension, urlStart, System.StringComparison.Ordinal);
            if (extensionIndex < 0)
            {
                extension = ".jpg";
                extensionIndex = photoMarkup.IndexOf(extension, urlStart, System.StringComparison.Ordinal);
            }

            if (extensionIndex < 0)
            {
                return null;
            }

            string imageUrl = photoMarkup.Substring(urlStart, extensionIndex + extension.Length - urlStart);

            #endregion

            return new NewStrings
            {
                ImageUrl = imageUrl,
                NewsText = text
            };
        }