Esempio n. 1
0
        /// <summary>
        /// Builds a <see cref="RemoteArticleInfo"/> from the elements that the detected site
        /// handler extracts for <paramref name="articleUrl"/>.
        /// </summary>
        /// <param name="articleUrl">Absolute URL of the article; it is also parsed to obtain the host name.</param>
        /// <returns>
        /// The populated article info, or <c>null</c> when no article element could be obtained,
        /// or when an image element is present but has neither a <c>src</c> nor a
        /// <c>content</c> attribute.
        /// </returns>
        public RemoteArticleInfo GetRemoteArticleInfo(string articleUrl)
        {
            SiteDetect(articleUrl);

            // Site may still be unset after detection; then there is nothing to parse.
            ArticleElementInfo articleElement = Site?.GetArticleElement(articleUrl);
            if (articleElement == null)
            {
                return null;
            }

            if (articleElement.Content != null)
            {
                articleElement.Content = Utilities._ClearHtmlTag(articleElement.Content);
            }

            var remoteArticleInfo = new RemoteArticleInfo();

            // Domain and URL.
            Uri uri = new Uri(articleUrl);
            remoteArticleInfo.domain = uri.Host;
            remoteArticleInfo.url = articleUrl;

            // Title.
            remoteArticleInfo.title = articleElement.Title != null
                ? HttpUtility.HtmlDecode(articleElement.Title.InnerText.Trim())
                : string.Empty;

            // Excerpt: prefer the node text; fall back to its "content" attribute
            // (as-is, not decoded) and finally to an empty string when neither is available.
            if (articleElement.Excerpt != null)
            {
                string excerptText = HttpUtility.HtmlDecode(articleElement.Excerpt.InnerText.Trim());
                remoteArticleInfo.excerpt = string.IsNullOrEmpty(excerptText)
                    ? (articleElement.Excerpt.Attributes["content"]?.Value?.Trim() ?? string.Empty)
                    : excerptText;
            }
            else
            {
                remoteArticleInfo.excerpt = string.Empty;
            }

            // Content (re-checked because the cleaning step above reassigns it).
            remoteArticleInfo.content = articleElement.Content != null
                ? HttpUtility.HtmlDecode(articleElement.Content.InnerHtml)
                : string.Empty;

            // Author.
            remoteArticleInfo.author = articleElement.Author != null
                ? HttpUtility.HtmlDecode(articleElement.Author.InnerText.Trim())
                : string.Empty;

            // Lead image: "src" first, then "content". Both lookups are null-safe so that a
            // node carrying neither attribute yields null (and the early return below)
            // instead of a NullReferenceException.
            if (articleElement.Image != null)
            {
                var imageAttributes = articleElement.Image.Attributes;
                remoteArticleInfo.lead_image_url = imageAttributes["src"]?.Value?.Trim()
                                                   ?? imageAttributes["content"]?.Value?.Trim();
                if (remoteArticleInfo.lead_image_url == null)
                {
                    return null;
                }
            }

            // Publish time (raw text; not parsed here).
            remoteArticleInfo.date_published = articleElement.PublishedTime != null
                ? HttpUtility.HtmlDecode(articleElement.PublishedTime.InnerText.Trim())
                : string.Empty;

            // Keywords: comma-separated, collected until adding the next raw keyword would
            // bring the accumulated string to 255 characters or more.
            if (articleElement.Keyword != null)
            {
                int keywordCount = articleElement.Keyword.Count;
                for (int i = 0; i < keywordCount; i++)
                {
                    string rawKeyword = articleElement.Keyword[i].InnerText.Trim().Replace("#", "");

                    // The limit is checked against the raw (not yet decoded) text.
                    if ((remoteArticleInfo.keyword + rawKeyword).Length >= 255)
                    {
                        break;
                    }

                    // '#' was stripped above, so "&2" is turned back into "&#2" before decoding.
                    remoteArticleInfo.keyword += HttpUtility.HtmlDecode(rawKeyword.Replace("&2", "&#2"));
                    if (i != keywordCount - 1)
                    {
                        remoteArticleInfo.keyword += ',';
                    }
                }
            }
            else
            {
                remoteArticleInfo.keyword = string.Empty;
            }

            // Final clean-up: generic content cleaning, then the site's own strings to strip.
            remoteArticleInfo.content = Utilities._ClearContent(remoteArticleInfo.content);
            var replaceObj = Site.GetStringToReplace();
            if (replaceObj != null)
            {
                foreach (var obj in replaceObj)
                {
                    remoteArticleInfo.content = remoteArticleInfo.content.Replace(obj.ToString(), string.Empty);
                }
            }

            return remoteArticleInfo;
        }
Esempio n. 2
0
        /// <summary>
        /// Normalizes a scraped article and stores it together with its keywords and
        /// related-article links, unless an article with the same source URL or friendly
        /// title already exists.
        /// </summary>
        /// <param name="username">Username looked up in <c>Users</c> to fill CreatedBy / LastModifiedBy.</param>
        /// <param name="article">Scraped article; several of its fields are normalized in place.</param>
        /// <param name="category">Friendly name used to look up the category of the new article.</param>
        static void InsertArticle(string username, RemoteArticleInfo article, string category)
        {
            // A missing author is treated as empty so the Split/Length/Trim calls below cannot throw.
            article.author = article.author ?? string.Empty;

            if (article.url.Contains("http://bestie.vn/") || article.url.Contains("http://thegioitre.vn/"))
            {
                article.content = System.Net.WebUtility.HtmlDecode(article.content);
                article.keyword = string.Empty;
                article.author  = article.author.Split('-')[0].Trim();
            }

            if (article.author.Length > 64)
            {
                article.author = string.Empty;
            }

            article.keyword = article.keyword ?? string.Empty;
            if (article.keyword.Length > 255)
            {
                // Keep every keyword that still fits, preserving the comma separators so the
                // list can be split again further down.
                string kept = string.Empty;
                foreach (string piece in article.keyword.Split(','))
                {
                    string candidate = kept.Length == 0 ? piece : kept + "," + piece;
                    if (candidate.Length < 255)
                    {
                        kept = candidate;
                    }
                }
                article.keyword = kept;
            }

            // An article without image, content or title is not stored.
            if (string.IsNullOrEmpty(article.lead_image_url) ||
                string.IsNullOrWhiteSpace(article.content) ||
                string.IsNullOrWhiteSpace(article.title))
            {
                return;
            }

            article.excerpt = article.excerpt ?? string.Empty;
            if (article.excerpt.Length > 999)
            {
                article.excerpt = article.excerpt.Substring(0, 999);
            }

            // Headline re-validation.
            article.excerpt = RevalidateHeadline(article.excerpt);

            // Publish time: fall back to the current time when the text cannot be parsed.
            article.date_published = (article.date_published ?? string.Empty).Replace("  ", " ");
            article.date_published = DateTimeParse(article.date_published.Trim());
            article.date_published = DateTime.TryParse(article.date_published.Trim(), out var publishTime)
                ? publishTime.ToString(CultureInfo.InvariantCulture)
                : DateTime.Now.ToString(CultureInfo.InvariantCulture);

            // Author.
            if (!string.IsNullOrEmpty(article.author))
            {
                article.author = RemoveAtEnd(article.author);
            }

            // The same trimmed title feeds both the duplicate check and the stored entity.
            string title         = article.title.Trim();
            string friendlyTitle = VietCMS.Framework.Core.Common.WebControl.ToFriendlyString(title);
            string sourceUrl     = article.url;

            bool alreadyStored = _appDbContext.Articles.Any(x => x.SourceUrl.Equals(sourceUrl) ||
                                                                 x.FriendlyTitle.Equals(friendlyTitle));
            if (alreadyStored)
            {
                return;
            }

            List <Keyword> keywords         = new List <Keyword>();
            List <string>  friendlyKeywords = new List <string>();
            // Guards against the same keyword appearing twice in one article: the first copy is
            // not saved yet when the second one is looked up, so it would be inserted twice.
            HashSet <string> seenTitles = new HashSet <string>(StringComparer.Ordinal);

            foreach (string rawKeyword in article.keyword.Split(','))
            {
                string keywordTitle = rawKeyword.Trim();
                if (keywordTitle.Length > 40 || keywordTitle.Length < 2 || !seenTitles.Add(keywordTitle))
                {
                    continue;
                }

                string friendlyKeyword = VietCMS.Framework.Core.Common.WebControl.ToFriendlyString(keywordTitle);
                friendlyKeywords.Add(friendlyKeyword);

                // Reuse an existing keyword; otherwise create it with the same trimmed title
                // that later lookups will search for.
                Keyword keyword = _appDbContext.Keywords.FirstOrDefault(x => x.Title.Equals(keywordTitle));
                if (keyword == null)
                {
                    keyword = new Keyword()
                    {
                        Title         = keywordTitle,
                        CreatedAt     = DateTime.Now,
                        FriendlyTitle = friendlyKeyword,
                        Type          = "Keyword",
                        VisitCount    = 0
                    };
                    _appDbContext.Keywords.Add(keyword);
                }
                keywords.Add(keyword);
            }

            // Persist new keywords first; the related-article query below filters on k.Id.
            _appDbContext.SaveChanges();

            // Collect articles sharing at least one keyword; Index counts the shared keywords.
            List <RelatedArticle> relatedArticles = new List <RelatedArticle>();
            foreach (Keyword k in keywords)
            {
                var keywordId = k.Id;

                // Order before Take so the 20 most recent articles are selected,
                // rather than sorting an arbitrary 20 rows.
                List <Article> articles = _appDbContext.ArticleKeywords
                                          .Where(y => y.Keyword.Id == keywordId)
                                          .OrderByDescending(x => x.Article.CreatedAt)
                                          .Take(20)
                                          .Select(x => x.Article)
                                          .ToList();

                foreach (Article item in articles)
                {
                    RelatedArticle existing = relatedArticles.FirstOrDefault(ra => ra.Origin.Id == item.Id);
                    if (existing != null)
                    {
                        existing.Index++;
                        continue;
                    }

                    relatedArticles.Add(new RelatedArticle()
                    {
                        Origin    = item,
                        Index     = 1,
                        CreatedAt = DateTime.Now,
                        Type      = "Related"
                    });
                }
            }

            var user = _appDbContext.Users.FirstOrDefault(x => x.Username.Equals(username));

            Article a = new Article()
            {
                Title            = title,
                Thumbnail        = article.lead_image_url,
                Category         = _appDbContext.Categories.FirstOrDefault(x => x.FriendlyName.Equals(category)),
                Headlines        = article.excerpt.Trim(),
                Content          = article.content,
                Keywords         = article.keyword.Trim(),
                FriendlyKeywords = string.Join(",", friendlyKeywords),
                Source           = article.domain,
                SourceUrl        = sourceUrl,
                FriendlyTitle    = friendlyTitle,
                AuthorAlias      = article.author.Trim(),
                Status           = Const.ArticleStatusActive,
                CreatedAt        = DateTime.Now,
                CreatedBy        = user,
                LastModifiedBy   = user,
                LastModifiedAt   = DateTime.Now,
                ViewCount        = 0,
                SubTitle         = title.Length > 50 ? title.Substring(0, 47) + "..." : title
            };

            foreach (Keyword keyword in keywords)
            {
                _appDbContext.ArticleKeywords.Add(new ArticleKeyword()
                {
                    Article = a,
                    Keyword = keyword
                });
            }

            // Store the relation in both directions: existing -> new and new -> existing.
            foreach (RelatedArticle r in relatedArticles)
            {
                r.Related = a;
                _appDbContext.RelatedArticles.Add(r);
                _appDbContext.RelatedArticles.Add(new RelatedArticle()
                {
                    CreatedAt = DateTime.Now,
                    Index     = r.Index,
                    Origin    = a,
                    Related   = r.Origin,
                    Type      = "Related"
                });
            }

            _appDbContext.Articles.Add(a);
            _appDbContext.SaveChanges();

            // Record the article only after it has been saved successfully.
            AddedArticles.Add(a);
        }