Пример #1
0
 /// <summary>
 /// Updates the stored site's deletion flag from <paramref name="site"/>.
 /// </summary>
 /// <remarks>
 /// When <c>site.IsParsed</c> is true, the same <c>IsDeleted</c> value is also applied to
 /// every text whose <c>SiteId</c> equals the site's id. Nothing is saved when no site
 /// with that id is found.
 /// </remarks>
 /// <param name="site">Site carrying the id to look up and the flag values to apply.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="site"/> is null.</exception>
 public async Task EditSite(Site site)
 {
     if (site == null)
     {
         throw new System.ArgumentNullException("site");
     }

     using (var db = new JoogleContext())
     {
         var exist = db.Sites.FirstOrDefault(x => x.Id == site.Id);
         if (exist == null)
         {
             return;
         }

         if (site.IsParsed)
         {
             // Materialize first so the entities are modified after the query has completed.
             var texts = db.Texts.Where(x => x.SiteId == site.Id).ToList();
             foreach (var text in texts)
             {
                 text.IsDeleted = site.IsDeleted;
             }
         }

         exist.IsDeleted = site.IsDeleted;

         // Await the save so the method is genuinely asynchronous; ConfigureAwait(false)
         // keeps callers that block on the returned task from deadlocking.
         await db.SaveChangesAsync().ConfigureAwait(false);
     }
 }
Пример #2
0
 /// <summary>
 /// Creates a site record for the URL in <paramref name="request"/>.
 /// </summary>
 /// <remarks>
 /// The URL is trimmed of surrounding whitespace and trailing slashes and lowercased
 /// before it is compared and stored. The call does nothing when the URL is blank or
 /// when a site with the same normalized URL already exists.
 /// </remarks>
 /// <param name="request">Site creation model; its <c>Url</c> is the address to register.</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="request"/> is null.</exception>
 public async Task CreateSite(CreateSiteRequest request)
 {
     if (request == null)
     {
         throw new System.ArgumentNullException("request");
     }

     if (string.IsNullOrWhiteSpace(request.Url))
     {
         return;
     }

     // Normalize once; invariant casing keeps the stored URL independent of the
     // current thread culture.
     var url = request.Url.Trim().TrimEnd('/').ToLowerInvariant();
     if (url.Length == 0)
     {
         // The input consisted only of slashes/whitespace - nothing to register.
         return;
     }

     using (var db = new JoogleContext())
     {
         if (db.Sites.Any(x => x.Url == url))
         {
             return;
         }

         db.Sites.Add(new Site
         {
             Url        = url,
             DateModify = DateTime.UtcNow
         });

         await db.SaveChangesAsync().ConfigureAwait(false);
     }
 }
Пример #3
0
 /// <summary>
 /// Deletes a site together with all texts that reference it.
 /// </summary>
 /// <remarks>
 /// The operation is best-effort: any exception is written to the trace output and is
 /// not propagated to the caller. Nothing happens when no site with the given id exists.
 /// </remarks>
 /// <param name="site">Site whose <c>Id</c> identifies the record to delete.</param>
 public async Task DeleteSite(Site site)
 {
     try
     {
         using (var db = new JoogleContext())
         {
             var exist = db.Sites.FirstOrDefault(x => x.Id == site.Id);
             if (exist == null)
             {
                 return;
             }

             // Remove every related text, not only the first match.
             var siteId = exist.Id;
             var texts  = db.Texts.Where(x => x.SiteId == siteId).ToList();
             db.Texts.RemoveRange(texts);

             db.Sites.Remove(exist);
             await db.SaveChangesAsync().ConfigureAwait(false);
         }
     }
     catch (System.Exception ex)
     {
         // Stay non-throwing for callers, but leave a diagnostic record of the failure.
         System.Diagnostics.Trace.TraceError("DeleteSite failed: {0}", ex);
     }
 }
Пример #4
0
        /// <summary>
        /// Downloads the page of a site, stores its heading/paragraph text and queues
        /// newly discovered links as sites.
        /// </summary>
        /// <remarks>
        /// Steps performed:
        /// 1. Returns immediately when no stored site has the id of <paramref name="obj"/>.
        /// 2. Loads <c>obj.Url</c> and concatenates the text of all h1-h4 and p elements.
        /// 3. Collects href values starting with "http", strips trailing slashes, lowercases
        ///    them, and keeps those that are neither the current site's URL nor already stored.
        /// 4. Adds a text record when the collected text is not blank, marks the site as
        ///    parsed and saves everything in one call.
        /// The operation is best-effort: any exception is written to the trace output and
        /// is not propagated.
        /// </remarks>
        /// <param name="obj">Site to parse; its <c>Id</c> and <c>Url</c> are used.</param>
        /// <returns>A task that completes when parsing and saving have finished.</returns>
        private async Task SiteParse(Site obj)
        {
            try
            {
                using (var db = new JoogleContext())
                {
                    var exist = db.Sites.FirstOrDefault(x => x.Id == obj.Id);
                    if (exist == null)
                    {
                        return;
                    }

                    var config = Configuration.Default.WithDefaultLoader();

                    // Await the download instead of blocking the thread on Task.Result.
                    var html = await BrowsingContext.New(config).OpenAsync(obj.Url).ConfigureAwait(false);

                    var hrefs = html.QuerySelectorAll("a")
                                .Where(x => x.Attributes["href"] != null)
                                .Select(x => x.Attributes["href"].Value)
                                .Where(x => x.StartsWith("http", StringComparison.Ordinal))
                                .ToList();

                    var result = new StringBuilder();
                    foreach (var selector in html.QuerySelectorAll("h1, h2, h3, h4, p"))
                    {
                        result.Append(" ");
                        result.Append(selector.TextContent);
                        result.Append(" ");
                    }

                    // Links that differ only by case or a trailing slash normalize to the same
                    // URL; the set ensures each normalized URL is queued at most once, because
                    // the database lookup below cannot see sites that are not saved yet.
                    var seenUrls = new HashSet<string>(StringComparer.Ordinal);
                    var newSites = new List<Site>();
                    foreach (var href in hrefs)
                    {
                        var url = href.TrimEnd('/').ToLowerInvariant();
                        if (url == exist.Url || !seenUrls.Add(url))
                        {
                            continue;
                        }

                        if (db.Sites.Any(x => x.Url == url))
                        {
                            continue;
                        }

                        newSites.Add(new Site
                        {
                            Url        = url,
                            DateModify = DateTime.UtcNow
                        });
                    }

                    var text = result.ToString();
                    if (!string.IsNullOrWhiteSpace(text))
                    {
                        db.Texts.Add(new Text
                        {
                            SiteId     = exist.Id,
                            Url        = obj.Url,
                            Title      = text,
                            DateModify = DateTime.UtcNow
                        });
                    }

                    exist.IsParsed = true;
                    db.Sites.AddRange(newSites);
                    await db.SaveChangesAsync().ConfigureAwait(false);
                }
            }
            catch (System.Exception ex)
            {
                // Stay non-throwing for callers, but leave a diagnostic record of the failure.
                System.Diagnostics.Trace.TraceError("SiteParse failed: {0}", ex);
            }
        }