/// <summary>
/// Updates the deletion flag of an existing site.
/// </summary>
/// <remarks>
/// The site is looked up by <c>site.Id</c>; when no matching row exists nothing is changed.
/// When the incoming <c>site.IsParsed</c> is set, every text whose <c>SiteId</c> equals
/// <c>site.Id</c> receives the same <c>IsDeleted</c> value as the site.
/// </remarks>
/// <param name="site">Site carrying the identifier to look up and the new <c>IsDeleted</c> value.</param>
/// <returns>A task that completes once the changes have been saved.</returns>
public async Task EditSite(Site site)
{
    using (var db = new JoogleContext())
    {
        var exist = db.Sites.FirstOrDefault(x => x.Id == site.Id);
        if (exist == null)
        {
            return;
        }

        if (site.IsParsed)
        {
            // Keep the texts of the site in step with the site's own deletion flag.
            foreach (var text in db.Texts.Where(x => x.SiteId == site.Id).ToList())
            {
                text.IsDeleted = site.IsDeleted;
            }
        }

        exist.IsDeleted = site.IsDeleted;

        // The method is async, so the save is awaited instead of blocking the calling thread.
        await db.SaveChangesAsync();
    }
}
/// <summary>
/// Creates a site for the requested URL unless one is already stored.
/// </summary>
/// <remarks>
/// The URL is normalised by trimming trailing '/' characters and lower-casing it;
/// the normalised value is used both for the duplicate lookup and for the stored row.
/// </remarks>
/// <param name="request">Site creation model; its <c>Url</c> is the address to register.</param>
/// <returns>A task that completes once the new site has been saved, or immediately when it already exists.</returns>
public async Task CreateSite(CreateSiteRequest request)
{
    using (var db = new JoogleContext())
    {
        // Normalise once so the lookup and the inserted value can never disagree.
        var url = request.Url.TrimEnd('/').ToLower();

        var exist = db.Sites.FirstOrDefault(x => x.Url == url);
        if (exist != null)
        {
            return;
        }

        db.Sites.Add(new Site
        {
            Url = url,
            DateModify = DateTime.UtcNow
        });

        // The method is async, so the save is awaited instead of blocking the calling thread.
        await db.SaveChangesAsync();
    }
}
/// <summary>
/// Deletes a site together with all texts that reference it.
/// </summary>
/// <remarks>
/// The site is looked up by <c>site.Id</c>; when it does not exist nothing happens.
/// The operation is best-effort: failures are written to the trace output and are not
/// propagated to the caller.
/// </remarks>
/// <param name="site">Site whose <c>Id</c> identifies the row to delete.</param>
/// <returns>A task that completes once the deletion has been attempted.</returns>
public async Task DeleteSite(Site site)
{
    try
    {
        using (var db = new JoogleContext())
        {
            var exist = db.Sites.FirstOrDefault(x => x.Id == site.Id);
            if (exist == null)
            {
                return;
            }

            // Remove every text of the site, not only the first one, so that no
            // orphaned rows are left behind when the site itself is removed.
            var siteId = exist.Id;
            var texts = db.Texts.Where(x => x.SiteId == siteId).ToList();
            db.Texts.RemoveRange(texts);

            db.Sites.Remove(exist);
            await db.SaveChangesAsync();
        }
    }
    catch (Exception ex)
    {
        // Callers rely on this method not throwing; record the failure instead of hiding it.
        System.Diagnostics.Trace.TraceError("DeleteSite failed: {0}", ex);
    }
}
/// <summary>
/// Parses a site: stores the text of its headings and paragraphs and registers
/// the absolute links found on the page as new sites.
/// </summary>
/// <remarks>
/// Nothing is done when the site is not present in the database. Links are normalised
/// (trailing '/' trimmed, lower-cased) and skipped when they point to the parsed site
/// itself, are already stored, or were already collected from the same page.
/// The operation is best-effort: failures are written to the trace output and are not
/// propagated to the caller.
/// </remarks>
/// <param name="obj">Site to parse; its <c>Id</c> is used for the lookup and its <c>Url</c> is the page that is loaded.</param>
/// <returns>A task that completes once parsing has been attempted.</returns>
private async Task SiteParse(Site obj)
{
    try
    {
        using (var db = new JoogleContext())
        {
            var exist = db.Sites.FirstOrDefault(x => x.Id == obj.Id);
            if (exist == null)
            {
                return;
            }

            var config = Configuration.Default.WithDefaultLoader();

            // Await the page load; blocking on Task.Result inside an async method
            // ties up a thread and can deadlock under a synchronization context.
            var html = await BrowsingContext.New(config).OpenAsync(obj.Url);

            // Only links starting with "http" are considered (relative links are ignored).
            var hrefs = html.QuerySelectorAll("a")
                .Where(x => x.Attributes["href"] != null)
                .Select(x => x.Attributes["href"].Value)
                .Where(x => x.StartsWith("http", StringComparison.Ordinal))
                .Distinct()
                .ToList();

            // Concatenate the text of headings and paragraphs, space-separated.
            var result = new StringBuilder();
            foreach (var selector in html.QuerySelectorAll("h1, h2, h3, h4, p"))
            {
                result.Append(" ");
                result.Append(selector.TextContent);
                result.Append(" ");
            }

            var content = result.ToString();

            // Tracks normalised URLs already queued, because different hrefs
            // ("http://a" and "http://a/") collapse to the same site after normalisation.
            var queued = new HashSet<string>();
            var newSites = new List<Site>();
            foreach (var href in hrefs)
            {
                // Same normalisation as CreateSite: trim trailing slashes, lower-case.
                var url = href.TrimEnd('/').ToLower();

                if (url == exist.Url || !queued.Add(url))
                {
                    continue;
                }

                if (db.Sites.Any(x => x.Url == url))
                {
                    continue;
                }

                newSites.Add(new Site
                {
                    Url = url,
                    DateModify = DateTime.UtcNow
                });
            }

            if (!string.IsNullOrWhiteSpace(content))
            {
                db.Texts.Add(new Text
                {
                    SiteId = exist.Id,
                    Url = obj.Url,
                    Title = content,
                    DateModify = DateTime.UtcNow
                });
            }

            exist.IsParsed = true;
            db.Sites.AddRange(newSites);
            await db.SaveChangesAsync();
        }
    }
    catch (Exception ex)
    {
        // Parsing is best-effort (network and markup errors are expected);
        // record the failure instead of silently discarding it.
        System.Diagnostics.Trace.TraceError("SiteParse failed: {0}", ex);
    }
}