/// <summary>
/// Loads the crawl stepper with the given id (with its steps, step types and
/// crawl items), builds the matching crawler step objects via reflection,
/// runs the crawl and hands the results to <see cref="SaveToStore"/>.
/// </summary>
/// <param name="Id">
/// Id of the stepper row to run. When no such row exists the method returns
/// without crawling.
/// </param>
public void CrawlStepperToDatabase(int Id)
{
    var stepperEng = new eqranews.crawling.Models.CrawlStepper();

    using (var scope = _serviceProvider.CreateScope())
    {
        ApplicationDbContext db = scope.ServiceProvider.GetRequiredService<ApplicationDbContext>();

        // Single round trip: a separate Any() pre-check doubles the queries and
        // leaves a window in which the row can vanish before it is loaded.
        DAL.Crawling.CrawlStepper stepper = db.CrawlSteppers
            .Include(s => s.CrawlSteps).ThenInclude(i => i.CrawlItems)
            .Include("CrawlSteps.CrawlStepType")
            .Where(x => x.Id == Id)
            .SingleOrDefault();
        if (stepper == null)
        {
            return;
        }

        foreach (DAL.Crawling.CrawlStep step in stepper.CrawlSteps.ToList())
        {
            // Copy the persisted items into the crawler's own item model.
            List<eqranews.crawling.Models.CrawlItem> crawlItems = step.CrawlItems
                .Select(item => new eqranews.crawling.Models.CrawlItem
                {
                    Name = item.Name,
                    Selector = item.Selector,
                    Attr = item.Attr,
                    Value = item.Value
                })
                .ToList();

            // Constructor arguments passed to the reflected step class.
            var ctorArgs = new object[]
            {
                step.Id,
                crawling.Models.CrawlSteps.CrawlSetpType.Single,
                new AngleSharp.Url(step.Url),
                step.Selector
            };

            // The step type name stored in the database selects the concrete
            // step class inside the eqranews.crawling assembly.
            string stepClassFullName = $"eqranews.crawling.Models.CrawlSteps.{step.CrawlStepType.Name}, eqranews.crawling, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null";
            Type stepType = Type.GetType(stepClassFullName);
            if (stepType == null)
            {
                // Unknown step type: abort the whole run (nothing is crawled or
                // saved), but leave a trace so the abort is not invisible.
                System.Diagnostics.Trace.TraceWarning(
                    "CrawlStepperToDatabase: step type '{0}' could not be resolved; crawl for stepper {1} aborted.",
                    stepClassFullName,
                    Id);
                return;
            }

            var instance = (eqranews.crawling.Models.CrawlSteps.CrawlStep)Activator.CreateInstance(stepType, ctorArgs);

            // NOTE(review): assumes every step class exposes a settable
            // "CrawlItems" property; a missing property fails here with a
            // NullReferenceException - confirm against the step classes.
            var crawlItemsProperty = stepType.GetProperty("CrawlItems");
            crawlItemsProperty.SetValue(instance, crawlItems);

            // Add step to stepper
            stepperEng.StepSequence.Add(instance);
        }

        List<eqranews.crawling.Models.CrawlResult> results = stepperEng.Crawl();
        SaveToStore(results, db, stepper);
    }
}
/// <summary>
/// Saves crawl results in the database. For every result whose URL is not
/// already stored as a news source link, a news row is inserted, followed by
/// its categories (the stepper's main category plus any numeric "Category"
/// crawl items) and its remaining crawl items as news items.
/// </summary>
/// <param name="results">Crawl results to store; null or empty is a no-op.</param>
/// <param name="db">Database context used for all reads and writes.</param>
/// <param name="stepper">Stepper that produced the results; supplies the crawl source and main category.</param>
/// <remarks>
/// Best effort: any exception stops the remaining work and is written to the
/// trace output instead of being propagated to the caller.
/// </remarks>
public void SaveToStore(List<CrawlResult> results, ApplicationDbContext db, DAL.Crawling.CrawlStepper stepper)
{
    if (results == null || results.Count == 0)
    {
        return;
    }

    try
    {
        foreach (CrawlResult item in results)
        {
            var href = item.Url.Href;

            // Skip results that were already stored by an earlier crawl.
            if (db.News.Any(n => n.SourceLink == href))
            {
                continue;
            }

            // Looked up per new item, as before; per the EF Core docs Find
            // returns an already-tracked entity without another database query.
            var source = db.CrawlSources.Find(new object[] { stepper.CrawlSourceId });
            if (source == null)
            {
                System.Diagnostics.Trace.TraceError(
                    "SaveToStore: crawl source '{0}' was not found; results were not saved.",
                    stepper.CrawlSourceId);
                return;
            }

            News newsEntry = new News
            {
                Title = item.CrawlItems.FirstOrDefault(x => x.Name == "Title")?.Value,
                SourceLink = href,
                Created = DateTime.Now,
                SourceId = source.Id,
                CountryId = source.CountryId
            };
            db.News.Add(newsEntry);

            // Saved on its own because the child rows below need newsEntry.Id.
            db.SaveChanges();
            if (newsEntry.Id <= 0)
            {
                continue;
            }

            // Add the Main Category to the News Item Category
            if ((int)stepper.CategoryId > 0)
            {
                db.NewsCategories.Add(new NewsCategory()
                {
                    Id = 0,
                    NewsId = newsEntry.Id,
                    CategoryId = (int)stepper.CategoryId,
                    Main = true
                });
            }

            // Add all the News Items
            foreach (var crawlItem in item.CrawlItems)
            {
                if (crawlItem.Name == "Category")
                {
                    // "Category" items carry a category id; non-numeric values are dropped.
                    int categoryId;
                    if (int.TryParse(crawlItem.Value, out categoryId))
                    {
                        db.NewsCategories.Add(new NewsCategory()
                        {
                            Id = 0,
                            NewsId = newsEntry.Id,
                            CategoryId = categoryId,
                            Main = false
                        });
                    }
                }
                else
                {
                    db.NewsItems.Add(new NewsItem
                    {
                        NewsId = newsEntry.Id,
                        Name = crawlItem.Name,
                        Value = crawlItem.Value
                    });
                }
            }

            // One save for all child rows of this news entry instead of one
            // round trip per category.
            db.SaveChanges();
        }
    }
    catch (Exception ex)
    {
        // Still best effort (no rethrow), but no longer silent.
        System.Diagnostics.Trace.TraceError("SaveToStore failed: {0}", ex);
    }
}