/// <summary>
/// Collects index sources that need to be persisted and saves them in one transaction:
/// sources for pages that have no <c>IndexSource</c> yet, additional sources returned by the
/// <c>OnFetchingNewUrls</c> event that are not already known, and existing sources whose
/// <c>IsPublished</c> flag no longer agrees with the page status.
/// </summary>
public void FetchNewUrls()
{
    // Number of saved entities after which the session is flushed and cleared.
    const int flushBatchSize = 20;

    // Pages that are neither deleted nor master pages and have no index source registered yet.
    var newPagesQuery = Repository.AsQueryable<Page>()
        .Where(page => !page.IsDeleted && !page.IsMasterPage)
        .Where(page => !Repository.AsQueryable<IndexSource>().Any(crawlUrl => crawlUrl.SourceId == page.Id));

    if (!scrapePrivatePages)
    {
        // Private pages are excluded: only published pages are taken.
        newPagesQuery = newPagesQuery.Where(page => page.Status == PageStatus.Published);
    }

    var sourcesToSave = newPagesQuery
        .Select(page => new IndexSource
        {
            Path = page.PageUrl,
            SourceId = page.Id,
            IsPublished = page.Status == PageStatus.Published
        })
        .ToList();

    // Let event subscribers contribute additional sources.
    var eventArgs = BetterCms.Events.LuceneEvents.Instance.OnFetchingNewUrls();
    if (eventArgs != null && eventArgs.IndexSources != null && eventArgs.IndexSources.Count > 0)
    {
        var indexSourceComparer = new BetterCMS.Module.LuceneSearch.Models.IndexSource.IndexSourceComparerByIdAndPath();
        var registeredSources = Repository.AsQueryable<IndexSource>().ToList();

        // Keep only sources that are neither stored already nor queued above, without duplicates.
        // Materialized before AddRange so the filter does not read the list while it is being appended to.
        var additionalSources = eventArgs.IndexSources
            .Where(additionalSource => !registeredSources.Any(registeredSource => indexSourceComparer.Equals(additionalSource, registeredSource)))
            .Where(additionalSource => !sourcesToSave.Any(sourceToSave => indexSourceComparer.Equals(additionalSource, sourceToSave)))
            .Distinct(indexSourceComparer)
            .ToList();

        sourcesToSave.AddRange(additionalSources);
    }

    // Change publish status where status has changed.
    var sourcesWithChangedStatus = Repository.AsQueryable<IndexSource>()
        .Where(source => Repository.AsQueryable<Page>().Any(p => p.Id == source.SourceId
            && ((source.IsPublished && p.Status != PageStatus.Published)
                || (!source.IsPublished && p.Status == PageStatus.Published))))
        .ToList();

    foreach (var source in sourcesWithChangedStatus)
    {
        source.IsPublished = !source.IsPublished;
        sourcesToSave.Add(source);
    }

    UnitOfWork.BeginTransaction();

    var savedCount = 0;
    foreach (var source in sourcesToSave)
    {
        Repository.Save(source);
        savedCount++;

        // Flush and clear after every full batch (20th, 40th, ... entity),
        // not after the very first one; the remainder is written by Commit.
        if (savedCount % flushBatchSize == 0)
        {
            UnitOfWork.Session.Flush();
            UnitOfWork.Session.Clear();
        }
    }

    UnitOfWork.Commit();
}
/// <summary>
/// Collects index sources that need to be persisted and saves them in one transaction:
/// sources for pages that have no <c>IndexSource</c> yet, additional sources returned by the
/// <c>OnFetchingNewUrls</c> event that are not already known, and existing sources whose
/// <c>IsPublished</c> flag no longer agrees with the page status.
/// </summary>
public void FetchNewUrls()
{
    // Number of saved entities after which the session is flushed and cleared.
    const int flushBatchSize = 20;

    // Pages that are neither deleted nor master pages and have no index source registered yet.
    var newPagesQuery = Repository.AsQueryable<Page>()
        .Where(page => !page.IsDeleted && !page.IsMasterPage)
        .Where(page => !Repository.AsQueryable<IndexSource>().Any(crawlUrl => crawlUrl.SourceId == page.Id));

    if (!scrapePrivatePages)
    {
        // Private pages are excluded: only published pages are taken.
        newPagesQuery = newPagesQuery.Where(page => page.Status == PageStatus.Published);
    }

    var sourcesToSave = newPagesQuery
        .Select(page => new IndexSource
        {
            Path = page.PageUrl,
            SourceId = page.Id,
            IsPublished = page.Status == PageStatus.Published
        })
        .ToList();

    // Let event subscribers contribute additional sources.
    var eventArgs = BetterCms.Events.LuceneEvents.Instance.OnFetchingNewUrls();
    if (eventArgs != null && eventArgs.IndexSources != null && eventArgs.IndexSources.Count > 0)
    {
        var indexSourceComparer = new BetterCMS.Module.LuceneSearch.Models.IndexSource.IndexSourceComparerByIdAndPath();
        var registeredSources = Repository.AsQueryable<IndexSource>().ToList();

        // Keep only sources that are neither stored already nor queued above, without duplicates.
        // Materialized before AddRange so the filter does not read the list while it is being appended to.
        var additionalSources = eventArgs.IndexSources
            .Where(additionalSource => !registeredSources.Any(registeredSource => indexSourceComparer.Equals(additionalSource, registeredSource)))
            .Where(additionalSource => !sourcesToSave.Any(sourceToSave => indexSourceComparer.Equals(additionalSource, sourceToSave)))
            .Distinct(indexSourceComparer)
            .ToList();

        sourcesToSave.AddRange(additionalSources);
    }

    // Change publish status where status has changed.
    var sourcesWithChangedStatus = Repository.AsQueryable<IndexSource>()
        .Where(source => Repository.AsQueryable<Page>().Any(p => p.Id == source.SourceId
            && ((source.IsPublished && p.Status != PageStatus.Published)
                || (!source.IsPublished && p.Status == PageStatus.Published))))
        .ToList();

    foreach (var source in sourcesWithChangedStatus)
    {
        source.IsPublished = !source.IsPublished;
        sourcesToSave.Add(source);
    }

    UnitOfWork.BeginTransaction();

    var savedCount = 0;
    foreach (var source in sourcesToSave)
    {
        Repository.Save(source);
        savedCount++;

        // Flush and clear after every full batch (20th, 40th, ... entity),
        // not after the very first one; the remainder is written by Commit.
        if (savedCount % flushBatchSize == 0)
        {
            UnitOfWork.Session.Flush();
            UnitOfWork.Session.Clear();
        }
    }

    UnitOfWork.Commit();
}