Beispiel #1
0
 /// <summary>
 /// Asks every registered publisher for a sitemap provider for the given build context,
 /// discards publishers that returned <c>null</c> and returns the remaining providers
 /// sorted ascending by their <c>Order</c> value.
 /// </summary>
 /// <param name="context">The build context that is handed to each publisher.</param>
 /// <returns>An array of the non-null providers, ordered by <c>Order</c>.</returns>
 private XmlSitemapProvider[] CreateProviders(XmlSitemapBuildContext context)
 {
     var orderedProviders =
         from publisher in _publishers
         let provider = publisher.Value.PublishXmlSitemap(context)
         where provider != null
         orderby provider.Order
         select provider;

     return orderedProviders.ToArray();
 }
Beispiel #2
0
        /// <summary>
        /// Rebuilds the XML sitemap of every store, one store after the other.
        /// For each store a build context is created from the store's languages and passed to the generator.
        /// </summary>
        /// <param name="ctx">Task execution context; receives the progress reported by the generator.</param>
        /// <param name="cancelToken">Token that is forwarded to the generator through the build context.</param>
        public async Task Run(TaskExecutionContext ctx, CancellationToken cancelToken = default)
        {
            // Forwards progress reported during the rebuild to the task execution context.
            Task ReportProgress(int value, int max, string msg) => ctx.SetProgressAsync(value, max, msg, true);

            foreach (var store in _storeContext.GetAllStores())
            {
                var storeLanguages = _languageService.GetAllLanguages(false, store.Id).ToArray();
                var isSingleStore  = _storeContext.IsSingleStoreMode();

                var buildContext = new XmlSitemapBuildContext(store, storeLanguages, _settingFactory, isSingleStore)
                {
                    CancellationToken = cancelToken,
                    ProgressCallback  = ReportProgress
                };

                await _generator.RebuildAsync(buildContext);
            }
        }
Beispiel #3
0
 /// <summary>
 /// Extension point for inheritors: override to contribute custom nodes to the sitemaps
 /// during a rebuild. The base implementation does nothing.
 /// </summary>
 /// <param name="ctx">The current build context.</param>
 /// <param name="sitemaps">The sitemap nodes collected so far, keyed by an <see cref="int"/> id.</param>
 /// <returns>An already completed task.</returns>
 protected virtual Task ProcessCustomNodesAsync(XmlSitemapBuildContext ctx, Multimap<int, XmlSitemapNode> sitemaps)
     => Task.CompletedTask; // For inheritors
Beispiel #4
0
        /// <summary>
        /// Rebuilds the XML sitemap files of the context's store for every context language whose
        /// lock file can be acquired right now. Nodes are collected in segments of
        /// <c>MaximumSiteMapNodeCount</c> entries, written to a temporary directory per language and
        /// finally moved to the final directory via <c>SaveFinalAsync</c>.
        /// </summary>
        /// <remarks>
        /// While building, the current customer of the work context is replaced by the search engine
        /// system customer (if it can be resolved) and restored afterwards. All acquired lock files are
        /// released when the method exits, also when setup or the build fails.
        /// </remarks>
        /// <param name="ctx">
        /// Build context providing the store, the languages, the cancellation token and an optional
        /// progress callback. Checked with <c>Guard.NotNull</c>.
        /// </param>
        /// <exception cref="OperationCanceledException">
        /// Thrown by <c>ThrowIfCancellationRequested</c> when cancellation was requested before finalizing.
        /// </exception>
        public virtual async Task RebuildAsync(XmlSitemapBuildContext ctx)
        {
            Guard.NotNull(ctx, nameof(ctx));

            // TODO: (core) Check later if this is still necessary
            //var dataTokens = _httpContextAccessor.HttpContext?.GetRouteData()?.DataTokens;
            //if (dataTokens != null)
            //{
            //    // Double seo code otherwise
            //    dataTokens["CultureCodeReplacement"] = string.Empty;
            //}

            var languageData = new Dictionary<int, LanguageData>();

            foreach (var language in ctx.Languages)
            {
                var lockFilePath = GetLockFilePath(ctx.Store.Id, language.Id);

                if (_lockFileManager.TryAcquireLock(lockFilePath, out var lockFile))
                {
                    // Process only languages that are unlocked right now
                    // It is possible that an HTTP request triggered the generation
                    // of a language specific sitemap.

                    try
                    {
                        var sitemapDir = BuildSitemapDirPath(ctx.Store.Id, language.Id);
                        var data       = new LanguageData
                        {
                            Store        = ctx.Store,
                            Language     = language,
                            LockFile     = lockFile,
                            LockFilePath = lockFilePath,
                            TempDir      = sitemapDir + "~",
                            FinalDir     = sitemapDir,
                            BaseUrl      = await BuildBaseUrlAsync(ctx.Store, language)
                        };

                        _tenantRoot.TryDeleteDirectory(data.TempDir);
                        _tenantRoot.TryCreateDirectory(data.TempDir);

                        languageData[language.Id] = data;
                    }
                    catch
                    {
                        // Release the lock of the language that failed ...
                        await lockFile.ReleaseAsync();

                        // ... and the locks of all languages prepared before it. The composite
                        // releaser below has not been created yet, so these would stay locked otherwise.
                        foreach (var acquired in languageData.Values)
                        {
                            await acquired.LockFile.ReleaseAsync();
                        }

                        throw;
                    }
                }
            }

            if (languageData.Count == 0)
            {
                // Not a single lock could be acquired.
                Logger.Warn("XML sitemap rebuild already in process.");
                return;
            }

            var languages   = languageData.Values.Select(x => x.Language);
            // Id 0 is appended in addition to the ids of the languages being built.
            var languageIds = languages.Select(x => x.Id).Concat(new[] { 0 }).ToArray();

            // All sitemaps grouped by language
            var sitemaps = new Multimap<int, XmlSitemapNode>();

            // Releases the lock files of all processed languages when disposed.
            var compositeFileLock = new AsyncActionDisposable(async () =>
            {
                foreach (var data in languageData.Values)
                {
                    await data.LockFile.ReleaseAsync();
                }
            });

            await using (compositeFileLock)
            {
                // Impersonate
                var prevCustomer = _services.WorkContext.CurrentCustomer;
                // no need to vary xml sitemap by customer roles: it's relevant to crawlers only.
                // TODO: (core) Do not attempt to update CurrentCustomer entity if it is untracked (determine where applicable)
                _services.WorkContext.CurrentCustomer = (await _customerService.GetCustomerBySystemNameAsync(SystemCustomerNames.SearchEngine, false)) ?? prevCustomer;

                try
                {
                    var providers     = CreateProviders(ctx);
                    var total         = (await providers.SelectAsync(x => x.GetTotalCountAsync()).ToListAsync(ctx.CancellationToken)).Sum();
                    var totalSegments = (int)Math.Ceiling(total / (double)MaximumSiteMapNodeCount);
                    var hasIndex      = totalSegments > 1;
                    var indexNodes    = new Multimap<int, XmlSitemapNode>();
                    var segment       = 0;
                    var numProcessed  = 0;

                    CheckSitemapCount(totalSegments);

                    using (new DbContextScope(_db, autoDetectChanges: false, forceNoTracking: true, lazyLoading: false))
                    {
                        var entities = EnlistEntitiesAsync(providers);

                        await foreach (var batch in entities.SliceAsync(ctx.CancellationToken, MaximumSiteMapNodeCount))
                        {
                            if (ctx.CancellationToken.IsCancellationRequested)
                            {
                                break;
                            }

                            segment++;
                            // The last segment may be only partially filled: never report more than the total.
                            numProcessed = Math.Min(segment * MaximumSiteMapNodeCount, total);
                            // NOTE(review): the callback's return value is not awaited here — confirm
                            // whether the delegate returns a Task that should be awaited.
                            ctx.ProgressCallback?.Invoke(numProcessed, total, "{0} / {1}".FormatCurrent(numProcessed, total));

                            var slugs = await GetUrlRecordCollectionsForBatchAsync(batch.Select(x => x.Entry).ToList(), languageIds);

                            foreach (var data in languageData.Values)
                            {
                                var language = data.Language;
                                var baseUrl  = data.BaseUrl;

                                // Create all node entries for this segment
                                // (entries without a language id, or with the id of the current language)
                                var entries = batch
                                              .Where(x => x.Entry.LanguageId.GetValueOrDefault() == 0 || x.Entry.LanguageId.Value == language.Id)
                                              .Select(x => x.Provider.CreateNode(_linkGenerator, baseUrl, x.Entry, slugs[x.Entry.EntityName], language));
                                sitemaps[language.Id].AddRange(entries.Where(x => x != null));

                                // Create index node for this segment/language combination
                                if (hasIndex)
                                {
                                    indexNodes[language.Id].Add(new XmlSitemapNode
                                    {
                                        LastMod = sitemaps[language.Id].Select(x => x.LastMod).Where(x => x.HasValue).DefaultIfEmpty().Max(),
                                        Loc     = GetSitemapIndexUrl(segment, baseUrl),
                                    });
                                }

                                if (segment % 5 == 0 || segment == totalSegments)
                                {
                                    // Commit every 5th segment (10.000 nodes) temporarily to disk to minimize RAM usage
                                    var documents = GetSiteMapDocuments((IReadOnlyCollection<XmlSitemapNode>)sitemaps[language.Id]);
                                    await SaveTempAsync(documents, data, segment - documents.Count + (hasIndex ? 1 : 0));

                                    documents.Clear();
                                    sitemaps.RemoveAll(language.Id);
                                }
                            }

                            slugs.Clear();
                        }

                        // Process custom nodes
                        if (!ctx.CancellationToken.IsCancellationRequested)
                        {
                            ctx.ProgressCallback?.Invoke(numProcessed, total, "Processing custom nodes".FormatCurrent(numProcessed, total));
                            await ProcessCustomNodesAsync(ctx, sitemaps);

                            foreach (var data in languageData.Values)
                            {
                                if (sitemaps.ContainsKey(data.Language.Id) && sitemaps[data.Language.Id].Count > 0)
                                {
                                    // Nodes that have not been committed to disk yet
                                    var documents = GetSiteMapDocuments((IReadOnlyCollection<XmlSitemapNode>)sitemaps[data.Language.Id]);
                                    await SaveTempAsync(documents, data, (segment + 1) - documents.Count + (hasIndex ? 1 : 0));
                                }
                                else if (segment == 0)
                                {
                                    // Ensure that at least one entry exists. Otherwise,
                                    // the system will try to rebuild again.
                                    var homeNode = new XmlSitemapNode
                                    {
                                        LastMod = DateTime.UtcNow,
                                        Loc     = data.BaseUrl
                                    };
                                    var documents = GetSiteMapDocuments(new List<XmlSitemapNode> { homeNode });
                                    await SaveTempAsync(documents, data, 0);
                                }
                            }
                        }
                    }

                    // Do not finalize a partially built sitemap
                    ctx.CancellationToken.ThrowIfCancellationRequested();

                    ctx.ProgressCallback?.Invoke(totalSegments, totalSegments, "Finalizing...'");

                    foreach (var data in languageData.Values)
                    {
                        // Create index documents (if any)
                        if (hasIndex && indexNodes.Any())
                        {
                            var indexDocument = CreateSitemapIndexDocument(indexNodes[data.Language.Id]);
                            await SaveTempAsync(new List<string> { indexDocument }, data, 0);
                        }

                        // Save finally (actually renames temp folder)
                        await SaveFinalAsync(data);
                    }
                }
                finally
                {
                    // Undo impersonation
                    _services.WorkContext.CurrentCustomer = prevCustomer;
                    sitemaps.Clear();

                    // Remove temp directories that are still present (e.g. after failure or cancellation)
                    foreach (var data in languageData.Values)
                    {
                        if (_tenantRoot.DirectoryExists(data.TempDir))
                        {
                            _tenantRoot.TryDeleteDirectory(data.TempDir);
                        }
                    }
                }
            }
        }
Beispiel #5
0
        /// <summary>
        /// Returns the sitemap partition with the given index for the current store and working language.
        /// If the file does not exist, the method waits for a running rebuild to complete or triggers a
        /// rebuild itself, and then calls itself once more to read the file.
        /// </summary>
        /// <param name="index">Index of the requested sitemap file. Checked with <c>Guard.NotNegative</c>.</param>
        /// <param name="isRetry">
        /// <c>true</c> when called after a rebuild attempt; a still missing file then results in an exception
        /// instead of another rebuild.
        /// </param>
        /// <returns>The partition describing the file, including an open read stream.</returns>
        /// <exception cref="SmartException">The file is still missing on retry.</exception>
        /// <exception cref="IndexOutOfRangeException">
        /// <paramref name="index"/> is greater than 0 and its file is missing although the main file (index 0) exists.
        /// </exception>
        private async Task<XmlSitemapPartition> GetSitemapPartAsync(int index, bool isRetry)
        {
            Guard.NotNegative(index, nameof(index));

            var store    = _services.StoreContext.CurrentStore;
            var language = _services.WorkContext.WorkingLanguage;

            if (TryGetSitemapFile(store.Id, language.Id, index, out var file))
            {
                return new XmlSitemapPartition
                {
                    Index = index,
                    Name = file.Name,
                    LanguageId = language.Id,
                    StoreId = store.Id,
                    ModifiedOnUtc = file.LastModified.UtcDateTime,
                    Stream = file.OpenRead()
                };
            }

            if (isRetry)
            {
                var msg = "Could not generate XML sitemap. Index: {0}, Date: {1}".FormatInvariant(index, DateTime.UtcNow);
                Logger.Error(msg);
                throw new SmartException(msg);
            }

            if (index > 0)
            {
                // File with index greater 0 has been requested, but it does not exist.
                // Now we have to determine whether just the passed index is out of range
                // or the files have never been created before.
                // If the main file (index 0) exists, the action should return NotFoundResult,
                // otherwise the rebuild process should be started or waited for.

                if (TryGetSitemapFile(store.Id, language.Id, 0, out _))
                {
                    // Report the requested index: the probed main file is the one that DOES exist,
                    // so its name must not be used in the message.
                    throw new IndexOutOfRangeException("The sitemap file with index '{0}' does not exist.".FormatInvariant(index));
                }
            }

            // The main sitemap document with index 0 does not exist, meaning: the whole sitemap
            // needs to be created and cached by partitions.

            var wasRebuilding = false;
            var lockFilePath  = GetLockFilePath(store.Id, language.Id);

            while (await IsRebuildingAsync(lockFilePath))
            {
                // The rebuild process is already running, either started
                // by the task scheduler or another HTTP request.
                // We should wait for completion.

                wasRebuilding = true;
                await Task.Delay(1000);
            }

            if (!wasRebuilding)
            {
                // No lock. Rebuild now.
                var buildContext = new XmlSitemapBuildContext(store, new[] { language }, _services.SettingFactory, _services.StoreContext.IsSingleStoreMode())
                {
                    CancellationToken = _asyncRunner.AppShutdownCancellationToken
                };

                await RebuildAsync(buildContext);
            }

            // DRY: call self to get sitemap partition object
            return await GetSitemapPartAsync(index, true);
        }