/// <summary>
/// Resets the stored index settings of a PRC service and cleans the Redis entries
/// addressed by the service's delete key.
/// </summary>
/// <param name="serviceId">Identifier of the service whose PRC index is cleaned.</param>
/// <remarks>
/// Best effort: any exception is logged and swallowed, so callers (including the
/// error paths of the indexing methods) never see a failure from this method.
/// </remarks>
public void CleanPrcIndex(string serviceId)
{
    try
    {
        var settings = serviceQuery.GetSettings<PrcSettingsElastic>(serviceId);
        settings.IndexSettings = null;
        serviceQuery.IndexSettings(settings);
        redisHandler.Clean(PrcIndexRedisKey.ServiceDeleteKey(serviceId));
    }
    catch (Exception ex)
    {
        // Pass the exception as the first argument: when it is passed after the message
        // it is treated as a format argument for a template without placeholders and
        // its details are dropped from the log.
        logger.LogError(ex, $"Unable to clean PRC Index {serviceId}");
    }
}
/// <summary>
/// Builds the PRC index of a service: for every tag in the index settings, each
/// document of the tag is scored against all other documents of the same tag and
/// the positively scored ones are stored in Redis under the document's key.
/// </summary>
/// <param name="processId">Identifier of the process whose progress/state is reported through <c>processHandler</c>.</param>
/// <param name="prcSettings">PRC settings (service id, data set name, index settings, fields for recommendation).</param>
/// <param name="token">Cancellation token; when cancelled the index is cleaned and the process is marked as cancelled.</param>
/// <remarks>
/// Exceptions are not propagated: they are logged, the index is cleaned via
/// <see cref="CleanPrcIndex"/> and the process is marked as interrupted.
/// </remarks>
public void Index(string processId, PrcSettingsElastic prcSettings, CancellationToken token)
{
    var logPrefix = $"Prc Index {processId}";
    try
    {
        const int parallelMultiplier = 2;

        var service = serviceQuery.Get(prcSettings.ServiceId);

        // Start from an empty index for this service.
        redisHandler.Clean(PrcIndexRedisKey.ServiceDeleteKey(prcSettings.ServiceId));

        var wordQuery = queryFactory.GetWordQuery(prcSettings.DataSetName);
        var lockObject = new object();
        var documentQuery = queryFactory.GetDocumentQuery(prcSettings.DataSetName);
        var tagsWithDocumentCounts = documentQuery.CountForTags(
            prcSettings.IndexSettings.FilterTagIdList,
            GlobalStore.DataSets.Get(prcSettings.DataSetName).DataSet.TagField);
        var allDocumentsCount = tagsWithDocumentCounts.Sum(d => d.Value);

        logger.LogInformation($"{logPrefix} starts with ParallelLimit: {parallelService.ParallelLimit * parallelMultiplier}, Tags Count: {prcSettings.IndexSettings.FilterTagIdList.Count}, All documents count: {allDocumentsCount}");

        var allDocProgress = new Progress(allDocumentsCount);

        foreach (var tagId in prcSettings.IndexSettings.FilterTagIdList)
        {
            if (token.IsCancellationRequested)
            {
                CleanPrcIndex(prcSettings.ServiceId);
                processHandler.Cancelled(processId);
                logger.LogInformation($"{logPrefix} cancelled Tag: `{tagId}`");
                return;
            }

            logger.LogTrace($"{logPrefix} preparing Tag: `{tagId}`");

            var globalSubset = GlobalStore.ActivatedPrcs.Get(prcSettings.ServiceId).PrcSubsets[tagId];
            if (globalSubset.WordsWithOccurences == null)
            {
                continue;
            }

            // Fetch the ids of the documents that belong to the current tag (and filter).
            var documentElasticIds = GetDocumentIds(
                    prcSettings.DataSetName,
                    prcSettings.IndexSettings.FilterQuery,
                    new List<string> { tagId },
                    null,
                    prcSettings.IndexSettings.IndexDate)
                .OrderBy(o => o)
                .ToList();
            if (documentElasticIds.Count == 0)
            {
                continue;
            }

            var wwoDocuments = wordQuery.GetWordsWithOccurencesByDocuments(
                documentElasticIds,
                prcSettings.FieldsForRecommendation.Select(DocumentQuery.MapDocumentObjectName),
                1,
                parallelLimit: parallelService.ParallelLimit);
            var cleanedTextDocuments = wwoDocuments.ToDictionary(w => w.Key, w => GetCleanedText(w.Value));
            var docProgress = new Progress(documentElasticIds.Count);

            Parallel.ForEach(documentElasticIds, parallelService.ParallelOptions(parallelMultiplier), (documentId, loopState) =>
            {
                if (token.IsCancellationRequested)
                {
                    loopState.Stop();
                    return;
                }

                try
                {
                    logger.LogTrace($"{logPrefix} preparing Document: `{documentId}`/`{tagId}`");

                    // Build the scorer (base dictionary) of the current document.
                    var scorer = GetScorer(
                        globalSubset,
                        wwoDocuments[documentId],
                        cleanedTextDocuments[documentId],
                        GlobalStore.ActivatedPrcs.Get(prcSettings.ServiceId).PrcScorers[tagId]);
                    if (scorer == null)
                    {
                        return;
                    }

                    var similarDocuments = new List<KeyValuePair<string, double>>();

                    // Score every other document of the tag against the current one.
                    foreach (var siblingDocumentId in documentElasticIds)
                    {
                        if (siblingDocumentId == documentId)
                        {
                            continue;
                        }

                        // Skip siblings that share no word with the scorer's base dictionary.
                        // Any() stops at the first common word instead of counting them all.
                        var wwoSibling = wwoDocuments[siblingDocumentId];
                        if (!wwoSibling.Keys.Intersect(scorer.BaseDic.Keys).Any())
                        {
                            continue;
                        }

                        var finalScore = GetPrcScore(scorer, cleanedTextDocuments[siblingDocumentId]);
                        if (finalScore > 0)
                        {
                            similarDocuments.Add(new KeyValuePair<string, double>(siblingDocumentId, finalScore));
                        }
                    }

                    var redisKey = new PrcIndexRedisKey(prcSettings.ServiceId, tagId, documentId);
                    redisHandler.AddDocuments(redisKey, similarDocuments);
                }
                finally
                {
                    // Progress is stepped even when the document was skipped or failed.
                    logger.LogTrace($"{logPrefix} prepared Document: `{documentId}`/`{tagId}`");
                    allDocProgress.Step();
                    var value = docProgress.Step();

                    // Report progress only on every 50th document of the tag.
                    if (value % 50 == 0)
                    {
                        lock (lockObject)
                        {
                            processHandler.Changed(processId, allDocProgress.Percent.Round(6));
                        }
                        logger.LogTrace($"{logPrefix} progress {docProgress} in `{tagId}`");
                        logger.LogTrace($"{logPrefix} total progress is {allDocProgress}");
                    }

                    if (value % 1000 == 0)
                    {
                        GC.Collect();
                    }
                }
            });

            // The parallel loop stops silently on cancellation; handle it here.
            if (token.IsCancellationRequested)
            {
                CleanPrcIndex(prcSettings.ServiceId);
                processHandler.Cancelled(processId);
                logger.LogInformation($"{logPrefix} cancelled Tag: `{tagId}`");
                return;
            }

            logger.LogInformation($"{logPrefix} prepared Tag: `{tagId}`");
            logger.LogInformation($"{logPrefix} total progress is {allDocProgress}");
            GC.Collect();
        }

        processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyIndexed_0_Service_1, ServiceTypeEnum.Prc, service.Name));
        logger.LogInformation($"{logPrefix} finished");
    }
    catch (Exception ex)
    {
        logger.LogError($"{logPrefix} failed. {ex.Message} {ex.StackTrace}");
        CleanPrcIndex(prcSettings.ServiceId);
        processHandler.Interrupted(processId, ex);
    }
    finally
    {
        GC.Collect();
    }
}
/// <summary>
/// Partially re-indexes a PRC service: for every tag, only the documents returned for
/// the window between the last index date and now are re-scored against all documents
/// of the tag, and the Redis entries of the affected neighbours are adjusted in the
/// reverse direction as well.
/// </summary>
/// <param name="processId">Identifier of the process whose progress/state is reported through <c>processHandler</c>.</param>
/// <param name="prcSettings">PRC settings; on success its <c>IndexSettings.IndexDate</c> is set to the start time of this run and persisted.</param>
/// <param name="token">Cancellation token; when cancelled the process is marked as cancelled and the index date is left untouched.</param>
/// <remarks>
/// Exceptions are not propagated: they are logged and the process is marked as interrupted.
/// Unlike <see cref="Index"/>, the existing index is not cleaned on cancellation or failure.
/// </remarks>
public void IndexPartial(string processId, PrcSettingsElastic prcSettings, CancellationToken token)
{
    var logPrefix = $"Prc Partial Index {processId}";
    try
    {
        const int parallelMultiplier = 2;

        // Upper bound of this run's document window; becomes the new index date on success.
        var partialIndexDate = DateTime.UtcNow;
        var service = serviceQuery.Get(prcSettings.ServiceId);
        var wordQuery = queryFactory.GetWordQuery(prcSettings.DataSetName);
        var tagProgress = new Progress(prcSettings.IndexSettings.FilterTagIdList.Count);

        logger.LogInformation($"{logPrefix} starts with ParallelLimit: {parallelService.ParallelLimit * parallelMultiplier}, Tags Count: {prcSettings.IndexSettings.FilterTagIdList.Count}");

        // TODO get IndexFilterTagIds which has changed only
        foreach (var tagId in prcSettings.IndexSettings.FilterTagIdList)
        {
            if (token.IsCancellationRequested)
            {
                processHandler.Cancelled(processId);
                logger.LogInformation($"{logPrefix} cancelled Tag: `{tagId}`");
                return;
            }

            // Documents modified or created since the last indexing (respecting the filter).
            // Materialised once: the sequence is checked for emptiness and then iterated by
            // the parallel loop, and a deferred OrderBy would be re-evaluated each time.
            var changedDocumentElasticIds = GetDocumentIds(
                    prcSettings.DataSetName,
                    prcSettings.IndexSettings.FilterQuery,
                    new List<string> { tagId },
                    prcSettings.IndexSettings.IndexDate,
                    partialIndexDate)
                .OrderBy(o => o)
                .ToList();

            // No changed document found since last index.
            if (changedDocumentElasticIds.Count == 0)
            {
                continue;
            }

            var globalSubset = GlobalStore.ActivatedPrcs.Get(prcSettings.ServiceId).PrcSubsets[tagId];
            if (globalSubset.WordsWithOccurences == null)
            {
                continue;
            }

            // The unchanged documents are needed for the scoring as well.
            // Materialised because the ids are iterated once per changed document.
            var documentElasticIds = GetDocumentIds(
                    prcSettings.DataSetName,
                    prcSettings.IndexSettings.FilterQuery,
                    new List<string> { tagId },
                    dateEnd: partialIndexDate)
                .ToList();
            if (documentElasticIds.Count == 0)
            {
                continue;
            }

            var wwoDocuments = wordQuery.GetWordsWithOccurencesByDocuments(
                documentElasticIds,
                prcSettings.FieldsForRecommendation.Select(DocumentQuery.MapDocumentObjectName),
                1,
                parallelLimit: parallelService.ParallelLimit);
            var cleanedTextDocuments = wwoDocuments.ToDictionary(w => w.Key, w => GetCleanedText(w.Value));

            Parallel.ForEach(changedDocumentElasticIds, parallelService.ParallelOptions(parallelMultiplier), (documentId, loopState) =>
            {
                if (token.IsCancellationRequested)
                {
                    loopState.Stop();
                    return;
                }

                try
                {
                    logger.LogTrace($"{logPrefix} preparing Document: `{documentId}`/`{tagId}`");

                    // Build the scorer (base dictionary) of the current document.
                    var scorer = GetScorer(
                        globalSubset,
                        wwoDocuments[documentId],
                        cleanedTextDocuments[documentId],
                        GlobalStore.ActivatedPrcs.Get(prcSettings.ServiceId).PrcScorers[tagId]);
                    if (scorer == null)
                    {
                        return;
                    }

                    var similarDocuments = new List<KeyValuePair<string, double>>();

                    // Calculate the scores of all documents except the current one.
                    foreach (var siblingDocumentId in documentElasticIds)
                    {
                        if (siblingDocumentId == documentId)
                        {
                            continue;
                        }

                        var wwoSibling = wwoDocuments[siblingDocumentId];
                        if (!wwoSibling.Any(w => scorer.BaseDic.Keys.Contains(w.Key)))
                        {
                            continue;
                        }

                        var finalScore = GetPrcScore(scorer, cleanedTextDocuments[siblingDocumentId]);
                        if (finalScore > 0)
                        {
                            similarDocuments.Add(new KeyValuePair<string, double>(siblingDocumentId, finalScore));
                        }
                    }

                    var redisKey = new PrcIndexRedisKey(prcSettings.ServiceId, tagId, documentId);
                    var indexedDocumentIds = redisHandler.GetDocuments(redisKey);

                    // Neighbours whose stored score is exactly the freshly computed one.
                    var unchangedDocumentIds = indexedDocumentIds
                        .Where(idx => similarDocuments.Any(sim => sim.Key == idx.Element && sim.Value == idx.Score))
                        .Select(s => s.Element)
                        .ToList();

                    // Every old or new neighbour whose link to this document changed.
                    // Union already yields distinct ids, so no further Distinct is needed.
                    var documentIdsToAdjust = indexedDocumentIds
                        .Select(s => s.Element.ToString())
                        .Union(similarDocuments.Select(s => s.Key))
                        .Where(w => !unchangedDocumentIds.Contains(w))
                        .ToList();

                    redisHandler.ReplaceDocuments(redisKey, similarDocuments);

                    // For these documents the score is recalculated in the reverse direction too.
                    foreach (var reverseDocumentId in documentIdsToAdjust)
                    {
                        var reverseRedisKey = new PrcIndexRedisKey(prcSettings.ServiceId, tagId, reverseDocumentId);
                        redisHandler.RemoveDocument(reverseRedisKey, documentId);

                        // Ids read back from Redis may refer to documents that are no longer part
                        // of the current document set; there is nothing to score for those, and
                        // indexing the dictionaries with such an id would abort the whole run.
                        if (!cleanedTextDocuments.ContainsKey(reverseDocumentId))
                        {
                            continue;
                        }

                        var reverseScorer = GetScorer(
                            globalSubset,
                            wwoDocuments[reverseDocumentId],
                            cleanedTextDocuments[reverseDocumentId],
                            GlobalStore.ActivatedPrcs.Get(prcSettings.ServiceId).PrcScorers[tagId]);
                        if (reverseScorer == null)
                        {
                            continue;
                        }

                        // If the two documents share words, compute the score and store it in Redis.
                        var wwoReverse = wwoDocuments[documentId];
                        if (wwoReverse.Keys.Intersect(reverseScorer.BaseDic.Keys).Any())
                        {
                            var finalScore = GetPrcScore(reverseScorer, cleanedTextDocuments[documentId]);
                            if (finalScore > 0)
                            {
                                redisHandler.AddDocument(reverseRedisKey, documentId, finalScore);
                            }
                        }

                        // Cut off the entries above the maximum list size.
                        redisHandler.TrimDocuments(reverseRedisKey);
                    }
                }
                finally
                {
                    logger.LogTrace($"{logPrefix} prepared Document: `{documentId}`/`{tagId}`");
                }
            });

            // The parallel loop stops silently on cancellation; handle it here.
            if (token.IsCancellationRequested)
            {
                processHandler.Cancelled(processId);
                logger.LogInformation($"{logPrefix} cancelled Tag: `{tagId}`");
                return;
            }

            tagProgress.Step();
            processHandler.Changed(processId, tagProgress.Percent.Round(6));
            GC.Collect();
        }

        // Persist the new index date only after every tag has been processed.
        prcSettings.IndexSettings.IndexDate = partialIndexDate;
        serviceQuery.IndexSettings(prcSettings);

        logger.LogInformation($"{logPrefix} finished");
        processHandler.Finished(processId, string.Format(ServiceResources.SuccessfullyPartialIndexed_0_Service_1, ServiceTypeEnum.Prc, service.Name));
    }
    catch (Exception ex)
    {
        // Log the failure the same way the full index does; previously it was only
        // reported to the process handler and never reached the log.
        logger.LogError($"{logPrefix} failed. {ex.Message} {ex.StackTrace}");
        processHandler.Interrupted(processId, ex);
    }
    finally
    {
        GC.Collect();
    }
}