/// <summary>
        /// When an item is removed from the database, the cache model must be adjusted:
        /// 1. Get all affected tokens and the ids of the documents in which they appear
        /// 2. Remove the cached data of {document}
        /// 3. Recalculate the TfIdf vector for the affected documentIds
        /// </summary>
        /// <param name="document">The removed document; its Id and Tokens are used to locate the cache entries to adjust.</param>
        /// <returns>A task that completes after the token contexts are updated and the TfIdf vectors of the related documents are saved.</returns>
        public async Task RemoveAsync(Document document)
        {
            // Related documents are requested before the document is dropped from Documents.
            var documentsAffected = GetRelatedDocuments(document);

            Documents.Remove(document.Id);

            foreach (var token in document.Tokens)
            {
                var key          = _tokenContextCacheKey(token);
                var tokenContext = await _keyValueStorage.GetAsync <TokenContext>(key);

                // The storage may hold no context for this token (presumably GetAsync yields
                // null on a miss - verify against the storage implementation). In that case
                // there is nothing to detach, so skip the write-back instead of failing with
                // a NullReferenceException that would abort the rest of the cleanup.
                if (tokenContext?.DocumentIds != null)
                {
                    tokenContext.DocumentIds.Remove(document.Id);

                    await _keyValueStorage.SetAsync(key, tokenContext);
                }

                // The per-document TfIdf entry is removed regardless of the token context state.
                await _keyValueStorage.RemoveAsync(_tfIdfCacheKey(document, token));
            }

            await Task.WhenAll(documentsAffected.Select(SaveTfIdfVectorAsync));
        }
        // Example #2
        /// <summary>
        /// Algorithm for adjusting the cache when an article is removed:
        /// 1. Get the similarityResults of {article} from the cache
        /// 2. Remove the cached similarityResults of the removed {article}
        /// 3. Remove its similarityScore from the other articles
        ///
        /// A graph data structure would be better AND faster here:
        /// each node connected to all others,
        /// with the connection weight being the SimilarityScore (from 0 to 1).
        /// A simple List of Lists was used instead due to lack of time.
        /// </summary>
        /// <param name="notification">Event carrying the removed article.</param>
        /// <param name="cancellationToken">Token supplied by the caller; it is not observed by this handler.</param>
        /// <returns>A task that completes once the similarity cache no longer references the removed article.</returns>
        public async Task Handle(ArticleRemovedEvent notification, CancellationToken cancellationToken)
        {
            var removedArticle = notification.Article;

            await _similarityScoring.RemoveAsync(removedArticle.ToDocument());

            // The same key is used for the read and the removal, so build it once.
            var removedArticleKey = _articleSimilarityCacheKey(removedArticle);

            var results = await _cache.GetAsync <List <SimilarityResult> >(removedArticleKey)
                          ?? new List <SimilarityResult>();

            await _cache.RemoveAsync(removedArticleKey);

            foreach (var similarityResult in results.Where(r => r.Id != removedArticle.Id))
            {
                // Only the Id is set on the temporary Article used to build the neighbour's key.
                var cacheKey = _articleSimilarityCacheKey(new Article {
                    Id = similarityResult.Id
                });
                var articleSimilarities = await _cache.GetAsync <List <SimilarityResult> >(cacheKey);

                // A neighbour may have no cached list (the lookup above already treats a
                // null result as possible); there is nothing to prune for it, so move on
                // rather than dereferencing null.
                if (articleSimilarities == null)
                {
                    continue;
                }

                articleSimilarities = articleSimilarities.Where(x => x.Id != removedArticle.Id)
                                      .ToList();

                await _cache.SetAsync(cacheKey, articleSimilarities);
            }
        }