/// <summary>
        /// Registers <paramref name="document"/> under every token it contains.
        /// For each token the stored <c>TokenContext</c> is loaded (or created when none exists),
        /// the document id is added to its <c>DocumentIds</c> if not already present,
        /// and the context is written back to the key-value storage.
        /// </summary>
        /// <param name="document">Document whose tokens and id are recorded.</param>
        private async Task SaveTokenFrequenciesAsync(Document document)
        {
            foreach (var currentToken in document.Tokens)
            {
                var cacheKey = _tokenContextCacheKey(currentToken);

                // A missing entry means this token has not been stored before: start from an empty context.
                var stored  = await _keyValueStorage.GetAsync<TokenContext>(cacheKey);
                var context = stored ?? new TokenContext();

                var alreadyRegistered = context.DocumentIds.Contains(document.Id);
                if (!alreadyRegistered)
                {
                    context.DocumentIds.Add(document.Id);
                }

                // The context is written back on every iteration, whether or not it changed.
                await _keyValueStorage.SetAsync(cacheKey, context);
            }
        }
        // Inserts two rows into partition "part1", deletes the one with row key "2",
        // and verifies that exactly one row is returned afterwards.
        public async Task DeleteRows_AddTwoRows_DeletedDisappears()
        {
            // Arrange: two rows sharing the same partition key.
            var keptRow = new Value("part1", "1");
            keptRow["col1"] = "value1";

            var removedRow = new Value("part1", "2");
            removedRow["col1"] = "value2";

            var inserted = new[] { keptRow, removedRow };
            await _tables.InsertAsync(_tableName, inserted);

            // Act: delete only the second row.
            var keysToDelete = new[] { new Key("part1", "2") };
            await _tables.DeleteAsync(_tableName, keysToDelete);

            // Query with a null row key (presumably selects the whole partition; confirm against the storage contract).
            var partitionQuery = new Key("part1", null);
            IReadOnlyCollection<Value> remaining = await _tables.GetAsync(_tableName, partitionQuery);

            // Assert: only one row is left.
            Assert.Single(remaining);
        }
        /// <summary>
        /// Fetches the values stored under the given partition key and row key
        /// and returns the first one.
        /// </summary>
        /// <param name="storage">Storage to query.</param>
        /// <param name="tableName">Name of the table to read from.</param>
        /// <param name="partitionKey">Partition key part of the lookup key.</param>
        /// <param name="rowKey">Row key part of the lookup key; must not be null.</param>
        /// <returns>The first matching value, or the default value when nothing was returned.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="rowKey"/> is null.</exception>
        public static async Task<Value> GetSingleAsync(this IKeyValueStorage storage, string tableName, string partitionKey, string rowKey)
        {
            // A single-row lookup is meaningless without a row key.
            if (rowKey == null)
                throw new ArgumentNullException(nameof(rowKey));

            var lookupKey = new Key(partitionKey, rowKey);
            IReadOnlyCollection<Value> matches = await storage.GetAsync(tableName, lookupKey);

            return matches.FirstOrDefault();
        }
Beispiel #4
0
        /// <summary>
        /// Returns the ids of entries considered duplicates of <paramref name="article"/>.
        /// Similarity scores are taken from the cache when present; otherwise they are
        /// computed from the article's document form and optionally cached.
        /// </summary>
        /// <param name="article">Article to scan for duplicates.</param>
        /// <param name="saveInCache">
        /// When true, freshly computed similarity scores are stored in the cache
        /// (pass false, for example, if the article was not added to the database).
        /// </param>
        /// <returns>
        /// Ids of all scored entries other than the article itself whose score is at least
        /// the configured matching threshold.
        /// </returns>
        public async Task<List<int>> FindDuplicatesAsync(Article article, bool saveInCache)
        {
            var scores = await _cache.GetAsync<List<SimilarityResult>>(_articleSimilarityCacheKey(article));

            if (scores == null)
            {
                // Cache miss: compute the scores from scratch.
                var document = article.ToDocument();
                scores = await _similarityScoring.GetSimilarityScoresAsync(document);

                if (saveInCache)
                {
                    // The unfiltered scores are stored, so a later change of the matching threshold
                    // does not invalidate what is cached.
                    await _cache.SetAsync(_articleSimilarityCacheKey(article), scores);
                }
            }

            // Drop the article's own entry and everything below the threshold, keeping only the ids.
            var duplicateIds =
                from score in scores
                where score.Id != article.Id && score.Score >= _articleCheckConfiguration.MatchingThreshold
                select score.Id;

            return duplicateIds.ToList();
        }
        /// <summary>
        /// Fetches the values stored under <paramref name="key"/> and returns the first one.
        /// </summary>
        /// <param name="storage">Storage to query.</param>
        /// <param name="tableName">Name of the table to read from.</param>
        /// <param name="key">Key passed unchanged to the storage lookup.</param>
        /// <returns>The first matching value, or the default value when nothing was returned.</returns>
        public static async Task<Value> GetSingleAsync(this IKeyValueStorage storage, string tableName, Key key)
        {
            IReadOnlyCollection<Value> matches = await storage.GetAsync(tableName, key);

            // Only the first match is of interest; any further values are ignored.
            return matches.FirstOrDefault();
        }