/// <summary>
/// Registers three documents and checks that a similarity lookup returns three results.
/// </summary>
public void Similarity()
{
    // Arrange: register the reference documents one at a time.
    var firstDocument = BagOfWords.Create("one", "two", "three");
    instance.Register(firstDocument);

    var secondDocument = BagOfWords.Create("one", "one", "one");
    instance.Register(secondDocument);

    var thirdDocument = BagOfWords.Create("one", "one", "two");
    instance.Register(thirdDocument);

    // Act
    // NOTE(review): "thee" may be a typo for "three" - confirm before changing the fixture.
    var query = BagOfWords.Create("thee", "two", "three");
    var matches = instance.FindSimilar(query).ToArray();

    // Assert
    Assert.AreEqual(3, matches.Length);
}
/// <summary>
/// Suggests hashtags for a caption by comparing its tags with the tags found in
/// the captions of top media for each of those tags.
/// </summary>
/// <param name="caption">Caption whose tags drive the lookup.</param>
/// <returns>
/// One <see cref="HashTagData"/> per word of every similar document, with
/// <c>Relevance</c> set to the document similarity scaled by 100 and truncated to an
/// integer; an empty array when the caption has fewer than three tags.
/// </returns>
public async Task<HashTagData[]> Get(SmartCaption caption)
{
    log.LogDebug("Finding similar posts - [{0}]", caption.Original);

    // Materialize the tags once. The previous code called ToString() on the tag
    // sequence and compared the length of that string, so the "fewer than three
    // tags" guard was measuring characters rather than tags.
    var tags = caption.Tags.ToArray();
    if (tags.Length < 3)
    {
        return new HashTagData[] { };
    }

    foreach (var tag in tags)
    {
        // Load a single page of top media for this tag through the web resilience policy.
        var topMedia = await instagram.Resilience.WebPolicy
            .ExecuteAsync(
                () => ResultExtension.UnWrap(
                    () => instagram.HashtagProcessor.GetTopHashtagMediaListAsync(tag.Text, PaginationParameters.MaxPagesToLoad(1)),
                    log))
            .ConfigureAwait(false);

        foreach (var media in topMedia.Medias)
        {
            var text = media.Caption?.Text;
            if (string.IsNullOrEmpty(text))
            {
                // Media without caption text contributes no tags.
                continue;
            }

            // Register the tags extracted from this media caption as a document.
            var smart = captionHandler.Extract(text);
            var bag = BagOfWords.Create(smart.Tags.Select(item => item.Tag).ToArray());
            similarity.Register(bag);
        }
    }

    var result = similarity.FindSimilar(BagOfWords.Create(tags.Select(item => item.Tag).ToArray()));

    // Flatten every similar document into its words, each carrying the document's score.
    return result
        .SelectMany(
            item => item.Document.Words.Select(
                x =>
                {
                    var tag = HashTagData.FromTag(x.Text);
                    tag.Relevance = (int)(100 * item.Similarity);
                    return tag;
                }))
        .ToArray();
}