/// <summary>
/// Builds an enriched caption for <paramref name="message"/>. Tags are taken from the original caption text,
/// then from the location tags with the highest <c>MediaCount</c>, and finally from <c>GetMix</c>.
/// </summary>
/// <param name="message">Media whose caption text and location drive the enrichment.</param>
/// <returns>
/// The caption holder produced by <c>captionHandler.Extract</c>. It is returned without additions when the
/// original caption already carries more than 20 tags, and returned early once <c>totalTags</c> is reached.
/// </returns>
public async Task<SmartCaption> Enrich(InstaMedia message)
{
    logger.LogInformation("Generating caption...");
    SmartCaption captionHolder = captionHandler.Extract(message.Caption?.Text);
    logger.LogInformation("Found [{0}] tags in original caption", captionHolder.TotalTags);
    if (captionHolder.TotalTags > 20)
    {
        logger.LogInformation("Found more than 20 tags on photo - ignoring it");
        return captionHolder;
    }

    // Snapshot the caption's own tags before location tags are added to the holder.
    var original = captionHolder.Tags.ToArray();
    var locationTags = await tagsByLocation.Get(message.Location).ConfigureAwait(false);
    foreach (var tag in locationTags.OrderByDescending(item => item.MediaCount).Take(totalLocationTags))
    {
        if (captionHolder.TotalTags >= totalTags)
        {
            return captionHolder;
        }

        captionHolder.AddTag(tag);
    }

    // The remaining budget is based on what the caption actually holds now. The size of the
    // whole location result is irrelevant: only a capped subset of it was added above, so
    // subtracting locationTags.Length could go negative or overshoot the cap.
    // NOTE(review): assumes the second GetMix argument is the number of tags to return - confirm.
    var remaining = totalTags - captionHolder.TotalTags;
    if (remaining <= 0)
    {
        return captionHolder;
    }

    var results = await GetMix(original, remaining).ConfigureAwait(false);
    foreach (var data in results)
    {
        captionHolder.AddTag(data);
    }

    return captionHolder;
}
/// <summary>
/// Gathers the hashtags found in the captions of <paramref name="medias"/> into a case-insensitive table,
/// incrementing each tag's <c>MediaCount</c> once per extracted occurrence. For every entry whose
/// <c>Rank</c> has no value it then queries <c>smartTags</c> and overwrites existing entries whose tag
/// matches a returned item.
/// </summary>
/// <param name="medias">Section whose media captions are scanned for tags.</param>
/// <returns>The values of the tag table as an array.</returns>
public async Task<HashTagData[]> Get(SectionMedia medias)
{
    log.LogDebug("Get tags from [{0}] posts", medias.Medias.Count);
    var byTag = new Dictionary<string, HashTagData>(StringComparer.OrdinalIgnoreCase);
    foreach (InstaMedia post in medias.Medias)
    {
        var captionText = post.Caption?.Text;
        if (!string.IsNullOrEmpty(captionText))
        {
            var extracted = captionHandler.Extract(captionText);
            foreach (HashTagData found in extracted.Tags.Where(x => !string.IsNullOrEmpty(x.Text)))
            {
                HashTagData counted;
                if (!byTag.TryGetValue(found.Tag, out counted))
                {
                    // First sighting: this instance becomes the table entry and its counter restarts at zero.
                    byTag[found.Tag] = found;
                    found.MediaCount = 0;
                    counted = found;
                }

                counted.MediaCount += 1;
            }
        }
    }

    log.LogInformation("Enriching {0} tags", byTag.Count);

    // Walk a snapshot, because table entries are replaced while iterating.
    var snapshot = byTag.ToArray();
    foreach (var entry in snapshot)
    {
        // Check the live table value: an earlier iteration may already have replaced this entry.
        var current = byTag[entry.Key];
        if (current.Rank.HasValue)
        {
            continue;
        }

        var enriched = await smartTags.Get(entry.Value).ConfigureAwait(false);
        foreach (var candidate in enriched)
        {
            // Only tags already present in the table are overwritten; new ones are ignored.
            if (!byTag.ContainsKey(candidate.Tag))
            {
                continue;
            }

            byTag[candidate.Tag] = candidate;
        }
    }

    return byTag.Values.ToArray();
}
/// <summary>
/// Finds tags from posts similar to <paramref name="caption"/>. For each caption tag it loads one page of
/// top hashtag media, registers the tag set of every captioned post with <c>similarity</c>, and then asks
/// <c>similarity</c> for documents similar to the caption's own tag set.
/// </summary>
/// <param name="caption">Caption whose tags are used as the query.</param>
/// <returns>
/// One <c>HashTagData</c> per word of every similar document, with <c>Relevance</c> set to the document
/// similarity multiplied by 100 and truncated to an integer. An empty array is returned when the caption
/// has fewer than three tags.
/// </returns>
public async Task<HashTagData[]> Get(SmartCaption caption)
{
    log.LogDebug("Finding similar posts - [{0}]", caption.Original);

    // Materialize the tags so the guard counts tags. Calling ToString() on the collection and
    // measuring the resulting string's length meant the guard practically never triggered.
    var tags = caption.Tags.ToArray();
    if (tags.Length < 3)
    {
        return new HashTagData[] { };
    }

    foreach (var tag in tags)
    {
        var topMedia = await instagram.Resilience.WebPolicy
            .ExecuteAsync(
                () => ResultExtension.UnWrap(
                    () => instagram.HashtagProcessor.GetTopHashtagMediaListAsync(
                        tag.Text,
                        PaginationParameters.MaxPagesToLoad(1)),
                    log))
            .ConfigureAwait(false);

        foreach (var media in topMedia.Medias)
        {
            var text = media.Caption?.Text;
            if (string.IsNullOrEmpty(text))
            {
                continue;
            }

            // Each captioned post contributes its tag set as one document to the similarity index.
            var smart = captionHandler.Extract(text);
            var bag = BagOfWords.Create(smart.Tags.Select(item => item.Tag).ToArray());
            similarity.Register(bag);
        }
    }

    var query = BagOfWords.Create(tags.Select(item => item.Tag).ToArray());
    var result = similarity.FindSimilar(query);
    return result
        .SelectMany(
            item => item.Document.Words.Select(
                x =>
                {
                    var tag = HashTagData.FromTag(x.Text);
                    tag.Relevance = (int)(100 * item.Similarity);
                    return tag;
                }))
        .ToArray();
}