/// <summary>
/// Lazily produces, for every part of the utterance that carries at least one entity,
/// that part's first entity paired with a single context n-gram.
/// </summary>
/// <remarks>
/// The n-gram ends with the marker <c>[BestAliasMatch-Mid]</c> of the entity and is prefixed by
/// the tokens of up to <c>_contextNgramSize</c> parts, counted backwards from the entity part
/// itself (its own token included). Scanning starts at index 1, so the first part of the
/// utterance is never reported as an entity.
/// </remarks>
/// <param name="utterance">Linked utterance whose parts are scanned.</param>
/// <returns>One tuple per reported entity; the array always holds exactly one n-gram.</returns>
private IEnumerable<Tuple<EntityInfo, string[]>> getNgrams(LinkedUtterance utterance)
{
    var utteranceParts = utterance.Parts.ToArray();
    for (var entityIndex = 1; entityIndex < utteranceParts.Length; ++entityIndex)
    {
        var candidate = utteranceParts[entityIndex];
        if (candidate.Entities.Any())
        {
            var linkedEntity = candidate.Entities.First();
            var text = "[" + linkedEntity.BestAliasMatch + "-" + linkedEntity.Mid + "]";

            // Walk backwards from the entity part, prepending one token per step,
            // until the context size is exhausted or the utterance start is reached.
            var taken = 0;
            var cursor = entityIndex;
            while (taken < _contextNgramSize && cursor >= 0)
            {
                text = utteranceParts[cursor].Token + " " + text;
                ++taken;
                --cursor;
            }

            yield return Tuple.Create(linkedEntity, new[] { text.Trim() });
        }
    }
}
/// <summary>
/// Registers one utterance: every distinct non-entity phrase found in it has its counter in
/// <c>_NonEntityPhraseInverseCounts</c> increased by one.
/// </summary>
/// <param name="utterance">Linked utterance to take the phrases from.</param>
public void Accept(LinkedUtterance utterance)
{
    // Distinct: a phrase is counted once per utterance, no matter how often it repeats.
    var uniquePhrases = getNonEntityPhrases(utterance).Distinct().ToList();

    foreach (var key in uniquePhrases)
    {
        // A phrase that has not been seen yet starts at one.
        var updated = _NonEntityPhraseInverseCounts.TryGetValue(key, out var seenSoFar)
            ? seenSoFar + 1
            : 1;
        _NonEntityPhraseInverseCounts[key] = updated;
    }
}
/// <summary>
/// Renders the utterance as a space-separated string in which every part that carries an entity
/// is replaced by the id <c>FreebaseDbProvider.GetId</c> returns for the Mid of its first entity;
/// parts without entities contribute their token unchanged.
/// </summary>
/// <param name="linkedUtterance">Linked utterance to render.</param>
/// <returns>The rendered parts joined by single spaces.</returns>
static string getEntityUtterance(LinkedUtterance linkedUtterance)
{
    var rendered = linkedUtterance.Parts.Select(
        part => part.Entities.Any()
            ? FreebaseDbProvider.GetId(part.Entities.First().Mid)
            : part.Token);

    return string.Join(" ", rendered);
}
/// <summary>
/// Collects the maximal runs of consecutive parts that carry no entity; the tokens of each run
/// are joined by single spaces into one phrase.
/// </summary>
/// <param name="utterance">Linked utterance whose parts are scanned in order.</param>
/// <returns>
/// The phrases in order of appearance. Empty runs (for example between two adjacent entities)
/// are not reported.
/// </returns>
private IEnumerable<string> getNonEntityPhrases(LinkedUtterance utterance)
{
    var currentWords = new List<string>();
    var phrases = new List<string>();

    foreach (var part in utterance.Parts)
    {
        if (!part.Entities.Any())
        {
            currentWords.Add(part.Token);
            continue;
        }

        // An entity terminates the run collected so far.
        if (currentWords.Count > 0)
        {
            phrases.Add(string.Join(" ", currentWords));
        }

        currentWords.Clear();
    }

    // The last run is not followed by an entity, so it has to be flushed explicitly.
    // Without this, words after the final entity (or a whole utterance without any
    // entity) would be dropped silently.
    if (currentWords.Count > 0)
    {
        phrases.Add(string.Join(" ", currentWords));
    }

    return phrases;
}
/// <summary>
/// Builds the report for one question: links the question text, looks up the expected answer
/// entry, extracts a denotation from every answer hint and records whether the most frequent
/// denotation matches the expected answer.
/// </summary>
/// <param name="info">Question together with its collected answer hints.</param>
/// <param name="answerId">
/// Id of the expected answer; compared with the id that <c>FreebaseDbProvider.GetId</c> returns
/// for each extracted denotation.
/// </param>
/// <param name="extractor">Supplies the linker, the database and the answer extraction.</param>
internal QuestionReport(QuestionInfo info, string answerId, LinkBasedExtractor extractor)
{
    var linker = extractor.Linker;
    Question = linker.LinkUtterance(info.Utterance.OriginalSentence);
    AnswerLabel = extractor.Db.GetEntryFromId(answerId);

    var denotations = new List<Tuple<LinkedUtterance, EntityInfo, bool>>();
    foreach (var answerHint in info.AnswerHints)
    {
        var linkedHint = linker.LinkUtterance(answerHint.OriginalSentence, Question.Entities);
        var denotation = extractor
            .ExtractAnswerEntity(info.Utterance.OriginalSentence, answerHint.OriginalSentence)
            .FirstOrDefault();

        if (denotation == null)
        {
            // FirstOrDefault yields null when nothing could be extracted from the hint.
            // Such a hint has no denotation to compare or count, and dereferencing it
            // below would throw, so it is left out of the collected denotations.
            continue;
        }

        var isCorrect = answerId == FreebaseDbProvider.GetId(denotation.Mid);
        denotations.Add(Tuple.Create(linkedHint, denotation, isCorrect));
    }

    CollectedDenotations = denotations;

    // Count how many hints agree on each denotation id and pick the most supported one.
    // OrderByDescending is stable, so ties are resolved in favour of the id seen first.
    var maxDenotation = denotations
        .GroupBy(item => FreebaseDbProvider.GetId(item.Item2.Mid))
        .Select(grouped => Tuple.Create(grouped.Key, grouped.Count()))
        .OrderByDescending(t => t.Item2)
        .FirstOrDefault();

    if (maxDenotation != null && AnswerLabel != null)
    {
        HasCorrectDenotation = maxDenotation.Item1 == AnswerLabel.Id;
    }

    if (maxDenotation != null)
    {
        TopDenotationEvidence = maxDenotation.Item2;
    }
}
/// <summary>
/// Renders the utterance as an HTML fragment: parts without entities are written as text,
/// parts with entities become a link to <c>/database?query=ID</c> labelled with the best alias
/// match of the part's first entity in square brackets. Parts are separated by single spaces.
/// </summary>
/// <param name="utterance">Linked utterance to render.</param>
/// <returns>HTML fragment with all text and attribute values encoded.</returns>
public static string LinkedUtteranceLink(LinkedUtterance utterance)
{
    var builder = new StringBuilder();
    foreach (var part in utterance.Parts)
    {
        if (builder.Length > 0)
        {
            builder.Append(' ');
        }

        if (!part.Entities.Any())
        {
            // Tokens come from utterance text, so they must not be emitted as raw markup.
            builder.Append(System.Net.WebUtility.HtmlEncode(part.Token));
            continue;
        }

        var entity = part.Entities.First();
        var id = FreebaseDbProvider.GetId(entity.Mid);

        // The id is a query-string value: escape it for the URL first, then encode the whole
        // URL for the single-quoted attribute it is placed in.
        var href = "/database?query=" + System.Uri.EscapeDataString(id ?? string.Empty);

        builder.AppendFormat(
            "<a href='{0}'>[{1}]</a>",
            System.Net.WebUtility.HtmlEncode(href),
            System.Net.WebUtility.HtmlEncode(entity.BestAliasMatch));
    }

    return builder.ToString();
}