/// <summary>
/// Builds a <see cref="LinkedUtterance"/> by splitting the utterance on single
/// space characters and classifying every resulting token independently.
/// </summary>
/// <param name="utterance">Text to link; it is split with <c>Split(' ')</c>, so
/// consecutive spaces yield empty tokens that are emitted as word parts.</param>
/// <returns>
/// One part per token, in order: an entity part when the token is a key of
/// <c>_indexedNodes</c>, a plain word part otherwise.
/// </returns>
internal LinkedUtterance LinkUtterance(string utterance)
{
    var linkedParts = new List<LinkedUtterancePart>();
    var tokens = utterance.Split(' ');

    for (var index = 0; index < tokens.Length; ++index)
    {
        var token = tokens[index];

        // Tokens that are not indexed stay plain words.
        if (!_indexedNodes.ContainsKey(token))
        {
            linkedParts.Add(LinkedUtterancePart.Word(token));
            continue;
        }

        // The token itself is used for both leading EntityInfo arguments;
        // the two trailing arguments are fixed to 1.
        var tokenEntity = new EntityInfo(token, token, 1, 1);
        linkedParts.Add(LinkedUtterancePart.Entity(token, new[] { tokenEntity }));
    }

    return new LinkedUtterance(linkedParts);
}
/// <summary>
/// Greedily links the words in <c>_words</c> using the candidates in <c>_matches</c>.
/// </summary>
/// <remarks>
/// Phase 1 repeatedly picks, among all matches that do not collide (per
/// <c>testCollision</c>) with an already picked match, the one with the highest
/// <c>Entity.Score</c>. A match is only eligible when its score is greater than 0,
/// and on equal scores the match encountered first wins. Phase 2 walks the words
/// and emits either a word part or an entity part spanning the picked match.
/// </remarks>
/// <returns>The linked utterance assembled from the picked matches.</returns>
internal LinkedUtterance LinkedUtterance_Hungry()
{
    // Phase 1: greedy, collision-free selection by score.
    var selected = new List<EntityMatch>();
    EntityMatch winner;
    do
    {
        winner = null;
        foreach (var candidate in _matches.SelectMany(group => group))
        {
            var collides = selected.Any(taken => testCollision(candidate, taken));
            if (collides)
            {
                continue;
            }

            // With no winner yet the score to beat is 0.
            var scoreToBeat = winner == null ? 0 : winner.Entity.Score;
            var candidateScore = candidate.Entity.Score;
            if (candidateScore > scoreToBeat)
            {
                winner = candidate;
            }
        }

        if (winner != null)
        {
            selected.Add(winner);
        }
    } while (winner != null);

    // Phase 2: turn the selection into utterance parts.
    var result = new List<LinkedUtterancePart>();
    var position = 0;
    while (position < _words.Length)
    {
        var picked = getIndexedMatch(position, selected);
        if (picked == null)
        {
            result.Add(LinkedUtterancePart.Word(_words[position]));
            position += 1;
            continue;
        }

        var phrase = string.Join(" ", _words.Skip(position).Take(picked.Length));

        // Keep every candidate starting here that spans the same number of
        // words as the picked match; they are the ambiguous readings of the phrase.
        var readings = new List<EntityInfo>();
        foreach (var alternative in _matches[position])
        {
            if (alternative.Length != picked.Length)
            {
                continue;
            }

            readings.Add(alternative.Entity);
        }

        result.Add(LinkedUtterancePart.Entity(phrase, readings.ToArray()));

        // Jump past all words consumed by the match.
        position += picked.Length;
    }

    return new LinkedUtterance(result);
}
/// <summary>
/// Links the utterance via the base implementation and, when disambiguation is
/// enabled, narrows the entities of every entity part down to at most
/// <c>Nbest</c> candidates chosen by <c>disambiguateClusters</c>.
/// </summary>
/// <param name="utterance">Text to link.</param>
/// <param name="context">
/// Optional entities known from context. They are stored in <c>_context</c>
/// (keyed by <c>Db.GetFreebaseId(entity.Mid)</c>), and a part that contains an
/// entity whose <c>Mid</c> matches a context entity is forced to that entity.
/// The sequence is enumerated exactly once.
/// </param>
/// <returns>
/// The first result of the base linker unchanged (possibly <c>null</c>) when
/// disambiguation is disabled or nothing was linked; otherwise a new
/// <see cref="LinkedUtterance"/> whose entity parts carry the disambiguated entities.
/// </returns>
public LinkedUtterance LinkUtterance(string utterance, IEnumerable<EntityInfo> context = null)
{
    _context.Clear();

    // Materialize once: the context feeds both the _context map and the Mid
    // index below, and a lazy sequence must not be enumerated twice.
    var contextEntities = context == null ? new List<EntityInfo>() : context.ToList();
    foreach (var entity in contextEntities)
    {
        _context[Db.GetFreebaseId(entity.Mid)] = entity;
    }

    // NOTE(review): the meaning of the second argument (20) is defined by the
    // base class and is not visible here - confirm before changing it.
    var linkedUtterance = base.LinkUtterance(utterance, 20).FirstOrDefault();
    if (!_useDisambiguation || linkedUtterance == null)
    {
        return linkedUtterance;
    }

    var contextIndex = new HashSet<string>(contextEntities.Select(e => e.Mid));

    // One cluster per entity-bearing part, in part order.
    var entityClusters = new List<EntityInfo[]>();
    foreach (var part in linkedUtterance.Parts)
    {
        var entities = part.Entities.ToArray();
        if (entities.Length == 0)
        {
            continue;
        }

        // Force context entities: the first entity known from context wins.
        EntityInfo contextEntity = entities.FirstOrDefault(e => contextIndex.Contains(e.Mid));
        if (contextEntity == null)
        {
            entityClusters.Add(entities);
        }
        else
        {
            entityClusters.Add(new[] { contextEntity });
        }
    }

    var disambiguatedClusters = new List<HashSet<EntityInfo>>();
    for (var clusterIndex = 0; clusterIndex < entityClusters.Count; ++clusterIndex)
    {
        disambiguatedClusters.Add(new HashSet<EntityInfo>());
    }

    // Each round records the chosen entity of every cluster and removes it from
    // the cluster so the next round has to choose a different candidate.
    for (var i = 0; i < Nbest; ++i)
    {
        var disambiguated = disambiguateClusters(entityClusters);
        for (var clusterIndex = 0; clusterIndex < entityClusters.Count; ++clusterIndex)
        {
            var chosen = disambiguated[clusterIndex];
            disambiguatedClusters[clusterIndex].Add(chosen);

            var cluster = entityClusters[clusterIndex];
            if (cluster.Length == 1)
            {
                // Keep the last entity to prevent empty components.
                continue;
            }

            // Except has set semantics, so a cluster holding only duplicates of
            // the chosen entity would become empty; keep it as-is in that case.
            var remaining = cluster.Except(new[] { chosen }).ToArray();
            if (remaining.Length > 0)
            {
                entityClusters[clusterIndex] = remaining;
            }
        }
    }

    // Clusters were created in part order, so a queue hands them back to the
    // entity-bearing parts in the same order.
    var entityQueue = new Queue<HashSet<EntityInfo>>(disambiguatedClusters);
    var disambiguatedParts = new List<LinkedUtterancePart>();
    foreach (var part in linkedUtterance.Parts)
    {
        if (part.Entities.Any())
        {
            disambiguatedParts.Add(LinkedUtterancePart.Entity(part.Token, entityQueue.Dequeue().ToArray()));
        }
        else
        {
            disambiguatedParts.Add(part);
        }
    }

    return new LinkedUtterance(disambiguatedParts);
}