Пример #1
0
        /// <summary>
        /// Builds a <see cref="LinkedUtterance"/> from a raw utterance by splitting it on single
        /// space characters and classifying every resulting token.
        /// </summary>
        /// <param name="utterance">Text to link; it is split on the ' ' character only.</param>
        /// <returns>
        /// A linked utterance with one part per token, in the original token order. Tokens that
        /// are keys of <c>_indexedNodes</c> become entity parts, all others become word parts.
        /// </returns>
        internal LinkedUtterance LinkUtterance(string utterance)
        {
            var linkedParts = new List<LinkedUtterancePart>();

            foreach (var token in utterance.Split(' '))
            {
                if (!_indexedNodes.ContainsKey(token))
                {
                    // Unknown token: keep it as a plain word.
                    linkedParts.Add(LinkedUtterancePart.Word(token));
                    continue;
                }

                // Known token: the token itself is used for both leading EntityInfo arguments,
                // and both numeric arguments are fixed to 1.
                var info = new EntityInfo(token, token, 1, 1);
                linkedParts.Add(LinkedUtterancePart.Entity(token, new[] { info }));
            }

            return new LinkedUtterance(linkedParts);
        }
Пример #2
0
        /// <summary>
        /// Greedily selects entity matches from <c>_matches</c> and assembles the words in
        /// <c>_words</c> into a <see cref="LinkedUtterance"/> of word parts and entity parts.
        /// </summary>
        /// <remarks>
        /// Selection repeatedly takes the match with the highest <c>Entity.Score</c> that does not
        /// collide (per <c>testCollision</c>) with any match already taken. Only matches scoring
        /// strictly above 0 can be taken, and on equal scores the first one enumerated wins.
        /// An older, commented-out variant ranked matches by <c>InBounds + OutBounds</c> instead.
        /// </remarks>
        /// <returns>The linked utterance built from the selected matches.</returns>
        internal LinkedUtterance LinkedUtterance_Hungry()
        {
            var selected = new List<EntityMatch>();

            // One pass per selected match; the search stops after a pass that finds no candidate.
            EntityMatch winner;
            do
            {
                winner = null;
                foreach (var candidate in _matches.SelectMany(group => group))
                {
                    if (selected.Any(taken => testCollision(candidate, taken)))
                    {
                        continue;
                    }

                    // With no winner yet, the score to beat is 0.
                    var scoreToBeat    = winner == null ? 0 : winner.Entity.Score;
                    var candidateScore = candidate.Entity.Score;
                    if (scoreToBeat < candidateScore)
                    {
                        winner = candidate;
                    }
                }

                if (winner != null)
                {
                    selected.Add(winner);
                }
            }
            while (winner != null);

            var parts = new List<LinkedUtterancePart>();
            var index = 0;

            while (index < _words.Length)
            {
                // Selected match reported by getIndexedMatch for this word index, if any.
                var covering = getIndexedMatch(index, selected);
                if (covering == null)
                {
                    parts.Add(LinkedUtterancePart.Word(_words[index]));
                    index += 1;
                    continue;
                }

                var span  = covering.Length;
                var ngram = string.Join(" ", _words.Skip(index).Take(span));

                // Keep every ambiguous reading at this index that spans the same number of words.
                var candidates = new List<EntityInfo>();
                candidates.AddRange(_matches[index].Where(m => m.Length == span).Select(m => m.Entity));

                parts.Add(LinkedUtterancePart.Entity(ngram, candidates.ToArray()));

                // Jump past all words consumed by the entity.
                index += span;
            }

            return new LinkedUtterance(parts);
        }
        /// <summary>
        /// Links an utterance through the base linker and, when disambiguation is enabled,
        /// reduces the entity candidates of every entity part to at most <c>Nbest</c> choices.
        /// </summary>
        /// <param name="utterance">Utterance text handed to the base linker.</param>
        /// <param name="context">
        /// Optional context entities; may be <c>null</c>. They are stored in <c>_context</c>
        /// (keyed by <c>Db.GetFreebaseId</c> of their Mid) and, during disambiguation, a part that
        /// has a candidate whose Mid matches a context entity is pinned to that single candidate.
        /// The sequence is enumerated exactly once.
        /// </param>
        /// <returns>
        /// The first result of the base linker unchanged (possibly <c>null</c>) when
        /// disambiguation is disabled or nothing was linked; otherwise a new utterance whose
        /// entity parts carry the disambiguated candidates and whose other parts are reused as-is.
        /// </returns>
        public LinkedUtterance LinkUtterance(string utterance, IEnumerable <EntityInfo> context = null)
        {
            var contextIndex = new HashSet<string>();

            _context.Clear();
            if (context != null)
            {
                // Walk the context a single time: it may be a lazily evaluated sequence, so both
                // the id-keyed store and the Mid lookup set are filled in the same pass.
                foreach (var entity in context)
                {
                    _context[Db.GetFreebaseId(entity.Mid)] = entity;
                    contextIndex.Add(entity.Mid);
                }
            }

            // NOTE(review): the literal 20 is forwarded to the base overload; its meaning is not
            // visible here (presumably a candidate limit) - confirm against the base class.
            var linkedUtterance = base.LinkUtterance(utterance, 20).FirstOrDefault();

            if (!_useDisambiguation || linkedUtterance == null)
            {
                return linkedUtterance;
            }

            // One candidate cluster per entity-bearing part, in part order.
            var entityClusters = new List<EntityInfo[]>();
            foreach (var part in linkedUtterance.Parts)
            {
                if (!part.Entities.Any())
                {
                    continue;
                }

                // Force context entities: the first candidate found in the context wins outright.
                var entities = part.Entities.ToArray();
                EntityInfo contextEntity = entities.FirstOrDefault(e => contextIndex.Contains(e.Mid));
                if (contextEntity == null)
                {
                    entityClusters.Add(entities);
                }
                else
                {
                    entityClusters.Add(new[] { contextEntity });
                }
            }

            var disambiguatedClusters = new List<HashSet<EntityInfo>>();
            for (var clusterIndex = 0; clusterIndex < entityClusters.Count; ++clusterIndex)
            {
                disambiguatedClusters.Add(new HashSet<EntityInfo>());
            }

            // Each round picks one entity per cluster, then removes that pick from the cluster
            // so the next round has to choose a different candidate.
            for (var round = 0; round < Nbest; ++round)
            {
                var disambiguated = disambiguateClusters(entityClusters);

                for (var clusterIndex = 0; clusterIndex < entityClusters.Count; ++clusterIndex)
                {
                    var chosen = disambiguated[clusterIndex];
                    disambiguatedClusters[clusterIndex].Add(chosen);

                    // Keep the last entity to prevent empty components.
                    if (entityClusters[clusterIndex].Length == 1)
                    {
                        continue;
                    }

                    entityClusters[clusterIndex] = entityClusters[clusterIndex].Except(new[] { chosen }).ToArray();
                }
            }

            // Clusters were created in part order, so a queue hands them back to the matching parts.
            var entityQueue        = new Queue<HashSet<EntityInfo>>(disambiguatedClusters);
            var disambiguatedParts = new List<LinkedUtterancePart>();

            foreach (var part in linkedUtterance.Parts)
            {
                if (part.Entities.Any())
                {
                    disambiguatedParts.Add(LinkedUtterancePart.Entity(part.Token, entityQueue.Dequeue().ToArray()));
                }
                else
                {
                    disambiguatedParts.Add(part);
                }
            }

            return new LinkedUtterance(disambiguatedParts);
        }