Ejemplo n.º 1
0
        /// <summary>
        /// Lower-cases <paramref name="phrase"/>, splits it into words and drops every
        /// leading and trailing word that is contained in <c>AuxiliaryWords</c>.
        /// Auxiliary words in the middle of the phrase are kept.
        /// </summary>
        /// <param name="phrase">The phrase to trim.</param>
        /// <returns>The remaining words joined by single spaces; empty when nothing remains.</returns>
        internal static string ToMeaningfulPhrase(string phrase)
        {
            var words = Phrase.AsWords(phrase.ToLowerInvariant()).ToList();

            // Index of the first word that is not auxiliary.
            var start = 0;
            while (start < words.Count && AuxiliaryWords.Contains(words[start]))
            {
                start++;
            }

            // One past the index of the last word that is not auxiliary.
            var end = words.Count;
            while (end > start && AuxiliaryWords.Contains(words[end - 1]))
            {
                end--;
            }

            return string.Join(" ", words.GetRange(start, end - start));
        }
        /// <summary>
        /// Lower-cases <paramref name="phrase"/>, splits it into words and strips the
        /// words "a", "an", "the", "some" and "my" from both ends of the phrase.
        /// Occurrences in the middle of the phrase are kept.
        /// </summary>
        /// <param name="phrase">The phrase to trim.</param>
        /// <returns>The remaining words joined by single spaces; empty when nothing remains.</returns>
        private string toMeaningfulPhrase(string phrase)
        {
            var fillers = new[] { "a", "an", "the", "some", "my" };

            // Trim the front, flip, trim the (former) back, flip again to restore order.
            var trimmed = Phrase.AsWords(phrase.ToLowerInvariant())
                          .SkipWhile(word => fillers.Contains(word))
                          .Reverse()
                          .SkipWhile(word => fillers.Contains(word))
                          .Reverse();

            return string.Join(" ", trimmed);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Scores how well <paramref name="input"/> matches a concept, given the concept's
        /// name and its descriptions. All comparisons are done on lower-cased text.
        /// </summary>
        /// <param name="input">The phrase to score.</param>
        /// <param name="conceptName">Name of the concept.</param>
        /// <param name="conceptDescriptions">Descriptions of the concept; enumerated at most once.</param>
        /// <returns>
        /// <c>wordCount * weight</c> when the input (raw or trimmed by <c>ToMeaningfulPhrase</c>)
        /// equals the name; <c>1.02 * wordCount * weight</c> when the input equals a description;
        /// otherwise the sum, over the non-auxiliary input words, of the fraction of
        /// non-auxiliary description/name words equal to that word, multiplied by <c>weight</c>.
        /// Returns 0 when the input has no non-auxiliary words.
        /// </returns>
        private double getSimilarity(string input, string conceptName, IEnumerable <string> conceptDescriptions)
        {
            var sanitizedInput  = input.ToLowerInvariant();
            var meaningFulInput = ToMeaningfulPhrase(input);
            var words           = Phrase.AsWords(sanitizedInput);
            var name            = conceptName.ToLowerInvariant();

            // Each input word raises the multiplier by 1%.
            var weight = 1 + words.Length / 100.0;

            if (sanitizedInput == name || meaningFulInput == name)
            {
                return 1.0 * words.Length * weight;
            }

            // Materialize once: the sequence is needed for the exact-match pass and for
            // the word-level scoring, and re-enumerating a lazy sequence repeats its work.
            var descriptions = conceptDescriptions.ToList();

            foreach (var description in descriptions)
            {
                if (description.ToLowerInvariant() == sanitizedInput)
                {
                    return 1.02 * words.Length * weight;
                }
            }

            var contentWords = words.Where(word => !AuxiliaryWords.Contains(word)).ToList();
            if (contentWords.Count == 0)
            {
                return 0;
            }

            // The non-auxiliary words of all descriptions plus the name do not depend on
            // the input word being scored, so they are collected a single time.
            var descriptionWords = new List <string>();
            foreach (var description in descriptions.Concat(new[] { name }))
            {
                foreach (var descriptionWord in Phrase.AsWords(description.ToLowerInvariant()))
                {
                    if (!AuxiliaryWords.Contains(descriptionWord))
                    {
                        descriptionWords.Add(descriptionWord);
                    }
                }
            }

            //TODO better weighing should be here (e.g. 1.0 / _index.TotalOccurences(descriptionWord))
            const double wordWeight = 1.0;
            var totalWeight         = wordWeight * descriptionWords.Count;

            var score = 0.0;
            foreach (var word in contentWords)
            {
                var hitCount = 0.0;
                foreach (var descriptionWord in descriptionWords)
                {
                    if (descriptionWord == word)
                    {
                        hitCount += wordWeight;
                    }
                }

                // The +1 in the denominator keeps the division defined when there are
                // no description words and keeps every word score below 1.
                score += hitCount / (totalWeight + 1);
            }

            return score * weight;
        }