/// <summary>
        /// Returns the word from the index that best matches <paramref name="OriginalWord"/>.
        /// </summary>
        /// <remarks>
        /// A word whose document frequency in the "word" field is above zero is returned as-is.
        /// Otherwise up to ten spell-checker suggestions are ranked, highest first, by the mean of
        /// four terms: document frequency divided by 100, and the values returned by the
        /// Jaro-Winkler, Levenshtein and n-gram <c>GetDistance</c> calls.
        /// </remarks>
        /// <param name="OriginalWord">The word to look up.</param>
        /// <returns>
        /// <paramref name="OriginalWord"/> when it is indexed; otherwise the top-ranked suggestion,
        /// or <c>null</c> when there are no suggestions.
        /// </returns>
        public string GetBestMatchWord(string OriginalWord)
        {
            EnsureIndexed();

            // A word that already occurs in the index needs no correction.
            if (indexReader.DocFreq(new Term("word", OriginalWord)) > 0)
            {
                return OriginalWord;
            }

            var candidates  = _luceneChecker.SuggestSimilar(OriginalWord, 10, null, "word", true);
            var jaroWinkler = new JaroWinklerDistance();
            var levenshtein = new LevenshteinDistance();
            var nGram       = new NGramDistance();

            var ranked = candidates
                         .Select(candidate => new
                         {
                             Word        = candidate,
                             Frequency   = indexReader.DocFreq(new Term("word", candidate)),
                             JaroWinkler = jaroWinkler.GetDistance(OriginalWord, candidate),
                             Levenshtein = levenshtein.GetDistance(OriginalWord, candidate),
                             NGram       = nGram.GetDistance(OriginalWord, candidate)
                         })
                         .OrderByDescending(entry =>
                                            ((entry.Frequency / 100f) + entry.JaroWinkler + entry.Levenshtein + entry.NGram) / 4f)
                         .ToList();

            // An empty ranking yields null, the default for string.
            return ranked.Count > 0 ? ranked[0].Word : null;
        }
示例#2
0
        /// <summary>
        /// Picks a random poem, scores it against the reference poem and posts the result to
        /// the <c>getinfo/{id}</c> route of the configured host.
        /// </summary>
        /// <param name="id">Identifier appended to the <c>getinfo/</c> route of the POST request.</param>
        /// <returns>
        /// Status 200 once the POST call has returned; status 500 when any step throws.
        /// </returns>
        public IActionResult Get(int id)
        {
            try
            {
                var rawPoems  = Manipulations.GetDataByUrl(Settings.Poems);
                var allPoems  = JsonSerializer.Deserialize<List<Poem>>(rawPoems);
                var reference = Settings.EtalonPoem;
                var chosen    = Manipulations.GetRandomPoem(allPoems);

                var payload = new Person
                {
                    Poem     = chosen,
                    Distance = Math.Round(JaroWinklerDistance.distance(chosen, reference), 2)
                };

                using (var http = new HttpClient())
                {
                    http.BaseAddress = new Uri(Settings.Localhost);

                    // Blocks until the POST completes.
                    // NOTE(review): the response status is not inspected, so a non-success
                    // reply from the downstream service still produces a 200 here.
                    http.PostAsJsonAsync($"getinfo/{id}", payload).Wait();
                }

                return StatusCode(StatusCodes.Status200OK);
            }
            catch (Exception)
            {
                // NOTE(review): every failure is collapsed into a bare 500 without logging.
                return StatusCode(StatusCodes.Status500InternalServerError);
            }
        }
        /// <summary>
        /// Scans one bucket of <c>_IndexDictionary</c> for the entries with the highest
        /// Jaccard score against <paramref name="word"/>.
        /// </summary>
        /// <param name="word">Word to compare; it is lower-cased before comparison.</param>
        /// <param name="maxSimilarity">Best score found so far; only strictly higher scores replace it.</param>
        /// <param name="index">
        /// Offset that, together with the word length and <c>_minWordLength</c>, selects the
        /// bucket: <c>word.Length + index - _minWordLength</c>.
        /// </param>
        /// <param name="equalMinDistanceDictWordList">
        /// Receives the matches. It is cleared and refilled whenever a strictly better score is
        /// found, and appended to when a score ties the current best exactly.
        /// </param>
        /// <returns>
        /// The updated best score, or <paramref name="maxSimilarity"/> unchanged when the bucket
        /// is out of range or missing.
        /// </returns>
        public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList)
        {
            word = word.ToLower();

            int bucketIndex = word.Length + (index - _minWordLength);
            if (bucketIndex < 0 || bucketIndex >= _IndexDictionary.Length)
            {
                return maxSimilarity;
            }

            var bucket = _IndexDictionary[bucketIndex];
            if (bucket == null)
            {
                return maxSimilarity;
            }

            // One comparer serves the whole scan; it used to be re-created on every iteration
            // together with two other distance objects that were never used.
            var jaccard = new JaccardDistance();

            for (int j = 0; j < bucket.Count; j++)
            {
                string candidate  = bucket[j];
                double similarity = jaccard.GetDistance(word, candidate);

                if (similarity > maxSimilarity)
                {
                    // Strictly better: earlier matches are no longer the best ones.
                    equalMinDistanceDictWordList.Clear();
                    equalMinDistanceDictWordList.Add(candidate);
                    maxSimilarity = similarity;
                }
                else if (similarity == maxSimilarity)
                {
                    // Exact tie with the current best: keep both.
                    equalMinDistanceDictWordList.Add(candidate);
                }
            }

            return maxSimilarity;
        }
        /// <summary>
        /// Scans one bucket of <c>_IndexDictionary</c> and collects every entry whose Jaccard
        /// score against <paramref name="word"/> lies within a 0.3 band of the best score.
        /// </summary>
        /// <param name="word">Word to compare against the bucket entries.</param>
        /// <param name="maxSimilarity">Best score found so far.</param>
        /// <param name="index">
        /// Offset that selects the bucket: <c>word.Length - 1 + index - _minWordLength</c>.
        /// </param>
        /// <param name="equalMinDistanceDictWordList">
        /// Receives candidate words mapped to their scores. When a new best score is found,
        /// entries at or below (new best - 0.3) are removed.
        /// </param>
        /// <returns>
        /// The updated best score; <paramref name="maxSimilarity"/> unchanged when the bucket is
        /// out of range; 0 when the selected bucket is <c>null</c>.
        /// </returns>
        public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, Dictionary<string, double> equalMinDistanceDictWordList)
        {
            const double distanceThreshold = 0.3;

            int slot = word.Length - 1 + (index - _minWordLength);

            // The lower bound is now checked on the slot that is actually read. The previous
            // check let slot == -1 through and failed with IndexOutOfRangeException.
            if (slot < 0)
            {
                return maxSimilarity;
            }

            // Upper bound kept exactly as before (equivalent to word.Length + index >= Length).
            // NOTE(review): this means the last bucket is never scanned — confirm that is intended.
            if (slot + 1 >= _IndexDictionary.Length)
            {
                return maxSimilarity;
            }

            var bucket = _IndexDictionary[slot];
            if (bucket == null)
            {
                // NOTE(review): returns 0 rather than maxSimilarity, unlike the other early
                // exits; preserved because callers may rely on it.
                return 0;
            }

            // Created once for the whole scan instead of once per entry.
            var jaccard = new JaccardDistance();

            for (int j = 0; j < bucket.Count; j++)
            {
                string candidate  = bucket[j];
                double similarity = jaccard.GetDistance(word, candidate);

                // Floor of -1 carried over from the original implementation.
                if (similarity < -1)
                {
                    similarity = -1;
                }

                if (similarity > maxSimilarity)
                {
                    // New best: drop entries that fall out of the band below it. The snapshot
                    // (ToList) allows removal while iterating.
                    foreach (var entry in equalMinDistanceDictWordList.ToList())
                    {
                        if (entry.Value <= similarity - distanceThreshold)
                        {
                            equalMinDistanceDictWordList.Remove(entry.Key);
                        }
                    }

                    tempReplacement = candidate;
                    if (!equalMinDistanceDictWordList.ContainsKey(candidate))
                    {
                        equalMinDistanceDictWordList.Add(candidate, similarity);
                    }

                    maxSimilarity = similarity;
                }
                else if (similarity <= maxSimilarity + distanceThreshold &&
                         similarity >= maxSimilarity - distanceThreshold &&
                         similarity > 0)
                {
                    // Not a new best, but close enough to stay in the candidate set.
                    if (!equalMinDistanceDictWordList.ContainsKey(candidate))
                    {
                        equalMinDistanceDictWordList.Add(candidate, similarity);
                    }
                }
            }

            // No try/catch: the previous handler only did `throw e;`, which rethrew the same
            // exception but discarded its original stack trace.
            return maxSimilarity;
        }
        /// <summary>
        /// Orders <paramref name="users"/> by a compatibility score computed against the user
        /// whose id is stored in <c>Session["UserId"]</c>, highest score first.
        /// </summary>
        /// <remarks>
        /// Score contributions, applied only when the relevant fields are non-null:
        /// gender (20 when the candidate's gender is unknown; 50 when the current user's gender
        /// is not "Other" and differs from the candidate's, or when both are "Other"),
        /// occupation and city (10 each when the Jaro-Winkler proximity is at least 0.8), and
        /// the <c>matchWordsScore</c> result for likes, dislikes, hobbies and bio.
        /// Users with equal scores keep their incoming relative order.
        /// </remarks>
        /// <param name="users">Candidates to rank. The sequence is enumerated exactly once.</param>
        /// <returns>A new list containing the same users in ranked order.</returns>
        private IEnumerable <User> sort(IEnumerable <User> users)
        {
            int  uId         = Int32.Parse(Session["UserId"].ToString());
            User currentUser = db.Users.Find(uId);

            // Score and user are recorded together in a single pass. The earlier version
            // enumerated `users` twice (once to score, once to zip), which re-ran deferred
            // queries and could pair scores with the wrong users if the two passes differed.
            var scored = new List<KeyValuePair<int, User>>();

            foreach (User user in users)
            {
                int score = 0;

                if (currentUser.Gender != null)
                {
                    if (user.Gender == null)
                    {
                        score += 20;
                    }
                    else if ((currentUser.Gender != "Other" && user.Gender != currentUser.Gender) ||
                             (currentUser.Gender == "Other" && user.Gender == "Other"))
                    {
                        score += 50;
                    }
                }

                if (currentUser.Occupation != null && user.Occupation != null &&
                    JaroWinklerDistance.proximity(currentUser.Occupation, user.Occupation) >= 0.8)
                {
                    score += 10;
                }

                if (currentUser.City != null && user.City != null &&
                    JaroWinklerDistance.proximity(currentUser.City, user.City) >= 0.8)
                {
                    score += 10;
                }

                if (currentUser.Likes != null && user.Likes != null)
                {
                    score += matchWordsScore(currentUser.Likes, user.Likes);
                }

                if (currentUser.Dislikes != null && user.Dislikes != null)
                {
                    score += matchWordsScore(currentUser.Dislikes, user.Dislikes);
                }

                if (currentUser.Hobbies != null && user.Hobbies != null)
                {
                    score += matchWordsScore(currentUser.Hobbies, user.Hobbies);
                }

                if (currentUser.Bio != null && user.Bio != null)
                {
                    score += matchWordsScore(currentUser.Bio, user.Bio);
                }

                scored.Add(new KeyValuePair<int, User>(score, user));
            }

            // OrderByDescending is a stable sort, so ties keep their original order.
            return scored.OrderByDescending(pair => pair.Key)
                         .Select(pair => pair.Value)
                         .ToList();
        }
        /// <summary>
        /// Builds the ranked list of alternative spellings for <paramref name="OriginalWord"/>,
        /// including the individual metrics behind each ranking.
        /// </summary>
        /// <remarks>
        /// The best-match score is the mean of four terms: document frequency divided by 100,
        /// and the values returned by the Jaro-Winkler, Levenshtein and n-gram
        /// <c>GetDistance</c> calls. Entries are ordered by that score, highest first, and
        /// numbered from 1.
        /// </remarks>
        /// <param name="OriginalWord">The word to find alternatives for.</param>
        /// <param name="NumberToReturn">Number of suggestions requested from the spell checker.</param>
        /// <returns>
        /// The original word with its document frequency, plus the ranked alternatives.
        /// </returns>
        public AlternateWordList GetAlternateWordList(string OriginalWord, int NumberToReturn)
        {
            var result = new AlternateWordList { OriginalWord = OriginalWord };

            EnsureIndexed();
            result.OriginalWordFrequency = indexReader.DocFreq(new Term("word", OriginalWord));

            var candidates  = _luceneChecker.SuggestSimilar(OriginalWord, NumberToReturn, null, "word", true);
            var jaroWinkler = new JaroWinklerDistance();
            var levenshtein = new LevenshteinDistance();
            var nGram       = new NGramDistance();

            var ranked = candidates
                         .Select(candidate => new
                         {
                             word  = candidate,
                             freq  = indexReader.DocFreq(new Term("word", candidate)),
                             jaro  = jaroWinkler.GetDistance(OriginalWord, candidate),
                             leven = levenshtein.GetDistance(OriginalWord, candidate),
                             ngram = nGram.GetDistance(OriginalWord, candidate)
                         })
                         .Select(m => new
                         {
                             m.word,
                             m.freq,
                             m.jaro,
                             m.leven,
                             m.ngram,
                             score = ((m.freq / 100f) + m.jaro + m.leven + m.ngram) / 4f
                         })
                         .OrderByDescending(m => m.score)
                         .ToList();

            var alternates = new List <AlternateWord>();

            for (int position = 0; position < ranked.Count; position++)
            {
                var entry = ranked[position];

                alternates.Add(new AlternateWord
                {
                    Word               = entry.word,
                    Frequency          = entry.freq,
                    JaroWinkler        = entry.jaro,
                    Levenshtein        = entry.leven,
                    NGram              = entry.ngram,
                    BestMatchScore     = entry.score,
                    BestMatchSortOrder = position + 1   // sort order is 1-based
                });
            }

            result.Words = alternates;
            return result;
        }
示例#7
0
    /// <summary>
    /// Writes the Jaro-Winkler and Levenshtein results for a fixed pair of strings to the
    /// response, each followed by an HTML line break.
    /// </summary>
    /// <param name="sender">Event source (unused).</param>
    /// <param name="e">Event data (unused).</param>
    protected void Page_Load(object sender, EventArgs e)
    {
        const string lineBreak = "<BR>";

        string first  = "Holmes";
        string second = "Holmes2136";

        var jaroWinkler = JaroWinklerDistance.GetDistance(first, second);
        Response.Write(jaroWinkler);
        Response.Write(lineBreak);

        var levenshtein = new LevenshteinDistance().GetDistance(first, second);
        Response.Write(levenshtein);
        Response.Write(lineBreak);
    }
示例#8
0
    /// <summary>
    /// Prints the Jaro-Winkler proximity of the two texts passed on the command line.
    /// </summary>
    /// <param name="args">
    /// Command-line arguments; the first two are the texts to compare. When fewer than two
    /// are supplied, a usage message is written to standard error and the exit code is set to 1.
    /// </param>
    public static void Main(String[] args)
    {
        // Without this guard a missing argument surfaced as an IndexOutOfRangeException.
        if (args == null || args.Length < 2)
        {
            Console.Error.WriteLine("Uso: informe dois textos para comparar.");
            Environment.ExitCode = 1;
            return;
        }

        var jw = new JaroWinklerDistance();

        // Normalise both inputs: upper-case them, then strip the diacritics.
        var texto1 = jw.RemoveDiacritics(args[0].ToUpper());
        var texto2 = jw.RemoveDiacritics(args[1].ToUpper());

        // Compute the Jaro-Winkler proximity of the normalised texts.
        double retorno = jw.Proximity(texto1, texto2);

        Console.WriteLine("Proximidade: " + retorno);
    }
        /// <summary>
        /// Returns spelling suggestions for <paramref name="value"/>, best match first.
        /// </summary>
        /// <remarks>
        /// When <paramref name="value"/> itself occurs in the index it is placed first.
        /// The spell-checker suggestions follow, ordered by the Jaro-Winkler value, then the
        /// n-gram value, then the mean of (document frequency / 100) and the Levenshtein value,
        /// each descending.
        /// </remarks>
        /// <param name="value">The text to find suggestions for.</param>
        /// <param name="numberOfItems">Number of suggestions requested from the spell checker.</param>
        /// <returns>
        /// The ordered suggestions; the list may hold one more entry than requested when
        /// <paramref name="value"/> is itself indexed.
        /// </returns>
        public List <string> GetTopSuggestions(string value, int numberOfItems)
        {
            EnsureIndexed();

            var results = new List <string>();

            // An exact hit in the index always leads the list.
            if (_indexReader.DocFreq(new Term(SpellCheckerConstants.SpellCheckerKey, value)) > 0)
            {
                results.Add(value);
            }

            var candidates  = _checker.SuggestSimilar(value, numberOfItems, null, SpellCheckerConstants.SpellCheckerKey, true);
            var jaroWinkler = new JaroWinklerDistance();
            var levenshtein = new LevenshteinDistance();
            var nGram       = new NGramDistance();

            var ranked = candidates
                         .Select(candidate => new
                         {
                             Word        = candidate,
                             Frequency   = _indexReader.DocFreq(new Term(SpellCheckerConstants.SpellCheckerKey, candidate)),
                             JaroWinkler = jaroWinkler.GetDistance(value, candidate),
                             Levenshtein = levenshtein.GetDistance(value, candidate),
                             NGram       = nGram.GetDistance(value, candidate)
                         })
                         .OrderByDescending(entry => entry.JaroWinkler)
                         .ThenByDescending(entry => entry.NGram)
                         .ThenByDescending(entry => (entry.Frequency / 100f + entry.Levenshtein) / 2f);

            foreach (var entry in ranked)
            {
                results.Add(entry.Word);
            }

            return results;
        }
示例#10
0
        /// <summary>
        /// Computes the largest Jaro-Winkler <c>Distance</c> value between consecutive
        /// non-blank lines of <paramref name="text"/>.
        /// </summary>
        /// <param name="text">Text whose lines are separated by <c>'\n'</c>.</param>
        /// <returns>
        /// With two or more non-blank lines: the largest value over adjacent pairs, never below 0.
        /// With exactly one: that line's value against the empty string.
        /// With none: 0.
        /// </returns>
        public double GetDistance(string text)
        {
            var lines = text.Split('\n')
                            .Where(line => !string.IsNullOrWhiteSpace(line))
                            .ToArray();

            // A lone line has no neighbour, so it is compared with the empty string.
            if (lines.Length == 1)
            {
                return JaroWinklerDistance.Distance(lines[0], "");
            }

            double largest = 0;

            for (int next = 1; next < lines.Length; next++)
            {
                double current = JaroWinklerDistance.Distance(lines[next - 1], lines[next]);
                if (largest < current)
                {
                    largest = current;
                }
            }

            return largest;
        }
        /// <summary>
        /// Scans one bucket of <c>_IndexDictionary</c> for entries that equal, contain, or are
        /// Jaccard-similar to <paramref name="word"/>.
        /// </summary>
        /// <param name="word">Word to look up; it is lower-cased and trimmed before comparison.</param>
        /// <param name="maxSimilarity">Score carried in from the caller; returned unchanged when nothing matches.</param>
        /// <param name="index">Offset which, reduced by <c>_minWordLength</c> and added to the original word length, selects the bucket.</param>
        /// <param name="equalMinDistanceDictWordList">Receives the matching dictionary entries.</param>
        /// <param name="exact">When true, the similarity branch compares against the whole entry and returns immediately.</param>
        /// <returns>
        /// 10 when an entry equals the (untrimmed-needed) word; 1 after a containing entry was seen;
        /// otherwise the last similarity above 0.33 that was recorded, or the incoming
        /// <paramref name="maxSimilarity"/>.
        /// </returns>
        public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList, bool exact)
        {
            index = index - _minWordLength;
            // Length is captured BEFORE lower-casing/trimming, so surrounding whitespace
            // still counts towards the bucket selection below.
            int WordLength = word.Length;
            // index2 bounds the inner prefix/suffix loop; it is forced to 0 for negative
            // offsets and for words that start with an upper-case letter followed by a
            // non-upper-case one.
            int index2 = index;
            if (index < 0 || (WordLength>=2 && char.IsUpper(word[0])&&!char.IsUpper(word[1])) )
                index2 = 0;
            word = word.ToLower();
            // noSpace is true when trimming changes nothing; otherwise the word is trimmed
            // and the exact-equality shortcut in the loop is disabled.
            bool noSpace = false;
            if (word.CompareTo(word.Trim()) == 0)
                noSpace = true;
            else
                word = word.Trim();

            double NewSimilarity = 0;
            // Bucket must exist and be inside the index; otherwise nothing to scan.
            if ((WordLength + index) < 0)
                return maxSimilarity;
            if ((WordLength + index) >= _IndexDictionary.Length)
                return maxSimilarity;
            if (_IndexDictionary[WordLength + index] == null)
                return maxSimilarity;

            for (int j = 0; j < _IndexDictionary[WordLength + index].Count; j++)
            {
                // NOTE(review): JaroDist and ng are never used; all three objects are
                // re-created for every entry.
                JaroWinklerDistance JaroDist = new JaroWinklerDistance();
                NGramDistance ng = new NGramDistance();
                JaccardDistance jd = new JaccardDistance();
                string temp = _IndexDictionary[WordLength + index][j];
                // Exact hit: discard everything collected so far and return the sentinel 10.
                if(noSpace&&temp.CompareTo(word)==0)
                {
                    equalMinDistanceDictWordList.Clear();
                    equalMinDistanceDictWordList.Add(temp);
                    return 10;
                }
                // Entry contains the word: record it and keep scanning.
                else if (temp.Contains(word))
                {
                    equalMinDistanceDictWordList.Add(/*item);*/temp);
                    maxSimilarity = 1;
                }
                // Fuzzy comparison is only attempted for small offsets.
                else if(index <= 2)
                {
                    for (int i = 0; i <= index2; i++)
                    {
                        // s drops the first i characters; s2 drops the last index2
                        // characters (s2 does not depend on i).
                        // NOTE(review): presumably Substring throws when temp is shorter
                        // than i or index2 — confirm bucket entries are always long enough.
                        string s = temp.Substring(i);
                        string s2 = temp.Substring(0, temp.Length - index2);
                        //Console.WriteLine(item);
                        if (!exact)
                            NewSimilarity = Math.Max(jd.GetDistance(word, s), jd.GetDistance(word, s2));
                        else
                        {
                            NewSimilarity = jd.GetDistance(word, temp);
                            if (NewSimilarity == 1)
                            {
                                equalMinDistanceDictWordList.Clear();
                                equalMinDistanceDictWordList.Add(s);
                                maxSimilarity = NewSimilarity;
                            }
                            // NOTE(review): in exact mode the method returns after the first
                            // entry that reaches this branch, so later entries in the bucket
                            // are never examined — confirm this is intended.
                            return maxSimilarity;
                        }

                        // 0.33 is the acceptance cut-off; the first accepted variant of an
                        // entry ends the inner loop. maxSimilarity is overwritten, not maximised.
                        if (NewSimilarity > .33)
                        {
                            //equalMinDistanceDictWordList.Clear();
                            equalMinDistanceDictWordList.Add(/*item);*/temp);
                            maxSimilarity = NewSimilarity;
                            break;
                        }

                    }
                }
                           }
            return maxSimilarity;
        }
示例#12
0
            /// <summary>
            /// The proximity of a string with itself must be 1, within a tolerance of 1e-7.
            /// </summary>
            public void ShouldReturnJaroWinklerDistanceOf1()
            {
                const double tolerance = 0.0000001;

                var proximity = JaroWinklerDistance.CalculateProximity(InputPassword, InputPassword);
                var deviation = Math.Abs(proximity - 1.0);

                Assert.IsTrue(deviation < tolerance);
            }
示例#13
0
            /// <summary>
            /// The proximity of the password and username fixtures must be 0.5, within a
            /// tolerance of 1e-7.
            /// </summary>
            public void ShouldReturnJaroWinklerDistanceOfPoint5()
            {
                const double tolerance = 0.0000001;

                var proximity = JaroWinklerDistance.CalculateProximity(InputPassword, InputUsername);
                var deviation = Math.Abs(proximity - 0.500);

                Assert.IsTrue(deviation < tolerance);
            }
示例#14
0
            /// <summary>
            /// The <c>prefixLength</c> constructor argument must be exposed through
            /// <c>PrefixLength</c>.
            /// </summary>
            public void ShouldSetJaroWinklerDistanceWeightPrefixLength3()
            {
                var configured = new JaroWinklerDistance(InputPassword, InputPasswrod, prefixLength: PrefixLength);
                Algorithm = configured;

                Assert.AreEqual(PrefixLength, Algorithm.PrefixLength);
            }
示例#15
0
            /// <summary>
            /// The third constructor argument must be exposed through <c>WeightThreshold</c>.
            /// </summary>
            public void ShouldSetJaroWinklerDistanceWeightThresholdPoint8()
            {
                var configured = new JaroWinklerDistance(InputPassword, InputPasswrod, WeightThresholdPoint8);
                Algorithm = configured;

                Assert.AreEqual(WeightThresholdPoint8, Algorithm.WeightThreshold);
            }
示例#16
0
            /// <summary>
            /// The second constructor argument must be exposed through <c>Input2</c>.
            /// </summary>
            public void ShouldSetJaroWinklerDistanceInput2()
            {
                var configured = new JaroWinklerDistance(InputPassword, InputPasswrod);
                Algorithm = configured;

                Assert.AreEqual(InputPasswrod, Algorithm.Input2);
            }
示例#17
0
            /// <summary>
            /// Constructing the algorithm from two inputs must yield a non-null instance.
            /// </summary>
            public void ShouldReturnJaroWinklerDistanceInstance()
            {
                var created = new JaroWinklerDistance(InputPassword, InputPasswrod);
                Algorithm = created;

                Assert.IsNotNull(Algorithm);
            }
示例#18
0
            /// <summary>
            /// The distance between the password fixture and its misspelt variant must be 0.025,
            /// within a tolerance of 1e-7.
            /// </summary>
            public void ShouldReturnJaroWinklerDistanceOfPoint025()
            {
                const double tolerance = 0.0000001;

                var distance  = JaroWinklerDistance.CalculateDistance(InputPassword, InputPasswrod);
                var deviation = Math.Abs(distance - 0.025);

                Assert.IsTrue(deviation < tolerance);
            }