/// <summary>
/// Returns <paramref name="OriginalWord"/> unchanged when the index already contains it;
/// otherwise asks the spell checker for up to ten suggestions and returns the best-ranked one.
/// </summary>
/// <param name="OriginalWord">Word to look up in the "word" field of the index.</param>
/// <returns>
/// The original word when its document frequency is positive; otherwise the top-ranked suggestion,
/// or the default value (<c>null</c> for strings) when there are no suggestions.
/// </returns>
public string GetBestMatchWord(string OriginalWord)
{
    EnsureIndexed();

    // A positive document frequency means the word is present in the index as-is.
    if (indexReader.DocFreq(new Term("word", OriginalWord)) > 0)
    {
        return OriginalWord;
    }

    var candidates = _luceneChecker.SuggestSimilar(OriginalWord, 10, null, "word", true);

    var jaroWinkler = new JaroWinklerDistance();
    var levenshtein = new LevenshteinDistance();
    var nGram = new NGramDistance();

    // Rank by the mean of four components: frequency scaled by 1/100 plus the three string metrics.
    var ranked = candidates
        .Select(candidate => new
        {
            Word = candidate,
            Frequency = indexReader.DocFreq(new Term("word", candidate)),
            Jaro = jaroWinkler.GetDistance(OriginalWord, candidate),
            Levenshtein = levenshtein.GetDistance(OriginalWord, candidate),
            NGram = nGram.GetDistance(OriginalWord, candidate)
        })
        .OrderByDescending(m => ((m.Frequency / 100f) + m.Jaro + m.Levenshtein + m.NGram) / 4f)
        .ToList();

    return ranked.Select(m => m.Word).FirstOrDefault();
}
/// <summary>
/// Downloads the poem list, picks a random poem, scores it against the reference ("etalon") poem
/// and posts the result to <c>getinfo/{id}</c> relative to <c>Settings.Localhost</c>.
/// </summary>
/// <param name="id">Identifier appended to the <c>getinfo/</c> route of the outgoing request.</param>
/// <returns>
/// HTTP 200 when the whole sequence completes without throwing; HTTP 500 when any step throws.
/// </returns>
public IActionResult Get(int id)
{
    try
    {
        var poemsString = Manipulations.GetDataByUrl(Settings.Poems);
        var poems = JsonSerializer.Deserialize<List<Poem>>(poemsString);
        var etalonPoem = Settings.EtalonPoem;
        var poem = Manipulations.GetRandomPoem(poems);
        var distance = JaroWinklerDistance.distance(poem, etalonPoem);
        var person = new Person() { Poem = poem, Distance = Math.Round(distance, 2) };

        using (var client = new HttpClient())
        {
            client.BaseAddress = new Uri(Settings.Localhost);

            // The action is synchronous, so the post is awaited by blocking on the task.
            // The response is wrapped in a using block so its content stream and connection are
            // released; the original left it undisposed.
            using (var response = client.PostAsJsonAsync($"getinfo/{id}", person).Result)
            {
                // NOTE(review): the downstream status code is not inspected, so 200 is returned
                // even when the receiver rejects the request. Preserved from the original.
            }
        }

        return StatusCode(StatusCodes.Status200OK);
    }
    catch (Exception)
    {
        // Any failure (download, deserialization, scoring, post) is reported as a plain 500.
        return StatusCode(StatusCodes.Status500InternalServerError);
    }
}
/// <summary>
/// Scans one bucket of <c>_IndexDictionary</c> for the entries scoring highest against
/// <paramref name="word"/> according to <c>JaccardDistance.GetDistance</c>.
/// </summary>
/// <param name="word">Word to match; it is lower-cased before comparison.</param>
/// <param name="maxSimilarity">Best score found so far; only entries at or above it are recorded.</param>
/// <param name="index">
/// Offset used to pick the bucket: the bucket position is
/// <c>word.Length + (index - _minWordLength)</c>.
/// </param>
/// <param name="equalMinDistanceDictWordList">
/// Receives the matches. It is cleared whenever a strictly better score is found and appended to
/// when an entry ties the current best.
/// </param>
/// <returns>
/// The updated best score, or <paramref name="maxSimilarity"/> unchanged when the bucket position
/// is out of range or the bucket is <c>null</c>.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList)
{
    index = index - _minWordLength;
    word = word.ToLower();

    int slot = word.Length + index;
    if (slot < 0 || slot >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    var bucket = _IndexDictionary[slot];
    if (bucket == null)
    {
        return maxSimilarity;
    }

    // One metric instance serves the whole scan; the original allocated three objects per entry
    // and used only this one.
    var jaccard = new JaccardDistance();

    for (int j = 0; j < bucket.Count; j++)
    {
        var candidate = bucket[j];
        double similarity = jaccard.GetDistance(word, candidate);

        if (similarity > maxSimilarity)
        {
            // Strictly better: everything collected so far is superseded.
            equalMinDistanceDictWordList.Clear();
            equalMinDistanceDictWordList.Add(candidate);
            maxSimilarity = similarity;
        }
        else if (similarity == maxSimilarity)
        {
            // Exact tie with the current best (deliberate exact comparison, as in the original).
            equalMinDistanceDictWordList.Add(candidate);
        }
    }

    return maxSimilarity;
}
/// <summary>
/// Scans one bucket of <c>_IndexDictionary</c> and collects the entries whose
/// <c>JaccardDistance.GetDistance</c> score against <paramref name="word"/> lies within 0.3 of
/// the best score seen.
/// </summary>
/// <param name="word">Word to match (compared as given; no case folding is applied here).</param>
/// <param name="maxSimilarity">Best score found so far.</param>
/// <param name="index">
/// Offset used to pick the bucket: the bucket position is
/// <c>word.Length - 1 + (index - _minWordLength)</c>.
/// </param>
/// <param name="equalMinDistanceDictWordList">
/// Receives entry/score pairs. When a new best is found, pairs that fall 0.3 or more below it are
/// removed.
/// </param>
/// <returns>
/// The updated best score; <paramref name="maxSimilarity"/> unchanged when the bucket position is
/// out of range; <c>0</c> when the selected bucket is <c>null</c>.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, Dictionary<string, double> equalMinDistanceDictWordList)
{
    const double distanceThreshold = 0.3;

    index = index - _minWordLength;
    int slot = word.Length - 1 + index;

    // The original validated (word.Length + index) but indexed with (word.Length - 1 + index), so
    // a position of -1 slipped through and threw IndexOutOfRangeException. The lower bound is now
    // checked on the position actually used. The upper bound is kept exactly as before.
    // NOTE(review): the upper bound also excludes the last bucket - confirm whether that is intended.
    if (slot < 0 || slot + 1 >= _IndexDictionary.Length)
    {
        return maxSimilarity;
    }

    var bucket = _IndexDictionary[slot];
    if (bucket == null)
    {
        // NOTE(review): returns 0 rather than maxSimilarity here; preserved from the original.
        return 0;
    }

    // One metric instance serves the whole scan; the unused per-entry allocations were dropped.
    var jaccard = new JaccardDistance();

    for (int j = 0; j < bucket.Count; j++)
    {
        string candidate = bucket[j];
        double similarity = jaccard.GetDistance(word, candidate);

        // Floor of -1 carried over from the original.
        if (similarity < -1)
        {
            similarity = -1;
        }

        if (similarity > maxSimilarity)
        {
            // New best: drop recorded entries that are now too far below it. Iterate a snapshot
            // because the dictionary is modified inside the loop.
            foreach (var item in equalMinDistanceDictWordList.ToList())
            {
                if (item.Value <= similarity - distanceThreshold)
                {
                    equalMinDistanceDictWordList.Remove(item.Key);
                }
            }

            tempReplacement = candidate;
            if (!equalMinDistanceDictWordList.ContainsKey(candidate))
            {
                equalMinDistanceDictWordList.Add(candidate, similarity);
            }

            maxSimilarity = similarity;
        }
        else if (similarity <= maxSimilarity + distanceThreshold
                 && similarity >= maxSimilarity - distanceThreshold
                 && similarity > 0)
        {
            // Not a new best, but close enough to it to be kept as an alternative.
            if (!equalMinDistanceDictWordList.ContainsKey(candidate))
            {
                equalMinDistanceDictWordList.Add(candidate, similarity);
            }
        }
    }

    // The original wrapped the body in try/catch with "throw e;", which only reset the stack
    // trace. Exceptions now propagate unchanged.
    return maxSimilarity;
}
/// <summary>
/// Orders <paramref name="users"/> by a score computed against the user whose id is stored in
/// <c>Session["UserId"]</c>, highest score first.
/// </summary>
/// <remarks>
/// Score components, as implemented:
/// +20 when the current user has a gender and the candidate has none;
/// +50 when the current user's gender is not "Other" and differs from the candidate's, or both are "Other";
/// +10 for an occupation proximity of at least 0.8; +10 for a city proximity of at least 0.8;
/// plus <c>matchWordsScore</c> for likes, dislikes, hobbies and bio when both sides have a value.
/// Users with equal scores keep their relative input order (LINQ ordering is stable).
/// </remarks>
/// <param name="users">Candidates to rank. Enumerated exactly once.</param>
/// <returns>A list holding the same users in descending score order.</returns>
private IEnumerable<User> sort(IEnumerable<User> users)
{
    int uId = Int32.Parse(Session["UserId"].ToString());
    User currentUser = db.Users.Find(uId);

    // Materialise once. The original enumerated "users" twice (scoring loop, then Zip), which
    // re-runs lazy or query-backed sequences and can pair scores with the wrong users if the
    // second pass yields a different order.
    List<User> candidates = users.ToList();

    return candidates
        .Select(user =>
        {
            int score = 0;

            if (currentUser.Gender != null)
            {
                if (user.Gender == null)
                {
                    score += 20;
                }
                else if ((currentUser.Gender != "Other" && user.Gender != currentUser.Gender)
                         || (currentUser.Gender == "Other" && user.Gender == "Other"))
                {
                    score += 50;
                }
            }

            if (currentUser.Occupation != null && user.Occupation != null
                && JaroWinklerDistance.proximity(currentUser.Occupation, user.Occupation) >= 0.8)
            {
                score += 10;
            }

            if (currentUser.City != null && user.City != null
                && JaroWinklerDistance.proximity(currentUser.City, user.City) >= 0.8)
            {
                score += 10;
            }

            if (currentUser.Likes != null && user.Likes != null)
            {
                score += matchWordsScore(currentUser.Likes, user.Likes);
            }

            if (currentUser.Dislikes != null && user.Dislikes != null)
            {
                score += matchWordsScore(currentUser.Dislikes, user.Dislikes);
            }

            if (currentUser.Hobbies != null && user.Hobbies != null)
            {
                score += matchWordsScore(currentUser.Hobbies, user.Hobbies);
            }

            if (currentUser.Bio != null && user.Bio != null)
            {
                score += matchWordsScore(currentUser.Bio, user.Bio);
            }

            return new { Score = score, User = user };
        })
        .OrderByDescending(pair => pair.Score)
        .Select(pair => pair.User)
        .ToList();
}
/// <summary>
/// Builds the ranked list of alternative spellings for <paramref name="OriginalWord"/>, together
/// with the metrics used to rank each one.
/// </summary>
/// <param name="OriginalWord">Word to find alternatives for in the "word" field of the index.</param>
/// <param name="NumberToReturn">Number of suggestions requested from the spell checker.</param>
/// <returns>
/// An <c>AlternateWordList</c> carrying the original word, its document frequency and the
/// suggestions in descending best-match order, with a 1-based <c>BestMatchSortOrder</c>.
/// </returns>
public AlternateWordList GetAlternateWordList(string OriginalWord, int NumberToReturn)
{
    var result = new AlternateWordList { OriginalWord = OriginalWord };

    EnsureIndexed();
    result.OriginalWordFrequency = indexReader.DocFreq(new Term("word", OriginalWord));

    var candidates = _luceneChecker.SuggestSimilar(OriginalWord, NumberToReturn, null, "word", true);

    var jaroWinkler = new JaroWinklerDistance();
    var levenshtein = new LevenshteinDistance();
    var nGram = new NGramDistance();

    // Score = mean of (frequency / 100) and the three string metrics. It is computed once per
    // candidate and reused for both ordering and reporting.
    var ranked = candidates
        .Select(candidate =>
        {
            var frequency = indexReader.DocFreq(new Term("word", candidate));
            var jaro = jaroWinkler.GetDistance(OriginalWord, candidate);
            var leven = levenshtein.GetDistance(OriginalWord, candidate);
            var ngram = nGram.GetDistance(OriginalWord, candidate);
            var score = ((frequency / 100f) + jaro + leven + ngram) / 4f;

            return new { Word = candidate, Frequency = frequency, Jaro = jaro, Leven = leven, NGram = ngram, Score = score };
        })
        .OrderByDescending(m => m.Score)
        .ToList();

    result.Words = ranked
        .Select((m, position) => new AlternateWord
        {
            Word = m.Word,
            Frequency = m.Frequency,
            JaroWinkler = m.Jaro,
            Levenshtein = m.Leven,
            NGram = m.NGram,
            BestMatchScore = m.Score,
            BestMatchSortOrder = position + 1
        })
        .ToList();

    return result;
}
/// <summary>
/// Writes the Jaro-Winkler and Levenshtein results for the fixed pair "Holmes" / "Holmes2136" to
/// the response, each followed by a line break.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)
{
    const string first = "Holmes";
    const string second = "Holmes2136";
    const string lineBreak = "<BR>";

    Response.Write(JaroWinklerDistance.GetDistance(first, second));
    Response.Write(lineBreak);

    Response.Write(new LevenshteinDistance().GetDistance(first, second));
    Response.Write(lineBreak);
}
/// <summary>
/// Prints the Jaro-Winkler proximity of the first two command-line arguments, after upper-casing
/// them and removing diacritics.
/// </summary>
/// <param name="args">Command-line arguments; the first two are the texts to compare.</param>
public static void Main(String[] args)
{
    // The original indexed args[0] and args[1] unconditionally and crashed with
    // IndexOutOfRangeException when fewer than two arguments were supplied.
    if (args == null || args.Length < 2)
    {
        Console.Error.WriteLine("Uso: <programa> <texto1> <texto2>");
        Environment.ExitCode = 1;
        return;
    }

    var jw = new JaroWinklerDistance();

    // Take the supplied texts after converting them to upper case and stripping accents.
    var texto1 = jw.RemoveDiacritics(args[0].ToUpper());
    var texto2 = jw.RemoveDiacritics(args[1].ToUpper());

    // Compute the Jaro-Winkler proximity.
    double retorno = jw.Proximity(texto1, texto2);
    Console.WriteLine("Proximidade: " + retorno);
}
/// <summary>
/// Returns spelling suggestions for <paramref name="value"/>, placing the value itself first when
/// the index already contains it.
/// </summary>
/// <param name="value">Word to look up.</param>
/// <param name="numberOfItems">Number of suggestions requested from the spell checker.</param>
/// <returns>
/// The value (only when its document frequency is positive) followed by the suggestions ordered by
/// Jaro-Winkler score, then n-gram score, then the mean of (frequency / 100) and the Levenshtein
/// score - all descending.
/// </returns>
public List<string> GetTopSuggestions(string value, int numberOfItems)
{
    EnsureIndexed();

    var result = new List<string>();

    // A word that already exists in the index is reported ahead of every suggestion.
    if (_indexReader.DocFreq(new Term(SpellCheckerConstants.SpellCheckerKey, value)) > 0)
    {
        result.Add(value);
    }

    var candidates = _checker.SuggestSimilar(value, numberOfItems, null, SpellCheckerConstants.SpellCheckerKey, true);

    var jaroWinkler = new JaroWinklerDistance();
    var levenshtein = new LevenshteinDistance();
    var nGram = new NGramDistance();

    var rankedWords = candidates
        .Select(candidate => new
        {
            Word = candidate,
            Frequency = _indexReader.DocFreq(new Term(SpellCheckerConstants.SpellCheckerKey, candidate)),
            Jaro = jaroWinkler.GetDistance(value, candidate),
            Leven = levenshtein.GetDistance(value, candidate),
            NGram = nGram.GetDistance(value, candidate)
        })
        .OrderByDescending(m => m.Jaro)
        .ThenByDescending(m => m.NGram)
        .ThenByDescending(m => (m.Frequency / 100f + m.Leven) / 2f)
        .Select(m => m.Word)
        .ToList();

    result.AddRange(rankedWords);
    return result;
}
/// <summary>
/// Computes the largest <c>JaroWinklerDistance.Distance</c> between consecutive non-blank lines of
/// <paramref name="text"/>.
/// </summary>
/// <param name="text">Text split on '\n'; lines that are null, empty or whitespace are ignored.</param>
/// <returns>
/// <c>0</c> when there are no non-blank lines; the distance between the line and the empty string
/// when there is exactly one; otherwise the largest adjacent-pair distance, never less than <c>0</c>.
/// </returns>
public double GetDistance(string text)
{
    string[] lines = text.Split('\n')
        .Where(line => !string.IsNullOrWhiteSpace(line))
        .ToArray();

    if (lines.Length == 0)
    {
        return 0;
    }

    if (lines.Length == 1)
    {
        return JaroWinklerDistance.Distance(lines[0], "");
    }

    double largest = 0;
    for (int next = 1; next < lines.Length; next++)
    {
        double current = JaroWinklerDistance.Distance(lines[next - 1], lines[next]);
        if (largest < current)
        {
            largest = current;
        }
    }

    return largest;
}
/// <summary>
/// Scans one bucket of <c>_IndexDictionary</c> for entries that equal, contain, or score well
/// against <paramref name="word"/> using <c>JaccardDistance.GetDistance</c>.
/// </summary>
/// <param name="word">Word to match; it is lower-cased and, if it has surrounding whitespace, trimmed.</param>
/// <param name="maxSimilarity">Score carried in by the caller; returned unchanged when nothing better is recorded.</param>
/// <param name="index">Offset; after subtracting <c>_minWordLength</c> it is added to the word length to pick the bucket.</param>
/// <param name="equalMinDistanceDictWordList">Receives the matching dictionary entries.</param>
/// <param name="exact">When true, the scoring branch accepts only a score of exactly 1 and returns immediately.</param>
/// <returns>
/// <c>10</c> when an entry equals the (untrimmed-on-input) word; otherwise the last score assigned
/// to <c>maxSimilarity</c>, or the incoming value when the bucket is out of range or null.
/// </returns>
public static double findSimilarDictionaryWord(string word, double maxSimilarity, int index, List<string> equalMinDistanceDictWordList, bool exact)
{
    index = index - _minWordLength;
    // Length is captured before ToLower/Trim, so the bucket is chosen from the untrimmed length.
    int WordLength = word.Length;
    int index2 = index;
    // index2 is reset to 0 when the adjusted index is negative or the word starts with an
    // upper-case letter followed by a non-upper-case character.
    if (index < 0 || (WordLength>=2 && char.IsUpper(word[0])&&!char.IsUpper(word[1])) ) index2 = 0;
    word = word.ToLower();
    // noSpace is true when trimming leaves the word unchanged; otherwise the word is trimmed.
    bool noSpace = false;
    if (word.CompareTo(word.Trim()) == 0) noSpace = true;
    else word = word.Trim();
    double NewSimilarity = 0;
    // Bail out when the bucket position is outside the table or the bucket does not exist.
    if ((WordLength + index) < 0) return maxSimilarity;
    if ((WordLength + index) >= _IndexDictionary.Length) return maxSimilarity;
    if (_IndexDictionary[WordLength + index] == null) return maxSimilarity;
    for (int j = 0; j < _IndexDictionary[WordLength + index].Count; j++)
    {
        // NOTE(review): JaroDist and ng are created on every iteration but never used below.
        JaroWinklerDistance JaroDist = new JaroWinklerDistance();
        NGramDistance ng = new NGramDistance();
        JaccardDistance jd = new JaccardDistance();
        string temp = _IndexDictionary[WordLength + index][j];
        if(noSpace&&temp.CompareTo(word)==0)
        {
            // Entry equals the word: it becomes the only result and the sentinel 10 is returned.
            equalMinDistanceDictWordList.Clear();
            equalMinDistanceDictWordList.Add(temp);
            return 10;
        }
        else if (temp.Contains(word))
        {
            // Entry contains the word: recorded, score set to 1, scan continues.
            equalMinDistanceDictWordList.Add(/*item);*/temp);
            maxSimilarity = 1;
        }
        else if(index <= 2)
        {
            // Note the outer test uses index while the loop bound and the suffix cut use index2.
            for (int i = 0; i <= index2; i++)
            {
                // s drops the first i characters; s2 drops the last index2 characters and does
                // not depend on i.
                // NOTE(review): Substring throws if the entry is shorter than i or index2 - confirm
                // that bucket entries are always long enough.
                string s = temp.Substring(i);
                string s2 = temp.Substring(0, temp.Length - index2);
                //Console.WriteLine(item);
                if (!exact) NewSimilarity = Math.Max(jd.GetDistance(word, s), jd.GetDistance(word, s2));
                else
                {
                    NewSimilarity = jd.GetDistance(word, temp);
                    if (NewSimilarity == 1)
                    {
                        equalMinDistanceDictWordList.Clear();
                        equalMinDistanceDictWordList.Add(s);
                        maxSimilarity = NewSimilarity;
                    }
                    // NOTE(review): this return is unconditional, so in exact mode the method exits
                    // after the first entry that reaches this branch - confirm this is intended.
                    return maxSimilarity;
                }
                if (NewSimilarity > .33)
                {
                    // Scores above .33 are accepted. maxSimilarity is overwritten with the latest
                    // accepted score, which may be lower than an earlier one.
                    //equalMinDistanceDictWordList.Clear();
                    equalMinDistanceDictWordList.Add(/*item);*/temp);
                    maxSimilarity = NewSimilarity;
                    break;
                }
            }
        }
    }
    return maxSimilarity;
}
/// <summary>
/// The proximity of <c>InputPassword</c> to itself must be 1, within a tolerance of 1e-7.
/// </summary>
public void ShouldReturnJaroWinklerDistanceOf1()
{
    const double expected = 1.0;
    const double tolerance = 0.0000001;

    var proximity = JaroWinklerDistance.CalculateProximity(InputPassword, InputPassword);

    Assert.IsTrue(Math.Abs(proximity - expected) < tolerance);
}
/// <summary>
/// The proximity of <c>InputPassword</c> to <c>InputUsername</c> must be 0.5, within a tolerance
/// of 1e-7.
/// </summary>
public void ShouldReturnJaroWinklerDistanceOfPoint5()
{
    const double expected = 0.500;
    const double tolerance = 0.0000001;

    var proximity = JaroWinklerDistance.CalculateProximity(InputPassword, InputUsername);

    Assert.IsTrue(Math.Abs(proximity - expected) < tolerance);
}
/// <summary>
/// A prefix length passed to the constructor through the named <c>prefixLength</c> argument must
/// be exposed by the <c>PrefixLength</c> property.
/// </summary>
public void ShouldSetJaroWinklerDistanceWeightPrefixLength3()
{
    Algorithm = new JaroWinklerDistance(InputPassword, InputPasswrod, prefixLength: PrefixLength);

    var actual = Algorithm.PrefixLength;

    Assert.AreEqual(PrefixLength, actual);
}
/// <summary>
/// A weight threshold passed as the third constructor argument must be exposed by the
/// <c>WeightThreshold</c> property.
/// </summary>
public void ShouldSetJaroWinklerDistanceWeightThresholdPoint8()
{
    Algorithm = new JaroWinklerDistance(InputPassword, InputPasswrod, WeightThresholdPoint8);

    var actual = Algorithm.WeightThreshold;

    Assert.AreEqual(WeightThresholdPoint8, actual);
}
/// <summary>
/// The second constructor argument must be exposed by the <c>Input2</c> property.
/// </summary>
public void ShouldSetJaroWinklerDistanceInput2()
{
    Algorithm = new JaroWinklerDistance(InputPassword, InputPasswrod);

    var actual = Algorithm.Input2;

    Assert.AreEqual(InputPasswrod, actual);
}
/// <summary>
/// Constructing the algorithm from two inputs must produce a non-null instance.
/// </summary>
public void ShouldReturnJaroWinklerDistanceInstance()
{
    var created = new JaroWinklerDistance(InputPassword, InputPasswrod);
    Algorithm = created;

    Assert.IsNotNull(Algorithm);
}
/// <summary>
/// The distance between <c>InputPassword</c> and <c>InputPasswrod</c> must be 0.025, within a
/// tolerance of 1e-7.
/// </summary>
public void ShouldReturnJaroWinklerDistanceOfPoint025()
{
    const double expected = 0.025;
    const double tolerance = 0.0000001;

    var distance = JaroWinklerDistance.CalculateDistance(InputPassword, InputPasswrod);

    Assert.IsTrue(Math.Abs(distance - expected) < tolerance);
}