EditDistance_SingleDeletionAtStartAndDoubleInsertionAtEnd_2Max_EditDistanceLargerThanMax ()
{
    // NOTE(review): no return type or modifiers are visible ahead of this name in this
    // excerpt; presumably they sit on a preceding line -- confirm against the full file.

    // "abo" vs. "boha" with 2 passed as the third argument (the "Max" of the test name):
    // the matcher is expected to report its "larger than max" sentinel.
    Assert.AreEqual(
        ApproximateMatcher.EditDistanceLargerThanMax,
        ApproximateMatcher.EditDistance("abo", "boha", 2, false));
}
/// <summary>
/// Compares "abo" with "bi" (leading 'a' removed, final 'o' replaced by 'i') while passing 1
/// as the third argument, and expects the matcher's "larger than max" sentinel.
/// </summary>
public void EditDistance_SingleDeletionAtStartAndSingleSubstitutionAtEnd_1Max_EditDistanceLargerThanMax()
{
    int editDistance = ApproximateMatcher.EditDistance("abo", "bi", 1, false);

    Assert.AreEqual(ApproximateMatcher.EditDistanceLargerThanMax, editDistance);
}
/// <summary>
/// Compares "noise" and "dist" in both argument orders, with 2 as the third argument and
/// true as the fourth (the "suffix treated as zero distance" mode of the test name), and
/// expects the "larger than max" sentinel each time.
/// </summary>
public void EditDistance_SingleInsertionSingleSubstitutionAtBeginningSingleSubstitutionAtEndWithSuffixTreatedAsZeroDistance_2Cutoff_EditDistanceLargerThanMax()
{
    // The expectation is the same whichever word is passed first.
    Assert.AreEqual(ApproximateMatcher.EditDistanceLargerThanMax,
        ApproximateMatcher.EditDistance("noise", "dist", 2, true));
    Assert.AreEqual(ApproximateMatcher.EditDistanceLargerThanMax,
        ApproximateMatcher.EditDistance("dist", "noise", 2, true));
}
/// <summary>
/// Compares the two-letter string "ab" with a much longer quoted phrase, with no effective
/// limit (int.MaxValue) and true as the fourth argument, and expects a distance of 2.
/// </summary>
public void EditDistance_MajorDifference()
{
    int distance = ApproximateMatcher.EditDistance("ab", "\"look busy do nothing\"", int.MaxValue, true);

    Assert.AreEqual(2, distance);
}
/// <summary>
/// Compares "acaseinfact" with "case", with no effective limit (int.MaxValue) and true as
/// the fourth argument, and expects a distance of 7.
/// </summary>
public void EditDistance_SingleDeletionAtBeginningWithSixInsertionsAtEndAndSuffixTreatedAsZeroDistance_7()
{
    // no suffix to ignore on the second word
    Assert.AreEqual(7, ApproximateMatcher.EditDistance("acaseinfact", "case", int.MaxValue, true));
}
/// <summary>
/// Compares "abo" with "boha" (leading 'a' removed, "ha" added at the end) with 3 as the
/// third argument, and expects a distance of 3.
/// </summary>
public void EditDistance_SingleDeletionAtStartAndDoubleInsertionAtEnd_3()
{
    Assert.AreEqual(3, ApproximateMatcher.EditDistance("abo", "boha", 3, false));
}
/// <summary>
/// Compares "abohor" with "aobhro" ("bo" swapped to "ob", "or" swapped to "ro") with 2 as
/// the third argument, and expects a distance of 2.
/// </summary>
public void EditDistance_SingleTranspositionAtMiddleAndEnd_2()
{
    Assert.AreEqual(2, ApproximateMatcher.EditDistance("abohor", "aobhro", 2, false));
}
/// <summary>
/// Orders two languages for display so that the entries judged most relevant to the current
/// search string come first. A plain alphabetical sort by name or by tag is not good enough,
/// so names, tags and the search string are weighed together. The result is a heuristic,
/// not a true relevance ranking.
/// </summary>
/// <param name="x">The first language to compare.</param>
/// <param name="y">The second language to compare.</param>
/// <returns>
/// 0 when both languages carry the same tag; a negative value when <paramref name="x"/>
/// should be listed first; a positive value when <paramref name="y"/> should be listed first.
/// </returns>
public int Compare(LanguageInfo x, LanguageInfo y)
{
    const StringComparison ignoreCase = StringComparison.InvariantCultureIgnoreCase;

    string xTag = x.LanguageTag;
    string yTag = y.LanguageTag;
    if (xTag == yTag)
    {
        return 0;
    }

    string xPrimary = x.Names[0];
    string yPrimary = y.Names[0];

    // BL-1141: prefer a language one of whose names is exactly the search string. Only the
    // first two names are considered, and only where the corresponding names of the two
    // languages differ (when both match there is no reason to prefer either one).
    if (!xPrimary.Equals(yPrimary, ignoreCase))
    {
        if (xPrimary.Equals(_searchString, ignoreCase))
        {
            return -1;
        }
        if (yPrimary.Equals(_searchString, ignoreCase))
        {
            return 1;
        }
    }
    else
    {
        // The primary names are equal here. Either language may lack a second name; when
        // both have one, the second names only matter if they differ from each other.
        bool secondNamesCanDecide = x.Names.Count == 1
            || y.Names.Count == 1
            || !x.Names[1].Equals(y.Names[1], ignoreCase);
        if (secondNamesCanDecide)
        {
            if (x.Names.Count > 1 && x.Names[1].Equals(_searchString, ignoreCase))
            {
                return -1;
            }
            if (y.Names.Count > 1 && y.Names[1].Equals(_searchString, ignoreCase))
            {
                return 1;
            }
        }
    }

    // Next preference: a tag that is exactly the search string (identical tags were
    // already dealt with at the top).
    if (xTag.Equals(_searchString, ignoreCase))
    {
        return -1;
    }
    if (yTag.Equals(_searchString, ignoreCase))
    {
        return 1;
    }

    // TryGetParts is used so that tags which do not parse while the user is still typing
    // do not have to be handled through exceptions. Only the language subtag is needed;
    // the other out values are overwritten by the second call and never read.
    string xLanguagePart;
    string yLanguagePart;
    string ignoredScript;
    string ignoredRegion;
    string ignoredVariant;
    bool xParsed = IetfLanguageTag.TryGetParts(xTag, out xLanguagePart, out ignoredScript, out ignoredRegion, out ignoredVariant);
    bool yParsed = IetfLanguageTag.TryGetParts(yTag, out yLanguagePart, out ignoredScript, out ignoredRegion, out ignoredVariant);

    bool bothLanguagePartsAreSearch = xParsed
        && yParsed
        && xLanguagePart == yLanguagePart
        && _searchString.Equals(xLanguagePart, ignoreCase);

    if (!bothLanguagePartsAreSearch)
    {
        // At most one language subtag equals the search string; that language goes first.
        if (xParsed && _searchString.Equals(xLanguagePart, ignoreCase))
        {
            return -1;
        }
        if (yParsed && _searchString.Equals(yLanguagePart, ignoreCase))
        {
            return 1;
        }
    }

    // The shorter (simpler) tag is more likely to be the one being looked for.
    int xTagLength = xTag.Length;
    int yTagLength = yTag.Length;
    if (xTagLength != yTagLength)
    {
        return xTagLength < yTagLength ? -1 : 1;
    }

    // When both language subtags equal the search string the user appears to be typing a
    // tag, and edit distance to the language names is unhelpful (it gives a strange order
    // to the variants of zh, for example). In that case only tag length (above) and the
    // alphabetical tag order (below) are used.
    if (!bothLanguagePartsAreSearch)
    {
        // Rank by edit distance between the search string and the primary name, capped at 25
        // because very large distances are not interesting.
        // Motivation: https://silbloom.myjetbrains.com/youtrack/issue/BL-5847
        // Per the original authors' timing tests, computing the distances does not slow the
        // sort noticeably and caching them in a dictionary does not speed it up noticeably.
        var xDistance = ApproximateMatcher.EditDistance(_lowerSearch, xPrimary.ToLowerInvariant(), 25, false);
        var yDistance = ApproximateMatcher.EditDistance(_lowerSearch, yPrimary.ToLowerInvariant(), 25, false);
        var byDistance = xDistance - yDistance;
        if (byDistance != 0)
        {
            return byDistance;
        }

        // Equal distances: fall back to the primary names themselves.
        int byPrimaryName = string.Compare(xPrimary, yPrimary, ignoreCase);
        if (byPrimaryName != 0)
        {
            return byPrimaryName;
        }
    }

    return string.Compare(xTag, yTag, ignoreCase);
}
/// <summary>
/// Compares "case" with "cased" (one extra trailing letter) with 1 as the third argument and
/// true as the fourth, and expects a distance of 0.
/// </summary>
public void EditDistance_SingleInsertionAtEndWithSuffixTreatedAsZeroDistance_0()
{
    int distance = ApproximateMatcher.EditDistance("case", "cased", 1, true);

    Assert.AreEqual(0, distance);
}
/// <summary>
/// Compares "abo" with "ao" (the middle 'b' removed) with 1 as the third argument, and
/// expects a distance of 1.
/// </summary>
public void EditDistance_SingleDeletionAtMiddle_1()
{
    Assert.AreEqual(1, ApproximateMatcher.EditDistance("abo", "ao", 1, false));
}
/// <summary>
/// Compares "abo" with "haboh" (an 'h' added at each end) with 1 as the third argument, and
/// expects the matcher's "larger than max" sentinel.
/// </summary>
public void EditDistance_SingleInsertionAtStartAndEnd_1Max_EditDistanceLargerThanMax()
{
    Assert.AreEqual(
        ApproximateMatcher.EditDistanceLargerThanMax,
        ApproximateMatcher.EditDistance("abo", "haboh", 1, false));
}
/// <summary>
/// Compares "abo" with "habho" (an 'h' added at the start and another before the 'o') with
/// 2 as the third argument, and expects a distance of 2.
/// </summary>
public void EditDistance_SingleInsertionAtStartAndMiddle_2()
{
    Assert.AreEqual(2, ApproximateMatcher.EditDistance("abo", "habho", 2, false));
}
/// <summary>
/// Compares "abo" with "abho" (an 'h' added in the middle) with 0 as the third argument, and
/// expects the matcher's "larger than max" sentinel.
/// </summary>
public void EditDistance_SingleInsertionAtMiddle_0Max_EditDistanceLargerThanMax()
{
    Assert.AreEqual(
        ApproximateMatcher.EditDistanceLargerThanMax,
        ApproximateMatcher.EditDistance("abo", "abho", 0, false));
}
/// <summary>
/// Compares "abo" with "habo" (an 'h' added at the start) with 1 as the third argument, and
/// expects a distance of 1.
/// </summary>
public void EditDistance_SingleInsertionAtStart_1()
{
    Assert.AreEqual(1, ApproximateMatcher.EditDistance("abo", "habo", 1, false));
}
/// <summary>
/// Compares "abo" with itself, with 0 as the third argument, and expects a distance of 0.
/// </summary>
public void EditDistance_Same_0()
{
    Assert.AreEqual(0, ApproximateMatcher.EditDistance("abo", "abo", 0, false));
}
/// <summary>
/// Compares "aboh" with "bho" (leading 'a' removed, trailing "oh" swapped to "ho") with 2 as
/// the third argument, and expects a distance of 2.
/// </summary>
public void EditDistance_SingleDeletionAtStartAndSingleTranspositionAtEnd_2()
{
    Assert.AreEqual(2, ApproximateMatcher.EditDistance("aboh", "bho", 2, false));
}
/// <summary>
/// Compares "abo" with "abs" (final 'o' replaced by 's') with 1 as the third argument, and
/// expects a distance of 1.
/// </summary>
public void EditDistance_SingleSubstitutionAtEnd_1()
{
    Assert.AreEqual(1, ApproximateMatcher.EditDistance("abo", "abs", 1, false));
}
/// <summary>
/// Compares "abo" with "rabi" (an 'r' added at the start, final 'o' replaced by 'i') with 2
/// as the third argument, and expects a distance of 2.
/// </summary>
public void EditDistance_SingleInsertionAtStartAndSingleSubstitutionAtEnd_2()
{
    Assert.AreEqual(2, ApproximateMatcher.EditDistance("abo", "rabi", 2, false));
}
/// <summary>
/// Compares "aboh" with "sbsh" ('a' and 'o' each replaced by 's') with 2 as the third
/// argument, and expects a distance of 2.
/// </summary>
public void EditDistance_SingleSubstitutionAtStartAndMiddle_2()
{
    Assert.AreEqual(2, ApproximateMatcher.EditDistance("aboh", "sbsh", 2, false));
}
/// <summary>
/// Compares "case" with "acaseinfact" with 1 as the third argument and true as the fourth,
/// and expects a distance of 1.
/// </summary>
public void EditDistance_SingleInsertionAtBeginningWithSuffixTreatedAsZeroDistance_1()
{
    // has suffix to ignore on the second word
    int distance = ApproximateMatcher.EditDistance("case", "acaseinfact", 1, true);

    Assert.AreEqual(1, distance);
}
/// <summary>
/// Compares "aboh" with "asos" ('b' and 'h' each replaced by 's') with 2 as the third
/// argument, and expects a distance of 2.
/// </summary>
public void EditDistance_SingleSubstitutionAtMiddleAndEnd_2()
{
    Assert.AreEqual(2, ApproximateMatcher.EditDistance("aboh", "asos", 2, false));
}
/// <summary>
/// Compares "noise" with "dist", with 4 as the third argument and true as the fourth, and
/// expects a distance of 3.
/// </summary>
public void EditDistance_SingleDeletionSingleSubstitutionAtBeginningSingleSubstitutionAtEndWithSuffixTreatedAsZeroDistance_3()
{
    // no suffix to ignore on the second word
    Assert.AreEqual(3, ApproximateMatcher.EditDistance("noise", "dist", 4, true));
}
/// <summary>
/// Compares "abo" with "aob" (trailing "bo" swapped to "ob") with 1 as the third argument,
/// and expects a distance of 1.
/// </summary>
public void EditDistance_SingleTranspositionAtEnd_1()
{
    Assert.AreEqual(1, ApproximateMatcher.EditDistance("abo", "aob", 1, false));
}
/// <summary>
/// Compares "abohor" with "baoohr" (leading "ab" swapped to "ba", inner "ho" swapped to
/// "oh") with 2 as the third argument, and expects a distance of 2.
/// </summary>
public void EditDistance_SingleTranspositionAtStartAndMiddle_2()
{
    Assert.AreEqual(2, ApproximateMatcher.EditDistance("abohor", "baoohr", 2, false));
}
/// <summary>
/// Orders two languages relative to the current search string: exact name matches first,
/// then exact tag matches, then matches on the language part of the tag, then closeness of
/// the primary name to the search string, and finally alphabetical order.
/// </summary>
/// <param name="x">The first language to compare.</param>
/// <param name="y">The second language to compare.</param>
/// <returns>
/// 0 when both languages carry the same tag; a negative value when <paramref name="x"/>
/// should be listed first; a positive value when <paramref name="y"/> should be listed first.
/// </returns>
public int Compare(LanguageInfo x, LanguageInfo y)
{
    const StringComparison ignoreCase = StringComparison.InvariantCultureIgnoreCase;

    string xTag = x.LanguageTag;
    string yTag = y.LanguageTag;
    if (xTag == yTag)
    {
        return 0;
    }

    string xPrimary = x.Names[0];
    string yPrimary = y.Names[0];

    if (!xPrimary.Equals(yPrimary, ignoreCase))
    {
        // BL-1141: prefer a language whose first or second name is exactly the search string.
        if (xPrimary.Equals(_searchString, ignoreCase))
        {
            return -1;
        }
        if (yPrimary.Equals(_searchString, ignoreCase))
        {
            return 1;
        }

        bool xSecondNameIsSearch = x.Names.Count > 1 && x.Names[1].Equals(_searchString, ignoreCase);
        if (xSecondNameIsSearch)
        {
            return -1;
        }
        bool ySecondNameIsSearch = y.Names.Count > 1 && y.Names[1].Equals(_searchString, ignoreCase);
        if (ySecondNameIsSearch)
        {
            return 1;
        }
    }

    // A whole tag equal to the search string wins next.
    if (xTag.Equals(_searchString, ignoreCase))
    {
        return -1;
    }
    if (yTag.Equals(_searchString, ignoreCase))
    {
        return 1;
    }

    // Then a tag whose language part equals the search string.
    if (IetfLanguageTag.GetLanguagePart(xTag).Equals(_searchString, ignoreCase))
    {
        return -1;
    }
    if (IetfLanguageTag.GetLanguagePart(yTag).Equals(_searchString, ignoreCase))
    {
        return 1;
    }

    // Rank by edit distance between the search string and the primary name, capped at 25
    // because very large distances are not interesting.
    // Motivation: https://silbloom.myjetbrains.com/youtrack/issue/BL-5847
    // Per the original authors' timing tests, computing the distances does not slow the
    // sort noticeably and caching them in a dictionary does not speed it up noticeably.
    var xDistance = ApproximateMatcher.EditDistance(_lowerSearch, xPrimary.ToLowerInvariant(), 25, false);
    var yDistance = ApproximateMatcher.EditDistance(_lowerSearch, yPrimary.ToLowerInvariant(), 25, false);
    var byDistance = xDistance - yDistance;
    if (byDistance != 0)
    {
        return byDistance;
    }

    // Equal distances: order by primary name, and by tag as the last resort.
    int byPrimaryName = string.Compare(xPrimary, yPrimary, ignoreCase);
    return byPrimaryName != 0
        ? byPrimaryName
        : string.Compare(xTag, yTag, ignoreCase);
}