// Indexes a handful of near-identical terms, asks DirectSpellChecker for suggestions
// for a misspelling, and checks that every suggestion's score equals the distance
// reported by the spell checker's own IStringDistance, in both argument orders.
//
// The directory, writer and reader are wrapped in using statements so they are
// released even when an assertion fails. Disposal order is reader, writer,
// directory, the same order as the explicit Dispose() calls this replaces.
public void TestInternalLevenshteinDistance()
{
    DirectSpellChecker spellchecker = new DirectSpellChecker();

    using (Directory dir = NewDirectory())
    using (RandomIndexWriter writer = new RandomIndexWriter(
        Random(),
        dir,
        new MockAnalyzer(Random(), MockTokenizer.KEYWORD, true),
        Similarity,
        TimeZone))
    {
        // NOTE(review): the last term looks mis-encoded (possibly a supplementary-plane
        // character that was decoded with the wrong charset) - confirm the file encoding.
        String[] termsToAdd = { "metanoia", "metanoian", "metanoiai", "metanoias", "metanoið‘" };
        for (int i = 0; i < termsToAdd.Length; i++)
        {
            Document doc = new Document();
            doc.Add(NewTextField("repentance", termsToAdd[i], Field.Store.NO));
            writer.AddDocument(doc);
        }

        using (IndexReader ir = writer.Reader)
        {
            String misspelled = "metanoix";
            SuggestWord[] similar = spellchecker.SuggestSimilar(
                new Term("repentance", misspelled), 4, ir, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
            assertTrue(similar.Length == 4);

            IStringDistance sd = spellchecker.Distance;
            assertTrue(sd is LuceneLevenshteinDistance);

            foreach (SuggestWord word in similar)
            {
                // The score must match the distance regardless of argument order.
                assertTrue(word.Score == sd.GetDistance(word.String, misspelled));
                assertTrue(word.Score == sd.GetDistance(misspelled, word.String));
                // LUCENENET TODO: Perhaps change this to word.ToString()?
            }
        }
    }
}
/// <summary>
/// Shared checks for an <see cref="IStringDistance"/> implementation: the distance
/// between two empty strings is expected to be 0, the distance between an empty
/// string and "foo" is expected to be 3 in either argument order, and the
/// null-argument checks in <c>AssertArgumentNullExceptions</c> are run afterwards.
/// </summary>
/// <param name="instance">The implementation under test.</param>
public static void TestDistance(IStringDistance instance)
{
    // Precision argument passed to every Assert.Equal call below.
    const int precision = 1;

    Assert.Equal(0.0, instance.Distance("", ""), precision);
    Assert.Equal(3.0, instance.Distance("", "foo"), precision);
    Assert.Equal(3.0, instance.Distance("foo", ""), precision);

    AssertArgumentNullExceptions(instance);
}
/// <summary>
/// Expresses how alike two strings are as a fraction of the longer string's length:
/// <c>(longerLength - distance) / longerLength</c>, where the distance comes from
/// <paramref name="distanceAlgo"/>. Two empty strings yield 1.0.
/// </summary>
/// <param name="a">First string; must not be null.</param>
/// <param name="b">Second string; must not be null.</param>
/// <param name="distanceAlgo">
/// Algorithm used to compute the distance. It is only consulted (and only required to
/// be non-null) when at least one of the strings is non-empty.
/// </param>
/// <param name="comparer">
/// Character comparer handed to the algorithm; <c>DefaultComparer</c> is used when null.
/// </param>
/// <returns>The similarity fraction described above.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="a"/> or <paramref name="b"/> is null, or <paramref name="distanceAlgo"/>
/// is null when a distance has to be computed.
/// </exception>
public static double EqualPercent(string a, string b, IStringDistance distanceAlgo, IEqualityComparer<char> comparer)
{
    if (a == null)
    {
        throw new System.ArgumentNullException(nameof(a));
    }

    if (b == null)
    {
        throw new System.ArgumentNullException(nameof(b));
    }

    comparer = comparer ?? DefaultComparer;

    // On equal lengths 'a' is treated as the longer string.
    string longer = a, shorter = b;
    if (a.Length < b.Length)
    {
        longer = b;
        shorter = a;
    }

    var longerLength = longer.Length;
    if (longerLength == 0)
    {
        // Both strings are empty: identical by definition, and dividing would be 0/0.
        return 1.0;
    }

    // Checked only here so the empty/empty shortcut above keeps working without an algorithm.
    if (distanceAlgo == null)
    {
        throw new System.ArgumentNullException(nameof(distanceAlgo));
    }

    return (longerLength - distanceAlgo.CalculateDistance(longer, shorter, comparer)) / (double)longerLength;
}
/// <summary>
/// Creates the mock by handing all three arguments, unchanged, to the base-class
/// constructor. The constructor adds no behavior of its own.
/// </summary>
/// <param name="spellIndex">Forwarded to the base constructor.</param>
/// <param name="sd">Forwarded to the base constructor.</param>
/// <param name="comparer">Forwarded to the base constructor.</param>
public SpellCheckerMock(Directory spellIndex, IStringDistance sd, IComparer<SuggestWord> comparer)
    : base(spellIndex, sd, comparer)
{
}
/// <summary>
/// Creates the mock by handing both arguments, unchanged, to the base-class
/// constructor. The constructor adds no behavior of its own.
/// </summary>
/// <param name="spellIndex">Forwarded to the base constructor.</param>
/// <param name="sd">Forwarded to the base constructor.</param>
public SpellCheckerMock(Directory spellIndex, IStringDistance sd)
    : base(spellIndex, sd)
{
}
/// <summary>
/// Scores the titles of one series against <paramref name="query"/> and records the
/// best (lowest-distance) title in <paramref name="distLevenshtein"/>.
/// </summary>
/// <remarks>
/// Only titles whose language is in <paramref name="languages"/> and whose type is not
/// <c>ShortName</c> are considered. An exact (ordinal) match scores 0; everything else
/// is scored with <paramref name="search"/>. On equal distances the shorter title wins.
/// Nothing is recorded once the dictionary already holds <paramref name="limit"/> entries
/// or when the series has no title data.
/// </remarks>
/// <param name="search">Distance algorithm used for non-exact matches.</param>
/// <param name="languages">
/// Language codes to accept. Title languages are lower-cased before lookup, so the
/// set is presumably expected to hold lower-case codes - verify against the caller.
/// </param>
/// <param name="a">Series whose titles are inspected; may be null.</param>
/// <param name="query">Text being searched for.</param>
/// <param name="distLevenshtein">Result map from series to (distance, matched title).</param>
/// <param name="limit">Entry count at which no further work is done.</param>
private static void CheckTitlesFuzzy(IStringDistance search, HashSet<string> languages, SVR_AnimeSeries a, string query, ref ConcurrentDictionary<SVR_AnimeSeries, Tuple<double, string>> distLevenshtein, int limit)
{
    // Two distances closer than this are treated as equal.
    const double Tolerance = 0.00001;

    if (distLevenshtein.Count >= limit)
    {
        return;
    }

    if (a?.Contract?.AniDBAnime?.AnimeTitles == null)
    {
        return;
    }

    var dist = double.MaxValue;
    string match = string.Empty;

    // Language codes are identifiers, not display text, so fold case with the invariant
    // culture; culture-sensitive ToLower() mis-folds 'I' under e.g. Turkish culture.
    var seriesTitles = a.Contract.AniDBAnime.AnimeTitles
        .Where(b => languages.Contains(b.Language.ToLowerInvariant()) &&
                    b.TitleType != Shoko.Models.Constants.AnimeTitleType.ShortName)
        .Select(b => b.Title)
        .ToList();

    foreach (string title in seriesTitles)
    {
        if (string.IsNullOrWhiteSpace(title))
        {
            continue;
        }

        // An exact match keeps the perfect score of 0; anything else is measured.
        var result = 0.0;
        if (!title.Equals(query, StringComparison.Ordinal))
        {
            result = search.Distance(title, query);
        }

        // For Dice, 1 is no reasonable match
        if (result >= 1)
        {
            continue;
        }

        // Don't count an error as liberally when the title is short
        if (title.Length < 5 && result > 0.8)
        {
            continue;
        }

        if (result < dist)
        {
            match = title;
            dist = result;
        }
        else if (Math.Abs(result - dist) < Tolerance && title.Length < match.Length)
        {
            // Same distance: prefer the shorter title.
            match = title;
        }
    }

    // No title passed the filters above.
    if (dist >= double.MaxValue)
    {
        return;
    }

    // Keep the lowest distance, then by shortest title
    var candidate = new Tuple<double, string>(dist, match);
    distLevenshtein.AddOrUpdate(a, candidate, (key, oldValue) =>
    {
        if (oldValue.Item1 < dist)
        {
            return oldValue;
        }

        if (Math.Abs(oldValue.Item1 - dist) < Tolerance)
        {
            return oldValue.Item2.Length < match.Length ? oldValue : candidate;
        }

        return candidate;
    });
}
/// <summary>
/// Asserts that <c>Distance</c> throws <see cref="ArgumentNullException"/> when
/// either argument, or both, is null.
/// </summary>
/// <param name="instance">The implementation under test.</param>
public static void AssertArgumentNullExceptions(IStringDistance instance)
{
    var expected = typeof(ArgumentNullException);

    // Both null, then only the first, then only the second.
    Assert.Throws(expected, () => instance.Distance(null, null));
    Assert.Throws(expected, () => instance.Distance(null, ""));
    Assert.Throws(expected, () => instance.Distance("", null));
}
/// <summary>
/// Use the given directory as a spell checker index. The directory
/// is created if it doesn't exist yet. Results are ordered with
/// <c>SuggestWordQueue.DEFAULT_COMPARER</c>; this overload simply delegates to the
/// three-argument constructor.
/// </summary>
/// <param name="spellIndex">The spell index directory.</param>
/// <param name="sd">The <see cref="IStringDistance"/> measurement to use.</param>
/// <exception cref="IOException">If the spell checker cannot open the directory.</exception>
public SpellChecker(Directory spellIndex, IStringDistance sd)
    : this(spellIndex, sd, SuggestWordQueue.DEFAULT_COMPARER)
{
}
/// <summary>
/// Use the given directory as a spell checker index, with the given
/// <see cref="IStringDistance"/> measure and the given
/// <see cref="System.Collections.Generic.IComparer{T}"/> for sorting the results.
/// </summary>
/// <param name="spellIndex">The spelling index; applied via <c>SetSpellIndex</c>.</param>
/// <param name="sd">The distance measure; stored in <c>StringDistance</c>.</param>
/// <param name="comparer">The comparer used for sorting the results.</param>
/// <exception cref="IOException">If there is a problem opening the index.</exception>
public SpellChecker(Directory spellIndex, IStringDistance sd, IComparer<SuggestWord> comparer)
{
    // The index is set first, exactly as before; the remaining assignments follow it.
    SetSpellIndex(spellIndex);
    StringDistance = sd;
    this.comparer = comparer;
}
/// <summary>
/// Use the given directory as a spell checker index, with the given
/// <see cref="IStringDistance"/> measure and the given
/// <see cref="System.Collections.Generic.IComparer{T}"/> for sorting the results.
/// </summary>
/// <param name="spellIndex">The spelling index; assigned to <c>SpellIndex</c>.</param>
/// <param name="sd">The distance measure; assigned to <c>StringDistance</c>.</param>
/// <param name="comparator">The comparator used for sorting the results.</param>
/// <exception cref="System.IO.IOException">If there is a problem opening the index.</exception>
public SpellChecker(Directory spellIndex, IStringDistance sd, IComparer<SuggestWord> comparator)
{
    // The index is assigned first, exactly as before; the remaining assignments follow it.
    SpellIndex = spellIndex;
    StringDistance = sd;
    this.comparator = comparator;
}
/// <summary>
/// Creates the mock by handing all three arguments, unchanged, to the base-class
/// constructor. The constructor adds no behavior of its own.
/// </summary>
/// <param name="spellIndex">Forwarded to the base constructor.</param>
/// <param name="sd">Forwarded to the base constructor.</param>
/// <param name="comparator">Forwarded to the base constructor.</param>
public SpellCheckerMock(Directory spellIndex, IStringDistance sd, IComparer<SuggestWord> comparator)
    : base(spellIndex, sd, comparator)
{
}
/// <summary>
/// Use the given directory as a spell checker index, with the given
/// <see cref="IStringDistance"/> measure and the given
/// <see cref="System.Collections.Generic.IComparer{T}"/> for sorting the results.
/// </summary>
/// <param name="spellIndex">The spelling index; assigned to <c>SpellIndex</c>.</param>
/// <param name="sd">The distance measure; assigned to <c>StringDistance</c>.</param>
/// <param name="comparator">The comparator used for sorting the results.</param>
/// <exception cref="System.IO.IOException">If there is a problem opening the index.</exception>
public SpellChecker(Directory spellIndex, IStringDistance sd, IComparer<SuggestWord> comparator)
{
    // The index is assigned first, exactly as before; the remaining assignments follow it.
    SpellIndex = spellIndex;
    StringDistance = sd;
    this.comparator = comparator;
}
/// <summary>
/// Use the given directory as a spell checker index. The directory
/// is created if it doesn't exist yet. Results are ordered with
/// <c>SuggestWordQueue.DEFAULT_COMPARATOR</c>; this overload simply delegates to the
/// three-argument constructor.
/// </summary>
/// <param name="spellIndex">The spell index directory.</param>
/// <param name="sd">The <see cref="IStringDistance"/> measurement to use.</param>
/// <exception cref="System.IO.IOException">If the spell checker cannot open the directory.</exception>
public SpellChecker(Directory spellIndex, IStringDistance sd)
    : this(spellIndex, sd, SuggestWordQueue.DEFAULT_COMPARATOR)
{
}