/// <summary>
/// Creates the suggestion extension together with its backing spell checker.
/// </summary>
/// <param name="workContext">Work context stored on the instance.</param>
/// <param name="key">Stored on the instance; also used as the directory path when the index is not kept in memory.</param>
/// <param name="distanceType">String distance implementation handed to the spell checker.</param>
/// <param name="isRunInMemory">When true a <c>RAMDirectory</c> is used, otherwise an <c>FSDirectory</c> opened at <paramref name="key"/>.</param>
/// <param name="field">Field name stored on the instance.</param>
/// <param name="accuracy">Accuracy value passed to the spell checker.</param>
public SuggestionQueryIndexExtension(
    WorkContext workContext,
    string key,
    StringDistance distanceType,
    bool isRunInMemory,
    string field,
    float accuracy)
{
    this.workContext = workContext;
    this.key = key;
    this.field = field;

    if (!isRunInMemory)
    {
        directory = FSDirectory.Open(new DirectoryInfo(key));
    }
    else
    {
        directory = new RAMDirectory();
    }

    // The checker is created without a distance; the distance is supplied through the setter below.
    spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, null);
    spellChecker.SetAccuracy(accuracy);
    spellChecker.setStringDistance(distanceType);
}
/// <summary>
/// Lazily pairs elements of <paramref name="outer"/> with elements of <paramref name="inner"/>,
/// always yielding the remaining pair with the smallest Levenshtein distance between their keys.
/// </summary>
/// <remarks>
/// Every yielded element is removed from the list it came from, so both input lists are
/// mutated while the sequence is enumerated.
/// </remarks>
/// <param name="outer">First list; matched elements are removed from it.</param>
/// <param name="inner">Second list; matched elements are removed from it.</param>
/// <param name="outerKeySelector">Produces the comparison string for an outer element.</param>
/// <param name="innerKeySelector">Produces the comparison string for an inner element.</param>
/// <param name="resultSelector">Builds the result from the outer element, the inner element and their distance.</param>
public static IEnumerable<R> JoinSimilar<T, S, R>(
    this List<T> outer,
    List<S> inner,
    Func<T, string> outerKeySelector,
    Func<S, string> innerKeySelector,
    Func<T, S, int, R> resultSelector)
    where T : notnull
    where S : notnull
{
    StringDistance metric = new StringDistance();

    // Distance of every (outer, inner) combination, keyed by the pair itself.
    Dictionary<Tuple<T, S>, int> distances = outer
        .SelectMany(
            o => inner,
            (o, i) => KeyValuePair.Create(
                Tuple.Create(o, i),
                metric.LevenshteinDistance(outerKeySelector(o), innerKeySelector(i))))
        .ToDictionary();

    while (distances.Count > 0)
    {
        var closest = distances.MinBy(entry => entry.Value);
        var pair = closest.Key;

        // Neither member of the chosen pair may take part in another match.
        var consumed = distances.Keys
            .Where(candidate => candidate.Item1.Equals(pair.Item1) || candidate.Item2.Equals(pair.Item2))
            .ToList();
        distances.RemoveRange(consumed);

        outer.Remove(pair.Item1);
        inner.Remove(pair.Item2);

        yield return resultSelector(pair.Item1, pair.Item2, closest.Value);
    }
}
/// <summary>
/// Loads an operation log entry and builds the textual diffs between its initial and final
/// state, as well as against the neighbouring entries returned by
/// <c>DiffLogLogic.OperationLogNextPrev</c>.
/// </summary>
/// <param name="id">Primary key of the <c>OperationLogEntity</c>, in string form.</param>
/// <param name="simplify">Forwarded to <c>DiffLogLogic.SimplifyDump</c> for every dump.</param>
/// <returns>The dumps, the diffs between them and lites of the previous/next log entries.</returns>
public DiffLogResult GetOperationDiffLog(string id, bool simplify)
{
    var operationLog = Database.Retrieve<OperationLogEntity>(PrimaryKey.Parse(id, typeof(OperationLogEntity)));
    var logs = DiffLogLogic.OperationLogNextPrev(operationLog);

    StringDistance sd = new StringDistance();

    var prevFinal = DiffLogLogic.SimplifyDump(logs.Min?.Mixin<DiffLogMixin>().FinalState.Text, simplify);

    // With no following log entry, fall back to a dump of the target if it still exists.
    string? nextInitial;
    if (logs.Max != null)
    {
        nextInitial = DiffLogLogic.SimplifyDump(logs.Max.Mixin<DiffLogMixin>().InitialState.Text, simplify);
    }
    else if (operationLog.Target!.Exists())
    {
        nextInitial = GetDump(operationLog.Target!);
    }
    else
    {
        nextInitial = null;
    }

    string? initial = DiffLogLogic.SimplifyDump(operationLog.Mixin<DiffLogMixin>().InitialState.Text, simplify);
    string? final = DiffLogLogic.SimplifyDump(operationLog.Mixin<DiffLogMixin>().FinalState.Text, simplify);

    return new DiffLogResult
    {
        prev = logs.Min?.ToLite(),
        diffPrev = prevFinal != null && initial != null ? sd.DiffText(prevFinal, initial) : null,
        initial = initial,
        diff = sd.DiffText(initial, final),
        final = final,
        diffNext = final != null && nextInitial != null ? sd.DiffText(final, nextInitial) : null,
        next = logs.Max?.ToLite(),
    };
}
/// <summary>
/// Returns the reciprocal of the string distance between <paramref name="a"/> and <paramref name="b"/>.
/// </summary>
/// <returns><see cref="double.MaxValue"/> when the distance is zero, otherwise <c>1 / distance</c>.</returns>
protected static double InverseStringDistance(string a, string b)
{
    double distance = new StringDistance().Distance(a, b);

    // A zero distance has no finite reciprocal.
    if (distance == 0)
    {
        return double.MaxValue;
    }

    return 1 / distance;
}
/// <summary>
/// Generates a batch of random strings, measures how long it takes to compute the Levenshtein
/// distance from one reference string to each of them sequentially, and writes the timings to
/// the console.
/// </summary>
/// <returns>An OK result carrying the fixed string "0.0.1".</returns>
public ActionResult<IEnumerable<string>> Get()
{
    const long count = 100;
    const int length = 100;

    var stringDistance = new StringDistance();
    var watch = new Stopwatch();

    string reference = stringDistance.GenerateRandomString(length);
    var candidates = new string[count];
    var distances = new int[count];

    // Prepare the strings to compare against, generated in parallel.
    Parallel.For(0, count, index => candidates[index] = stringDistance.GenerateRandomString(length));
    Console.WriteLine("已经生成了" + count + "个长度为" + length + "的字符串");

    // Only the sequential distance computation is timed.
    watch.Start();
    for (int index = 0; index < count; index++)
    {
        distances[index] = stringDistance.LevenshteinDistance(reference, candidates[index]);
    }
    watch.Stop();

    Console.WriteLine("完成非并行计算,耗时(ms)" + watch.ElapsedMilliseconds);
    Console.WriteLine("性能比" + 100000d / watch.ElapsedMilliseconds);

    return Ok("0.0.1");
}
/// <summary>
/// Checks <c>StringDistance.GetLevenshteinDistance</c> against a set of known distances.
/// </summary>
public void LevenshteinStringDistance()
{
    // Identical strings.
    Assert.AreEqual(0, StringDistance.GetLevenshteinDistance("Test string one", "Test string one"));

    // Missing prefix.
    Assert.AreEqual(5, StringDistance.GetLevenshteinDistance("Test string one", "string one"));

    // Much shorter string with one substituted character.
    Assert.AreEqual(12, StringDistance.GetLevenshteinDistance("Test string one", "Tesd"));

    // Unrelated strings.
    Assert.AreEqual(20, StringDistance.GetLevenshteinDistance("String 1 test", "Absolutly different val"));

    // Numbers with and without separators / sign notation.
    Assert.AreEqual(2, StringDistance.GetLevenshteinDistance("141154342", "141,154,342"));
    Assert.AreEqual(4, StringDistance.GetLevenshteinDistance("-141154342", "(141,154,342)"));
}
/// <summary>
/// Entry point. With "-t" or "-rt" as the first argument it runs the corresponding test routine;
/// otherwise it reads two strings from the console and prints the result, letter matrix and
/// timing of every available distance measure.
/// </summary>
/// <param name="args">Command-line arguments; only the first one is inspected.</param>
static void Main(string[] args)
{
    string firstArg = args.Length > 0 ? args[0] : null;

    if (firstArg == "-t" || firstArg == "-rt")
    {
        if (firstArg == "-t")
        {
            TestTime();
        }
        else
        {
            TestTree();
        }

        Console.WriteLine("TESTING COMPLETED");
        Console.ReadKey();
        return;
    }

    Console.Write("Первая строка: ");
    string a = Console.ReadLine();
    Console.Write("Вторая строка: ");
    string b = Console.ReadLine();
    Console.WriteLine("\nРезультат: \n");

    foreach (StringDistance.Measure measure in Enum.GetValues(typeof(StringDistance.Measure)))
    {
        StringDistance distance = StringDistance.StringDistanceBuilder.GetInstance(measure, a, b);

        // The builder may return null for a measure; such measures are skipped.
        if (distance != null)
        {
            System.Diagnostics.Stopwatch timer = new System.Diagnostics.Stopwatch();
            GC.Collect();

            timer.Start();
            int result = distance.GetDistance();
            timer.Stop();

            LetterMatrix matrix = distance.GetLetterMatrix();

            Console.WriteLine("Метод: " + distance.MethodName);
            Console.WriteLine("Значение: " + result);
            Console.WriteLine("Матрица: ");
            Console.Write(matrix.ToString());
            Console.WriteLine("Прошло времени (тиков): " + timer.ElapsedTicks);
            Console.WriteLine("Прошло времени (секунд): " + timer.ElapsedMilliseconds / 1000f);
            Console.WriteLine("\n");
        }
    }

    Console.ReadKey();
}
/// <summary>
/// Benchmarks every distance measure on random word pairs of length 0 to 15 and prints the
/// average number of stopwatch ticks per computation.
/// </summary>
/// <remarks>
/// Measures for which the builder returns no instance are skipped, so the benchmark does not
/// fail with a <see cref="NullReferenceException"/> on them.
/// </remarks>
static void TestTime()
{
    const int repeatCount = 1000;
    const int finalWordLength = 16;

    for (int currentWordLength = 0; currentWordLength < finalWordLength; currentWordLength++)
    {
        Console.WriteLine("Word length: " + currentWordLength);

        foreach (StringDistance.Measure method in Enum.GetValues(typeof(StringDistance.Measure)))
        {
            string a = GetRandomWord(currentWordLength);
            string b = GetRandomWord(currentWordLength);

            // Probe once outside the timed section: the builder can return null for a measure.
            if (StringDistance.StringDistanceBuilder.GetInstance(method, a, b) == null)
            {
                continue;
            }

            Stopwatch watch = new Stopwatch();
            watch.Start();
            for (int i = 0; i < repeatCount; i++)
            {
                // Instance creation is deliberately part of the timed work, as before.
                StringDistance measureMethod = StringDistance.StringDistanceBuilder.GetInstance(method, a, b);
                measureMethod.GetDistance();
            }
            watch.Stop();

            double avgTicks = (double)watch.ElapsedTicks / repeatCount;
            Console.WriteLine("Method: " + method.ToString() + "; Ticks average: " + avgTicks);
        }

        Console.WriteLine();
    }
}
/// <summary>
/// Reads "cities.json", keeps the entries whose country is "ID" and lists every city whose
/// name is within 30% (Levenshtein distance relative to the input length) of the typed text.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnOutput_Click(object sender, EventArgs e)
{
    string json;
    using (StreamReader r = new StreamReader("cities.json"))
    {
        json = r.ReadToEnd();
    }

    // DeserializeObject yields null for empty or "null" content; treat that as "no cities".
    List<Item> items = JsonConvert.DeserializeObject<List<Item>>(json) ?? new List<Item>();

    string input = txtInput.Text;
    var output = new System.Text.StringBuilder();

    // The change percentage divides by the input length, so an empty input cannot be scored
    // (it previously threw DivideByZeroException); in that case no city is listed.
    if (input.Length > 0)
    {
        string loweredInput = input.ToLower();

        // Static string.Equals is null-safe, so entries without a country are simply skipped.
        foreach (Item item in items.Where(x => string.Equals(x.country, "ID")))
        {
            int hasil = StringDistance.LevenshteinDistance(loweredInput, item.name.ToLower());
            decimal persenPerubahan = ((decimal)hasil / input.Length) * 100;

            if (persenPerubahan <= 30)
            {
                output.Append(item.name)
                      .Append(" (")
                      .Append(persenPerubahan)
                      .Append(")")
                      .Append(" ,");
            }
        }
    }

    txtOutput.Text = output.ToString();
    MessageBox.Show("Done!");
}
/// <summary>
/// Verifies that the Damerau-Levenshtein distance of two strings, normalized by the length of
/// the longer one, equals the expected threshold.
/// </summary>
/// <param name="threshold">Expected normalized distance.</param>
/// <param name="s1">First string.</param>
/// <param name="s2">Second string.</param>
public void ThresholdOfToStrings(double threshold, string s1, string s2)
{
    var longest = Math.Max(s1.Length, s2.Length);
    var rawDistance = StringDistance.DamareuLevenshteinDistance(s1, s2);
    var normalized = StringDistance.NormalizeDistance(rawDistance, longest);

    Assert.Equal(threshold, normalized);
}
/// <summary>
/// Derives a default module name from the selected types: starting from the first type's full
/// name (without the "<paramref name="solutionName"/>.Entities" prefix), it keeps narrowing the
/// name to the longest substring it has in common with each following type's full name.
/// </summary>
/// <param name="selected">Types to inspect.</param>
/// <param name="solutionName">Solution name used to build the prefix that is stripped.</param>
/// <returns>
/// The common name with leading/trailing dots trimmed, or null when there is no common part
/// or <paramref name="selected"/> is empty.
/// </returns>
public static string GetDefaultModuleName(Type[] selected, string solutionName)
{
    StringDistance sd = new StringDistance();
    string name = null;

    foreach (var item in selected)
    {
        if (name == null)
        {
            name = item.FullName.RemovePrefix(solutionName + ".Entities");
        }
        else
        {
            // Only the start position inside the current name is needed; the other one is ignored.
            int length = sd.LongestCommonSubstring(name, item.FullName, out int startName, out int ignoredStart);
            name = name.Substring(startName, length);

            if (name.IsEmpty())
            {
                return null;
            }
        }
    }

    // name is still null when no type was selected; avoid dereferencing it.
    return name?.Trim('.');
}
/// <summary>
/// Returns the smaller of the two combined Levenshtein distances obtained by matching the
/// user's first/last name against the two words in either order.
/// </summary>
/// <param name="user">User whose <c>FirstName</c> and <c>LastName</c> are compared.</param>
/// <param name="word1">First word.</param>
/// <param name="word2">Second word.</param>
private static int GetDistance(UserInfo user, string word1, string word2)
{
    // First name against word1, last name against word2.
    var sameOrder =
        StringDistance.LevenshteinDistance(user.FirstName, word1) +
        StringDistance.LevenshteinDistance(user.LastName, word2);

    // First name against word2, last name against word1.
    var swappedOrder =
        StringDistance.LevenshteinDistance(user.FirstName, word2) +
        StringDistance.LevenshteinDistance(user.LastName, word1);

    return Math.Min(sameOrder, swappedOrder);
}
/// <summary>
/// Looks for <paramref name="word"/> among the keys of <c>ExceptionDict</c>; on the first key
/// at Damerau-Levenshtein distance zero it stores that entry's value in <c>TmpDict</c>.
/// </summary>
/// <param name="word">Word to look up.</param>
/// <returns>1 when a matching key was found, otherwise 0.</returns>
protected int SearchWordFromExSet(string word)
{
    foreach (KeyValuePair<string, Dictionary<string, string>> entry in ExceptionDict)
    {
        int distance = StringDistance.GetDamerauLevenshteinDistance(entry.Key, word);

        // Levenshtein algorithm (modified).
        // With the accepted distance set to zero only 100% matches are found. With 1 it would
        // tolerate a one-letter error: e.g. the word "dadanlar" would be accepted, because it
        // differs by just the letter n (it should be "dadamlar").
        // In the near future we will build a RECOMMENDATION system,
        // along the lines of "perhaps you meant this word?"
        if (distance != 0)
        {
            continue;
        }

        this.TmpDict = entry.Value;
        return 1;
    }

    return 0;
}
/// <summary>
/// Resolves <paramref name="str"/> in <paramref name="dictionary"/>; when there is no direct
/// hit, the user is asked to choose among the dictionary keys, ordered by descending longest
/// common subsequence with <paramref name="str"/>.
/// </summary>
/// <param name="str">Key to resolve.</param>
/// <param name="dictionary">Available values by key.</param>
/// <param name="context">Text inserted into the "has been renamed in" prompt.</param>
/// <returns>The direct hit, the value for the chosen key, or null when nothing was chosen.</returns>
public static T? SelectInteractive<T>(string str, Dictionary<string, T> dictionary, string context)
    where T : class
{
    T? directHit = dictionary.TryGetC(str);
    if (directHit != null)
    {
        return directHit;
    }

    StringDistance sd = new StringDistance();

    var rankedKeys = dictionary.Keys
        .Select(s => new { s, lcs = sd.LongestCommonSubsequence(str, s) })
        .OrderByDescending(s => s.lcs!)
        .Select(a => a.s!)
        .ToList();

    var cs = new ConsoleSwitch<int, string>("{0} has been renamed in {1}".FormatWith(str, context));
    cs.Load(rankedKeys);

    string? selected = cs.Choose();

    return selected == null ? null : dictionary.GetOrThrow(selected);
}
/// <summary>
/// Creates a stem filter over the given token stream.
/// </summary>
/// <param name="in_Renamed">Input token stream, passed to the base constructor.</param>
/// <param name="spellChecker">Spell checker stored on the filter; its current string distance is captured as the default distance.</param>
/// <param name="numberOfSuggestions">Value stored in <c>NumberOfSuggestions</c>.</param>
public StemFilter(TokenStream in_Renamed, LuceneSpellChecker spellChecker, int numberOfSuggestions)
    : base(in_Renamed)
{
    SpellChecker = spellChecker;
    NumberOfSuggestions = numberOfSuggestions;

    // The custom distance wraps whatever distance the spell checker is currently using.
    _defaultDistance = spellChecker.GetStringDistance();
    _customDistance = new StemDistance(_defaultDistance);
}
/// <summary>
/// Runs <c>StringDistance.Distance</c> over every entry of <c>TestData</c> and checks the
/// reported distance against the expected one.
/// </summary>
public void Repeated_Return_Correct_Distances()
{
    foreach (var testCase in TestData)
    {
        var outcome = StringDistance.Distance(testCase.Word, testCase.CorrectedWord);

        Assert.AreEqual(testCase.Distance, outcome.Distance);
    }
}
/// <summary>
/// Fills in the word and character error rates on the transcription's metrics, comparing the
/// agent transcription with the DeepSpeech transcription. Does nothing when the transcription
/// has no <c>MetricsId</c>.
/// </summary>
/// <param name="transcription">Transcription whose <c>Metrics</c> are updated.</param>
private void CountErrorRates(ref Transcription transcription)
{
    if (!transcription.MetricsId.HasValue)
    {
        return;
    }

    transcription.Metrics.WordErrorRate = StringDistance.CountWordErrorRate(
        transcription.AgentTranscription,
        transcription.DeepSpeechTranscription);

    transcription.Metrics.CharErrorRate = StringDistance.CountCharErrorRate(
        transcription.AgentTranscription,
        transcription.DeepSpeechTranscription);
}
/// <summary>
/// Creates a synchronization context with an empty root variable scope and no changes recorded.
/// </summary>
/// <param name="replacements">Replacements stored on the context.</param>
/// <param name="stringDistance">String distance helper stored on the context.</param>
/// <param name="queryDescription">Query description stored on the context.</param>
/// <param name="modelType">Optional model type stored on the context.</param>
public TemplateSynchronizationContext(
    Replacements replacements,
    StringDistance stringDistance,
    QueryDescription queryDescription,
    Type? modelType)
{
    // Root scope: it has no parent dictionary.
    this.Variables = new ScopedDictionary<string, ValueProviderBase>(null);

    this.ModelType = modelType;
    this.Replacements = replacements;
    this.StringDistance = stringDistance;
    this.QueryDescription = queryDescription;

    this.HasChanges = false;
}
/// <summary>
/// Creates a wrapper around a spell checker backed by a new <c>RAMDirectory</c>.
/// </summary>
/// <param name="field">Field name stored on the wrapper.</param>
/// <param name="sd">String distance to use; a <c>LevensteinDistance</c> is used when null.</param>
public SpellCheckerWrapper(string field, StringDistance sd)
{
    this.field = field;

    // Fall back to the Levenshtein implementation when the caller supplies no distance.
    StringDistance effectiveDistance = sd ?? new LevensteinDistance();

    spellChecker = new SpellChecker(new RAMDirectory(), effectiveDistance);
    spellChecker.setAccuracy(accuracy);
}
/// <summary>
/// Builds the translation view for one entity type: the master texts of its instances plus the
/// stored translations, optionally narrowed by a text filter.
/// </summary>
/// <remarks>
/// An instance key passes the filter when the filter is null/empty, when it equals the key's
/// <c>RowId</c> or instance id in string form, or when it is contained (invariant culture,
/// ignoring case) in the route's property string, in the master text, or in the translated
/// text of any of the current cultures.
/// Stored translations are grouped per instance, then per route-and-row id, then per culture
/// name. The <c>Diff</c> of a pair is null when the stored original text equals the current
/// master text; otherwise it is the <c>DiffText</c> between the two.
/// </remarks>
/// <param name="type">Name of the type, resolved through <c>TypeLogic.GetType</c>.</param>
/// <param name="culture">Culture name used to load translations; null passes a null culture to <c>TranslationsForType</c>.</param>
/// <param name="filter">Optional text filter; null or empty keeps every instance.</param>
/// <returns>Type name, translatable routes, master culture name and the filtered instances with their translations.</returns>
public TranslatedInstanceViewTypeTS View(string type, string?culture, string filter) { Type t = TypeLogic.GetType(type); var c = culture == null ? null : CultureInfo.GetCultureInfo(culture); var master = TranslatedInstanceLogic.FromEntities(t); var support = TranslatedInstanceLogic.TranslationsForType(t, culture: c); var all = string.IsNullOrEmpty(filter); var cultures = TranslationLogic.CurrentCultureInfos(TranslatedInstanceLogic.DefaultCulture); Func <LocalizedInstanceKey, bool> filtered = li => all || li.RowId.ToString() == filter || li.Instance.Id.ToString() == filter || li.Route.PropertyString().Contains(filter, StringComparison.InvariantCultureIgnoreCase) || master.GetOrThrow(li).Contains(filter, StringComparison.InvariantCultureIgnoreCase) || cultures.Any(ci => (support.TryGetC(ci)?.TryGetC(li)?.TranslatedText ?? "").Contains(filter, StringComparison.InvariantCultureIgnoreCase)); var sd = new StringDistance(); var supportByInstance = (from kvpCult in support from kvpLocIns in kvpCult.Value where filtered(kvpLocIns.Key) let newText = master.GetOrThrow(kvpLocIns.Key) group(lockIns: kvpLocIns.Key, translatedInstance: kvpLocIns.Value, culture: kvpCult.Key, newText: newText) by kvpLocIns.Key.Instance into gInstance select KeyValuePair.Create(gInstance.Key, gInstance.AgGroupToDictionary(a => a.lockIns.RouteAndRowId(), gr => gr.ToDictionary(a => a.culture.Name, a => new TranslatedPairViewTS { OriginalText = a.translatedInstance.OriginalText, Diff = a.translatedInstance.OriginalText.Equals(a.newText) ? 
null : sd.DiffText(a.translatedInstance.OriginalText, a.newText), TranslatedText = a.translatedInstance.TranslatedText }) ))).ToDictionary(); return(new TranslatedInstanceViewTypeTS { TypeName = type, Routes = TranslatedInstanceLogic.TranslateableRoutes.GetOrThrow(t).ToDictionary(a => a.Key.PropertyString(), a => a.Value), MasterCulture = TranslatedInstanceLogic.DefaultCulture.Name, Instances = master.Where(kvp => filtered(kvp.Key)).GroupBy(a => a.Key.Instance).Select(gr => new TranslatedInstanceViewTS { Lite = gr.Key, Master = gr.ToDictionary( a => a.Key.RouteAndRowId(), a => a.Value ), Translations = supportByInstance.TryGetC(gr.Key) ?? new Dictionary <string, Dictionary <string, TranslatedPairViewTS> >() }).ToList() }); }
/// <summary>
/// Verifies that the best matches for <paramref name="value"/> among the comma-separated
/// <paramref name="candidates"/>, using Damerau-Levenshtein distance and a 0.33 threshold,
/// join to the expected comma-separated output.
/// </summary>
/// <param name="value">Value to match.</param>
/// <param name="candidates">Comma-separated candidate list.</param>
/// <param name="expectedOutput">Expected matches, joined with commas.</param>
public void SortedMatches(string value, string candidates, string expectedOutput)
{
    var candidateList = candidates.Split(',');

    var matches = StringDistance.GetBestMatchesSorted(
        StringDistance.DamareuLevenshteinDistance,
        value,
        candidateList,
        0.33d);

    var actualOutput = string.Join(",", matches);

    Assert.Equal(expectedOutput, actualOutput);
}
/// <summary>
/// Simulates CPU-bound work: for each of <paramref name="time"/> rounds it generates 10000
/// random strings of length 1000 and then waits 100 ms.
/// </summary>
/// <param name="time">Number of rounds to run.</param>
/// <returns><paramref name="time"/> multiplied by 10000.</returns>
private async Task<long> CpuProcess(int time)
{
    int round = 1;
    while (round <= time)
    {
        // The generated strings are thrown away; only the work of producing them matters.
        for (int iteration = 0; iteration < 10000; ++iteration)
        {
            StringDistance.GenerateRandomString(1000);
        }

        await Task.Delay(100);
        ++round;
    }

    return time * 10000L;
}
/// <summary>
/// Verifies that the best matches for <paramref name="value"/> among the comma-separated
/// <paramref name="candidates"/>, using Damerau-Levenshtein distance and the supplied
/// threshold, join to the expected comma-separated output.
/// </summary>
/// <param name="value">Value to match.</param>
/// <param name="candidates">Comma-separated candidate list.</param>
/// <param name="expectedOutput">Expected matches, joined with commas.</param>
/// <param name="treshold">Threshold passed to <c>GetBestMatchesSorted</c>.</param>
public void MatchesBelowThresholdAreNotReturned(string value, string candidates, string expectedOutput, double treshold)
{
    var candidateList = candidates.Split(',');

    var matches = StringDistance.GetBestMatchesSorted(
        StringDistance.DamareuLevenshteinDistance,
        value,
        candidateList,
        treshold);

    var actualOutput = string.Join(",", matches);

    Assert.Equal(expectedOutput, actualOutput);
}
/// <summary>
/// Fills <c>result.diff</c> with the text diff between the previous and current content,
/// provided both are present and the product of their lengths does not exceed
/// <see cref="int.MaxValue"/>.
/// </summary>
/// <param name="result">Result object to complete; it is modified in place.</param>
/// <returns>The same <paramref name="result"/> instance.</returns>
public static RestDiffResult RestDiffLog(RestDiffResult result)
{
    StringDistance sd = new StringDistance();

    // Null when either side is missing; computed as long so the product cannot overflow int.
    long? size = (long?)result.current?.Length * result.previous?.Length;

    if (size == null || size > int.MaxValue)
    {
        return result;
    }

    result.diff = sd.DiffText(result.previous, result.current);
    return result;
}
/// <summary>
/// Scans from a spaceship and expects the scan result, with spaces removed, to be exactly the
/// codes of the two other spaceships concatenated.
/// </summary>
public void Scan_SpaceshipCenterNotInScan_BothShips()
{
    Spaceship spaceship = new Spaceship("0", "spaceship", 10, 10);
    Spaceship spaceship1 = new Spaceship("1", "spaceship", 5, 30);
    Spaceship spaceship2 = new Spaceship("2", "spaceship", 50, 0);
    String all_codes = spaceship1.Code + spaceship2.Code;

    String scanCodes = spaceship.Scan(45, 90, 50);

    // Strings are immutable: Replace returns a new string, so the result must be assigned
    // (it was previously discarded, leaving the spaces in place).
    scanCodes = scanCodes.Replace(" ", String.Empty);
    Console.WriteLine("scancodes" + scanCodes);

    Assert.AreEqual(0, StringDistance.Compute(scanCodes, all_codes));
}
/// <summary>
/// Demo entry point: computes the distance between "polite" and "p0l1t3", prints every
/// reported mistake followed by the total distance, then waits for a key press.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const string originalWord = "polite";
    const string correctedWord = "p0l1t3";

    var outcome = StringDistance.Distance(originalWord, correctedWord);

    foreach (var mistake in outcome.Mistakes)
    {
        Console.WriteLine(mistake.ToString());
    }

    Console.WriteLine($"Total distance: {outcome.Distance}");
    Console.ReadKey();
}
/// <summary>
/// Asks which of the keys that only exist in <paramref name="newKeys"/> replace the keys that
/// only exist in <paramref name="oldKeys"/>, and stores the chosen pairs under
/// <paramref name="replacementsKey"/>.
/// </summary>
/// <remarks>
/// Returns immediately when there are no old-only or no new-only keys. Otherwise a distance
/// (via <c>Distance</c>) is computed for every old-only/new-only combination. While both sets
/// still have entries, the old key picked by <c>WithMin</c> over each key's smallest distance
/// (presumably the old key with the closest candidate — confirm against <c>WithMin</c>) is
/// handed to <c>SelectInteractive</c> with its candidates ordered by ascending distance. The
/// old key is then removed from further consideration; when a new value was selected, the pair
/// is recorded and that new key is removed from every remaining candidate list. Recorded pairs
/// are written with <c>GetOrCreate(replacementsKey).SetRange</c> only when there is at least one.
/// </remarks>
/// <param name="oldKeys">Keys known before the change.</param>
/// <param name="newKeys">Keys known after the change.</param>
/// <param name="replacementsKey">Key under which the replacements are stored; also passed to <c>SelectInteractive</c>.</param>
public virtual void AskForReplacements( HashSet <string> oldKeys, HashSet <string> newKeys, string replacementsKey) { List <string> oldOnly = oldKeys.Where(k => !newKeys.Contains(k)).ToList(); List <string> newOnly = newKeys.Where(k => !oldKeys.Contains(k)).ToList(); if (oldOnly.Count == 0 || newOnly.Count == 0) { return; } StringDistance sd = new StringDistance(); Dictionary <string, Dictionary <string, float> > distances = oldOnly.ToDictionary(o => o, o => newOnly.ToDictionary(n => n, n => { return(Distance(sd, o, n)); })); Dictionary <string, string> replacements = new Dictionary <string, string>(); while (oldOnly.Count > 0 && newOnly.Count > 0) { var old = distances.WithMin(kvp => kvp.Value.Values.Min()); Selection selection = SelectInteractive(old.Key, old.Value.OrderBy(a => a.Value).Select(a => a.Key).ToList(), replacementsKey, Interactive); oldOnly.Remove(selection.OldValue); distances.Remove(selection.OldValue); if (selection.NewValue != null) { replacements.Add(selection.OldValue, selection.NewValue); newOnly.Remove(selection.NewValue); foreach (var dic in distances.Values) { dic.Remove(selection.NewValue); } } } if (replacements.Count != 0) { this.GetOrCreate(replacementsKey).SetRange(replacements); } }
/// <summary>
/// Applies a verified transcription model to the stored transcription, records who verified
/// it and when, recomputes the error rates and persists the result.
/// </summary>
/// <param name="transcriptionModel">Verified data; its <c>Id</c> selects the stored transcription.</param>
/// <param name="agentId">Identifier stored as the verifier.</param>
private async Task SaveVerifiedTranscription(TranscriptionModel transcriptionModel, long agentId)
{
    var transcription = await transcriptionRepository.GetAsync(transcriptionModel.Id);

    // Computed before mapper.Map runs, so the comparison uses the agent transcription as it
    // was loaded from the repository.
    var verificationWordErrorRate = StringDistance.CountWordErrorRate(
        transcriptionModel.AgentTranscription,
        transcription.AgentTranscription);
    transcription.WordErrorRate = verificationWordErrorRate;

    mapper.Map(transcriptionModel, transcription);

    transcription.VerifierId = agentId;
    transcription.VerificationTime = DateTime.UtcNow;
    transcription.InUse = false;

    CountErrorRates(ref transcription);

    await transcriptionRepository.UpdateAsync(transcription);
}
/// <summary>
/// Retrieves the same entity under two overridden system times (each shifted by one
/// millisecond), dumps both versions and returns the text diff between the dumps.
/// </summary>
/// <param name="typeName">Name of the entity type, resolved through <c>TypeLogic.GetType</c>.</param>
/// <param name="id">Primary key in string form.</param>
/// <param name="from">Moment of the first version.</param>
/// <param name="to">Moment of the second version.</param>
/// <returns>The diff produced by <c>StringDistance.DiffText</c> for the two dumps.</returns>
public List<StringDistance.DiffPair<List<StringDistance.DiffPair<string>>>> DiffVersiones(string typeName, string id, DateTime from, DateTime to)
{
    var entityType = TypeLogic.GetType(typeName);
    var key = PrimaryKey.Parse(id, entityType);

    var earlier = SystemTime.Override(from.AddMilliseconds(1)).Using(_ => Database.Retrieve(entityType, key));
    var later = SystemTime.Override(to.AddMilliseconds(1)).Using(_ => Database.Retrieve(entityType, key));

    var earlierDump = GetDump(earlier);
    var laterDump = GetDump(later);

    return new StringDistance().DiffText(earlierDump, laterDump);
}
/// <summary>
/// Builds the synchronization command for every stored word template, or returns null when
/// <c>AvoidSynchronize</c> is set.
/// </summary>
/// <param name="replacements">Replacements passed to each template synchronization.</param>
/// <returns>The per-template commands combined with double spacing, or null.</returns>
static SqlPreCommand Schema_Synchronize_Tokens(Replacements replacements)
{
    if (AvoidSynchronize)
    {
        return null;
    }

    // A single distance helper is shared by all templates.
    StringDistance sd = new StringDistance();

    var wordTemplates = Database.Query<WordTemplateEntity>().ToList();

    return wordTemplates
        .Select(template => SynchronizeWordTemplate(replacements, template, sd))
        .Combine(Spacing.Double);
}
/// <summary>
/// Creates the suggestion extension for an index, together with its backing spell checker and
/// a descriptive operation text.
/// </summary>
/// <param name="indexInstance">Index stored on the instance.</param>
/// <param name="workContext">Work context stored on the instance.</param>
/// <param name="key">Directory path used when the index is not kept in memory.</param>
/// <param name="distanceType">String distance implementation handed to the spell checker.</param>
/// <param name="isRunInMemory">When true a <c>RAMDirectory</c> is used, otherwise an <c>FSDirectory</c> opened at <paramref name="key"/>.</param>
/// <param name="field">Field name stored on the instance.</param>
/// <param name="accuracy">Accuracy value passed to the spell checker.</param>
public SuggestionQueryIndexExtension(
    Index indexInstance,
    WorkContext workContext,
    string key,
    StringDistance distanceType,
    bool isRunInMemory,
    string field,
    float accuracy)
{
    _indexInstance = indexInstance;
    this.workContext = workContext;
    this.field = field;

    if (!isRunInMemory)
    {
        directory = FSDirectory.Open(new DirectoryInfo(key));
    }
    else
    {
        directory = new RAMDirectory();
    }

    // The checker is created without a distance; the distance is supplied through the setter below.
    this.spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, null);
    this.spellChecker.SetAccuracy(accuracy);
    this.spellChecker.setStringDistance(distanceType);

    _operationText = "Suggestions for " + field + " " + distanceType + " (" + accuracy + ")";
}
/// <summary>
/// Creates the suggestion extension, choosing the kind of spell-check directory from the kind
/// of directory the given reader works on.
/// </summary>
/// <param name="key">Stored on the instance; also used as the directory path for the on-disk case.</param>
/// <param name="reader">Index reader; only the type of its directory is inspected.</param>
/// <param name="distance">String distance passed to the spell checker.</param>
/// <param name="field">Field name stored on the instance.</param>
/// <param name="accuracy">Accuracy value passed to the spell checker.</param>
public SuggestionQueryIndexExtension(
    string key,
    IndexReader reader,
    StringDistance distance,
    string field,
    float accuracy)
{
    this.key = key;
    this.field = field;

    // An in-memory reader gets a fresh in-memory directory (the reader's own directory is not reused).
    bool readerIsInMemory = reader.Directory() is RAMDirectory;
    if (!readerIsInMemory)
    {
        directory = FSDirectory.Open(new DirectoryInfo(key));
    }
    else
    {
        directory = new RAMDirectory();
    }

    spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, distance);
    spellChecker.SetAccuracy(accuracy);
}
/// <summary>
/// Creates the suggestion extension with a spell checker built on the instance's current
/// <c>directory</c> value.
/// </summary>
/// <param name="distance">String distance passed to the spell checker.</param>
/// <param name="field">Field name stored on the instance.</param>
/// <param name="accuracy">Accuracy value passed to the spell checker.</param>
public SuggestionQueryIndexExtension(StringDistance distance, string field, float accuracy)
{
    this.field = field;

    // NOTE(review): directory is not assigned in this constructor, so the spell checker gets
    // whatever value the field already holds — confirm it is initialized elsewhere.
    spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, distance);
    spellChecker.SetAccuracy(accuracy);
}
/// <summary>
/// Creates a spell checker on the given index directory with the given distance measure,
/// sorting suggestions with <c>SuggestWordQueue.DEFAULT_COMPARATOR</c>.
/// The directory is created if it doesn't exist yet.
/// </summary>
/// <param name="spellIndex">The spell index directory.</param>
/// <param name="sd">The <see cref="StringDistance"/> measurement to use.</param>
/// <exception cref="IOException">If the spell checker cannot open the directory.</exception>
public SpellChecker(Directory spellIndex, StringDistance sd)
    : this(spellIndex, sd, SuggestWordQueue.DEFAULT_COMPARATOR)
{
}
/// <summary>
/// Creates a spell checker on the given index directory, using the given
/// <see cref="Lucene.Net.Search.Spell.StringDistance"/> measure and the given comparer for
/// sorting the results.
/// </summary>
/// <param name="spellIndex">The spelling index.</param>
/// <param name="sd">The distance measure.</param>
/// <param name="comparator">The comparer used to order suggestions.</param>
/// <exception cref="IOException">If there is a problem opening the index.</exception>
public SpellChecker(Directory spellIndex, StringDistance sd, IComparer<SuggestWord> comparator)
{
    this.SpellIndex = spellIndex;
    this.StringDistance = sd;
    this.comparator = comparator;
}
/// <summary>
/// Creates the mock by forwarding both arguments unchanged to the base spell checker constructor.
/// </summary>
/// <param name="spellIndex">The spell index directory.</param>
/// <param name="sd">The string distance measure.</param>
public SpellCheckerMock(Directory spellIndex, StringDistance sd)
    : base(spellIndex, sd)
{
}
/// <summary>
/// Replaces the <see cref="StringDistance"/> implementation used by this
/// <see cref="SpellChecker"/> instance.
/// </summary>
/// <param name="sd">The <see cref="StringDistance"/> implementation to use from now on.</param>
public void setStringDistance(StringDistance sd) => this.sd = sd;
/// <summary>
/// Creates a spell checker that uses the given directory as its index and the given distance
/// measure. The directory is created if it doesn't exist yet.
/// </summary>
/// <param name="spellIndex">The spell index directory.</param>
/// <param name="sd">The <see cref="StringDistance"/> measurement to use.</param>
public SpellChecker(Directory spellIndex, StringDistance sd)
{
    // Both values go through the regular setters.
    SetSpellIndex(spellIndex);
    setStringDistance(sd);
}