/// <summary>
/// Checks the Damerau distance between "ABCDEF" and three edited variants of it.
/// </summary>
public void TestDistance()
{
    const string reference = "ABCDEF";
    var metric = new Damerau();

    // "ABDCEF" is the reference with the adjacent pair C/D swapped.
    var swappedPair = metric.Distance(reference, "ABDCEF");
    Assert.Equal(expected: 1.0, actual: swappedPair);

    // "BACDFE" is the reference with both A/B and E/F swapped.
    var twoSwappedPairs = metric.Distance(reference, "BACDFE");
    Assert.Equal(expected: 2.0, actual: twoSwappedPairs);

    // "ABCDE" is the reference with the trailing F removed.
    var droppedLast = metric.Distance(reference, "ABCDE");
    Assert.Equal(expected: 1.0, actual: droppedLast);
}
/// <summary>
/// Checks the Damerau distance between "abc" and four other strings.
/// </summary>
public void Damerau()
{
    const string source = "abc";
    var metric = new Damerau();

    // No character of "123" matches: expected distance 3.
    var allDifferent = metric.Distance(source, "123");
    Assert.AreEqual(3, allDifferent);

    // Only the trailing 'c' is shared with "12c": expected distance 2.
    var lastShared = metric.Distance(source, "12c");
    Assert.AreEqual(2, lastShared);

    // "123abc" is the source with three extra leading characters: expected distance 3.
    var prefixed = metric.Distance(source, "123abc");
    Assert.AreEqual(3, prefixed);

    // "acb" is the source with the adjacent pair b/c swapped: expected distance 1.
    var swapped = metric.Distance(source, "acb");
    Assert.AreEqual(1, swapped);
}
/// <summary>
/// Checks the Damerau distance between "ABCDEF" and three edited variants,
/// then hands the same instance to the shared null/empty checks.
/// </summary>
public void TestDistance()
{
    var instance = new Damerau();

    // Each variant is paired by index with the distance expected from "ABCDEF".
    string[] variants = { "ABDCEF", "BACDFE", "ABCDE" };
    double[] expectedDistances = { 1.0, 2.0, 1.0 };

    for (var i = 0; i < variants.Length; i++)
    {
        Assert.Equal(
            expected: expectedDistances[i],
            actual: instance.Distance("ABCDEF", variants[i]));
    }

    NullEmptyTests.TestDistance(instance);
}
// TODO: revisit this method and how it scores similar requests.
/// <summary>
/// Scores how alike two strings are, based on their Damerau distance.
/// </summary>
/// <param name="source">First string to compare.</param>
/// <param name="target">Second string to compare.</param>
/// <returns>
/// 0.0 when either string is null or empty, 1.0 when the strings are equal,
/// otherwise 1.0 minus the distance divided by the length of the longer string.
/// </returns>
private double CalculateSimilarity(string source, string target)
{
    var metric = new Damerau();

    // A missing or empty operand is scored as completely dissimilar.
    if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target))
    {
        return 0.0;
    }

    // Equal strings get the top score without measuring anything.
    if (string.Equals(source, target))
    {
        return 1.0;
    }

    var editCount = metric.Distance(source, target);
    var longerLength = Math.Max(source.Length, target.Length);

    // NOTE(review): this relies on Distance returning a floating-point value;
    // an integral return type would make the division truncate - confirm.
    return 1.0 - editCount / longerLength;
}
/// <summary>
/// Groups the distinct commit authors into aliased developers and saves the result.
/// </summary>
/// <remarks>
/// The work happens in three passes:
/// 1. Every distinct (e-mail, name) pair is normalized; pairs that share a normalized
///    name or a normalized e-mail are given the same alias id.
/// 2. The developers are sorted by alias id and each one whose id is at Damerau
///    distance 1 from its predecessor's id takes over the predecessor's id.
/// 3. The developers are added to the context and saved.
/// </remarks>
/// <returns>A task that completes once the aliased developers have been saved.</returns>
public async Task Execute()
{
    using (var dbContext = new GitRepositoryDbContext(false))
    {
        var normalizedDevelopers = new List<AliasedDeveloperName>();

        // Maps each normalized name / normalized e-mail seen so far to its alias id.
        var authorsPlace = new Dictionary<string, string>();

        var authors = dbContext.Commits
            .Select(m => new { m.AuthorEmail, m.AuthorName })
            .Distinct()
            .ToArray();

        _logger.LogInformation("{datetime}: there are {count} authors submitted all the commits.", DateTime.Now, authors.Length);

        foreach (var author in authors)
        {
            var normalizedEmail = NormalizeIdentity(author.AuthorEmail);
            var normalizedName = NormalizeIdentity(author.AuthorName);

            string resolvedId;

            if (authorsPlace.TryGetValue(normalizedName, out var uniqueId))
            {
                // The name is already known. If the e-mail is known too but under a
                // different alias id, fold that older id into the name's id. Example:
                //   occurrence 1: ehsan, e-mail A
                //   occurrence 2: ali,   e-mail B
                //   occurrence 3: ehsan, e-mail B   (links "ali" to "ehsan")
                if (authorsPlace.TryGetValue(normalizedEmail, out var oldUniqueId) && oldUniqueId != uniqueId)
                {
                    foreach (var dev in normalizedDevelopers.Where(q => q.NormalizedName == oldUniqueId))
                    {
                        dev.NormalizedName = uniqueId;
                    }
                }

                authorsPlace[normalizedEmail] = uniqueId;
                resolvedId = uniqueId;
            }
            else if (authorsPlace.TryGetValue(normalizedEmail, out var emailOwnerId))
            {
                // New name for an already known e-mail: reuse the e-mail's alias id.
                authorsPlace[normalizedName] = emailOwnerId;
                resolvedId = emailOwnerId;
            }
            else
            {
                // Neither key is known: the normalized name becomes the alias id.
                authorsPlace[normalizedName] = normalizedName;
                authorsPlace[normalizedEmail] = normalizedName;
                resolvedId = normalizedName;
            }

            normalizedDevelopers.Add(new AliasedDeveloperName()
            {
                Email = author.AuthorEmail,
                Name = author.AuthorName,
                NormalizedName = resolvedId
            });
        }

        var damerauDistanceAlgorithm = new Damerau();

        // Sorting puts near-identical alias ids next to each other, so only
        // neighbours need to be compared.
        normalizedDevelopers = normalizedDevelopers
            .OrderBy(q => q.NormalizedName)
            .ToList();

        for (var i = 0; i < normalizedDevelopers.Count - 1; i++)
        {
            var firstDev = normalizedDevelopers[i];
            var secondDev = normalizedDevelopers[i + 1];
            var distance = damerauDistanceAlgorithm.Distance(firstDev.NormalizedName, secondDev.NormalizedName);

            // Ids that are exactly one edit apart are treated as the same developer.
            // The overwritten id is what the next iteration compares against.
            if (distance == 1)
            {
                secondDev.NormalizedName = firstDev.NormalizedName;
            }
        }

        _logger.LogInformation("{datetime}: after normalization, there are {count} unique authors have been found.", DateTime.Now, normalizedDevelopers.Select(q => q.NormalizedName).Distinct().Count());

        dbContext.AddRange(normalizedDevelopers);
        await dbContext.SaveChangesAsync().ConfigureAwait(false);

        _logger.LogInformation("{datetime}: aliased results have been saves successfully.", DateTime.Now);
    }
}

/// <summary>
/// Reduces a raw author name or e-mail to the key used for alias matching: strips
/// spaces, dots, brackets, underscores, hyphens and parentheses, trims, lower-cases
/// and removes diacritics.
/// </summary>
/// <param name="value">Raw author name or e-mail as stored on the commit.</param>
/// <returns>The normalized key.</returns>
private static string NormalizeIdentity(string value)
{
    return value
        .Replace(" ", string.Empty)
        .Replace(".", string.Empty)
        .Replace("[", string.Empty)
        .Replace("]", string.Empty)
        .Replace("_", string.Empty)
        .Replace("-", string.Empty)
        .Replace("(", string.Empty)
        .Replace(")", string.Empty)
        .Trim()
        // Invariant casing keeps the keys identical regardless of the machine's culture.
        .ToLowerInvariant()
        .RemoveDiacritics();
}