/// <summary>
/// Creates a completion entry and scores it against the text being completed.
/// </summary>
/// <param name="text">Value stored in <c>Text</c>.</param>
/// <param name="textTarget">Text that <paramref name="text"/> is compared with; the Jaro-Winkler similarity of the two becomes <c>Priority</c>.</param>
public BasicCompletionData(string text, string textTarget)
{
    Text = text;
    Priority = new JaroWinkler().Similarity(textTarget, text);
}
/// <summary>
/// Looks up symbols by name: first a case-insensitive exact match, then, if nothing
/// matched, a fuzzy pass over every stored symbol (Jaro-Winkler similarity above 0.85).
/// </summary>
/// <param name="name">Symbol name to search for.</param>
/// <returns>The matching symbols, or <c>NotFound()</c> when neither pass finds any.</returns>
public async Task<ActionResult<IEnumerable<Symbol>>> GetSymbolByName(string name)
{
    var matches = await _context.Symbols
        .Where(s => s.Name.ToLower() == name.ToLower())
        .ToListAsync();

    if (matches.Count != 0)
    {
        return matches;
    }

    // No exact hit: fall back to comparing against every stored symbol.
    var allSymbols = await _context.Symbols.ToListAsync();
    var metric = new JaroWinkler();
    foreach (var candidate in allSymbols)
    {
        if (metric.Similarity(name.ToLower(), candidate.Name.ToLower()) > 0.85)
        {
            matches.Add(candidate);
        }
    }

    if (matches.Count == 0)
    {
        return NotFound();
    }

    return matches;
}
/// <summary>
/// Runs the shared null/empty distance checks against a <c>JaroWinkler</c> instance.
/// </summary>
public void TestDistance()
{
    NullEmptyTests.TestDistance(new JaroWinkler());

    // TODO: regular (non-null/empty) distance tests
}
/// <summary>
/// Two different strings must yield a <c>double</c> similarity strictly below 1.0.
/// </summary>
public void Similarity_DifferentStrings_ReturnsDoubleValue()
{
    const string first = "A String";
    const string second = "Another String";

    double actual = JaroWinkler.Similarity(first, second);
    Console.WriteLine("Similarity result = {0}", actual);

    Assert.IsInstanceOf<double>(actual);
    Assert.Less(actual, 1.0);
}
/// <summary>
/// Two empty strings are expected to have a similarity of 1.
/// </summary>
public void TestSimilarityBothEmpty()
{
    var metric = new JaroWinkler();
    var actual = metric.Similarity(string.Empty, string.Empty);

    // Compared to 6 decimal places (0.000001).
    Assert.Equal(1, actual, 6);
}
/// <summary>
/// Identical strings must yield a <c>double</c> similarity of exactly 1.0.
/// </summary>
public void Similarity_SameStrings_ReturnOneAsDouble()
{
    const string first = "A String";
    const string second = "A String";

    double actual = JaroWinkler.Similarity(first, second);
    Console.WriteLine("Similarity result = {0}", actual);

    Assert.IsInstanceOf<double>(actual);
    Assert.AreEqual(1.0, actual);
}
/// <summary>
/// Checks how closely the current web page matches the archived copy.
/// </summary>
/// <returns>
/// The match rate. The original comment labels it a percentage; the value is returned
/// exactly as produced by <c>JaroWinkler.Similarity</c> — NOTE(review): confirm the scale.
/// </returns>
public async Task<double> CheckAsync()
{
    string archivedText = await ReadArchiveAsync();
    string liveText = await GetWebPageAsync();

    return new JaroWinkler().Similarity(archivedText, liveText);
}
/// <summary>
/// Exercises <c>JaroWinkler.Similarity</c> on a handful of sample name pairs.
/// The scores are only computed (handy under a debugger); nothing is asserted.
/// </summary>
public void JaroWinklerCase()
{
    var metric = new JaroWinkler();

    // Case difference only.
    var caseOnly = metric.Similarity("Twitter", "twitter");
    // Same letters, different order.
    var anagram = metric.Similarity("chien", "niche");
    var versioned = metric.Similarity("twitter v1", "Twitter v2");
    var sharedPrefix = metric.Similarity("ShazamIphone", "ShazamAndroid");
    var typoAndSuffix = metric.Similarity("Famos Instagram SW", "Famous Instagram");
    var sharedSuffix = metric.Similarity("Int Facebook", "CI Facebook");
    var swappedWords = metric.Similarity("Int Facebook", "Instagram Int");
}
/// <summary>
/// Handles a <c>GetPoem</c> message: downloads random poems, takes the first one, scores how
/// similar its consecutive sentences are, and stores the poem with that score.
/// </summary>
/// <param name="message">Message carrying the <c>PeopleId</c> the poem is stored for.</param>
/// <param name="context">Message handler context (not used by this handler).</param>
/// <remarks>Any exception is written to the console and swallowed.</remarks>
public async Task Handle(GetPoem message, IMessageHandlerContext context)
{
    try
    {
        Console.WriteLine("Start GetPoem");
        using (var client = new HttpClient())
        using (var db = new LimeContext())
        {
            var response = await client.GetAsync("https://www.poemist.com/api/v1/randompoems");
            var json = await response.Content.ReadAsStringAsync();
            var poems = JsonConvert.DeserializeObject<List<PoemsModel>>(json);
            if (poems.Count <= 0)
            {
                return;
            }

            var poem = new Poem
            {
                Author = poems[0].Poet.Name,
                Content = poems[0].Content,
                Title = poems[0].Title,
                Url = poems[0].Url,
                People_Id = message.PeopleId
            };

            // Sum the similarity of each sentence with the one after it, stopping at the
            // last sentence or at the first empty follower.
            var sentences = poems[0].Content.Split('.');
            var index = 0;
            double total = 0;
            while (index != sentences.Length - 1 && !string.IsNullOrEmpty(sentences[index + 1]))
            {
                total = total + JaroWinkler.RateSimilarity(sentences[index], sentences[index + 1]);
                index++;
            }

            // Average over the number of compared pairs; 0 when nothing was compared.
            if (total != 0 && index != 0)
            {
                total = total / index;
            }
            else
            {
                total = 0;
            }

            poem.Distance = total;
            db.Poems.Add(poem);
            await db.SaveChangesAsync();
        }
    }
    catch (Exception e)
    {
        Console.WriteLine(e);
    }
    finally
    {
        Console.WriteLine("End GetPoem");
    }
}
/// <summary>
/// For each original/modified pair, the similarity reported by <c>StringMetric</c> must not
/// exceed the one reported by a freshly constructed <c>JaroWinkler</c>.
/// </summary>
public void BoundedIsLowerThanNonBounded()
{
    var unboundedMetric = new JaroWinkler();
    var originalStrings = new[]
    {
        "ab", "abcd", "abcd", "abcd", "abcd", "abcd", "abcd", "abcd", "abcd", "abcd", "abcd"
    };
    var modifiedStrings = new[]
    {
        "ab", "acbd", "adbc", "badc", "axcd", "abxd", "axyd", "axbcd", "abcxd", "axbcd", "axbcyd"
    };

    Assert.AreEqual(originalStrings.Length, modifiedStrings.Length);

    var position = 0;
    foreach (var original in originalStrings)
    {
        var modified = modifiedStrings[position];
        position++;

        var unbounded = unboundedMetric.Similarity(original, modified);
        var bounded = StringMetric.Similarity(original, modified);

        Assert.IsTrue(bounded <= unbounded);
    }
}
/// <summary>
/// Searches the YouTube Data API for <paramref name="steemTitle"/> (first result only) and
/// scores how closely that video's title, description and channel match the Steem post.
/// </summary>
/// <param name="steemTitle">Post title; used as the search query and compared with the video title.</param>
/// <param name="steemDescription">Post description, compared with the video description.</param>
/// <param name="steemDuration">Not used by this method.</param>
/// <param name="steemAuthor">Post author, compared with the channel title.</param>
/// <returns>
/// A DTO with <c>Success = true</c> and the similarity scores, or a DTO carrying only
/// <c>ErrorMessage</c> when the response contains no usable item.
/// </returns>
private async Task<YoutubeDTO> GetInfoFromYouTubeSearchAPI(string steemTitle, string steemDescription, double steemDuration, string steemAuthor)
{
    var client = new RestClient("https://www.googleapis.com/youtube/v3/");
    var request = new RestRequest("search", Method.GET);
    request.AddQueryParameter("part", "snippet");
    request.AddQueryParameter("q", steemTitle);
    request.AddQueryParameter("type", "video");
    request.AddQueryParameter("maxResults", "1");
    request.AddQueryParameter("fields", "items(snippet(publishedAt,title,description,channelTitle,channelId),id(videoId))");
    request.AddQueryParameter("key", _configurationManager.YouTubeApiKey);

    IRestResponse response = await client.ExecuteTaskAsync(request);
    YouTubeRoot resp = JsonConvert.DeserializeObject<YouTubeRoot>(response.Content);

    // A payload without an "items" array deserializes to a root object whose Items is null;
    // treat that the same as an empty result instead of throwing a NullReferenceException.
    if (resp == null || resp.Items == null || resp.Items.Length == 0)
    {
        return new YoutubeDTO { ErrorMessage = "pas de réponse de YouTube" };
    }

    YouTubeSnippet video = resp.Items[0].Snippet;
    YouTubeId id = resp.Items[0].Id;

    // Similarities between the Steem post and the YouTube video.
    var jw = new JaroWinkler();
    double distanceTitle = FormatScore(jw.Similarity(steemTitle, video.Title));
    double distanceDescription = FormatScore(jw.Similarity(steemDescription, video.Description));
    double distanceAuthor = FormatScore(jw.Similarity(steemAuthor, video.ChannelTitle));

    return new YoutubeDTO
    {
        Success = true,
        VideoId = id.VideoId,
        VideoTitle = video.Title,
        ChannelId = video.ChannelId,
        ChannelTitle = video.ChannelTitle,
        PublishedAt = video.PublishedAt,
        DistanceTitle = distanceTitle,
        DistanceDescription = distanceDescription,
        DistanceAuthor = distanceAuthor
    };
}
/// <summary>
/// Pins the similarity of two transposed variants of "My string" to 6 decimal places.
/// </summary>
public void TestSimilarity()
{
    var metric = new JaroWinkler();

    // Both comparisons use a precision of 6 decimal places (0.000001).
    var adjacentSwap = metric.Similarity("My string", "My tsring");
    Assert.Equal(0.974074, adjacentSwap, 6);

    var distantSwap = metric.Similarity("My string", "My ntrisg");
    Assert.Equal(0.896296, distantSwap, 6);
}
/// <summary>
/// Ranks the gyms returned by <c>GetGyms</c> by Jaro-Winkler similarity of their trimmed name
/// to <paramref name="name"/>, keeping those scoring above 0.5.
/// </summary>
/// <param name="name">Name to search for; null or whitespace yields an empty result.</param>
/// <param name="fences">Passed through to <c>GetGyms</c>.</param>
/// <param name="limit">Maximum number of gyms to return.</param>
/// <returns>Gyms mapped to their similarity score, built from the best-first ordering.</returns>
public async Task<Dictionary<IGym, double>> GetSimilarGymsByNameAsync(string name, FenceConfiguration[] fences = null, int limit = int.MaxValue)
{
    if (string.IsNullOrWhiteSpace(name))
    {
        return new Dictionary<IGym, double>();
    }

    var algorithm = new JaroWinkler();
    var gyms = GetGyms(fences);

    var ranked = gyms
        .Select(gym => new { Gym = gym, Rank = algorithm.GetSimilarity(TrimString(gym.Name), TrimString(name)) })
        .OrderByDescending(entry => entry.Rank)
        .Where(entry => entry.Rank > 0.5f)
        .Take(limit);

    var result = ranked.ToDictionary(entry => entry.Gym, entry => entry.Rank);
    return await Task.FromResult(result);
}
/// <summary>
/// For every announcement of every call, finds the word of the call text that is most similar
/// (Jaro-Winkler, case-insensitive) to the announcement's street and records it as a feature.
/// </summary>
/// <param name="calls">Calls to analyse; calls without text and announcements without a street are skipped.</param>
public void JaroWinklerAnalisys(CallsInfo[] calls)
{
    _logger.LogInformation("Анализ похожих методом JaroWinkler");
    var metric = new JaroWinkler();

    foreach (var call in calls)
    {
        if (call.Text == null)
        {
            continue;
        }

        var words = call.Text.Split(' ');
        foreach (var announcement in call.Announcements)
        {
            if (announcement.Street == default)
            {
                continue;
            }

            double bestScore = default;
            string bestWord = default;
            for (var i = 0; i < words.Length; i++)
            {
                var score = metric.Similarity(words[i].ToLower(), announcement.Street.ToLower());
                // ">=" means the last word wins when several words tie.
                if (score >= bestScore)
                {
                    bestWord = words[i];
                    bestScore = score;
                }
            }

            var feature = new FeatureInfo
            {
                Name = FeatureInfo.JARO,
                Weight = bestScore,
                Data = bestWord ?? ""
            };
            announcement.Features.Add(feature);
        }
    }

    _logger.LogInformation("Закончили");
}
/// <summary>
/// Searches the JSON signature database for each comma-separated entry of
/// <paramref name="bytes"/> and adds one grid row per hit.
/// </summary>
/// <param name="bytes">Comma-separated patterns to look for.</param>
/// <param name="jaro_state">
/// When true, each stored byte string is compared by Jaro-Winkler similarity; when false, the
/// entry's whole <c>Bytes</c> value is converted to a string and tested with <c>Contains</c>.
/// </param>
/// <param name="jaro_rate">Similarity a fuzzy comparison must exceed to count as a hit.</param>
void database_search(string bytes, bool jaro_state = false, double jaro_rate = 0)
{
    var jw = new JaroWinkler();
    string[] bytes_splitted = bytes.Split(',');

    // Dispose the reader as soon as the file is read; the original left it open (leaked handle).
    // NOTE(review): assumes jsonz is a file path — if it is a shared Stream, disposing the
    // reader also closes that stream.
    string json;
    using (StreamReader database = new StreamReader(jsonz))
    {
        json = database.ReadToEnd();
    }

    dynamic DynamicData = JsonConvert.DeserializeObject(json);
    foreach (string bytex in bytes_splitted)
    {
        for (int i = 0; i < DynamicData.Data.Count; i++)
        {
            for (int x = 0; x < DynamicData.Data[i].Bytes.Count; x++)
            {
                if (jaro_state == true)
                {
                    double jaro = jw.Similarity(Convert.ToString(DynamicData.Data[i].Bytes[x]), bytex);
                    if (jaro > jaro_rate)
                    {
                        //MessageBox.Show(String.Format("Found a match: {0} and {1} with jaro rate {2} in {3}", bytex, DynamicData.Data[i].Bytes[x], jaro_rate, DynamicData.Data[i].Name));
                        DataGridViewRow row = (DataGridViewRow)gunaDataGridView2.Rows[0].Clone();
                        row.Cells[0].Value = bytex;
                        row.Cells[1].Value = DynamicData.Data[i].Bytes[x];
                        row.Cells[2].Value = Math.Round(jaro, 2);
                        row.Cells[3].Value = DynamicData.Data[i].Name;
                        // Rows are added through Invoke so the grid is modified on its own thread.
                        gunaDataGridView2.Invoke(new Action(() => { gunaDataGridView2.Rows.Add(row); }));
                    }
                }
            }

            if (jaro_state == false)
            {
                if (Convert.ToString(DynamicData.Data[i].Bytes).Contains(bytex))
                {
                    //MessageBox.Show(String.Format("Found a match: {0} in {1}", bytex, DynamicData.Data[i].Name));
                    DataGridViewRow row = (DataGridViewRow)gunaDataGridView2.Rows[0].Clone();
                    row.Cells[0].Value = bytex;
                    row.Cells[3].Value = DynamicData.Data[i].Name;
                    gunaDataGridView2.Invoke(new Action(() => { gunaDataGridView2.Rows.Add(row); }));
                }
            }
        }
    }
}
/// <summary>
/// Pairs every source column with every target column whose lower-cased names score a
/// Jaro-Winkler similarity that, multiplied by 100, equals 100.
/// </summary>
/// <param name="source">Source table columns.</param>
/// <param name="target">Template columns to map onto.</param>
/// <returns>One map item per matching source/target pair.</returns>
public List<OutgestionMapItemModel> AutoMap(List<CoreTableDetailsModel> source, List<TemplateDetailModel> target)
{
    var matched = new List<OutgestionMapItemModel>();
    var metric = new JaroWinkler();

    foreach (var sourceColumn in source)
    {
        foreach (var targetColumn in target)
        {
            var percent = metric.Similarity(sourceColumn.ColumnName.ToLower(), targetColumn.columnName.ToLower()) * 100;
            if (percent == 100)
            {
                matched.Add(new OutgestionMapItemModel { Source = sourceColumn, Target = targetColumn });
            }
        }
    }

    return matched;
}
/// <summary>
/// Builds a 0/1 match vector for every candidate pair, processing the pairs in parallel.
/// A field is scored 1 only when it is listed in <paramref name="matchFields"/> and the two
/// records agree on it: names by Jaro-Winkler similarity above 0.9, date parts by parsed
/// integer equality, all other fields by plain equality.
/// </summary>
/// <param name="pairs">Candidate record pairs.</param>
/// <param name="matchFields">Names of the fields that take part in matching.</param>
/// <returns>One match vector per pair, in no particular order.</returns>
public static List<Blocks.blocking_match> MakeBlockingMatches(IEnumerable<DataObjects.person_identifiers_pair> pairs, List<string> matchFields)
{
    var jw = new JaroWinkler();
    var matchVectors = new ConcurrentBag<Blocks.blocking_match>();

    Parallel.ForEach(pairs, pair =>
    {
        var matchVector = new Blocks.blocking_match();
        var first = pair.person_identifiers_1;
        var second = pair.person_identifiers_2;

        matchVector.person_unique_entity_id_1 = first.person_unique_entity_id;
        matchVector.person_unique_entity_id_2 = second.person_unique_entity_id;

        // Names: fuzzy comparison.
        matchVector.person_first_name =
            (matchFields.Contains("person_first_name") && jw.GetSimilarity(first.person_first_name, second.person_first_name) > .90) ? 1 : 0;
        matchVector.person_middle_names =
            (matchFields.Contains("person_middle_names") && jw.GetSimilarity(first.person_middle_names, second.person_middle_names) > .9) ? 1 : 0;
        matchVector.person_last_name =
            (matchFields.Contains("person_last_name") && jw.GetSimilarity(first.person_last_name, second.person_last_name) > .90) ? 1 : 0;

        // Date of birth parts: compared as numbers, parsed only when the field is selected.
        matchVector.person_dob_day =
            (matchFields.Contains("person_dob_day") && Int16.Parse(first.person_dob_day) == Int16.Parse(second.person_dob_day)) ? 1 : 0;
        matchVector.person_dob_month =
            (matchFields.Contains("person_dob_month") && Int16.Parse(first.person_dob_month) == Int16.Parse(second.person_dob_month)) ? 1 : 0;
        matchVector.person_dob_year =
            (matchFields.Contains("person_dob_year") && Int16.Parse(first.person_dob_year) == Int16.Parse(second.person_dob_year)) ? 1 : 0;

        // Remaining fields: exact equality.
        matchVector.person_gender =
            (matchFields.Contains("person_gender") && first.person_gender == second.person_gender) ? 1 : 0;
        matchVector.person_fips_5 =
            (matchFields.Contains("person_fips_5") && first.person_fips_5 == second.person_fips_5) ? 1 : 0;
        matchVector.person_email =
            (matchFields.Contains("person_email") && first.person_email == second.person_email) ? 1 : 0;
        matchVector.person_phone =
            (matchFields.Contains("person_phone") && first.person_phone == second.person_phone) ? 1 : 0;
        matchVector.person_match_id_1 =
            (matchFields.Contains("person_match_id_1") && first.person_match_id_1 == second.person_match_id_1) ? 1 : 0;
        matchVector.person_match_id_2 =
            (matchFields.Contains("person_match_id_2") && first.person_match_id_2 == second.person_match_id_2) ? 1 : 0;
        matchVector.person_match_id_3 =
            (matchFields.Contains("person_match_id_3") && first.person_match_id_3 == second.person_match_id_3) ? 1 : 0;

        matchVector.person_identifiers_pair.person_identifiers_1 = first;
        matchVector.person_identifiers_pair.person_identifiers_2 = second;

        matchVectors.Add(matchVector);
    });

    return matchVectors.ToList();
}
/// <summary>
/// Loads the active companies, each annotated with the Jaro-Winkler similarity of its name to
/// the company name of the given field vendor.
/// </summary>
/// <param name="fieldVendorId">Id of the field vendor whose company name is the reference.</param>
/// <param name="loadOptions">Options handed to <c>DataSourceLoader.LoadAsync</c>.</param>
/// <returns>The result produced by <c>DataSourceLoader.LoadAsync</c> for the projected query.</returns>
public async Task<LoadResult> GetSimilarVendorNames(int fieldVendorId, DataSourceLoadOptions loadOptions)
{
    var vendor = await _context.FieldVendor.SingleAsync(fv => fv.Id == fieldVendorId);
    var companyName = vendor?.CompanyName;
    var jw = new JaroWinkler();

    var activeCompanies = _context.Company
        .Where(c => c.IsActive)
        .OrderBy(c => c.Id);

    var query = activeCompanies.Select(c => new
    {
        c.Id,
        c.Name,
        Similarity = jw.Similarity(c.Name, companyName),
    });

    return await DataSourceLoader.LoadAsync(query, loadOptions);
}
/// <summary>
/// Checks the player's input against the current challenge's answers: first an exact match on
/// the lower-cased input, then a normalized comparison that also accepts a Jaro-Winkler
/// similarity above 0.9. Calls <c>correctAnswer</c> on the first hit, otherwise <c>wrongAnswer</c>.
/// </summary>
void tryAnswer()
{
    //Challenge challenge = challenges[currentChallengeIndex];

    // First iteration.
    // Only change answer to lowercase.
    string input = answerInput.text.ToLower();

    // Get list of correct answers from Firebase database
    IList answers = (IList)((IDictionary)DatabaseControllerScript.challenges[currentChallengeIndex])["answers"];
    for (int i = 0; i < answers.Count; i++)
    {
        string answer = (string)answers[i];
        if (input == answer)
        {
            correctAnswer(input);
            return;
        }
    }

    // Second iteration.
    // Remove non letters and change to english letters
    input = parseIcelandicLetters(removeNonLetters(input));
    foreach (object rawAnswer in answers)
    {
        string normalized = parseIcelandicLetters(removeNonLetters((string)rawAnswer));
        bool exact = input == normalized;
        if (exact || JaroWinkler.Similarity(input, normalized) > 0.9)
        {
            correctAnswer((string)rawAnswer);
            return;
        }
    }

    // Did not find the correct answer
    wrongAnswer();
}
/// <summary>
/// Returns the cached symbols whose name or ticker is close to the user's input
/// (Jaro-Winkler similarity above 0.85 for the name, above 0.90 for the ticker).
/// </summary>
/// <param name="companyId">User input; may be a company name or a ticker.</param>
/// <returns>All symbols that pass either threshold.</returns>
internal async Task<IEnumerable<Symbol>> GetCompanyNameMatches(string companyId)
{
    // Symbols come from the cache (or Finnhub, per the original comment).
    var candidates = await GetSymbolsFromCache();
    var metric = new JaroWinkler();
    var result = new List<Symbol>();

    // The input may be either a company name or a ticker, so score it against both.
    foreach (var candidate in candidates)
    {
        var nameScore = metric.Similarity(companyId.ToLower(), candidate.Name.ToLower());
        var tickerScore = metric.Similarity(companyId.ToLower(), candidate.Ticker.ToLower());

        bool closeEnough = nameScore > 0.85 || tickerScore > 0.90;
        if (closeEnough)
        {
            result.Add(candidate);
        }
    }

    return result;
}
///// <summary>
///// Calculates the Levenshtein distance between two strings
///// </summary>
///// Source: https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C.23
///// Explanation: https://en.wikipedia.org/wiki/Levenshtein_distance
//private Int32 levenshtein(String a, String b)
//{
//    if (string.IsNullOrEmpty(a))
//    {
//        if (!string.IsNullOrEmpty(b))
//        {
//            return b.Length;
//        }
//        return 0;
//    }
//    if (string.IsNullOrEmpty(b))
//    {
//        if (!string.IsNullOrEmpty(a))
//        {
//            return a.Length;
//        }
//        return 0;
//    }
//    Int32 cost;
//    Int32[,] d = new int[a.Length + 1, b.Length + 1];
//    Int32 min1;
//    Int32 min2;
//    Int32 min3;
//    for (Int32 i = 0; i <= d.GetUpperBound(0); i += 1)
//    {
//        d[i, 0] = i;
//    }
//    for (Int32 i = 0; i <= d.GetUpperBound(1); i += 1)
//    {
//        d[0, i] = i;
//    }
//    for (Int32 i = 1; i <= d.GetUpperBound(0); i += 1)
//    {
//        for (Int32 j = 1; j <= d.GetUpperBound(1); j += 1)
//        {
//            cost = Convert.ToInt32(!(a[i - 1] == b[j - 1]));
//            min1 = d[i - 1, j] + 1;
//            min2 = d[i, j - 1] + 1;
//            min3 = d[i - 1, j - 1] + cost;
//            d[i, j] = Math.Min(Math.Min(min1, min2), min3);
//        }
//    }
//    return d[d.GetUpperBound(0), d.GetUpperBound(1)];
//}

///// <summary>
///// String-similarity computed with levenshtein-distance
///// </summary>
//private double similarityLevenshtein(string a, string b)
//{
//    if (a.Equals(b))
//    {
//        return 1.0;
//    }
//    else
//    {
//        if (!(a.Length == 0 || b.Length == 0))
//        {
//            double sim = 1 - (levenshtein(a, b) / Convert.ToDouble(Math.Min(a.Length, b.Length)));
//            return sim;
//        }
//        else
//            return 0.0;
//    }
//}

///// <summary>
///// String-similarity computed with Dice Coefficient
///// </summary>
///// Source: https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#C.23
///// Explanation: https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
//private double similarityDiceCoefficient(string a, string b)
//{
//    //Workaround for |a| == |b| == 1
//    if (a.Length <= 1 && b.Length <= 1)
//    {
//        if (a.Equals(b))
//            return 1.0;
//        else
//            return 0.0;
//    }
//    HashSet<string> setA = new HashSet<string>();
//    HashSet<string> setB = new HashSet<string>();
//    for (int i = 0; i < a.Length - 1; ++i)
//        setA.Add(a.Substring(i, 2));
//    for (int i = 0; i < b.Length - 1; ++i)
//        setB.Add(b.Substring(i, 2));
//    HashSet<string> intersection = new HashSet<string>(setA);
//    intersection.IntersectWith(setB);
//    return (2.0 * intersection.Count) / (setA.Count + setB.Count);
//}

/// <summary>
/// Averages three string similarities with equal weight: normalized Levenshtein,
/// Jaro-Winkler and Jaccard.
/// </summary>
/// <param name="a">First string.</param>
/// <param name="b">Second string.</param>
/// <returns>The arithmetic mean of the three similarity scores.</returns>
private double similarity(string a, string b)
{
    var scores = new List<double>();

    var levenshteinMetric = new NormalizedLevenshtein();
    scores.Add(levenshteinMetric.Similarity(a, b));

    var jaroWinklerMetric = new JaroWinkler();
    scores.Add(jaroWinklerMetric.Similarity(a, b));

    var jaccardMetric = new Jaccard();
    scores.Add(jaccardMetric.Similarity(a, b));

    // Sum in insertion order, starting from 0.0, then divide by the number of metrics.
    double total = 0.0;
    for (int i = 0; i < scores.Count; i++)
    {
        total += scores[i];
    }

    return total / scores.Count;
}
/// <summary>
/// Tries to link a Discord user to the known user whose name is most similar to
/// <paramref name="userName"/> (case-insensitive Jaro-Winkler similarity of at least 0.9).
/// </summary>
/// <param name="userID">Discord user id to register.</param>
/// <param name="userName">Name used to find the matching known user.</param>
/// <returns>
/// <c>null</c> when the Discord user is already registered; a container with
/// <c>IsSuccessful = true</c> when the link was stored; otherwise a container with
/// <c>IsSuccessful = false</c> (no users, no close enough match, match already linked, or the
/// store call returned false).
/// </returns>
public async Task<RegisterUserContainer> TryRegisterUserAsync(ulong userID, string userName)
{
    using var scope = _scopeFactory.CreateScope();
    var activitiesDB = scope.ServiceProvider.GetRequiredService<IClanActivitiesDB>();
    if (activitiesDB.IsDiscordUserRegistered(userID))
    {
        return null;
    }

    var jw = new JaroWinkler();
    var users = await activitiesDB.GetUsersAsync();

    // Materialize the scored candidates so an empty user list can be detected up front:
    // taking the maximum of an empty sequence fails instead of yielding "no match".
    var scored = users
        .Select(x => (jw.Similarity(userName.ToLower(), x.UserName.ToLower()), x))
        .ToList();
    if (scored.Count == 0)
    {
        return new RegisterUserContainer { IsSuccessful = false };
    }

    var mostSimilar = scored.MaxBy(x => x.Item1);

    // Only link users that are not linked yet and whose name is close enough.
    if (mostSimilar.x.DiscordUserID is null && mostSimilar.Item1 >= 0.9)
    {
        if (await activitiesDB.RegisterUserAsync(mostSimilar.x.UserID, userID))
        {
            return new RegisterUserContainer
            {
                IsSuccessful = true,
                UserName = mostSimilar.x.UserName,
                Platform = ((BungieMembershipType)mostSimilar.x.MembershipType).ToString().Replace("Tiger", string.Empty)
            };
        }
    }

    return new RegisterUserContainer { IsSuccessful = false };
}
}
/// <summary>
/// Finds existing posts whose title is similar to <paramref name="title"/>
/// (Jaro-Winkler similarity of at least 0.7).
/// </summary>
/// <param name="title">Title to check.</param>
/// <returns>The <c>_ListBooksPartial</c> partial view with the similar posts, or <c>null</c> when there are none.</returns>
public ActionResult CheckTitle(string title)
{
    var metric = new JaroWinkler();
    // double t = jw.Similarity("My string", "My tsring");
    var allPosts = GetListPosts();
    var similarPosts = new List<PostViewModel>();

    foreach (var post in allPosts)
    {
        if (metric.Similarity(title, post.Title) >= 0.7)
        {
            similarPosts.Add(post);
        }
    }

    if (similarPosts.Count == 0)
    {
        return null;
    }

    //var viewposts = Mapper.Map<IEnumerable<Posts>, IEnumerable<PostViewModel>>(posts);
    return PartialView("_ListBooksPartial", similarPosts);
}
/// <summary>
/// Scores how much two strings resemble each other, applying a cutoff.
/// </summary>
/// <param name="s1">First string.</param>
/// <param name="s2">Second string.</param>
/// <param name="cutoff">Minimum score that is reported as-is.</param>
/// <returns>
/// -1 when either string is null or empty; the <c>JaroWinkler.RateSimilarity</c> score when it
/// is at least <paramref name="cutoff"/>; otherwise 0.
/// </returns>
private static double CalculateResemblance(string s1, string s2, double cutoff)
{
    if (string.IsNullOrEmpty(s1) || string.IsNullOrEmpty(s2))
    {
        return -1;
    }

    double score = JaroWinkler.RateSimilarity(s1, s2);
    return score >= cutoff ? score : 0;
}
/// <summary>
/// Computes the similarity of two strings (trimmed first) with the metric selected by
/// <paramref name="type"/>.
/// </summary>
/// <param name="str1">First string.</param>
/// <param name="str2">Second string.</param>
/// <param name="type">One of the <c>AlgorithmTypes</c> values; anything else selects Smith-Waterman-Gotoh.</param>
/// <returns>The value returned by the selected metric's <c>GetSimilarity</c>.</returns>
public double GetSimilarity(string str1, string str2, string type)
{
    IStringMetric metric;
    switch (type)
    {
        case AlgorithmTypes.BlockDistance: metric = new BlockDistance(); break;
        case AlgorithmTypes.ChapmanLengthDeviation: metric = new ChapmanLengthDeviation(); break;
        case AlgorithmTypes.ChapmanMeanLength: metric = new ChapmanMeanLength(); break;
        case AlgorithmTypes.CosineSimilarity: metric = new CosineSimilarity(); break;
        case AlgorithmTypes.DiceSimilarity: metric = new DiceSimilarity(); break;
        case AlgorithmTypes.EuclideanDistance: metric = new EuclideanDistance(); break;
        case AlgorithmTypes.JaccardSimilarity: metric = new JaccardSimilarity(); break;
        case AlgorithmTypes.Jaro: metric = new Jaro(); break;
        case AlgorithmTypes.JaroWinkler: metric = new JaroWinkler(); break;
        case AlgorithmTypes.Levenstein: metric = new Levenstein(); break;
        case AlgorithmTypes.MatchingCoefficient: metric = new MatchingCoefficient(); break;
        case AlgorithmTypes.MongeElkan: metric = new MongeElkan(); break;
        case AlgorithmTypes.NeedlemanWunch: metric = new NeedlemanWunch(); break;
        case AlgorithmTypes.OverlapCoefficient: metric = new OverlapCoefficient(); break;
        case AlgorithmTypes.QGramsDistance: metric = new QGramsDistance(); break;
        case AlgorithmTypes.SmithWaterman: metric = new SmithWaterman(); break;
        case AlgorithmTypes.SmithWatermanGotohWindowedAffine: metric = new SmithWatermanGotohWindowedAffine(); break;

        // Smith-Waterman-Gotoh doubles as the fallback for unrecognized types.
        case AlgorithmTypes.SmithWatermanGotoh:
        default:
            metric = new SmithWatermanGotoh();
            break;
    }

    return metric.GetSimilarity(str1.Trim(), str2.Trim());
}
/// <summary>
/// Assigns a fresh <c>JaroWinkler</c> instance to <c>_jaroWinkler</c>.
/// </summary>
public void Initializate() => _jaroWinkler = new JaroWinkler();
/// <summary>
/// Starts the comparison run: for every row of the new data set, finds the most similar row
/// of the original data set (Jaro-Winkler on the selected column, optionally multiplied by
/// the similarity of a second column) and writes the pair to the output stream when the best
/// similarity does NOT exceed the threshold, i.e. when the row has no close counterpart.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// The work runs on a background task; progress is reported back through
/// <see cref="Progress{T}"/>. When the configuration is incomplete or a run is already in
/// progress, a popup is shown instead.
/// </remarks>
private void ButtonGo_Click(object sender, EventArgs e)
{
    if (orgItems != null && newItems != null && outStream != null && (ListItem)comboBoxOrg1.SelectedItem != null && (ListItem)comboBoxNew1.SelectedItem != null && running == false)
    {
        running = true;

        // Thread.Name may only be assigned once on some runtimes; skip it on later clicks.
        if (Thread.CurrentThread.Name == null)
        {
            Thread.CurrentThread.Name = "Main";
        }

        ListItem ItemOrg1 = (ListItem)comboBoxOrg1.SelectedItem;
        ListItem ItemOrg2 = (ListItem)comboBoxOrg2.SelectedItem;
        ListItem ItemNew1 = (ListItem)comboBoxNew1.SelectedItem;
        // NOTE(review): this reads comboBoxNew1 again, so the optional second "new" column is
        // always the same as the first. It looks like a copy/paste slip for a second combo box
        // (comboBoxNew2?) — confirm against the form designer before changing.
        ListItem ItemNew2 = (ListItem)comboBoxNew1.SelectedItem;
        double threshold = ((double)trackBar1.Value) / 100;
        progressBar1.Maximum = newItems.Count;

        var prog = new Progress<int>();
        var progfound = new Progress<int>();
        prog.ProgressChanged += (senderOfProgressChanged, nextItem) =>
        {
            progressBar1.Value = nextItem;
        };
        progfound.ProgressChanged += (senderOfProgressChanged, nextItem) =>
        {
            label10.Text = "found " + nextItem + " new Entries";
        };

        Task.Run(() =>
        {
            try
            {
                using (StreamWriter sw = new StreamWriter(outStream))
                {
                    // Retrieve column indexes; the second pair is optional (-1 = not used).
                    int indexOrg1 = ItemOrg1.Index;
                    int indexNew1 = ItemNew1.Index;
                    int indexOrg2 = -1;
                    int indexNew2 = -1;
                    Console.WriteLine("th: " + threshold);
                    if (ItemNew2 != null && ItemOrg2 != null)
                    {
                        indexOrg2 = ItemOrg2.Index;
                        indexNew2 = ItemNew2.Index;
                    }

                    // Comparator
                    var jw = new JaroWinkler();

                    // Create header line: all "new" headers followed by all "org" headers.
                    string headerLine = "";
                    foreach (string header in newHeader)
                    {
                        headerLine += "new-" + header + ",";
                    }
                    foreach (string header in orgHeader)
                    {
                        headerLine += "org-" + header + ",";
                    }
                    // Drop the trailing comma; guard against both header lists being empty.
                    if (headerLine.Length > 0)
                    {
                        headerLine = headerLine.Substring(0, headerLine.Length - 1);
                    }
                    sw.WriteLine(headerLine);

                    int index = 0;
                    int found = 0;
                    int incompleteNew = 0;
                    int incompleteOrg = 0;
                    int incompleteOptional = 0;

                    foreach (string[] newItemLine in newItems)
                    {
                        if (newItemLine.Length > indexNew1)
                        {
                            string element1 = newItemLine[indexNew1];
                            string element2 = null;
                            if (indexNew2 > -1 && indexOrg2 > -1)
                            {
                                if (newItemLine.Length > indexNew2)
                                {
                                    element2 = newItemLine[indexNew2];
                                }
                                else
                                {
                                    incompleteOptional++;
                                }
                            }

                            string[] closest = null;
                            double closestVal = -1;
                            foreach (string[] orgItemLine in orgItems)
                            {
                                if (orgItemLine.Length > indexOrg1)
                                {
                                    // Read the column selected for the ORIGINAL data set. The
                                    // previous code indexed with indexNew1, which compared the
                                    // wrong column and could run past the end of the row.
                                    string canidate1 = orgItemLine[indexOrg1];
                                    string canidate2 = null;
                                    if (indexNew2 > -1 && indexOrg2 > -1)
                                    {
                                        if (orgItemLine.Length > indexOrg2)
                                        {
                                            canidate2 = orgItemLine[indexOrg2];
                                        }
                                        else
                                        {
                                            incompleteOptional++;
                                        }
                                    }

                                    double similarity1 = jw.Similarity(element1, canidate1);
                                    double similarity2 = 1.0;
                                    // Compare the optional columns with each other (previously
                                    // element1 was compared against the second candidate column).
                                    if (element2 != null && canidate2 != null)
                                    {
                                        similarity2 = jw.Similarity(element2, canidate2);
                                    }

                                    double similarity = similarity1 * similarity2;
                                    if (similarity > closestVal)
                                    {
                                        closestVal = similarity;
                                        closest = orgItemLine;
                                    }
                                }
                                else
                                {
                                    incompleteOrg++;
                                }
                            }

                            // A row counts as "new" when a candidate exists but even the best
                            // one is not above the threshold.
                            if (closestVal > -1 && !(closestVal > threshold))
                            {
                                string newLine = "";
                                foreach (string itm in newItemLine)
                                {
                                    newLine += itm + ",";
                                }
                                foreach (string itm in closest)
                                {
                                    newLine += itm + ",";
                                }
                                newLine = newLine.Substring(0, newLine.Length - 1);
                                sw.WriteLine(newLine);
                                found++;
                                ((IProgress<int>)progfound).Report(found);
                            }

                            index++;
                            ((IProgress<int>)prog).Report(index);
                        }
                        else
                        {
                            incompleteNew++;
                        }
                    }

                    Console.Write("finished");
                }
            }
            finally
            {
                // Always release the "running" guard when the work ends. The previous reset
                // (progress == Count - 1) never fired for a single row or when rows were
                // skipped as incomplete, and left the button locked after a failure.
                running = false;
            }
        });
    }
    else
    {
        Help.ShowPopup(buttonGo, "Incomplete Config", new Point(buttonGo.Right, this.buttonGo.Bottom));
    }
}
/// <summary>
/// Computes the similarity of two words with the selected metric.
/// </summary>
/// <param name="firstWord">First word.</param>
/// <param name="secondWord">Second word.</param>
/// <param name="simMetricType">Metric to use; <c>Levenstein</c> and any unlisted value use the Levenstein metric.</param>
/// <returns>The value returned by the chosen metric's <c>GetSimilarity</c>.</returns>
public static double ApproximatelyEquals(this string firstWord, string secondWord, SimMetricType simMetricType = SimMetricType.Levenstein)
{
    switch (simMetricType)
    {
        case SimMetricType.BlockDistance:
            return new BlockDistance().GetSimilarity(firstWord, secondWord);
        case SimMetricType.ChapmanLengthDeviation:
            return new ChapmanLengthDeviation().GetSimilarity(firstWord, secondWord);
        case SimMetricType.CosineSimilarity:
            return new CosineSimilarity().GetSimilarity(firstWord, secondWord);
        case SimMetricType.DiceSimilarity:
            return new DiceSimilarity().GetSimilarity(firstWord, secondWord);
        case SimMetricType.EuclideanDistance:
            return new EuclideanDistance().GetSimilarity(firstWord, secondWord);
        case SimMetricType.JaccardSimilarity:
            return new JaccardSimilarity().GetSimilarity(firstWord, secondWord);
        case SimMetricType.Jaro:
            return new Jaro().GetSimilarity(firstWord, secondWord);
        case SimMetricType.JaroWinkler:
            return new JaroWinkler().GetSimilarity(firstWord, secondWord);
        case SimMetricType.MatchingCoefficient:
            return new MatchingCoefficient().GetSimilarity(firstWord, secondWord);
        case SimMetricType.MongeElkan:
            return new MongeElkan().GetSimilarity(firstWord, secondWord);
        case SimMetricType.NeedlemanWunch:
            return new NeedlemanWunch().GetSimilarity(firstWord, secondWord);
        case SimMetricType.OverlapCoefficient:
            return new OverlapCoefficient().GetSimilarity(firstWord, secondWord);
        case SimMetricType.QGramsDistance:
            return new QGramsDistance().GetSimilarity(firstWord, secondWord);
        case SimMetricType.SmithWaterman:
            return new SmithWaterman().GetSimilarity(firstWord, secondWord);
        case SimMetricType.SmithWatermanGotoh:
            return new SmithWatermanGotoh().GetSimilarity(firstWord, secondWord);
        case SimMetricType.SmithWatermanGotohWindowedAffine:
            return new SmithWatermanGotohWindowedAffine().GetSimilarity(firstWord, secondWord);
        case SimMetricType.ChapmanMeanLength:
            return new ChapmanMeanLength().GetSimilarity(firstWord, secondWord);
        default:
            return new Levenstein().GetSimilarity(firstWord, secondWord);
    }
}
/// <summary>
/// Writes, for each fuzzy comparer in turn, the score of <paramref name="input"/> against
/// every test case to the debug output.
/// </summary>
/// <param name="input">String compared against each test case.</param>
/// <param name="testCases">Strings to compare <paramref name="input"/> with.</param>
/// <remarks>
/// Section headers are formatted with <c>string.Format</c> before being written. Passing
/// <c>(format, input)</c> straight to <c>Debug.WriteLine</c> selects the
/// <c>(message, category)</c> overload when the argument is a string, so the placeholder was
/// never substituted.
/// </remarks>
private void StringCompareTest(string input, string[] testCases)
{
    Debug.WriteLine(string.Format("Dice Coefficient for {0}:", input));
    foreach (var name in testCases)
    {
        StringFuzzyComparer dice = new DiceCoefficent();
        double diceValue = dice.Compare(input, name);
        Debug.WriteLine("\t{0} against {1}", diceValue.ToString("###,###.00000"), name);
    }
    Debug.WriteLine("");

    Debug.WriteLine(string.Format("Jaccard Coefficient for {0}:", input));
    foreach (var name in testCases)
    {
        StringFuzzyComparer jaccard = new Jaccard();
        double jaccardValue = jaccard.Compare(input, name);
        Debug.WriteLine("\t{0} against {1}", jaccardValue.ToString("###,###.00000"), name);
    }
    Debug.WriteLine("");

    Debug.WriteLine(string.Format("ExtendedJaccard Coefficient for {0}:", input));
    foreach (var name in testCases)
    {
        StringFuzzyComparer exjaccard = new ExtendedJaccard();
        double exjaccardValue = exjaccard.Compare(input, name);
        Debug.WriteLine("\t{0} against {1}", exjaccardValue.ToString("###,###.00000"), name);
    }
    Debug.WriteLine("");

    Debug.WriteLine(string.Format("DamerauLevenshteinDistance for {0}:", input));
    foreach (var name in testCases)
    {
        StringFuzzyComparer lev = new DamerauLevenshteinDistance();
        var levenStein = lev.Compare(input, name);
        Debug.WriteLine("\t{0}, against {1}", levenStein, name);
    }
    Debug.WriteLine("");

    Debug.WriteLine(string.Format("JaroWinkler for {0}:", input));
    foreach (var name in testCases)
    {
        StringFuzzyComparer jw = new JaroWinkler();
        var jwValue = jw.Compare(input, name);
        Debug.WriteLine("\t{0}, against {1}", jwValue, name);
    }
    Debug.WriteLine("");

    Debug.WriteLine(string.Format("Monge-Elkan for {0}:", input));
    foreach (var name in testCases)
    {
        StringFuzzyComparer me = new MongeElkan();
        var meValue = me.Compare(input, name);
        Debug.WriteLine("\t{0}, against {1}", meValue, name);
    }
    Debug.WriteLine("");

    Debug.WriteLine(string.Format("NGramDistance(2) for {0}:", input));
    foreach (var name in testCases)
    {
        // Configure the concrete type directly instead of an unchecked "as" cast, then use
        // it through the common comparer type like the other sections.
        var bigram = new NGramDistance();
        bigram.NGramLength = 2;
        StringFuzzyComparer ngram2 = bigram;
        var ngramValue2 = ngram2.Compare(input, name);
        Debug.WriteLine("\t{0}, against {1}", ngramValue2, name);
    }
    Debug.WriteLine("");

    Debug.WriteLine(string.Format("SmithWaterman for {0}:", input));
    foreach (var name in testCases)
    {
        StringFuzzyComparer sw = new SmithWaterman();
        var swValue = sw.Compare(input, name);
        Debug.WriteLine("\t{0}, against {1}", swValue, name);
    }
    Debug.WriteLine("");

    Debug.WriteLine(string.Format("Extended Editex for {0}:", input));
    foreach (var name in testCases)
    {
        StringFuzzyComparer edx = new ExtendedEditex();
        var edxValue = edx.Compare(input, name);
        Debug.WriteLine("\t{0}, against {1}", edxValue, name);
    }
    Debug.WriteLine("");

    Debug.WriteLine(string.Format("Longest Common Subsequence for {0}:", input));
    foreach (var name in testCases)
    {
        StringFuzzyComparer lcs = new LongestCommonSubsequence();
        var lcsValue = lcs.Compare(input, name);
        Debug.WriteLine("\t{0}, against {1}", lcsValue.ToString("###,###.00000"), name);
    }
    Debug.WriteLine("");
}
/// <summary>
/// Test setup: assigns a fresh <c>JaroWinkler</c> instance to <c>_comparator</c>.
/// </summary>
public void Init()
{
    // Setup code goes here...
    this._comparator = new JaroWinkler();
}