コード例 #1
0
ファイル: BasicCompletionData.cs プロジェクト: 6vein/ReverseR
        /// <summary>
        /// Creates a completion entry for <paramref name="text"/> and ranks it by its
        /// Jaro-Winkler similarity to <paramref name="textTarget"/>.
        /// </summary>
        /// <param name="text">The completion text stored in <c>Text</c>.</param>
        /// <param name="textTarget">The string the completion text is compared against.</param>
        public BasicCompletionData(string text, string textTarget)
        {
            Text = text;

            // The similarity score is used directly as the entry's priority.
            Priority = new JaroWinkler().Similarity(textTarget, text);
        }
コード例 #2
0
        /// <summary>
        /// Looks up symbols by name: first by a case-insensitive exact match and, when that
        /// finds nothing, by Jaro-Winkler similarity against every stored symbol.
        /// </summary>
        /// <param name="name">The symbol name to search for.</param>
        /// <returns>
        /// The matching symbols, or <c>NotFound()</c> when <paramref name="name"/> is null or
        /// no symbol matches.
        /// </returns>
        public async Task <ActionResult <IEnumerable <Symbol> > > GetSymbolByName(string name)
        {
            // A null name cannot match anything; answer 404 instead of failing on name.ToLower().
            if (name == null)
            {
                return(NotFound());
            }

            // Lower-case the search term once instead of once per comparison.
            var loweredName = name.ToLower();

            var matches = await _context.Symbols
                          .Where(s => s.Name.ToLower() == loweredName)
                          .ToListAsync();

            if (matches.Count == 0)
            {
                // No exact hit: fall back to a fuzzy comparison over all symbols.
                var allSymbols = await _context.Symbols.ToListAsync();
                var jw         = new JaroWinkler();

                foreach (var candidate in allSymbols)
                {
                    // Rows without a name cannot be compared; skip them rather than throw.
                    if (candidate.Name == null)
                    {
                        continue;
                    }

                    if (jw.Similarity(loweredName, candidate.Name.ToLower()) > 0.85)
                    {
                        matches.Add(candidate);
                    }
                }

                if (matches.Count == 0)
                {
                    return(NotFound());
                }
            }

            return(matches);
        }
コード例 #3
0
        /// <summary>
        /// Runs the shared null/empty-input distance checks against a JaroWinkler instance.
        /// </summary>
        public void TestDistance()
        {
            NullEmptyTests.TestDistance(new JaroWinkler());

            // TODO: regular (non-null/empty) distance tests
        }
コード例 #4
0
        /// <summary>
        /// Two different strings must yield a double similarity strictly below 1.0.
        /// </summary>
        public void Similarity_DifferentStrings_ReturnsDoubleValue()
        {
            double similarity = JaroWinkler.Similarity("A String", "Another String");

            Console.WriteLine($"Similarity result = {similarity}");

            Assert.IsInstanceOf <double>(similarity);
            Assert.Less(similarity, 1.0);
        }
コード例 #5
0
        /// <summary>
        /// Two empty strings are expected to have a similarity of 1 (to six decimal places).
        /// </summary>
        public void TestSimilarityBothEmpty()
        {
            var bothEmpty = new JaroWinkler().Similarity(string.Empty, string.Empty);

            // precision: 6 compares down to 0.000001
            Assert.Equal(expected: 1, actual: bothEmpty, precision: 6);
        }
コード例 #6
0
        /// <summary>
        /// Comparing a string with an identical string must yield exactly 1.0 as a double.
        /// </summary>
        public void Similarity_SameStrings_ReturnOneAsDouble()
        {
            const string text = "A String";

            double similarity = JaroWinkler.Similarity(text, "A String");

            Console.WriteLine($"Similarity result = {similarity}");

            Assert.IsInstanceOf <double>(similarity);
            Assert.AreEqual(1.0, similarity);
        }
コード例 #7
0
        /// <summary>
        /// Checks how closely the current web page matches the archived copy.
        /// </summary>
        /// <returns>
        /// The match rate, i.e. the value returned by <c>JaroWinkler.Similarity</c> for the
        /// archive and the web page. NOTE(review): the original comment labels this value
        /// "(%)"; the code returns the similarity unscaled, so confirm the intended unit.
        /// </returns>
        public async Task <double> CheckAsync()
        {
            // Read the archive first, then fetch the live page, one after the other.
            var archivedText = await ReadArchiveAsync();
            var currentText  = await GetWebPageAsync();

            var metric = new JaroWinkler();

            return(metric.Similarity(archivedText, currentText));
        }
コード例 #8
0
        /// <summary>
        /// Computes Jaro-Winkler similarities for a handful of sample name pairs.
        /// The results are only held in locals; nothing is asserted or returned.
        /// </summary>
        public void JaroWinklerCase()
        {
            var metric = new JaroWinkler();

            // Same word, different casing.
            var caseOnly = metric.Similarity("Twitter", "twitter");

            // Same letters, different order.
            var anagram = metric.Similarity("chien", "niche");

            // Shared stem with a differing suffix.
            var versionSuffix  = metric.Similarity("twitter v1", "Twitter v2");
            var platformSuffix = metric.Similarity("ShazamIphone", "ShazamAndroid");

            // Misspelling plus extra trailing words.
            var misspelled = metric.Similarity("Famos Instagram SW", "Famous Instagram");

            // Partially overlapping multi-word names.
            var sharedSecondWord = metric.Similarity("Int Facebook", "CI Facebook");
            var sharedFirstWord  = metric.Similarity("Int Facebook", "Instagram Int");
        }
コード例 #9
0
        /// <summary>
        /// Handles a <c>GetPoem</c> message: downloads a random poem, scores how similar its
        /// consecutive sentences are on average and stores the poem for the requesting person.
        /// </summary>
        /// <param name="message">The request; its <c>PeopleId</c> is stored on the saved poem.</param>
        /// <param name="context">The message handler context (not used by this handler).</param>
        /// <remarks>
        /// Any failure is written to the console and not rethrown.
        /// </remarks>
        public async Task Handle(GetPoem message, IMessageHandlerContext context)
        {
            try
            {
                Console.WriteLine("Start GetPoem");

                using (var client = new HttpClient())
                using (var db = new LimeContext())
                {
                    var response = await client.GetAsync("https://www.poemist.com/api/v1/randompoems");

                    // Surface HTTP failures as such instead of trying to parse an error page as JSON.
                    response.EnsureSuccessStatusCode();

                    var json  = await response.Content.ReadAsStringAsync();
                    var poems = JsonConvert.DeserializeObject <List <PoemsModel> >(json);

                    // DeserializeObject yields null for an empty or "null" body.
                    if (poems == null || poems.Count == 0)
                    {
                        return;
                    }

                    var first = poems[0];

                    var poem = new Poem
                    {
                        Author    = first.Poet != null ? first.Poet.Name : null,
                        Content   = first.Content,
                        Title     = first.Title,
                        Url       = first.Url,
                        People_Id = message.PeopleId
                    };

                    // A poem without content has no sentences to compare; treat it as empty text.
                    var    sentences = (first.Content ?? string.Empty).Split('.');
                    var    pairs     = 0;
                    double total     = 0;

                    // Compare each sentence with the next one, stopping at the first empty
                    // follower (typically the fragment after the final '.').
                    while (pairs < sentences.Length - 1 && !string.IsNullOrEmpty(sentences[pairs + 1]))
                    {
                        total += JaroWinkler.RateSimilarity(sentences[pairs], sentences[pairs + 1]);
                        pairs++;
                    }

                    // Average over the compared pairs; 0 when there was nothing to compare.
                    poem.Distance = pairs != 0 ? total / pairs : 0;

                    db.Poems.Add(poem);
                    await db.SaveChangesAsync();
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
            }
            finally
            {
                Console.WriteLine("End GetPoem");
            }
        }
コード例 #10
0
        /// <summary>
        /// For every original/modified pair, the similarity reported by <c>StringMetric</c>
        /// must not exceed the one reported by a plain <c>JaroWinkler</c> instance.
        /// </summary>
        public void BoundedIsLowerThanNonBounded()
        {
            var unbounded = new JaroWinkler();

            // originals[k] is compared with modifications[k].
            string[] originals =
            {
                // identical
                "ab",

                // reordered characters
                "abcd", "abcd", "abcd",

                // substituted characters
                "abcd", "abcd", "abcd",

                // inserted characters
                "abcd", "abcd", "abcd", "abcd"
            };

            string[] modifications =
            {
                "ab",

                "acbd", "adbc", "badc",

                "axcd", "abxd", "axyd",

                "axbcd", "abcxd", "axbcd", "axbcyd"
            };

            Assert.AreEqual(originals.Length, modifications.Length);

            var index = 0;
            foreach (var original in originals)
            {
                var modified = modifications[index];
                index++;

                var plainScore   = unbounded.Similarity(original, modified);
                var boundedScore = StringMetric.Similarity(original, modified);

                Assert.IsTrue(boundedScore <= plainScore);
            }
        }
コード例 #11
0
        /// <summary>
        /// Searches YouTube for <paramref name="steemTitle"/> and reports how closely the first
        /// hit's title, description and channel match the given Steem values.
        /// </summary>
        /// <param name="steemTitle">Title used as the search query and compared with the video title.</param>
        /// <param name="steemDescription">Description compared with the video description.</param>
        /// <param name="steemDuration">Not used by this method.</param>
        /// <param name="steemAuthor">Author compared with the video's channel title.</param>
        /// <returns>
        /// A DTO describing the first search hit and its similarity scores, or a DTO carrying
        /// only an error message when YouTube returned no usable result.
        /// </returns>
        private async Task <YoutubeDTO> GetInfoFromYouTubeSearchAPI(string steemTitle, string steemDescription, double steemDuration, string steemAuthor)
        {
            var client = new RestClient("https://www.googleapis.com/youtube/v3/");

            var request = new RestRequest("search", Method.GET);

            request.AddQueryParameter("part", "snippet");
            request.AddQueryParameter("q", steemTitle);
            request.AddQueryParameter("type", "video");
            request.AddQueryParameter("maxResults", "1");
            request.AddQueryParameter("fields", "items(snippet(publishedAt,title,description,channelTitle,channelId),id(videoId))");
            request.AddQueryParameter("key", _configurationManager.YouTubeApiKey);

            IRestResponse response = await client.ExecuteTaskAsync(request);

            YouTubeRoot resp = JsonConvert.DeserializeObject <YouTubeRoot>(response.Content);

            // Items is also checked for null: a body without an "items" member would leave it unset.
            if (resp == null || resp.Items == null || resp.Items.Length == 0)
            {
                return new YoutubeDTO
                {
                    ErrorMessage = "pas de réponse de YouTube"
                };
            }

            YouTubeSnippet video = resp.Items[0].Snippet;
            YouTubeId      id    = resp.Items[0].Id;

            // Similarity between the Steem values and the YouTube hit.
            var    jw                  = new JaroWinkler();
            double distanceTitle       = FormatScore(jw.Similarity(steemTitle, video.Title));
            double distanceDescription = FormatScore(jw.Similarity(steemDescription, video.Description));
            double distanceAuthor      = FormatScore(jw.Similarity(steemAuthor, video.ChannelTitle));

            var dto = new YoutubeDTO
            {
                Success             = true,
                VideoId             = id.VideoId,
                VideoTitle          = video.Title,
                ChannelId           = video.ChannelId,
                ChannelTitle        = video.ChannelTitle,
                PublishedAt         = video.PublishedAt,
                DistanceTitle       = distanceTitle,
                DistanceDescription = distanceDescription,
                DistanceAuthor      = distanceAuthor
            };

            return(dto);
        }
コード例 #12
0
        /// <summary>
        /// Checks two known Jaro-Winkler similarity values to six decimal places.
        /// </summary>
        public void TestSimilarity()
        {
            var metric = new JaroWinkler();

            // Two adjacent characters swapped.
            var swapped = metric.Similarity("My string", "My tsring");
            Assert.Equal(expected: 0.974074, actual: swapped, precision: 6);

            // Several characters out of place.
            var scrambled = metric.Similarity("My string", "My ntrisg");
            Assert.Equal(expected: 0.896296, actual: scrambled, precision: 6);
        }
コード例 #13
0
        /// <summary>
        /// Finds gyms whose name is similar to <paramref name="name"/>.
        /// </summary>
        /// <param name="name">The name to look for; null or blank input yields an empty result.</param>
        /// <param name="fences">Passed through to <c>GetGyms</c> to select the candidate gyms.</param>
        /// <param name="limit">Maximum number of gyms to return, best matches first.</param>
        /// <returns>The gyms scoring above 0.5, mapped to their similarity score.</returns>
        public async Task <Dictionary <IGym, double> > GetSimilarGymsByNameAsync(string name, FenceConfiguration[] fences = null, int limit = int.MaxValue)
        {
            if (string.IsNullOrWhiteSpace(name))
            {
                return(new Dictionary <IGym, double>());
            }

            var algorithm = new JaroWinkler();
            var gyms      = GetGyms(fences);

            // Normalise the search term once instead of once per gym.
            var trimmedName = TrimString(name);

            // Filter before sorting so only the surviving candidates are ordered.
            var rankedList =
                gyms.Select(e => new { Gym = e, Rank = algorithm.GetSimilarity(TrimString(e.Name), trimmedName) })
                .Where(e => e.Rank > 0.5f)
                .OrderByDescending(e => e.Rank)
                .Take(limit);

            return(await Task.FromResult(rankedList.ToDictionary(k => k.Gym, v => v.Rank)));
        }
コード例 #14
0
        /// <summary>
        /// For every announcement of every call, finds the word of the call text that is most
        /// similar (Jaro-Winkler, case-insensitive) to the announcement's street and records it
        /// as a <c>FeatureInfo.JARO</c> feature on the announcement.
        /// </summary>
        /// <param name="calls">The calls to analyse; calls without text are skipped.</param>
        public void JaroWinklerAnalisys(CallsInfo[] calls)
        {
            _logger.LogInformation("Анализ похожих методом JaroWinkler");

            var l = new JaroWinkler();

            foreach (var call in calls)
            {
                if (call.Text == null)
                {
                    continue;
                }

                var words = call.Text.Split(' ');

                // Lower-case each word once per call rather than once per announcement.
                var loweredWords = Array.ConvertAll(words, w => w.ToLower());

                foreach (var announcement in call.Announcements)
                {
                    if (announcement.Street == default)
                    {
                        continue;
                    }

                    // Lower-case the street once rather than once per word.
                    var street = announcement.Street.ToLower();

                    double max_similarity = default;
                    string best_word      = default;

                    for (var i = 0; i < words.Length; i++)
                    {
                        var similarity = l.Similarity(loweredWords[i], street);

                        // ">=" keeps the last word among equally good candidates.
                        if (similarity >= max_similarity)
                        {
                            best_word      = words[i];
                            max_similarity = similarity;
                        }
                    }

                    announcement.Features.Add(new FeatureInfo
                    {
                        Name   = FeatureInfo.JARO,
                        Weight = max_similarity,
                        Data   = best_word ?? ""
                    });
                }
            }

            _logger.LogInformation("Закончили");
        }
コード例 #15
0
        /// <summary>
        /// Searches the JSON signature database for each comma-separated entry of
        /// <paramref name="bytes"/> and adds one grid row per hit.
        /// </summary>
        /// <param name="bytes">Comma-separated values to look up.</param>
        /// <param name="jaro_state">
        /// When true, every stored value is compared by Jaro-Winkler similarity; when false, a
        /// substring test against the textual form of the record's <c>Bytes</c> is used.
        /// </param>
        /// <param name="jaro_rate">Similarity a stored value must exceed to count as a hit.</param>
        void database_search(string bytes, bool jaro_state = false, double jaro_rate = 0)
        {
            var jw = new JaroWinkler();

            string[] bytes_splitted = bytes.Split(',');

            // Dispose the reader as soon as the database text has been read.
            string json;
            using (StreamReader database = new StreamReader(jsonz))
            {
                json = database.ReadToEnd();
            }

            dynamic DynamicData = JsonConvert.DeserializeObject(json);

            foreach (string bytex in bytes_splitted)
            {
                for (int i = 0; i < DynamicData.Data.Count; i++)
                {
                    dynamic entry = DynamicData.Data[i];

                    if (jaro_state)
                    {
                        // Fuzzy mode: compare the searched value with every stored value.
                        for (int x = 0; x < entry.Bytes.Count; x++)
                        {
                            double jaro = jw.Similarity(Convert.ToString(entry.Bytes[x]), bytex);
                            if (jaro > jaro_rate)
                            {
                                // Row 0 serves as the template for new rows.
                                DataGridViewRow row = (DataGridViewRow)gunaDataGridView2.Rows[0].Clone();
                                row.Cells[0].Value = bytex;
                                row.Cells[1].Value = entry.Bytes[x];
                                row.Cells[2].Value = Math.Round(jaro, 2);
                                row.Cells[3].Value = entry.Name;
                                gunaDataGridView2.Invoke(new Action(() => { gunaDataGridView2.Rows.Add(row); }));
                            }
                        }
                    }
                    else if (Convert.ToString(entry.Bytes).Contains(bytex))
                    {
                        // Exact mode: the searched value occurs in the record's textual form.
                        DataGridViewRow row = (DataGridViewRow)gunaDataGridView2.Rows[0].Clone();
                        row.Cells[0].Value = bytex;
                        row.Cells[3].Value = entry.Name;
                        gunaDataGridView2.Invoke(new Action(() => { gunaDataGridView2.Rows.Add(row); }));
                    }
                }
            }
        }
        /// <summary>
        /// Pairs every source column with every target column whose lower-cased name reaches a
        /// Jaro-Winkler score of 100 after scaling the similarity by 100.
        /// </summary>
        /// <param name="source">Columns of the core table.</param>
        /// <param name="target">Columns of the template.</param>
        /// <returns>One map item per matching source/target pair, in source-then-target order.</returns>
        public List <OutgestionMapItemModel> AutoMap(List <CoreTableDetailsModel> source, List <TemplateDetailModel> target)
        {
            var metric = new JaroWinkler();
            var pairs  = new List <OutgestionMapItemModel>();

            foreach (var sourceColumn in source)
            {
                foreach (var targetColumn in target)
                {
                    var score = metric.Similarity(sourceColumn.ColumnName.ToLower(), targetColumn.columnName.ToLower()) * 100;

                    // Only a full score counts as a match.
                    if (score != 100)
                    {
                        continue;
                    }

                    pairs.Add(new OutgestionMapItemModel {
                        Source = sourceColumn, Target = targetColumn
                    });
                }
            }

            return(pairs);
        }
コード例 #17
0
        /// <summary>
        /// Builds one match vector per candidate pair. Each field of the vector is 1 when the
        /// field is listed in <paramref name="matchFields"/> and the two records agree on it,
        /// otherwise 0. Names agree when their Jaro-Winkler similarity exceeds 0.9, date parts
        /// when they parse to the same 16-bit integer, and all other fields when they are equal.
        /// </summary>
        /// <param name="pairs">Candidate record pairs; processed in parallel.</param>
        /// <param name="matchFields">Names of the fields that take part in matching.</param>
        /// <returns>The match vectors, in no particular order.</returns>
        public static List <Blocks.blocking_match> MakeBlockingMatches(IEnumerable <DataObjects.person_identifiers_pair> pairs, List <string> matchFields)
        {
            var metric  = new JaroWinkler();
            var vectors = new ConcurrentBag <Blocks.blocking_match>();

            Parallel.ForEach(pairs, pair =>
            {
                var vector = new Blocks.blocking_match();
                var first  = pair.person_identifiers_1;
                var second = pair.person_identifiers_2;

                vector.person_unique_entity_id_1 = first.person_unique_entity_id;
                vector.person_unique_entity_id_2 = second.person_unique_entity_id;

                // Names: fuzzy comparison.
                vector.person_first_name =
                    (matchFields.Contains("person_first_name") && metric.GetSimilarity(first.person_first_name, second.person_first_name) > 0.9) ? 1 : 0;
                vector.person_middle_names =
                    (matchFields.Contains("person_middle_names") && metric.GetSimilarity(first.person_middle_names, second.person_middle_names) > 0.9) ? 1 : 0;
                vector.person_last_name =
                    (matchFields.Contains("person_last_name") && metric.GetSimilarity(first.person_last_name, second.person_last_name) > 0.9) ? 1 : 0;

                // Date of birth: numeric comparison of the parsed parts.
                vector.person_dob_day =
                    (matchFields.Contains("person_dob_day") && short.Parse(first.person_dob_day) == short.Parse(second.person_dob_day)) ? 1 : 0;
                vector.person_dob_month =
                    (matchFields.Contains("person_dob_month") && short.Parse(first.person_dob_month) == short.Parse(second.person_dob_month)) ? 1 : 0;
                vector.person_dob_year =
                    (matchFields.Contains("person_dob_year") && short.Parse(first.person_dob_year) == short.Parse(second.person_dob_year)) ? 1 : 0;

                // Everything else: plain equality.
                vector.person_gender =
                    (matchFields.Contains("person_gender") && first.person_gender == second.person_gender) ? 1 : 0;
                vector.person_fips_5 =
                    (matchFields.Contains("person_fips_5") && first.person_fips_5 == second.person_fips_5) ? 1 : 0;
                vector.person_email =
                    (matchFields.Contains("person_email") && first.person_email == second.person_email) ? 1 : 0;
                vector.person_phone =
                    (matchFields.Contains("person_phone") && first.person_phone == second.person_phone) ? 1 : 0;
                vector.person_match_id_1 =
                    (matchFields.Contains("person_match_id_1") && first.person_match_id_1 == second.person_match_id_1) ? 1 : 0;
                vector.person_match_id_2 =
                    (matchFields.Contains("person_match_id_2") && first.person_match_id_2 == second.person_match_id_2) ? 1 : 0;
                vector.person_match_id_3 =
                    (matchFields.Contains("person_match_id_3") && first.person_match_id_3 == second.person_match_id_3) ? 1 : 0;

                // Keep the compared records alongside the vector.
                vector.person_identifiers_pair.person_identifiers_1 = first;
                vector.person_identifiers_pair.person_identifiers_2 = second;

                vectors.Add(vector);
            });

            return(vectors.ToList());
        }
コード例 #18
0
        /// <summary>
        /// Loads the active companies together with the Jaro-Winkler similarity of each
        /// company name to the company name of the given field vendor.
        /// </summary>
        /// <param name="fieldVendorId">Id of the field vendor; exactly one record must exist.</param>
        /// <param name="loadOptions">Load options handed to <c>DataSourceLoader.LoadAsync</c>.</param>
        /// <returns>The result produced by <c>DataSourceLoader.LoadAsync</c> for the query.</returns>
        public async Task <LoadResult> GetSimilarVendorNames(int fieldVendorId, DataSourceLoadOptions loadOptions)
        {
            var jw = new JaroWinkler();

            // SingleAsync throws rather than returning null when the vendor is missing.
            var companyName = (await _context.FieldVendor.SingleAsync(fv => fv.Id == fieldVendorId)).CompanyName;

            var activeCompanies = _context.Company
                                  .Where(c => c.IsActive)
                                  .OrderBy(c => c.Id);

            var query = activeCompanies.Select(c => new
            {
                c.Id,
                c.Name,
                Similarity = jw.Similarity(c.Name, companyName),
            });

            return(await DataSourceLoader.LoadAsync(query, loadOptions));
        }
コード例 #19
0
    /// <summary>
    /// Checks the typed answer against the accepted answers of the current challenge and
    /// reports it as correct or wrong.
    /// </summary>
    void tryAnswer()
    {
        // Pass 1: compare the lower-cased input with each answer as stored.
        string typed = answerInput.text.ToLower();

        // Accepted answers come from the challenge data loaded from the Firebase database.
        IList accepted = (IList)((IDictionary)DatabaseControllerScript.challenges[currentChallengeIndex])["answers"];

        for (int i = 0; i < accepted.Count; i++)
        {
            if (typed == (string)accepted[i])
            {
                correctAnswer(typed);
                return;
            }
        }

        // Pass 2: strip non-letters and map Icelandic letters to English ones on both sides,
        // then accept an exact match or a Jaro-Winkler similarity above 0.9.
        string normalizedInput = parseIcelandicLetters(removeNonLetters(typed));

        foreach (string candidate in accepted)
        {
            string normalizedAnswer = parseIcelandicLetters(removeNonLetters(candidate));

            bool closeEnough = normalizedInput == normalizedAnswer
                               || JaroWinkler.Similarity(normalizedInput, normalizedAnswer) > 0.9;

            if (closeEnough)
            {
                correctAnswer(candidate);
                return;
            }
        }

        // Nothing matched.
        wrongAnswer();
    }
コード例 #20
0
        /// <summary>
        /// Finds the symbols whose company name or ticker is similar to the user's input.
        /// </summary>
        /// <param name="companyId">User input: either a company name or a ticker.</param>
        /// <returns>
        /// Symbols whose name scores above 0.85 or whose ticker scores above 0.90 (Jaro-Winkler,
        /// case-insensitive); empty when <paramref name="companyId"/> is null.
        /// </returns>
        internal async Task <IEnumerable <Symbol> > GetCompanyNameMatches(string companyId)
        {
            // Get the list of symbols from cache or Finnhub.
            var symbolsList = await GetSymbolsFromCache();

            var matches = new List <Symbol>();

            // Without input there is nothing to compare against.
            if (companyId == null)
            {
                return(matches);
            }

            var jw = new JaroWinkler();

            // Lower-case the input once instead of twice per symbol.
            var query = companyId.ToLower();

            foreach (var symbol in symbolsList)
            {
                // A missing name or ticker simply cannot match.
                var nameScore   = symbol.Name != null ? jw.Similarity(query, symbol.Name.ToLower()) : 0.0;
                var tickerScore = symbol.Ticker != null ? jw.Similarity(query, symbol.Ticker.ToLower()) : 0.0;

                if (nameScore > 0.85 || tickerScore > 0.90)
                {
                    matches.Add(symbol);
                }
            }

            return(matches);
        }
コード例 #21
0
        ///// <summary>
        ///// Calculates the Levenshtein distance between two strings
        ///// </summary>
        ///// Source: https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C.23
        ///// Explanation: https://en.wikipedia.org/wiki/Levenshtein_distance
        //private Int32 levenshtein(String a, String b)
        //{

        //    if (string.IsNullOrEmpty(a))
        //    {
        //        if (!string.IsNullOrEmpty(b))
        //        {
        //            return b.Length;
        //        }
        //        return 0;
        //    }

        //    if (string.IsNullOrEmpty(b))
        //    {
        //        if (!string.IsNullOrEmpty(a))
        //        {
        //            return a.Length;
        //        }
        //        return 0;
        //    }

        //    Int32 cost;
        //    Int32[,] d = new int[a.Length + 1, b.Length + 1];
        //    Int32 min1;
        //    Int32 min2;
        //    Int32 min3;

        //    for (Int32 i = 0; i <= d.GetUpperBound(0); i += 1)
        //    {
        //        d[i, 0] = i;
        //    }

        //    for (Int32 i = 0; i <= d.GetUpperBound(1); i += 1)
        //    {
        //        d[0, i] = i;
        //    }

        //    for (Int32 i = 1; i <= d.GetUpperBound(0); i += 1)
        //    {
        //        for (Int32 j = 1; j <= d.GetUpperBound(1); j += 1)
        //        {
        //            cost = Convert.ToInt32(!(a[i - 1] == b[j - 1]));

        //            min1 = d[i - 1, j] + 1;
        //            min2 = d[i, j - 1] + 1;
        //            min3 = d[i - 1, j - 1] + cost;
        //            d[i, j] = Math.Min(Math.Min(min1, min2), min3);
        //        }
        //    }

        //    return d[d.GetUpperBound(0), d.GetUpperBound(1)];

        //}

        ///// <summary>
        ///// String-similarity computed with levenshtein-distance
        ///// </summary>
        //private double similarityLevenshtein(string a, string b)
        //{
        //    if (a.Equals(b))
        //    {
        //        return 1.0;
        //    }
        //    else
        //    {
        //        if (!(a.Length == 0 || b.Length == 0))
        //        {
        //            double sim = 1 - (levenshtein(a, b) / Convert.ToDouble(Math.Min(a.Length, b.Length)));
        //            return sim;
        //        }
        //        else
        //            return 0.0;
        //    }
        //}

        ///// <summary>
        ///// String-similarity computed with Dice Coefficient
        ///// </summary>
        ///// Source: https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#C.23
        ///// Explanation: https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
        //private double similarityDiceCoefficient(string a, string b)
        //{
        //    //Workaround for |a| == |b| == 1
        //    if (a.Length <= 1 && b.Length <= 1)
        //    {
        //        if (a.Equals(b))
        //            return 1.0;
        //        else
        //            return 0.0;
        //    }

        //    HashSet<string> setA = new HashSet<string>();
        //    HashSet<string> setB = new HashSet<string>();

        //    for (int i = 0; i < a.Length - 1; ++i)
        //        setA.Add(a.Substring(i, 2));

        //    for (int i = 0; i < b.Length - 1; ++i)
        //        setB.Add(b.Substring(i, 2));

        //    HashSet<string> intersection = new HashSet<string>(setA);
        //    intersection.IntersectWith(setB);

        //    return (2.0 * intersection.Count) / (setA.Count + setB.Count);
        //}

        /// <summary>
        /// Averages three string similarities (normalized Levenshtein, Jaro-Winkler and
        /// Jaccard), giving each the same weight.
        /// </summary>
        /// <param name="a">First string.</param>
        /// <param name="b">Second string.</param>
        /// <returns>The arithmetic mean of the three similarity values.</returns>
        private double similarity(string a, string b)
        {
            double levenshtein = new NormalizedLevenshtein().Similarity(a, b);
            double jaroWinkler = new JaroWinkler().Similarity(a, b);
            double jaccard     = new Jaccard().Similarity(a, b);

            double[] scores = { levenshtein, jaroWinkler, jaccard };

            double total = 0.0;
            for (int i = 0; i < scores.Length; i++)
            {
                total += scores[i];
            }

            return(total / scores.Length);
        }
コード例 #22
0
        /// <summary>
        /// Tries to link a Discord user to the stored user whose name is most similar to
        /// <paramref name="userName"/>.
        /// </summary>
        /// <param name="userID">The Discord user id to register.</param>
        /// <param name="userName">The name compared (case-insensitively) with the stored user names.</param>
        /// <returns>
        /// <c>null</c> when the Discord user is already registered; a successful container when
        /// the best match scores at least 0.9, has no Discord id yet and the registration
        /// succeeds; otherwise an unsuccessful container.
        /// </returns>
        public async Task <RegisterUserContainer> TryRegisterUserAsync(ulong userID, string userName)
        {
            using var scope = _scopeFactory.CreateScope();

            var activitiesDB = scope.ServiceProvider.GetRequiredService <IClanActivitiesDB>();

            if (activitiesDB.IsDiscordUserRegistered(userID))
            {
                return(null);
            }

            var jw = new JaroWinkler();

            var users = await activitiesDB.GetUsersAsync();

            // DefaultIfEmpty supplies a (0, null) placeholder so an empty user list does not
            // make MaxBy fail; the null check below rejects that placeholder.
            var mostSimilar = users
                              .Select(x => (jw.Similarity(userName.ToLower(), x.UserName.ToLower()), x))
                              .DefaultIfEmpty()
                              .MaxBy(x => x.Item1);

            if (!(mostSimilar.x is null) && mostSimilar.x.DiscordUserID is null && mostSimilar.Item1 >= 0.9)
            {
                if (await activitiesDB.RegisterUserAsync(mostSimilar.x.UserID, userID))
                {
                    return new RegisterUserContainer
                    {
                        IsSuccessful = true,
                        UserName     = mostSimilar.x.UserName,
                        Platform     = ((BungieMembershipType)mostSimilar.x.MembershipType).ToString().Replace("Tiger", string.Empty)
                    };
                }
            }

            return(new RegisterUserContainer
            {
                IsSuccessful = false
            });
        }
    }
コード例 #23
0
        /// <summary>
        /// Lists the existing posts whose title is similar to <paramref name="title"/>.
        /// </summary>
        /// <param name="title">The title to compare against every existing post title.</param>
        /// <returns>
        /// The <c>_ListBooksPartial</c> partial view with the posts scoring at least 0.7, or
        /// <c>null</c> when no post is similar enough.
        /// </returns>
        public ActionResult CheckTitle(string title)
        {
            var metric  = new JaroWinkler();
            var similar = new List <PostViewModel>();

            foreach (var post in GetListPosts())
            {
                if (metric.Similarity(title, post.Title) >= 0.7)
                {
                    similar.Add(post);
                }
            }

            if (similar.Count == 0)
            {
                return(null);
            }

            return(PartialView("_ListBooksPartial", similar));
        }
コード例 #24
0
ファイル: Article.cs プロジェクト: nnovic/GetFacts
        /// <summary>
        /// Rates how much two strings resemble each other.
        /// </summary>
        /// <param name="s1">First string.</param>
        /// <param name="s2">Second string.</param>
        /// <param name="cutoff">Minimum score that is reported as-is.</param>
        /// <returns>
        /// -1 when either string is null or empty; otherwise the Jaro-Winkler score when it is
        /// at least <paramref name="cutoff"/>, and 0 when it is below.
        /// </returns>
        private static double CalculateResemblance(string s1, string s2, double cutoff)
        {
            // Nothing to compare: signal "not applicable".
            if (string.IsNullOrEmpty(s1) || string.IsNullOrEmpty(s2))
            {
                return(-1);
            }

            double rating = JaroWinkler.RateSimilarity(s1, s2);

            return(rating >= cutoff ? rating : 0);
        }
コード例 #25
0
        /// <summary>
        /// Computes the similarity of two strings with the algorithm named by
        /// <paramref name="type"/>. Both strings are trimmed before comparison.
        /// </summary>
        /// <param name="str1">First string.</param>
        /// <param name="str2">Second string.</param>
        /// <param name="type">
        /// One of the <c>AlgorithmTypes</c> constants; any other value selects
        /// <c>SmithWatermanGotoh</c>.
        /// </param>
        /// <returns>The value returned by the selected metric's <c>GetSimilarity</c>.</returns>
        public double GetSimilarity(string str1, string str2, string type)
        {
            IStringMetric metric;

            switch (type)
            {
            case AlgorithmTypes.BlockDistance:          metric = new BlockDistance();          break;
            case AlgorithmTypes.ChapmanLengthDeviation: metric = new ChapmanLengthDeviation(); break;
            case AlgorithmTypes.ChapmanMeanLength:      metric = new ChapmanMeanLength();      break;
            case AlgorithmTypes.CosineSimilarity:       metric = new CosineSimilarity();       break;
            case AlgorithmTypes.DiceSimilarity:         metric = new DiceSimilarity();         break;
            case AlgorithmTypes.EuclideanDistance:      metric = new EuclideanDistance();      break;
            case AlgorithmTypes.JaccardSimilarity:      metric = new JaccardSimilarity();      break;
            case AlgorithmTypes.Jaro:                   metric = new Jaro();                   break;
            case AlgorithmTypes.JaroWinkler:            metric = new JaroWinkler();            break;
            case AlgorithmTypes.Levenstein:             metric = new Levenstein();             break;
            case AlgorithmTypes.MatchingCoefficient:    metric = new MatchingCoefficient();    break;
            case AlgorithmTypes.MongeElkan:             metric = new MongeElkan();             break;
            case AlgorithmTypes.NeedlemanWunch:         metric = new NeedlemanWunch();         break;
            case AlgorithmTypes.OverlapCoefficient:     metric = new OverlapCoefficient();     break;
            case AlgorithmTypes.QGramsDistance:         metric = new QGramsDistance();         break;
            case AlgorithmTypes.SmithWaterman:          metric = new SmithWaterman();          break;

            case AlgorithmTypes.SmithWatermanGotohWindowedAffine:
                metric = new SmithWatermanGotohWindowedAffine();
                break;

            // SmithWatermanGotoh is both an explicit choice and the fallback.
            case AlgorithmTypes.SmithWatermanGotoh:
            default:
                metric = new SmithWatermanGotoh();
                break;
            }

            return(metric.GetSimilarity(str1.Trim(), str2.Trim()));
        }
コード例 #26
0
 /// <summary>
 /// Assigns a newly constructed <c>JaroWinkler</c> instance to the <c>_jaroWinkler</c> field.
 /// </summary>
 public void Initializate() => _jaroWinkler = new JaroWinkler();
コード例 #27
0
ファイル: Form1.cs プロジェクト: JanNitschke/CSVDuplicate
        /// <summary>
        /// Click handler for the "Go" button. On a background task, compares each row of
        /// <c>newItems</c> against every row of <c>orgItems</c> using Jaro-Winkler similarity on
        /// the selected column(s) and writes the rows whose best match is NOT above the
        /// threshold (i.e. rows considered new) to the output, joined with their closest match.
        /// </summary>
        /// <param name="sender">The control that raised the event.</param>
        /// <param name="e">Event data (not used).</param>
        private void ButtonGo_Click(object sender, EventArgs e)
        {
            bool ready = orgItems != null && newItems != null && outStream != null
                         && (ListItem)comboBoxOrg1.SelectedItem != null
                         && (ListItem)comboBoxNew1.SelectedItem != null
                         && running == false;
            if (!ready)
            {
                Help.ShowPopup(buttonGo, "Incomplete Config", new Point(buttonGo.Right, this.buttonGo.Bottom));
                return;
            }

            running = true;

            // Thread.Name is write-once on some runtimes (.NET Framework): assigning it again
            // on a later click throws InvalidOperationException, so only name the thread once.
            if (Thread.CurrentThread.Name == null)
            {
                Thread.CurrentThread.Name = "Main";
            }

            ListItem ItemOrg1 = (ListItem)comboBoxOrg1.SelectedItem;
            ListItem ItemOrg2 = (ListItem)comboBoxOrg2.SelectedItem;
            ListItem ItemNew1 = (ListItem)comboBoxNew1.SelectedItem;
            // Fixed copy-paste slip: the optional second "new" column comes from comboBoxNew2.
            ListItem ItemNew2 = (ListItem)comboBoxNew2.SelectedItem;
            double threshold  = ((double)trackBar1.Value) / 100;
            progressBar1.Maximum = newItems.Count;

            // Progress<T> is created on the UI thread, so these callbacks may touch controls.
            var prog      = new Progress<int>();
            var progfound = new Progress<int>();

            prog.ProgressChanged += (senderOfProgressChanged, nextItem) =>
            {
                progressBar1.Value = nextItem;
            };

            progfound.ProgressChanged += (senderOfProgressChanged, nextItem) =>
            {
                label10.Text = "found " + nextItem + " new Entries";
            };

            var task = Task.Run(() =>
            {
                // NOTE(review): if outStream is a Stream, disposing the writer also closes it,
                // so a second run would need a fresh stream - confirm against the caller.
                using (StreamWriter sw = new StreamWriter(outStream))
                {
                    // retrieve column indexes
                    int indexOrg1 = ItemOrg1.Index;
                    int indexNew1 = ItemNew1.Index;
                    int indexOrg2 = -1;
                    int indexNew2 = -1;
                    Console.WriteLine("th: " + threshold);
                    if (ItemNew2 != null && ItemOrg2 != null)
                    {
                        indexOrg2 = ItemOrg2.Index;
                        indexNew2 = ItemNew2.Index;
                    }
                    bool useSecondColumn = indexNew2 > -1 && indexOrg2 > -1;

                    // comparator
                    var jw = new JaroWinkler();

                    // create header line: all "new" headers followed by all "org" headers
                    string headerLine = "";
                    foreach (string header in newHeader)
                    {
                        headerLine += "new-" + header + ",";
                    }
                    foreach (string header in orgHeader)
                    {
                        headerLine += "org-" + header + ",";
                    }
                    // Strip the trailing comma; guard against both header lists being empty,
                    // where Substring(0, -1) would throw.
                    if (headerLine.Length > 0)
                    {
                        headerLine = headerLine.Substring(0, headerLine.Length - 1);
                    }
                    sw.WriteLine(headerLine);

                    int index = 0;
                    int found = 0;
                    // Diagnostic tallies of rows too short for the selected columns
                    // (currently only counted, not reported anywhere).
                    int incompleteNew      = 0;
                    int incompleteOrg      = 0;
                    int incompleteOptional = 0;

                    foreach (string[] newItemLine in newItems)
                    {
                        if (newItemLine.Length > indexNew1)
                        {
                            string element1 = newItemLine[indexNew1];
                            string element2 = null;
                            if (useSecondColumn)
                            {
                                if (newItemLine.Length > indexNew2)
                                {
                                    element2 = newItemLine[indexNew2];
                                }
                                else
                                {
                                    incompleteOptional++;
                                }
                            }

                            string[] closest  = null;
                            double closestVal = -1;

                            foreach (string[] orgItemLine in orgItems)
                            {
                                if (orgItemLine.Length <= indexOrg1)
                                {
                                    incompleteOrg++;
                                    continue;
                                }

                                // Fixed: read the ORIGINAL file's column (was indexNew1, which
                                // is the wrong column and may be out of range for this row).
                                string candidate1 = orgItemLine[indexOrg1];
                                string candidate2 = null;

                                if (useSecondColumn)
                                {
                                    if (orgItemLine.Length > indexOrg2)
                                    {
                                        candidate2 = orgItemLine[indexOrg2];
                                    }
                                    else
                                    {
                                        incompleteOptional++;
                                    }
                                }

                                double similarity = jw.Similarity(element1, candidate1);
                                // Fixed: the second column pair is element2 vs candidate2
                                // (was element1 vs candidate2). Skipped when either side is
                                // missing, which leaves the factor at 1.0.
                                if (element2 != null && candidate2 != null)
                                {
                                    similarity *= jw.Similarity(element2, candidate2);
                                }

                                if (similarity > closestVal)
                                {
                                    closestVal = similarity;
                                    closest    = orgItemLine;
                                }
                            }

                            // Emit only rows that had at least one comparable original row and
                            // whose best match does not exceed the threshold (i.e. "new" rows).
                            if (closestVal > -1 && !(closestVal > threshold))
                            {
                                sw.WriteLine(string.Join(",", newItemLine) + "," + string.Join(",", closest));
                                found++;
                                ((IProgress<int>)progfound).Report(found);
                            }
                        }
                        else
                        {
                            incompleteNew++;
                        }

                        // Count every processed row (including incomplete ones) so the bar
                        // can actually reach progressBar1.Maximum == newItems.Count.
                        index++;
                        ((IProgress<int>)prog).Report(index);
                    }
                    Console.Write("finished");
                }
            });

            // Re-enable the button when the work ends for ANY reason (success, empty input,
            // skipped rows or an exception). The previous check (progress == Count - 1) fired
            // one row early, or never. Runs on the UI thread.
            task.ContinueWith(
                completed =>
                {
                    running = false;
                    if (completed.Exception != null)
                    {
                        Console.WriteLine(completed.Exception);
                    }
                },
                TaskScheduler.FromCurrentSynchronizationContext());
        }
コード例 #28
0
        /// <summary>
        /// Computes the similarity between two words with the metric selected by
        /// <paramref name="simMetricType"/>.
        /// </summary>
        /// <param name="firstWord">First word to compare.</param>
        /// <param name="secondWord">Second word to compare.</param>
        /// <param name="simMetricType">
        /// Metric to use. Defaults to Levenstein, which is also used for any value that has no
        /// dedicated case below.
        /// </param>
        /// <returns>The value returned by the chosen metric's <c>GetSimilarity</c>.</returns>
        public static double ApproximatelyEquals(this string firstWord, string secondWord, SimMetricType simMetricType = SimMetricType.Levenstein)
        {
            switch (simMetricType)
            {
            case SimMetricType.BlockDistance:
                return new BlockDistance().GetSimilarity(firstWord, secondWord);

            case SimMetricType.ChapmanLengthDeviation:
                return new ChapmanLengthDeviation().GetSimilarity(firstWord, secondWord);

            case SimMetricType.ChapmanMeanLength:
                return new ChapmanMeanLength().GetSimilarity(firstWord, secondWord);

            case SimMetricType.CosineSimilarity:
                return new CosineSimilarity().GetSimilarity(firstWord, secondWord);

            case SimMetricType.DiceSimilarity:
                return new DiceSimilarity().GetSimilarity(firstWord, secondWord);

            case SimMetricType.EuclideanDistance:
                return new EuclideanDistance().GetSimilarity(firstWord, secondWord);

            case SimMetricType.JaccardSimilarity:
                return new JaccardSimilarity().GetSimilarity(firstWord, secondWord);

            case SimMetricType.Jaro:
                return new Jaro().GetSimilarity(firstWord, secondWord);

            case SimMetricType.JaroWinkler:
                return new JaroWinkler().GetSimilarity(firstWord, secondWord);

            case SimMetricType.MatchingCoefficient:
                return new MatchingCoefficient().GetSimilarity(firstWord, secondWord);

            case SimMetricType.MongeElkan:
                return new MongeElkan().GetSimilarity(firstWord, secondWord);

            case SimMetricType.NeedlemanWunch:
                return new NeedlemanWunch().GetSimilarity(firstWord, secondWord);

            case SimMetricType.OverlapCoefficient:
                return new OverlapCoefficient().GetSimilarity(firstWord, secondWord);

            case SimMetricType.QGramsDistance:
                return new QGramsDistance().GetSimilarity(firstWord, secondWord);

            case SimMetricType.SmithWaterman:
                return new SmithWaterman().GetSimilarity(firstWord, secondWord);

            case SimMetricType.SmithWatermanGotoh:
                return new SmithWatermanGotoh().GetSimilarity(firstWord, secondWord);

            case SimMetricType.SmithWatermanGotohWindowedAffine:
                return new SmithWatermanGotohWindowedAffine().GetSimilarity(firstWord, secondWord);

            default:
                // Levenstein (and any unrecognised value) falls through to here.
                return new Levenstein().GetSimilarity(firstWord, secondWord);
            }
        }
コード例 #29
0
        /// <summary>
        /// Writes to the debug output, for each fuzzy comparer in turn, the result of comparing
        /// <paramref name="input"/> against every entry of <paramref name="testCases"/>.
        /// </summary>
        /// <param name="input">The reference string compared against each test case.</param>
        /// <param name="testCases">The strings to compare <paramref name="input"/> with.</param>
        /// <remarks>
        /// Section headers are formatted with <c>string.Format</c> on purpose:
        /// <c>Debug.WriteLine(string, string)</c> binds to the (message, category) overload,
        /// so passing a string as the second argument would NOT substitute <c>{0}</c>.
        /// </remarks>
        private void StringCompareTest(string input, string[] testCases)
        {
            Debug.WriteLine(string.Format("Dice Coefficient for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer dice = new DiceCoefficent();
                double diceValue         = dice.Compare(input, name);
                Debug.WriteLine("\t{0} against {1}", diceValue.ToString("###,###.00000"), name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("Jaccard Coefficient for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer jaccard = new Jaccard();
                double jaccardValue         = jaccard.Compare(input, name);
                Debug.WriteLine("\t{0} against {1}", jaccardValue.ToString("###,###.00000"), name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("ExtendedJaccard Coefficient for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer exjaccard = new ExtendedJaccard();
                double exjaccardValue         = exjaccard.Compare(input, name);
                Debug.WriteLine("\t{0} against {1}", exjaccardValue.ToString("###,###.00000"), name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("DamerauLevenshteinDistance for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer lev = new DamerauLevenshteinDistance();
                var levenStein          = lev.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", levenStein, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("JaroWinkler for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer jw = new JaroWinkler();
                var jwValue            = jw.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", jwValue, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("Monge-Elkan for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer me = new MongeElkan();
                var meValue            = me.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", meValue, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("NGramDistance(2) for {0}:", input));
            foreach (var name in testCases)
            {
                // Object initializer instead of an unchecked "as" cast to set the n-gram size.
                StringFuzzyComparer ngram2 = new NGramDistance { NGramLength = 2 };
                var ngramValue2            = ngram2.Compare(input, name);

                Debug.WriteLine("\t{0}, against {1}", ngramValue2, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("SmithWaterman for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer sw = new SmithWaterman();
                var swValue            = sw.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", swValue, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("Extended Editex for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer edx = new ExtendedEditex();
                var edxValue            = edx.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", edxValue, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("Longest Common Subsequence for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer lcs = new LongestCommonSubsequence();
                var lcsValue            = lcs.Compare(input, name);
                Debug.WriteLine("\t{0} against {1}", lcsValue.ToString("###,###.00000"), name);
            }

            Debug.WriteLine("");
        }
コード例 #30
0
 /// <summary>
 /// Setup hook: assigns a newly constructed <c>JaroWinkler</c> to the <c>_comparator</c> field.
 /// </summary>
 public void Init() => _comparator = new JaroWinkler();