Exemple #1
0
        /// <summary>
        /// Searches the public entries of the "tasksDone" collection for tasks whose name
        /// approximately equals <paramref name="targetName"/> (Jaccard distance, Normal tolerance).
        /// </summary>
        /// <param name="targetName">Name to look for, bound from the request body.</param>
        /// <returns>
        /// An Ok result carrying the matching tasks, or an InternalServerError result wrapping
        /// any exception raised while querying or comparing.
        /// </returns>
        public async Task<IHttpActionResult> Post([FromBody] string targetName)
        {
            try
            {
                // NOTE(review): a new MongoClient is created on every request; consider sharing one instance.
                var client = new MongoClient();
                var db = client.GetDatabase("scheduler");
                var collection = db.GetCollection<TaskDone>("tasksDone");

                // Await the query itself rather than blocking on .Result, which ties up the
                // request thread and defeats the purpose of the async action.
                List<TaskDone> toSearch;
                using (var cursor = await collection.FindAsync(Builders<TaskDone>.Filter.Where(x => x.IsPublic)))
                {
                    toSearch = await cursor.ToListAsync();
                }

                var options = new List<FuzzyStringComparisonOptions>
                {
                    FuzzyStringComparisonOptions.UseJaccardDistance
                };

                FuzzyStringComparisonTolerance tolerance = FuzzyString.FuzzyStringComparisonTolerance.Normal;

                var toReturn = new List<TaskDone>();

                foreach (var task in toSearch)
                {
                    // A stored task without a name cannot match anything; skip it instead of
                    // letting one bad document fail the whole search.
                    if (task.TaskName != null && task.TaskName.ApproximatelyEquals(targetName, options, tolerance))
                    {
                        toReturn.Add(task);
                    }
                }

                return Ok(toReturn);
            }
            catch (Exception e)
            {
                return InternalServerError(e);
            }
        }
Exemple #2
0
        /// <summary>
        /// Decides whether <paramref name="source"/> and <paramref name="target"/> are approximately
        /// equal by comparing their fuzzy equality score against a threshold chosen by
        /// <paramref name="tolerance"/>.
        /// </summary>
        /// <param name="source">String the extension method is invoked on.</param>
        /// <param name="target">String to compare against.</param>
        /// <param name="tolerance">
        /// Strong, Normal and Weak accept scores below 0.25, 0.5 and 0.75 respectively;
        /// Manual accepts scores above 0.6.
        /// </param>
        /// <param name="options">Algorithms forwarded to GetFuzzyEqualityScore; when empty the result is false.</param>
        /// <returns>True when the score passes the threshold for the given tolerance; otherwise false.</returns>
        public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, params FuzzyStringComparisonOptions[] options)
        {
            // Nothing to score with when no algorithm was selected.
            if (options.Length == 0)
            {
                return false;
            }

            var fuzzyScore = source.GetFuzzyEqualityScore(target, options);

            switch (tolerance)
            {
                case FuzzyStringComparisonTolerance.Strong:
                    return fuzzyScore < 0.25;

                case FuzzyStringComparisonTolerance.Normal:
                    return fuzzyScore < 0.5;

                case FuzzyStringComparisonTolerance.Weak:
                    return fuzzyScore < 0.75;

                case FuzzyStringComparisonTolerance.Manual:
                    // Unlike the other levels, Manual passes on a score ABOVE its threshold.
                    return fuzzyScore > 0.6;

                default:
                    return false;
            }
        }
Exemple #3
0
        ////public static bool FuzzyMatchOnPossibleMatch(string source, string target, int lcsLength)
        ////{
        ////    // lcsLength = number of characters in longest common substring
        ////    if ( target.LongestCommonSubstring(source).Length >= lcsLength ) // Need to find at least lcsLength characters in the target string
        ////    {
        ////        //int _index = source.IndexOf(target.LongestCommonSubstring(source));
        ////        //if (_index > 1) // Then possible match here
        ////            return FuzzyMatch(source, target); // Need to make sure length not longer than the string
        ////    }
        ////    return false;
        ////}

        /// <summary>
        /// Fuzzy-compares two strings using only the overlap coefficient at Normal tolerance.
        /// </summary>
        /// <param name="source">String the comparison is invoked on.</param>
        /// <param name="target">String to compare against.</param>
        /// <returns>The result of ApproximatelyEquals for the two strings.</returns>
        public static bool FuzzyMatch(string source, string target)
        {
            var algorithms = new List<FuzzyStringComparisonOptions>
            {
                FuzzyStringComparisonOptions.UseOverlapCoefficient
            };

            return source.ApproximatelyEquals(target, algorithms, FuzzyStringComparisonTolerance.Normal);
        }
Exemple #4
0
        /// <summary>
        /// Finds the nodes attached to any tag whose name fuzzily matches one of the
        /// comma-separated terms in <paramref name="searchTerm"/>.
        /// </summary>
        /// <param name="searchTerm">Comma-separated tag names; each term is trimmed before comparison.</param>
        /// <param name="caseSens">Not read by this method at present.</param>
        /// <returns>
        /// The matching nodes converted with ToModel, ordered so that nodes reached through more
        /// matching tag sets come first. Empty when <paramref name="searchTerm"/> is null or whitespace.
        /// </returns>
        public List<Node> FindNodesByTags(string searchTerm, bool caseSens = false)
        {
            // A blank search cannot match any tag, so answer immediately instead of
            // opening a context and loading every tag just to return nothing.
            if (string.IsNullOrWhiteSpace(searchTerm))
            {
                return new List<Node>();
            }

            // Algorithms that weigh in for the comparison.
            var options = new List<FuzzyStringComparisonOptions>
            {
                FuzzyStringComparisonOptions.UseOverlapCoefficient,
                FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                FuzzyStringComparisonOptions.UseLongestCommonSubstring,
                FuzzyStringComparisonOptions.UseLevenshteinDistance
            };

            FuzzyStringComparisonTolerance tolerance = FuzzyStringComparisonTolerance.Normal;

            List<string> searchTermList = searchTerm.Split(',').Select(a => a.Trim()).ToList();

            using (IdeaStorageEntities context = new IdeaStorageEntities())
            {
                // The fuzzy comparison runs in memory, so the tags are materialized first.
                List<TAG> allTagList = context.TAGS.ToList();

                List<TAG> searchTagsList = new List<TAG>();
                foreach (string tagString in searchTermList)
                {
                    // A tag matched by several terms is added once per term, which raises
                    // the rank of its nodes below.
                    searchTagsList.AddRange(
                        allTagList.Where(t => t.Name.ApproximatelyEquals(tagString, options, tolerance)));
                }

                List<TAGSET> dbTagSets = new List<TAGSET>();
                foreach (TAG tag in searchTagsList)
                {
                    dbTagSets.AddRange(context.TAGSETS.Where(ts => ts.TagId == tag.TagId).ToList());
                }

                List<NODE> dbNodesList = new List<NODE>();
                foreach (TAGSET dbTagSet in dbTagSets)
                {
                    dbNodesList.Add(context.NODES.Single(n => n.NodeId == dbTagSet.NodeId));
                }

                // Group repeated hits of the same node and rank nodes by how often they were hit.
                List<NODE> dbResult = dbNodesList.GroupBy(n => n, n => n.NodeId)
                                      .Select(g => new { g, count = g.Count() })
                                      .OrderByDescending(ranked => ranked.count)
                                      .Select(ranked => ranked.g.Key)
                                      .ToList();

                return dbResult.Select(n => n.ToModel()).ToList();
            }
        }
Exemple #5
0
        /// <summary>
        /// Reports whether <paramref name="source"/> approximately equals <paramref name="target"/>
        /// using the algorithms in _searchOptions at Strong tolerance.
        /// </summary>
        /// <param name="source">String the comparison is invoked on.</param>
        /// <param name="target">String to compare against.</param>
        /// <returns>
        /// False when either string is null or empty; otherwise the result of ApproximatelyEquals.
        /// </returns>
        bool IsANearStringMatch(string source, string target)
        {
            // Guard both sides: previously only target was checked, so a null source
            // was handed straight to the comparer.
            if (string.IsNullOrEmpty(source) || string.IsNullOrEmpty(target))
            {
                return false;
            }

            // Strong is the strictest of the named tolerance levels used in this code.
            const FuzzyStringComparisonTolerance TOLERANCE = FuzzyStringComparisonTolerance.Strong;

            return source.ApproximatelyEquals(target, TOLERANCE, _searchOptions);
        }
Exemple #6
0
        /// <summary>
        /// Returns the shows whose original title or title approximately equals
        /// <paramref name="paramTitle"/>, best match first.
        /// </summary>
        /// <param name="paramTitle">Title to search for.</param>
        /// <returns>
        /// Matching shows ordered by descending similarity factor, where a show's factor is the
        /// larger of the factors computed for its original title and its title.
        /// </returns>
        public List<Show> GetShowsFuzzy(string paramTitle)
        {
            var algorithms = new List<FuzzyStringComparisonOptions>
            {
                FuzzyStringComparisonOptions.UseHammingDistance,
                FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                FuzzyStringComparisonOptions.UseLongestCommonSubstring
            };

            FuzzyStringComparisonTolerance level = FuzzyStringComparisonTolerance.Normal;

            // Ranking score: averages the LCS-subsequence and LCS-substring ratios, plus the
            // Hamming agreement ratio when both strings have the same length.
            Func<string, string, double> similarity = (candidate, wanted) =>
            {
                if (string.IsNullOrEmpty(candidate) || string.IsNullOrEmpty(wanted))
                {
                    return 0;
                }

                double sum = (double)(candidate.LongestCommonSubsequence(wanted).Length + candidate.LongestCommonSubstring(wanted).Length) / candidate.Length;

                if (candidate.Length != wanted.Length)
                {
                    return sum / 2;
                }

                sum += (double)(candidate.Length - candidate.HammingDistance(wanted)) / candidate.Length;
                return sum / 3;
            };

            var ranked = new List<Tuple<Show, double>>();

            foreach (Show candidate in Shows)
            {
                bool isMatch =
                    (!String.IsNullOrEmpty(candidate.OriginalTitle) && candidate.OriginalTitle.ApproximatelyEquals(paramTitle, algorithms, level)) ||
                    (!String.IsNullOrEmpty(candidate.Title) && candidate.Title.ApproximatelyEquals(paramTitle, algorithms, level));

                if (!isMatch)
                {
                    continue;
                }

                double best = Math.Max(similarity(candidate.OriginalTitle, paramTitle), similarity(candidate.Title, paramTitle));
                ranked.Add(Tuple.Create(candidate, best));
            }

            return ranked.OrderByDescending(entry => entry.Item2)
                         .Select(entry => entry.Item1)
                         .ToList();
        }
Exemple #7
0
        /// <summary>
        /// Reports whether <paramref name="itemName"/> approximately equals
        /// <paramref name="RetailName"/>, using the overlap coefficient, longest common
        /// subsequence and longest common substring at Normal tolerance.
        /// </summary>
        /// <param name="RetailName">Name passed as the comparison target.</param>
        /// <param name="itemName">Name the comparison is invoked on.</param>
        /// <returns>The result of ApproximatelyEquals for the two names.</returns>
        public static bool DoNamesMatch(string RetailName, string itemName)
        {
            var algorithms = new List<FuzzyStringComparisonOptions>
            {
                FuzzyStringComparisonOptions.UseOverlapCoefficient,
                FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                FuzzyStringComparisonOptions.UseLongestCommonSubstring
            };

            return itemName.ApproximatelyEquals(RetailName, algorithms, FuzzyString.FuzzyStringComparisonTolerance.Normal);
        }
Exemple #8
0
        /// <summary>
        /// Returns the name of the first entry in <paramref name="cities"/> whose upper-cased
        /// name approximately equals <paramref name="city"/> at Strong tolerance.
        /// </summary>
        /// <param name="city">City name to look up.</param>
        /// <param name="cities">Candidate cities, checked in list order.</param>
        /// <returns>The matching city's Name as stored (not upper-cased), or null when none matches.</returns>
        public static String CityMatched(String city, List<City> cities)
        {
            var algorithms = new List<FuzzyStringComparisonOptions>
            {
                FuzzyStringComparisonOptions.UseOverlapCoefficient,
                FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                FuzzyStringComparisonOptions.UseLongestCommonSubstring
            };

            FuzzyStringComparisonTolerance level = FuzzyStringComparisonTolerance.Strong;

            foreach (City candidate in cities)
            {
                if (!city.ApproximatelyEquals(candidate.Name.ToUpper(), algorithms, level))
                {
                    continue;
                }

                return candidate.Name;
            }

            return null;
        }
        /// <summary>
        /// Decides whether two strings are approximately equal by comparing the value returned by
        /// CalculateComparisonAverage against a threshold chosen by <paramref name="tolerance"/>.
        /// </summary>
        /// <param name="source">String the extension method is invoked on.</param>
        /// <param name="target">String to compare against.</param>
        /// <param name="tolerance">
        /// Strong, Normal and Weak accept averages below 0.25, 0.5 and 0.75 respectively;
        /// Manual accepts averages above 0.6.
        /// </param>
        /// <param name="options">Algorithms forwarded to CalculateComparisonAverage.</param>
        /// <returns>True when the average passes the threshold; false otherwise, including for unrecognized tolerance values.</returns>
        public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, params FuzzyStringComparisonOptions[] options)
        {
            var average = CalculateComparisonAverage(source, target, options);

            if (tolerance == FuzzyStringComparisonTolerance.Strong)
            {
                return average < 0.25;
            }

            if (tolerance == FuzzyStringComparisonTolerance.Normal)
            {
                return average < 0.5;
            }

            if (tolerance == FuzzyStringComparisonTolerance.Weak)
            {
                return average < 0.75;
            }

            if (tolerance == FuzzyStringComparisonTolerance.Manual)
            {
                // Manual is the only level that passes on a value ABOVE its threshold.
                return average > 0.6;
            }

            return false;
        }
Exemple #10
0
        /// <summary>
        /// Re-runs the fuzzy search whenever the search box text changes and binds the matches
        /// to binResult.
        /// </summary>
        /// <remarks>
        /// The tolerance comes from numericUpDown1: the value is divided by 10, 100 or 1000
        /// depending on its magnitude, and is also shown in label1. Values above 999 leave
        /// the tolerance at 0.
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void TxtSearch_TextChanged(object sender, EventArgs e)
        {
            decimal rawValue = numericUpDown1.Value;
            double tolare = 0;

            if (rawValue < 10)
            {
                tolare = (double)rawValue / 10;
            }
            else if (rawValue < 100)
            {
                tolare = (double)rawValue / 100;
            }
            else if (rawValue <= 999)
            {
                // Includes exactly 100, which the previous "> 100" test skipped and so
                // silently produced a tolerance of 0.
                tolare = (double)rawValue / 1000;
            }
            label1.Text = tolare.ToString();

            string query = txtSearch.Text;
            if (query.Length == 0)
            {
                return;
            }

            // Algorithms that weigh in for the comparison; built once rather than
            // re-copied for every item inside the filter below.
            FuzzyStringComparisonOptions[] options =
            {
                FuzzyStringComparisonOptions.UseOverlapCoefficient,
                FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                FuzzyStringComparisonOptions.UseLongestCommonSubstring,
                FuzzyStringComparisonOptions.UseJaccardDistance
            };

            var result = _bindingList.Where(x => x.Text.ApproximatelyEquals(query, tolare, options)).ToList();

            binResult.DataSource = result;
        }
Exemple #11
0
        /// <summary>
        /// Decides whether two strings are approximately equal by averaging one score per selected
        /// algorithm and comparing that average against a threshold chosen by <paramref name="tolerance"/>.
        /// </summary>
        /// <param name="source">String the extension method is invoked on.</param>
        /// <param name="target">String to compare against.</param>
        /// <param name="tolerance">
        /// Strong, Normal and Weak accept averages below 0.25, 0.5 and 0.75 respectively;
        /// Manual accepts averages above 0.6.
        /// </param>
        /// <param name="options">
        /// Algorithms to include. Unless CaseSensitive is present, both strings are passed through
        /// Capitalize first. UseNormalizedLevenshteinDistance takes precedence over UseLevenshteinDistance.
        /// </param>
        /// <returns>
        /// True when the average passes the threshold; false when no algorithm produced a score
        /// or the tolerance value is not recognized.
        /// </returns>
        public static bool ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, params FuzzyStringComparisonOptions[] options)
        {
            var comparisonResults = new List<double>();

            if (!options.Contains(FuzzyStringComparisonOptions.CaseSensitive))
            {
                source = source.Capitalize();
                target = target.Capitalize();
            }

            // Min: 0    Max: 1 (after dividing by the shared length)
            if (options.Contains(FuzzyStringComparisonOptions.UseHammingDistance))
            {
                // Only applies to equal-length strings; the length check also avoids dividing by zero.
                if (source.Length == target.Length && target.Length > 0)
                {
                    // Convert before dividing: the distance appears to be an integer count, and
                    // integer division would truncate the ratio to 0 for any partial mismatch.
                    comparisonResults.Add(Convert.ToDouble(source.HammingDistance(target)) / target.Length);
                }
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaccardDistance))
            {
                comparisonResults.Add(source.JaccardDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaroDistance))
            {
                comparisonResults.Add(source.JaroDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaroWinklerDistance))
            {
                comparisonResults.Add(source.JaroWinklerDistance(target));
            }

            // Min: 0    Max: LevenshteinDistanceUpperBounds - LevenshteinDistanceLowerBounds
            // Min: LevenshteinDistanceLowerBounds    Max: LevenshteinDistanceUpperBounds
            if (options.Contains(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance))
            {
                comparisonResults.Add(Convert.ToDouble(source.NormalizedLevenshteinDistance(target)) / Convert.ToDouble((Math.Max(source.Length, target.Length) - source.LevenshteinDistanceLowerBounds(target))));
            }
            else if (options.Contains(FuzzyStringComparisonOptions.UseLevenshteinDistance))
            {
                comparisonResults.Add(Convert.ToDouble(source.LevenshteinDistance(target)) / Convert.ToDouble(source.LevenshteinDistanceUpperBounds(target)));
            }

            // The common-subsequence/substring length is a similarity, so it is inverted
            // (1 - ratio) to line up with the distance-style scores above.
            if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubsequence))
            {
                comparisonResults.Add(1 - Convert.ToDouble((source.LongestCommonSubsequence(target).Length) / Convert.ToDouble(Math.Min(source.Length, target.Length))));
            }

            if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubstring))
            {
                comparisonResults.Add(1 - Convert.ToDouble((source.LongestCommonSubstring(target).Length) / Convert.ToDouble(Math.Min(source.Length, target.Length))));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseSorensenDiceDistance))
            {
                comparisonResults.Add(source.SorensenDiceDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseOverlapCoefficient))
            {
                comparisonResults.Add(1 - source.OverlapCoefficient(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity))
            {
                comparisonResults.Add(1 - source.RatcliffObershelpSimilarity(target));
            }

            // Average() throws on an empty sequence, and with no scores there is nothing to judge.
            if (comparisonResults.Count == 0)
            {
                return false;
            }

            double average = comparisonResults.Average();

            switch (tolerance)
            {
                case FuzzyStringComparisonTolerance.Strong:
                    return average < 0.25;

                case FuzzyStringComparisonTolerance.Normal:
                    return average < 0.5;

                case FuzzyStringComparisonTolerance.Weak:
                    return average < 0.75;

                case FuzzyStringComparisonTolerance.Manual:
                    // Manual is the only level that passes on a value ABOVE its threshold.
                    return average > 0.6;

                default:
                    return false;
            }
        }
        /// <summary>
        /// Decides whether two strings are approximately equal: every algorithm selected in
        /// <paramref name="options"/> contributes one score, the scores are averaged, and the
        /// average is tested against the threshold for <paramref name="tolerance"/>.
        /// </summary>
        /// <param name="source">String the extension method is invoked on.</param>
        /// <param name="target">String to compare against.</param>
        /// <param name="tolerance">
        /// Strong passes below 0.25, Normal below 0.5, Weak below 0.75; Manual passes above 0.6.
        /// </param>
        /// <param name="options">
        /// Algorithms to include. Both strings go through Capitalize unless CaseSensitive is present.
        /// When UseNormalizedLevenshteinDistance is present, UseLevenshteinDistance is ignored.
        /// </param>
        /// <returns>
        /// True when the average passes the threshold; false when no score was produced or the
        /// tolerance is not one of the four handled values.
        /// </returns>
        public static bool ApproximatelyEquals(this string source, string target,  FuzzyStringComparisonTolerance tolerance, params FuzzyStringComparisonOptions[] options)
        {
            List<double> comparisonResults = new List<double>();

            if (!options.Contains(FuzzyStringComparisonOptions.CaseSensitive))
            {
                source = source.Capitalize();
                target = target.Capitalize();
            }

            // Min: 0    Max: 1 (after dividing by the shared length)
            if (options.Contains(FuzzyStringComparisonOptions.UseHammingDistance))
            {
                // Skipped for strings of different length, and for two empty strings
                // (which would otherwise divide by zero).
                if (source.Length == target.Length && target.Length > 0)
                {
                    // Force floating-point division; dividing an integer distance by an integer
                    // length would collapse every partial mismatch to 0.
                    comparisonResults.Add(Convert.ToDouble(source.HammingDistance(target)) / target.Length);
                }
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaccardDistance))
            {
                comparisonResults.Add(source.JaccardDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaroDistance))
            {
                comparisonResults.Add(source.JaroDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaroWinklerDistance))
            {
                comparisonResults.Add(source.JaroWinklerDistance(target));
            }

            // Min: 0    Max: LevenshteinDistanceUpperBounds - LevenshteinDistanceLowerBounds
            // Min: LevenshteinDistanceLowerBounds    Max: LevenshteinDistanceUpperBounds
            if (options.Contains(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance))
            {
                comparisonResults.Add(Convert.ToDouble(source.NormalizedLevenshteinDistance(target)) / Convert.ToDouble((Math.Max(source.Length, target.Length) - source.LevenshteinDistanceLowerBounds(target))));
            }
            else if (options.Contains(FuzzyStringComparisonOptions.UseLevenshteinDistance))
            {
                comparisonResults.Add(Convert.ToDouble(source.LevenshteinDistance(target)) / Convert.ToDouble(source.LevenshteinDistanceUpperBounds(target)));
            }

            // Shared-length ratios measure similarity, so they are flipped with "1 -" to
            // point the same way as the distance scores.
            if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubsequence))
            {
                comparisonResults.Add(1 - Convert.ToDouble((source.LongestCommonSubsequence(target).Length) / Convert.ToDouble(Math.Min(source.Length, target.Length))));
            }

            if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubstring))
            {
                comparisonResults.Add(1 - Convert.ToDouble((source.LongestCommonSubstring(target).Length) / Convert.ToDouble(Math.Min(source.Length, target.Length))));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseSorensenDiceDistance))
            {
                comparisonResults.Add(source.SorensenDiceDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseOverlapCoefficient))
            {
                comparisonResults.Add(1 - source.OverlapCoefficient(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity))
            {
                comparisonResults.Add(1 - source.RatcliffObershelpSimilarity(target));
            }

            // No algorithm contributed a score, so there is nothing to average.
            if (comparisonResults.Count == 0)
            {
                return false;
            }

            double average = comparisonResults.Average();

            if (tolerance == FuzzyStringComparisonTolerance.Strong)
            {
                return average < 0.25;
            }
            else if (tolerance == FuzzyStringComparisonTolerance.Normal)
            {
                return average < 0.5;
            }
            else if (tolerance == FuzzyStringComparisonTolerance.Weak)
            {
                return average < 0.75;
            }
            else if (tolerance == FuzzyStringComparisonTolerance.Manual)
            {
                // Manual passes on a value ABOVE its threshold, unlike the other levels.
                return average > 0.6;
            }
            else
            {
                return false;
            }
        }
        /// <summary>
        /// Identifies the map and the tempest prefix/suffix from the text that get_text returns
        /// for the supplied images.
        /// </summary>
        /// <remarks>
        /// Three passes are made: map names by substring match, tempest affixes by substring match,
        /// then tempest affixes by word-by-word fuzzy match (Levenshtein, Strong tolerance) for
        /// whatever is still missing. Progress is reported through update_label_d on lbl_report.
        /// </remarks>
        /// <param name="map_images">Pairs whose Key labels the image and whose Value is passed to get_text.</param>
        /// <param name="tempest_images">Pairs whose Key labels the image and whose Value is passed to get_text.</param>
        /// <returns>
        /// A MapTempest whose map is "???" when no map matched, whose prefix/suffix are "none" when
        /// not found, and whose tempest name is "{prefix} Tempest Of {suffix}".
        /// </returns>
        public MapTempest get_map_tempest(List<KeyValuePair<string, string>> map_images, List<KeyValuePair<string, string>> tempest_images)
        {
            var mt = new MapTempest
            {
                map     = "???",
                tempest = new Tempest
                {
                    name   = "???",
                    prefix = "none",
                    suffix = "none",
                    votes  = 0
                }
            };

            foreach (KeyValuePair<string, string> kv in map_images)
            {
                if (mt.map != "???")
                {
                    /* Stop searching if we've already found a map match */
                    break;
                }

                update_label_d(lbl_report, "Reading {0} Map image", kv.Key);
                string text = get_text(kv.Value).ToLower().Trim();
                Console.WriteLine(String.Format("{0}: '{1}'", kv.Key, text));
                update_label_d(lbl_report, "Read {0} Map image ({1})", kv.Key, text);
                foreach (Map map in listview_maps.Items)
                {
                    string mapname = map.name.Replace('_', ' ');
                    update_label_d(lbl_report, "Comparing {0} to {1}", text, mapname);
                    if (text.Contains(mapname))
                    {
                        update_label_d(lbl_report, "Setting map to {0}", mapname);
                        mt.map = map.name;
                        break;
                    }
                }
            }

            List<FuzzyStringComparisonOptions> fuzzystringoptions = new List<FuzzyStringComparisonOptions>
            {
                FuzzyStringComparisonOptions.UseLevenshteinDistance,
            };

            FuzzyStringComparisonTolerance tolerance = FuzzyStringComparisonTolerance.Strong;

            // Raw texts are kept so the fuzzy pass below can reuse them without reading the images again.
            List<string> tempest_texts = new List<string>();

            // Pass 1: exact substring match of each affix against the space-stripped text.
            foreach (KeyValuePair<string, string> kv in tempest_images)
            {
                if (mt.tempest.suffix != "none" && mt.tempest.prefix != "none")
                {
                    /* Stop searching if we've already found both affixes */
                    break;
                }

                update_label_d(lbl_report, "Reading {0} Tempest image", kv.Key);
                string text_raw = get_text(kv.Value).ToLower().Trim();
                tempest_texts.Add(text_raw);
                // NOTE(review): spaces are removed from the text, but '_' in an affix becomes ' ',
                // so an affix key containing '_' cannot match here - confirm this is intended.
                string text = text_raw.Replace(" ", "");

                update_label_d(lbl_report, "Read {0} Tempest image ({1})", kv.Key, text_raw);
                foreach (KeyValuePair<string, string> tempest_kv in tempest_affixes.prefixes)
                {
                    if (mt.tempest.prefix != "none")
                    {
                        break;
                    }
                    string affix = tempest_kv.Key.ToLower().Replace('_', ' ');
                    update_label_d(lbl_report, "Comparing {0} '{1}' with prefix {2}", kv.Key, text, affix);
                    if (text.Contains(affix))
                    {
                        update_label_d(lbl_report, "Setting prefix to {0}", affix);
                        mt.tempest.prefix = tempest_kv.Key;
                        break;
                    }
                }

                foreach (KeyValuePair<string, string> tempest_kv in tempest_affixes.suffixes)
                {
                    string affix = tempest_kv.Key.ToLower().Replace('_', ' ');
                    update_label_d(lbl_report, "Comparing {0} '{1}' with suffix {2}", kv.Key, text, affix);
                    if (text.Contains(affix))
                    {
                        update_label_d(lbl_report, "Setting suffix to {0}", affix);
                        mt.tempest.suffix = tempest_kv.Key;
                        break;
                    }
                }
            }

            // Pass 2: fuzzy match, word by word, for whichever affix is still missing.
            foreach (string text_raw in tempest_texts)
            {
                if (mt.tempest.suffix != "none" && mt.tempest.prefix != "none")
                {
                    /* Stop searching if we've already found both affixes */
                    break;
                }

                foreach (KeyValuePair<string, string> tempest_kv in tempest_affixes.prefixes)
                {
                    if (mt.tempest.prefix != "none")
                    {
                        break;
                    }
                    string affix = tempest_kv.Key.ToLower().Replace('_', ' ');

                    foreach (string str in text_raw.Split(' '))
                    {
                        update_label_d(lbl_report, "Fuzzy comparing {0} with {1}", affix, str);
                        if (str.ApproximatelyEquals(affix, fuzzystringoptions, tolerance))
                        {
                            update_label_d(lbl_report, "Setting prefix to {0}", affix);
                            Console.WriteLine(String.Format("'{0}' matched '{1}'", str, affix));
                            mt.tempest.prefix = tempest_kv.Key;
                            break;
                        }
                    }
                }

                foreach (KeyValuePair<string, string> tempest_kv in tempest_affixes.suffixes)
                {
                    // Keep a suffix that is already known (from the exact pass or an earlier
                    // fuzzy hit) rather than letting a later fuzzy hit replace it.
                    if (mt.tempest.suffix != "none")
                    {
                        break;
                    }
                    string affix = tempest_kv.Key.ToLower().Replace('_', ' ');

                    foreach (string str in text_raw.Split(' '))
                    {
                        update_label_d(lbl_report, "Fuzzy comparing {0} with {1}", affix, str);
                        if (str.ApproximatelyEquals(affix, fuzzystringoptions, tolerance))
                        {
                            // This loop walks the suffixes, so the match belongs in suffix;
                            // it previously overwrote the prefix.
                            update_label_d(lbl_report, "Setting suffix to {0}", affix);
                            Console.WriteLine(String.Format("'{0}' matched '{1}'", str, affix));
                            mt.tempest.suffix = tempest_kv.Key;
                            break;
                        }
                    }
                }
            }

            mt.tempest.name = mt.tempest.prefix + " Tempest Of " + mt.tempest.suffix;

            update_label_d(lbl_report, "Done looping!");

            return mt;
        }
        /// <summary>
        /// Scans an EPUB for capitalized words and writes out those that fuzzily match a known
        /// city or country.
        /// </summary>
        /// <remarks>
        /// The output file holds one "word : match1, match2" line per word with city matches,
        /// then an empty line, then the same for country matches. It is written once, after all
        /// words have been processed.
        /// </remarks>
        /// <param name="args">
        /// args[0]: path of the EPUB file; args[1]: path of a text file with one "city:country"
        /// entry per line; args[2]: path of the output file.
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length < 3)
            {
                Console.WriteLine("Error! Expected arguments: <epub file> <city:country file> <output file>. Press any key to exit...");
                Console.ReadKey();
                Environment.Exit(-1);
            }

            if (!File.Exists(args[0]))
            {
                Console.WriteLine("Error! File not exist! Press any key to exit...");
                Console.ReadKey();
                Environment.Exit(-1);
            }

            Epub epub = new Epub(args[0]);

            string str = epub.GetContentAsPlainText();

            // Keep only letters, whitespace/separators and hyphens so punctuation does not stick to words.
            StringBuilder sb = new StringBuilder();

            foreach (char ch in str)
            {
                if (char.IsLetter(ch) || char.IsWhiteSpace(ch) || char.IsSeparator(ch) || ch.Equals('-'))
                {
                    sb.Append(ch);
                }
            }
            string[] res = sb.ToString().Split(new Char[] { ' ', '\n', '\r', '\t' }, StringSplitOptions.RemoveEmptyEntries);

            // Only words starting with an upper-case letter are considered as place-name candidates.
            List<string> lines = new List<string>();

            foreach (string s in res)
            {
                if (char.IsUpper(s[0]))
                {
                    lines.Add(s);
                }
            }

            List<FuzzyStringComparisonOptions> options = new List<FuzzyStringComparisonOptions>
            {
                FuzzyStringComparisonOptions.UseHammingDistance,
                FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                FuzzyStringComparisonOptions.UseLongestCommonSubstring
            };

            FuzzyStringComparisonTolerance tolerance = FuzzyStringComparisonTolerance.Strong;

            string path = args[2];

            // Parse the reference file once instead of re-splitting every line for every word,
            // and skip entries that lack the ':' separator rather than indexing past the end.
            List<string[]> knownPlaces = new List<string[]>();
            foreach (string entry in File.ReadAllLines(args[1]))
            {
                string[] parts = entry.Split(':');
                if (parts.Length >= 2)
                {
                    knownPlaces.Add(parts);
                }
            }

            List<string> cityLines    = new List<string>();
            List<string> countryLines = new List<string>();

            foreach (string src in lines)
            {
                // Fresh lists per word so matches from earlier words do not leak into this one.
                List<string> cities    = new List<string>();
                List<string> countries = new List<string>();

                foreach (string[] place in knownPlaces)
                {
                    string cityInTheFile    = place[0];
                    string countryInTheFile = place[1];

                    if (src.ApproximatelyEquals(cityInTheFile, options, tolerance))
                    {
                        cities.Add(cityInTheFile);
                    }

                    if (src.ApproximatelyEquals(countryInTheFile, options, tolerance))
                    {
                        countries.Add(countryInTheFile);
                    }
                }

                if (cities.Count > 0)
                {
                    cityLines.Add(src + " : " + String.Join(", ", cities));
                }

                if (countries.Count > 0)
                {
                    countryLines.Add(src + " : " + String.Join(", ", countries));
                }
            }

            // Write everything in one go; writing inside the loop replaced the file on every
            // word and never emitted the country matches.
            List<string> output = new List<string>(cityLines);
            output.Add(string.Empty);
            output.AddRange(countryLines);
            File.WriteAllLines(path, output);

            Console.WriteLine("Press any key to exit...");
            Console.ReadKey();
        }
Exemple #15
0
        // Database directory name handed to every SessionNoServer opened by Main below.
        static readonly string s_systemDir = "Sample4"; // appended to SessionBase.BaseDatabasePath

        // Sample entry point. Runs two sessions against the s_systemDir database:
        //   1. an update session that removes the Person / VelocityDbList<Person> objects left
        //      by a previous run and persists a small graph of three people;
        //   2. a read session that fuzzy-matches a fixed set of nicknames against each
        //      stored person's first name and prints every match.
        // Any exception is written to the console rather than propagated.
        static void Main(string[] args)
        {
            try
            {
                SessionBase.DefaultCompressPages = PageInfo.compressionKind.LZ4;

                // ---- Session 1: reset and repopulate -------------------------------------
                using (SessionNoServer session = new SessionNoServer(s_systemDir))
                {
                    Console.WriteLine("Running with databases in directory: " + session.SystemDirectory);
                    session.BeginUpdate();

                    // Unpersist every Person created by a prior run.
                    foreach (Person stalePerson in session.AllObjects<Person>())
                    {
                        stalePerson.Unpersist(session);
                    }

                    // Unpersist every VelocityDbList<Person> created by a prior run.
                    foreach (VelocityDbList<Person> staleList in session.AllObjects<VelocityDbList<Person>>())
                    {
                        staleList.Unpersist(session);
                    }

                    var robin = new Person("Robin", "Hood", 30);
                    var bill  = new Person("Bill", "Gates", 56, robin);
                    var steve = new Person("Steve", "Jobs", 56, bill);
                    robin.BestFriend = bill;

                    // Only steve is persisted explicitly; the friend lists are filled afterwards.
                    session.Persist(steve);
                    steve.Friends.Add(bill);
                    steve.Friends.Add(robin);
                    // NOTE(review): bill is added to his own friend list here - kept exactly as
                    // in the original sample; confirm whether another person was intended.
                    bill.Friends.Add(bill);
                    robin.Friends.Add(steve);
                    session.Commit();
                }

                // ---- Session 2: fuzzy lookup ---------------------------------------------
                using (SessionNoServer session = new SessionNoServer(s_systemDir))
                {
                    // Algorithms that weigh in on the comparison.
                    var options = new List<FuzzyStringComparisonOptions>
                    {
                        FuzzyStringComparisonOptions.UseOverlapCoefficient,
                        FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                        FuzzyStringComparisonOptions.UseLongestCommonSubstring,
                        FuzzyStringComparisonOptions.UseHammingDistance,
                        FuzzyStringComparisonOptions.UseJaccardDistance,
                        FuzzyStringComparisonOptions.UseJaroDistance,
                        FuzzyStringComparisonOptions.UseJaroWinklerDistance,
                        FuzzyStringComparisonOptions.UseLevenshteinDistance,
                        FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity,
                        FuzzyStringComparisonOptions.UseSorensenDiceDistance,
                        FuzzyStringComparisonOptions.UseTanimotoCoefficient
                    };

                    // Relative strength of the comparison: almost exactly equal, or just close?
                    FuzzyStringComparisonTolerance tolerance = FuzzyStringComparisonTolerance.Normal;

                    // Nicknames probed against every stored first name.
                    string[] nicknames = { "Rob", "Billy", "Mats", "Stevo", "stevo" };

                    session.BeginRead();
                    foreach (Person person in session.AllObjects<Person>())
                    {
                        foreach (string nickname in nicknames)
                        {
                            if (nickname.ApproximatelyEquals(person.FirstName, options, tolerance))
                            {
                                Console.WriteLine(nickname + " approximatly equals " + person.FirstName);
                            }
                        }
                    }
                    session.Commit();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
        }
        // Decides whether source and target are approximately equal.
        //
        // The score comes from source.RankEquality(target, options) and is compared against a
        // threshold selected by tolerance:
        //   Strong -> score < 0.25
        //   Normal -> score < 0.5
        //   Weak   -> score < 0.75
        //   Manual -> score > 0.6
        // Any other tolerance value yields false.
        //
        // NOTE(review): Manual compares in the opposite direction to the other three levels;
        // that is reproduced unchanged here - confirm it is intentional.
        public static bool ApproximatelyEquals(this string source, string target, long options, FuzzyStringComparisonTolerance tolerance)
        {
            // Computed up front, before the tolerance is inspected.
            double score = source.RankEquality(target, options);

            switch (tolerance)
            {
                case FuzzyStringComparisonTolerance.Strong:
                    return score < 0.25;

                case FuzzyStringComparisonTolerance.Normal:
                    return score < 0.5;

                case FuzzyStringComparisonTolerance.Weak:
                    return score < 0.75;

                case FuzzyStringComparisonTolerance.Manual:
                    return score > 0.6;

                default:
                    return false;
            }
        }
Exemple #17
0
        /// <summary>
        /// Scores how dissimilar <paramref name="source"/> and <paramref name="target"/> are under
        /// each requested metric, averages the scores, and reports whether that average satisfies
        /// the minimum tolerance registered for <paramref name="tolerance"/>.
        /// </summary>
        /// <param name="source">First string to compare.</param>
        /// <param name="target">Second string to compare.</param>
        /// <param name="tolerance">Key into <c>Helper.MinimumTolerance</c> selecting the threshold.</param>
        /// <param name="options">
        /// Metrics to apply. Unless <c>CaseSensitive</c> is included, both strings are passed through
        /// <c>Capitalize()</c> first (presumably a case normalisation - confirm against its definition).
        /// </param>
        /// <returns>
        /// A <c>ComparisonResult</c> holding the averaged score and a match flag, or <c>null</c> when
        /// no requested metric produced a score.
        /// </returns>
        /// <remarks>
        /// NOTE(review): the length-normalised metrics below (Levenshtein variants, longest common
        /// subsequence/substring) divide by a string length and are not guarded against empty input;
        /// that behaviour is unchanged here.
        /// </remarks>
        public static Option<ComparisonResult> ApproximatelyEquals(this string source, string target, FuzzyStringComparisonTolerance tolerance, params FuzzyStringComparisonOptions[] options)
        {
            List<double> comparisonResults = new List<double>();

            if (!options.Contains(FuzzyStringComparisonOptions.CaseSensitive))
            {
                source = source.Capitalize();
                target = target.Capitalize();
            }

            // Hamming distance is only computed for equal-length strings.
            // The count is promoted to double BEFORE dividing: with an integer count the quotient
            // would be truncated to 0 or 1, and two empty strings would divide by zero. Zero-length
            // input is skipped, the same way unequal lengths are.
            if (options.Contains(FuzzyStringComparisonOptions.UseHammingDistance)
                && source.Length == target.Length
                && target.Length > 0)
            {
                comparisonResults.Add(Convert.ToDouble(source.HammingDistance(target)) / target.Length);
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaccardDistance))
            {
                comparisonResults.Add(source.JaccardDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaroDistance))
            {
                comparisonResults.Add(source.JaroDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseJaroWinklerDistance))
            {
                comparisonResults.Add(source.JaroWinklerDistance(target));
            }

            // The normalised variant takes precedence when both Levenshtein options are requested.
            if (options.Contains(FuzzyStringComparisonOptions.UseNormalizedLevenshteinDistance))
            {
                // Scaled by (longer length - lower bound).
                double range = Convert.ToDouble(Math.Max(source.Length, target.Length) - source.LevenshteinDistanceLowerBounds(target));
                comparisonResults.Add(Convert.ToDouble(source.NormalizedLevenshteinDistance(target)) / range);
            }
            else if (options.Contains(FuzzyStringComparisonOptions.UseLevenshteinDistance))
            {
                // Scaled by the upper bound of the distance.
                comparisonResults.Add(Convert.ToDouble(source.LevenshteinDistance(target)) / Convert.ToDouble(source.LevenshteinDistanceUpperBounds(target)));
            }

            // Both "longest common" metrics are similarities relative to the shorter string,
            // turned into dissimilarities with (1 - x).
            double shorterLength = Convert.ToDouble(Math.Min(source.Length, target.Length));

            if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubsequence))
            {
                comparisonResults.Add(1 - Convert.ToDouble(source.LongestCommonSubsequence(target).Length / shorterLength));
            }

            if (options.Contains(FuzzyStringComparisonOptions.UseLongestCommonSubstring))
            {
                comparisonResults.Add(1 - Convert.ToDouble(source.LongestCommonSubstring(target).Length / shorterLength));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseSorensenDiceDistance))
            {
                comparisonResults.Add(source.SorensenDiceDistance(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseOverlapCoefficient))
            {
                comparisonResults.Add(1 - source.OverlapCoefficient(target));
            }

            // Min: 0    Max: 1
            if (options.Contains(FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity))
            {
                comparisonResults.Add(1 - source.RatcliffObershelpSimilarity(target));
            }

            // No applicable metric was requested (or Hamming alone was skipped).
            if (comparisonResults.Count == 0)
            {
                return null;
            }

            double average = comparisonResults.Average();
            bool isMatch = MatchesMinimumTolerance(average, Helper.MinimumTolerance[tolerance]);

            return new ComparisonResult(average, isMatch);
        }
Exemple #18
0
        // Parametrised check: comparing "kevin" with "kevyn" under a single comparison option
        // and the given tolerance must produce expectedValue.
        public void WhenSimilarString_ShouldReturnExpectedValue(FuzzyStringComparisonOptions fuzzyStringComparisonOption, FuzzyStringComparisonTolerance fuzzyStringComparisonTolerance, bool expectedValue)
        {
            // Arrange: two names that differ in exactly one character.
            const string source = "kevin";
            const string target = "kevyn";

            // Act: the option under test is the only algorithm supplied.
            var actual = source.ApproximatelyEquals(target, fuzzyStringComparisonTolerance, new[] { fuzzyStringComparisonOption });

            // Assert
            Assert.Equal(expectedValue, actual);
        }