Exemple #1
0
        // A term stored with count 1 must not be suggested by a checker constructed
        // with 10 as its fourth argument.
        // NOTE(review): the fourth constructor argument is presumably the minimum count
        // threshold - confirm against the SymSpell constructor.
        public void LookupShouldNotReturnLowCountWord()
        {
            const string term = "pawn";

            var checker = new SymSpell(16, 2, 7, 10);
            checker.CreateDictionaryEntry(term, 1);

            var suggestions = checker.Lookup(term, SymSpell.Verbosity.Top, 0);

            Assert.AreEqual(0, suggestions.Count);
        }
Exemple #2
0
        // Runs one dictionary load and one lookup on both implementations so that
        // the code has executed once before the timed benchmarks start.
        // The lookup results are intentionally discarded.
        static void WarmUp()
        {
            // current implementation
            SymSpell current = new SymSpell(16, 2, 7);
            current.LoadDictionary(DictionaryPath[0], 0, 1);
            current.Lookup("hockie", SymSpell.Verbosity.All, 1);

            // original implementation
            Original.SymSpell baseline = new Original.SymSpell(2, 7);
            baseline.LoadDictionary(DictionaryPath[0], "", 0, 1);
            baseline.Lookup("hockie", "", 1, 2);
        }
 /// <summary>
 /// Looks up spelling suggestions for <paramref name="word"/> through the shared
 /// <c>symSpell</c> instance.
 /// </summary>
 /// <param name="word">Term to look up.</param>
 /// <param name="verbosity">Integer value that is cast to <c>SymSpell.Verbosity</c>.</param>
 /// <param name="distance">Forwarded as the third argument of <c>Lookup</c>
 /// (NOTE(review): presumably the maximum edit distance - confirm).</param>
 /// <returns>
 /// The list returned by <c>Lookup</c>; if <c>Lookup</c> throws, the failure is
 /// written to the console and an empty list is returned instead.
 /// </returns>
 public List <SymSpell.SuggestItem> getSuggestions(string word, int verbosity, int distance)
 {
     try
     {
         return(symSpell.Lookup(word, (SymSpell.Verbosity)verbosity, distance));
     }
     catch (Exception e)
     {
         // The exception message is passed as an argument, never as the format string:
         // used as a format it would hide the context values and could itself throw
         // FormatException if the message happened to contain '{' or '}'.
         Console.WriteLine("{0} (word={1}, verbosity={2}, distance={3})", e.Message, word, verbosity, distance);
         return(new List <SymSpell.SuggestItem>());
     }
 }
Exemple #4
0
        //Interactive demo entry point.
        //Loads a frequency dictionary into SymSpell, prints load statistics (word count,
        //entry count, elapsed time, memory delta) and then passes every non-empty line
        //typed on the console to Correct() until an empty line or end of input is read.
        //(Alternatively the dictionary can be created from a text corpus, see the comments below.)
        public static void Main(string[] args)
        {
            Console.Write("Creating dictionary ...");
            long      memSize   = GC.GetTotalMemory(true);
            Stopwatch stopWatch = new Stopwatch();

            stopWatch.Start();

            //set parameters
            const int initialCapacity = 82765;
            const int maxEditDistance = 2;
            const int prefixLength    = 7;
            var       symSpell        = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

            //Load a frequency dictionary
            //wordfrequency_en.txt  ensures high correction quality by combining two data sources:
            //Google Books Ngram data  provides representative word frequencies (but contains many entries with spelling errors)
            //SCOWL — Spell Checker Oriented Word Lists which ensures genuine English vocabulary (but contained no word frequencies)
            string path = AppDomain.CurrentDomain.BaseDirectory + "../../../../SymSpell/frequency_dictionary_en_82_765.txt";       //path when targeting .NET Core 2.0  & using symspell.cs

            //string path = "../../../SymSpell/frequency_dictionary_en_82_765.txt";                                                //path when targeting .NET Framework & using symspell.cs
            //string path = "../../frequency_dictionary_en_82_765.txt";  //path when using symspell nuget package (frequency_dictionary_en_82_765.txt is included in nuget package)
            if (!symSpell.LoadDictionary(path, 0, 1))
            {
                //nothing to correct against: report the resolved path, wait for a key so the message stays visible, then quit
                Console.Error.WriteLine("\rFile not found: " + Path.GetFullPath(path)); Console.ReadKey(); return;
            }

            //Alternatively Create the dictionary from a text corpus (e.g. http://norvig.com/big.txt )
            //Make sure the corpus does not contain spelling errors, invalid terms and the word frequency is representative to increase the precision of the spelling correction.
            //You may use SymSpell.CreateDictionaryEntry() to update a (self learning) dictionary incrementally
            //To extend spelling correction beyond single words to phrases (e.g. correcting "unitedkingom" to "united kingdom") simply add those phrases with CreateDictionaryEntry(). or use  https://github.com/wolfgarbe/SymSpellCompound
            //string path = "big.txt";
            //if (!symSpell.CreateDictionary(path)) Console.Error.WriteLine("File not found: " + Path.GetFullPath(path));

            stopWatch.Stop();
            long memDelta = GC.GetTotalMemory(true) - memSize;

            //"\r" returns to the start of the line so the statistics overwrite the "Creating dictionary ..." progress text
            Console.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
                              + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
                              + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
                              + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

            //warm up (result intentionally discarded)
            symSpell.Lookup("warmup", SymSpell.Verbosity.All, 1);

            string input;

            Console.WriteLine("Type a word and hit enter key to get spelling suggestions:");
            //ReadLine() returns null at end of input; treat that like an empty line and stop
            while (!string.IsNullOrEmpty(input = (Console.ReadLine() ?? "").Trim()))
            {
                Correct(input, symSpell);
            }
        }
Exemple #5
0
        // Three near-identical terms are stored; looking one of them up verbatim with
        // Verbosity.Top must return exactly that term and nothing else.
        public void LookupShouldFindExactMatch()
        {
            const string target = "steama";

            var checker = new SymSpell();
            checker.CreateDictionaryEntry(target, 4);
            checker.CreateDictionaryEntry("steamb", 6);
            checker.CreateDictionaryEntry("steamc", 2);

            var suggestions = checker.Lookup(target, SymSpell.Verbosity.Top, 2);

            Assert.AreEqual(1, suggestions.Count);
            Assert.AreEqual(target, suggestions[0].term);
        }
        // Experiment 1: concatenates the full-text OCR annotation of every JSON response
        // file under D:\json\, splits the text into tokens, looks every token up with
        // SymSpell (Verbosity.Top) and hands the per-token rows to CreateExcelFileExperimento.
        private static void Experimento1()
        {
            string[]      jsonFiles = Directory.GetFiles(@"D:\json\");
            StringBuilder ocrText   = new StringBuilder();

            // NOTE(review): this instance is never used below; it is only kept so that
            // its construction still happens exactly as before.
            EditDistanceLength editDistance = new EditDistanceLength();

            //Symspell parameters
            const int initialCapacity = 82765;
            const int maxEditDistance = 5;
            const int prefixLength    = 7;
            SymSpell  spellChecker    = new SymSpell(initialCapacity, maxEditDistance, prefixLength);
            Dictionary <int, ExperimentSpell> rows = new Dictionary <int, ExperimentSpell>();

            // Gather the text of every response that carries a full-text annotation.
            foreach (string jsonFile in jsonFiles)
            {
                string json   = File.ReadAllText(jsonFile, Encoding.Default);
                var    parsed = Google.Protobuf.JsonParser.Default.Parse <Google.Cloud.Vision.V1.AnnotateFileResponse>(json);
                foreach (var pageResponse in parsed.Responses)
                {
                    var fullText = pageResponse.FullTextAnnotation;
                    if (fullText == null)
                    {
                        continue;
                    }
                    ocrText.Append(fullText.Text);
                }
            }

            spellChecker.LoadDictionary(@"D:\DictionaryFiles\default.txt", 0, 1);

            // Strip braces and normalise line breaks / punctuation to single spaces.
            // ": " is parked as "***" so that it survives the removal of the remaining colons.
            string normalized = ocrText.ToString()
                                .Replace("\n", " ")
                                .Replace("{", "")
                                .Replace("}", "")
                                .Replace(": ", "***")
                                .Replace(" : ", " ")
                                .Replace(":", " ")
                                .Replace("***", ": ")
                                .Replace(". ", " ")
                                .Replace(", ", " ")
                                .Replace("-", " ");
            string[] tokens = normalized.Split(' ');

            int rowIndex = 0;

            foreach (string token in tokens)
            {
                List <SymSpell.SuggestItem> suggestions = spellChecker.Lookup(token, SymSpell.Verbosity.Top);
                bool hasSuggestion = suggestions.Count > 0;

                // "modificada" when a suggestion exists, otherwise "igual" with the token unchanged.
                ExperimentSpell row = new ExperimentSpell();
                row.correction = hasSuggestion ? "modificada" : "igual";
                row.original   = token;
                row.correctionLookupCompound = hasSuggestion ? suggestions[0].term : token;

                rows.Add(rowIndex++, row);
            }
            CreateExcelFileExperimento(rows, "1");
        }
Exemple #7
0
        // "steam" is not stored itself; of the three stored candidates, the lookup with
        // Verbosity.Top must return the one with the highest count ("steamb", 6).
        public void LookupShouldReturnMostFrequent()
        {
            var checker = new SymSpell();
            checker.CreateDictionaryEntry("steama", 4);
            checker.CreateDictionaryEntry("steamb", 6);
            checker.CreateDictionaryEntry("steamc", 2);

            var suggestions = checker.Lookup("steam", SymSpell.Verbosity.Top, 2);

            Assert.AreEqual(1, suggestions.Count);

            var best = suggestions[0];
            Assert.AreEqual("steamb", best.term);
            Assert.AreEqual(6, best.count);
        }
Exemple #8
0
        // Adding to a count that is already close to long.MaxValue must leave the stored
        // count at long.MaxValue instead of wrapping around.
        public void AddAdditionalCountsShouldNotOverflow()
        {
            const string term = "hello";
            var checker = new SymSpell();

            // first insertion: 10 below the maximum
            checker.CreateDictionaryEntry(term, long.MaxValue - 10);
            var suggestions = checker.Lookup(term, SymSpell.Verbosity.Top);
            long stored = suggestions.Count == 1 ? suggestions[0].count : 0;
            Assert.AreEqual(long.MaxValue - 10, stored);

            // second insertion: an exact sum would exceed the maximum by one
            checker.CreateDictionaryEntry(term, 11);
            suggestions = checker.Lookup(term, SymSpell.Verbosity.Top);
            stored = suggestions.Count == 1 ? suggestions[0].count : 0;
            Assert.AreEqual(long.MaxValue, stored);
        }
Exemple #9
0
        // Creating a dictionary entry for a term that already exists must add the new
        // count to the stored one (11, then 11 + 3).
        public void AddAdditionalCountsShouldIncreaseCount()
        {
            const string term = "hello";
            var checker = new SymSpell();

            // first insertion
            checker.CreateDictionaryEntry(term, 11);
            var suggestions = checker.Lookup(term, SymSpell.Verbosity.Top);
            long stored = suggestions.Count == 1 ? suggestions[0].count : 0;
            Assert.AreEqual(11, stored);

            // second insertion of the same term
            checker.CreateDictionaryEntry(term, 3);
            suggestions = checker.Lookup(term, SymSpell.Verbosity.Top);
            stored = suggestions.Count == 1 ? suggestions[0].count : 0;
            Assert.AreEqual(11 + 3, stored);
        }
Exemple #10
0
        // Times SymSpell.Lookup over the query terms listed in a text file.
        //   path       : text file; for every line with at least two whitespace-separated
        //                columns the first column is taken as a query term
        //   testNumber : maximum number of query terms to load
        // Runs 10 rounds over the loaded terms and prints the suggestion count of the
        // last round together with the elapsed milliseconds per round (integer division).
        public static void Benchmark(string path, int testNumber)
        {
            int resultSum = 0;

            string[] testList = new string[testNumber];
            List <SymSpell.SuggestItem> suggestions = null;

            //load at most testNumber terms with random spelling errors
            int loaded = 0;

            using (StreamReader sr = new StreamReader(File.OpenRead(path)))
            {
                String line;

                //process a single line at a time only for memory efficiency;
                //stop as soon as the array is full so that a longer file cannot overrun it
                while (loaded < testNumber && (line = sr.ReadLine()) != null)
                {
                    string[] lineParts = line.Split(null);
                    if (lineParts.Length >= 2)
                    {
                        testList[loaded++] = lineParts[0];
                    }
                }
            }

            Stopwatch stopWatch = new Stopwatch();

            stopWatch.Start();

            //perform n rounds of Lookup over the loaded terms
            int rounds = 10;

            for (int j = 0; j < rounds; j++)
            {
                resultSum = 0;
                //spellcheck strings; only the slots that were actually filled are queried,
                //so a file shorter than testNumber never passes null to Lookup
                for (int i = 0; i < loaded; i++)
                {
                    suggestions = SymSpell.Lookup(testList[i], "", SymSpell.editDistanceMax);
                    resultSum  += suggestions.Count;
                }
            }
            stopWatch.Stop();
            Console.WriteLine(resultSum.ToString("N0") + " results in " + (stopWatch.ElapsedMilliseconds / rounds).ToString() + " ms");
        }
Exemple #11
0
        // Splits the input on spaces, asks the corrector for the closest suggestion of
        // every token, and then runs warmXcorr for every non-directive knowledge pattern
        // whose context equals the given one. The callback handed to warmXcorr dispatches
        // an action to the matching entry of Parlogike.externFunctors.
        // NOTE(review): onlyWarn is not read anywhere in this method.
        public void warmAttention2(string input, bool onlyWarn, string session, string context)
        {
            string[] words = input.Split(' ');

            // Best suggestion per token; "" when the corrector offers none.
            List <string> bestTerms = new List <string>(words.Length);
            foreach (string word in words)
            {
                List <SymSpell.SuggestItem> candidates = corrector.Lookup(word, SymSpell.Verbosity.Closest);
                bestTerms.Add(candidates.Count > 0 ? candidates[0].term : "");
            }

            float max = 0;

            for (int kInd = 0; kInd != knowledge.Count; kInd++)
            {
                var sp = knowledge[kInd];
                if (!sp.isDirective && sp.context.Equals(context))
                {
                    //Console.WriteLine("at pattern {0}", kInd);
                    warmXcorr(sp, words, bestTerms.ToArray(), max, (string s, Action a, string extra) =>
                    {
                        // functor signature:
                        //(Parlogike self, string input,  List<Variable> args,  char dir, bool mutate, Pattern pattern)
                        if (Parlogike.externFunctors.ContainsKey(a._operator))
                        {
                            return((Parlogike.externFunctors
                                    [a._operator](this, s, a.arguments, 'i', false, sp, session, extra)).w);
                        }
                        // unknown operator: report it and contribute 0
                        Console.WriteLine("Operator {0} doesnt exists at line {1}", a._operator, a.line);
                        return(0);
                    });
                }
            }
        }
    // Looks up suggestions for the input term with Verbosity.All, logs how many were
    // found and returns them to the caller.
    public static List <SymSpell.SuggestItem> Correct(string input, SymSpell symSpell)
    {
        //check if input term or similar terms within edit-distance are in dictionary, return results sorted by ascending edit distance, then by descending word frequency
        const SymSpell.Verbosity verbosity = SymSpell.Verbosity.All;

        List <SymSpell.SuggestItem> found = symSpell.Lookup(input, verbosity);

        //the number of suggestions is only logged when more than the single top result was requested
        if (verbosity != SymSpell.Verbosity.Top)
        {
            Debug.Log(found.Count.ToString() + " suggestions");
        }
        return(found);
    }
    // Use this for initialization.
    // Subscribes WordPrediction_KeyPressedHandler to the target keyboard (falling back to
    // KeyboardLayout.Instance when none is assigned), creates the SymSpell instance, loads
    // the English frequency dictionary from below Application.dataPath and runs one
    // warm-up lookup.
    void Start()
    {
        if (!targetkeyboard)
        {
            targetkeyboard = KeyboardLayout.Instance;
        }
        if (!targetkeyboard)
        {
            Debug.LogError("Target Keyboard Empty");
        }
        else
        {
            targetkeyboard.KeyboardLayout_OnKeyPressed += WordPrediction_KeyPressedHandler;
        }
        Debug.Log("Creating dictionary ...");

        //set parameters
        const int initialCapacity = 82765;
        const int maxEditDistance = 2;
        const int prefixLength    = 7;

        symSpell = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

        //Load a frequency dictionary
        //wordfrequency_en.txt  ensures high correction quality by combining two data sources:
        //Google Books Ngram data  provides representative word frequencies (but contains many entries with spelling errors)
        //SCOWL — Spell Checker Oriented Word Lists which ensures genuine English vocabulary (but contained no word frequencies)
        //The path is assembled with Path.Combine so that the platform's directory separator is used;
        //hard-coded backslashes only resolve on Windows. The two-argument overload is nested on purpose
        //because it exists in every .NET profile.
        string path = System.IO.Path.Combine(
            System.IO.Path.Combine(Application.dataPath, "SpellChecker"),
            System.IO.Path.Combine("Resources", "frequency_dictionary_en_82_765.txt")); //path referencing the SymSpell core project

        //string path = "../../frequency_dictionary_en_82_765.txt";  //path when using symspell nuget package (frequency_dictionary_en_82_765.txt is included in nuget package)
        if (!symSpell.LoadDictionary(path, 0, 1))
        {
            Debug.LogError("\rFile not found: " + System.IO.Path.GetFullPath(path));
        }

        //warm up (result intentionally discarded)
        symSpell.Lookup("warmup", SymSpell.Verbosity.All);
    }
Exemple #14
0
        // Regression check: the total number of suggestions returned (Verbosity.Closest)
        // for the entries of noisy_query_en_1000.txt against the 82,765-word English
        // dictionary must equal 4945.
        public void LookupShouldReplicateNoisyResults()
        {
            const int editDistanceMax          = 2;
            const int prefixLength             = 7;
            const SymSpell.Verbosity verbosity = SymSpell.Verbosity.Closest;

            string baseDir        = AppDomain.CurrentDomain.BaseDirectory;
            string dictionaryFile = baseDir + "../../../SymSpell/frequency_dictionary_en_82_765.txt"; //for spelling correction (genuine English words)
            string queryFile      = baseDir + "../../../SymSpell.Demo/test_data/noisy_query_en_1000.txt";

            var symSpell = new SymSpell(83000, editDistanceMax, prefixLength);
            symSpell.LoadDictionary(dictionaryFile, 0, 1);

            //load 1000 terms with random spelling errors
            string[] queries = new string[1000];
            int      loaded  = 0;

            using (StreamReader reader = new StreamReader(File.OpenRead(queryFile)))
            {
                //process a single line at a time only for memory efficiency
                for (string line = reader.ReadLine(); line != null; line = reader.ReadLine())
                {
                    string[] columns = line.Split(null);
                    if (columns.Length < 2)
                    {
                        continue;
                    }
                    //the first column holds the query term
                    queries[loaded++] = columns[0];
                }
            }

            int totalSuggestions = 0;

            foreach (string query in queries)
            {
                totalSuggestions += symSpell.Lookup(query, verbosity, symSpell.MaxDictionaryEditDistance).Count;
            }
            Assert.AreEqual(4945, totalSuggestions);
        }
Exemple #15
0
        // Looks the input up in the given language, then prints the elapsed lookup time,
        // one line per suggestion (term, distance, count) and - unless SymSpell.verbose is 0 -
        // the number of suggestions.
        public static void Correct(string input, string language)
        {
            Stopwatch timer = Stopwatch.StartNew();

            //check if input term or similar terms within edit-distance are in dictionary, return results sorted by ascending edit distance, then by descending word frequency
            List <SymSpell.SuggestItem> found = SymSpell.Lookup(input, language, SymSpell.editDistanceMax);

            timer.Stop();
            Console.WriteLine(timer.ElapsedMilliseconds.ToString() + " ms");

            //display term and frequency
            foreach (var item in found)
            {
                string distanceText = item.distance.ToString();
                string countText    = item.count.ToString("N0");
                Console.WriteLine(item.term + " " + distanceText + " " + countText);
            }

            if (SymSpell.verbose == 0)
            {
                return;
            }
            Console.WriteLine(found.Count.ToString() + " suggestions");
        }
        // Command-line front end for SymSpell.
        // Usage: SymSpell.CommandLine DictionaryType DictionaryPath [PrefixLength] LookupType [MaxEditDistance] [OutputStats] [Verbosity]
        // With more than two arguments the dictionary is loaded/created, load statistics are
        // written to stderr, and every non-empty line read from stdin is corrected with the
        // selected lookup type and written to stdout. Otherwise the help text is printed.
        // Any invalid argument produces "Error in parameter N" on stderr and ends the program.
        static void Main(string[] args)
        {
            if (args.Length > 2)
            {
                Console.Error.Write("Creating dictionary ...");
                long      memSize   = GC.GetTotalMemory(true);
                Stopwatch stopWatch = new Stopwatch();
                stopWatch.Start();

                //parameters
                int initialCapacity = 82765;
                int termIndex       = 0; //column of the term in the dictionary text file
                int countIndex      = 1; //column of the term frequency in the dictionary text file

                //dictionaryType
                //an exact match is required: a substring test against "load.create" would also accept
                //fragments such as "oad" or ".", which then pass the switch below without loading anything
                string dictionaryType = args[0].ToLowerInvariant();
                if (dictionaryType != "load" && dictionaryType != "create")
                {
                    Console.Error.WriteLine("Error in parameter 1"); return;
                }

                //dictionaryPath
                string dictionaryPath = AppDomain.CurrentDomain.BaseDirectory + args[1];

                //prefix length (optional parameter)
                //if the third argument is a number it is the prefix length and all following parameters shift by one
                int offset       = 0;
                int prefixLength = 7;
                if (!int.TryParse(args[2], out prefixLength))
                {
                    prefixLength = 7;
                }
                else
                {
                    offset = 1;
                }

                //lookupType (required)
                //without it the input loop below would read every line and silently produce no output
                if (args.Length <= 2 + offset)
                {
                    Console.Error.WriteLine("Error in parameter " + (3 + offset).ToString() + ": LookupType is missing"); return;
                }
                string lookupType = args[2 + offset].ToLowerInvariant();
                if (lookupType != "lookup" && lookupType != "lookupcompound" && lookupType != "wordsegment")
                {
                    Console.Error.WriteLine("Error in parameter " + (3 + offset).ToString()); return;
                }

                //maxEditDistance
                int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
                if (args.Length > 3 + offset)
                {
                    if (!int.TryParse(args[3 + offset], out maxEditDistanceDictionary))
                    {
                        Console.Error.WriteLine("Error in parameter " + (4 + offset).ToString()); return;
                    }
                }

                //output stats
                bool outputStats = false;//false, true
                if (args.Length > 4 + offset)
                {
                    if (!bool.TryParse(args[4 + offset], out outputStats))
                    {
                        Console.Error.WriteLine("Error in parameter " + (5 + offset).ToString()); return;
                    }
                }

                //verbosity
                var suggestionVerbosity = SymSpell.Verbosity.Top; //Top, Closest, All
                if (args.Length > 5 + offset)
                {
                    if (!Enum.TryParse(args[5 + offset], true, out suggestionVerbosity))
                    {
                        Console.Error.WriteLine("Error in parameter " + (6 + offset).ToString()); return;
                    }
                }

                //create object
                var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);

                //load dictionary
                switch (dictionaryType)
                {
                case "load":
                    if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
                    {
                        Console.Error.WriteLine("File not found!");
                        return;
                    }
                    break;

                case "create":
                    if (!symSpell.CreateDictionary(dictionaryPath))
                    {
                        Console.Error.WriteLine("File not found!");
                        return;
                    }
                    break;

                default:
                    break;
                }

                stopWatch.Stop();
                long memDelta = GC.GetTotalMemory(true) - memSize;

                //not to stdout, but to Console.Error: status info will always be on console, but not redirected or piped
                Console.Error.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
                                        + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
                                        + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
                                        + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

                //warm up (result intentionally discarded)
                symSpell.Lookup("warmup", SymSpell.Verbosity.All);

                //lookup suggestions for the input strings; an empty line or end of input ends the loop
                string inputTerm;
                while (!string.IsNullOrEmpty(inputTerm = (Console.ReadLine() ?? "").Trim()))
                {
                    switch (lookupType)
                    {
                    case "lookup":
                        var suggestions = symSpell.Lookup(inputTerm, suggestionVerbosity, maxEditDistanceDictionary, true);
                        //display suggestions, edit distance and term frequency
                        foreach (var suggestion in suggestions)
                        {
                            if (outputStats)
                            {
                                Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                            }
                            else
                            {
                                Console.WriteLine(suggestion.term);
                            }
                        }
                        break;

                    case "lookupcompound":
                        var suggestions2 = symSpell.LookupCompound(inputTerm);
                        //display suggestions, edit distance and term frequency
                        foreach (var suggestion in suggestions2)
                        {
                            if (outputStats)
                            {
                                Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                            }
                            else
                            {
                                Console.WriteLine(suggestion.term);
                            }
                        }
                        break;

                    case "wordsegment":
                        var suggestions3 = symSpell.WordSegmentation(inputTerm);
                        //display corrected string, summed edit distance and summed log probability
                        foreach (var suggestion in suggestions3)
                        {
                            if (outputStats)
                            {
                                Console.WriteLine(suggestion.correctedString + " " + suggestion.distanceSum.ToString("N0") + " " + suggestion.probabilityLogSum.ToString());
                            }
                            else
                            {
                                Console.WriteLine(suggestion.correctedString);
                            }
                        }
                        break;

                    default:
                        break;
                    }
                }
            }
            else
            {
                //PrefixLength is number

                //help
                Console.WriteLine("SymSpell.CommandLine DictionaryType DictionaryPath [PrefixLength] LookupType [MaxEditDistance] [OutputStats] [Verbosity]");
                Console.WriteLine();
                Console.WriteLine("DictionaryType=load|create");
                Console.WriteLine("   load: load dictionary from dictionary file");
                Console.WriteLine("   create: create dictionary from text corpus");
                Console.WriteLine("DictionaryPath: path to dictionary/corpus file");
                Console.WriteLine("PrefixLength: default=7 (speed/memory consumption trade-off)");  //dictionary param
                Console.WriteLine("   5: low memory, slow lookup");
                Console.WriteLine("   6: medium memory, medium lookup");
                Console.WriteLine("   7: high memory, fast lookup");
                //lookup intended for correction of single word
                //lookupcompound intended for correction of multiple words, it can insert only a single space per token, faster than wordsegmentation
                //wordsegmentation intended for segmentation and correction of multiple words, it can insert multiple spaces per token, slower than lookupcompound
                Console.WriteLine("LookupType=lookup|lookupcompound|wordsegment");
                Console.WriteLine("   lookup: correct single word");
                Console.WriteLine("   lookupcompound: correct multiple-word string (supports splitting/merging)");
                Console.WriteLine("   wordsegment: word segment and correct input string");
                Console.WriteLine("MaxEditDistance: default=2 (0: no correction, word segmentation only)");
                Console.WriteLine("OutputStats=false|true");
                Console.WriteLine("   false: only corrected string");
                Console.WriteLine("   true: corrected string, edit distance, word frequency/probability");
                Console.WriteLine("Verbosity=top|closest|all"); //no effect for lookupcompound and wordsegment
                Console.WriteLine("   top: Top suggestion");
                Console.WriteLine("   closest: All suggestions of smallest edit distance found");
                Console.WriteLine("   all: All suggestions within maxEditDistance");
                Console.WriteLine();
            }
        }
        // DoWork handler (debug variant): captures the window of the first "Eternal" process,
        // converts the capture to gray, scales it by scalingFactor, OCRs the text box of each
        // of the 12 cards and assigns every card a rank:
        //   - the ranking stored for the recognised text, if it is a key of cardRankings;
        //   - otherwise the ranking of the first SymSpell suggestion, if that is a known key;
        //   - "U" when the text is non-empty but cannot be resolved;
        //   - string.Empty when no text was recognised.
        // Finally the rankings are rendered and timing/hit statistics are passed to OutputTestResults.
        // NOTE(review): img is reassigned twice and the thresholded image is created per card
        // without any of them being disposed - confirm whether Pix instances must be released.
        private void bw_DEBUG_OCR(object sender, DoWorkEventArgs e)
        {
            var    watch = Stopwatch.StartNew();
            string processedTextResults    = "";
            string preProcessedTextResults = "";
            int    hits = 0;

            Process[] processes = Process.GetProcessesByName("Eternal");

            Process p = processes.FirstOrDefault();

            if (p != null)
            {
                IntPtr windowHandle = p.MainWindowHandle;

                // difference is 20 ms between these two function
                //Image img22 = CaptureWindow(windowHandle);
                Pix img = CaptureWindowPix(windowHandle);

                img = img.ConvertRGBToGray(0.40f, 0.34f, 0.26f);

                //img = img.BinarizeOtsuAdaptiveThreshold(img.Width / 5, img.Height / 5, 10, 10, 0.1f);
                // img = img.BinarizeSauvolaTiled();
                //img = img.INVERT

                img = img.Scale(scalingFactor, scalingFactor);
                //img = img.BinarizeOtsuAdaptiveThreshold(img.Width / 5, img.Height / 5, 10, 10, 0.1f);

                //img = img.UNSHARPMASK
                //img = img.BinarizeOtsuAdaptiveThreshold(2000, 2000, 0, 0, 0.0f);
                //img = img.SELECTBYSIZE // removeNoise

                //var dpiX = 300;
                //var dpiY = 300;

                //Bitmap screenshotBitmap = PixConverter.ToBitmap(img);
                //screenshotBitmap.SetResolution(dpiX, dpiY);

                //DEBUG_PrintImage(img, "manualPreProcessing", watch);

                for (int i = 0; i < 12; i++)
                {
                    //formGraphics.DrawRectangle(new Pen(new SolidBrush(Color.Pink)), Cards[i].WholeCardBounding);
                    //formGraphics.DrawRectangle(new Pen(new SolidBrush(Color.Aqua)), Cards[i].TextboxBounding);
                    //formGraphics.DrawRectangle(new Pen(new SolidBrush(Color.Bisque)), Cards[i].RankLocation.X, Cards[i].RankLocation.Y, 10, 10);

                    // Scale the text box by the same factor that was applied to the image.
                    // The factor must be multiplied in before truncating to int: casting the
                    // factor itself first (e.g. 1.5 -> 1) would crop the wrong region.
                    Rect textbox_Scaled = new Rect(
                        (int)(Cards[i].TextboxBounding.X * scalingFactor),
                        (int)(Cards[i].TextboxBounding.Y * scalingFactor),
                        (int)(Cards[i].TextboxBounding.Width * scalingFactor),
                        (int)(Cards[i].TextboxBounding.Height * scalingFactor));

                    using (Page processedImage = ocrEngine.Process(img, textbox_Scaled))
                    {
                        DEBUG_PrintImage(processedImage.GetThresholdedImage(), "lower rez (individual)" + i, watch);

                        var text = processedImage.GetText();
                        // keep both the raw (line breaks removed) and the cleaned text for the report
                        preProcessedTextResults += text.Replace("\n", "") + Environment.NewLine;
                        text = CleanText(text);
                        processedTextResults += text + Environment.NewLine;
                        if (cardRankings.ContainsKey(text))
                        {
                            Cards[i].Rank = cardRankings[text];
                            hits++;
                        }
                        else if (!String.IsNullOrEmpty(text))
                        {
                            // no exact match: fall back to the first spelling suggestion
                            List <SymSpell.suggestItem> suggestions = SymSpell.Lookup(text, "", SymSpell.editDistanceMax);

                            // The suggestion is only usable when it is itself a key of cardRankings;
                            // indexing with an unknown term would throw KeyNotFoundException.
                            if (suggestions.Count > 0 && cardRankings.ContainsKey(suggestions.First().term))
                            {
                                Cards[i].Rank = cardRankings[suggestions.First().term];
                                hits++;
                            }
                            else
                            {
                                Cards[i].Rank = "U";
                            }
                        }
                        else
                        {
                            Cards[i].Rank = string.Empty;
                        }
                    }
                }

                RenderRankings();
            }

            watch.Stop();
            var elapsedMs = watch.ElapsedMilliseconds;

            OutputTestResults(elapsedMs, processedTextResults, hits, preProcessedTextResults);
        }
        /// <summary>
        /// Command-line entry point: builds a SymSpell dictionary, then reads single-word
        /// queries from standard input and prints their suggestions until an empty line
        /// (or end of input) is read. With fewer than two arguments the usage text is printed.
        /// </summary>
        /// <param name="args">
        /// args[0]: "load" or "create" (case-insensitive);
        /// args[1]: dictionary or corpus path, appended to the application base directory;
        /// args[2] (optional): maximum edit distance, default 2;
        /// args[3] (optional): verbosity (Top, Closest or All), default Top;
        /// args[4] (optional): prefix length, default 7.
        /// </param>
        static void Main(string[] args)
        {
            if (args.Length >= 2)
            {
                Console.Error.Write("Creating dictionary ...");
                long      memSize   = GC.GetTotalMemory(true);
                Stopwatch stopWatch = new Stopwatch();
                stopWatch.Start();

                //parameters
                int initialCapacity = 82765;

                int maxEditDistanceDictionary = 2; //maximum edit distance per dictionary precalculation
                if (args.Length > 2)
                {
                    if (!int.TryParse(args[2], out maxEditDistanceDictionary))
                    {
                        Console.Error.WriteLine("Error in parameter 3");
                        return;
                    }
                }
                int maxEditDistanceLookup = maxEditDistanceDictionary; //max edit distance per lookup

                var suggestionVerbosity = SymSpell.Verbosity.Top;      //Top, Closest, All
                if (args.Length > 3)
                {
                    if (!Enum.TryParse(args[3], out suggestionVerbosity))
                    {
                        Console.Error.WriteLine("Error in parameter 4");
                        return;
                    }
                }

                int prefixLength = 7;
                if (args.Length > 4)
                {
                    if (!int.TryParse(args[4], out prefixLength))
                    {
                        Console.Error.WriteLine("Error in parameter 5");
                        return;
                    }
                }

                string dictionaryPath = AppDomain.CurrentDomain.BaseDirectory + args[1]; // "../../../../SymSpell/frequency_dictionary_en_82_765.txt";
                int    termIndex      = 0;                                               //column of the term in the dictionary text file
                int    countIndex     = 1;                                               //column of the term frequency in the dictionary text file

                //create object
                var symSpell = new SymSpell(initialCapacity, maxEditDistanceDictionary, prefixLength);

                //load dictionary
                switch (args[0].ToLowerInvariant())
                {
                    case "load":
                        if (!symSpell.LoadDictionary(dictionaryPath, termIndex, countIndex))
                        {
                            Console.Error.WriteLine("File not found!");
                            return;
                        }
                        break;

                    case "create":
                        if (!symSpell.CreateDictionary(dictionaryPath))
                        {
                            Console.Error.WriteLine("File not found!");
                            return;
                        }
                        break;

                    default:
                        //an unrecognised command would otherwise leave the dictionary empty
                        //and every lookup below would silently return nothing
                        Console.Error.WriteLine("Unknown command: " + args[0]);
                        return;
                }

                stopWatch.Stop();
                long memDelta = GC.GetTotalMemory(true) - memSize;

                //not to stdout, but to Console.Error: status info will always be on console, but not redirected or piped
                Console.Error.WriteLine("\rDictionary: " + symSpell.WordCount.ToString("N0") + " words, "
                                        + symSpell.EntryCount.ToString("N0") + " entries, edit distance=" + symSpell.MaxDictionaryEditDistance.ToString()
                                        + " in " + stopWatch.Elapsed.TotalMilliseconds.ToString("0.0") + "ms "
                                        + (memDelta / 1024 / 1024.0).ToString("N0") + " MB");

                //warm up: the result is irrelevant, the call only makes the lookup path run once
                symSpell.Lookup("warmup", SymSpell.Verbosity.All, 1);

                //lookup suggestions for single-word input strings
                string inputTerm;
                while (!string.IsNullOrEmpty(inputTerm = (Console.ReadLine() ?? "").Trim()))
                {
                    var suggestions = symSpell.Lookup(inputTerm, suggestionVerbosity, maxEditDistanceLookup, true);

                    //display suggestions, edit distance and term frequency
                    foreach (var suggestion in suggestions)
                    {
                        Console.WriteLine(suggestion.term + " " + suggestion.distance.ToString() + " " + suggestion.count.ToString("N0"));
                    }
                }
            }
            else
            {
                //help
                Console.WriteLine("SymSpell.CommandLine load   Path [MaxEditDistance] [Verbosity] [PrefixLength]");
                Console.WriteLine("SymSpell.CommandLine create Path [MaxEditDistance] [Verbosity] [PrefixLength]");
                Console.WriteLine();
                Console.WriteLine("load: load dictionary from dictionary file");
                Console.WriteLine("create: create dictionary from text corpus");
                Console.WriteLine("MaxEditDistance: default=2");
                Console.WriteLine("Verbosity=Top|Closest|All (case-sensitive)");
                Console.WriteLine("PrefixLength: default=7 (5:low memory; 7:fast lookup)");
                Console.WriteLine();
            }
        }
Exemple #19
0
        private static async Task Main(string[] args)
        {
            var directory = new DirectoryInfo("temp_git");

            if (directory.Exists)
            {
                NormalizeDirectoryAttributes(directory);
                directory.Delete(true);
            }

            void NormalizeDirectoryAttributes(DirectoryInfo directoryInfo)
            {
                foreach (var subPath in directoryInfo.GetDirectories())
                {
                    NormalizeDirectoryAttributes(subPath);
                }

                foreach (var file in directoryInfo.GetFiles())
                {
                    file.Attributes = FileAttributes.Normal;
                }
            }

            await Task.Delay(TimeSpan.FromSeconds(1));

            var info = new ProcessStartInfo("git", "clone https://github.com/discord-csharp/MODiX temp_git");
            var p    = Process.Start(info);

            if (p == null)
            {
                throw new InvalidOperationException("process handle was null");
            }
            p.WaitForExit();

            await Task.Delay(TimeSpan.FromSeconds(2));

            var extensions = new[] { ".txt", ".md", ".cs" };

            var spellingInfos = new List <FileSpellingInfo>();
            var spell         = new SymSpell();

            if (!spell.LoadDictionary(Path.Combine(Environment.CurrentDirectory, "frequency_dictionary_en_82_765.txt"), 0, 1))
            {
                throw new InvalidOperationException();
            }
            foreach (var file in Directory.GetFiles(Path.Combine(Environment.CurrentDirectory, "temp_git"), "*", SearchOption.AllDirectories))
            {
                if (!extensions.Contains(Path.GetExtension(file)))
                {
                    continue;
                }

                var spellingInfo = new FileSpellingInfo {
                    Path = file
                };
                var fileContents = File.ReadAllLines(file);
                for (var i = 0; i < fileContents.Length; i++)
                {
                    var line = fileContents[i].Trim();
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    spellingInfo.LineMistakes.Add((i + 1, line), new List <(string, string)>());
                    var words = line.Split(' ', StringSplitOptions.RemoveEmptyEntries).Select(c => c.RemoveSpecialCharacters());
                    foreach (var word in words.Where(c => !string.IsNullOrWhiteSpace(c)))
                    {
                        var results = spell.Lookup(word.ToLower(), SymSpell.Verbosity.Top);
                        if (results == null || results.Any() == false)
                        {
                            continue;
                        }
                        var suggestion = results.First();
                        if (suggestion.term == word.ToLower())
                        {
                            continue;
                        }
                        spellingInfo.LineMistakes[(i + 1, line)].Add((word, suggestion.term));
Exemple #20
0
        /// <summary>
        /// Benchmarks SymSpell against Original.SymSpell for every combination of
        /// maxEditDistance 1..3, prefixLength 5..7 and each entry of DictionaryPath.
        /// For each combination it measures dictionary load time and memory growth, then,
        /// for each SymSpell.Verbosity value, times an exact query, a non-exact query and the
        /// mixed query list from BuildQuery1K. Per-run figures and overall averages are
        /// written to the console.
        /// </summary>
        static void BenchmarkPrecalculationLookup()
        {
            string[] query1k = BuildQuery1K();
            int      resultNumber = 0;
            int      repetitions = 1000;
            int      totalLoopCount = 0;
            long     totalMatches = 0;
            long     totalOrigMatches = 0;
            double   totalLoadTime, totalMem, totalLookupTime, totalOrigLoadTime, totalOrigMem, totalOrigLookupTime;

            totalLoadTime = totalMem = totalLookupTime = totalOrigLoadTime = totalOrigMem = totalOrigLookupTime = 0;
            long totalRepetitions = 0;

            Stopwatch stopWatch = new Stopwatch();

            for (int maxEditDistance = 1; maxEditDistance <= 3; maxEditDistance++)
            {
                for (int prefixLength = 5; prefixLength <= 7; prefixLength++)
                {
                    //benchmark dictionary precalculation size and time
                    //maxEditDistance=1/2/3; prefixLength=5/6/7;  dictionary=30k/82k/500k; class=instantiated/static
                    for (int i = 0; i < DictionaryPath.Length; i++)
                    {
                        totalLoopCount++;

                        //instantiated dictionary
                        long memSize = GC.GetTotalMemory(true);
                        stopWatch.Restart();
                        SymSpell dict = new SymSpell(DictionarySize[i], maxEditDistance, prefixLength);
                        dict.LoadDictionary(DictionaryPath[i], 0, 1);
                        stopWatch.Stop();
                        long memDelta = GC.GetTotalMemory(true) - memSize;
                        //single MB value so the printed figure and the accumulated total always agree
                        double memMb = memDelta / 1024.0 / 1024.0;
                        totalLoadTime += stopWatch.Elapsed.TotalSeconds;
                        totalMem      += memMb;
                        Console.WriteLine("Precalculation instance " + stopWatch.Elapsed.TotalSeconds.ToString("N3") + "s " + memMb.ToString("N1") + "MB " + dict.WordCount.ToString("N0") + " words " + dict.EntryCount.ToString("N0") + " entries  MaxEditDistance=" + maxEditDistance.ToString() + " prefixLength=" + prefixLength.ToString() + " dict=" + DictionaryName[i]);

                        //static dictionary
                        memSize = GC.GetTotalMemory(true);
                        stopWatch.Restart();
                        Original.SymSpell dictOrig = new Original.SymSpell(maxEditDistance, prefixLength);
                        dictOrig.LoadDictionary(DictionaryPath[i], "", 0, 1);
                        stopWatch.Stop();
                        memDelta = GC.GetTotalMemory(true) - memSize;
                        //same floating-point conversion as the instance path (no integer division by 1024)
                        double origMemMb = memDelta / 1024.0 / 1024.0;
                        totalOrigLoadTime += stopWatch.Elapsed.TotalSeconds;
                        totalOrigMem      += origMemMb;
                        Console.WriteLine("Precalculation static   " + stopWatch.Elapsed.TotalSeconds.ToString("N3") + "s " + origMemMb.ToString("N1") + "MB " + dictOrig.Count.ToString("N0") + " words " + dictOrig.EntryCount.ToString("N0") + " entries  MaxEditDistance=" + maxEditDistance.ToString() + " prefixLength=" + prefixLength.ToString() + " dict=" + DictionaryName[i]);

                        //benchmark lookup result number and time
                        //maxEditDistance=1/2/3; prefixLength=5/6/7; dictionary=30k/82k/500k; verbosity=0/1/2; query=exact/non-exact/mix; class=instantiated/static
                        foreach (SymSpell.Verbosity verbosity in Enum.GetValues(typeof(SymSpell.Verbosity)))
                        {
                            //instantiated exact
                            stopWatch.Restart();
                            for (int round = 0; round < repetitions; round++)
                            {
                                resultNumber = dict.Lookup("different", verbosity, maxEditDistance).Count;
                            }
                            stopWatch.Stop();
                            totalLookupTime += stopWatch.Elapsed.TotalMilliseconds;
                            totalMatches    += resultNumber;
                            Console.WriteLine("Lookup instance " + resultNumber.ToString("N0") + " results " + (stopWatch.Elapsed.TotalMilliseconds / repetitions).ToString("N6") + "ms/op verbosity=" + verbosity.ToString() + " query=exact");
                            //static exact
                            stopWatch.Restart();
                            for (int round = 0; round < repetitions; round++)
                            {
                                resultNumber = dictOrig.Lookup("different", "", maxEditDistance, (int)verbosity).Count;
                            }
                            stopWatch.Stop();
                            totalOrigLookupTime += stopWatch.Elapsed.TotalMilliseconds;
                            totalOrigMatches    += resultNumber;
                            Console.WriteLine("Lookup static   " + resultNumber.ToString("N0") + " results " + (stopWatch.Elapsed.TotalMilliseconds / repetitions).ToString("N6") + "ms/op verbosity=" + verbosity.ToString() + " query=exact");
                            Console.WriteLine();
                            totalRepetitions += repetitions;

                            //instantiated non-exact
                            stopWatch.Restart();
                            for (int round = 0; round < repetitions; round++)
                            {
                                resultNumber = dict.Lookup("hockie", verbosity, maxEditDistance).Count;
                            }
                            stopWatch.Stop();
                            totalLookupTime += stopWatch.Elapsed.TotalMilliseconds;
                            totalMatches    += resultNumber;
                            Console.WriteLine("Lookup instance " + resultNumber.ToString("N0") + " results " + (stopWatch.Elapsed.TotalMilliseconds / repetitions).ToString("N6") + "ms/op verbosity=" + verbosity.ToString() + " query=non-exact");
                            //static non-exact
                            stopWatch.Restart();
                            for (int round = 0; round < repetitions; round++)
                            {
                                resultNumber = dictOrig.Lookup("hockie", "", maxEditDistance, (int)verbosity).Count;
                            }
                            stopWatch.Stop();
                            totalOrigLookupTime += stopWatch.Elapsed.TotalMilliseconds;
                            totalOrigMatches    += resultNumber;
                            Console.WriteLine("Lookup static   " + resultNumber.ToString("N0") + " results " + (stopWatch.Elapsed.TotalMilliseconds / repetitions).ToString("N6") + "ms/op verbosity=" + verbosity.ToString() + " query=non-exact");
                            Console.WriteLine();
                            totalRepetitions += repetitions;

                            //instantiated mix: result counts are summed over the whole query list
                            stopWatch.Restart();
                            resultNumber = 0;
                            foreach (var word in query1k)
                            {
                                resultNumber += dict.Lookup(word, verbosity, maxEditDistance).Count;
                            }
                            stopWatch.Stop();
                            totalLookupTime += stopWatch.Elapsed.TotalMilliseconds;
                            totalMatches    += resultNumber;
                            Console.WriteLine("Lookup instance " + resultNumber.ToString("N0") + " results " + (stopWatch.Elapsed.TotalMilliseconds / query1k.Length).ToString("N6") + "ms/op verbosity=" + verbosity.ToString() + " query=mix");
                            //static mix
                            stopWatch.Restart();
                            resultNumber = 0;
                            foreach (var word in query1k)
                            {
                                resultNumber += dictOrig.Lookup(word, "", maxEditDistance, (int)verbosity).Count;
                            }
                            stopWatch.Stop();
                            totalOrigLookupTime += stopWatch.Elapsed.TotalMilliseconds;
                            totalOrigMatches    += resultNumber;
                            Console.WriteLine("Lookup static   " + resultNumber.ToString("N0") + " results " + (stopWatch.Elapsed.TotalMilliseconds / query1k.Length).ToString("N6") + "ms/op verbosity=" + verbosity.ToString() + " query=mix");
                            Console.WriteLine();
                            totalRepetitions += query1k.Length;
                        }
                        Console.WriteLine();

                        //drop the references before the next iteration takes its memory baseline
                        dict     = null;
                        dictOrig = null;
                    }
                }
            }
            //the percentage on each "instance" line is the relative difference to the "static" figure
            Console.WriteLine("Average Precalculation time instance " + (totalLoadTime / totalLoopCount).ToString("N3") + "s   " + ((totalLoadTime / totalOrigLoadTime) - 1).ToString("P1"));
            Console.WriteLine("Average Precalculation time static   " + (totalOrigLoadTime / totalLoopCount).ToString("N3") + "s");
            Console.WriteLine("Average Precalculation memory instance " + (totalMem / totalLoopCount).ToString("N1") + "MB " + ((totalMem / totalOrigMem) - 1).ToString("P1"));
            Console.WriteLine("Average Precalculation memory static   " + (totalOrigMem / totalLoopCount).ToString("N1") + "MB");
            Console.WriteLine("Average Lookup time instance " + (totalLookupTime / totalRepetitions).ToString("N3") + "ms          " + ((totalLookupTime / totalOrigLookupTime) - 1).ToString("P1"));
            Console.WriteLine("Average Lookup time static   " + (totalOrigLookupTime / totalRepetitions).ToString("N3") + "ms");
            Console.WriteLine("Total Lookup results instance " + totalMatches.ToString("N0") + "      " + (totalMatches - totalOrigMatches) + " differences");
            Console.WriteLine("Total Lookup results static   " + totalOrigMatches.ToString("N0"));
        }
Exemple #21
0
        /// <summary>
        /// Coroutine that looks up spelling suggestions for the last space-separated word of
        /// <paramref name="text"/>, shows the first suggestion (after sorting) on PredictedWord,
        /// and writes up to three distinct final characters of longer suggestions onto
        /// opt1, opt2 and opt3.
        /// </summary>
        /// <param name="text">Text typed so far; only its last space-separated word is used.</param>
        /// <returns>An enumerator that yields once (null) after the update has been applied.</returns>
        private IEnumerator populatePrediction(string text)
        {
            string[] words    = text.Split(' ');
            string   lastWord = words[words.Length - 1].ToLower();

            if (symSpell == null)
            {
                initDict();
            }

            if (symSpell != null)
            {
                Debug.Log("GFX: LastWord:" + lastWord);
                List<SymSpell.SuggestItem> suggestions = symSpell.Lookup(lastWord, SymSpell.Verbosity.All, 3);

                // A lookup without matches must not be indexed: guarding on Count (not just null)
                // avoids an ArgumentOutOfRangeException at suggestions[0].
                if (suggestions != null && suggestions.Count > 0)
                {
                    suggestions.Sort();

                    string bestTerm = suggestions[0].term.ToUpper();
                    Text   txtTag   = PredictedWord.GetComponentInChildren<Text>();
                    if (txtTag != null)
                    {
                        txtTag.text = bestTerm;
                    }
                    PredictedWord.name = bestTerm;

                    // Collect the last character of suggestions longer than the typed word,
                    // stopping once three distinct characters have been found.
                    HashSet<char> cSet = new HashSet<char>();
                    int           next = 0;
                    while (next < suggestions.Count && cSet.Count < 3)
                    {
                        string sug = suggestions[next++].term;
                        Debug.Log("GFX: Filter1: Sug" + sug + " last:" + sug[sug.Length - 1]);
                        if (sug.Length > lastWord.Length && sug[sug.Length - 1] != '\0')
                        {
                            cSet.Add(sug[sug.Length - 1]);
                        }
                    }

                    // Hand the characters to the option objects in order. The slot only advances
                    // when its object exists, so a missing object blocks the remaining characters.
                    GameObject[] options = { opt1, opt2, opt3 };
                    int          slot    = 0;
                    foreach (char c in cSet)
                    {
                        GameObject cGameObject = options[slot];
                        if (cGameObject != null)
                        {
                            cGameObject.name = "" + c;
                            Text txtText = cGameObject.GetComponentInChildren<Text>();
                            if (txtText != null)
                            {
                                string upper = c.ToString().ToUpper();
                                txtText.text     = upper;
                                cGameObject.name = upper;
                            }
                            slot++;
                        }
                    }
                }
            }
            yield return(null);
        }
Exemple #22
0
        /// <summary>
        /// Click handler: loads two frequency dictionaries into SymSpell, asks the user to pick a
        /// text file, replaces every space-separated token with its closest suggestion and writes
        /// the result to a randomly named .txt file in the same directory as the chosen file.
        /// Tokens without any suggestion are written unchanged; cancelling the dialog aborts.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void SpellCorrect_Click(object sender, RoutedEventArgs e)
        {
            ConsoleManager.Show();
            const int initialCapacity = 82765 * 2;
            const int maxEditDistance = 5;
            const int prefixLength    = 7;
            SymSpell  symSpell        = new SymSpell(initialCapacity, maxEditDistance, prefixLength);

            // Load a frequency dictionary
            //wordfrequency_en.txt  ensures high correction quality by combining two data sources:
            //Google Books Ngram data  provides representative word frequencies (but contains many entries with spelling errors)
            //SCOWL — Spell Checker Oriented Word Lists which ensures genuine English vocabulary (but contained no word frequencies)
            string path  = @"C:\Users\Emmanuel\source\repos\Project-Carl\Project-CARL\WpfApp1\frequency_dictionary_en_82_765.txt";
            string dict2 = @"C:\Users\Emmanuel\source\repos\Project-Carl\Project-CARL\WpfApp1\unigram_freq.txt";

            // A failed load is reported but is not fatal: the handler carries on with
            // whatever dictionary data could be loaded.
            if (!symSpell.LoadDictionary(path, 0, 1))
            {
                Console.Error.WriteLine("\rFile not found: " + System.IO.Path.GetFullPath(path));
                Console.ReadKey();
            }
            if (!symSpell.LoadDictionary(dict2, 0, 1))
            {
                // report the file that actually failed (dict2, not path)
                Console.Error.WriteLine("\rFile not found: " + System.IO.Path.GetFullPath(dict2));
                Console.ReadKey();
            }

            //Open textfile
            string correctionFile = "";

            System.Windows.Forms.MessageBox.Show("Choose file to Correct");
            using (OpenFileDialog openFileDialog1 = new OpenFileDialog())
            {
                if (openFileDialog1.ShowDialog() == System.Windows.Forms.DialogResult.OK)
                {
                    correctionFile = openFileDialog1.FileName;
                    System.Windows.Forms.MessageBox.Show(correctionFile);
                }
            }

            // Nothing was selected (dialog cancelled): reading an empty path would throw.
            if (string.IsNullOrEmpty(correctionFile))
            {
                return;
            }

            //read words into array/list
            string   corp  = File.ReadAllText(correctionFile, Encoding.UTF8);           //read raw text file
            string[] words = corp.Split(new string[] { " " }, StringSplitOptions.None); //tokenize raw text file

            //submit each word to symSpell and collect the corrected text, one trailing space per word
            StringBuilder corrected = new StringBuilder();
            for (int i = 0; i < words.Length; i++)
            {
                List<SymSpell.SuggestItem> suggestedWord = symSpell.Lookup(words[i], SymSpell.Verbosity.Closest);

                // Keep the original token when there is no suggestion (empty token, unknown word,
                // or no dictionary loaded) instead of failing on an empty list.
                string replacement = (suggestedWord != null && suggestedWord.Count > 0)
                    ? suggestedWord[0].term
                    : words[i];

                corrected.Append(replacement).Append(' ');
            }

            //save words to file
            string fileName = System.IO.Path.GetRandomFileName() + ".txt"; //random file name for our corrected text

            //save the directory of the correction file we selected previously
            string pathString = System.IO.Path.GetDirectoryName(correctionFile);

            // Use Combine again to add the file name to the path.
            pathString = System.IO.Path.Combine(pathString, fileName);

            File.WriteAllText(pathString, corrected.ToString());
        }