예제 #1
0
        /// <summary>
        /// Timing harness: collects every misspelling that <c>FileChecker</c> reports for
        /// "testarticle.txt" using the "en_US" dictionary, runs the corrector over all of them,
        /// and writes both elapsed times (in milliseconds) to the test context.
        /// Makes no assertions; the test attribute below is commented out.
        /// </summary>
        //[TestMethod]
        public void CorrectionTest()
        {
            DictionaryManager dictionaries = new DictionaryManager(@"C:\dev\git\Pspell\SpellCheckerConsole\bin\Debug\dictionaries");
            Dictionary enUs = dictionaries.GetDictionary("en_US");
            enUs.PreloadDictionaries();

            ErrorModel errorModel = new ErrorModel(enUs);
            LanguageModel languageModel = new LanguageModel(enUs);
            Corrector corrector = new Corrector(errorModel, languageModel);

            // Phase 1: time the search for misspellings (includes opening and disposing the checker).
            Stopwatch searchTimer = new Stopwatch();
            searchTimer.Start();
            List<MisspelledWord> found = new List<MisspelledWord>();
            using (FileChecker checker = new FileChecker("testarticle.txt", enUs))
            {
                // GetNextMisspelling returning null ends the scan.
                for (MisspelledWord next = checker.GetNextMisspelling(); next != null; next = checker.GetNextMisspelling())
                {
                    found.Add(next);
                }
            }
            searchTimer.Stop();

            // Phase 2: time the correction of everything collected above.
            Stopwatch correctionTimer = new Stopwatch();
            correctionTimer.Start();
            for (int i = 0; i < found.Count; i++)
            {
                corrector.Correct(found[i]);
            }
            correctionTimer.Stop();

            long searchMs = searchTimer.ElapsedMilliseconds;
            long correctionMs = correctionTimer.ElapsedMilliseconds;
            TestContext.WriteLine("Mistakes search time: " + searchMs + " ms");
            TestContext.WriteLine("Correction time: " + correctionMs + " ms");
        }
예제 #2
0
파일: Program.cs 프로젝트: pesha/MPspell
        /// <summary>
        /// Console entry point: spell-checks "testcs.txt" with the "cs_CZ" dictionary obtained from a
        /// <c>DictionaryManager</c> created with "gen", corrects each reported misspelling, and pushes
        /// every misspelling that received a correction to a <c>FileHandler</c> constructed with
        /// ("testcs.txt", "testcsFixed2.txt") — presumably the input and the corrected output file.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            DictionaryManager manager = new DictionaryManager("gen");
            Dictionary dictionary = manager.GetDictionary("cs_CZ");

            FileHandler handler = new FileHandler("testcs.txt", "testcsFixed2.txt");
            try
            {
                dictionary.PreloadDictionaries();

                Corrector corrector = new Corrector(
                    new ErrorModel(dictionary),
                    new LanguageModel(dictionary),
                    new AccentModel(dictionary));

                using (FileChecker checker = new FileChecker("testcs.txt", dictionary))
                {
                    MisspelledWord error;
                    // GetNextMisspelling returns null once there is nothing more to report.
                    while ((error = checker.GetNextMisspelling()) != null)
                    {
                        corrector.Correct(error);

                        // Only forward words that actually received a correction. A null
                        // CorrectWord is treated the same as an empty one so it never
                        // reaches the handler.
                        if (!string.IsNullOrEmpty(error.CorrectWord))
                        {
                            handler.Push(error);
                        }
                    }
                }
            }
            finally
            {
                // Close the handler even when checking or correcting throws, so it is
                // not left open on the failure path.
                handler.Close();
            }
        }
예제 #3
0
        /// <summary>
        /// Initializes the project: stores the directories and dictionary, builds the language,
        /// error and (when the dictionary reports one is available) accent models, creates the
        /// corrector from them, and works out the thread and file-group settings.
        /// </summary>
        /// <param name="dictionary">Dictionary stored on the instance and used to build the models.</param>
        /// <param name="resultDirectory">Value assigned to <c>ResultDirectory</c>.</param>
        /// <param name="reportDirectory">Value assigned to <c>ReportDirectory</c>.</param>
        /// <param name="preserveSubfolders">Value assigned to <c>PreserveSubfolders</c>.</param>
        private void PrepareProject(Dictionary dictionary, string resultDirectory, string reportDirectory, bool preserveSubfolders)
        {
            ExportContext = false;
            ResultDirectory = resultDirectory;
            ReportDirectory = reportDirectory;

            this.dictionary = dictionary;

            // Models: the accent model is optional and stays null when unavailable.
            languageModel = new LanguageModel(dictionary);
            errorModel = new ErrorModel(dictionary);
            if (dictionary.IsAccentModelAvailable())
            {
                accentModel = new AccentModel(dictionary);
            }
            else
            {
                accentModel = null;
            }

            // Corrector built from the models above.
            corrector = new Corrector(errorModel, languageModel, accentModel);

            // Threading: split the work into groups sized by the available thread count.
            ThreadsAvailable = ScaleThreads();
            filesGroups = DivadeIntoGroups(ThreadsAvailable);
            if (FilesToProcess.Count > 1)
            {
                ThreadsUsed = filesGroups.Length;
            }
            else
            {
                // Zero or one file: report a single thread regardless of the grouping.
                ThreadsUsed = 1;
            }

            // Remaining settings.
            PreserveSubfolders = preserveSubfolders;
        }
예제 #4
0
        /// <summary>
        /// Spell-checks every file returned by <c>AnalyzeDir</c> for the configured directory,
        /// corrects each misspelling found, records each distinct raw spelling under its
        /// corrected word in <c>data</c>, and finally calls <c>Save</c>.
        /// </summary>
        public void RunBatch()
        {
            List<FileInfo> files = this.AnalyzeDir(new DirectoryInfo(this.directory));

            dictionary.PreloadDictionaries();

            Corrector corrector = new Corrector(errorModel, languageModel);

            foreach (FileInfo file in files)
            {
                using (FileChecker checker = new FileChecker(file.FullName, dictionary))
                {
                    while (!checker.EndOfCheck)
                    {
                        MisspelledWord error = checker.GetNextMisspelling();
                        if (error == null)
                        {
                            continue;
                        }

                        corrector.Correct(error);

                        // Skip words without a correction. An empty CorrectWord is treated
                        // like null so that "" never becomes a key in the collected data.
                        if (string.IsNullOrEmpty(error.CorrectWord))
                        {
                            continue;
                        }

                        // Single lookup instead of ContainsKey followed by repeated indexer access.
                        List<string> rawWords;
                        if (!this.data.TryGetValue(error.CorrectWord, out rawWords))
                        {
                            this.data.Add(error.CorrectWord, new List<string> { error.RawWord });
                        }
                        else if (!rawWords.Contains(error.RawWord))
                        {
                            // Keep each raw spelling only once per corrected word.
                            rawWords.Add(error.RawWord);
                        }
                    }
                }
            }

            this.Save();
        }