예제 #1
0
        //[TestMethod]
        /// <summary>
        /// Manual timing run: collects every misspelling reported for "testarticle.txt" with the
        /// en_US dictionary, runs the corrector over each of them, and writes both elapsed
        /// times (in milliseconds) to the test context.
        /// </summary>
        public void CorrectionTest()
        {
            DictionaryManager dictionaries = new DictionaryManager(@"C:\dev\git\Pspell\SpellCheckerConsole\bin\Debug\dictionaries");
            Dictionary english = dictionaries.GetDictionary("en_US");
            english.PreloadDictionaries();

            ErrorModel errorModel = new ErrorModel(english);
            LanguageModel languageModel = new LanguageModel(english);
            Corrector corrector = new Corrector(errorModel, languageModel);

            // Phase 1: scan the file and remember every misspelling, timing the scan.
            Stopwatch searchWatch = new Stopwatch();
            searchWatch.Start();
            List<MisspelledWord> found = new List<MisspelledWord>();
            using (FileChecker checker = new FileChecker("testarticle.txt", english))
            {
                // GetNextMisspelling returning null ends the scan.
                for (MisspelledWord next = checker.GetNextMisspelling();
                     next != null;
                     next = checker.GetNextMisspelling())
                {
                    found.Add(next);
                }
            }
            searchWatch.Stop();

            // Phase 2: correct the collected misspellings, timed separately from the scan.
            Stopwatch fixWatch = new Stopwatch();
            fixWatch.Start();
            for (int i = 0; i < found.Count; i++)
            {
                corrector.Correct(found[i]);
            }
            fixWatch.Stop();

            string searchReport = "Mistakes search time: " + searchWatch.ElapsedMilliseconds + " ms";
            TestContext.WriteLine(searchReport);

            string fixReport = "Correction time: " + fixWatch.ElapsedMilliseconds + " ms";
            TestContext.WriteLine(fixReport);
        }
예제 #2
0
파일: Program.cs 프로젝트: pesha/MPspell
        /// <summary>
        /// Console entry point: checks "testcs.txt" against the "cs_CZ" dictionary, corrects each
        /// reported misspelling, and pushes every misspelling that received a correction to a
        /// <c>FileHandler</c> created for "testcs.txt" / "testcsFixed2.txt".
        /// The commented-out sections are alternative scratch workflows kept for reference.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        static void Main(string[] args)
        {
            //NgramParser parser = new NgramParser();
            //parser.ParseNgrams("w2_.txt");

            DictionaryManager manager = new DictionaryManager("gen");
            Dictionary dictionary = manager.GetDictionary("cs_CZ");

            //TwoCharFrequencyCounter counter = new TwoCharFrequencyCounter(dictionary.GetAlphabetForErrorModel(true));
            //WordFrequencyCounter counter = new WordFrequencyCounter();
            //CharFrequencyCounter counter = new CharFrequencyCounter(dictionary.GetAlphabetForErrorModel(true));
            //CorporaReader reader = new CorporaReader(new HCLineParser(), counter);
            //reader.ProcessFile("gen/data_cz/cz_data.txt");
            //counter.Save("gen/cs_CZ/wordFreq.txt");

            //string path = @"C:\dev\git\Pspell\SpellCheckerConsole\bin\Debug\gen\data_cz"; //@"F:\_dp\english\news";
            //DictionaryGenerator generator = new DictionaryGenerator(dictionary, path, "gen/cs_CZ");
            //generator.CalculateFrequences();
            //generator.Save();
            //generator.RunBatch();

            /*ErrorListParser parser = new ErrorListParser("generators/en_errors.txt");
            var data = parser.Parse();

            InsertionsMatrixGenerator generator = new InsertionsMatrixGenerator(dictionary.GetAlphabetForErrorModel(true).ToCharArray());
            var matrix = generator.GenerateMatrix(data);
            MatrixExport.ExportMatrix("insertTest.txt", matrix);

            FolderCorrector analyze = new FolderCorrector(dictionary, @"C:\dev\git\Pspell\SpellCheckerConsole\bin\Debug\20_newsgroups");
            analyze.CorrectFiles();
            */

            //FileCorrectionHandler handlerTest = new FileCorrectionHandler("gen/data_cz/cz_data.txt", new List<MisspelledWord>());
            //handlerTest.SaveCorrectedAs("gen/temp/cz_data_copy.txt");

            FileHandler handlerTest = new FileHandler("testcs.txt", "testcsFixed2.txt");
            //handlerTest.CopyFile();

            dictionary.PreloadDictionaries();

            Corrector corrector = new Corrector(new ErrorModel(dictionary), new LanguageModel(dictionary), new AccentModel(dictionary));

            //Queue<MisspelledWord> mistakes = new Queue<MisspelledWord>();
            using (FileChecker checker = new FileChecker("testcs.txt", dictionary))
            {
                MisspelledWord error;
                // GetNextMisspelling returning null ends the scan.
                while ((error = checker.GetNextMisspelling()) != null)
                {
                    //mistakes.Enqueue(error);
                    corrector.Correct(error);

                    // Only push words that actually received a correction. Both null and ""
                    // are treated as "no correction" so an uncorrected word is never pushed.
                    // NOTE(review): which of the two Corrector leaves behind is not visible here.
                    if (!string.IsNullOrEmpty(error.CorrectWord))
                    {
                        handlerTest.Push(error);
                    }
                }
            }

            handlerTest.Close();

            //FileCorrectionHandler handler = new FileCorrectionHandler("testcs.txt", mistakes);
            //handler.SaveCorrectedAs("testcsFixed.txt");
            //handler.OverwriteWithCorrections();
        }
예제 #3
0
        /// <summary>
        /// Spell-checks and corrects every file in <paramref name="group"/>, pushing each
        /// corrected misspelling to a per-file <c>FileHandler</c> and recording every processed
        /// misspelling in the returned statistics object.
        /// </summary>
        /// <param name="group">Files to process, in order.</param>
        /// <param name="id">Not read by this method.</param>
        /// <returns>
        /// The statistics collected for the group, or <c>null</c> if
        /// <c>worker.CancellationPending</c> was observed while a file was being checked.
        /// </returns>
        private CorrectionStatitic CorrectGroup(List<FileInfo> group, int id)
        {
            CorrectionStatitic stats = new CorrectionStatitic(null, null, this.ExportContext);

            foreach (FileInfo file in group)
            {
                string output = PreserveSubfolders ? this.GetSubfolder(file) : this.ResultDirectory + "/" + file.Name;

                // Never hand the handler the same path for input and output.
                if (file.FullName == new FileInfo(output).FullName)
                {
                    output = output + ".1";
                }

                FileHandler handler = new FileHandler(file.FullName, output);
                try
                {
                    using (FileChecker checker = new FileChecker(file.FullName, dictionary))
                    {
                        // Correction of one batch runs on a task while this thread keeps
                        // scanning for the next batch; at most one task is in flight.
                        Task<List<MisspelledWord>> task = null;
                        List<MisspelledWord> errors = new List<MisspelledWord>();
                        int estimates = 0;
                        while (!checker.EndOfCheck)
                        {
                            MisspelledWord error = checker.GetNextMisspelling();
                            if (null != error)
                            {
                                estimates++;
                                errors.Add(error);
                            }

                            // Hand off a batch once it exceeds 1000 items or the file is done.
                            if (errors.Count > 1000 || checker.EndOfCheck)
                            {
                                // Collect the previous batch before starting the next one.
                                DrainCorrections(task, stats, handler);

                                // Give the task its own list; 'errors' is reused for scanning.
                                List<MisspelledWord> errorBatch = errors;
                                errors = new List<MisspelledWord>();

                                task = Task<List<MisspelledWord>>.Factory.StartNew(() =>
                                {
                                    return this.CorrectErrors(errorBatch);
                                });
                            }

                            if (estimates > estimateLimit)
                            {
                                this.UpdateProgres(0, checker.EstimateProcess());
                                estimates = 0;
                            }

                            if (worker.CancellationPending)
                            {
                                // The finally block below still closes the handler.
                                // NOTE(review): stats is not closed on this path, as before.
                                return null;
                            }
                        }

                        // Collect the batch started when the end of the file was reached.
                        DrainCorrections(task, stats, handler);
                    }
                }
                finally
                {
                    // Close the handler on every exit path (normal, cancelled, or exception)
                    // so that it is not left open.
                    handler.Close();
                }

                this.UpdateProgres(1);
            }

            stats.Close();
            return stats;
        }

        /// <summary>
        /// Blocks until <paramref name="task"/> completes, records each of its results in
        /// <paramref name="stats"/>, and pushes those with a non-null <c>CorrectWord</c> to
        /// <paramref name="handler"/>. Does nothing when <paramref name="task"/> is null.
        /// </summary>
        private static void DrainCorrections(Task<List<MisspelledWord>> task, CorrectionStatitic stats, FileHandler handler)
        {
            if (task == null)
            {
                return;
            }

            // Task<T>.Result waits for completion, so no separate Wait() call is needed.
            foreach (MisspelledWord item in task.Result)
            {
                stats.AddCorrection(item);
                if (item.CorrectWord != null)
                {
                    handler.Push(item);
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Checks every file returned by <c>AnalyzeDir</c> for <c>this.directory</c>, corrects
        /// each reported misspelling, and records in <c>this.data</c>, keyed by corrected word,
        /// the distinct raw spellings that were corrected to it. Calls <c>Save</c> at the end.
        /// </summary>
        public void RunBatch()
        {
            DirectoryInfo root = new DirectoryInfo(this.directory);
            List<FileInfo> inputs = this.AnalyzeDir(root);

            dictionary.PreloadDictionaries();

            Corrector corrector = new Corrector(errorModel, languageModel);

            foreach (FileInfo input in inputs)
            {
                using (FileChecker checker = new FileChecker(input.FullName, dictionary))
                {
                    while (!checker.EndOfCheck)
                    {
                        MisspelledWord found = checker.GetNextMisspelling();
                        if (found == null)
                        {
                            // Nothing reported this round; re-test EndOfCheck.
                            continue;
                        }

                        corrector.Correct(found);
                        if (found.CorrectWord == null)
                        {
                            // No correction was assigned; nothing to record.
                            continue;
                        }

                        if (!this.data.ContainsKey(found.CorrectWord))
                        {
                            // First raw spelling seen for this corrected word.
                            this.data.Add(found.CorrectWord, new List<string> { found.RawWord });
                            continue;
                        }

                        // Known corrected word: add the raw spelling only if it is new.
                        var rawSpellings = this.data[found.CorrectWord];
                        if (!rawSpellings.Contains(found.RawWord))
                        {
                            rawSpellings.Add(found.RawWord);
                        }
                    }
                }
            }

            this.Save();
        }