/// <summary>
/// Timing harness: collects every misspelling reported for "testarticle.txt"
/// using the en_US dictionary, runs the corrector over all of them, and writes
/// both elapsed times (in milliseconds) to the test context. Makes no assertions.
/// </summary>
//[TestMethod]
public void CorrectionTest()
{
    DictionaryManager dictionaries = new DictionaryManager(@"C:\dev\git\Pspell\SpellCheckerConsole\bin\Debug\dictionaries");
    Dictionary english = dictionaries.GetDictionary("en_US");
    english.PreloadDictionaries();

    ErrorModel errorModel = new ErrorModel(english);
    LanguageModel languageModel = new LanguageModel(english);
    Corrector corrector = new Corrector(errorModel, languageModel);

    // Phase 1: gather all misspellings from the article.
    Stopwatch searchTimer = Stopwatch.StartNew();
    List<MisspelledWord> found = new List<MisspelledWord>();
    using (FileChecker checker = new FileChecker("testarticle.txt", english))
    {
        for (MisspelledWord next = checker.GetNextMisspelling();
             next != null;
             next = checker.GetNextMisspelling())
        {
            found.Add(next);
        }
    }
    searchTimer.Stop();

    // Phase 2: correct every collected misspelling.
    Stopwatch fixTimer = Stopwatch.StartNew();
    for (int i = 0; i < found.Count; i++)
    {
        corrector.Correct(found[i]);
    }
    fixTimer.Stop();

    string searchReport = "Mistakes search time: " + searchTimer.ElapsedMilliseconds + " ms";
    string fixReport = "Correction time: " + fixTimer.ElapsedMilliseconds + " ms";
    TestContext.WriteLine(searchReport);
    TestContext.WriteLine(fixReport);
}
/// <summary>
/// Console entry point. Loads the "cs_CZ" dictionary from the "gen" folder,
/// scans "testcs.txt" for misspellings, corrects each one, and pushes every
/// misspelling that received a correction to a <c>FileHandler</c> created for
/// "testcs.txt" / "testcsFixed2.txt".
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    //NgramParser parser = new NgramParser();
    //parser.ParseNgrams("w2_.txt");

    DictionaryManager manager = new DictionaryManager("gen");
    Dictionary dictionary = manager.GetDictionary("cs_CZ");

    // Disabled experiments (frequency counting / dictionary generation), kept for reference.
    //TwoCharFrequencyCounter counter = new TwoCharFrequencyCounter(dictionary.GetAlphabetForErrorModel(true));
    //WordFrequencyCounter counter = new WordFrequencyCounter();
    //CharFrequencyCounter counter = new CharFrequencyCounter(dictionary.GetAlphabetForErrorModel(true));
    //CorporaReader reader = new CorporaReader(new HCLineParser(), counter);
    //reader.ProcessFile("gen/data_cz/cz_data.txt");
    //counter.Save("gen/cs_CZ/wordFreq.txt");

    //string path = @"C:\dev\git\Pspell\SpellCheckerConsole\bin\Debug\gen\data_cz"; //@"F:\_dp\english\news";
    //DictionaryGenerator generator = new DictionaryGenerator(dictionary, path, "gen/cs_CZ");
    //generator.CalculateFrequences();
    //generator.Save();
    //generator.RunBatch();

    /*ErrorListParser parser = new ErrorListParser("generators/en_errors.txt");
    var data = parser.Parse();
    InsertionsMatrixGenerator generator = new InsertionsMatrixGenerator(dictionary.GetAlphabetForErrorModel(true).ToCharArray());
    var matrix = generator.GenerateMatrix(data);
    MatrixExport.ExportMatrix("insertTest.txt", matrix);
    FolderCorrector analyze = new FolderCorrector(dictionary, @"C:\dev\git\Pspell\SpellCheckerConsole\bin\Debug\20_newsgroups");
    analyze.CorrectFiles();
    */

    //FileCorrectionHandler handlerTest = new FileCorrectionHandler("gen/data_cz/cz_data.txt", new List<MisspelledWord>());
    //handlerTest.SaveCorrectedAs("gen/temp/cz_data_copy.txt");

    FileHandler handlerTest = new FileHandler("testcs.txt", "testcsFixed2.txt");
    //handlerTest.CopyFile();

    try
    {
        dictionary.PreloadDictionaries();
        Corrector corrector = new Corrector(
            new ErrorModel(dictionary),
            new LanguageModel(dictionary),
            new AccentModel(dictionary));

        //Queue<MisspelledWord> mistakes = new Queue<MisspelledWord>();
        using (FileChecker checker = new FileChecker("testcs.txt", dictionary))
        {
            MisspelledWord error;
            while ((error = checker.GetNextMisspelling()) != null)
            {
                //mistakes.Enqueue(error);
                corrector.Correct(error);

                // Forward only misspellings that actually received a correction;
                // a null correction is treated the same as an empty one.
                if (!string.IsNullOrEmpty(error.CorrectWord))
                {
                    handlerTest.Push(error);
                }
            }
        }
    }
    finally
    {
        // Close the handler even when checking or correcting throws.
        handlerTest.Close();
    }

    //FileCorrectionHandler handler = new FileCorrectionHandler("testcs.txt", mistakes);
    //handler.SaveCorrectedAs("testcsFixed.txt");
    //handler.OverwriteWithCorrections();
}
/// <summary>
/// Stores the project settings, builds the language/error/accent models and the
/// corrector for the given dictionary, and works out the thread and file-group
/// layout for processing.
/// </summary>
/// <param name="dictionary">Dictionary the models and corrector are built from.</param>
/// <param name="resultDirectory">Value stored in <c>ResultDirectory</c>.</param>
/// <param name="reportDirectory">Value stored in <c>ReportDirectory</c>.</param>
/// <param name="preserveSubfolders">Value stored in <c>PreserveSubfolders</c>.</param>
private void PrepareProject(Dictionary dictionary, string resultDirectory, string reportDirectory, bool preserveSubfolders)
{
    // Basic project settings.
    this.ExportContext = false;
    this.ResultDirectory = resultDirectory;
    this.ReportDirectory = reportDirectory;
    this.dictionary = dictionary;

    // Models: the accent model is optional and stays null when the dictionary has none.
    this.languageModel = new LanguageModel(dictionary);
    this.errorModel = new ErrorModel(dictionary);
    if (dictionary.IsAccentModelAvailable())
    {
        this.accentModel = new AccentModel(dictionary);
    }
    else
    {
        this.accentModel = null;
    }

    // Corrector built on top of the models.
    this.corrector = new Corrector(this.errorModel, this.languageModel, this.accentModel);

    // Threading: split the files into groups based on the available thread count.
    this.ThreadsAvailable = this.ScaleThreads();
    this.filesGroups = this.DivadeIntoGroups(this.ThreadsAvailable);
    if (this.FilesToProcess.Count > 1)
    {
        this.ThreadsUsed = this.filesGroups.Length;
    }
    else
    {
        this.ThreadsUsed = 1;
    }

    // Remaining options.
    PreserveSubfolders = preserveSubfolders;
}
/// <summary>
/// Checks every file returned by <c>AnalyzeDir</c> for the configured directory,
/// corrects each misspelling found, and records the raw (misspelled) form under
/// its corrected word in <c>data</c>. Calls <c>Save</c> once all files are done.
/// </summary>
public void RunBatch()
{
    List<FileInfo> files = this.AnalyzeDir(new DirectoryInfo(this.directory));
    dictionary.PreloadDictionaries();
    Corrector corrector = new Corrector(errorModel, languageModel);

    foreach (FileInfo file in files)
    {
        using (FileChecker checker = new FileChecker(file.FullName, dictionary))
        {
            while (!checker.EndOfCheck)
            {
                MisspelledWord error = checker.GetNextMisspelling();
                if (error == null)
                {
                    continue;
                }

                corrector.Correct(error);

                // Skip misspellings without a usable correction. An empty string
                // is treated like null so that "" never becomes a key in data.
                string correctWord = error.CorrectWord;
                if (string.IsNullOrEmpty(correctWord))
                {
                    continue;
                }

                string rawWord = error.RawWord;
                if (!this.data.ContainsKey(correctWord))
                {
                    // First misspelling seen for this corrected word.
                    this.data.Add(correctWord, new List<string> { rawWord });
                }
                else if (!this.data[correctWord].Contains(rawWord))
                {
                    // Known corrected word: record each distinct raw form only once.
                    this.data[correctWord].Add(rawWord);
                }
            }
        }
    }

    this.Save();
}