//[TestMethod]
/// <summary>
/// Measures how long it takes to find all misspellings in <c>testarticle.txt</c> with the
/// <c>en_US</c> dictionary and how long it takes to correct them, then writes both durations
/// to the test context. The test performs no assertions.
/// </summary>
public void CorrectionTest()
{
    DictionaryManager dictionaries = new DictionaryManager(@"C:\dev\git\Pspell\SpellCheckerConsole\bin\Debug\dictionaries");
    Dictionary english = dictionaries.GetDictionary("en_US");
    english.PreloadDictionaries();

    ErrorModel errorModel = new ErrorModel(english);
    LanguageModel languageModel = new LanguageModel(english);
    Corrector corrector = new Corrector(errorModel, languageModel);

    // Phase 1: collect every misspelling reported by the checker.
    Stopwatch searchTimer = new Stopwatch();
    searchTimer.Start();
    List<MisspelledWord> found = new List<MisspelledWord>();
    using (FileChecker checker = new FileChecker("testarticle.txt", english))
    {
        // GetNextMisspelling returning null ends the scan.
        for (MisspelledWord next = checker.GetNextMisspelling(); next != null; next = checker.GetNextMisspelling())
        {
            found.Add(next);
        }
    }
    searchTimer.Stop();

    // Phase 2: run the corrector over everything that was collected.
    Stopwatch fixTimer = new Stopwatch();
    fixTimer.Start();
    for (int i = 0; i < found.Count; i++)
    {
        corrector.Correct(found[i]);
    }
    fixTimer.Stop();

    string searchReport = "Mistakes search time: " + searchTimer.ElapsedMilliseconds + " ms";
    TestContext.WriteLine(searchReport);
    string fixReport = "Correction time: " + fixTimer.ElapsedMilliseconds + " ms";
    TestContext.WriteLine(fixReport);
}
/// <summary>
/// Console entry point: checks <c>testcs.txt</c> against the <c>cs_CZ</c> dictionary loaded from
/// the <c>gen</c> folder, runs the corrector on every misspelling and pushes each word that
/// received a correction to a <c>FileHandler</c> writing <c>testcsFixed2.txt</c>.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    //NgramParser parser = new NgramParser();
    //parser.ParseNgrams("w2_.txt");

    DictionaryManager manager = new DictionaryManager("gen");
    Dictionary dictionary = manager.GetDictionary("cs_CZ");

    //TwoCharFrequencyCounter counter = new TwoCharFrequencyCounter(dictionary.GetAlphabetForErrorModel(true));
    //WordFrequencyCounter counter = new WordFrequencyCounter();
    //CharFrequencyCounter counter = new CharFrequencyCounter(dictionary.GetAlphabetForErrorModel(true));
    //CorporaReader reader = new CorporaReader(new HCLineParser(), counter);
    //reader.ProcessFile("gen/data_cz/cz_data.txt");
    //counter.Save("gen/cs_CZ/wordFreq.txt");

    //string path = @"C:\dev\git\Pspell\SpellCheckerConsole\bin\Debug\gen\data_cz"; //@"F:\_dp\english\news";
    //DictionaryGenerator generator = new DictionaryGenerator(dictionary, path, "gen/cs_CZ");
    //generator.CalculateFrequences();
    //generator.Save();
    //generator.RunBatch();

    /*ErrorListParser parser = new ErrorListParser("generators/en_errors.txt");
    var data = parser.Parse();
    InsertionsMatrixGenerator generator = new InsertionsMatrixGenerator(dictionary.GetAlphabetForErrorModel(true).ToCharArray());
    var matrix = generator.GenerateMatrix(data);
    MatrixExport.ExportMatrix("insertTest.txt", matrix);
    FolderCorrector analyze = new FolderCorrector(dictionary, @"C:\dev\git\Pspell\SpellCheckerConsole\bin\Debug\20_newsgroups");
    analyze.CorrectFiles();
    */

    //FileCorrectionHandler handlerTest = new FileCorrectionHandler("gen/data_cz/cz_data.txt", new List<MisspelledWord>());
    //handlerTest.SaveCorrectedAs("gen/temp/cz_data_copy.txt");

    FileHandler handlerTest = new FileHandler("testcs.txt", "testcsFixed2.txt");
    //handlerTest.CopyFile();

    try
    {
        dictionary.PreloadDictionaries();
        Corrector corrector = new Corrector(new ErrorModel(dictionary), new LanguageModel(dictionary), new AccentModel(dictionary));

        //Queue<MisspelledWord> mistakes = new Queue<MisspelledWord>();
        using (FileChecker checker = new FileChecker("testcs.txt", dictionary))
        {
            MisspelledWord error;
            while ((error = checker.GetNextMisspelling()) != null)
            {
                //mistakes.Enqueue(error);
                corrector.Correct(error);

                // Skip words without a correction. Both null and "" count as "none", so a null
                // CorrectWord is never handed to the file handler.
                if (!string.IsNullOrEmpty(error.CorrectWord))
                {
                    handlerTest.Push(error);
                }
            }
        }
    }
    finally
    {
        // Close the handler even when checking or correcting throws.
        handlerTest.Close();
    }

    //FileCorrectionHandler handler = new FileCorrectionHandler("testcs.txt", mistakes);
    //handler.SaveCorrectedAs("testcsFixed.txt");
    //handler.OverwriteWithCorrections();
}
/// <summary>
/// Checks and corrects every file in <paramref name="group"/>. Misspellings are gathered into
/// batches; each batch is corrected on a background task while the checker keeps reading, and the
/// corrected words are recorded in the statistics and pushed to the file's <c>FileHandler</c>.
/// </summary>
/// <param name="group">Files to process.</param>
/// <param name="id">Identifier of the group; not used inside this method.</param>
/// <returns>
/// The closed statistics for the whole group, or <c>null</c> when <c>worker.CancellationPending</c>
/// was observed while a file was being processed.
/// </returns>
private CorrectionStatitic CorrectGroup(List<FileInfo> group, int id)
{
    CorrectionStatitic stats = new CorrectionStatitic(null, null, this.ExportContext);

    foreach (FileInfo file in group)
    {
        string output = PreserveSubfolders ? this.GetSubfolder(file) : this.ResultDirectory + "/" + file.Name;
        if (file.FullName == new FileInfo(output).FullName)
        {
            // The result would land on top of its own input; write next to it instead.
            output = output + ".1";
        }

        FileHandler handler = new FileHandler(file.FullName, output);
        try
        {
            using (FileChecker checker = new FileChecker(file.FullName, dictionary))
            {
                Task<List<MisspelledWord>> task = null;
                List<MisspelledWord> errors = new List<MisspelledWord>();
                int estimates = 0;

                while (!checker.EndOfCheck)
                {
                    MisspelledWord error = checker.GetNextMisspelling();
                    if (null != error)
                    {
                        estimates++;
                        errors.Add(error);
                    }

                    // Hand the batch over once it is large enough or the file is exhausted.
                    if (errors.Count > 1000 || checker.EndOfCheck)
                    {
                        // Drain the previous batch before starting the next one, so at most one
                        // correction task runs at a time and results are pushed in file order.
                        if (task != null)
                        {
                            CollectCorrections(task, stats, handler);
                        }

                        List<MisspelledWord> errorBatch = errors;
                        errors = new List<MisspelledWord>();
                        task = Task<List<MisspelledWord>>.Factory.StartNew(() => { return this.CorrectErrors(errorBatch); });
                    }

                    if (estimates > estimateLimit)
                    {
                        this.UpdateProgres(0, checker.EstimateProcess());
                        estimates = 0;
                    }

                    if (worker.CancellationPending)
                    {
                        // The finally block below still closes the handler on this path.
                        // NOTE(review): stats is not closed here — confirm CorrectionStatitic
                        // holds nothing that needs releasing when the run is cancelled.
                        return null;
                    }
                }

                // Collect whatever the last started batch produced.
                if (null != task)
                {
                    CollectCorrections(task, stats, handler);
                }
            }
        }
        finally
        {
            // Runs after the checker is disposed, also on cancellation and on exceptions, so the
            // handler is never left open.
            handler.Close();
        }

        this.UpdateProgres(1);
    }

    stats.Close();
    return stats;
}

/// <summary>
/// Waits for a correction batch, adds every item to the statistics and pushes the items that
/// have a correction to the file handler.
/// </summary>
private static void CollectCorrections(Task<List<MisspelledWord>> batch, CorrectionStatitic stats, FileHandler handler)
{
    // Task<T>.Result blocks until the task completes, so no separate Wait() call is needed.
    foreach (MisspelledWord item in batch.Result)
    {
        stats.AddCorrection(item);
        if (item.CorrectWord != null)
        {
            handler.Push(item);
        }
    }
}
/// <summary>
/// Runs the corrector over every file returned by <c>AnalyzeDir</c> for <c>this.directory</c> and
/// records, for each suggested correction, the distinct raw words that were corrected to it.
/// The collected data is saved once all files have been processed.
/// </summary>
public void RunBatch()
{
    List<FileInfo> files = this.AnalyzeDir(new DirectoryInfo(this.directory));
    dictionary.PreloadDictionaries();
    Corrector corrector = new Corrector(errorModel, languageModel);

    foreach (FileInfo file in files)
    {
        using (FileChecker checker = new FileChecker(file.FullName, dictionary))
        {
            while (!checker.EndOfCheck)
            {
                MisspelledWord error = checker.GetNextMisspelling();
                if (null == error)
                {
                    continue;
                }

                corrector.Correct(error);
                if (null == error.CorrectWord)
                {
                    // The corrector found no replacement; nothing to record.
                    continue;
                }

                // One lookup instead of ContainsKey followed by repeated indexer access.
                List<string> rawWords;
                if (!this.data.TryGetValue(error.CorrectWord, out rawWords))
                {
                    this.data.Add(error.CorrectWord, new List<string> { error.RawWord });
                }
                else if (!rawWords.Contains(error.RawWord))
                {
                    rawWords.Add(error.RawWord);
                }
            }
        }
    }

    this.Save();
}