/// <summary>
/// Smoke check: runs <c>SpellChecker.Fix</c> over two hard-coded sample words and
/// prints one line per word in the form <c>original TAB - TAB fixed</c>.
/// Lines where the fixed word differs from the original are prefixed with <c>!!!</c>.
/// </summary>
public static void FixWordTest()
{
    string[] samples = { "Проверка", "праверка" };

    // The checker is backed by a trigram index built from the default dictionary
    // plus the default (not stemmed) question list.
    var index = TrigramIndex.CreateFromDefaultDictionaryAnd(Program.DefaultNotStemmedQuestionList);
    var checker = new SpellChecker(index);

    var corrected = samples.Select(checker.Fix).ToArray();

    var reportLines = samples.Zip(corrected, (before, after) =>
    {
        // Flag every word that the checker changed.
        var marker = (before != after) ? "!!!\t" : "";
        return marker + before + "\t-\t" + after;
    });

    Console.WriteLine(String.Join("\n", reportLines));
}
/// <summary>
/// Builds a <c>SpellChecker</c> from the default dictionary plus the default
/// (not stemmed) question list, runs <c>Fix</c> over every word of the first
/// 10 questions and prints the elapsed time, the number of words processed and a
/// per-word report (<c>original TAB - TAB fixed</c>, prefixed with <c>!!!</c>
/// when the word was changed).
/// </summary>
/// <remarks>
/// Only the <c>Fix</c> calls are timed; index construction and word splitting
/// happen before the stopwatch starts.
/// </remarks>
public static void TestCreation()
{
    var detector = new SpellChecker(TrigramIndex.CreateFromDefaultDictionaryAnd(Program.DefaultNotStemmedQuestionList));
    Console.WriteLine("Now we can fix:");

    var words = Program.DefaultNotStemmedQuestionList
        .GetAllQuestions()
        .Take(10)
        .SelectMany(item => item.WholeText.SplitInWordsAndStripHTML())
        .ToArray();

    // Stopwatch is monotonic and high-resolution; subtracting DateTime.Now values
    // measures wall-clock time, which is coarse and can jump when the system clock
    // is adjusted. Fully qualified so no extra using directive is required.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    var fixedWords = words.Select(detector.Fix).ToArray();
    stopwatch.Stop();

    var report = String.Join("\n",
        words.Zip(fixedWords, (w1, w2) => ((w1 != w2) ? "!!!\t" : "") + w1 + "\t-\t" + w2));

    Console.WriteLine("For {0} sec to {1} words:\n{2}",
        stopwatch.Elapsed.TotalSeconds, fixedWords.Length, report);
}
/// <summary>
/// Runs every word of every question (text and title) and every answer (text) in
/// <paramref name="ql"/> through <c>SpellChecker.Fix</c>, using a trigram index
/// built from <paramref name="ql"/> itself, and then writes the modified questions
/// and answers to <c>Program.QuestionsNoTyposFileName</c> and
/// <c>Program.AnswersNoTyposFileName</c>.
/// </summary>
/// <param name="ql">
/// Question list to process. It is modified in place: each text is replaced by its
/// fixed words joined with single spaces (the splitter is named
/// <c>SplitInWordsAndStripHTML</c>, so presumably HTML markup and original
/// whitespace are not preserved — confirm against that helper).
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="ql"/> is null.</exception>
public static void ModifyTyposCorpus(QuestionList ql)
{
    // Fail fast with a descriptive exception; a null list could never succeed
    // because ql.GetAllQuestions() below dereferences it.
    if (ql == null)
    {
        throw new ArgumentNullException("ql");
    }

    var detector = new SpellChecker(TrigramIndex.CreateFrom(ql));
    Console.WriteLine("I am Modifying");

    // Stopwatch is monotonic and high-resolution, unlike DateTime.Now differences,
    // which are coarse and sensitive to system clock adjustments. Fully qualified
    // so no extra using directive is required.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    foreach (var question in ql.GetAllQuestions())
    {
        question.Text = String.Join(" ", question.Text.SplitInWordsAndStripHTML().Select(detector.Fix));
        question.Title = String.Join(" ", question.Title.SplitInWordsAndStripHTML().Select(detector.Fix));
    }
    Console.WriteLine("Questions modified in {0}", stopwatch.Elapsed.TotalSeconds);

    // Restart so the answers are timed separately from the questions.
    stopwatch.Restart();
    foreach (var answer in ql.GetAllAnswers())
    {
        answer.Text = String.Join(" ", answer.Text.SplitInWordsAndStripHTML().Select(detector.Fix));
    }
    Console.WriteLine("Answers modified in {0}", stopwatch.Elapsed.TotalSeconds);

    // Persist the corrected corpus, one formatted line per question / answer.
    File.WriteAllLines(Program.QuestionsNoTyposFileName, ql.GetAllQuestions().Select(Question.FormatStringWrite));
    File.WriteAllLines(Program.AnswersNoTyposFileName, ql.GetAllAnswers().Select(Answer.FormatStringWrite));
}