/// <summary>
/// Runs the V1.2 pipeline: loads stop words, indexes the dictionary into a
/// spell checker, tokenizes the posts file with <c>StemmerCompareAnalyzer</c>
/// and writes the source / spell / stem term of every token to the output file.
/// </summary>
/// <remarks>
/// All paths are relative to the working directory. If the posts file is
/// missing, a hint is printed and the process exits with code 1.
/// </remarks>
static void MainV1_2()
{
    var numberOfSuggestion = 100;
    var testFilePath = @"../../../data/russianPosts.txt";
    var testDictionaryPath = @"../../../data/russian.dic";
    var testIndexPath = @"../../../data/indexV1_2";
    var stopWordsPath = @"../../../data/stopWords.txt";
    var outputFilePath = @"../../../data/output.txt";

    // One stop word per line of the stop-words file.
    var stopWordsSet = new HashSet<string>();
    using (var stopWordsReader = new StreamReader(stopWordsPath))
    {
        string line;
        while ((line = stopWordsReader.ReadLine()) != null)
        {
            stopWordsSet.Add(line);
        }
    }

    if (!File.Exists(testFilePath))
    {
        Console.WriteLine("Unpack the archive with the russian posts");
        Environment.Exit(1);
    }

    using (var reader = new StreamReader(testFilePath))
    using (var writer = new StreamWriter(outputFilePath))
    // The index directory and the spell checker hold file/searcher resources,
    // so they are disposed deterministically (spell checker first, then directory).
    using (var directory = new SimpleFSDirectory(new DirectoryInfo(testIndexPath)))
    using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory))
    {
        spellChecker.IndexDictionary(new PlainTextDictionary(new FileInfo(testDictionaryPath)));

        var analyzer = new StemmerCompareAnalyzer(stopWordsSet, spellChecker, numberOfSuggestion);

        // The token stream is disposed once all tokens have been consumed.
        using (var stream = analyzer.TokenStream(null, reader))
        {
            while (stream.IncrementToken())
            {
                var sourceAttribute = stream.GetAttribute<ISourceAttribute>().Term;
                var spellAttribute = stream.GetAttribute<ISpellAttribute>().Term;
                var stemAttribute = stream.GetAttribute<IStemAttribute>().Term;

                // Three right-aligned columns, each 20 characters wide.
                writer.WriteLine("{0, 20} {1, 20} {2, 20}", sourceAttribute, spellAttribute, stemAttribute);
            }
        }
    }
}
/// <summary>
/// Runs the V2 pipeline: loads stop words, indexes the stem dictionary into a
/// spell checker, tokenizes the test file with <c>StemmerCompareAnalyzer</c>
/// and prints the term / spell / stem value of every token to the console.
/// </summary>
/// <remarks>
/// All input paths are hard-coded under <c>C:/lucene</c>.
/// </remarks>
static void MainV2()
{
    var numberOfSuggestion = 100;
    var testFilePath = @"C:/lucene/test1.txt";
    var testDictionaryPath = @"C:/lucene/ruStem.dict";
    var testIndexPath = @"C:/lucene/indexV2";
    var stopWordsPath = @"C:/lucene/stopWords.txt";

    // One stop word per line of the stop-words file.
    var stopWordsSet = new HashSet<string>();
    using (var stopWordsReader = new StreamReader(stopWordsPath))
    {
        string line;
        while ((line = stopWordsReader.ReadLine()) != null)
        {
            stopWordsSet.Add(line);
        }
    }

    using (var reader = new StreamReader(testFilePath))
    // The index directory and the spell checker hold file/searcher resources,
    // so they are disposed deterministically (spell checker first, then directory).
    using (var directory = new SimpleFSDirectory(new DirectoryInfo(testIndexPath)))
    using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory))
    {
        spellChecker.IndexDictionary(new PlainTextDictionary(new FileInfo(testDictionaryPath)));

        var analyzer = new StemmerCompareAnalyzer(stopWordsSet, spellChecker, numberOfSuggestion);

        // The token stream is disposed once all tokens have been consumed.
        using (var stream = analyzer.TokenStream(null, reader))
        {
            while (stream.IncrementToken())
            {
                var termAttribute = stream.GetAttribute<ITermAttribute>().Term;
                var spellAttribute = stream.GetAttribute<ISpellAttribute>().Term;
                var stemAttribute = stream.GetAttribute<IStemAttribute>().Term;

                // Three right-aligned columns, each 20 characters wide.
                Console.WriteLine("{0, 20} {1, 20} {2, 20}", termAttribute, spellAttribute, stemAttribute);
            }
        }
    }
}