Ejemplo n.º 1
0
        /// <summary>
        /// Tokenizes the posts file with <c>StemmerCompareAnalyzer</c> and writes one line per token
        /// to the output file, containing the terms exposed by the token's
        /// <c>ISourceAttribute</c>, <c>ISpellAttribute</c> and <c>IStemAttribute</c>.
        /// </summary>
        /// <remarks>
        /// If the posts file does not exist, a hint is printed and the process exits with code 1
        /// before any other file is touched.
        /// </remarks>
        static void MainV1_2()
        {
            var numberOfSuggestion = 100;

            var testFilePath       = @"../../../data/russianPosts.txt";
            var testDictionaryPath = @"../../../data/russian.dic";
            var testIndexPath      = @"../../../data/indexV1_2";
            var stopWordsPath      = @"../../../data/stopWords.txt";
            var outputFilePath     = @"../../../data/output.txt";

            // Fail fast: verify the main input exists before reading anything else.
            if (!File.Exists(testFilePath))
            {
                Console.WriteLine("Unpack the archive with the russian posts");
                Environment.Exit(1);
            }

            // One stop word per line of the stop-words file.
            var stopWordsSet = new HashSet<string>(File.ReadAllLines(stopWordsPath));

            using (var reader = new StreamReader(testFilePath))
            using (var writer = new StreamWriter(outputFilePath))
            // The index directory and the spell checker are disposable; release them
            // deterministically instead of leaving that to finalization.
            using (var directory = new SimpleFSDirectory(new DirectoryInfo(testIndexPath)))
            using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory))
            {
                // Index the dictionary file so the spell checker can produce suggestions.
                spellChecker.IndexDictionary(new PlainTextDictionary(new FileInfo(testDictionaryPath)));

                var analyzer = new StemmerCompareAnalyzer(stopWordsSet, spellChecker, numberOfSuggestion);
                var stream   = analyzer.TokenStream(null, reader);

                while (stream.IncrementToken())
                {
                    var sourceTerm = stream.GetAttribute<ISourceAttribute>().Term;
                    var spellTerm  = stream.GetAttribute<ISpellAttribute>().Term;
                    var stemTerm   = stream.GetAttribute<IStemAttribute>().Term;

                    // Three right-aligned, 20-character-wide columns.
                    writer.WriteLine("{0, 20} {1, 20} {2, 20}", sourceTerm, spellTerm, stemTerm);
                }
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Tokenizes the test file with <c>StemmerCompareAnalyzer</c> and prints one line per token
        /// to the console, containing the terms exposed by the token's
        /// <c>ITermAttribute</c>, <c>ISpellAttribute</c> and <c>IStemAttribute</c>.
        /// </summary>
        static void MainV2()
        {
            var numberOfSuggestion = 100;

            var testFilePath       = @"C:/lucene/test1.txt";
            var testDictionaryPath = @"C:/lucene/ruStem.dict";
            var testIndexPath      = @"C:/lucene/indexV2";
            var stopWordsPath      = @"C:/lucene/stopWords.txt";

            // One stop word per line of the stop-words file.
            var stopWordsSet = new HashSet<string>(File.ReadAllLines(stopWordsPath));

            using (var reader = new StreamReader(testFilePath))
            // The index directory and the spell checker are disposable; release them
            // deterministically instead of leaving that to finalization.
            using (var directory = new SimpleFSDirectory(new DirectoryInfo(testIndexPath)))
            using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory))
            {
                // Index the dictionary file so the spell checker can produce suggestions.
                spellChecker.IndexDictionary(new PlainTextDictionary(new FileInfo(testDictionaryPath)));

                var analyzer = new StemmerCompareAnalyzer(stopWordsSet, spellChecker, numberOfSuggestion);
                var stream   = analyzer.TokenStream(null, reader);

                while (stream.IncrementToken())
                {
                    var term      = stream.GetAttribute<ITermAttribute>().Term;
                    var spellTerm = stream.GetAttribute<ISpellAttribute>().Term;
                    var stemTerm  = stream.GetAttribute<IStemAttribute>().Term;

                    // Three right-aligned, 20-character-wide columns.
                    Console.WriteLine("{0, 20} {1, 20} {2, 20}", term, spellTerm, stemTerm);
                }
            }
        }