Spell Checker class (Main class)
(initially inspired by the David Spencer code).

Example Usage:

 SpellChecker spellchecker = new SpellChecker(spellIndexDirectory);
 // To index a field of a user index:
 spellchecker.indexDictionary(new LuceneDictionary(my_lucene_reader, a_field));
 // To index a file containing words:
 spellchecker.indexDictionary(new PlainTextDictionary(new File("myfile.txt")));
 String[] suggestions = spellchecker.suggestSimilar("misspelt", 5);
		/// <summary>
		/// Creates the suggestion extension and the spell checker that backs it.
		/// </summary>
		/// <param name="workContext">Work context stored on the instance.</param>
		/// <param name="key">Stored on the instance; also used as the on-disk directory path when not running in memory.</param>
		/// <param name="distanceType">String distance handed to the spell checker after construction.</param>
		/// <param name="isRunInMemory">When true the spell index uses a <c>RAMDirectory</c>; otherwise an <c>FSDirectory</c> opened at <paramref name="key"/>.</param>
		/// <param name="field">Field name stored on the instance.</param>
		/// <param name="accuracy">Value passed to <c>SetAccuracy</c>.</param>
		public SuggestionQueryIndexExtension(
			WorkContext workContext,
			string key,
			StringDistance distanceType,
			bool isRunInMemory,
			string field,
			float accuracy)
		{
			this.workContext = workContext;
			this.key = key;
			this.field = field;

			// Pick the storage backend for the spell index.
			if (!isRunInMemory)
			{
				directory = FSDirectory.Open(new DirectoryInfo(key));
			}
			else
			{
				directory = new RAMDirectory();
			}

			// The checker is built without a distance; it is supplied right after the accuracy.
			var checker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, null);
			checker.SetAccuracy(accuracy);
			checker.setStringDistance(distanceType);
			this.spellChecker = checker;
		}
        /// <summary>
        /// Builds the spell-check helper on top of an existing searcher.
        /// </summary>
        /// <param name="searchProvider">Provider whose searcher (cast to <c>IndexSearcher</c>) supplies the index reader.</param>
        public UmbracoSpellChecker(BaseLuceneSearcher searchProvider)
        {
            // The provider's searcher must be an IndexSearcher; the cast throws otherwise.
            var indexSearcher = (IndexSearcher)searchProvider.GetSearcher();
            _indexReader = indexSearcher.GetIndexReader();

            // The spell index is kept in memory and compared with Jaro-Winkler distance.
            var spellIndex = new RAMDirectory();
            var distance   = new JaroWinklerDistance();
            _checker = new SpellChecker.Net.Search.Spell.SpellChecker(spellIndex, distance);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Builds a corrected version of <paramref name="phrase"/> by replacing each
        /// space-separated word with its best spelling suggestion.
        /// </summary>
        /// <param name="phrase">Phrase to correct; it is split on single spaces.</param>
        /// <returns>
        /// The words joined by single spaces, each replaced by its first suggestion when one
        /// exists and kept unchanged otherwise. An empty string is returned when no word has
        /// a suggestion, when no spell checker is available, or when an error occurs before
        /// any suggestion was found (errors are logged, not thrown).
        /// </returns>
        public string Suggest(string phrase)
        {
            StringBuilder res = new StringBuilder();
            bool anySuggestion = false;

            try
            {
                String[] words = phrase.Split(new char[] { ' ' });
                SpellChecker.Net.Search.Spell.SpellChecker spell = GetSpelling(false);
                if (spell != null)
                {
                    for (int i = 0; i < words.Length; i++)
                    {
                        if (i > 0)
                        {
                            res.Append(' ');
                        }

                        string[] similar = spell.SuggestSimilar(words[i], 1);
                        if (similar != null && similar.Length > 0)
                        {
                            res.Append(similar[0]);
                            anySuggestion = true;
                        }
                        else
                        {
                            // Keep the original word: dropping it would lose words from the
                            // phrase and leave stray or missing separators in the result.
                            res.Append(words[i]);
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                GXLogging.Error(log, "Suggest Error", ex);
            }

            // An empty result still signals "nothing to suggest", as it did before.
            return anySuggestion ? res.ToString() : string.Empty;
        }
Exemplo n.º 4
0
		/// <summary>
		/// Demo entry point: indexes a small in-memory word list into a spell checker,
		/// prints every term of the resulting spell index, then prints the suggestions
		/// for the word "both".
		/// </summary>
		/// <param name="args">Command-line arguments (unused).</param>
		private static void Main(string[] args)
		{
			var ramDirectory = new RAMDirectory();
			var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(ramDirectory);
			spellChecker.setStringDistance(new JaroWinklerDistance());
			spellChecker.SetAccuracy(0.3f);

			// The word list is only needed while the dictionary is being indexed,
			// so the stream and its writer are released as soon as indexing is done.
			using (var ms = new MemoryStream())
			using (var sw = new StreamWriter(ms))
			{
				sw.WriteLine("Book");
				sw.WriteLine("Bath");
				sw.WriteLine("Bed");
				sw.WriteLine("Make");
				sw.WriteLine("Model");
				sw.WriteLine("Vacum");
				sw.WriteLine("Wending machine");
				sw.Flush();
				ms.Position = 0; // rewind so the dictionary reads from the first word

				spellChecker.IndexDictionary(new PlainTextDictionary(ms), CancellationToken.None);
			}

			// Dump the terms that ended up in the spell index.
			using (var indexReader = IndexReader.Open(ramDirectory, true))
			{
				var termEnum = indexReader.Terms();
				while (termEnum.Next())
				{
					Console.WriteLine(termEnum.Term);
				}
			}

			var suggestSimilar = spellChecker.SuggestSimilar("both", 10);
			foreach (var s in suggestSimilar)
			{
				Console.WriteLine(s);
			}
		}
Exemplo n.º 5
0
        /// <summary>
        /// Clears the spell-checking index and refills it from the "title" field of an
        /// existing Lucene index.
        /// </summary>
        /// <param name="LuceneIndexDir">Location of the source Lucene index.</param>
        /// <param name="SpellCheckerIndexDir">Location of the spell-checking index.</param>
        /// <remarks>
        /// Any exception is caught and reported on the console.
        /// See http://lucene.apache.org/java/2_2_0/api/org/apache/lucene/search/spell/SpellChecker.html
        /// </remarks>
        private static void doSpellCheckerIndexing(string LuceneIndexDir, string SpellCheckerIndexDir)
        {
            try
            {
                FSDirectory spellStore  = FSDirectory.GetDirectory(SpellCheckerIndexDir, false);
                FSDirectory sourceStore = FSDirectory.GetDirectory(LuceneIndexDir, false);

                // Clear the spell index before re-indexing.
                var checker = new SpellChecker.Net.Search.Spell.SpellChecker(spellStore);
                checker.ClearIndex();

                IndexReader sourceReader = IndexReader.Open(sourceStore);
                try
                {
                    // Feed the terms of the "title" field of the user index to the checker.
                    Dictionary titleWords = new SpellChecker.Net.Search.Spell.LuceneDictionary(sourceReader, "title");
                    checker.IndexDictionary(titleWords);
                }
                finally
                {
                    sourceReader.Close();
                }
            }
            catch (Exception failure)
            {
                Console.Write("Could not create spell-checking index" + failure.Message);
            }
        }
        /// <summary>
        /// Sets up the suggestion extension together with the spell checker it drives.
        /// </summary>
        /// <param name="workContext">Work context stored on the instance.</param>
        /// <param name="key">Stored on the instance; doubles as the directory path of the on-disk spell index.</param>
        /// <param name="distanceType">String distance applied to the spell checker.</param>
        /// <param name="isRunInMemory">Selects a <c>RAMDirectory</c> (true) or an <c>FSDirectory</c> at <paramref name="key"/> (false).</param>
        /// <param name="field">Field name stored on the instance.</param>
        /// <param name="accuracy">Value passed to <c>SetAccuracy</c>.</param>
        public SuggestionQueryIndexExtension(
            WorkContext workContext,
            string key,
            StringDistance distanceType,
            bool isRunInMemory,
            string field,
            float accuracy)
        {
            this.field       = field;
            this.key         = key;
            this.workContext = workContext;

            if (isRunInMemory)
            {
                directory = new RAMDirectory();
            }
            else
            {
                // On-disk spell index located at the path given by the key.
                var indexFolder = new DirectoryInfo(key);
                directory = FSDirectory.Open(indexFolder);
            }

            // No distance is given to the constructor; it is set explicitly below.
            this.spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, null);
            this.spellChecker.SetAccuracy(accuracy);
            this.spellChecker.setStringDistance(distanceType);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Recreates the full-text index at <paramref name="path"/> from
        /// <paramref name="dataList"/>, then rebuilds the spell-check index (sub-folder
        /// "Spell") from the "Title" field of the new index.
        /// </summary>
        /// <param name="dataList">Items to index; each one is mapped with <c>MapPostToDocument</c>.</param>
        /// <param name="path">Folder of the full-text index.</param>
        public void CreateFullTextIndex(IEnumerable <SearchResult> dataList, string path)
        {
            var analyzer = new StandardAnalyzer(_version);

            // The using blocks own disposal: the writer is released first, then its
            // directory. No manual Dispose calls are needed inside the block.
            using (var directory = FSDirectory.Open(new DirectoryInfo(path)))
            using (var writer = new IndexWriter(directory, analyzer, create: true, mfl: IndexWriter.MaxFieldLength.UNLIMITED))
            {
                foreach (var post in dataList)
                {
                    writer.AddDocument(MapPostToDocument(post));
                }

                writer.Optimize();
                writer.Commit();
            }

            // The reader is only needed while the dictionary is being indexed,
            // so it and its directory are released afterwards.
            using (var indexDirectory = FSDirectory.Open(path))
            using (var indexReader = IndexReader.Open(indexDirectory, readOnly: true))
            {
                // Create the SpellChecker; it is kept in the field for later use.
                spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(FSDirectory.Open(path + "\\Spell"));

                // Create SpellChecker Index
                spellChecker.ClearIndex();
                spellChecker.IndexDictionary(new LuceneDictionary(indexReader, "Title"));
            }
        }
Exemplo n.º 8
0
        } // SearchActiveDocument

        /// <summary>
        /// Builds a "did you mean" version of <paramref name="query"/>.
        /// </summary>
        /// <param name="query">Query text; split on spaces, commas and semicolons (empty entries removed).</param>
        /// <returns>
        /// The words joined by single spaces, each replaced by its first spelling
        /// suggestion when one exists and kept unchanged otherwise.
        /// </returns>
        public string getSpellingSuggestion(string query)
        {
            FSDirectory indexDir = FSDirectory.GetDirectory(this.spellingIndexDir, false);

            // Only the spell checker is needed; the original additionally opened an
            // IndexReader on the same directory that was never used or closed.
            SpellChecker.Net.Search.Spell.SpellChecker spellchecker = new SpellChecker.Net.Search.Spell.SpellChecker(indexDir);

            string[]      words          = query.Split(new char[] { ' ', ',', ';' }, StringSplitOptions.RemoveEmptyEntries);
            List <string> allSuggestions = new List <string>(words.Length);

            foreach (string word in words)
            {
                string[] suggestions = spellchecker.SuggestSimilar(word, 1);

                // Fall back to the word itself when the checker has nothing better.
                allSuggestions.Add(suggestions.Length > 0 ? suggestions[0] : word);
            }

            return String.Join(" ", allSuggestions.ToArray());
        }
Exemplo n.º 9
0
        /// <summary>
        /// Initializes the spell checker: creates it over <c>spellCheckIndexStorage</c> and
        /// indexes the terms of the <c>WORD_FN</c> field, read through a reader obtained
        /// from the index writer.
        /// </summary>
        public void SpellCheckerInit()
        {
            spellchecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellCheckIndexStorage);

            // Index a field of the user index.
            indexReader = writer.GetReader();
            var fieldWords = new LuceneDictionary(indexReader, WORD_FN);
            spellchecker.IndexDictionary(fieldWords);
        }
 /// <summary>
 /// Creates the filter over <paramref name="in_Renamed"/> and captures the spell
 /// checker's current string distance, plus a <c>StemDistance</c> built from it.
 /// </summary>
 /// <param name="in_Renamed">Token stream passed to the base filter.</param>
 /// <param name="spellChecker">Spell checker stored on the filter.</param>
 /// <param name="numberOfSuggestions">Suggestion count stored on the filter.</param>
 public StemFilter(TokenStream in_Renamed, LuceneSpellChecker spellChecker, int numberOfSuggestions)
     : base(in_Renamed)
 {
     SpellChecker        = spellChecker;
     NumberOfSuggestions = numberOfSuggestions;

     // Remember the checker's own distance and derive the stem-aware one from it.
     var checkerDistance = spellChecker.GetStringDistance();
     _defaultDistance    = checkerDistance;
     _customDistance     = new StemDistance(checkerDistance);
 }
Exemplo n.º 11
0
        /// <summary>
        /// Asks the checker for a single suggestion and asserts that exactly one comes
        /// back and that it equals <paramref name="expectedSuggestion"/>.
        /// </summary>
        /// <param name="checker">Spell checker under test.</param>
        /// <param name="misspelledWord">Word to look up.</param>
        /// <param name="expectedSuggestion">Suggestion the checker is expected to return.</param>
        private static void SuggestAndVerify(SpellChecker.Net.Search.Spell.SpellChecker checker, string misspelledWord, string expectedSuggestion)
        {
            string[] suggestions = checker.SuggestSimilar(misspelledWord, 1);
            Assert.AreEqual(1, suggestions.Length);

            string bestMatch = suggestions[0];
            log.DebugFormat("If searching: '{0}'\t\t\t\tI suggest: '{1}'", misspelledWord, bestMatch);

            Assert.AreEqual(expectedSuggestion, bestMatch);
        }
Exemplo n.º 12
0
 /// <summary>
 /// Returns the best spelling suggestion for <paramref name="term"/>, looked up in the
 /// spell index located at <c>IndexDirectory + type</c>.
 /// </summary>
 /// <param name="term">Term to look up.</param>
 /// <param name="type">Suffix appended to <c>IndexDirectory</c> to locate the spell index.</param>
 /// <returns>The first suggestion, or <paramref name="term"/> itself when there is none.</returns>
 private static string SuggestSimilar(string term, string type)
 {
     var spellIndexFolder = new System.IO.DirectoryInfo(IndexDirectory + type);
     var checker = new SpellChecker.Net.Search.Spell.SpellChecker(FSDirectory.Open(spellIndexFolder));

     string[] terms = checker.SuggestSimilar(term, 1);
     return terms.Length > 0 ? terms[0] : term;
 }
Exemplo n.º 13
0
        /// <summary>
        /// Creates a spell checker over <paramref name="spellDir"/> and indexes into it the
        /// terms of the "name" field read from <paramref name="luceneDir"/>.
        /// </summary>
        /// <param name="luceneDir">Directory of the source index (opened read-only).</param>
        /// <param name="spellDir">Directory that holds the spell index.</param>
        /// <returns>The spell checker, ready for suggestion lookups.</returns>
        public static Net.Search.Spell.SpellChecker GetSpellChecker(Directory luceneDir, Directory spellDir)
        {
            IndexReader sourceReader = IndexReader.Open(luceneDir, true);
            var checker = new Net.Search.Spell.SpellChecker(spellDir);

            var nameTerms = new LuceneDictionary(sourceReader, "name");
            checker.IndexDictionary(nameTerms);

            return checker;
        }
Exemplo n.º 14
0
		/// <summary>
		/// Indexes the terms of the "text" field of the main index into the spell index.
		/// </summary>
		public void IndexWords() {
			// Read-only reader over the main index.
			var sourceDirectory = FSDirectory.Open(_indexRootDirectory);
			IndexReader sourceReader = IndexReader.Open(sourceDirectory, true);

			// Spell checker over its own index location.
			var spellDirectory = FSDirectory.Open(_spellRootDirectory);
			var checker = new SpellChecker.Net.Search.Spell.SpellChecker(spellDirectory);

			// Feed every term of the "text" field to the spell checker.
			var textTerms = new LuceneDictionary(sourceReader, "text");
			checker.IndexDictionary(textTerms);
		}
    /// <summary>
    /// Looks up spelling suggestions for <paramref name="term"/> in the spell index at
    /// <paramref name="spellIndex"/>, using Levenshtein distance and an accuracy of 0.6.
    /// </summary>
    /// <param name="spellIndex">Location of the spell index.</param>
    /// <param name="term">Term to find suggestions for.</param>
    /// <param name="maxCount">Maximum number of suggestions requested.</param>
    /// <returns>The suggestions returned by the spell checker.</returns>
    public string[] GetSuggestedWords(string spellIndex, string term, int maxCount)
    {
        // Both the directory and the checker are created per call, so release them per
        // call as well; the returned array does not depend on them.
        using (FSDirectory dir = FSDirectory.Open(spellIndex))
        using (var spell = new SpellChecker.Net.Search.Spell.SpellChecker(dir))
        {
            spell.SetAccuracy(0.6f);
            spell.setStringDistance(new LevenshteinDistance());

            return spell.SuggestSimilar(term, maxCount);
        }
    }
Exemplo n.º 16
0
        /// <summary>
        /// Gets up to two words similar to <paramref name="word"/>, using the terms of
        /// <paramref name="fieldName"/> as the dictionary.
        /// </summary>
        /// <param name="reader">Reader over the index that supplies the dictionary terms.</param>
        /// <param name="fieldName">Name of the field whose terms are indexed.</param>
        /// <param name="word">The word to find suggestions for.</param>
        /// <returns>The suggestions returned by the spell checker.</returns>
        /// <remarks>
        /// The spell checker is created over the reader's own directory.
        /// NOTE(review): confirm that writing the spell index there is intended.
        /// </remarks>
        private static string[] SuggestSimilar(IndexReader reader, string fieldName, string word)
        {
            var spell = new SpellChecker.Net.Search.Spell.SpellChecker(reader.Directory());
            try
            {
                spell.IndexDictionary(new LuceneDictionary(reader, fieldName));
                return spell.SuggestSimilar(word, 2);
            }
            finally
            {
                // Close the spell checker even when indexing or the lookup throws.
                spell.Close();
            }
        }
Exemplo n.º 17
0
 /// <summary>
 /// Indexes the "contents" field of <c>store</c> into an in-memory spell checker and
 /// verifies the single suggestion returned for "Tam" and for "Jarry".
 /// </summary>
 public void TestSpellchecker()
 {
     SpellChecker.Net.Search.Spell.SpellChecker sc = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory());
     indexReader = IndexReader.Open(store);
     try
     {
         sc.IndexDictionary(new LuceneDictionary(indexReader, "contents"));

         String[] suggestions = sc.SuggestSimilar("Tam", 1);
         AssertEquals(1, suggestions.Length);
         AssertEquals("Tom", suggestions[0]);

         suggestions = sc.SuggestSimilar("Jarry", 1);
         AssertEquals(1, suggestions.Length);
         AssertEquals("Jerry", suggestions[0]);
     }
     finally
     {
         // Close the reader even when an assertion above fails.
         indexReader.Close();
     }
 }
    /// <summary>
    /// Indexes the terms of the "description" field of a content search index into the
    /// spell index at <paramref name="spellIndex"/>.
    /// </summary>
    /// <param name="dbIndexName">Name of the content search index to read terms from.</param>
    /// <param name="spellIndex">Location of the spell index to write to.</param>
    public void IndexSpellCheckDictionary(string dbIndexName, string spellIndex)
    {
        LuceneIndex index = (LuceneIndex)ContentSearchManager.GetIndex(dbIndexName);

        // Everything opened here is only needed while the dictionary is indexed.
        // NOTE(review): assumes CreateReader hands ownership of the reader to the caller.
        using (IndexReader reader = index.CreateReader(LuceneIndexAccess.ReadOnly))
        using (FSDirectory dir = FSDirectory.Open(spellIndex))
        using (var spell = new SpellChecker.Net.Search.Spell.SpellChecker(dir))
        {
            string           fieldName  = "description";
            LuceneDictionary dictionary = new LuceneDictionary(reader, fieldName);

            spell.IndexDictionary(dictionary, 10, 32);
        }
    }
Exemplo n.º 19
0
        /// <summary>
        /// Loads the stop words, indexes the dictionary file into a spell checker, runs the
        /// posts file through <c>StemmerCompareAnalyzer</c> and writes the source, spell and
        /// stem term of every token to the output file. Exits the process with code 1 when
        /// the posts file is missing.
        /// </summary>
        static void MainV1_2()
        {
            var numberOfSuggestion = 100;

            var testFilePath       = @"../../../data/russianPosts.txt";
            var testDictionaryPath = @"../../../data/russian.dic";
            var testIndexPath      = @"../../../data/indexV1_2";
            var stopWordsPath      = @"../../../data/stopWords.txt";
            var outputFilePath     = @"../../../data/output.txt";

            // One stop word per line.
            var stopWordsSet = new HashSet <string>();
            using (var stopWordsReader = new StreamReader(stopWordsPath))
            {
                string stopWord;
                while ((stopWord = stopWordsReader.ReadLine()) != null)
                {
                    stopWordsSet.Add(stopWord);
                }
            }

            if (!File.Exists(testFilePath))
            {
                Console.WriteLine("Unpack the archive with the russian posts");
                Environment.Exit(1);
            }

            using (var postsReader = new StreamReader(testFilePath))
            using (var outputWriter = new StreamWriter(outputFilePath))
            {
                var indexDirectory = new SimpleFSDirectory(new DirectoryInfo(testIndexPath));
                var spellChecker   = new SpellChecker.Net.Search.Spell.SpellChecker(indexDirectory);
                var dictionaryFile = new FileInfo(testDictionaryPath);
                spellChecker.IndexDictionary(new PlainTextDictionary(dictionaryFile));

                var analyzer    = new StemmerCompareAnalyzer(stopWordsSet, spellChecker, numberOfSuggestion);
                var tokenStream = analyzer.TokenStream(null, postsReader);

                // One output line per token: source term, spell term, stem term.
                while (tokenStream.IncrementToken())
                {
                    var sourceTerm = tokenStream.GetAttribute <ISourceAttribute>().Term;
                    var spellTerm  = tokenStream.GetAttribute <ISpellAttribute>().Term;
                    var stemTerm   = tokenStream.GetAttribute <IStemAttribute>().Term;

                    outputWriter.WriteLine("{0, 20} {1, 20} {2, 20}", sourceTerm, spellTerm, stemTerm);
                }
            }
        }
        /// <summary>
        /// Indexes a three-word plain-text dictionary into an in-memory spell checker and
        /// checks that "treeword" yields "threeword" and then "twoword".
        /// </summary>
        public void TestBuild()
        {
            String LF = System.Environment.NewLine;
            String input = "oneword" + LF + "twoword" + LF + "threeword";
            PlainTextDictionary ptd = new PlainTextDictionary( new MemoryStream( System.Text.Encoding.UTF8.GetBytes(input)) );
            RAMDirectory ramDir = new RAMDirectory();
            SpellChecker.Net.Search.Spell.SpellChecker spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(ramDir);
            spellChecker.IndexDictionary(ptd);
            String[] similar = spellChecker.SuggestSimilar("treeword", 2);
            Assert.AreEqual(2, similar.Length);

            // Expected value first, matching the length assertion above, so a failure
            // reports expected and actual the right way round.
            Assert.AreEqual("threeword", similar[0]);
            Assert.AreEqual("twoword", similar[1]);
        }
        /// <summary>
        /// Builds a spell index from a plain-text dictionary of three words held in memory
        /// and verifies the two suggestions returned for "treeword", in order.
        /// </summary>
        public void TestBuild()
        {
            String LF                  = System.Environment.NewLine;
            String input               = "oneword" + LF + "twoword" + LF + "threeword";
            PlainTextDictionary ptd    = new PlainTextDictionary(new MemoryStream(System.Text.Encoding.UTF8.GetBytes(input)));
            RAMDirectory        ramDir = new RAMDirectory();

            SpellChecker.Net.Search.Spell.SpellChecker spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(ramDir);
            spellChecker.IndexDictionary(ptd);
            String[] similar = spellChecker.SuggestSimilar("treeword", 2);
            Assert.AreEqual(2, similar.Length);

            // Expected value goes first (as in the length check above) so that failure
            // messages label expected and actual correctly.
            Assert.AreEqual("threeword", similar[0]);
            Assert.AreEqual("twoword", similar[1]);
        }
Exemplo n.º 22
0
        /// <summary>
        /// Rebuilds the spell index from the Title and Description fields of the main
        /// index, then returns suggestions for <paramref name="term"/>.
        /// </summary>
        /// <param name="term">Term to find suggestions for.</param>
        /// <param name="count">Maximum number of suggestions requested.</param>
        /// <returns>The suggestions returned by the spell checker.</returns>
        /// <remarks>The spell index is cleared and re-indexed on every call.</remarks>
        public static string[] SuggestSilmilarWords(string term, int count = 10)
        {
            // Everything opened here is per-call, so it is released per-call; the returned
            // array does not depend on the reader, the directories or the checker.
            using (var indexDirectory = FSDirectory.Open(_luceneDir))
            using (IndexReader indexReader = IndexReader.Open(indexDirectory, true))
            using (var spellDirectory = FSDirectory.Open(_luceneDir + "\\Spell"))
            using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellDirectory))
            {
                // Create SpellChecker Index
                spellChecker.ClearIndex();
                spellChecker.IndexDictionary(new LuceneDictionary(indexReader, StronglyTyped.PropertyName <LuceneSearchModel>(x => x.Title)));
                spellChecker.IndexDictionary(new LuceneDictionary(indexReader, StronglyTyped.PropertyName <LuceneSearchModel>(x => x.Description)));

                // Suggest Similar Words
                return spellChecker.SuggestSimilar(term, count, null, null, true);
            }
        }
Exemplo n.º 23
0
        /// <summary>
        /// Rebuilds the spell index from the Title, Body, SubTitle, Keywords and Description
        /// fields of the main index, then returns suggestions for <paramref name="term"/>.
        /// </summary>
        /// <param name="term">Term to find suggestions for.</param>
        /// <param name="count">Maximum number of suggestions requested.</param>
        /// <returns>The suggestions returned by the spell checker.</returns>
        /// <remarks>The spell index is cleared and re-indexed on every call.</remarks>
        public static string[] SuggestSilmilarWords(string term, int count = 10)
        {
            // Release the per-call reader, directories and checker once the suggestions
            // (a plain string array) have been produced.
            using (var indexDirectory = FSDirectory.Open(_luceneDir))
            using (IndexReader indexReader = IndexReader.Open(indexDirectory, true))
            using (var spellDirectory = FSDirectory.Open(_luceneDir + "\\Spell"))
            using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellDirectory))
            {
                // Create SpellChecker Index, one field at a time and in this order.
                spellChecker.ClearIndex();
                foreach (var fieldName in new[] { "Title", "Body", "SubTitle", "Keywords", "Description" })
                {
                    spellChecker.IndexDictionary(new LuceneDictionary(indexReader, fieldName));
                }

                // Suggest Similar Words
                return spellChecker.SuggestSimilar(term, count, null, null, true);
            }
        }
Exemplo n.º 24
0
        /// <summary>
        /// Runs a spelling-suggestion query against the terms of one field of an index.
        /// </summary>
        /// <param name="indexName">Name of the index whose current searcher is used.</param>
        /// <param name="suggestionQuery">
        /// Query definition; it is modified in place. A non-positive <c>MaxSuggestions</c>
        /// becomes 10 and is then capped at the configured max page size; an
        /// <c>Accuracy</c> that is not in (0, 1] becomes 0.5.
        /// </param>
        /// <returns>A result whose <c>Suggestions</c> holds the suggested terms.</returns>
        /// <exception cref="ArgumentNullException">
        /// The query is null, or its term, its field or the index name is null or blank.
        /// </exception>
        public SuggestionQueryResult ExecuteSuggestionQuery(string indexName, SuggestionQuery suggestionQuery)
        {
            if (suggestionQuery == null)
            {
                throw new ArgumentNullException("suggestionQuery");
            }
            if (string.IsNullOrWhiteSpace(suggestionQuery.Term))
            {
                throw new ArgumentNullException("suggestionQuery.Term");
            }
            if (string.IsNullOrWhiteSpace(indexName))
            {
                throw new ArgumentNullException("indexName");
            }
            if (string.IsNullOrWhiteSpace(suggestionQuery.Field))
            {
                throw new ArgumentNullException("suggestionQuery.Field");
            }

            // Normalize out-of-range settings to their defaults.
            if (suggestionQuery.MaxSuggestions <= 0)
            {
                suggestionQuery.MaxSuggestions = 10;
            }
            if (suggestionQuery.Accuracy <= 0 || suggestionQuery.Accuracy > 1)
            {
                suggestionQuery.Accuracy = 0.5f;
            }

            suggestionQuery.MaxSuggestions = Math.Min(suggestionQuery.MaxSuggestions,
                                                      _database.Configuration.MaxPageSize);

            var searcherHolder = _database.IndexStorage.GetCurrentIndexSearcher(indexName);
            IndexSearcher searcher;
            using (searcherHolder.Use(out searcher))
            {
                var fieldReader = searcher.GetIndexReader();

                // A throwaway in-memory spell index is built for every query.
                var checker = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory(), GetStringDistance(suggestionQuery));
                try
                {
                    checker.IndexDictionary(new LuceneDictionary(fieldReader, suggestionQuery.Field));
                    checker.SetAccuracy(suggestionQuery.Accuracy);

                    var suggestions = checker.SuggestSimilar(
                        suggestionQuery.Term,
                        suggestionQuery.MaxSuggestions,
                        fieldReader,
                        suggestionQuery.Field,
                        true);

                    return new SuggestionQueryResult { Suggestions = suggestions };
                }
                finally
                {
                    checker.Close();
                    // Close() does not suppress finalization itself, so it is done here.
                    GC.SuppressFinalize(checker);
                }
            }
        }
        /// <summary>
        /// Creates the suggestion extension for one field of an index, with a spell
        /// checker that has no explicit string distance.
        /// </summary>
        /// <param name="indexInstance">Index instance stored on the extension.</param>
        /// <param name="workContext">Work context stored on the extension.</param>
        /// <param name="key">Directory path of the on-disk spell index; used only when not running in memory.</param>
        /// <param name="isRunInMemory">Selects a <c>RAMDirectory</c> (true) or an <c>FSDirectory</c> at <paramref name="key"/> (false).</param>
        /// <param name="field">Field name stored on the extension and used in the operation text.</param>
        public SuggestionQueryIndexExtension(Index indexInstance, WorkContext workContext, string key,
                                             bool isRunInMemory, string field)
        {
            _indexInstance   = indexInstance;
            this.workContext = workContext;
            this.field       = field;
            _operationText   = "Suggestions for " + field;

            // Choose where the spell index lives.
            if (!isRunInMemory)
            {
                directory = FSDirectory.Open(new DirectoryInfo(key));
            }
            else
            {
                directory = new RAMDirectory();
            }

            spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, null);
        }
Exemplo n.º 26
0
 /// <summary>
 /// Builds a spell index at <paramref name="indexPath"/> from the plain-text dictionary
 /// file at <paramref name="dictionaryPath"/>.
 /// </summary>
 /// <param name="dictionaryPath">Dictionary file, opened for reading.</param>
 /// <param name="indexPath">Target location, prepared with <c>CreateTargetFolder</c>.</param>
 /// <remarks>Errors raised while indexing are written to the console and not rethrown.</remarks>
 public static void Build(string dictionaryPath, string indexPath)
 {
     var di = CreateTargetFolder(indexPath);

     // The index directory is owned here too, so it is disposed with the checker.
     using (var file = File.Open(dictionaryPath, FileMode.Open, FileAccess.Read))
     using (var indexDirectory = FSDirectory.Open(di))
     using (var staticSpellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(indexDirectory))
     {
         var dict = new PlainTextDictionary(file);
         try
         {
             staticSpellChecker.IndexDictionary(dict);
         }
         catch (Exception e)
         {
             Console.WriteLine(e.Message);
         }
     }
 }
Exemplo n.º 27
0
        /// <summary>
        /// Loads the stop words, indexes the dictionary file into a spell checker, runs the
        /// test file through <c>StemmerCompareAnalyzer</c> and prints the term, spell and
        /// stem value of every token to the console.
        /// </summary>
        static void MainV2()
        {
            var numberOfSuggestion = 100;

            var testFilePath       = @"C:/lucene/test1.txt";
            var testDictionaryPath = @"C:/lucene/ruStem.dict";
            var testIndexPath      = @"C:/lucene/indexV2";
            var stopWordsPath      = @"C:/lucene/stopWords.txt";

            // One stop word per line.
            var stopWordsSet = new HashSet <string>();
            using (var reader = new StreamReader(stopWordsPath))
            {
                while (!reader.EndOfStream)
                {
                    stopWordsSet.Add(reader.ReadLine());
                }
            }

            using (var reader = new StreamReader(testFilePath))
            {
                var directory    = new SimpleFSDirectory(new DirectoryInfo(testIndexPath));
                var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory);
                spellChecker.IndexDictionary(new PlainTextDictionary(new FileInfo(testDictionaryPath)));

                // The unused GetStringDistance() lookup that used to sit here was removed.
                var analyzer = new StemmerCompareAnalyzer(stopWordsSet, spellChecker, numberOfSuggestion);

                var stream = analyzer.TokenStream(null, reader);

                // One console line per token: term, spell term, stem term.
                while (stream.IncrementToken())
                {
                    var termAttribute  = stream.GetAttribute <ITermAttribute>().Term;
                    var spellAttribute = stream.GetAttribute <ISpellAttribute>().Term;
                    var stemAttribute  = stream.GetAttribute <IStemAttribute>().Term;

                    Console.WriteLine("{0, 20} {1, 20} {2, 20}", termAttribute, spellAttribute, stemAttribute);
                }
            }
        }
Exemplo n.º 28
0
        /// <summary>
        /// Indexes the terms of <c>IndexRecord.CONTENTFIELD</c> from the store folder's
        /// index into the spell checker obtained from <c>GetSpelling(true)</c>.
        /// </summary>
        /// <returns>
        /// <c>true</c> when no exception occurred (including when no spell checker was
        /// available); <c>false</c> when an exception was caught and logged.
        /// </returns>
        public bool BuildDictionary()
        {
            try
            {
                // The using block disposes the reader even when indexing throws.
                using (IndexReader my_luceneReader = IndexReader.Open(Settings.Instance.StoreFolder, true))
                {
                    SpellChecker.Net.Search.Spell.SpellChecker spell = GetSpelling(true);
                    if (spell != null)
                    {
                        spell.IndexDictionary(new LuceneDictionary(my_luceneReader, IndexRecord.CONTENTFIELD));
                    }
                }

                return(true);
            }
            catch (Exception ex)
            {
                GXLogging.Error(log, "BuildDictionary Error", ex);
                return(false);
            }
        }
Exemplo n.º 29
0
        /// <summary>
        /// Creates the suggestion extension for one field of an index and configures its
        /// spell checker with the given distance and accuracy.
        /// </summary>
        /// <param name="indexInstance">Index instance stored on the extension.</param>
        /// <param name="workContext">Work context stored on the extension.</param>
        /// <param name="key">Directory path of the on-disk spell index; used only when not running in memory.</param>
        /// <param name="distanceType">String distance applied to the spell checker.</param>
        /// <param name="isRunInMemory">Selects a <c>RAMDirectory</c> (true) or an <c>FSDirectory</c> at <paramref name="key"/> (false).</param>
        /// <param name="field">Field name stored on the extension and used in the operation text.</param>
        /// <param name="accuracy">Value passed to <c>SetAccuracy</c>.</param>
        public SuggestionQueryIndexExtension(Index indexInstance, WorkContext workContext, string key,
                                             StringDistance distanceType, bool isRunInMemory, string field, float accuracy)
        {
            _indexInstance   = indexInstance;
            this.workContext = workContext;
            this.field       = field;

            // Choose where the spell index lives.
            if (!isRunInMemory)
            {
                directory = FSDirectory.Open(new DirectoryInfo(key));
            }
            else
            {
                directory = new RAMDirectory();
            }

            spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, null);
            spellChecker.SetAccuracy(accuracy);
            spellChecker.setStringDistance(distanceType);

            _operationText = string.Format("Suggestions for {0} {1} ({2})", field, distanceType, accuracy);
        }
		/// <summary>
		/// Builds the suggestion extension for a single index field, including a spell
		/// checker set up with the requested distance and accuracy.
		/// </summary>
		/// <param name="indexInstance">Index instance stored on the extension.</param>
		/// <param name="workContext">Work context stored on the extension.</param>
		/// <param name="key">Directory path of the on-disk spell index; used only when not running in memory.</param>
		/// <param name="distanceType">String distance applied to the spell checker.</param>
		/// <param name="isRunInMemory">True for an in-memory spell index, false for one on disk at <paramref name="key"/>.</param>
		/// <param name="field">Field name stored on the extension and used in the operation text.</param>
		/// <param name="accuracy">Value passed to <c>SetAccuracy</c>.</param>
		public SuggestionQueryIndexExtension(Index indexInstance, WorkContext workContext, string key,
			StringDistance distanceType, bool isRunInMemory, string field, float accuracy)
		{
			this.field = field;
			this.workContext = workContext;
			_indexInstance = indexInstance;

			if (isRunInMemory)
			{
				directory = new RAMDirectory();
			}
			else
			{
				// On-disk spell index located at the path given by the key.
				var indexFolder = new DirectoryInfo(key);
				directory = FSDirectory.Open(indexFolder);
			}

			spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, null);
			spellChecker.SetAccuracy(accuracy);
			spellChecker.setStringDistance(distanceType);

			// Human-readable label describing this extension's configuration.
			string settings = distanceType + " (" + accuracy + ")";
			_operationText = "Suggestions for " + field + " " + settings;
		}
Exemplo n.º 31
0
        /// <summary>
        /// Demo entry point: builds an in-memory Lucene index and spell index, then prints
        /// up to ten words similar to "dammark".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Directory luceneDir = new RAMDirectory();
            Directory spellDir  = new RAMDirectory();

            CreateLuceneIndex(luceneDir);
            Net.Search.Spell.SpellChecker spell = GetSpellChecker(luceneDir, spellDir);

            var word = "dammark";
            string[] similarWords = spell.SuggestSimilar(word, 10);

            // show the similar words
            foreach (string similarWord in similarWords)
            {
                Console.WriteLine("{0} is similar to {1}", similarWord, word);
            }
        }
		/// <summary>
		/// Creates the suggestion extension with a spell checker whose storage kind
		/// follows that of <paramref name="reader"/>'s directory.
		/// </summary>
		/// <param name="key">Stored on the instance; also the directory path of the on-disk spell index.</param>
		/// <param name="reader">Reader whose directory type decides between in-memory and on-disk storage.</param>
		/// <param name="distance">String distance given to the spell checker constructor.</param>
		/// <param name="field">Field name stored on the instance.</param>
		/// <param name="accuracy">Value passed to <c>SetAccuracy</c>.</param>
		public SuggestionQueryIndexExtension(
			string key,
			IndexReader reader,
			StringDistance distance,
			string field,
			float accuracy)
		{
			this.key = key;
			this.field = field;

			// Mirror the source index: in-memory source means in-memory spell index.
			bool sourceIsInMemory = reader.Directory() is RAMDirectory;
			if (sourceIsInMemory)
			{
				directory = new RAMDirectory();
			}
			else
			{
				directory = FSDirectory.Open(new DirectoryInfo(key));
			}

			var checker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, distance);
			checker.SetAccuracy(accuracy);
			this.spellChecker = checker;
		}
Exemplo n.º 33
0
        /// <summary>
        /// Builds an in-memory "Did you mean?" spell index from the words of the search
        /// index and verifies the suggestion returned for three misspelled words.
        /// </summary>
        public void SpellingSuggestion()
        {
            log.Debug("Building the word index");

            // Create a "Did you mean?" dictionary (the words are extracted from the search index)
            Directory wordDirectory = GetWordDirectory();

            Directory wordIndex = new RAMDirectory();

            SpellChecker.Net.Search.Spell.SpellChecker checker = new SpellChecker.Net.Search.Spell.SpellChecker(wordIndex);
            checker.ClearIndex();

            IndexReader reader = IndexReader.Open(wordDirectory);
            try
            {
                // Add words to spell checker index
                checker.IndexDictionary(new LuceneDictionary(reader, SpellChecker.Net.Search.Spell.SpellChecker.F_WORD));
            }
            finally
            {
                // The reader is only needed while the dictionary is indexed.
                reader.Close();
            }

            // Suggest similar words
            SuggestAndVerify(checker, "nhibrenate", "nhibernate");
            SuggestAndVerify(checker, "dreiven", "driven");
            SuggestAndVerify(checker, "inyection", "injection");
        }
Exemplo n.º 34
0
        /// <summary>
        /// Sets up the suggestion extension; the spell index is kept in memory when the
        /// reader's directory is a <c>RAMDirectory</c> and on disk otherwise.
        /// </summary>
        /// <param name="key">Stored on the instance; also the directory path of the on-disk spell index.</param>
        /// <param name="reader">Reader whose directory type decides the storage kind.</param>
        /// <param name="distance">String distance given to the spell checker constructor.</param>
        /// <param name="field">Field name stored on the instance.</param>
        /// <param name="accuracy">Value passed to <c>SetAccuracy</c>.</param>
        public SuggestionQueryIndexExtension(
            string key,
            IndexReader reader,
            StringDistance distance,
            string field,
            float accuracy)
        {
            this.field = field;
            this.key   = key;

            if (!(reader.Directory() is RAMDirectory))
            {
                // Source index is on disk: keep the spell index on disk too.
                var indexFolder = new DirectoryInfo(key);
                directory = FSDirectory.Open(indexFolder);
            }
            else
            {
                directory = new RAMDirectory();
            }

            this.spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, distance);
            this.spellChecker.SetAccuracy(accuracy);
        }
Exemplo n.º 35
0
        /// <summary>
        /// Rebuilds the spell index from the Name, Author, Publisher, ISBN and Description
        /// fields of the main index, then returns suggestions for <paramref name="term"/>.
        /// </summary>
        /// <param name="term">Term to find suggestions for.</param>
        /// <param name="count">Maximum number of suggestions requested.</param>
        /// <returns>The suggestions returned by the spell checker.</returns>
        /// <remarks>The spell index is cleared and re-indexed on every call.</remarks>
        public static string[] SuggestSilmilarWords(string term, int count = 10)
        {
            // The reader, both directories and the checker are created per call, so they
            // are released per call; the returned string array does not depend on them.
            using (var indexDirectory = FSDirectory.Open(_luceneDir))
            using (IndexReader indexReader = IndexReader.Open(indexDirectory, true))
            using (var spellDirectory = FSDirectory.Open(_luceneDir + "\\Spell"))
            using (var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(spellDirectory))
            {
                // Create SpellChecker Index, one field at a time and in this order.
                spellChecker.ClearIndex();
                foreach (var fieldName in new[] { "Name", "Author", "Publisher", "ISBN", "Description" })
                {
                    spellChecker.IndexDictionary(new LuceneDictionary(indexReader, fieldName));
                }

                // Suggest Similar Words
                return spellChecker.SuggestSimilar(term, count, null, null, true);
            }
        }
        /// <summary>
        /// Gets up to two words similar to <paramref name="word"/>, using the terms of
        /// <paramref name="fieldName"/> as the dictionary.
        /// </summary>
        /// <param name="reader">Reader over the index that supplies the dictionary terms.</param>
        /// <param name="fieldName">Name of the field whose terms are indexed.</param>
        /// <param name="word">The word to find suggestions for.</param>
        /// <returns>The suggestions returned by the spell checker.</returns>
        /// <remarks>
        /// The spell checker is created over the reader's own directory.
        /// NOTE(review): confirm that writing the spell index there is intended.
        /// </remarks>
        private string[] SuggestSimilar(IndexReader reader, string fieldName, string word)
        {
            var spell = new SpellChecker(reader.Directory());
            try
            {
                spell.IndexDictionary(new LuceneDictionary(reader, fieldName));
                return spell.SuggestSimilar(word, 2);
            }
            finally
            {
                // Make sure the spell checker is closed, even when an exception is thrown.
                spell.Close();
            }
        }
		/// <summary>
		/// Creates the suggestion extension with a spell checker over the instance's
		/// <c>directory</c>, using the given distance and accuracy.
		/// </summary>
		/// <param name="distance">String distance given to the spell checker constructor.</param>
		/// <param name="field">Field name stored on the instance.</param>
		/// <param name="accuracy">Value passed to <c>SetAccuracy</c>.</param>
		public SuggestionQueryIndexExtension(StringDistance distance, string field, float accuracy)
		{
			this.field = field;

			// NOTE(review): 'directory' is not assigned in this constructor — presumably
			// it is initialized elsewhere in the class; confirm.
			var checker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, distance);
			checker.SetAccuracy(accuracy);
			this.spellChecker = checker;
		}
 /// <summary>
 /// Builds an in-memory spell index from the "contents" field of <c>store</c> and checks
 /// that "Tam" suggests "Tom" and "Jarry" suggests "Jerry".
 /// </summary>
 public void TestSpellchecker()
 {
     SpellChecker.Net.Search.Spell.SpellChecker sc = new SpellChecker.Net.Search.Spell.SpellChecker(new RAMDirectory());
     indexReader = IndexReader.Open(store);
     try
     {
         sc.IndexDictionary(new LuceneDictionary(indexReader, "contents"));

         String[] suggestions = sc.SuggestSimilar("Tam", 1);
         AssertEquals(1, suggestions.Length);
         AssertEquals("Tom", suggestions[0]);

         suggestions = sc.SuggestSimilar("Jarry", 1);
         AssertEquals(1, suggestions.Length);
         AssertEquals("Jerry", suggestions[0]);
     }
     finally
     {
         // A failing assertion must not leave the reader open.
         indexReader.Close();
     }
 }
        /// <summary>
        /// Runs the site search for the "searchStr" query-string value and shows either the hits,
        /// "did you mean" spelling suggestions, or a "no results" message.
        /// </summary>
        /// <param name="sender">Event source (not used).</param>
        /// <param name="e">Event data (not used).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            lastUpdatedText = SiteConfiguration.GetDictionaryText("Last Updated");
            cmdPrev.Text = SiteConfiguration.GetDictionaryText("Previous Button");
            cmdNext.Text = SiteConfiguration.GetDictionaryText("Next Button");

            // Decode the search string taken from the query string.
            string searchStr = Server.UrlDecode(WebUtil.GetQueryString("searchStr"));

            // If the visitor provided no criteria, don't bother searching.
            // IsNullOrEmpty also covers a null coming back from UrlDecode.
            if (string.IsNullOrEmpty(searchStr))
            {
                lblSearchString.Text = SiteConfiguration.GetDictionaryText("Search Criteria") + SiteConfiguration.GetDictionaryText("No Criteria");
                return;
            }

            string indexName = StringUtil.GetString(IndexName, SiteConfiguration.GetSiteSettingsItem()["Search Index"]);
            searchMgr = new SearchManager(indexName);

            // Remind the visitor what they provided as search criteria. The value is visitor-supplied
            // and the label text is emitted as markup, so it is HTML-encoded first.
            lblSearchString.Text = SiteConfiguration.GetDictionaryText("Search Criteria") + Server.HtmlEncode(searchStr);

            // Perform the actual search and keep the results for display.
            searchMgr.Search(searchStr);
            results = searchMgr.SearchResults;

            // Collect every result that resolves to an item.
            foreach (var result in results)
            {
                Item hit = result.GetObject<Item>();
                if (hit != null)
                {
                    ResultsList.Add(hit);
                }
            }

            if (searchMgr.SearchResults.Count != 0)
            {
                if (!Page.IsPostBack)
                {
                    DisplayResults();
                }
                return;
            }

            // No results were found, so show suggestions or a "no results" message.
            Sitecore.Search.Index index = Sitecore.Search.SearchManager.GetIndex("system");
            SpellChecker.Net.Search.Spell.SpellChecker spellchecker = new SpellChecker.Net.Search.Spell.SpellChecker(index.Directory);

            String[] suggestions;
            IndexReader dictionaryReader = IndexReader.Open(index.Directory);
            try
            {
                spellchecker.IndexDictionary(new LuceneDictionary(dictionaryReader, "_content"));
                suggestions = spellchecker.SuggestSimilar(searchStr, 5);
            }
            finally
            {
                // The reader is only needed to feed the dictionary; always release it.
                dictionaryReader.Close();
            }

            if (suggestions.Length > 0)
            {
                string pageUrl = LinkManager.GetItemUrl(Sitecore.Context.Item);

                // Build the markup once instead of repeatedly re-assigning the label text.
                var markup = new System.Text.StringBuilder(lblSearchString.Text);
                markup.Append("<p>");
                markup.Append(SiteConfiguration.GetDictionaryText("Did You Mean"));
                foreach (string s in suggestions)
                {
                    // URL-encode the suggestion inside the link target and HTML-encode it as link text.
                    markup.AppendFormat("&nbsp;<a href=\"{0}?searchStr={1}\">{2}</a>&nbsp;", pageUrl, Server.UrlEncode(s), Server.HtmlEncode(s));
                }
                markup.Append("</p>");
                lblSearchString.Text = markup.ToString();
            }
            else
            {
                string noResultsMsg = SiteConfiguration.GetDictionaryText("No Results");
                LiteralControl noResults = new LiteralControl(string.Format("<p>{0}</p>", noResultsMsg));
                pnResultsPanel.Controls.Add(noResults);
            }
        }
Exemplo n.º 40
0
        /// <summary>
        /// Runs the query against the index and returns one page of highlighted results,
        /// optionally accompanied by spelling suggestions for each query term.
        /// </summary>
        /// <param name="searchQuery">Query text, current page, titles-only flag and suggestion limit.</param>
        /// <param name="suggestOnlyWhenNoResults">
        /// When <c>true</c>, suggestions are gathered only if the search matched nothing;
        /// when <c>false</c> (default), they are always gathered.
        /// </param>
        /// <returns>The view model holding the total hit count, the page of results and any suggestions.</returns>
        /// <remarks>Query parsing may throw a ParseException, which is not caught here.</remarks>
        public static SearchResultsViewModel SearchWithSuggestions(SearchQuery searchQuery, bool suggestOnlyWhenNoResults = false)
        {
            var ret = new SearchResultsViewModel
            {
                SearchResults = new List<SearchResultsViewModel.SearchResult>(PageSize),
                Query = searchQuery
            };

            // Pages are 1-based (page N starts at (N - 1) * PageSize). Clamp so a zero or negative
            // page cannot produce a negative start index into the hits array.
            var currentPage = Math.Max(1, searchQuery.CurrentPage);

            // Parse query, possibly throwing a ParseException
            Query query;
            if (searchQuery.TitlesOnly)
            {
                var qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "Title",
                                         new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
                query = qp.Parse(searchQuery.Query);
            }
            else
            {
                query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, searchQuery.Query,
                                                    SearchFields, SearchFlags,
                                                    new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
            }

            // Perform the actual search, collecting everything up to the end of the requested page.
            var tsdc = TopScoreDocCollector.create(PageSize * currentPage, true);
            Searcher.Search(query, tsdc);
            ret.TotalResults = tsdc.GetTotalHits();
            var hits = tsdc.TopDocs().ScoreDocs;

            var indexReader = Searcher.GetIndexReader();

            // Do the suggestion magic
            if (!suggestOnlyWhenNoResults || ret.TotalResults == 0)
            {
                ret.Suggestions = new List<string>();
                var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(indexReader.Directory());

                // This is kind of a hack to get things working quickly
                // for real-world usage we probably want to get the analyzed terms from the Query object
                var individualTerms = searchQuery.Query.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

                // We only restrict suggestions to the "Title" field if we actually got results,
                // to improve suggestions relevancy. The spell checker ignores the field name
                // unless an index reader is supplied with it, so both are passed together.
                var restrictToTitle = ret.TotalResults != 0;

                foreach (var term in individualTerms)
                {
                    ret.Suggestions.AddRange(spellChecker.SuggestSimilar(term,
                                                                searchQuery.MaxSuggestions,
                                                                restrictToTitle ? indexReader : null,
                                                                restrictToTitle ? "Title" : null,
                                                                true));
                }
            }

            // Init the highlighter instance
            var fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                                    FastVectorHighlighter.DEFAULT_FIELD_MATCH,
                                    new SimpleFragListBuilder(),
                                    new ScoreOrderFragmentsBuilder(new[] { "[b]" }, new[] { "[/b]" }));

            // The field query depends only on the parsed query, so build it once instead of per hit.
            var fq = fvh.GetFieldQuery(query);

            for (var i = (currentPage - 1) * PageSize; i < hits.Length; ++i)
            {
                var d = Searcher.Doc(hits[i].doc);

                // NOTE(review): if no fragment is found for "Content" this may be null;
                // confirm HtmlStripFragment tolerates a null receiver.
                var fragment = fvh.GetBestFragment(fq, indexReader, hits[i].doc, "Content", 400);

                ret.SearchResults.Add(new SearchResultsViewModel.SearchResult
                {
                    Id = d.Get("Id"),
                    Title = d.Get("Title"),
                    Score = hits[i].score,
                    LuceneDocId = hits[i].doc,
                    Fragment = MvcHtmlString.Create(fragment.HtmlStripFragment()),
                });
            }
            return ret;
        }
Exemplo n.º 41
0
 /// <summary>
 /// Creates the extension for a single field, backed by a spell checker that uses
 /// the supplied string-distance measure and accuracy.
 /// </summary>
 /// <param name="distance">String-distance implementation handed to the spell checker.</param>
 /// <param name="field">Value stored in the instance's <c>field</c> member.</param>
 /// <param name="accuracy">Accuracy passed to the spell checker's <c>SetAccuracy</c>.</param>
 public SuggestionQueryIndexExtension(StringDistance distance, string field, float accuracy)
 {
     this.field = field;

     // NOTE(review): "directory" is read here but not assigned by this constructor;
     // confirm it is initialised elsewhere before this runs.
     var checker = new SpellChecker.Net.Search.Spell.SpellChecker(directory, distance);
     checker.SetAccuracy(accuracy);
     this.spellChecker = checker;
 }
Exemplo n.º 42
0
        /// <summary>
        /// Creates the spell checker on first use: opens a file-system directory over the
        /// dictionary target folder, assigns the checker, then calls
        /// <c>InitSuggestionListFromFile</c>. Does nothing when a checker already exists.
        /// </summary>
        private void InitializeSpellChecker()
        {
            if (SpellChecker == null)
            {
                var dictionaryFolder = DictionaryBuilder.CreateTargetFolder(_pathToDictDir);
                var indexDirectory = FSDirectory.Open(dictionaryFolder);

                SpellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(indexDirectory);
                InitSuggestionListFromFile();
            }
        }
 /// <summary>
 /// Creates the analyzer, storing the stop words, spell checker and suggestion count it is given.
 /// </summary>
 /// <param name="stopWords">Set assigned to <c>StopWords</c>.</param>
 /// <param name="spellChecker">Spell checker assigned to <c>SpellChecker</c>.</param>
 /// <param name="numberOfSuggestion">Value assigned to <c>NumberOfSuggestions</c>.</param>
 public StemmerCompareAnalyzer(ISet <string> stopWords, LuceneSpellChecker spellChecker, int numberOfSuggestion)
 {
     this.StopWords = stopWords;
     this.SpellChecker = spellChecker;
     this.NumberOfSuggestions = numberOfSuggestion;
 }
Exemplo n.º 44
0
        /// <summary>
        /// Runs the query against the index and returns one page of highlighted results,
        /// optionally accompanied by spelling suggestions for each query term.
        /// </summary>
        /// <param name="searchQuery">Query text, current page, titles-only flag and suggestion limit.</param>
        /// <param name="suggestOnlyWhenNoResults">
        /// When <c>true</c>, suggestions are gathered only if the search matched nothing;
        /// when <c>false</c> (default), they are always gathered.
        /// </param>
        /// <returns>The view model holding the total hit count, the page of results and any suggestions.</returns>
        /// <remarks>Query parsing may throw a ParseException, which is not caught here.</remarks>
        public static SearchResultsViewModel SearchWithSuggestions(SearchQuery searchQuery, bool suggestOnlyWhenNoResults = false)
        {
            var ret = new SearchResultsViewModel
            {
                SearchResults = new List<SearchResultsViewModel.SearchResult>(PageSize),
                Query         = searchQuery
            };

            // Pages are 1-based (page N starts at (N - 1) * PageSize). Clamp so a zero or negative
            // page cannot produce a negative start index into the hits array.
            var currentPage = Math.Max(1, searchQuery.CurrentPage);

            // Parse query, possibly throwing a ParseException
            Query query;

            if (searchQuery.TitlesOnly)
            {
                var qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "Title",
                                         new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
                query = qp.Parse(searchQuery.Query);
            }
            else
            {
                query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, searchQuery.Query,
                                                    SearchFields, SearchFlags,
                                                    new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
            }

            // Perform the actual search, collecting everything up to the end of the requested page.
            var tsdc = TopScoreDocCollector.create(PageSize * currentPage, true);

            Searcher.Search(query, tsdc);
            ret.TotalResults = tsdc.GetTotalHits();
            var hits = tsdc.TopDocs().ScoreDocs;

            var indexReader = Searcher.GetIndexReader();

            // Do the suggestion magic
            if (!suggestOnlyWhenNoResults || ret.TotalResults == 0)
            {
                ret.Suggestions = new List<string>();
                var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(indexReader.Directory());

                // This is kind of a hack to get things working quickly
                // for real-world usage we probably want to get the analyzed terms from the Query object
                var individualTerms = searchQuery.Query.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

                // We only restrict suggestions to the "Title" field if we actually got results,
                // to improve suggestions relevancy. The spell checker ignores the field name
                // unless an index reader is supplied with it, so both are passed together.
                var restrictToTitle = ret.TotalResults != 0;

                foreach (var term in individualTerms)
                {
                    ret.Suggestions.AddRange(spellChecker.SuggestSimilar(term,
                                                                         searchQuery.MaxSuggestions,
                                                                         restrictToTitle ? indexReader : null,
                                                                         restrictToTitle ? "Title" : null,
                                                                         true));
                }
            }

            // Init the highlighter instance
            var fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                                                FastVectorHighlighter.DEFAULT_FIELD_MATCH,
                                                new SimpleFragListBuilder(),
                                                new ScoreOrderFragmentsBuilder(new[] { "[b]" }, new[] { "[/b]" }));

            // The field query depends only on the parsed query, so build it once instead of per hit.
            var fq = fvh.GetFieldQuery(query);

            for (var i = (currentPage - 1) * PageSize; i < hits.Length; ++i)
            {
                var d = Searcher.Doc(hits[i].doc);

                // NOTE(review): if no fragment is found for "Content" this may be null;
                // confirm HtmlStripFragment tolerates a null receiver.
                var fragment = fvh.GetBestFragment(fq, indexReader, hits[i].doc, "Content", 400);

                ret.SearchResults.Add(new SearchResultsViewModel.SearchResult
                {
                    Id          = d.Get("Id"),
                    Title       = d.Get("Title"),
                    Score       = hits[i].score,
                    LuceneDocId = hits[i].doc,
                    Fragment    = MvcHtmlString.Create(fragment.HtmlStripFragment()),
                });
            }
            return ret;
        }