/// <summary>
        /// Rebuilds the auto-complete index from the in-memory hotel list, storing each
        /// hotel name (analyzed into edge n-grams) together with its search frequency,
        /// then re-opens the searcher over the freshly built index.</summary>
        public void BuildAutoCompleteIndex()
        {
            // Custom analyzer performs EdgeNGram filtering so that prefixes of a
            // hotel name become searchable terms.
            var analyzer = new AutoCompleteAnalyzer();
            using (var writer = new IndexWriter(m_directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
            {
                // Batch-friendly tuning: fewer merge passes, more buffered documents.
                writer.MergeFactor = 300;
                writer.SetMaxBufferedDocs(150);

                foreach (var hotel in _hotels)
                {
                    // Names shorter than three characters are skipped entirely;
                    // there is no upper bound on name length.
                    if (hotel.Name.Length >= 3)
                    {
                        // Weight each entry by how often it has been searched so
                        // popular hotels rank higher in suggestions.
                        var doc = MakeDocument(hotel, hotel.SearchCount);
                        writer.AddDocument(doc);
                    }
                }

                writer.Optimize();
            }

            // Swap in a new searcher that sees the rebuilt index.
            ReplaceSearcher();
        }
        // --- Example 2 (scraped-sample separator; original site text: "Esempio n. 2", vote count "0") ---
        /// <summary>
        /// Open the index in the given directory and create a new index of word frequency for the
        /// given index.</summary>
        /// <param name="sourceDirectory">Directory containing the index to count words in.</param>
        /// <param name="fieldToAutocomplete">The field in the index that should be analyzed.</param>
        public void BuildAutoCompleteIndex(Directory sourceDirectory, String fieldToAutocomplete)
        {
            // Enumerate the distinct terms of the chosen field via the spell
            // package's LuceneDictionary wrapper around the source index.
            using (IndexReader sourceReader = IndexReader.Open(sourceDirectory, true))
            {
                var dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);

                // Custom analyzer adds EdgeNGram filtering so that word prefixes
                // become searchable terms in the auto-complete index.
                var analyzer = new AutoCompleteAnalyzer();
                using (var writer = new IndexWriter(m_directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
                {
                    // Batch-indexing tuning: fewer merges, larger document buffer.
                    writer.MergeFactor = 300;
                    writer.SetMaxBufferedDocs(150);

                    foreach (string word in dict)
                    {
                        // Words of fewer than three characters are skipped;
                        // arbitrarily long words are indexed unchanged.
                        if (word.Length >= 3)
                        {
                            // Rank each suggestion by how many documents in the
                            // source index contain the word.
                            int freq = sourceReader.DocFreq(new Term(fieldToAutocomplete, word));
                            writer.AddDocument(MakeDocument(fieldToAutocomplete, word, freq));
                        }
                    }

                    writer.Optimize();
                }
            }

            // Point our searcher at the freshly built auto-complete index.
            ReplaceSearcher();
        }
        // --- Example 3 (scraped-sample separator; original site text: "Esempio n. 3", vote count "0") ---
        /// <summary>
        /// Open the index in the given directory and create a new index of word frequency for the 
        /// given index.</summary>
        /// <param name="sourceDirectory">Directory containing the index to count words in.</param>
        /// <param name="fieldToAutocomplete">The field in the index that should be analyzed.</param>
        // NOTE(review): this method is a near-identical duplicate of the overload above with the
        // exact same signature — two such definitions cannot coexist in one class, so this looks
        // like duplication introduced by the sample-site scrape; confirm which variant is canonical.
        public void BuildAutoCompleteIndex(Directory sourceDirectory, String fieldToAutocomplete)
        {
            // build a dictionary (from the spell package)
            using (IndexReader sourceReader = IndexReader.Open(sourceDirectory, true))
            {
                // Enumerates the distinct terms of fieldToAutocomplete in the source index.
                LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);

                // code from
                // org.apache.lucene.search.spell.SpellChecker.indexDictionary(
                // Dictionary)
                //IndexWriter.Unlock(m_directory);

                // use a custom analyzer so we can do EdgeNGramFiltering
                var analyzer = new AutoCompleteAnalyzer();
                // 'true' recreates the target index from scratch on every rebuild.
                using (var writer = new IndexWriter(m_directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
                {
                    // Batch-indexing tuning: fewer merge passes, larger document buffer.
                    writer.MergeFactor = 300;
                    writer.SetMaxBufferedDocs(150);

                    // go through every word, storing the original word (incl. n-grams)
                    // and the number of times it occurs
                    foreach (string word in dict)
                    {
                        if (word.Length < 3)
                            continue; // too short we bail but "too long" is fine...

                        // ok index the word
                        // use the number of documents this word appears in
                        int freq = sourceReader.DocFreq(new Term(fieldToAutocomplete, word));
                        var doc = MakeDocument(fieldToAutocomplete, word, freq);

                        writer.AddDocument(doc);
                    }

                    writer.Optimize();
                }

            }

            // re-open our reader
            ReplaceSearcher();
        }