/// <summary>
/// Opens the index in <c>m_directory</c> and builds a fresh autocomplete
/// index from the in-memory hotel list, weighting each entry by its
/// recorded search count.
/// </summary>
public void BuildAutoCompleteIndex()
{
    // Custom analyzer so the writer performs EdgeNGram filtering on input.
    var nGramAnalyzer = new AutoCompleteAnalyzer();

    using (var indexWriter = new IndexWriter(m_directory, nGramAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED))
    {
        indexWriter.MergeFactor = 300;
        indexWriter.SetMaxBufferedDocs(150);

        // Store every hotel name (plus its n-grams) along with a weight.
        foreach (var hotel in _hotels)
        {
            // Names shorter than 3 chars are skipped; long names are fine.
            if (hotel.Name.Length < 3)
            {
                continue;
            }

            // Weight the entry by how often users have searched for it.
            int occurrenceCount = hotel.SearchCount;
            var document = MakeDocument(hotel, occurrenceCount);
            indexWriter.AddDocument(document);
        }

        indexWriter.Optimize();
    }

    // Swap in a searcher over the freshly written index.
    ReplaceSearcher();
}
/// <summary>
/// Opens the index in the given directory and creates a new index of word
/// frequency for the given index.
/// </summary>
/// <param name="sourceDirectory">Directory containing the index to count words in.</param>
/// <param name="fieldToAutocomplete">The field in the index that should be analyzed.</param>
public void BuildAutoCompleteIndex(Directory sourceDirectory, String fieldToAutocomplete)
{
    // Build a dictionary (from the spell package) over the source index.
    using (IndexReader reader = IndexReader.Open(sourceDirectory, true))
    {
        // Enumerates every distinct term of the field from the source index.
        LuceneDictionary wordSource = new LuceneDictionary(reader, fieldToAutocomplete);

        // Custom analyzer so the writer performs EdgeNGram filtering on input.
        var nGramAnalyzer = new AutoCompleteAnalyzer();

        using (var indexWriter = new IndexWriter(m_directory, nGramAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED))
        {
            indexWriter.MergeFactor = 300;
            indexWriter.SetMaxBufferedDocs(150);

            // Store every word (plus its n-grams) with its occurrence count.
            foreach (string candidate in wordSource)
            {
                // Words shorter than 3 chars are skipped; long words are fine.
                if (candidate.Length < 3)
                {
                    continue;
                }

                // Weight the entry by the number of documents it appears in.
                int documentFrequency = reader.DocFreq(new Term(fieldToAutocomplete, candidate));
                var document = MakeDocument(fieldToAutocomplete, candidate, documentFrequency);
                indexWriter.AddDocument(document);
            }

            indexWriter.Optimize();
        }
    }

    // Swap in a searcher over the freshly written index.
    ReplaceSearcher();
}
/// <summary>
/// Opens the index in the given directory and creates a new index of word
/// frequency for the given index.
/// </summary>
/// <param name="sourceDirectory">Directory containing the index to count words in.</param>
/// <param name="fieldToAutocomplete">The field in the index that should be analyzed.</param>
public void BuildAutoCompleteIndex(Directory sourceDirectory, String fieldToAutocomplete)
{
    // Build a dictionary (from the spell package) over the source index.
    using (IndexReader sourceReader = IndexReader.Open(sourceDirectory, true))
    {
        LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);

        // Use a custom analyzer so we can do EdgeNGramFiltering.
        var analyzer = new AutoCompleteAnalyzer();
        using (var writer = new IndexWriter(m_directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED))
        {
            writer.MergeFactor = 300;
            writer.SetMaxBufferedDocs(150);

            // Go through every word, storing the original word (incl. n-grams)
            // and the number of times it occurs.
            foreach (string word in dict)
            {
                // Too short --> bail, but "too long" is fine...
                if (word.Length < 3)
                {
                    continue;
                }

                // Index the word; use the number of documents this word
                // appears in as its frequency weight.
                int freq = sourceReader.DocFreq(new Term(fieldToAutocomplete, word));
                var doc = MakeDocument(fieldToAutocomplete, word, freq);
                writer.AddDocument(doc);
            }

            writer.Optimize();
        }
    }

    // Re-open our reader.
    ReplaceSearcher();
}