/// <summary>
/// Creates an iterator bound to the given dictionary and asks that dictionary's
/// reader for the term enumeration starting at (field, "").
/// </summary>
/// <param name="enclosingInstance">The dictionary whose reader and field are used.</param>
public LuceneIterator(LuceneDictionary enclosingInstance)
{
    InitBlock(enclosingInstance);
    try
    {
        termEnum = Enclosing_Instance.reader.Terms(new Term(Enclosing_Instance.field, ""));
    }
    catch (System.IO.IOException ex)
    {
        // The I/O failure is reported on stderr rather than rethrown, as before.
        // Write the whole exception (type, message, inner exceptions and stack trace):
        // the stack trace on its own does not say what actually went wrong.
        System.Console.Error.WriteLine(ex.ToString());
    }
}
/// <summary>
/// Builds a dictionary over the field name "nonexistent_field" and checks that its
/// word iterator reports no element and returns null from Next().
/// </summary>
public void TestFieldNonExistent()
{
    try
    {
        indexReader = IndexReader.Open(store);
        ld = new LuceneDictionary(indexReader, "nonexistent_field");
        it = ld.GetWordsIterator();

        bool hasAnyWord = it.HasNext();
        AssertFalse("More elements than expected", hasAnyWord);

        AssertTrue("Nonexistent element is really null", null == it.Next());
    }
    finally
    {
        // Release the reader whether or not the assertions passed.
        if (null != indexReader)
        {
            indexReader.Close();
        }
    }
}
/// <summary>
/// Stores the given dictionary in the <c>enclosingInstance</c> field.
/// </summary>
/// <param name="enclosingInstance">The dictionary this instance is attached to.</param>
private void InitBlock(LuceneDictionary enclosingInstance)
{
    this.enclosingInstance = enclosingInstance;
}
/// <summary>
/// Builds a dictionary over the field "aaa" and checks that its word iterator
/// yields exactly one word, "foo", and then null.
/// </summary>
public void TestFieldAaa()
{
    try
    {
        indexReader = IndexReader.Open(store);
        ld = new LuceneDictionary(indexReader, "aaa");
        it = ld.GetWordsIterator();

        // The single expected word.
        bool hasFirst = it.HasNext();
        AssertTrue("First element doesn't exist.", hasFirst);
        AssertTrue("First element isn't correct", it.Next().Equals("foo"));

        // Nothing after it.
        bool hasMore = it.HasNext();
        AssertFalse("More elements than expected", hasMore);
        AssertTrue("Nonexistent element is really null", null == it.Next());
    }
    finally
    {
        // Release the reader whether or not the assertions passed.
        if (null != indexReader)
        {
            indexReader.Close();
        }
    }
}
/// <summary>
/// Iterates the words of the "contents" field using Next() only, then checks that the
/// exhausted iterator keeps answering false from HasNext() and null from Next().
/// </summary>
public void TestFieldContents_2()
{
    try
    {
        indexReader = IndexReader.Open(store);
        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.GetWordsIterator();

        // hasNext() should have no side effects
        // {{DIGY}} But has. Need a fix?
        // The three leading HasNext() checks stay disabled because of that porting note:
        //AssertTrue("First element isn't were it should be.", it.HasNext());
        //AssertTrue("First element isn't were it should be.", it.HasNext());
        //AssertTrue("First element isn't were it should be.", it.HasNext());

        // just iterate through words
        AssertTrue("First element isn't correct", it.Next().Equals("Jerry"));
        AssertTrue("Second element isn't correct", it.Next().Equals("Tom"));
        AssertTrue("Nonexistent element is really null", it.Next() == null);

        // hasNext() should still have no side effects ...
        // (failure message corrected: it previously read "There should be any more
        // elements", the opposite of what AssertFalse checks)
        AssertFalse("There should not be any more elements", it.HasNext());
        AssertFalse("There should not be any more elements", it.HasNext());
        AssertFalse("There should not be any more elements", it.HasNext());

        // .. and there are really no more words
        AssertTrue("Nonexistent element is really null", it.Next() == null);
        AssertTrue("Nonexistent element is really null", it.Next() == null);
        AssertTrue("Nonexistent element is really null", it.Next() == null);
    }
    finally
    {
        // Release the reader whether or not the assertions passed.
        if (indexReader != null)
        {
            indexReader.Close();
        }
    }
}
/// <summary>
/// Builds a dictionary over the field "contents" and checks, alternating HasNext() and
/// Next(), that it yields "Jerry" then "Tom" and nothing else; then re-creates the
/// dictionary and counts the words with a plain HasNext()/Next() loop.
/// </summary>
public void TestFieldContents_1()
{
    try
    {
        indexReader = IndexReader.Open(store);

        // First pass: check each word explicitly.
        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.GetWordsIterator();

        bool hasFirst = it.HasNext();
        AssertTrue("First element doesn't exist.", hasFirst);
        AssertTrue("First element isn't correct", it.Next().Equals("Jerry"));

        bool hasSecond = it.HasNext();
        AssertTrue("Second element doesn't exist.", hasSecond);
        AssertTrue("Second element isn't correct", it.Next().Equals("Tom"));

        bool hasThird = it.HasNext();
        AssertFalse("More elements than expected", hasThird);
        AssertTrue("Nonexistent element is really null", null == it.Next());

        // Second pass: a fresh iterator must produce exactly two words.
        ld = new LuceneDictionary(indexReader, "contents");
        it = ld.GetWordsIterator();

        int remaining = 2;
        for (; it.HasNext(); remaining--)
        {
            it.Next();
        }

        AssertTrue("Number of words incorrect", 0 == remaining);
    }
    finally
    {
        // Release the reader whether or not the assertions passed.
        if (null != indexReader)
        {
            indexReader.Close();
        }
    }
}
/// <summary>
/// Open the index in the given source directory and, for every field name that passes
/// the field filter below, add one document per accepted word (with the number of
/// source documents containing it) to an index in the target directory.
/// </summary>
/// <param name="sourceDirectory">Directory containing the index to count words in.</param>
/// <param name="TargetDirectory">Directory the word-frequency index is written to.</param>
/// <param name="verbose">When true, the frequency of each indexed word is logged.</param>
public void BuildAutoCompleteIndex(Directory sourceDirectory, Directory TargetDirectory, bool verbose)
{
    // build a dictionary (from the spell package)
    using (IndexReader sourceReader = IndexReader.Open(sourceDirectory, true))
    {
        string[] fieldNames = sourceReader.GetFieldNames(IndexReader.FieldOption.ALL).ToArray();
        foreach (string fieldToAutocomplete in fieldNames)
        {
            // NOTE(review): because the negated terms are joined with ||, this condition is
            // false only for a field that contains '_', "date" AND "threshold" (and neither
            // name marker). If the intent was to skip date/threshold fields, the last terms
            // probably want && - left unchanged here; confirm the intended rule.
            if (fieldToAutocomplete.Contains("__display name")
                || fieldToAutocomplete.Contains("_name")
                || !fieldToAutocomplete.Contains('_')
                || !fieldToAutocomplete.Contains("date")
                || !fieldToAutocomplete.Contains("threshold"))
            {
                LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete);

                // code from
                // org.apache.lucene.search.spell.SpellChecker.indexDictionary(
                // Dictionary)
                //IndexWriter.Unlock(m_directory);

                // use a custom analyzer so we can do EdgeNGramFiltering
                AutoCompleteAnalyzer analyzer = new AutoCompleteAnalyzer();

                // NOTE(review): a writer is opened per field with IsFirstTime as its third
                // argument; if that is the "create" flag and stays true, later fields would
                // presumably replace what earlier fields wrote - verify against IsFirstTime.
                using (var writer = new IndexWriter(TargetDirectory, analyzer, IsFirstTime, IndexWriter.MaxFieldLength.UNLIMITED))
                {
                    writer.SetMergeFactor(300);
                    writer.SetMaxBufferedDocs(150);

                    // go through every word, storing the original word (incl. n-grams)
                    // and the number of times it occurs.
                    // (The words are enumerated through the dictionary itself; the extra
                    // iterator that used to be created here was never consumed or released.)
                    foreach (string word in dict)
                    {
                        if (!ShouldIndexWord(word))
                        {
                            continue;
                        }

                        // ok index the word
                        // use the number of documents this word appears in
                        int freq = sourceReader.DocFreq(new Term(fieldToAutocomplete, word));
                        if (verbose)
                        {
                            log.Info(string.Format("Frequency {0} of this word {1}", freq, word));
                        }

                        var doc = MakeDocument(fieldToAutocomplete, word, freq);
                        writer.AddDocument(doc);
                    }

                    writer.Optimize();
                }
            }
        }
    }

    // re-open our reader
    //ReplaceSearcher();
}

/// <summary>
/// Decides whether a dictionary word is added to the auto-complete index: its length
/// must lie within the configured bounds and it must not look like markup, a path,
/// a file (per isNotFile), an address-like token, a number or a GUID.
/// </summary>
/// <param name="word">The candidate word.</param>
/// <returns>True when the word should be indexed.</returns>
private bool ShouldIndexWord(string word)
{
    // too short we bail
    if (word.Length < UtilitySettings.AllowedMinimumWordLengthToBeIndexed)
    {
        return false;
    }

    // too long also we bail out
    if (word.Length > UtilitySettings.AllowedMaxWordLengthToBeIndexed)
    {
        return false;
    }

    // Only the success flags of the TryParse calls matter; the parsed values are unused.
    double num;
    Guid guid;
    return !word.Contains('<')
        && !word.Contains('>')
        && !word.Contains('/')
        && !word.Contains('\\')
        && !isNotFile(word)
        && !word.Contains('@')
        && !word.Contains('&')
        && !double.TryParse(word, out num)
        && !Guid.TryParse(word, out guid);
}
/// <summary>
/// Reads the words of one field from the index in the given directory and writes, for
/// each word of at least three characters, a document carrying the word and the number
/// of source documents it appears in; afterwards the searcher is replaced.
/// </summary>
/// <param name="sourceDirectory">Directory containing the index to count words in.</param>
/// <param name="fieldToAutocomplete">The field in the index that should be analyzed.</param>
public void BuildAutoCompleteIndex(Directory sourceDirectory, String fieldToAutocomplete)
{
    using (IndexReader sourceReader = IndexReader.Open(sourceDirectory, true))
    {
        // Word source: the spell package's dictionary over the requested field
        // (approach taken from
        // org.apache.lucene.search.spell.SpellChecker.indexDictionary(Dictionary)).
        LuceneDictionary dictionary = new LuceneDictionary(sourceReader, fieldToAutocomplete);

        //IndexWriter.Unlock(m_directory);

        // Custom analyzer so that EdgeNGram filtering is applied.
        AutoCompleteAnalyzer edgeNGramAnalyzer = new AutoCompleteAnalyzer();

        using (IndexWriter indexWriter = new IndexWriter(m_directory, edgeNGramAnalyzer, true, IndexWriter.MaxFieldLength.LIMITED))
        {
            indexWriter.MergeFactor = 300;
            indexWriter.SetMaxBufferedDocs(150);

            // Store every sufficiently long word (incl. n-grams) with its occurrence count.
            foreach (string word in dictionary)
            {
                // Words shorter than three characters are skipped; there is no upper bound.
                if (word.Length >= 3)
                {
                    // Number of documents this word appears in.
                    int documentFrequency = sourceReader.DocFreq(new Term(fieldToAutocomplete, word));
                    indexWriter.AddDocument(MakeDocument(fieldToAutocomplete, word, documentFrequency));
                }
            }

            indexWriter.Optimize();
        }
    }

    // re-open our reader
    ReplaceSearcher();
}