HighFrequencyDictionary: a dictionary of terms taken from the given field of a Lucene index that appear in at least a given fraction of the documents. The threshold is a value in [0..1] representing the minimum fraction of the total number of documents in which a term must appear. Based on LuceneDictionary.
Inherits: Dictionary
 /// <summary>
 /// Checks that a <c>HighFrequencyDictionary</c> built over an index that
 /// contains no documents, for the field "bogus", exposes an entry iterator
 /// whose comparator is null and whose first <c>Next()</c> call returns null.
 /// </summary>
 public void TestEmpty()
 {
     Directory dir = NewDirectory();

     // Commit without adding any documents so that an (empty) index exists to open.
     IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
     writer.Commit();
     writer.Dispose();

     IndexReader ir = DirectoryReader.Open(dir);
     IDictionary dictionary = new HighFrequencyDictionary(ir, "bogus", 0.1f);
     BytesRefIterator tf = dictionary.EntryIterator;
     assertNull(tf.Comparator);
     assertNull(tf.Next());

     // Dispose the reader before the directory it reads from so that no
     // reader is left open once the directory has been closed.
     ir.Dispose();
     dir.Dispose();
 }
Example #2
0
            /// <summary>
            /// Sets up enumeration over the terms of the outer dictionary's field.
            /// </summary>
            /// <param name="outerInstance">
            /// The dictionary whose <c>reader</c>, <c>field</c> and <c>thresh</c>
            /// members are read here.
            /// </param>
            internal HighFrequencyEnumerator(HighFrequencyDictionary outerInstance)
            {
                Terms fieldTerms = MultiFields.GetTerms(outerInstance.reader, outerInstance.field);

                // termsEnum stays null when GetTerms returned null for the field.
                termsEnum = fieldTerms != null ? fieldTerms.GetEnumerator() : null;

                // Scale the threshold by the reader's document count; the int
                // cast truncates the fractional part.
                int totalDocs = outerInstance.reader.NumDocs;
                minNumDocs = (int)(outerInstance.thresh * (float)totalDocs);
            }
 /// <summary>
 /// Sets up iteration over the terms of the outer dictionary's field and
 /// keeps a reference to that dictionary.
 /// </summary>
 /// <param name="outerInstance">
 /// The dictionary whose <c>reader</c>, <c>field</c> and <c>thresh</c>
 /// members are read here.
 /// </param>
 internal HighFrequencyIterator(HighFrequencyDictionary outerInstance)
 {
     this.outerInstance = outerInstance;

     Terms fieldTerms = MultiFields.GetTerms(outerInstance.reader, outerInstance.field);

     // termsEnum stays null when GetTerms returned null for the field.
     termsEnum = fieldTerms != null ? fieldTerms.Iterator(null) : null;

     // Scale the threshold by the reader's document count; the int cast
     // truncates the fractional part.
     int totalDocs = outerInstance.reader.NumDocs;
     minNumDocs = (int)(outerInstance.thresh * (float)totalDocs);
 }