public void RestDocsAndPositionsEnumStart()
{
    Analyzer analyzer = new MockAnalyzer(Random);
    int numIters = AtLeast(3);
    MemoryIndex memory = new MemoryIndex(true, Random.nextInt(50) * 1024 * 1024);
    for (int i = 0; i < numIters; i++)
    {
        // check reuse
        memory.AddField("foo", "bar", analyzer);
        AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;
        assertEquals(1, reader.GetTerms("foo").SumTotalTermFreq);
        DocsAndPositionsEnum disi = reader.GetTermPositionsEnum(new Term("foo", "bar"));
        int docid = disi.DocID;
        assertEquals(-1, docid);
        assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        assertEquals(0, disi.NextPosition());
        assertEquals(0, disi.StartOffset);
        assertEquals(3, disi.EndOffset);

        // now reuse and check again
        TermsEnum te = reader.GetTerms("foo").GetIterator(null);
        assertTrue(te.SeekExact(new BytesRef("bar")));
        disi = te.DocsAndPositions(null, disi);
        docid = disi.DocID;
        assertEquals(-1, docid);
        assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        reader.Dispose();
        memory.Reset();
    }
}
public virtual void TestDocsAndPositionsEnum()
{
    TermsEnum termsEnum = reader.GetTerms(DOC_POSITIONS_FIELD).GetIterator(null);
    assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(DOC_POSITIONS_TERM)));
    DocsAndPositionsEnum sortedPositions = termsEnum.DocsAndPositions(null, null);
    int doc;

    // test nextDoc()
    while ((doc = sortedPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        int freq = sortedPositions.Freq;
        assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
        for (int i = 0; i < freq; i++)
        {
            assertEquals("incorrect position for doc=" + doc, i, sortedPositions.NextPosition());
            if (!DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
            {
                assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.StartOffset);
                assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.EndOffset);
            }
            assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(sortedPositions.GetPayload().Utf8ToString(), CultureInfo.InvariantCulture));
        }
    }

    // test advance()
    DocsAndPositionsEnum reuse = sortedPositions;
    sortedPositions = termsEnum.DocsAndPositions(null, reuse);
    if (sortedPositions is SortingAtomicReader.SortingDocsAndPositionsEnum)
    {
        assertTrue(((SortingAtomicReader.SortingDocsAndPositionsEnum)sortedPositions).Reused(reuse)); // make sure reuse worked
    }
    doc = 0;
    while ((doc = sortedPositions.Advance(doc + TestUtil.NextInt32(Random, 1, 5))) != DocIdSetIterator.NO_MORE_DOCS)
    {
        int freq = sortedPositions.Freq;
        assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
        for (int i = 0; i < freq; i++)
        {
            assertEquals("incorrect position for doc=" + doc, i, sortedPositions.NextPosition());
            if (!DoesntSupportOffsets.Contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
            {
                assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.StartOffset);
                assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.EndOffset);
            }
            assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(sortedPositions.GetPayload().Utf8ToString(), CultureInfo.InvariantCulture));
        }
    }
}
/// <summary>
/// Takes the categories from the given taxonomy directory, and adds the
/// missing ones to this taxonomy. Additionally, it fills the given
/// <see cref="IOrdinalMap"/> with a mapping from the original ordinal to the new
/// ordinal.
/// </summary>
public virtual void AddTaxonomy(Directory taxoDir, IOrdinalMap map)
{
    EnsureOpen();
    DirectoryReader r = DirectoryReader.Open(taxoDir);
    try
    {
        int size = r.NumDocs;
        IOrdinalMap ordinalMap = map;
        ordinalMap.SetSize(size);
        int @base = 0;
        TermsEnum te = null;
        DocsEnum docs = null;
        foreach (AtomicReaderContext ctx in r.Leaves)
        {
            AtomicReader ar = ctx.AtomicReader;
            Terms terms = ar.GetTerms(Consts.FULL);
            te = terms.GetIterator(te);
            while (te.Next() != null)
            {
                FacetLabel cp = new FacetLabel(FacetsConfig.StringToPath(te.Term.Utf8ToString()));
                int ordinal = AddCategory(cp);
                docs = te.Docs(null, docs, DocsFlags.NONE);
                ordinalMap.AddMapping(docs.NextDoc() + @base, ordinal);
            }
            @base += ar.MaxDoc; // no deletions, so we're ok
        }
        ordinalMap.AddDone();
    }
    finally
    {
        r.Dispose();
    }
}
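For orientation, a caller would typically pair AddTaxonomy with an in-memory ordinal map and then use the resulting old-to-new mapping to rewrite facet ordinals taken from the source index. The sketch below is not taken from this page: it assumes the facet module's DirectoryTaxonomyWriter.MemoryOrdinalMap implements IOrdinalMap and exposes the mapping via GetMap(); the paths and variable names are hypothetical.

using Lucene.Net.Facet.Taxonomy.Directory; // DirectoryTaxonomyWriter (assumed namespace)
using Lucene.Net.Store;                    // FSDirectory

// Hypothetical paths; the source taxonomy is assumed to already exist on disk.
FSDirectory destTaxoDir = FSDirectory.Open("dest-taxo");
FSDirectory sourceTaxoDir = FSDirectory.Open("source-taxo");

using (var taxoWriter = new DirectoryTaxonomyWriter(destTaxoDir))
{
    // MemoryOrdinalMap is assumed to be the in-memory IOrdinalMap implementation.
    var map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
    taxoWriter.AddTaxonomy(sourceTaxoDir, map); // adds missing categories, fills the map
    int[] oldToNew = map.GetMap();              // index = ordinal in source taxonomy, value = new ordinal
    // oldToNew can now be used to rewrite facet ordinals read from the source index.
}

destTaxoDir.Dispose();
sourceTaxoDir.Dispose();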
public virtual void TestReuseDocsEnumNoReuse()
{
    Directory dir = NewDirectory();
    Codec cp = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
    int numdocs = AtLeast(20);
    CreateRandomIndex(numdocs, writer, Random());
    writer.Commit();

    DirectoryReader open = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext ctx in open.Leaves)
    {
        AtomicReader indexReader = (AtomicReader)ctx.Reader;
        Terms terms = indexReader.GetTerms("body");
        TermsEnum iterator = terms.GetIterator(null);
        IdentityHashMap<DocsEnum, bool?> enums = new IdentityHashMap<DocsEnum, bool?>();
        MatchNoBits bits = new MatchNoBits(indexReader.MaxDoc);
        while ((iterator.Next()) != null)
        {
            DocsEnum docs = iterator.Docs(Random().NextBoolean() ? bits : new MatchNoBits(indexReader.MaxDoc), null,
                Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
            enums[docs] = true;
        }
        Assert.AreEqual(terms.Count, enums.Count);
    }
    IOUtils.Close(writer, open, dir);
}
public virtual void TestNestedPulsing()
{
    // we always run this test with pulsing codec.
    Codec cp = TestUtil.AlwaysPostingsFormat(new NestedPulsingPostingsFormat());
    BaseDirectoryWrapper dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
    Document doc = new Document();
    doc.Add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
    // note: the reuse is imperfect, here we would have 4 enums (lost reuse when we get an enum for 'm')
    // this is because we only track the 'last' enum we reused (not all).
    // but this seems 'good enough' for now.
    iw.AddDocument(doc);
    DirectoryReader ir = iw.GetReader();
    iw.Dispose();

    AtomicReader segment = GetOnlySegmentReader(ir);
    DocsEnum reuse = null;
    IDictionary<DocsEnum, bool?> allEnums = new JCG.Dictionary<DocsEnum, bool?>(IdentityEqualityComparer<DocsEnum>.Default);
    TermsEnum te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext())
    {
        reuse = te.Docs(null, reuse, DocsFlags.NONE);
        allEnums[reuse] = true;
    }
    assertEquals(4, allEnums.Count);

    allEnums.Clear();
    DocsAndPositionsEnum posReuse = null;
    te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext())
    {
        posReuse = te.DocsAndPositions(null, posReuse);
        allEnums[posReuse] = true;
    }
    assertEquals(4, allEnums.Count);

    ir.Dispose();
    dir.Dispose();
}
public virtual void TestSophisticatedReuse()
{
    // we always run this test with pulsing codec.
    Codec cp = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));
    Directory dir = NewDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
    Document doc = new Document();
    doc.Add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
    iw.AddDocument(doc);
    DirectoryReader ir = iw.GetReader();
    iw.Dispose();

    AtomicReader segment = GetOnlySegmentReader(ir);
    DocsEnum reuse = null;
    IDictionary<DocsEnum, bool?> allEnums = new JCG.Dictionary<DocsEnum, bool?>(IdentityEqualityComparer<DocsEnum>.Default);
    TermsEnum te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext())
    {
        reuse = te.Docs(null, reuse, DocsFlags.NONE);
        allEnums[reuse] = true;
    }
    assertEquals(2, allEnums.Count);

    allEnums.Clear();
    DocsAndPositionsEnum posReuse = null;
    te = segment.GetTerms("foo").GetEnumerator();
    while (te.MoveNext())
    {
        posReuse = te.DocsAndPositions(null, posReuse);
        allEnums[posReuse] = true;
    }
    assertEquals(2, allEnums.Count);

    ir.Dispose();
    dir.Dispose();
}
protected int GetDictValueCount(AtomicReader reader, string field)
{
    int ret = 0;
    Terms terms = reader.GetTerms(field);
    if (terms == null)
    {
        return ret;
    }
    return (int)terms.Count;
}
public void TestNonExistingsField()
{
    MemoryIndex mindex = new MemoryIndex(Random.nextBoolean(), Random.nextInt(50) * 1024 * 1024);
    MockAnalyzer mockAnalyzer = new MockAnalyzer(Random);
    mindex.AddField("field", "the quick brown fox", mockAnalyzer);
    AtomicReader reader = (AtomicReader)mindex.CreateSearcher().IndexReader;
    assertNull(reader.GetNumericDocValues("not-in-index"));
    assertNull(reader.GetNormValues("not-in-index"));
    assertNull(reader.GetTermDocsEnum(new Term("not-in-index", "foo")));
    assertNull(reader.GetTermPositionsEnum(new Term("not-in-index", "foo")));
    assertNull(reader.GetTerms("not-in-index"));
}
public BaseTermsEnumTraverser(AbstractPrefixTreeFilter outerInstance, AtomicReaderContext context, IBits acceptDocs)
{
    this.m_outerInstance = outerInstance;
    this.m_context = context;
    AtomicReader reader = context.AtomicReader;
    this.m_acceptDocs = acceptDocs;
    m_maxDoc = reader.MaxDoc;
    Terms terms = reader.GetTerms(outerInstance.m_fieldName);
    if (terms != null)
    {
        m_termsEnum = terms.GetIterator(null);
    }
}
protected BaseTermsEnumTraverser(AbstractPrefixTreeFilter outerInstance, AtomicReaderContext context, IBits acceptDocs) // LUCENENET: CA1012: Abstract types should not have constructors (marked protected)
{
    this.m_outerInstance = outerInstance;
    this.m_context = context;
    AtomicReader reader = context.AtomicReader;
    this.m_acceptDocs = acceptDocs;
    m_maxDoc = reader.MaxDoc;
    Terms terms = reader.GetTerms(outerInstance.m_fieldName);
    if (terms != null)
    {
        m_termsEnum = terms.GetEnumerator();
    }
}
protected BaseTermsEnumTraverser(AbstractPrefixTreeFilter filter, AtomicReaderContext context, IBits? acceptDocs) // LUCENENET: CA1012: Abstract types should not have constructors (marked protected)
{
    // LUCENENET specific - added guard clauses
    this.m_filter = filter ?? throw new ArgumentNullException(nameof(filter));
    this.m_context = context ?? throw new ArgumentNullException(nameof(context));
    AtomicReader reader = context.AtomicReader;
    this.m_acceptDocs = acceptDocs;
    m_maxDoc = reader.MaxDoc;
    Terms? terms = reader.GetTerms(filter.m_fieldName);
    if (terms != null)
    {
        m_termsEnum = terms.GetEnumerator();
    }
}
public virtual ShapeFieldCache<T> GetCache(AtomicReader reader)
{
    lock (locker)
    {
        ShapeFieldCache<T> idx;
        if (sidx.TryGetValue(reader, out idx) && idx != null)
        {
            return idx;
        }

        /*long startTime = Runtime.CurrentTimeMillis();
         * log.Fine("Building Cache [" + reader.MaxDoc() + "]");*/

        idx = new ShapeFieldCache<T>(reader.MaxDoc, m_defaultSize);
        int count = 0;
        DocsEnum docs = null;
        Terms terms = reader.GetTerms(m_shapeField);
        TermsEnum te = null;
        if (terms != null)
        {
            te = terms.GetIterator(te);
            BytesRef term = te.Next();
            while (term != null)
            {
                T shape = ReadShape(term);
                if (shape != null)
                {
                    docs = te.Docs(null, docs, DocsFlags.NONE);
                    int docid = docs.NextDoc();
                    while (docid != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        idx.Add(docid, shape);
                        docid = docs.NextDoc();
                        count++;
                    }
                }
                term = te.Next();
            }
        }
        sidx[reader] = idx;

        /*long elapsed = Runtime.CurrentTimeMillis() - startTime;
         * log.Fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);*/

        return idx;
    }
}
public virtual DocsEnum RandomDocsEnum(string field, BytesRef term, IList<AtomicReaderContext> readers, IBits bits)
{
    if (Random().Next(10) == 0)
    {
        return null;
    }
    AtomicReader indexReader = (AtomicReader)readers[Random().Next(readers.Count)].Reader;
    Terms terms = indexReader.GetTerms(field);
    if (terms == null)
    {
        return null;
    }
    TermsEnum iterator = terms.GetIterator(null);
    if (iterator.SeekExact(term))
    {
        return iterator.Docs(bits, null, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
    }
    return null;
}
protected int GetNegativeValueCount(AtomicReader reader, string field)
{
    int ret = 0;
    Terms terms = reader.GetTerms(field);
    if (terms == null)
    {
        return ret;
    }
    TermsEnum termsEnum = terms.GetIterator(null);
    BytesRef text;
    while ((text = termsEnum.Next()) != null)
    {
        if (!text.Utf8ToString().StartsWith("-"))
        {
            break;
        }
        ret++;
    }
    return ret;
}
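The example above scans terms with the older Terms.GetIterator(null)/TermsEnum.Next() API, while other examples on this page use the newer GetEnumerator()/MoveNext()/Term pattern. As a hedged sketch only, the same count written against the enumerator-style API might look like this (the method name is hypothetical; the logic mirrors the original, which relies on "-"-prefixed terms sorting first):

// Sketch: same negative-value count using the enumerator-style TermsEnum API.
protected int GetNegativeValueCountNewApi(AtomicReader reader, string field)
{
    int ret = 0;
    Terms terms = reader.GetTerms(field);
    if (terms == null)
    {
        return ret;
    }
    TermsEnum termsEnum = terms.GetEnumerator();
    while (termsEnum.MoveNext())
    {
        // terms are sorted, so the "-"-prefixed (negative) values come first
        if (!termsEnum.Term.Utf8ToString().StartsWith("-", StringComparison.Ordinal))
        {
            break;
        }
        ret++;
    }
    return ret;
}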
public void TestSameFieldAddedMultipleTimes()
{
    MemoryIndex mindex = new MemoryIndex(Random.nextBoolean(), Random.nextInt(50) * 1024 * 1024);
    MockAnalyzer mockAnalyzer = new MockAnalyzer(Random);
    mindex.AddField("field", "the quick brown fox", mockAnalyzer);
    mindex.AddField("field", "jumps over the", mockAnalyzer);
    AtomicReader reader = (AtomicReader)mindex.CreateSearcher().IndexReader;
    assertEquals(7, reader.GetTerms("field").SumTotalTermFreq);
    PhraseQuery query = new PhraseQuery();
    query.Add(new Term("field", "fox"));
    query.Add(new Term("field", "jumps"));
    assertTrue(mindex.Search(query) > 0.1);

    mindex.Reset();
    mockAnalyzer.SetPositionIncrementGap(1 + Random.nextInt(10));
    mindex.AddField("field", "the quick brown fox", mockAnalyzer);
    mindex.AddField("field", "jumps over the", mockAnalyzer);
    assertEquals(0, mindex.Search(query), 0.00001f);
    query.Slop = 10;
    assertTrue("posGap" + mockAnalyzer.GetPositionIncrementGap("field"), mindex.Search(query) > 0.0001);
}
public void TestDocsEnumStart()
{
    Analyzer analyzer = new MockAnalyzer(Random);
    MemoryIndex memory = new MemoryIndex(Random.nextBoolean(), Random.nextInt(50) * 1024 * 1024);
    memory.AddField("foo", "bar", analyzer);
    AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;
    DocsEnum disi = TestUtil.Docs(Random, reader, "foo", new BytesRef("bar"), null, null, DocsFlags.NONE);
    int docid = disi.DocID;
    assertEquals(-1, docid);
    assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

    // now reuse and check again
    TermsEnum te = reader.GetTerms("foo").GetEnumerator();
    assertTrue(te.SeekExact(new BytesRef("bar")));
    disi = te.Docs(null, disi, DocsFlags.NONE);
    docid = disi.DocID;
    assertEquals(-1, docid);
    assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
    reader.Dispose();
}
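Most of these examples share one iteration pattern: get the field's Terms from the AtomicReader, walk or seek the TermsEnum, and pull a reusable DocsEnum per term. A minimal sketch of that shared pattern, assuming "reader" is an already-open AtomicReader and using a hypothetical field name:

// Sketch of the GetTerms -> TermsEnum -> DocsEnum pattern used throughout this page.
Terms terms = reader.GetTerms("body");
if (terms != null)
{
    TermsEnum te = terms.GetEnumerator();
    DocsEnum docs = null; // passed back in so the postings enum can be reused across terms
    while (te.MoveNext())
    {
        docs = te.Docs(null, docs, DocsFlags.NONE); // null live-docs = do not filter deleted docs
        int docId;
        while ((docId = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
        {
            // process (te.Term, docId) here
        }
    }
}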
private IDictionary <int, object> HighlightField(string field, string[] contents, BreakIterator bi, BytesRef[] terms, int[] docids, IList <AtomicReaderContext> leaves, int maxPassages, Query query) { IDictionary <int, object> highlights = new Dictionary <int, object>(); PassageFormatter fieldFormatter = GetFormatter(field); if (fieldFormatter is null) { // LUCENENET: Changed from NullPointerException to InvalidOperationException (which isn't caught anywhere outside of tests) throw IllegalStateException.Create("PassageFormatter cannot be null"); } // check if we should do any multiterm processing Analyzer analyzer = GetIndexAnalyzer(field); CharacterRunAutomaton[] automata = Arrays.Empty <CharacterRunAutomaton>(); if (analyzer != null) { automata = MultiTermHighlighting.ExtractAutomata(query, field); } // resize 'terms', where the last term is the multiterm matcher if (automata.Length > 0) { BytesRef[] newTerms = new BytesRef[terms.Length + 1]; System.Array.Copy(terms, 0, newTerms, 0, terms.Length); terms = newTerms; } // we are processing in increasing docid order, so we only need to reinitialize stuff on segment changes // otherwise, we will just advance() existing enums to the new document in the same segment. DocsAndPositionsEnum[] postings = null; TermsEnum termsEnum = null; int lastLeaf = -1; for (int i = 0; i < docids.Length; i++) { string content = contents[i]; if (content.Length == 0) { continue; // nothing to do } bi.SetText(content); int doc = docids[i]; int leaf = ReaderUtil.SubIndex(doc, leaves); AtomicReaderContext subContext = leaves[leaf]; AtomicReader r = subContext.AtomicReader; if (Debugging.AssertsEnabled) { Debugging.Assert(leaf >= lastLeaf); // increasing order } // if the segment has changed, we must initialize new enums. if (leaf != lastLeaf) { Terms t = r.GetTerms(field); if (t != null) { termsEnum = t.GetEnumerator(); postings = new DocsAndPositionsEnum[terms.Length]; } } if (termsEnum is null) { continue; // no terms for this field, nothing to do } // if there are multi-term matches, we have to initialize the "fake" enum for each document if (automata.Length > 0) { DocsAndPositionsEnum dp = MultiTermHighlighting.GetDocsEnum(analyzer.GetTokenStream(field, content), automata); dp.Advance(doc - subContext.DocBase); postings[terms.Length - 1] = dp; // last term is the multiterm matcher } Passage[] passages = HighlightDoc(field, terms, content.Length, bi, doc - subContext.DocBase, termsEnum, postings, maxPassages); if (passages.Length == 0) { // no passages were returned, so ask for a default summary passages = GetEmptyHighlight(field, bi, maxPassages); } if (passages.Length > 0) { highlights[doc] = fieldFormatter.Format(passages, content); } lastLeaf = leaf; } return(highlights); }
public void TestRandomIndex() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, dir, analyzer); CreateRandomIndex(AtLeast(50), w, Random.NextInt64()); DirectoryReader reader = w.GetReader(); AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader); string field = @"body"; Terms terms = wrapper.GetTerms(field); var lowFreqQueue = new AnonymousPriorityQueue(this, 5); Util.PriorityQueue<TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5); try { TermsEnum iterator = terms.GetIterator(null); while (iterator.Next() != null) { if (highFreqQueue.Count < 5) { highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); } else { if (highFreqQueue.Top.freq < iterator.DocFreq) { highFreqQueue.Top.freq = iterator.DocFreq; highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); highFreqQueue.UpdateTop(); } if (lowFreqQueue.Top.freq > iterator.DocFreq) { lowFreqQueue.Top.freq = iterator.DocFreq; lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); lowFreqQueue.UpdateTop(); } } } int lowFreq = lowFreqQueue.Top.freq; int highFreq = highFreqQueue.Top.freq; AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq); List<TermAndFreq> highTerms = QueueToList(highFreqQueue); List<TermAndFreq> lowTerms = QueueToList(lowFreqQueue); IndexSearcher searcher = NewSearcher(reader); Occur lowFreqOccur = RandomOccur(Random); BooleanQuery verifyQuery = new BooleanQuery(); CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur, highFreq - 1, Random.NextBoolean()); foreach (TermAndFreq termAndFreq in lowTerms) { cq.Add(new Term(field, termAndFreq.term)); verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur)); } foreach (TermAndFreq termAndFreq in highTerms) { cq.Add(new Term(field, termAndFreq.term)); } TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc); TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc); assertEquals(verifySearch.TotalHits, cqSearch.TotalHits); var hits = new JCG.HashSet<int>(); foreach (ScoreDoc doc in verifySearch.ScoreDocs) { hits.Add(doc.Doc); } foreach (ScoreDoc doc in cqSearch.ScoreDocs) { assertTrue(hits.Remove(doc.Doc)); } assertTrue(hits.Count == 0); w.ForceMerge(1); DirectoryReader reader2 = w.GetReader(); QueryUtils.Check( #if FEATURE_INSTANCE_TESTDATA_INITIALIZATION this, #endif Random, cq, NewSearcher(reader2)); reader2.Dispose(); } finally { reader.Dispose(); wrapper.Dispose(); w.Dispose(); dir.Dispose(); } }
private void Verify(Directory dir)
{
    DirectoryReader ir = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext leaf in ir.Leaves)
    {
        AtomicReader leafReader = (AtomicReader)leaf.Reader;
        AssertTerms(leafReader.GetTerms("field1docs"), leafReader.GetTerms("field2freqs"), true);
        AssertTerms(leafReader.GetTerms("field3positions"), leafReader.GetTerms("field4offsets"), true);
        AssertTerms(leafReader.GetTerms("field4offsets"), leafReader.GetTerms("field5payloadsFixed"), true);
        AssertTerms(leafReader.GetTerms("field5payloadsFixed"), leafReader.GetTerms("field6payloadsVariable"), true);
        AssertTerms(leafReader.GetTerms("field6payloadsVariable"), leafReader.GetTerms("field7payloadsFixedOffsets"), true);
        AssertTerms(leafReader.GetTerms("field7payloadsFixedOffsets"), leafReader.GetTerms("field8payloadsVariableOffsets"), true);
    }
    ir.Dispose();
}
public void TestRandomIndex() { Directory dir = NewDirectory(); MockAnalyzer analyzer = new MockAnalyzer(Random); analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH); RandomIndexWriter w = new RandomIndexWriter(Random, dir, analyzer); CreateRandomIndex(AtLeast(50), w, Random.NextInt64()); DirectoryReader reader = w.GetReader(); AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader); string field = @"body"; Terms terms = wrapper.GetTerms(field); var lowFreqQueue = new PriorityQueueAnonymousClass(5); var highFreqQueue = new PriorityQueueAnonymousClass1(5); try { TermsEnum iterator = terms.GetEnumerator(); while (iterator.MoveNext()) { if (highFreqQueue.Count < 5) { highFreqQueue.Add(new TermAndFreq( BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); lowFreqQueue.Add(new TermAndFreq( BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq)); } else { if (highFreqQueue.Top.freq < iterator.DocFreq) { highFreqQueue.Top.freq = iterator.DocFreq; highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); highFreqQueue.UpdateTop(); } if (lowFreqQueue.Top.freq > iterator.DocFreq) { lowFreqQueue.Top.freq = iterator.DocFreq; lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term); lowFreqQueue.UpdateTop(); } } } int lowFreq = lowFreqQueue.Top.freq; int highFreq = highFreqQueue.Top.freq; AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq); IList <TermAndFreq> highTerms = QueueToList(highFreqQueue); IList <TermAndFreq> lowTerms = QueueToList(lowFreqQueue); IndexSearcher searcher = NewSearcher(reader); Occur lowFreqOccur = RandomOccur(Random); BooleanQuery verifyQuery = new BooleanQuery(); CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur, highFreq - 1, Random.NextBoolean()); foreach (TermAndFreq termAndFreq in lowTerms) { cq.Add(new Term(field, termAndFreq.term)); verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur)); } foreach (TermAndFreq termAndFreq in highTerms) { cq.Add(new Term(field, termAndFreq.term)); } TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc); TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc); assertEquals(verifySearch.TotalHits, cqSearch.TotalHits); var hits = new JCG.HashSet <int>(); foreach (ScoreDoc doc in verifySearch.ScoreDocs) { hits.Add(doc.Doc); } foreach (ScoreDoc doc in cqSearch.ScoreDocs) { assertTrue(hits.Remove(doc.Doc)); } assertTrue(hits.Count == 0); /* * need to force merge here since QueryUtils adds checks based * on leave readers which have different statistics than the top * level reader if we have more than one segment. This could * result in a different query / results. */ w.ForceMerge(1); DirectoryReader reader2 = w.GetReader(); QueryUtils.Check(Random, cq, NewSearcher(reader2)); reader2.Dispose(); } finally { reader.Dispose(); wrapper.Dispose(); w.Dispose(); dir.Dispose(); } }
private void DuellReaders(CompositeReader other, AtomicReader memIndexReader) { AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other); Fields memFields = memIndexReader.Fields; foreach (string field in competitor.Fields) { Terms memTerms = memFields.GetTerms(field); Terms iwTerms = memIndexReader.GetTerms(field); if (iwTerms is null) { assertNull(memTerms); } else { NumericDocValues normValues = competitor.GetNormValues(field); NumericDocValues memNormValues = memIndexReader.GetNormValues(field); if (normValues != null) { // mem idx always computes norms on the fly assertNotNull(memNormValues); assertEquals(normValues.Get(0), memNormValues.Get(0)); } assertNotNull(memTerms); assertEquals(iwTerms.DocCount, memTerms.DocCount); assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq); assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq); TermsEnum iwTermsIter = iwTerms.GetEnumerator(); TermsEnum memTermsIter = memTerms.GetEnumerator(); if (iwTerms.HasPositions) { bool offsets = iwTerms.HasOffsets && memTerms.HasOffsets; while (iwTermsIter.MoveNext()) { assertTrue(memTermsIter.MoveNext()); assertEquals(iwTermsIter.Term, memTermsIter.Term); DocsAndPositionsEnum iwDocsAndPos = iwTermsIter.DocsAndPositions(null, null); DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null); while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) { assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc()); assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq); for (int i = 0; i < iwDocsAndPos.Freq; i++) { assertEquals("term: " + iwTermsIter.Term.Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition()); if (offsets) { assertEquals(iwDocsAndPos.StartOffset, memDocsAndPos.StartOffset); assertEquals(iwDocsAndPos.EndOffset, memDocsAndPos.EndOffset); } } } } } else { while (iwTermsIter.MoveNext()) { assertEquals(iwTermsIter.Term, memTermsIter.Term); DocsEnum iwDocsAndPos = iwTermsIter.Docs(null, null); DocsEnum memDocsAndPos = memTermsIter.Docs(null, null); while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS) { assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc()); assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq); } } } } } }
private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, bool multipleValuesPerDocument, bool scoreDocsInOrder) { IndexIterationContext context = new IndexIterationContext(); int numRandomValues = nDocs / 2; context.RandomUniqueValues = new string[numRandomValues]; ISet <string> trackSet = new JCG.HashSet <string>(); context.RandomFrom = new bool[numRandomValues]; for (int i = 0; i < numRandomValues; i++) { string uniqueRandomValue; do { uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random); // uniqueRandomValue = TestUtil.randomSimpleString(random); } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue)); // Generate unique values and empty strings aren't allowed. trackSet.Add(uniqueRandomValue); context.RandomFrom[i] = Random.NextBoolean(); context.RandomUniqueValues[i] = uniqueRandomValue; } RandomDoc[] docs = new RandomDoc[nDocs]; for (int i = 0; i < nDocs; i++) { string id = Convert.ToString(i, CultureInfo.InvariantCulture); int randomI = Random.Next(context.RandomUniqueValues.Length); string value = context.RandomUniqueValues[randomI]; Document document = new Document(); document.Add(NewTextField(Random, "id", id, Field.Store.NO)); document.Add(NewTextField(Random, "value", value, Field.Store.NO)); bool from = context.RandomFrom[randomI]; int numberOfLinkValues = multipleValuesPerDocument ? 2 + Random.Next(10) : 1; docs[i] = new RandomDoc(id, numberOfLinkValues, value, from); for (int j = 0; j < numberOfLinkValues; j++) { string linkValue = context.RandomUniqueValues[Random.Next(context.RandomUniqueValues.Length)]; docs[i].linkValues.Add(linkValue); if (from) { if (!context.FromDocuments.TryGetValue(linkValue, out IList <RandomDoc> fromDocs)) { context.FromDocuments[linkValue] = fromDocs = new JCG.List <RandomDoc>(); } if (!context.RandomValueFromDocs.TryGetValue(value, out IList <RandomDoc> randomValueFromDocs)) { context.RandomValueFromDocs[value] = randomValueFromDocs = new JCG.List <RandomDoc>(); } fromDocs.Add(docs[i]); randomValueFromDocs.Add(docs[i]); document.Add(NewTextField(Random, "from", linkValue, Field.Store.NO)); } else { if (!context.ToDocuments.TryGetValue(linkValue, out IList <RandomDoc> toDocuments)) { context.ToDocuments[linkValue] = toDocuments = new JCG.List <RandomDoc>(); } if (!context.RandomValueToDocs.TryGetValue(value, out IList <RandomDoc> randomValueToDocs)) { context.RandomValueToDocs[value] = randomValueToDocs = new JCG.List <RandomDoc>(); } toDocuments.Add(docs[i]); randomValueToDocs.Add(docs[i]); document.Add(NewTextField(Random, "to", linkValue, Field.Store.NO)); } } RandomIndexWriter w; if (from) { w = fromWriter; } else { w = toWriter; } w.AddDocument(document); if (Random.Next(10) == 4) { w.Commit(); } if (Verbose) { Console.WriteLine("Added document[" + docs[i].id + "]: " + document); } } // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for // any ScoreMode. 
IndexSearcher fromSearcher = NewSearcher(fromWriter.GetReader()); IndexSearcher toSearcher = NewSearcher(toWriter.GetReader()); for (int i = 0; i < context.RandomUniqueValues.Length; i++) { string uniqueRandomValue = context.RandomUniqueValues[i]; string fromField; string toField; IDictionary <string, IDictionary <int, JoinScore> > queryVals; if (context.RandomFrom[i]) { fromField = "from"; toField = "to"; queryVals = context.FromHitsToJoinScore; } else { fromField = "to"; toField = "from"; queryVals = context.ToHitsToJoinScore; } IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>(); if (multipleValuesPerDocument) { fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousClass3(fromField, joinValueToJoinScores)); } else { fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousClass4(fromField, joinValueToJoinScores)); } IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>(); if (multipleValuesPerDocument) { if (scoreDocsInOrder) { AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader); Terms terms = slowCompositeReader.GetTerms(toField); if (terms != null) { DocsEnum docsEnum = null; TermsEnum termsEnum = null; JCG.SortedSet <BytesRef> joinValues = new JCG.SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer); joinValues.UnionWith(joinValueToJoinScores.Keys); foreach (BytesRef joinValue in joinValues) { termsEnum = terms.GetEnumerator(termsEnum); if (termsEnum.SeekExact(joinValue)) { docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE); JoinScore joinScore = joinValueToJoinScores[joinValue]; for (int doc = docsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.NextDoc()) { // First encountered join value determines the score. // Something to keep in mind for many-to-many relations. if (!docToJoinScore.ContainsKey(doc)) { docToJoinScore[doc] = joinScore; } } } } } } else { toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousClass5(toField, joinValueToJoinScores, docToJoinScore)); } } else { toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousClass6(toField, joinValueToJoinScores, docToJoinScore)); } queryVals[uniqueRandomValue] = docToJoinScore; } fromSearcher.IndexReader.Dispose(); toSearcher.IndexReader.Dispose(); return(context); }
public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs) { Debug.Assert(outerInstance.termArrays.Count > 0); AtomicReader reader = (context.AtomicReader); IBits liveDocs = acceptDocs; PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[outerInstance.termArrays.Count]; Terms fieldTerms = reader.GetTerms(outerInstance.field); if (fieldTerms == null) { return(null); } // Reuse single TermsEnum below: TermsEnum termsEnum = fieldTerms.GetIterator(null); for (int pos = 0; pos < postingsFreqs.Length; pos++) { Term[] terms = outerInstance.termArrays[pos]; DocsAndPositionsEnum postingsEnum; int docFreq; if (terms.Length > 1) { postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum); // coarse -- this overcounts since a given doc can // have more than one term: docFreq = 0; for (int termIdx = 0; termIdx < terms.Length; termIdx++) { Term term = terms[termIdx]; TermState termState = termContexts[term].Get(context.Ord); if (termState == null) { // Term not in reader continue; } termsEnum.SeekExact(term.Bytes, termState); docFreq += termsEnum.DocFreq; } if (docFreq == 0) { // None of the terms are in this reader return(null); } } else { Term term = terms[0]; TermState termState = termContexts[term].Get(context.Ord); if (termState == null) { // Term not in reader return(null); } termsEnum.SeekExact(term.Bytes, termState); postingsEnum = termsEnum.DocsAndPositions(liveDocs, null, DocsAndPositionsFlags.NONE); if (postingsEnum == null) { // term does exist, but has no positions Debug.Assert(termsEnum.Docs(liveDocs, null, DocsFlags.NONE) != null, "termstate found but no term exists in reader"); throw new InvalidOperationException("field \"" + term.Field + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.Text() + ")"); } docFreq = termsEnum.DocFreq; } postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, (int)outerInstance.positions[pos], terms); } // sort by increasing docFreq order if (outerInstance.slop == 0) { ArrayUtil.TimSort(postingsFreqs); } if (outerInstance.slop == 0) { ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.GetSimScorer(stats, context)); if (s.noDocs) { return(null); } else { return(s); } } else { return(new SloppyPhraseScorer(this, postingsFreqs, outerInstance.slop, similarity.GetSimScorer(stats, context))); } }
public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory) { #if FEATURE_STRING_INTERN string field = string.Intern(fieldName); #else string field = fieldName; #endif int maxDoc = reader.MaxDoc; int dictValueCount = GetDictValueCount(reader, fieldName); BigSegmentedArray order = NewInstance(dictValueCount, maxDoc); this.m_orderArray = order; List <int> minIDList = new List <int>(); List <int> maxIDList = new List <int>(); List <int> freqList = new List <int>(); int length = maxDoc + 1; ITermValueList list = listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList(); int negativeValueCount = GetNegativeValueCount(reader, field); int t = 1; // valid term id starts from 1 list.Add(null); minIDList.Add(-1); maxIDList.Add(-1); freqList.Add(0); int totalFreq = 0; Terms terms = reader.GetTerms(field); if (terms != null) { TermsEnum termsEnum = terms.GetIterator(null); BytesRef text; while ((text = termsEnum.Next()) != null) { // store term text // we expect that there is at most one term per document if (t >= length) { throw new RuntimeException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields"); } string strText = text.Utf8ToString(); list.Add(strText); Term term = new Term(field, strText); DocsEnum docsEnum = reader.GetTermDocsEnum(term); // freqList.add(termEnum.docFreq()); // doesn't take into account // deldocs int minID = -1; int maxID = -1; int docID = -1; int df = 0; int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t; while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS) { df++; order.Add(docID, valId); minID = docID; while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS) { docID = docsEnum.DocID; df++; order.Add(docID, valId); } maxID = docID; } freqList.Add(df); totalFreq += df; minIDList.Add(minID); maxIDList.Add(maxID); t++; } } list.Seal(); this.m_valArray = list; this.m_freqs = freqList.ToArray(); this.m_minIDs = minIDList.ToArray(); this.m_maxIDs = maxIDList.ToArray(); int doc = 0; while (doc < maxDoc && order.Get(doc) != 0) { ++doc; } if (doc < maxDoc) { this.m_minIDs[0] = doc; // Try to get the max doc = maxDoc - 1; while (doc >= 0 && order.Get(doc) != 0) { --doc; } this.m_maxIDs[0] = doc; } this.m_freqs[0] = reader.NumDocs - totalFreq; }
/// <summary> /// loads multi-value facet data. This method uses a workarea to prepare loading. /// </summary> /// <param name="fieldName"></param> /// <param name="reader"></param> /// <param name="listFactory"></param> /// <param name="workArea"></param> public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea) { #if FEATURE_STRING_INTERN string field = string.Intern(fieldName); #else string field = fieldName; #endif int maxdoc = reader.MaxDoc; BigNestedInt32Array.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea); ITermValueList list = (listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList()); List <int> minIDList = new List <int>(); List <int> maxIDList = new List <int>(); List <int> freqList = new List <int>(); OpenBitSet bitset = new OpenBitSet(maxdoc + 1); int negativeValueCount = GetNegativeValueCount(reader, field); int t = 1; // valid term id starts from 1 list.Add(null); minIDList.Add(-1); maxIDList.Add(-1); freqList.Add(0); m_overflow = false; Terms terms = reader.GetTerms(field); if (terms != null) { TermsEnum termsEnum = terms.GetIterator(null); BytesRef text; while ((text = termsEnum.Next()) != null) { string strText = text.Utf8ToString(); list.Add(strText); Term term = new Term(field, strText); DocsEnum docsEnum = reader.GetTermDocsEnum(term); int df = 0; int minID = -1; int maxID = -1; int docID = -1; int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t; while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS) { df++; if (!loader.Add(docID, valId)) { LogOverflow(fieldName); } minID = docID; bitset.FastSet(docID); while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS) { docID = docsEnum.DocID; df++; if (!loader.Add(docID, valId)) { LogOverflow(fieldName); } bitset.FastSet(docID); } maxID = docID; } freqList.Add(df); minIDList.Add(minID); maxIDList.Add(maxID); t++; } } list.Seal(); try { m_nestedArray.Load(maxdoc + 1, loader); } catch (Exception e) { throw new RuntimeException("failed to load due to " + e.ToString(), e); } this.m_valArray = list; this.m_freqs = freqList.ToArray(); this.m_minIDs = minIDList.ToArray(); this.m_maxIDs = maxIDList.ToArray(); int doc = 0; while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true)) { ++doc; } if (doc < maxdoc) { this.m_minIDs[0] = doc; doc = maxdoc - 1; while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true)) { --doc; } this.m_maxIDs[0] = doc; } this.m_freqs[0] = maxdoc - (int)bitset.Cardinality(); }
public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs) { Debug.Assert(outerInstance.terms.Count > 0); AtomicReader reader = context.AtomicReader; IBits liveDocs = acceptDocs; PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[outerInstance.terms.Count]; Terms fieldTerms = reader.GetTerms(outerInstance.field); if (fieldTerms == null) { return(null); } // Reuse single TermsEnum below: TermsEnum te = fieldTerms.GetIterator(null); for (int i = 0; i < outerInstance.terms.Count; i++) { Term t = outerInstance.terms[i]; TermState state = states[i].Get(context.Ord); if (state == null) // term doesnt exist in this segment { Debug.Assert(TermNotInReader(reader, t), "no termstate found but term exists in reader"); return(null); } te.SeekExact(t.Bytes, state); DocsAndPositionsEnum postingsEnum = te.DocsAndPositions(liveDocs, null, DocsAndPositionsFlags.NONE); // PhraseQuery on a field that did not index // positions. if (postingsEnum == null) { Debug.Assert(te.SeekExact(t.Bytes), "termstate found but no term exists in reader"); // term does exist, but has no positions throw new InvalidOperationException("field \"" + t.Field + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.Text() + ")"); } postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.DocFreq, (int)outerInstance.positions[i], t); } // sort by increasing docFreq order if (outerInstance.slop == 0) { ArrayUtil.TimSort(postingsFreqs); } if (outerInstance.slop == 0) // optimize exact case { ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, similarity.GetSimScorer(stats, context)); if (s.noDocs) { return(null); } else { return(s); } } else { return(new SloppyPhraseScorer(this, postingsFreqs, outerInstance.slop, similarity.GetSimScorer(stats, context))); } }
public override void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea) { #if FEATURE_STRING_INTERN string field = string.Intern(fieldName); #else string field = fieldName; #endif int maxdoc = reader.MaxDoc; BigNestedInt32Array.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea); BigNestedInt32Array.BufferedLoader weightLoader = GetBufferedLoader(maxdoc, null); var list = (listFactory == null ? new TermStringList() : listFactory.CreateTermList()); List <int> minIDList = new List <int>(); List <int> maxIDList = new List <int>(); List <int> freqList = new List <int>(); OpenBitSet bitset = new OpenBitSet(maxdoc + 1); int negativeValueCount = GetNegativeValueCount(reader, field); int t = 1; // valid term id starts from 1 list.Add(null); minIDList.Add(-1); maxIDList.Add(-1); freqList.Add(0); m_overflow = false; string pre = null; int df = 0; int minID = -1; int maxID = -1; int docID = -1; int valId = 0; Terms terms = reader.GetTerms(field); if (terms != null) { TermsEnum termsEnum = terms.GetIterator(null); BytesRef text; while ((text = termsEnum.Next()) != null) { string strText = text.Utf8ToString(); string val = null; int weight = 0; string[] split = strText.Split(new char[] { '\0' }, StringSplitOptions.RemoveEmptyEntries); if (split.Length > 1) { val = split[0]; weight = int.Parse(split[split.Length - 1]); } else { continue; } if (pre == null || !val.Equals(pre)) { if (pre != null) { freqList.Add(df); minIDList.Add(minID); maxIDList.Add(maxID); } list.Add(val); df = 0; minID = -1; maxID = -1; valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t; t++; } Term term = new Term(field, strText); DocsEnum docsEnum = reader.GetTermDocsEnum(term); if (docsEnum != null) { while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS) { df++; if (!loader.Add(docID, valId)) { LogOverflow(fieldName); } else { weightLoader.Add(docID, weight); } if (docID < minID) { minID = docID; } bitset.FastSet(docID); while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS) { docID = docsEnum.DocID; df++; if (!loader.Add(docID, valId)) { LogOverflow(fieldName); } else { weightLoader.Add(docID, weight); } bitset.FastSet(docID); } if (docID > maxID) { maxID = docID; } } } pre = val; } if (pre != null) { freqList.Add(df); minIDList.Add(minID); maxIDList.Add(maxID); } } list.Seal(); try { m_nestedArray.Load(maxdoc + 1, loader); m_weightArray.Load(maxdoc + 1, weightLoader); } catch (Exception e) { throw new RuntimeException("failed to load due to " + e.ToString(), e); } this.m_valArray = list; this.m_freqs = freqList.ToArray(); this.m_minIDs = minIDList.ToArray(); this.m_maxIDs = maxIDList.ToArray(); int doc = 0; while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true)) { ++doc; } if (doc < maxdoc) { this.m_minIDs[0] = doc; doc = maxdoc - 1; while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true)) { --doc; } this.m_maxIDs[0] = doc; } this.m_freqs[0] = maxdoc - (int)bitset.Cardinality(); }