Exemple #1
0
        public virtual void TestReuseDocsEnumDifferentReader()
        {
            Directory    dir      = NewDirectory();
            Codec        cp       = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);

            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random);
            writer.Commit();

            DirectoryReader             firstReader  = DirectoryReader.Open(dir);
            DirectoryReader             secondReader = DirectoryReader.Open(dir);
            IList <AtomicReaderContext> leaves       = firstReader.Leaves;
            IList <AtomicReaderContext> leaves2      = secondReader.Leaves;

            foreach (AtomicReaderContext ctx in leaves)
            {
                Terms     terms    = ((AtomicReader)ctx.Reader).GetTerms("body");
                TermsEnum iterator = terms.GetEnumerator();
                IDictionary <DocsEnum, bool?> enums = new JCG.Dictionary <DocsEnum, bool?>(IdentityEqualityComparer <DocsEnum> .Default);
                MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
                iterator = terms.GetEnumerator();
                DocsEnum docs = null;
                BytesRef term = null;
                while (iterator.MoveNext())
                {
                    term        = iterator.Term;
                    docs        = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Count, enums.Count);

                iterator = terms.GetEnumerator();
                enums.Clear();
                docs = null;
                while (iterator.MoveNext())
                {
                    term        = iterator.Term;
                    docs        = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Count, enums.Count);
            }
            IOUtils.Dispose(writer, firstReader, secondReader, dir);
        }
Exemple #2
0
        public virtual void TestDocsEnum()
        {
            IBits     mappedLiveDocs = RandomLiveDocs(reader.MaxDoc);
            TermsEnum termsEnum      = reader.GetTerms(DOCS_ENUM_FIELD).GetIterator(null);

            assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(DOCS_ENUM_TERM)));
            DocsEnum docs = termsEnum.Docs(mappedLiveDocs, null);

            int doc;
            int prev = -1;

            while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                assertTrue("document " + doc + " marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(doc));
                assertEquals("incorrect value; doc " + doc, sortedValues[doc], int.Parse(reader.Document(doc).Get(ID_FIELD)));
                while (++prev < doc)
                {
                    assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
                }
            }
            while (++prev < reader.MaxDoc)
            {
                assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
            }

            DocsEnum reuse = docs;

            docs = termsEnum.Docs(mappedLiveDocs, reuse);
            if (docs is SortingAtomicReader.SortingDocsEnum)
            {
                assertTrue(((SortingAtomicReader.SortingDocsEnum)docs).Reused(reuse)); // make sure reuse worked
            }
            doc  = -1;
            prev = -1;
            while ((doc = docs.Advance(doc + 1)) != DocIdSetIterator.NO_MORE_DOCS)
            {
                assertTrue("document " + doc + " marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(doc));
                assertEquals("incorrect value; doc " + doc, sortedValues[doc], int.Parse(reader.Document(doc).Get(ID_FIELD)));
                while (++prev < doc)
                {
                    assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
                }
            }
            while (++prev < reader.MaxDoc)
            {
                assertFalse("document " + prev + " not marked as deleted", mappedLiveDocs == null || mappedLiveDocs.Get(prev));
            }
        }
Exemple #3
0
        public virtual void TestReuseDocsEnumSameBitsOrNull()
        {
            Directory         dir    = NewDirectory();
            Codec             cp     = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader open = DirectoryReader.Open(dir);

            foreach (AtomicReaderContext ctx in open.Leaves)
            {
                Terms     terms    = ((AtomicReader)ctx.Reader).GetTerms("body");
                TermsEnum iterator = terms.GetIterator(null);
                IdentityHashMap <DocsEnum, bool?> enums = new IdentityHashMap <DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(open.MaxDoc);
                DocsEnum    docs = null;
                while ((iterator.Next()) != null)
                {
                    docs        = iterator.Docs(bits, docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }

                Assert.AreEqual(1, enums.Count);
                enums.Clear();
                iterator = terms.GetIterator(null);
                docs     = null;
                while ((iterator.Next()) != null)
                {
                    docs        = iterator.Docs(new MatchNoBits(open.MaxDoc), docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Count, enums.Count);

                enums.Clear();
                iterator = terms.GetIterator(null);
                docs     = null;
                while ((iterator.Next()) != null)
                {
                    docs        = iterator.Docs(null, docs, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(1, enums.Count);
            }
            IOUtils.Close(writer, open, dir);
        }
            public virtual void Reset()
            {
                // no one should call us for deleted docs?

                if (terms != null)
                {
                    TermsEnum termsEnum = terms.GetEnumerator();
                    if (termsEnum.SeekExact(outerInstance.m_indexedBytes))
                    {
                        docs = termsEnum.Docs(null, null);
                    }
                    else
                    {
                        docs = null;
                    }
                }
                else
                {
                    docs = null;
                }

                if (docs == null)
                {
                    docs = new DocsEnumAnonymousClass();
                }
                atDoc = -1;
            }
Exemple #5
0
            private int NextDocOutOfOrder()
            {
                while (true)
                {
                    if (docsEnum != null)
                    {
                        int docId = DocsEnumNextDoc();
                        if (docId == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            docsEnum = null;
                        }
                        else
                        {
                            return(_doc = docId);
                        }
                    }

                    if (_upto == outerInstance._terms.Count)
                    {
                        return(_doc = DocIdSetIterator.NO_MORE_DOCS);
                    }

                    _scoreUpto = _upto;
                    if (_termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[_upto++], _spare)))
                    {
                        docsEnum = _reuse = _termsEnum.Docs(_acceptDocs, _reuse, DocsFlags.NONE);
                    }
                }
            }
        private FixedBitSet FastBits(AtomicReader reader, IBits acceptDocs)
        {
            FixedBitSet bits = new FixedBitSet(reader.MaxDoc);

            bits.Set(0, reader.MaxDoc); //assume all are valid
            Terms terms = reader.Fields.GetTerms(fieldName);

            if (terms == null)
            {
                return(bits);
            }

            TermsEnum termsEnum = terms.GetIterator(null);
            DocsEnum  docs      = null;

            while (true)
            {
                BytesRef currTerm = termsEnum.Next();
                if (currTerm == null)
                {
                    break;
                }
                else
                {
                    if (termsEnum.DocFreq > 1)
                    {
                        // unset potential duplicates
                        docs = termsEnum.Docs(acceptDocs, docs, DocsFlags.NONE);
                        int doc = docs.NextDoc();
                        if (doc != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE)
                            {
                                doc = docs.NextDoc();
                            }
                        }

                        int lastDoc = -1;
                        while (true)
                        {
                            lastDoc = doc;
                            bits.Clear(lastDoc);
                            doc = docs.NextDoc();
                            if (doc == DocIdSetIterator.NO_MORE_DOCS)
                            {
                                break;
                            }
                        }

                        if (keepMode == KeepMode.KM_USE_LAST_OCCURRENCE)
                        {
                            // restore the last bit
                            bits.Set(lastDoc);
                        }
                    }
                }
            }

            return(bits);
        }
Exemple #7
0
        /// <summary>
        /// Takes the categories from the given taxonomy directory, and adds the
        /// missing ones to this taxonomy. Additionally, it fills the given
        /// <see cref="IOrdinalMap"/> with a mapping from the original ordinal to the new
        /// ordinal.
        /// </summary>
        public virtual void AddTaxonomy(Directory taxoDir, IOrdinalMap map)
        {
            EnsureOpen();
            DirectoryReader r = DirectoryReader.Open(taxoDir);

            try
            {
                int         size       = r.NumDocs;
                IOrdinalMap ordinalMap = map;
                ordinalMap.SetSize(size);
                int       @base = 0;
                TermsEnum te    = null;
                DocsEnum  docs  = null;
                foreach (AtomicReaderContext ctx in r.Leaves)
                {
                    AtomicReader ar    = ctx.AtomicReader;
                    Terms        terms = ar.GetTerms(Consts.FULL);
                    te = terms.GetIterator(te);
                    while (te.Next() != null)
                    {
                        FacetLabel cp      = new FacetLabel(FacetsConfig.StringToPath(te.Term.Utf8ToString()));
                        int        ordinal = AddCategory(cp);
                        docs = te.Docs(null, docs, DocsFlags.NONE);
                        ordinalMap.AddMapping(docs.NextDoc() + @base, ordinal);
                    }
                    @base += ar.MaxDoc; // no deletions, so we're ok
                }
                ordinalMap.AddDone();
            }
            finally
            {
                r.Dispose();
            }
        }
Exemple #8
0
        public virtual void TestReuseDocsEnumNoReuse()
        {
            Directory         dir    = NewDirectory();
            Codec             cp     = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat(OLD_FORMAT_IMPERSONATION_IS_ACTIVE));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader open = DirectoryReader.Open(dir);

            foreach (AtomicReaderContext ctx in open.Leaves)
            {
                AtomicReader indexReader = (AtomicReader)ctx.Reader;
                Terms        terms       = indexReader.Terms("body");
                TermsEnum    iterator    = terms.Iterator(null);
                IdentityHashMap <DocsEnum, bool?> enums = new IdentityHashMap <DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(indexReader.MaxDoc);
                while ((iterator.Next()) != null)
                {
                    DocsEnum docs = iterator.Docs(Random().NextBoolean() ? bits : new MatchNoBits(indexReader.MaxDoc), null, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }

                Assert.AreEqual(terms.Size(), enums.Count);
            }
            IOUtils.Close(writer, open, dir);
        }
Exemple #9
0
        public virtual void TestReuseDocsEnumNoReuse()
        {
            Directory         dir    = NewDirectory();
            Codec             cp     = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat());
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random);
            writer.Commit();

            DirectoryReader open = DirectoryReader.Open(dir);

            foreach (AtomicReaderContext ctx in open.Leaves)
            {
                AtomicReader indexReader            = (AtomicReader)ctx.Reader;
                Terms        terms                  = indexReader.GetTerms("body");
                TermsEnum    iterator               = terms.GetEnumerator();
                IDictionary <DocsEnum, bool?> enums = new JCG.Dictionary <DocsEnum, bool?>(IdentityEqualityComparer <DocsEnum> .Default);
                MatchNoBits bits = new MatchNoBits(indexReader.MaxDoc);
                while (iterator.MoveNext())
                {
                    DocsEnum docs = iterator.Docs(Random.NextBoolean() ? bits : new MatchNoBits(indexReader.MaxDoc), null, Random.NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE);
                    enums[docs] = true;
                }

                Assert.AreEqual(terms.Count, enums.Count);
            }
            IOUtils.Dispose(writer, open, dir);
        }
Exemple #10
0
            protected override void FillDocsAndScores(FixedBitSet matchingDocs, IBits acceptDocs,
                                                      TermsEnum termsEnum)
            {
                BytesRef spare    = new BytesRef();
                DocsEnum docsEnum = null;

                for (int i = 0; i < m_outerInstance._terms.Count; i++)
                {
                    if (termsEnum.SeekExact(m_outerInstance._terms.Get(m_outerInstance._ords[i], spare)))
                    {
                        docsEnum = termsEnum.Docs(acceptDocs, docsEnum, DocsFlags.NONE);
                        float score = m_outerInstance._scores[m_outerInstance._ords[i]];
                        for (int doc = docsEnum.NextDoc();
                             doc != NO_MORE_DOCS;
                             doc = docsEnum.NextDoc())
                        {
                            // I prefer this:

                            /*if (scores[doc] < score) {
                             * scores[doc] = score;
                             * matchingDocs.set(doc);
                             * }*/
                            // But this behaves the same as MVInnerScorer and only then the tests will pass:
                            if (!matchingDocs.Get(doc))
                            {
                                scores[doc] = score;
                                matchingDocs.Set(doc);
                            }
                        }
                    }
                }
            }
Exemple #11
0
        public virtual void TestReuseDocsEnumDifferentReader()
        {
            Directory    dir      = NewDirectory();
            Codec        cp       = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat(OLD_FORMAT_IMPERSONATION_IS_ACTIVE));
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader             firstReader  = DirectoryReader.Open(dir);
            DirectoryReader             secondReader = DirectoryReader.Open(dir);
            IList <AtomicReaderContext> leaves       = firstReader.Leaves;
            IList <AtomicReaderContext> leaves2      = secondReader.Leaves;

            foreach (AtomicReaderContext ctx in leaves)
            {
                Terms     terms    = ((AtomicReader)ctx.Reader).Terms("body");
                TermsEnum iterator = terms.Iterator(null);
                IdentityHashMap <DocsEnum, bool?> enums = new IdentityHashMap <DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(firstReader.MaxDoc);
                iterator = terms.Iterator(null);
                DocsEnum docs = null;
                BytesRef term = null;
                while ((term = iterator.Next()) != null)
                {
                    docs        = iterator.Docs(null, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Size(), enums.Count);

                iterator = terms.Iterator(null);
                enums.Clear();
                docs = null;
                while ((term = iterator.Next()) != null)
                {
                    docs        = iterator.Docs(bits, RandomDocsEnum("body", term, leaves2, bits), Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }
                Assert.AreEqual(terms.Size(), enums.Count);
            }
            IOUtils.Close(writer, firstReader, secondReader, dir);
        }
            protected virtual void CollectDocs(FixedBitSet bitSet)
            {
                //WARN: keep this specialization in sync
                Debug.Assert(m_termsEnum != null);
                m_docsEnum = m_termsEnum.Docs(m_acceptDocs, m_docsEnum, DocsFlags.NONE);
                int docid;

                while ((docid = m_docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    bitSet.Set(docid);
                }
            }
Exemple #13
0
            /// <exception cref="System.IO.IOException"></exception>
            protected internal virtual void CollectDocs(FixedBitSet bitSet)
            {
                //WARN: keep this specialization in sync
                Debug.Assert(termsEnum != null);
                docsEnum = termsEnum.Docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE
                                          );
                int docid;

                while ((docid = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                {
                    bitSet.Set(docid);
                }
            }
Exemple #14
0
            public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
            {
                Debug.Assert(termStates.TopReaderContext == ReaderUtil.GetTopLevelContext(context), "The top-reader used to create Weight (" + termStates.TopReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.GetTopLevelContext(context));
                TermsEnum termsEnum = GetTermsEnum(context);

                if (termsEnum == null)
                {
                    return(null);
                }
                DocsEnum docs = termsEnum.Docs(acceptDocs, null);

                Debug.Assert(docs != null);
                return(new TermScorer(this, docs, similarity.GetSimScorer(stats, context)));
            }
Exemple #15
0
        private FixedBitSet CorrectBits(AtomicReader reader, IBits acceptDocs)
        {
            FixedBitSet bits  = new FixedBitSet(reader.MaxDoc); //assume all are INvalid
            Terms       terms = reader.Fields.GetTerms(fieldName);

            if (terms == null)
            {
                return(bits);
            }

            TermsEnum termsEnum = terms.GetIterator(null);
            DocsEnum  docs      = null;

            while (true)
            {
                BytesRef currTerm = termsEnum.Next();
                if (currTerm == null)
                {
                    break;
                }
                else
                {
                    docs = termsEnum.Docs(acceptDocs, docs, DocsFlags.NONE);
                    int doc = docs.NextDoc();
                    if (doc != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE)
                        {
                            bits.Set(doc);
                        }
                        else
                        {
                            int lastDoc = doc;
                            while (true)
                            {
                                lastDoc = doc;
                                doc     = docs.NextDoc();
                                if (doc == DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    break;
                                }
                            }
                            bits.Set(lastDoc);
                        }
                    }
                }
            }
            return(bits);
        }
        /// <summary>
        /// Returns a <see cref="DocIdSet"/> with documents that should be permitted in search
        /// results.
        /// </summary>
        public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
        {
            AtomicReader reader = (context.AtomicReader);
            Fields       fields = reader.Fields;

            if (fields is null)
            {
                // reader has no fields
                return(null);
            }

            Terms terms = fields.GetTerms(m_query.m_field);

            if (terms is null)
            {
                // field does not exist
                return(null);
            }

            TermsEnum termsEnum = m_query.GetTermsEnum(terms);

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(termsEnum != null);
            }
            if (termsEnum.MoveNext())
            {
                // fill into a FixedBitSet
                FixedBitSet bitSet   = new FixedBitSet(context.AtomicReader.MaxDoc);
                DocsEnum    docsEnum = null;
                do
                {
                    // System.out.println("  iter termCount=" + termCount + " term=" +
                    // enumerator.term().toBytesString());
                    docsEnum = termsEnum.Docs(acceptDocs, docsEnum, DocsFlags.NONE);
                    int docid;
                    while ((docid = docsEnum.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        bitSet.Set(docid);
                    }
                } while (termsEnum.MoveNext());

                return(bitSet);
            }
            else
            {
                return(null);
            }
        }
Exemple #17
0
        public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
        {
            AtomicReader reader = context.AtomicReader;
            FixedBitSet  result = null; // lazy init if needed - no need to create a big bitset ahead of time
            Fields       fields = reader.Fields;
            BytesRef     spare  = new BytesRef(this.termsBytes);

            if (fields == null)
            {
                return(result);
            }
            Terms     terms; // LUCENENET: IDE0059: Remove unnecessary value assignment
            TermsEnum termsEnum = null;
            DocsEnum  docs      = null;

            foreach (TermsAndField termsAndField in this.termsAndFields)
            {
                if ((terms = fields.GetTerms(termsAndField.field)) != null)
                {
                    termsEnum = terms.GetEnumerator(termsEnum); // this won't return null
                    for (int i = termsAndField.start; i < termsAndField.end; i++)
                    {
                        spare.Offset = offsets[i];
                        spare.Length = offsets[i + 1] - offsets[i];
                        if (termsEnum.SeekExact(spare))
                        {
                            docs = termsEnum.Docs(acceptDocs, docs, DocsFlags.NONE); // no freq since we don't need them
                            if (result == null)
                            {
                                if (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                                {
                                    result = new FixedBitSet(reader.MaxDoc);
                                    // lazy init but don't do it in the hot loop since we could read many docs
                                    result.Set(docs.DocID);
                                }
                            }
                            while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                            {
                                result.Set(docs.DocID);
                            }
                        }
                    }
                }
            }
            return(result);
        }
        public virtual ShapeFieldCache <T> GetCache(AtomicReader reader)
        {
            lock (locker)
            {
                ShapeFieldCache <T> idx;
                if (sidx.TryGetValue(reader, out idx) && idx != null)
                {
                    return(idx);
                }

                /*long startTime = Runtime.CurrentTimeMillis();
                 *              log.Fine("Building Cache [" + reader.MaxDoc() + "]");*/
                idx = new ShapeFieldCache <T>(reader.MaxDoc, m_defaultSize);
                int       count = 0;
                DocsEnum  docs  = null;
                Terms     terms = reader.GetTerms(m_shapeField);
                TermsEnum te    = null;
                if (terms != null)
                {
                    te = terms.GetIterator(te);
                    BytesRef term = te.Next();
                    while (term != null)
                    {
                        T shape = ReadShape(term);
                        if (shape != null)
                        {
                            docs = te.Docs(null, docs, DocsFlags.NONE);
                            int docid = docs.NextDoc();
                            while (docid != DocIdSetIterator.NO_MORE_DOCS)
                            {
                                idx.Add(docid, shape);
                                docid = docs.NextDoc();
                                count++;
                            }
                        }
                        term = te.Next();
                    }
                }
                sidx[reader] = idx;

                /*long elapsed = Runtime.CurrentTimeMillis() - startTime;
                 * log.Fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);*/
                return(idx);
            }
        }
        public virtual void TestNestedPulsing()
        {
            // we always run this test with pulsing codec.
            Codec cp = TestUtil.AlwaysPostingsFormat(new NestedPulsingPostingsFormat());
            BaseDirectoryWrapper dir = NewDirectory();
            RandomIndexWriter    iw  = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
            Document             doc = new Document();

            doc.Add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
            // note: the reuse is imperfect, here we would have 4 enums (lost reuse when we get an enum for 'm')
            // this is because we only track the 'last' enum we reused (not all).
            // but this seems 'good enough' for now.
            iw.AddDocument(doc);
            DirectoryReader ir = iw.GetReader();

            iw.Dispose();

            AtomicReader segment = GetOnlySegmentReader(ir);
            DocsEnum     reuse   = null;
            IDictionary <DocsEnum, bool?> allEnums = new JCG.Dictionary <DocsEnum, bool?>(IdentityEqualityComparer <DocsEnum> .Default);
            TermsEnum te = segment.GetTerms("foo").GetEnumerator();

            while (te.MoveNext())
            {
                reuse           = te.Docs(null, reuse, DocsFlags.NONE);
                allEnums[reuse] = true;
            }

            assertEquals(4, allEnums.Count);

            allEnums.Clear();
            DocsAndPositionsEnum posReuse = null;

            te = segment.GetTerms("foo").GetEnumerator();
            while (te.MoveNext())
            {
                posReuse           = te.DocsAndPositions(null, posReuse);
                allEnums[posReuse] = true;
            }

            assertEquals(4, allEnums.Count);

            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestSophisticatedReuse()
        {
            // we always run this test with pulsing codec.
            Codec             cp  = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));
            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetCodec(cp));
            Document          doc = new Document();

            doc.Add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
            iw.AddDocument(doc);
            DirectoryReader ir = iw.GetReader();

            iw.Dispose();

            AtomicReader segment = GetOnlySegmentReader(ir);
            DocsEnum     reuse   = null;
            IDictionary <DocsEnum, bool?> allEnums = new JCG.Dictionary <DocsEnum, bool?>(IdentityEqualityComparer <DocsEnum> .Default);
            TermsEnum te = segment.GetTerms("foo").GetEnumerator();

            while (te.MoveNext())
            {
                reuse           = te.Docs(null, reuse, DocsFlags.NONE);
                allEnums[reuse] = true;
            }

            assertEquals(2, allEnums.Count);

            allEnums.Clear();
            DocsAndPositionsEnum posReuse = null;

            te = segment.GetTerms("foo").GetEnumerator();
            while (te.MoveNext())
            {
                posReuse           = te.DocsAndPositions(null, posReuse);
                allEnums[posReuse] = true;
            }

            assertEquals(2, allEnums.Count);

            ir.Dispose();
            dir.Dispose();
        }
Exemple #21
0
        private FixedBitSet CorrectBits(AtomicReader reader, IBits acceptDocs)
        {
            FixedBitSet bits  = new FixedBitSet(reader.MaxDoc); //assume all are INvalid
            Terms       terms = reader.Fields.GetTerms(fieldName);

            if (terms is null)
            {
                return(bits);
            }

            TermsEnum termsEnum = terms.GetEnumerator();
            DocsEnum  docs      = null;

            while (termsEnum.MoveNext())
            {
                docs = termsEnum.Docs(acceptDocs, docs, DocsFlags.NONE);
                int doc = docs.NextDoc();
                if (doc != DocIdSetIterator.NO_MORE_DOCS)
                {
                    if (keepMode == KeepMode.KM_USE_FIRST_OCCURRENCE)
                    {
                        bits.Set(doc);
                    }
                    else
                    {
                        int lastDoc /* = doc*/; // LUCENENET: Removed unnecessary assignment
                        while (true)
                        {
                            lastDoc = doc;
                            doc     = docs.NextDoc();
                            if (doc == DocIdSetIterator.NO_MORE_DOCS)
                            {
                                break;
                            }
                        }
                        bits.Set(lastDoc);
                    }
                }
            }
            return(bits);
        }
Exemple #22
0
        public virtual DocsEnum RandomDocsEnum(string field, BytesRef term, IList <AtomicReaderContext> readers, IBits bits)
        {
            if (Random().Next(10) == 0)
            {
                return(null);
            }
            AtomicReader indexReader = (AtomicReader)readers[Random().Next(readers.Count)].Reader;
            Terms        terms       = indexReader.GetTerms(field);

            if (terms == null)
            {
                return(null);
            }
            TermsEnum iterator = terms.GetIterator(null);

            if (iterator.SeekExact(term))
            {
                return(iterator.Docs(bits, null, Random().NextBoolean() ? DocsFlags.FREQS : DocsFlags.NONE));
            }
            return(null);
        }
        public virtual ShapeFieldCache <T> GetCache(AtomicReader reader)
        {
            // LUCENENET: ConditionalWeakTable allows us to simplify and remove locks
            return(sidx.GetValue(reader, (key) =>
            {
                /*long startTime = Runtime.CurrentTimeMillis();
                 * log.Fine("Building Cache [" + reader.MaxDoc() + "]");*/
                ShapeFieldCache <T> idx = new ShapeFieldCache <T>(key.MaxDoc, m_defaultSize);
                int count = 0;
                DocsEnum docs = null;
                Terms terms = ((AtomicReader)key).GetTerms(m_shapeField);
                TermsEnum te = null;
                if (terms != null)
                {
                    te = terms.GetIterator(te);
                    BytesRef term = te.Next();
                    while (term != null)
                    {
                        T shape = ReadShape(term);
                        if (shape != null)
                        {
                            docs = te.Docs(null, docs, DocsFlags.NONE);
                            int docid = docs.NextDoc();
                            while (docid != DocIdSetIterator.NO_MORE_DOCS)
                            {
                                idx.Add(docid, shape);
                                docid = docs.NextDoc();
                                count++;
                            }
                        }
                        term = te.Next();
                    }
                }

                /*long elapsed = Runtime.CurrentTimeMillis() - startTime;
                 * log.Fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);*/
                return idx;
            }));
        }
        public virtual ShapeFieldCache <T> GetCache(AtomicReader reader)
        {
            // LUCENENET: ConditionalWeakTable allows us to simplify and remove locks on the
            // read operation. For the create case, we use Lazy<T> to ensure atomicity.
            return(sidx.GetValue(reader, (key) => new Lazy <ShapeFieldCache <T> >(() =>
            {
                /*long startTime = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                 * log.Fine("Building Cache [" + reader.MaxDoc() + "]");*/
                ShapeFieldCache <T> idx = new ShapeFieldCache <T>(key.MaxDoc, m_defaultSize);
                int count = 0;
                DocsEnum docs = null;
                Terms terms = ((AtomicReader)key).GetTerms(m_shapeField);
                TermsEnum te = null;
                if (terms != null)
                {
                    te = terms.GetEnumerator(te);
                    while (te.MoveNext())
                    {
                        T shape = ReadShape(te.Term);
                        if (shape != null)
                        {
                            docs = te.Docs(null, docs, DocsFlags.NONE);
                            int docid = docs.NextDoc();
                            while (docid != DocIdSetIterator.NO_MORE_DOCS)
                            {
                                idx.Add(docid, shape);
                                docid = docs.NextDoc();
                                count++;
                            }
                        }
                    }
                }

                /*long elapsed = J2N.Time.NanoTime() / J2N.Time.MillisecondsPerNanosecond - startTime; // LUCENENET: Use NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable results
                 * log.Fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);*/
                return idx;
            })).Value);
        }
Exemple #25
0
        public void TestDocsEnumStart()
        {
            Analyzer    analyzer = new MockAnalyzer(Random);
            MemoryIndex memory   = new MemoryIndex(Random.nextBoolean(), Random.nextInt(50) * 1024 * 1024);

            memory.AddField("foo", "bar", analyzer);
            AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;
            DocsEnum     disi   = TestUtil.Docs(Random, reader, "foo", new BytesRef("bar"), null, null, DocsFlags.NONE);
            int          docid  = disi.DocID;

            assertEquals(-1, docid);
            assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            // now reuse and check again
            TermsEnum te = reader.GetTerms("foo").GetEnumerator();

            assertTrue(te.SeekExact(new BytesRef("bar")));
            disi  = te.Docs(null, disi, DocsFlags.NONE);
            docid = disi.DocID;
            assertEquals(-1, docid);
            assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            reader.Dispose();
        }
Exemple #26
0
        /// <summary>
        /// Returns a <see cref="DocsEnum"/> from a positioned <see cref="TermsEnum"/>, but
        /// randomly sometimes uses a <see cref="MultiDocsEnum"/>, <see cref="DocsAndPositionsEnum"/>.
        /// </summary>
        public static DocsEnum Docs(Random random, TermsEnum termsEnum, IBits liveDocs, DocsEnum reuse, DocsFlags flags)
        {
            if (random.NextBoolean())
            {
                if (random.NextBoolean())
                {
                    DocsAndPositionsFlags posFlags;
                    switch (random.Next(4))
                    {
                    case 0:
                        posFlags = 0;
                        break;

                    case 1:
                        posFlags = DocsAndPositionsFlags.OFFSETS;
                        break;

                    case 2:
                        posFlags = DocsAndPositionsFlags.PAYLOADS;
                        break;

                    default:
                        posFlags = DocsAndPositionsFlags.OFFSETS | DocsAndPositionsFlags.PAYLOADS;
                        break;
                    }
                    // TODO: cast to DocsAndPositionsEnum?
                    DocsAndPositionsEnum docsAndPositions = termsEnum.DocsAndPositions(liveDocs, null, posFlags);
                    if (docsAndPositions != null)
                    {
                        return(docsAndPositions);
                    }
                }
                flags |= DocsFlags.FREQS;
            }
            return(termsEnum.Docs(liveDocs, reuse, flags));
        }
Exemple #27
0
            protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, IBits acceptDocs,
                                                     TermsEnum termsEnum)
            {
                BytesRef spare    = new BytesRef();
                DocsEnum docsEnum = null;

                for (int i = 0; i < m_outerInstance._terms.Count; i++)
                {
                    if (termsEnum.SeekExact(m_outerInstance._terms.Get(m_outerInstance._ords[i], spare)))
                    {
                        docsEnum = termsEnum.Docs(acceptDocs, docsEnum, DocsFlags.NONE);
                        float score = m_outerInstance._scores[m_outerInstance._ords[i]];
                        for (int doc = docsEnum.NextDoc();
                             doc != NO_MORE_DOCS;
                             doc = docsEnum.NextDoc())
                        {
                            matchingDocs.Set(doc);
                            // In the case the same doc is also related to a another doc, a score might be overwritten. I think this
                            // can only happen in a many-to-many relation
                            scores[doc] = score;
                        }
                    }
                }
            }
Exemple #28
0
        /// <summary>
        /// Default merge impl </summary>
        public virtual void Merge(MergeState mergeState, FieldInfo.IndexOptions? indexOptions, TermsEnum termsEnum)
        {
            BytesRef term;
            Debug.Assert(termsEnum != null);
            long sumTotalTermFreq = 0;
            long sumDocFreq = 0;
            long sumDFsinceLastAbortCheck = 0;
            FixedBitSet visitedDocs = new FixedBitSet(mergeState.SegmentInfo.DocCount);

            if (indexOptions == FieldInfo.IndexOptions.DOCS_ONLY)
            {
                if (DocsEnum == null)
                {
                    DocsEnum = new MappingMultiDocsEnum();
                }
                DocsEnum.MergeState = mergeState;

                MultiDocsEnum docsEnumIn = null;

                while ((term = termsEnum.Next()) != null)
                {
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    docsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsEnumIn, Index.DocsEnum.FLAG_NONE);
                    if (docsEnumIn != null)
                    {
                        DocsEnum.Reset(docsEnumIn);
                        PostingsConsumer postingsConsumer = StartTerm(term);
                        TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, DocsEnum, visitedDocs);
                        if (stats.DocFreq > 0)
                        {
                            FinishTerm(term, stats);
                            sumTotalTermFreq += stats.DocFreq;
                            sumDFsinceLastAbortCheck += stats.DocFreq;
                            sumDocFreq += stats.DocFreq;
                            if (sumDFsinceLastAbortCheck > 60000)
                            {
                                mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                                sumDFsinceLastAbortCheck = 0;
                            }
                        }
                    }
                }
            }
            else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS)
            {
                if (DocsAndFreqsEnum == null)
                {
                    DocsAndFreqsEnum = new MappingMultiDocsEnum();
                }
                DocsAndFreqsEnum.MergeState = mergeState;

                MultiDocsEnum docsAndFreqsEnumIn = null;

                while ((term = termsEnum.Next()) != null)
                {
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    docsAndFreqsEnumIn = (MultiDocsEnum)termsEnum.Docs(null, docsAndFreqsEnumIn);
                    Debug.Assert(docsAndFreqsEnumIn != null);
                    DocsAndFreqsEnum.Reset(docsAndFreqsEnumIn);
                    PostingsConsumer postingsConsumer = StartTerm(term);
                    TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, DocsAndFreqsEnum, visitedDocs);
                    if (stats.DocFreq > 0)
                    {
                        FinishTerm(term, stats);
                        sumTotalTermFreq += stats.TotalTermFreq;
                        sumDFsinceLastAbortCheck += stats.DocFreq;
                        sumDocFreq += stats.DocFreq;
                        if (sumDFsinceLastAbortCheck > 60000)
                        {
                            mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                            sumDFsinceLastAbortCheck = 0;
                        }
                    }
                }
            }
            else if (indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
            {
                if (PostingsEnum == null)
                {
                    PostingsEnum = new MappingMultiDocsAndPositionsEnum();
                }
                PostingsEnum.MergeState = mergeState;
                MultiDocsAndPositionsEnum postingsEnumIn = null;
                while ((term = termsEnum.Next()) != null)
                {
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn, DocsAndPositionsEnum.FLAG_PAYLOADS);
                    Debug.Assert(postingsEnumIn != null);
                    PostingsEnum.Reset(postingsEnumIn);

                    PostingsConsumer postingsConsumer = StartTerm(term);
                    TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, PostingsEnum, visitedDocs);
                    if (stats.DocFreq > 0)
                    {
                        FinishTerm(term, stats);
                        sumTotalTermFreq += stats.TotalTermFreq;
                        sumDFsinceLastAbortCheck += stats.DocFreq;
                        sumDocFreq += stats.DocFreq;
                        if (sumDFsinceLastAbortCheck > 60000)
                        {
                            mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                            sumDFsinceLastAbortCheck = 0;
                        }
                    }
                }
            }
            else
            {
                Debug.Assert(indexOptions == FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
                if (PostingsEnum == null)
                {
                    PostingsEnum = new MappingMultiDocsAndPositionsEnum();
                }
                PostingsEnum.MergeState = mergeState;
                MultiDocsAndPositionsEnum postingsEnumIn = null;
                while ((term = termsEnum.Next()) != null)
                {
                    // We can pass null for liveDocs, because the
                    // mapping enum will skip the non-live docs:
                    postingsEnumIn = (MultiDocsAndPositionsEnum)termsEnum.DocsAndPositions(null, postingsEnumIn);
                    Debug.Assert(postingsEnumIn != null);
                    PostingsEnum.Reset(postingsEnumIn);

                    PostingsConsumer postingsConsumer = StartTerm(term);
                    TermStats stats = postingsConsumer.Merge(mergeState, indexOptions, PostingsEnum, visitedDocs);
                    if (stats.DocFreq > 0)
                    {
                        FinishTerm(term, stats);
                        sumTotalTermFreq += stats.TotalTermFreq;
                        sumDFsinceLastAbortCheck += stats.DocFreq;
                        sumDocFreq += stats.DocFreq;
                        if (sumDFsinceLastAbortCheck > 60000)
                        {
                            mergeState.checkAbort.Work(sumDFsinceLastAbortCheck / 5.0);
                            sumDFsinceLastAbortCheck = 0;
                        }
                    }
                }
            }
            Finish(indexOptions == FieldInfo.IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq, sumDocFreq, visitedDocs.Cardinality());
        }
Exemple #29
0
        /// <summary>
        /// checks the terms enum sequentially
        /// if deep is false, it does a 'shallow' test that doesnt go down to the docsenums
        /// </summary>
        public virtual void AssertTermsEnum(TermsEnum leftTermsEnum, TermsEnum rightTermsEnum, bool deep)
        {
            BytesRef             term;
            IBits                randomBits     = new RandomBits(MAXDOC, Random.NextDouble(), Random);
            DocsAndPositionsEnum leftPositions  = null;
            DocsAndPositionsEnum rightPositions = null;
            DocsEnum             leftDocs       = null;
            DocsEnum             rightDocs      = null;

            while ((term = leftTermsEnum.Next()) != null)
            {
                Assert.AreEqual(term, rightTermsEnum.Next());
                AssertTermStats(leftTermsEnum, rightTermsEnum);
                if (deep)
                {
                    // with payloads + off
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions));
                    // with payloads only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.PAYLOADS));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.PAYLOADS));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.PAYLOADS));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.PAYLOADS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.PAYLOADS));

                    // with offsets only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.OFFSETS));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.OFFSETS));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.OFFSETS));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.OFFSETS), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.OFFSETS));

                    // with positions only
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.NONE));
                    AssertDocsAndPositionsEnum(leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.NONE));

                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(null, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(null, rightPositions, DocsAndPositionsFlags.NONE));
                    AssertPositionsSkipping(leftTermsEnum.DocFreq, leftPositions = leftTermsEnum.DocsAndPositions(randomBits, leftPositions, DocsAndPositionsFlags.NONE), rightPositions = rightTermsEnum.DocsAndPositions(randomBits, rightPositions, DocsAndPositionsFlags.NONE));

                    // with freqs:
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

                    // w/o freqs:
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsFlags.NONE));
                    AssertDocsEnum(leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsFlags.NONE));

                    // with freqs:
                    AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(null, leftDocs), rightDocs = rightTermsEnum.Docs(null, rightDocs));
                    AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(randomBits, leftDocs), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs));

                    // w/o freqs:
                    AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(null, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(null, rightDocs, DocsFlags.NONE));
                    AssertDocsSkipping(leftTermsEnum.DocFreq, leftDocs = leftTermsEnum.Docs(randomBits, leftDocs, DocsFlags.NONE), rightDocs = rightTermsEnum.Docs(randomBits, rightDocs, DocsFlags.NONE));
                }
            }
            Assert.IsNull(rightTermsEnum.Next());
        }
Exemple #30
0
        private void DuellReaders(CompositeReader other, AtomicReader memIndexReader)
        {
            AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
            Fields       memFields  = memIndexReader.Fields;

            foreach (string field in competitor.Fields)
            {
                Terms memTerms = memFields.GetTerms(field);
                Terms iwTerms  = memIndexReader.GetTerms(field);
                if (iwTerms is null)
                {
                    assertNull(memTerms);
                }
                else
                {
                    NumericDocValues normValues    = competitor.GetNormValues(field);
                    NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
                    if (normValues != null)
                    {
                        // mem idx always computes norms on the fly
                        assertNotNull(memNormValues);
                        assertEquals(normValues.Get(0), memNormValues.Get(0));
                    }

                    assertNotNull(memTerms);
                    assertEquals(iwTerms.DocCount, memTerms.DocCount);
                    assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
                    assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);
                    TermsEnum iwTermsIter  = iwTerms.GetEnumerator();
                    TermsEnum memTermsIter = memTerms.GetEnumerator();
                    if (iwTerms.HasPositions)
                    {
                        bool offsets = iwTerms.HasOffsets && memTerms.HasOffsets;

                        while (iwTermsIter.MoveNext())
                        {
                            assertTrue(memTermsIter.MoveNext());
                            assertEquals(iwTermsIter.Term, memTermsIter.Term);
                            DocsAndPositionsEnum iwDocsAndPos  = iwTermsIter.DocsAndPositions(null, null);
                            DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq);
                                for (int i = 0; i < iwDocsAndPos.Freq; i++)
                                {
                                    assertEquals("term: " + iwTermsIter.Term.Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                                    if (offsets)
                                    {
                                        assertEquals(iwDocsAndPos.StartOffset, memDocsAndPos.StartOffset);
                                        assertEquals(iwDocsAndPos.EndOffset, memDocsAndPos.EndOffset);
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        while (iwTermsIter.MoveNext())
                        {
                            assertEquals(iwTermsIter.Term, memTermsIter.Term);
                            DocsEnum iwDocsAndPos  = iwTermsIter.Docs(null, null);
                            DocsEnum memDocsAndPos = memTermsIter.Docs(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID, memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq, memDocsAndPos.Freq);
                            }
                        }
                    }
                }
            }
        }
Exemple #31
0
        private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter,
                                                    bool multipleValuesPerDocument, bool scoreDocsInOrder)
        {
            IndexIterationContext context = new IndexIterationContext();
            int numRandomValues           = nDocs / 2;

            context.RandomUniqueValues = new string[numRandomValues];
            ISet <string> trackSet = new JCG.HashSet <string>();

            context.RandomFrom = new bool[numRandomValues];
            for (int i = 0; i < numRandomValues; i++)
            {
                string uniqueRandomValue;
                do
                {
                    uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random);
                    //        uniqueRandomValue = TestUtil.randomSimpleString(random);
                } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue));
                // Generate unique values and empty strings aren't allowed.
                trackSet.Add(uniqueRandomValue);
                context.RandomFrom[i]         = Random.NextBoolean();
                context.RandomUniqueValues[i] = uniqueRandomValue;
            }

            RandomDoc[] docs = new RandomDoc[nDocs];
            for (int i = 0; i < nDocs; i++)
            {
                string   id       = Convert.ToString(i, CultureInfo.InvariantCulture);
                int      randomI  = Random.Next(context.RandomUniqueValues.Length);
                string   value    = context.RandomUniqueValues[randomI];
                Document document = new Document();
                document.Add(NewTextField(Random, "id", id, Field.Store.NO));
                document.Add(NewTextField(Random, "value", value, Field.Store.NO));

                bool from = context.RandomFrom[randomI];
                int  numberOfLinkValues = multipleValuesPerDocument ? 2 + Random.Next(10) : 1;
                docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
                for (int j = 0; j < numberOfLinkValues; j++)
                {
                    string linkValue = context.RandomUniqueValues[Random.Next(context.RandomUniqueValues.Length)];
                    docs[i].linkValues.Add(linkValue);
                    if (from)
                    {
                        if (!context.FromDocuments.TryGetValue(linkValue, out IList <RandomDoc> fromDocs))
                        {
                            context.FromDocuments[linkValue] = fromDocs = new JCG.List <RandomDoc>();
                        }
                        if (!context.RandomValueFromDocs.TryGetValue(value, out IList <RandomDoc> randomValueFromDocs))
                        {
                            context.RandomValueFromDocs[value] = randomValueFromDocs = new JCG.List <RandomDoc>();
                        }

                        fromDocs.Add(docs[i]);
                        randomValueFromDocs.Add(docs[i]);
                        document.Add(NewTextField(Random, "from", linkValue, Field.Store.NO));
                    }
                    else
                    {
                        if (!context.ToDocuments.TryGetValue(linkValue, out IList <RandomDoc> toDocuments))
                        {
                            context.ToDocuments[linkValue] = toDocuments = new JCG.List <RandomDoc>();
                        }
                        if (!context.RandomValueToDocs.TryGetValue(value, out IList <RandomDoc> randomValueToDocs))
                        {
                            context.RandomValueToDocs[value] = randomValueToDocs = new JCG.List <RandomDoc>();
                        }

                        toDocuments.Add(docs[i]);
                        randomValueToDocs.Add(docs[i]);
                        document.Add(NewTextField(Random, "to", linkValue, Field.Store.NO));
                    }
                }

                RandomIndexWriter w;
                if (from)
                {
                    w = fromWriter;
                }
                else
                {
                    w = toWriter;
                }

                w.AddDocument(document);
                if (Random.Next(10) == 4)
                {
                    w.Commit();
                }
                if (Verbose)
                {
                    Console.WriteLine("Added document[" + docs[i].id + "]: " + document);
                }
            }

            // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
            // any ScoreMode.
            IndexSearcher fromSearcher = NewSearcher(fromWriter.GetReader());
            IndexSearcher toSearcher   = NewSearcher(toWriter.GetReader());

            for (int i = 0; i < context.RandomUniqueValues.Length; i++)
            {
                string uniqueRandomValue = context.RandomUniqueValues[i];
                string fromField;
                string toField;
                IDictionary <string, IDictionary <int, JoinScore> > queryVals;
                if (context.RandomFrom[i])
                {
                    fromField = "from";
                    toField   = "to";
                    queryVals = context.FromHitsToJoinScore;
                }
                else
                {
                    fromField = "to";
                    toField   = "from";
                    queryVals = context.ToHitsToJoinScore;
                }
                IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousClass3(fromField, joinValueToJoinScores));
                }
                else
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousClass4(fromField, joinValueToJoinScores));
                }

                IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    if (scoreDocsInOrder)
                    {
                        AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
                        Terms        terms = slowCompositeReader.GetTerms(toField);
                        if (terms != null)
                        {
                            DocsEnum  docsEnum  = null;
                            TermsEnum termsEnum = null;
                            JCG.SortedSet <BytesRef> joinValues =
                                new JCG.SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
                            joinValues.UnionWith(joinValueToJoinScores.Keys);
                            foreach (BytesRef joinValue in joinValues)
                            {
                                termsEnum = terms.GetEnumerator(termsEnum);
                                if (termsEnum.SeekExact(joinValue))
                                {
                                    docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE);
                                    JoinScore joinScore = joinValueToJoinScores[joinValue];

                                    for (int doc = docsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.NextDoc())
                                    {
                                        // First encountered join value determines the score.
                                        // Something to keep in mind for many-to-many relations.
                                        if (!docToJoinScore.ContainsKey(doc))
                                        {
                                            docToJoinScore[doc] = joinScore;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        toSearcher.Search(new MatchAllDocsQuery(),
                                          new CollectorAnonymousClass5(toField, joinValueToJoinScores,
                                                                       docToJoinScore));
                    }
                }
                else
                {
                    toSearcher.Search(new MatchAllDocsQuery(),
                                      new CollectorAnonymousClass6(toField, joinValueToJoinScores,
                                                                   docToJoinScore));
                }
                queryVals[uniqueRandomValue] = docToJoinScore;
            }

            fromSearcher.IndexReader.Dispose();
            toSearcher.IndexReader.Dispose();

            return(context);
        }