Esempio n. 1
0
        public void RestDocsAndPositionsEnumStart()
        {
            Analyzer    analyzer = new MockAnalyzer(Random());
            int         numIters = AtLeast(3);
            MemoryIndex memory   = new MemoryIndex(true, Random().nextInt(50) * 1024 * 1024);

            for (int i = 0; i < numIters; i++)
            { // check reuse
                memory.AddField("foo", "bar", analyzer);
                AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;
                assertEquals(1, reader.Terms("foo").SumTotalTermFreq);
                DocsAndPositionsEnum disi = reader.TermPositionsEnum(new Term("foo", "bar"));
                int docid = disi.DocID();
                assertEquals(-1, docid);
                assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                assertEquals(0, disi.NextPosition());
                assertEquals(0, disi.StartOffset());
                assertEquals(3, disi.EndOffset());

                // now reuse and check again
                TermsEnum te = reader.Terms("foo").Iterator(null);
                assertTrue(te.SeekExact(new BytesRef("bar")));
                disi  = te.DocsAndPositions(null, disi);
                docid = disi.DocID();
                assertEquals(-1, docid);
                assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                reader.Dispose();
                memory.Reset();
            }
        }
Esempio n. 2
0
        public virtual void TestDocsAndPositionsEnum()
        {
            TermsEnum termsEnum = reader.Terms(DOC_POSITIONS_FIELD).Iterator(null);

            assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(DOC_POSITIONS_TERM)));
            DocsAndPositionsEnum sortedPositions = termsEnum.DocsAndPositions(null, null);
            int doc;

            // test nextDoc()
            while ((doc = sortedPositions.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
            {
                int freq = sortedPositions.Freq();
                assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
                for (int i = 0; i < freq; i++)
                {
                    assertEquals("incorrect position for doc=" + doc, i, sortedPositions.NextPosition());
                    if (!DoesntSupportOffsets.contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
                    {
                        assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.StartOffset());
                        assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.EndOffset());
                    }
                    assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(sortedPositions.Payload.Utf8ToString(), CultureInfo.InvariantCulture));
                }
            }

            // test advance()
            DocsAndPositionsEnum reuse = sortedPositions;

            sortedPositions = termsEnum.DocsAndPositions(null, reuse);
            if (sortedPositions is SortingAtomicReader.SortingDocsAndPositionsEnum)
            {
                assertTrue(((SortingAtomicReader.SortingDocsAndPositionsEnum)sortedPositions).Reused(reuse)); // make sure reuse worked
            }
            doc = 0;
            while ((doc = sortedPositions.Advance(doc + TestUtil.NextInt(Random(), 1, 5))) != DocIdSetIterator.NO_MORE_DOCS)
            {
                int freq = sortedPositions.Freq();
                assertEquals("incorrect freq for doc=" + doc, sortedValues[doc] / 10 + 1, freq);
                for (int i = 0; i < freq; i++)
                {
                    assertEquals("incorrect position for doc=" + doc, i, sortedPositions.NextPosition());
                    if (!DoesntSupportOffsets.contains(TestUtil.GetPostingsFormat(DOC_POSITIONS_FIELD)))
                    {
                        assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.StartOffset());
                        assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.EndOffset());
                    }
                    assertEquals("incorrect payload for doc=" + doc, freq - i, int.Parse(sortedPositions.Payload.Utf8ToString(), CultureInfo.InvariantCulture));
                }
            }
        }
Esempio n. 3
0
        public virtual void TestReuseDocsEnumNoReuse()
        {
            Directory         dir    = NewDirectory();
            Codec             cp     = TestUtil.AlwaysPostingsFormat(new Lucene40RWPostingsFormat(OLD_FORMAT_IMPERSONATION_IS_ACTIVE));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            int numdocs = AtLeast(20);

            CreateRandomIndex(numdocs, writer, Random());
            writer.Commit();

            DirectoryReader open = DirectoryReader.Open(dir);

            foreach (AtomicReaderContext ctx in open.Leaves)
            {
                AtomicReader indexReader = (AtomicReader)ctx.Reader;
                Terms        terms       = indexReader.Terms("body");
                TermsEnum    iterator    = terms.Iterator(null);
                IdentityHashMap <DocsEnum, bool?> enums = new IdentityHashMap <DocsEnum, bool?>();
                MatchNoBits bits = new MatchNoBits(indexReader.MaxDoc);
                while ((iterator.Next()) != null)
                {
                    DocsEnum docs = iterator.Docs(Random().NextBoolean() ? bits : new MatchNoBits(indexReader.MaxDoc), null, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE);
                    enums[docs] = true;
                }

                Assert.AreEqual(terms.Size(), enums.Count);
            }
            IOUtils.Close(writer, open, dir);
        }
        /// <summary>
        /// Takes the categories from the given taxonomy directory, and adds the
        /// missing ones to this taxonomy. Additionally, it fills the given
        /// <seealso cref="OrdinalMap"/> with a mapping from the original ordinal to the new
        /// ordinal.
        /// </summary>
        public virtual void AddTaxonomy(Directory taxoDir, OrdinalMap map)
        {
            EnsureOpen();
            DirectoryReader r = DirectoryReader.Open(taxoDir);

            try
            {
                int        size       = r.NumDocs;
                OrdinalMap ordinalMap = map;
                ordinalMap.Size = size;
                int       @base = 0;
                TermsEnum te    = null;
                DocsEnum  docs  = null;
                foreach (AtomicReaderContext ctx in r.Leaves)
                {
                    AtomicReader ar    = ctx.AtomicReader;
                    Terms        terms = ar.Terms(Consts.FULL);
                    te = terms.Iterator(te);
                    while (te.Next() != null)
                    {
                        FacetLabel cp      = new FacetLabel(FacetsConfig.StringToPath(te.Term().Utf8ToString()));
                        int        ordinal = AddCategory(cp);
                        docs = te.Docs(null, docs, DocsEnum.FLAG_NONE);
                        ordinalMap.AddMapping(docs.NextDoc() + @base, ordinal);
                    }
                    @base += ar.MaxDoc; // no deletions, so we're ok
                }
                ordinalMap.AddDone();
            }
            finally
            {
                r.Dispose();
            }
        }
Esempio n. 5
0
        public virtual void TestNestedPulsing()
        {
            // we always run this test with pulsing codec.
            Codec cp = TestUtil.AlwaysPostingsFormat(new NestedPulsingPostingsFormat());
            BaseDirectoryWrapper dir = NewDirectory();
            RandomIndexWriter    iw  = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            Document             doc = new Document();

            doc.Add(new TextField("foo", "a b b c c c d e f g g g h i i j j k l l m m m", Field.Store.NO));
            // note: the reuse is imperfect, here we would have 4 enums (lost reuse when we get an enum for 'm')
            // this is because we only track the 'last' enum we reused (not all).
            // but this seems 'good enough' for now.
            iw.AddDocument(doc);
            DirectoryReader ir = iw.Reader;

            iw.Dispose();

            AtomicReader segment = GetOnlySegmentReader(ir);
            DocsEnum     reuse   = null;
            IDictionary <DocsEnum, bool?> allEnums = new IdentityHashMap <DocsEnum, bool?>();
            TermsEnum te = segment.Terms("foo").Iterator(null);

            while (te.Next() != null)
            {
                reuse           = te.Docs(null, reuse, DocsEnum.FLAG_NONE);
                allEnums[reuse] = true;
            }

            assertEquals(4, allEnums.Count);

            allEnums.Clear();
            DocsAndPositionsEnum posReuse = null;

            te = segment.Terms("foo").Iterator(null);
            while (te.Next() != null)
            {
                posReuse           = te.DocsAndPositions(null, posReuse);
                allEnums[posReuse] = true;
            }

            assertEquals(4, allEnums.Count);

            ir.Dispose();
            dir.Dispose();
        }
Esempio n. 6
0
        public virtual void TestSophisticatedReuse()
        {
            // we always run this test with pulsing codec.
            Codec             cp  = TestUtil.AlwaysPostingsFormat(new Pulsing41PostingsFormat(1));
            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetCodec(cp));
            Document          doc = new Document();

            doc.Add(new TextField("foo", "a b b c c c d e f g g h i i j j k", Field.Store.NO));
            iw.AddDocument(doc);
            DirectoryReader ir = iw.Reader;

            iw.Dispose();

            AtomicReader segment = GetOnlySegmentReader(ir);
            DocsEnum     reuse   = null;
            IDictionary <DocsEnum, bool?> allEnums = new IdentityHashMap <DocsEnum, bool?>();
            TermsEnum te = segment.Terms("foo").Iterator(null);

            while (te.Next() != null)
            {
                reuse           = te.Docs(null, reuse, DocsEnum.FLAG_NONE);
                allEnums[reuse] = true;
            }

            assertEquals(2, allEnums.Count);

            allEnums.Clear();
            DocsAndPositionsEnum posReuse = null;

            te = segment.Terms("foo").Iterator(null);
            while (te.Next() != null)
            {
                posReuse           = te.DocsAndPositions(null, posReuse);
                allEnums[posReuse] = true;
            }

            assertEquals(2, allEnums.Count);

            ir.Dispose();
            dir.Dispose();
        }
Esempio n. 7
0
        public void TestNonExistingsField()
        {
            MemoryIndex  mindex       = new MemoryIndex(Random().nextBoolean(), Random().nextInt(50) * 1024 * 1024);
            MockAnalyzer mockAnalyzer = new MockAnalyzer(Random());

            mindex.AddField("field", "the quick brown fox", mockAnalyzer);
            AtomicReader reader = (AtomicReader)mindex.CreateSearcher().IndexReader;

            assertNull(reader.GetNumericDocValues("not-in-index"));
            assertNull(reader.GetNormValues("not-in-index"));
            assertNull(reader.TermDocsEnum(new Term("not-in-index", "foo")));
            assertNull(reader.TermPositionsEnum(new Term("not-in-index", "foo")));
            assertNull(reader.Terms("not-in-index"));
        }
Esempio n. 8
0
            public BaseTermsEnumTraverser(AbstractPrefixTreeFilter outerInstance, AtomicReaderContext context, Bits acceptDocs)
            {
                this.outerInstance = outerInstance;

                this.context = context;
                AtomicReader reader = context.AtomicReader;

                this.acceptDocs = acceptDocs;
                maxDoc          = reader.MaxDoc;
                Terms terms = reader.Terms(outerInstance.fieldName);

                if (terms != null)
                {
                    termsEnum = terms.Iterator(null);
                }
            }
Esempio n. 9
0
            /// <exception cref="System.IO.IOException"></exception>
            public BaseTermsEnumTraverser(AbstractPrefixTreeFilter _enclosing, AtomicReaderContext
                                          context, IBits acceptDocs)
            {
                this._enclosing = _enclosing;
                //remember to check for null in getDocIdSet
                this.context = context;
                AtomicReader reader = context.AtomicReader;

                this.acceptDocs = acceptDocs;
                maxDoc          = reader.MaxDoc;
                Terms terms = reader.Terms(this._enclosing.fieldName);

                if (terms != null)
                {
                    termsEnum = terms.Iterator(null);
                }
            }
Esempio n. 10
0
        /// <exception cref="System.IO.IOException"></exception>
        public virtual ShapeFieldCache <T> GetCache(AtomicReader reader)
        {
            lock (locker)
            {
                ShapeFieldCache <T> idx;
                if (sidx.TryGetValue(reader, out idx) && idx != null)
                {
                    return(idx);
                }

                /*long startTime = Runtime.CurrentTimeMillis();
                 *              log.Fine("Building Cache [" + reader.MaxDoc() + "]");*/
                idx = new ShapeFieldCache <T>(reader.MaxDoc, defaultSize);
                int       count = 0;
                DocsEnum  docs  = null;
                Terms     terms = reader.Terms(shapeField);
                TermsEnum te    = null;
                if (terms != null)
                {
                    te = terms.Iterator(te);
                    BytesRef term = te.Next();
                    while (term != null)
                    {
                        T shape = ReadShape(term);
                        if (shape != null)
                        {
                            docs = te.Docs(null, docs, DocsEnum.FLAG_NONE);
                            int docid = docs.NextDoc();
                            while (docid != DocIdSetIterator.NO_MORE_DOCS)
                            {
                                idx.Add(docid, shape);
                                docid = docs.NextDoc();
                                count++;
                            }
                        }
                        term = te.Next();
                    }
                }
                sidx.Add(reader, idx);

                /*long elapsed = Runtime.CurrentTimeMillis() - startTime;
                 * log.Fine("Cached: [" + count + " in " + elapsed + "ms] " + idx);*/
                return(idx);
            }
        }
Esempio n. 11
0
        public virtual DocsEnum RandomDocsEnum(string field, BytesRef term, IList <AtomicReaderContext> readers, Bits bits)
        {
            if (Random().Next(10) == 0)
            {
                return(null);
            }
            AtomicReader indexReader = (AtomicReader)readers[Random().Next(readers.Count)].Reader;
            Terms        terms       = indexReader.Terms(field);

            if (terms == null)
            {
                return(null);
            }
            TermsEnum iterator = terms.Iterator(null);

            if (iterator.SeekExact(term))
            {
                return(iterator.Docs(bits, null, Random().NextBoolean() ? DocsEnum.FLAG_FREQS : DocsEnum.FLAG_NONE));
            }
            return(null);
        }
Esempio n. 12
0
        public void TestDocsEnumStart()
        {
            Analyzer    analyzer = new MockAnalyzer(Random());
            MemoryIndex memory   = new MemoryIndex(Random().nextBoolean(), Random().nextInt(50) * 1024 * 1024);

            memory.AddField("foo", "bar", analyzer);
            AtomicReader reader = (AtomicReader)memory.CreateSearcher().IndexReader;
            DocsEnum     disi   = TestUtil.Docs(Random(), reader, "foo", new BytesRef("bar"), null, null, DocsEnum.FLAG_NONE);
            int          docid  = disi.DocID();

            assertEquals(-1, docid);
            assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);

            // now reuse and check again
            TermsEnum te = reader.Terms("foo").Iterator(null);

            assertTrue(te.SeekExact(new BytesRef("bar")));
            disi  = te.Docs(null, disi, DocsEnum.FLAG_NONE);
            docid = disi.DocID();
            assertEquals(-1, docid);
            assertTrue(disi.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
            reader.Dispose();
        }
Esempio n. 13
0
        public void TestSameFieldAddedMultipleTimes()
        {
            MemoryIndex  mindex       = new MemoryIndex(Random().nextBoolean(), Random().nextInt(50) * 1024 * 1024);
            MockAnalyzer mockAnalyzer = new MockAnalyzer(Random());

            mindex.AddField("field", "the quick brown fox", mockAnalyzer);
            mindex.AddField("field", "jumps over the", mockAnalyzer);
            AtomicReader reader = (AtomicReader)mindex.CreateSearcher().IndexReader;

            assertEquals(7, reader.Terms("field").SumTotalTermFreq);
            PhraseQuery query = new PhraseQuery();

            query.Add(new Term("field", "fox"));
            query.Add(new Term("field", "jumps"));
            assertTrue(mindex.Search(query) > 0.1);
            mindex.Reset();
            mockAnalyzer.PositionIncrementGap = (1 + Random().nextInt(10));
            mindex.AddField("field", "the quick brown fox", mockAnalyzer);
            mindex.AddField("field", "jumps over the", mockAnalyzer);
            assertEquals(0, mindex.Search(query), 0.00001f);
            query.Slop = (10);
            assertTrue("posGap" + mockAnalyzer.GetPositionIncrementGap("field"), mindex.Search(query) > 0.0001);
        }
Esempio n. 14
0
        private void Verify(Directory dir)
        {
            DirectoryReader ir = DirectoryReader.Open(dir);

            foreach (AtomicReaderContext leaf in ir.Leaves)
            {
                AtomicReader leafReader = (AtomicReader)leaf.Reader;
                AssertTerms(leafReader.Terms("field1docs"), leafReader.Terms("field2freqs"), true);
                AssertTerms(leafReader.Terms("field3positions"), leafReader.Terms("field4offsets"), true);
                AssertTerms(leafReader.Terms("field4offsets"), leafReader.Terms("field5payloadsFixed"), true);
                AssertTerms(leafReader.Terms("field5payloadsFixed"), leafReader.Terms("field6payloadsVariable"), true);
                AssertTerms(leafReader.Terms("field6payloadsVariable"), leafReader.Terms("field7payloadsFixedOffsets"), true);
                AssertTerms(leafReader.Terms("field7payloadsFixedOffsets"), leafReader.Terms("field8payloadsVariableOffsets"), true);
            }
            ir.Dispose();
        }
Esempio n. 15
0
        private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter,
                                                    bool multipleValuesPerDocument, bool scoreDocsInOrder)
        {
            IndexIterationContext context = new IndexIterationContext();
            int numRandomValues           = nDocs / 2;

            context.RandomUniqueValues = new string[numRandomValues];
            ISet <string> trackSet = new HashSet <string>();

            context.RandomFrom = new bool[numRandomValues];
            for (int i = 0; i < numRandomValues; i++)
            {
                string uniqueRandomValue;
                do
                {
                    uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random());
                    //        uniqueRandomValue = TestUtil.randomSimpleString(random);
                } while ("".Equals(uniqueRandomValue) || trackSet.Contains(uniqueRandomValue));
                // Generate unique values and empty strings aren't allowed.
                trackSet.Add(uniqueRandomValue);
                context.RandomFrom[i]         = Random().NextBoolean();
                context.RandomUniqueValues[i] = uniqueRandomValue;
            }

            RandomDoc[] docs = new RandomDoc[nDocs];
            for (int i = 0; i < nDocs; i++)
            {
                string   id       = Convert.ToString(i);
                int      randomI  = Random().Next(context.RandomUniqueValues.Length);
                string   value    = context.RandomUniqueValues[randomI];
                Document document = new Document();
                document.Add(NewTextField(Random(), "id", id, Field.Store.NO));
                document.Add(NewTextField(Random(), "value", value, Field.Store.NO));

                bool from = context.RandomFrom[randomI];
                int  numberOfLinkValues = multipleValuesPerDocument ? 2 + Random().Next(10) : 1;
                docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
                for (int j = 0; j < numberOfLinkValues; j++)
                {
                    string linkValue = context.RandomUniqueValues[Random().Next(context.RandomUniqueValues.Length)];
                    docs[i].LinkValues.Add(linkValue);
                    if (from)
                    {
                        if (!context.FromDocuments.ContainsKey(linkValue))
                        {
                            context.FromDocuments[linkValue] = new List <RandomDoc>();
                        }
                        if (!context.RandomValueFromDocs.ContainsKey(value))
                        {
                            context.RandomValueFromDocs[value] = new List <RandomDoc>();
                        }

                        context.FromDocuments[linkValue].Add(docs[i]);
                        context.RandomValueFromDocs[value].Add(docs[i]);
                        document.Add(NewTextField(Random(), "from", linkValue, Field.Store.NO));
                    }
                    else
                    {
                        if (!context.ToDocuments.ContainsKey(linkValue))
                        {
                            context.ToDocuments[linkValue] = new List <RandomDoc>();
                        }
                        if (!context.RandomValueToDocs.ContainsKey(value))
                        {
                            context.RandomValueToDocs[value] = new List <RandomDoc>();
                        }

                        context.ToDocuments[linkValue].Add(docs[i]);
                        context.RandomValueToDocs[value].Add(docs[i]);
                        document.Add(NewTextField(Random(), "to", linkValue, Field.Store.NO));
                    }
                }

                RandomIndexWriter w;
                if (from)
                {
                    w = fromWriter;
                }
                else
                {
                    w = toWriter;
                }

                w.AddDocument(document);
                if (Random().Next(10) == 4)
                {
                    w.Commit();
                }
                if (VERBOSE)
                {
                    Console.WriteLine("Added document[" + docs[i].Id + "]: " + document);
                }
            }

            // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
            // any ScoreMode.
            IndexSearcher fromSearcher = NewSearcher(fromWriter.Reader);
            IndexSearcher toSearcher   = NewSearcher(toWriter.Reader);

            for (int i = 0; i < context.RandomUniqueValues.Length; i++)
            {
                string uniqueRandomValue = context.RandomUniqueValues[i];
                string fromField;
                string toField;
                IDictionary <string, IDictionary <int, JoinScore> > queryVals;
                if (context.RandomFrom[i])
                {
                    fromField = "from";
                    toField   = "to";
                    queryVals = context.FromHitsToJoinScore;
                }
                else
                {
                    fromField = "to";
                    toField   = "from";
                    queryVals = context.ToHitsToJoinScore;
                }
                IDictionary <BytesRef, JoinScore> joinValueToJoinScores = new Dictionary <BytesRef, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousInnerClassHelper3(this, context, fromField, joinValueToJoinScores));
                }
                else
                {
                    fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)),
                                        new CollectorAnonymousInnerClassHelper4(this, context, fromField, joinValueToJoinScores));
                }

                IDictionary <int, JoinScore> docToJoinScore = new Dictionary <int, JoinScore>();
                if (multipleValuesPerDocument)
                {
                    if (scoreDocsInOrder)
                    {
                        AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
                        Terms        terms = slowCompositeReader.Terms(toField);
                        if (terms != null)
                        {
                            DocsEnum             docsEnum   = null;
                            TermsEnum            termsEnum  = null;
                            SortedSet <BytesRef> joinValues =
                                new SortedSet <BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
                            joinValues.AddAll(joinValueToJoinScores.Keys);
                            foreach (BytesRef joinValue in joinValues)
                            {
                                termsEnum = terms.Iterator(termsEnum);
                                if (termsEnum.SeekExact(joinValue))
                                {
                                    docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsEnum.FLAG_NONE);
                                    JoinScore joinScore = joinValueToJoinScores[joinValue];

                                    for (int doc = docsEnum.NextDoc();
                                         doc != DocIdSetIterator.NO_MORE_DOCS;
                                         doc = docsEnum.NextDoc())
                                    {
                                        // First encountered join value determines the score.
                                        // Something to keep in mind for many-to-many relations.
                                        if (!docToJoinScore.ContainsKey(doc))
                                        {
                                            docToJoinScore[doc] = joinScore;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        toSearcher.Search(new MatchAllDocsQuery(),
                                          new CollectorAnonymousInnerClassHelper5(this, context, toField, joinValueToJoinScores,
                                                                                  docToJoinScore));
                    }
                }
                else
                {
                    toSearcher.Search(new MatchAllDocsQuery(),
                                      new CollectorAnonymousInnerClassHelper6(this, context, toField, joinValueToJoinScores,
                                                                              docToJoinScore));
                }
                queryVals[uniqueRandomValue] = docToJoinScore;
            }

            fromSearcher.IndexReader.Dispose();
            toSearcher.IndexReader.Dispose();

            return(context);
        }
Esempio n. 16
0
            public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
            {
                Debug.Assert(OuterInstance.termArrays.Count > 0);
                AtomicReader reader   = (context.AtomicReader);
                Bits         liveDocs = acceptDocs;

                PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[OuterInstance.termArrays.Count];

                Terms fieldTerms = reader.Terms(OuterInstance.Field);

                if (fieldTerms == null)
                {
                    return(null);
                }

                // Reuse single TermsEnum below:
                TermsEnum termsEnum = fieldTerms.Iterator(null);

                for (int pos = 0; pos < postingsFreqs.Length; pos++)
                {
                    Term[] terms = OuterInstance.termArrays[pos];

                    DocsAndPositionsEnum postingsEnum;
                    int docFreq;

                    if (terms.Length > 1)
                    {
                        postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, TermContexts, termsEnum);

                        // coarse -- this overcounts since a given doc can
                        // have more than one term:
                        docFreq = 0;
                        for (int termIdx = 0; termIdx < terms.Length; termIdx++)
                        {
                            Term      term      = terms[termIdx];
                            TermState termState = TermContexts[term].Get(context.Ord);
                            if (termState == null)
                            {
                                // Term not in reader
                                continue;
                            }
                            termsEnum.SeekExact(term.Bytes(), termState);
                            docFreq += termsEnum.DocFreq();
                        }

                        if (docFreq == 0)
                        {
                            // None of the terms are in this reader
                            return(null);
                        }
                    }
                    else
                    {
                        Term      term      = terms[0];
                        TermState termState = TermContexts[term].Get(context.Ord);
                        if (termState == null)
                        {
                            // Term not in reader
                            return(null);
                        }
                        termsEnum.SeekExact(term.Bytes(), termState);
                        postingsEnum = termsEnum.DocsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);

                        if (postingsEnum == null)
                        {
                            // term does exist, but has no positions
                            Debug.Assert(termsEnum.Docs(liveDocs, null, DocsEnum.FLAG_NONE) != null, "termstate found but no term exists in reader");
                            throw new InvalidOperationException("field \"" + term.Field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.Text() + ")");
                        }

                        docFreq = termsEnum.DocFreq();
                    }

                    postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, (int)OuterInstance.positions[pos], terms);
                }

                // sort by increasing docFreq order
                if (OuterInstance.slop == 0)
                {
                    ArrayUtil.TimSort(postingsFreqs);
                }

                if (OuterInstance.slop == 0)
                {
                    ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, Similarity.DoSimScorer(Stats, context));
                    if (s.NoDocs)
                    {
                        return(null);
                    }
                    else
                    {
                        return(s);
                    }
                }
                else
                {
                    return(new SloppyPhraseScorer(this, postingsFreqs, OuterInstance.slop, Similarity.DoSimScorer(Stats, context)));
                }
            }
Esempio n. 17
0
        private void DuellReaders(CompositeReader other, AtomicReader memIndexReader)
        {
            AtomicReader competitor = SlowCompositeReaderWrapper.Wrap(other);
            Fields       memFields  = memIndexReader.Fields;

            foreach (string field in competitor.Fields)
            {
                Terms memTerms = memFields.Terms(field);
                Terms iwTerms  = memIndexReader.Terms(field);
                if (iwTerms == null)
                {
                    assertNull(memTerms);
                }
                else
                {
                    NumericDocValues normValues    = competitor.GetNormValues(field);
                    NumericDocValues memNormValues = memIndexReader.GetNormValues(field);
                    if (normValues != null)
                    {
                        // mem idx always computes norms on the fly
                        assertNotNull(memNormValues);
                        assertEquals(normValues.Get(0), memNormValues.Get(0));
                    }

                    assertNotNull(memTerms);
                    assertEquals(iwTerms.DocCount, memTerms.DocCount);
                    assertEquals(iwTerms.SumDocFreq, memTerms.SumDocFreq);
                    assertEquals(iwTerms.SumTotalTermFreq, memTerms.SumTotalTermFreq);
                    TermsEnum iwTermsIter  = iwTerms.Iterator(null);
                    TermsEnum memTermsIter = memTerms.Iterator(null);
                    if (iwTerms.HasPositions())
                    {
                        bool offsets = iwTerms.HasOffsets() && memTerms.HasOffsets();

                        while (iwTermsIter.Next() != null)
                        {
                            assertNotNull(memTermsIter.Next());
                            assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                            DocsAndPositionsEnum iwDocsAndPos  = iwTermsIter.DocsAndPositions(null, null);
                            DocsAndPositionsEnum memDocsAndPos = memTermsIter.DocsAndPositions(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                                for (int i = 0; i < iwDocsAndPos.Freq(); i++)
                                {
                                    assertEquals("term: " + iwTermsIter.Term().Utf8ToString(), iwDocsAndPos.NextPosition(), memDocsAndPos.NextPosition());
                                    if (offsets)
                                    {
                                        assertEquals(iwDocsAndPos.StartOffset(), memDocsAndPos.StartOffset());
                                        assertEquals(iwDocsAndPos.EndOffset(), memDocsAndPos.EndOffset());
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        while (iwTermsIter.Next() != null)
                        {
                            assertEquals(iwTermsIter.Term(), memTermsIter.Term());
                            DocsEnum iwDocsAndPos  = iwTermsIter.Docs(null, null);
                            DocsEnum memDocsAndPos = memTermsIter.Docs(null, null);
                            while (iwDocsAndPos.NextDoc() != DocsAndPositionsEnum.NO_MORE_DOCS)
                            {
                                assertEquals(iwDocsAndPos.DocID(), memDocsAndPos.NextDoc());
                                assertEquals(iwDocsAndPos.Freq(), memDocsAndPos.Freq());
                            }
                        }
                    }
                }
            }
        }
Esempio n. 18
0
            public override Scorer Scorer(AtomicReaderContext context, Bits acceptDocs)
            {
                Debug.Assert(OuterInstance.Terms_Renamed.Count > 0);
                AtomicReader reader   = context.AtomicReader;
                Bits         liveDocs = acceptDocs;

                PostingsAndFreq[] postingsFreqs = new PostingsAndFreq[OuterInstance.Terms_Renamed.Count];

                Terms fieldTerms = reader.Terms(OuterInstance.Field);

                if (fieldTerms == null)
                {
                    return(null);
                }

                // Reuse single TermsEnum below:
                TermsEnum te = fieldTerms.Iterator(null);

                for (int i = 0; i < OuterInstance.Terms_Renamed.Count; i++)
                {
                    Term      t     = OuterInstance.Terms_Renamed[i];
                    TermState state = States[i].Get(context.Ord);
                    if (state == null) // term doesnt exist in this segment
                    {
                        Debug.Assert(TermNotInReader(reader, t), "no termstate found but term exists in reader");
                        return(null);
                    }
                    te.SeekExact(t.Bytes, state);
                    DocsAndPositionsEnum postingsEnum = te.DocsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);

                    // PhraseQuery on a field that did not index
                    // positions.
                    if (postingsEnum == null)
                    {
                        Debug.Assert(te.SeekExact(t.Bytes), "termstate found but no term exists in reader");
                        // term does exist, but has no positions
                        throw new InvalidOperationException("field \"" + t.Field + "\" was indexed without position data; cannot run PhraseQuery (term=" + t.Text() + ")");
                    }
                    postingsFreqs[i] = new PostingsAndFreq(postingsEnum, te.DocFreq(), (int)OuterInstance.Positions_Renamed[i], t);
                }

                // sort by increasing docFreq order
                if (OuterInstance.slop == 0)
                {
                    ArrayUtil.TimSort(postingsFreqs);
                }

                if (OuterInstance.slop == 0) // optimize exact case
                {
                    ExactPhraseScorer s = new ExactPhraseScorer(this, postingsFreqs, Similarity.DoSimScorer(Stats, context));
                    if (s.NoDocs)
                    {
                        return(null);
                    }
                    else
                    {
                        return(s);
                    }
                }
                else
                {
                    return(new SloppyPhraseScorer(this, postingsFreqs, OuterInstance.slop, Similarity.DoSimScorer(Stats, context)));
                }
            }