Beispiel #1
        /// <summary>
        /// Returns the <see cref="ITopGroups{T}"/> for the specified
        /// BlockJoinQuery. The groupValue of each GroupDocs will
        /// be the parent docID for that group.
        /// The number of documents within each group is calculated as the minimum of <paramref name="maxDocsPerGroup"/>
        /// and the number of matched child documents for that group.
        /// Returns <c>null</c> if no groups matched.
        /// </summary>
        /// <param name="query"> Search query; used as the key for the slot lookup in <c>joinQueryID</c> </param>
        /// <param name="withinGroupSort"> Sort criteria within groups </param>
        /// <param name="offset"> Parent docs offset </param>
        /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
        /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
        /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
        /// <returns> <see cref="ITopGroups{T}"/> for specified query </returns>
        /// <exception cref="IOException"> if there is a low-level I/O error </exception>
        public virtual ITopGroups <int> GetTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, bool fillSortFields)
            // Look up the slot stored for this query in joinQueryID.
            if (!joinQueryID.TryGetValue(query, out int slot))
                // NOTE(review): the body of this nested check is missing from this excerpt
                // (braces and statements appear to have been stripped); presumably it
                // returns null when nothing was collected - confirm against the full file.
                if (totalHitCount == 0)
                slot = -1; // LUCENENET: In Java null is converted to -1 for AccumulateGroups()

            // Check the parent offset against queue.Count while sortedGroups is still null,
            // otherwise against sortedGroups.Length.
            // NOTE(review): the statements guarded by these checks are not visible here;
            // presumably they return null for an out-of-range offset - confirm.
            if (sortedGroups is null)
                if (offset >= queue.Count)
            else if (offset > sortedGroups.Length)

            // Hand the resolved slot and the paging/sorting arguments to AccumulateGroups.
            return(AccumulateGroups(slot, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields));
        /// <summary>
        /// Returns the <see cref="ITopGroups{T}"/> for the specified
        /// BlockJoinQuery. The groupValue of each GroupDocs will
        /// be the parent docID for that group.
        /// The number of documents within each group is calculated as the minimum of <paramref name="maxDocsPerGroup"/>
        /// and the number of matched child documents for that group.
        /// Returns <c>null</c> if no groups matched.
        /// </summary>
        /// <param name="query"> Search query; used as the key for the slot lookup in <c>joinQueryID</c> </param>
        /// <param name="withinGroupSort"> Sort criteria within groups </param>
        /// <param name="offset"> Parent docs offset </param>
        /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
        /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
        /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
        /// <returns> <see cref="ITopGroups{T}"/> for specified query </returns>
        /// <exception cref="IOException"> if there is a low-level I/O error </exception>
        public virtual ITopGroups <int> GetTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, bool fillSortFields)

            // NOTE(review): the declaration of `slot` is not visible in this excerpt; the
            // null comparison in the return statement suggests it is nullable - confirm.
            // Look up the slot stored for this query in joinQueryID.
            if (!joinQueryID.TryGetValue(query, out slot))
                // NOTE(review): the body of this check is missing here; presumably it
                // returns null when nothing was collected - confirm against the full file.
                if (totalHitCount == 0)

            // Check the parent offset against queue.Count while sortedGroups is still null,
            // otherwise against sortedGroups.Length.
            // NOTE(review): the guarded statements are not visible in this excerpt.
            if (sortedGroups == null)
                if (offset >= queue.Count)
            else if (offset > sortedGroups.Length)

            // A null slot is passed on to AccumulateGroups as -1.
            return(AccumulateGroups(slot == null ? -1 : (int)slot, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields));
 /// <summary>
 /// Typed equality check against another <see cref="ToParentBlockJoinQuery"/>.
 /// Two queries are equal when the base-class comparison succeeds and the
 /// parents filter, the score mode and the original child query all match.
 /// </summary>
 /// <param name="other"> The query to compare with; it is dereferenced without a null check </param>
 /// <returns> <c>true</c> if all compared members are equal </returns>
 protected bool Equals(ToParentBlockJoinQuery other)
     // The && chain short-circuits on the first mismatch.
     return(base.Equals(other) &&
            Equals(_parentsFilter, other._parentsFilter) &&
            _scoreMode == other._scoreMode &&
            Equals(_origChildQuery, other._origChildQuery));
        /// <summary>
        /// Verifies that searching with a <see cref="ToParentBlockJoinQuery"/> whose wrapped
        /// query comes from <c>CreateChildrenQueryWithOneParent</c> throws an
        /// <see cref="InvalidOperationException"/> carrying the
        /// "child query must only match non-parent docs" message.
        /// </summary>
        public void TestNextDocValidationForToParentBjq()
            // NOTE(review): judging by the helper's name, this query matches children plus
            // one parent document - confirm against the helper's definition.
            Query parentQueryWithRandomChild = CreateChildrenQueryWithOneParent(GetRandomChildNumber(0));
            var blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, ParentsFilter, ScoreMode.None);

            // The search itself must fail, and the failure must name the violated constraint.
            var ex = Throws<InvalidOperationException>(() => IndexSearcher.Search(blockJoinQuery, 1));
            StringAssert.Contains("child query must only match non-parent docs", ex.Message);

        /// <summary>
        /// Rewrites the wrapped child query against <paramref name="reader"/>. When the
        /// rewritten child differs from <c>_childQuery</c>, a new
        /// <see cref="ToParentBlockJoinQuery"/> is built around it, keeping
        /// <c>_origChildQuery</c>, <c>_parentsFilter</c> and <c>_scoreMode</c>, and this
        /// query's <c>Boost</c> is copied onto it.
        /// </summary>
        /// <param name="reader"> Reader passed through to the child query's rewrite </param>
        public override Query Rewrite(IndexReader reader)
            Query childRewrite = _childQuery.Rewrite(reader);

            // NOTE(review): the braces and return statements are missing from this excerpt;
            // presumably `rewritten` is returned when the child changed and `this`
            // otherwise - confirm against the full file.
            if (childRewrite != _childQuery)
                Query rewritten = new ToParentBlockJoinQuery(_origChildQuery, childRewrite, _parentsFilter, _scoreMode);
                rewritten.Boost = Boost;
 /// <summary>
 /// Object equality: when <paramref name="obj"/> is a <see cref="ToParentBlockJoinQuery"/>,
 /// compares the original child query, the parents filter and the score mode.
 /// </summary>
 /// <param name="obj"> The object to compare with </param>
 public override bool Equals(object obj)
     if (obj is ToParentBlockJoinQuery)
         ToParentBlockJoinQuery other = (ToParentBlockJoinQuery)obj;
         // NOTE(review): the return expression is cut off after the trailing && in this
         // excerpt, and the branch for non-matching types is not visible; the remaining
         // operand(s) and the fallback result must be checked against the full file.
         return(_origChildQuery.Equals(other._origChildQuery) &&
                _parentsFilter.Equals(other._parentsFilter) &&
                _scoreMode == other._scoreMode &&
Beispiel #7
 /// <summary>
 /// Records <paramref name="scorer"/> for <paramref name="query"/>. If the query already has
 /// a slot in <c>joinQueryID</c>, the scorer is stored in that slot of <c>joinScorers</c>;
 /// the remaining statements assign the next slot index and grow <c>joinScorers</c> by one.
 /// </summary>
 /// <param name="query"> The block-join query used as key in <c>joinQueryID</c> </param>
 /// <param name="scorer"> The scorer to store in <c>joinScorers</c> </param>
 private void Enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer)
     // Known query: overwrite the scorer held in its existing slot.
     if (joinQueryID.TryGetValue(query, out int?slot))
         joinScorers[(int)slot] = scorer;
         // NOTE(review): the `else` keyword and braces are missing from this excerpt;
         // presumably everything below belongs to the "query not seen yet" branch - confirm.
         // The new slot index is the current array length, i.e. the index of the element
         // appended below.
         joinQueryID[query] = joinScorers.Length;
         //System.out.println("found JQ: " + query + " slot=" + joinScorers.length);
         // Grow the array by one element, copy the existing scorers over and put the
         // new scorer in the last position.
         ToParentBlockJoinQuery.BlockJoinScorer[] newArray = new ToParentBlockJoinQuery.BlockJoinScorer[1 + joinScorers.Length];
         Array.Copy(joinScorers, 0, newArray, 0, joinScorers.Length);
         joinScorers = newArray;
         joinScorers[joinScorers.Length - 1] = scorer;
Beispiel #8
        /// <summary>
        /// Checks that a <see cref="ToParentBlockJoinQuery"/> wrapping a child query that
        /// changes on two consecutive rewrites can still be resolved by
        /// <c>ToParentBlockJoinCollector.GetTopGroups</c>: every returned group must
        /// report exactly one total hit.
        /// </summary>
        public void TestBugCausedByRewritingTwice()
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            IList<Document> docs = new List<Document>();

            // Each iteration adds two job documents followed by one resume document.
            // NOTE(review): no call that hands `docs` to the writer is visible in this
            // excerpt; presumably the block is added (and the list cleared) per iteration.
            for (int i = 0; i < 10; i++)
                docs.Add(MakeJob("ruby", i));
                docs.Add(MakeJob("java", 2007));
                docs.Add(MakeResume("Frank", "United States"));

            IndexReader r = w.Reader;
            IndexSearcher s = NewSearcher(r);

            MultiTermQuery qc = NumericRangeQuery.NewIntRange("year", 2007, 2007, true, true);
            // Hacky: this causes the query to need 2 rewrite
            // iterations:
            // NOTE(review): the statement this comment refers to is missing from the excerpt.

            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            // Record the hash code of the query before and after each of two rewrites.
            int h1 = qc.GetHashCode();
            Query qw1 = qc.Rewrite(r);
            int h2 = qw1.GetHashCode();
            Query qw2 = qw1.Rewrite(r);
            int h3 = qw2.GetHashCode();

            // All three hash codes must differ pairwise, i.e. each rewrite step produced
            // a query with a different hash code.
            assertTrue(h1 != h2);
            assertTrue(h2 != h3);
            assertTrue(h3 != h1);

            ToParentBlockJoinQuery qp = new ToParentBlockJoinQuery(qc, parentsFilter, ScoreMode.Max);
            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);

            // Search with the un-rewritten join query, then ask the collector for its groups
            // using that same query instance.
            s.Search(qp, c);
            TopGroups<int> groups = c.GetTopGroups(qp, Sort.INDEXORDER, 0, 10, 0, true);
            foreach (GroupDocs<int> group in groups.Groups)
                assertEquals(1, group.TotalHits);

Beispiel #9
        /// <summary>
        /// Exercises <see cref="ToParentBlockJoinQuery"/> and <see cref="ToChildBlockJoinQuery"/>
        /// in combination with search-time filters on a two-block index (Lisa / United Kingdom
        /// and Frank / United States), asserting the expected hit counts and parent names.
        /// </summary>
        public virtual void TestSimpleFilter()
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            // First block: two jobs followed by Lisa's resume.
            IList<Document> docs = new List<Document>();
            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));

            // Second block: two jobs followed by Frank's resume.
            IList<Document> docs2 = new List<Document>();
            docs2.Add(MakeJob("ruby", 2005));
            docs2.Add(MakeJob("java", 2006));
            docs2.Add(MakeResume("Frank", "United States"));

            // Index the two blocks in a random order: `turn` picks which one goes first,
            // and the second call adds the other one.
            bool turn = Random().NextBoolean();
            w.AddDocuments(turn ? docs : docs2);


            w.AddDocuments(!turn ? docs : docs2);


            IndexReader r = w.Reader;
            IndexSearcher s = NewSearcher(r);

            // Create a filter that defines "parent" documents in the index - in this case resumes
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            // Define child document criteria (finds an example of relevant work experience)
            BooleanQuery childQuery = new BooleanQuery();
            childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            // Define parent document criteria (find a resident in the UK)
            Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

            // Wrap the child document query to 'join' any matches
            // up to corresponding parent:
            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

            // Without a filter both parents are expected to match.
            assertEquals("no filter - both passed", 2, s.Search(childJoinQuery, 10).TotalHits);

            // Filtering by the parents filter itself, cached or uncached, must not drop anything.
            assertEquals("dummy filter passes everyone ", 2, s.Search(childJoinQuery, parentsFilter, 10).TotalHits);
            assertEquals("dummy filter passes everyone ", 2, s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).TotalHits);

            // not found test
            assertEquals("noone live there", 0, s.Search(childJoinQuery, new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).TotalHits);
            assertEquals("noone live there", 0, s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).TotalHits);

            // apply the UK filter by the searcher
            TopDocs ukOnly = s.Search(childJoinQuery, new QueryWrapperFilter(parentQuery), 1);
            assertEquals("has filter - single passed", 1, ukOnly.TotalHits);
            assertEquals("Lisa", r.Document(ukOnly.ScoreDocs[0].Doc).Get("name"));

            // looking for US candidates
            TopDocs usThen = s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "United States"))), 1);
            assertEquals("has filter - single passed", 1, usThen.TotalHits);
            assertEquals("Frank", r.Document(usThen.ScoreDocs[0].Doc).Get("name"));

            // Join in the other direction: from the US parent down to its child documents.
            TermQuery us = new TermQuery(new Term("country", "United States"));
            assertEquals("@ US we have java and ruby", 2, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), 10).TotalHits);

            // NOTE(review): Skill(...) is a helper defined outside this excerpt; presumably it
            // builds a filter on the "skill" field - confirm.
            assertEquals("java skills in US", 1, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), Skill("java"), 10).TotalHits);

            // Child-side filter accepting either ruby or python skills.
            BooleanQuery rubyPython = new BooleanQuery();
            rubyPython.Add(new TermQuery(new Term("skill", "ruby")), BooleanClause.Occur.SHOULD);
            rubyPython.Add(new TermQuery(new Term("skill", "python")), BooleanClause.Occur.SHOULD);
            assertEquals("ruby skills in US", 1, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), new QueryWrapperFilter(rubyPython), 10).TotalHits);

Beispiel #10
        /// <summary>
        /// Verifies <c>ToParentBlockJoinCollector.GetTopGroups</c> when the joined child query
        /// matches nothing but the parents are still found through another SHOULD clause:
        /// two groups are expected, with zero grouped hits, ordered by "parentID".
        /// </summary>
        public void TestChildQueryNeverMatches()
            Directory d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
            // First parent, stored with parentID "0".
            Document parent = new Document();
            parent.Add(new StoredField("parentID", "0"));
            parent.Add(NewTextField("parentText", "text", Field.Store.NO));
            parent.Add(NewStringField("isParent", "yes", Field.Store.NO));

            IList<Document> docs = new List<Document>();

            Document child = new Document();
            child.Add(new StoredField("childID", "0"));
            child.Add(NewTextField("childText", "text", Field.Store.NO));

            // parent last:
            // NOTE(review): the statements that add child/parent to `docs` and index the block
            // are not visible in this excerpt.


            // Second parent, stored with parentID "1".
            parent = new Document();
            parent.Add(NewTextField("parentText", "text", Field.Store.NO));
            parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
            parent.Add(new StoredField("parentID", "1"));

            // parent last:

            IndexReader r = w.Reader;

            // never matches:
            Query childQuery = new TermQuery(new Term("childText", "bogus"));
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
            // Both clauses are optional, so parents can match on "parentText" alone.
            BooleanQuery parentQuery = new BooleanQuery();
            parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
            parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);

            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);
            NewSearcher(r).Search(parentQuery, c);
            TopGroups<int> groups = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, false);

            // Two parents:
            assertEquals(2, (int)groups.TotalGroupCount);

            // No child docs matched (the child query never matches):
            assertEquals(0, groups.TotalGroupedHitCount);

            // Groups come back in parentID order: "0" first, then "1".
            IGroupDocs<int> group = groups.Groups[0];
            Document doc = r.Document((int)group.GroupValue);
            assertEquals("0", doc.Get("parentID"));

            group = groups.Groups[1];
            doc = r.Document((int)group.GroupValue);
            assertEquals("1", doc.Get("parentID"));

Beispiel #11
        /// <summary>
        /// Verifies that searching throws an <see cref="InvalidOperationException"/> when the
        /// query wrapped by <see cref="ToParentBlockJoinQuery"/> targets a field that is set
        /// on the parent documents ("parentText").
        /// </summary>
        public void TestChildQueryMatchesParent()
            Directory d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
            // First parent, stored with parentID "0".
            Document parent = new Document();
            parent.Add(new StoredField("parentID", "0"));
            parent.Add(NewTextField("parentText", "text", Field.Store.NO));
            parent.Add(NewStringField("isParent", "yes", Field.Store.NO));

            IList<Document> docs = new List<Document>();

            Document child = new Document();
            child.Add(new StoredField("childID", "0"));
            child.Add(NewTextField("childText", "text", Field.Store.NO));

            // parent last:
            // NOTE(review): the statements that add child/parent to `docs` and index the block
            // are not visible in this excerpt.


            // Second parent, stored with parentID "1".
            parent = new Document();
            parent.Add(NewTextField("parentText", "text", Field.Store.NO));
            parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
            parent.Add(new StoredField("parentID", "1"));

            // parent last:

            IndexReader r = w.Reader;

            // illegally matches parent:
            Query childQuery = new TermQuery(new Term("parentText", "text"));
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
            BooleanQuery parentQuery = new BooleanQuery();
            parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
            parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);

            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);

            // The search itself is expected to fail.
            Assert.Throws<InvalidOperationException>(() => NewSearcher(r).Search(parentQuery, c));

Beispiel #12
        /// <summary>
        /// Verifies <c>GetTopGroups</c> and <c>GetTopGroupsWithAllChildDocs</c> on a single
        /// parent block: both must return the same complete group, and a call with
        /// maxDocsPerGroup = 1 must return only one child doc while still reporting
        /// two total hits.
        /// </summary>
        public void TestGetTopGroups()

            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            // One block: four jobs followed by Frank's resume. The assertions below expect
            // exactly two jobs to match the child query.
            // NOTE(review): the call that indexes `docs` is not visible in this excerpt.
            IList<Document> docs = new List<Document>();
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeJob("java", 2010));
            docs.Add(MakeJob("java", 2012));
            docs.Add(MakeResume("Frank", "United States"));


            IndexReader r = w.Reader;
            IndexSearcher s = new IndexSearcher(r);

            // Create a filter that defines "parent" documents in the index - in this case resumes
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            // Define child document criteria (finds an example of relevant work experience)
            BooleanQuery childQuery = new BooleanQuery();
            childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            // Wrap the child document query to 'join' any matches
            // up to corresponding parent:
            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 2, true, true);
            s.Search(childJoinQuery, c);

            // Get all child documents within groups, once through each API; the same
            // assertions are applied to both results below.
            TopGroups<int>[] getTopGroupsResults = new TopGroups<int>[2];
            getTopGroupsResults[0] = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
            getTopGroupsResults[1] = c.GetTopGroupsWithAllChildDocs(childJoinQuery, null, 0, 0, true);

            foreach (TopGroups<int> results in getTopGroupsResults)
                assertEquals(2, results.TotalGroupedHitCount);
                assertEquals(1, results.Groups.Length);

                IGroupDocs<int> resultGroup = results.Groups[0];
                assertEquals(2, resultGroup.TotalHits);
                // The group value is used as the docID of the parent document.
                Document parentDocument = s.Doc(resultGroup.GroupValue);
                assertEquals("Frank", parentDocument.Get("name"));

                assertEquals(2, resultGroup.ScoreDocs.Length); //all matched child documents collected

                // Every returned child must satisfy both child-query clauses.
                foreach (ScoreDoc scoreDoc in resultGroup.ScoreDocs)
                    Document childDoc = s.Doc(scoreDoc.Doc);
                    assertEquals("java", childDoc.Get("skill"));
                    int year = Convert.ToInt32(childDoc.Get("year"));
                    assertTrue(year >= 2006 && year <= 2011);

            // Get part of child documents: maxDocsPerGroup is 1 here.
            TopGroups<int> boundedResults = c.GetTopGroups(childJoinQuery, null, 0, 1, 0, true);
            assertEquals(2, boundedResults.TotalGroupedHitCount);
            assertEquals(1, boundedResults.Groups.Length);

            // Total hits still counts both matches even though only one doc is returned.
            IGroupDocs<int> group = boundedResults.Groups[0];
            assertEquals(2, group.TotalHits);
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Frank", parentDoc.Get("name"));

            assertEquals(1, group.ScoreDocs.Length); //not all matched child documents collected

            foreach (ScoreDoc scoreDoc in group.ScoreDocs)
                Document childDoc = s.Doc(scoreDoc.Doc);
                assertEquals("java", childDoc.Get("skill"));
                int year = Convert.ToInt32(childDoc.Get("year"));
                assertTrue(year >= 2006 && year <= 2011);

Beispiel #13
        /// <summary>
        /// Basic block-join round trip: joins child matches up to their parent with
        /// <see cref="ToParentBlockJoinQuery"/> and checks the collected group, then joins
        /// parent matches down to children with <see cref="ToChildBlockJoinQuery"/> and
        /// checks the resulting child hit.
        /// </summary>
        public void TestSimple()
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            IList<Document> docs = new List<Document>();

            // Two jobs followed by Lisa's resume.
            // NOTE(review): the calls that index `docs` (and presumably clear the list
            // between the two blocks) are not visible in this excerpt.
            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));

            // Two jobs followed by Frank's resume.
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeResume("Frank", "United States"));

            IndexReader r = w.Reader;
            IndexSearcher s = NewSearcher(r);

            // Create a filter that defines "parent" documents in the index - in this case resumes
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            // Define child document criteria (finds an example of relevant work experience)
            BooleanQuery childQuery = new BooleanQuery();
            childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            // Define parent document criteria (find a resident in the UK)
            Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

            // Wrap the child document query to 'join' any matches
            // up to corresponding parent:
            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

            // Combine the parent and nested child queries into a single query for a candidate
            BooleanQuery fullQuery = new BooleanQuery();
            fullQuery.Add(new BooleanClause(parentQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));

            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);

            s.Search(fullQuery, c);

            TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);

            // Exactly one group with one child hit is expected.
            //assertEquals(1, results.totalHitCount);
            assertEquals(1, results.TotalGroupedHitCount);
            assertEquals(1, results.Groups.Length);

            IGroupDocs<int> group = results.Groups[0];
            assertEquals(1, group.TotalHits);

            // The matched child is a java job and its parent (the group value) is Lisa.
            Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
            //System.out.println("  doc=" + group.ScoreDocs[0].Doc);
            assertEquals("java", childDoc.Get("skill"));
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));

            //System.out.println("TEST: now test up");

            // Now join "up" (map parent hits to child docs) instead...:
            ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, Random().NextBoolean());
            BooleanQuery fullChildQuery = new BooleanQuery();
            fullChildQuery.Add(new BooleanClause(parentJoinQuery, BooleanClause.Occur.MUST));
            fullChildQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));

            // Exactly one child is expected: the java job from 2007 whose parent is Lisa.
            //System.out.println("FULL: " + fullChildQuery);
            TopDocs hits = s.Search(fullChildQuery, 10);
            assertEquals(1, hits.TotalHits);
            childDoc = s.Doc(hits.ScoreDocs[0].Doc);
            //System.out.println("CHILD = " + childDoc + " docID=" + hits.ScoreDocs[0].Doc);
            assertEquals("java", childDoc.Get("skill"));
            assertEquals(2007, childDoc.GetField("year").NumericValue);
            assertEquals("Lisa", GetParentDoc(r, parentsFilter, hits.ScoreDocs[0].Doc).Get("name"));

            // Test with filter on child docs:
            assertEquals(0, s.Search(fullChildQuery, new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))), 1).TotalHits);

        /// <summary>
        /// Verifies that a conjunction containing a <see cref="ToParentBlockJoinQuery"/> built
        /// from <c>CreateChildrenQueryWithOneParent</c> throws an
        /// <see cref="InvalidOperationException"/> with the
        /// "child query must only match non-parent docs" message when searched.
        /// </summary>
        public void TestAdvanceValidationForToParentBjq()
            int randomChildNumber = GetRandomChildNumber(0);
            // we need to make advance method meet wrong document, so random child number
            // in BJQ must be greater than child number in Boolean clause
            int nextRandomChildNumber = GetRandomChildNumber(randomChildNumber);
            Query parentQueryWithRandomChild = CreateChildrenQueryWithOneParent(nextRandomChildNumber);
            ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, ParentsFilter, ScoreMode.None);
            // advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
            BooleanQuery conjunctionQuery = new BooleanQuery();
            WildcardQuery childQuery = new WildcardQuery(new Term("child", CreateFieldValue(randomChildNumber)));
            conjunctionQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
            conjunctionQuery.Add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));

            // The search itself must fail, and the failure must name the violated constraint.
            var ex = Throws<InvalidOperationException>(() => IndexSearcher.Search(conjunctionQuery, 1));
            StringAssert.Contains("child query must only match non-parent docs", ex.Message);
 /// <summary>
 /// Records <paramref name="scorer"/> for <paramref name="query"/>. If the query already has
 /// a slot in <c>joinQueryID</c>, the scorer is stored in that slot of <c>joinScorers</c>;
 /// the remaining statements assign the next slot index and grow <c>joinScorers</c> by one.
 /// </summary>
 /// <param name="query"> The block-join query used as key in <c>joinQueryID</c> </param>
 /// <param name="scorer"> The scorer to store in <c>joinScorers</c> </param>
 private void Enroll(ToParentBlockJoinQuery query, ToParentBlockJoinQuery.BlockJoinScorer scorer)
     int? slot;
     // Known query: overwrite the scorer held in its existing slot.
     if (joinQueryID.TryGetValue(query, out slot))
         joinScorers[(int) slot] = scorer;
         // NOTE(review): the `else` keyword and braces are missing from this excerpt;
         // presumably everything below belongs to the "query not seen yet" branch - confirm.
         // The new slot index is the current array length, i.e. the index of the element
         // appended below.
         joinQueryID[query] = joinScorers.Length;
         //System.out.println("found JQ: " + query + " slot=" + joinScorers.length);
         // Grow the array by one element, copy the existing scorers over and put the
         // new scorer in the last position.
         ToParentBlockJoinQuery.BlockJoinScorer[] newArray = new ToParentBlockJoinQuery.BlockJoinScorer[1 + joinScorers.Length];
         Array.Copy(joinScorers, 0, newArray, 0, joinScorers.Length);
         joinScorers = newArray;
         joinScorers[joinScorers.Length - 1] = scorer;
Beispiel #16
        public void TestRandom()
            // We build two indices at once: one normalized (which
            // ToParentBlockJoinQuery/Collector,
            // ToChildBlockJoinQuery can query) and the other w/
            // the same docs, just fully denormalized:
            Directory dir = NewDirectory();
            Directory joinDir = NewDirectory();

            int numParentDocs = TestUtil.NextInt(Random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
            //final int numParentDocs = 30;

            // Values for parent fields:
            string[][] parentFields = GetRandomFields(numParentDocs / 2);
            // Values for child fields:
            string[][] childFields = GetRandomFields(numParentDocs);

            bool doDeletes = Random().NextBoolean();
            IList<int> toDelete = new List<int>();

            // TODO: parallel star join, nested join cases too!
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            RandomIndexWriter joinW = new RandomIndexWriter(Random(), joinDir, Similarity, TimeZone);
            for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++)
                Document parentDoc = new Document();
                Document parentJoinDoc = new Document();
                Field id = NewStringField("parentID", "" + parentDocID, Field.Store.YES);
                parentJoinDoc.Add(NewStringField("isParent", "x", Field.Store.NO));
                for (int field = 0; field < parentFields.Length; field++)
                    if (Random().NextDouble() < 0.9)
                        Field f = NewStringField("parent" + field, parentFields[field][Random().Next(parentFields[field].Length)], Field.Store.NO);

                if (doDeletes)
                    parentDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
                    parentJoinDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));

                IList<Document> joinDocs = new List<Document>();

                if (VERBOSE)
                    StringBuilder sb = new StringBuilder();
                    for (int fieldID = 0; fieldID < parentFields.Length; fieldID++)
                        string parent = parentDoc.Get("parent" + fieldID);
                        if (parent != null)
                            sb.Append(" parent" + fieldID + "=" + parent);
                    Console.WriteLine("  " + sb);

                int numChildDocs = TestUtil.NextInt(Random(), 1, 20);
                for (int childDocID = 0; childDocID < numChildDocs; childDocID++)
                    // Denormalize: copy all parent fields into child doc:
                    Document childDoc = TestUtil.CloneDocument(parentDoc);
                    Document joinChildDoc = new Document();

                    Field childID = NewStringField("childID", "" + childDocID, Field.Store.YES);

                    for (int childFieldID = 0; childFieldID < childFields.Length; childFieldID++)
                        if (Random().NextDouble() < 0.9)
                            Field f = NewStringField("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)], Field.Store.NO);

                    if (VERBOSE)
                        StringBuilder sb = new StringBuilder();
                        for (int fieldID = 0; fieldID < childFields.Length; fieldID++)
                            string child = joinChildDoc.Get("child" + fieldID);
                            if (child != null)
                                sb.Append(" child" + fieldID + "=" + child);
                        Console.WriteLine("    " + sb);

                    if (doDeletes)
                        joinChildDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));


                // Parent last:

                if (doDeletes && Random().Next(30) == 7)

            foreach (int deleteID in toDelete)
                if (VERBOSE)
                    Console.WriteLine("DELETE parentID=" + deleteID);
                w.DeleteDocuments(new Term("blockID", "" + deleteID));
                joinW.DeleteDocuments(new Term("blockID", "" + deleteID));

            IndexReader r = w.Reader;
            IndexReader joinR = joinW.Reader;

            if (VERBOSE)
                Console.WriteLine("TEST: reader=" + r);
                Console.WriteLine("TEST: joinReader=" + joinR);

                for (int docIDX = 0; docIDX < joinR.MaxDoc; docIDX++)
                    Console.WriteLine("  docID=" + docIDX + " doc=" + joinR.Document(docIDX));

            IndexSearcher s = NewSearcher(r);

            IndexSearcher joinS = new IndexSearcher(joinR);

            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));

            int iters = 200 * RANDOM_MULTIPLIER;

            for (int iter = 0; iter < iters; iter++)
                if (VERBOSE)
                    Console.WriteLine("TEST: iter=" + (1 + iter) + " of " + iters);

                Query childQuery;
                if (Random().Next(3) == 2)
                    int childFieldID = Random().Next(childFields.Length);
                    childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
                else if (Random().Next(3) == 2)
                    BooleanQuery bq = new BooleanQuery();
                    childQuery = bq;
                    int numClauses = TestUtil.NextInt(Random(), 2, 4);
                    bool didMust = false;
                    for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
                        Query clause;
                        BooleanClause.Occur occur;
                        if (!didMust && Random().NextBoolean())
                            occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                            clause = new TermQuery(RandomChildTerm(childFields[0]));
                            didMust = true;
                            occur = BooleanClause.Occur.SHOULD;
                            int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
                            clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
                        bq.Add(clause, occur);
                    BooleanQuery bq = new BooleanQuery();
                    childQuery = bq;

                    bq.Add(new TermQuery(RandomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
                    int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
                    bq.Add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);

                int x = Random().Next(4);
                ScoreMode agg;
                if (x == 0)
                    agg = ScoreMode.None;
                else if (x == 1)
                    agg = ScoreMode.Max;
                else if (x == 2)
                    agg = ScoreMode.Total;
                    agg = ScoreMode.Avg;

                ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);

                // To run against the block-join index:
                Query parentJoinQuery;

                // Same query as parentJoinQuery, but to run against
                // the fully denormalized index (so we can compare
                // results):
                Query parentQuery;

                if (Random().NextBoolean())
                    parentQuery = childQuery;
                    parentJoinQuery = childJoinQuery;
                    // AND parent field w/ child field
                    BooleanQuery bq = new BooleanQuery();
                    parentJoinQuery = bq;
                    Term parentTerm = RandomParentTerm(parentFields[0]);
                    if (Random().NextBoolean())
                        bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
                        bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                        bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                        bq.Add(childJoinQuery, BooleanClause.Occur.MUST);

                    BooleanQuery bq2 = new BooleanQuery();
                    parentQuery = bq2;
                    if (Random().NextBoolean())
                        bq2.Add(childQuery, BooleanClause.Occur.MUST);
                        bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                        bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                        bq2.Add(childQuery, BooleanClause.Occur.MUST);

                Sort parentSort = GetRandomSort("parent", parentFields.Length);
                Sort childSort = GetRandomSort("child", childFields.Length);

                if (VERBOSE)
                    Console.WriteLine("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);

                // Merge both sorts:
                IList<SortField> sortFields = new List<SortField>(Arrays.AsList(parentSort.GetSort()));
                Sort parentAndChildSort = new Sort(sortFields.ToArray());

                TopDocs results = s.Search(parentQuery, null, r.NumDocs, parentAndChildSort);

                if (VERBOSE)
                    Console.WriteLine("\nTEST: normal index gets " + results.TotalHits + " hits");
                    ScoreDoc[] hits = results.ScoreDocs;
                    for (int hitIDX = 0; hitIDX < hits.Length; hitIDX++)
                        Document doc = s.Doc(hits[hitIDX].Doc);
                        //System.out.println("  score=" + hits[hitIDX].Score + " parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                        Console.WriteLine("  parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                        FieldDoc fd = (FieldDoc)hits[hitIDX];
                        if (fd.Fields != null)
                            Console.Write("    ");
                            foreach (object o in fd.Fields)
                                if (o is BytesRef)
                                    Console.Write(((BytesRef)o).Utf8ToString() + " ");
                                    Console.Write(o + " ");

                bool trackScores;
                bool trackMaxScore;
                if (agg == ScoreMode.None)
                    trackScores = false;
                    trackMaxScore = false;
                    trackScores = Random().NextBoolean();
                    trackMaxScore = Random().NextBoolean();
                ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);

                joinS.Search(parentJoinQuery, c);

                int hitsPerGroup = TestUtil.NextInt(Random(), 1, 20);
                //final int hitsPerGroup = 100;
                TopGroups<int> joinResults = c.GetTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);

                if (VERBOSE)
                    Console.WriteLine("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.Groups.Length) + " groups; hitsPerGroup=" + hitsPerGroup);
                    if (joinResults != null)
                        IGroupDocs<int>[] groups = joinResults.Groups;
                        for (int groupIDX = 0; groupIDX < groups.Length; groupIDX++)
                            IGroupDocs<int> group = groups[groupIDX];
                            if (group.GroupSortValues != null)
                                Console.Write("  ");
                                foreach (object o in group.GroupSortValues)
                                    if (o is BytesRef)
                                        Console.Write(((BytesRef)o).Utf8ToString() + " ");
                                        Console.Write(o + " ");

                            Document parentDoc = joinS.Doc(group.GroupValue);
                            Console.WriteLine("  group parentID=" + parentDoc.Get("parentID") + " (docID=" + group.GroupValue + ")");
                            for (int hitIDX = 0; hitIDX < group.ScoreDocs.Length; hitIDX++)
                                Document doc = joinS.Doc(group.ScoreDocs[hitIDX].Doc);
                                //System.out.println("    score=" + group.ScoreDocs[hitIDX].Score + " childID=" + doc.Get("childID") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                                Console.WriteLine("    childID=" + doc.Get("childID") + " child0=" + doc.Get("child0") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");

                if (results.TotalHits == 0)
                    CompareHits(r, joinR, results, joinResults);
                    TopDocs b = joinS.Search(childJoinQuery, 10);
                    foreach (ScoreDoc hit in b.ScoreDocs)
                        Explanation explanation = joinS.Explain(childJoinQuery, hit.Doc);
                        Document document = joinS.Doc(hit.Doc - 1);
                        int childId = Convert.ToInt32(document.Get("childID"));
                        assertEquals(hit.Score, explanation.Value, 0.0f);
                        assertEquals(string.Format("Score based on child doc range from {0} to {1}", hit.Doc - 1 - childId, hit.Doc - 1), explanation.Description);

                // Test joining in the opposite direction (parent to
                // child):

                // Get random query against parent documents:
                Query parentQuery2;
                if (Random().Next(3) == 2)
                    int fieldID = Random().Next(parentFields.Length);
                    parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
                else if (Random().Next(3) == 2)
                    BooleanQuery bq = new BooleanQuery();
                    parentQuery2 = bq;
                    int numClauses = TestUtil.NextInt(Random(), 2, 4);
                    bool didMust = false;
                    for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
                        Query clause;
                        BooleanClause.Occur occur;
                        if (!didMust && Random().NextBoolean())
                            occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                            clause = new TermQuery(RandomParentTerm(parentFields[0]));
                            didMust = true;
                            occur = BooleanClause.Occur.SHOULD;
                            int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
                            clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
                        bq.Add(clause, occur);
                    BooleanQuery bq = new BooleanQuery();
                    parentQuery2 = bq;

                    bq.Add(new TermQuery(RandomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
                    int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
                    bq.Add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);

                if (VERBOSE)
                    Console.WriteLine("\nTEST: top down: parentQuery2=" + parentQuery2);

                // Maps parent query to child docs:
                ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter, Random().NextBoolean());

                // To run against the block-join index:
                Query childJoinQuery2;

                // Same query as parentJoinQuery, but to run against
                // the fully denormalized index (so we can compare
                // results):
                Query childQuery2;

                // apply a filter to children
                Filter childFilter2, childJoinFilter2;

                if (Random().NextBoolean())
                    childQuery2 = parentQuery2;
                    childJoinQuery2 = parentJoinQuery2;
                    childFilter2 = null;
                    childJoinFilter2 = null;
                    Term childTerm = RandomChildTerm(childFields[0]);
                    if (Random().NextBoolean()) // filtered case
                        childJoinQuery2 = parentJoinQuery2;
                        Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                        childJoinFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
                        childJoinFilter2 = null;
                        // AND child field w/ parent query:
                        BooleanQuery bq = new BooleanQuery();
                        childJoinQuery2 = bq;
                        if (Random().NextBoolean())
                            bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                            bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                            bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                            bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);

                    if (Random().NextBoolean()) // filtered case
                        childQuery2 = parentQuery2;
                        Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                        childFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
                        childFilter2 = null;
                        BooleanQuery bq2 = new BooleanQuery();
                        childQuery2 = bq2;
                        if (Random().NextBoolean())
                            bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                            bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                            bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                            bq2.Add(parentQuery2, BooleanClause.Occur.MUST);

                Sort childSort2 = GetRandomSort("child", childFields.Length);

                // Search denormalized index:
                if (VERBOSE)
                    Console.WriteLine("TEST: run top down query=" + childQuery2 + " filter=" + childFilter2 + " sort=" + childSort2);
                TopDocs results2 = s.Search(childQuery2, childFilter2, r.NumDocs, childSort2);
                if (VERBOSE)
                    Console.WriteLine("  " + results2.TotalHits + " totalHits:");
                    foreach (ScoreDoc sd in results2.ScoreDocs)
                        Document doc = s.Doc(sd.Doc);
                        Console.WriteLine("  childID=" + doc.Get("childID") + " parentID=" + doc.Get("parentID") + " docID=" + sd.Doc);

                // Search join index:
                if (VERBOSE)
                    Console.WriteLine("TEST: run top down join query=" + childJoinQuery2 + " filter=" + childJoinFilter2 + " sort=" + childSort2);
                TopDocs joinResults2 = joinS.Search(childJoinQuery2, childJoinFilter2, joinR.NumDocs, childSort2);
                if (VERBOSE)
                    Console.WriteLine("  " + joinResults2.TotalHits + " totalHits:");
                    foreach (ScoreDoc sd in joinResults2.ScoreDocs)
                        Document doc = joinS.Doc(sd.Doc);
                        Document parentDoc = GetParentDoc(joinR, parentsFilter, sd.Doc);
                        Console.WriteLine("  childID=" + doc.Get("childID") + " parentID=" + parentDoc.Get("parentID") + " docID=" + sd.Doc);

                CompareChildHits(r, joinR, results2, joinResults2);

        /// <summary>
        /// Indexes parent/child blocks and checks that <see cref="ToParentBlockJoinSortField"/>
        /// orders parents by the "field2" value of their children, for ascending and descending
        /// sorts, for both "order first" and "order last", and with a narrower child filter.
        /// </summary>
        public void TestNestedSorting()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            // Every block is written children-first with its parent last. The doc IDs asserted
            // below (3, 7, 11, 15, 19, 23, 28) depend on exactly this layout.
            AddNestedSortingBlock(w, "a", new[] { "a", "b", "c" }, new[] { "T", "T", "T" }); // docs 0-3
            AddNestedSortingBlock(w, "b", new[] { "c", "d", "e" }, new[] { "T", "T", "T" }); // docs 4-7
            AddNestedSortingBlock(w, "c", new[] { "e", "f", "g" }, new[] { "T", "T", "T" }); // docs 8-11
            AddNestedSortingBlock(w, "d", new[] { "g", "h", "i" }, new[] { "T", "F", "F" }); // docs 12-15
            AddNestedSortingBlock(w, "f", new[] { "i", "j", "k" }, new[] { "F", "F", "F" }); // docs 16-19
            AddNestedSortingBlock(w, "g", new[] { "k", "l", "m" }, new[] { "T", "T", "T" }); // docs 20-23

            // This doc will not be included, because it doesn't have nested docs
            AddNestedSortingBlock(w, "h", new string[0], new string[0]); // doc 24

            AddNestedSortingBlock(w, "i", new[] { "m", "n", "o" }, new[] { "T", "F", "F" }); // docs 25-28

            // Some garbage docs, just to check if the NestedFieldComparator can deal with this.
            for (int garbageIDX = 0; garbageIDX < 3; garbageIDX++)
            {
                Document garbage = new Document();
                garbage.Add(new StringField("fieldXXX", "x", Field.Store.NO));
                w.AddDocument(garbage);
            }

            // NOTE(review): no explicit commits are issued between blocks; add w.Commit() calls
            // if this test is also meant to pin a particular multi-segment layout.
            IndexSearcher searcher = new IndexSearcher(DirectoryReader.Open(w.w, false));
            w.Dispose();

            Filter parentFilter = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent")));
            Filter childFilter = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
            ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);

            // Sort by field ascending, order first
            ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, false, Wrap(parentFilter), Wrap(childFilter));
            TopFieldDocs topDocs = searcher.Search(query, 5, new Sort(sortField));
            AssertNestedSortingHits(topDocs, 7, new[] { 3, 7, 11, 15, 19 }, new[] { "a", "c", "e", "g", "i" });

            // Sort by field ascending, order last
            sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, false, true, Wrap(parentFilter), Wrap(childFilter));
            topDocs = searcher.Search(query, 5, new Sort(sortField));
            AssertNestedSortingHits(topDocs, 7, new[] { 3, 7, 11, 15, 19 }, new[] { "c", "e", "g", "i", "k" });

            // Sort by field descending, order last
            sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, true, Wrap(parentFilter), Wrap(childFilter));
            topDocs = searcher.Search(query, 5, new Sort(sortField));
            AssertNestedSortingHits(topDocs, 7, new[] { 28, 23, 19, 15, 11 }, new[] { "o", "m", "k", "i", "g" });

            // Sort by field descending, order last, sort filter (filter_1:T)
            childFilter = new QueryWrapperFilter(new TermQuery((new Term("filter_1", "T"))));
            query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);
            sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, true, Wrap(parentFilter), Wrap(childFilter));
            topDocs = searcher.Search(query, 5, new Sort(sortField));
            AssertNestedSortingHits(topDocs, 6, new[] { 23, 28, 11, 15, 7 }, new[] { "m", "m", "g", "g", "e" });

            searcher.IndexReader.Dispose();
            dir.Dispose();
        }

        /// <summary>
        /// Writes one block to <paramref name="w"/>: a child document per entry of
        /// <paramref name="field2Values"/> (paired by index with <paramref name="filterValues"/>),
        /// followed by a parent document carrying <paramref name="parentField1"/>.
        /// </summary>
        private static void AddNestedSortingBlock(RandomIndexWriter w, string parentField1, string[] field2Values, string[] filterValues)
        {
            IList<Document> block = new List<Document>();
            for (int childIDX = 0; childIDX < field2Values.Length; childIDX++)
            {
                Document child = new Document();
                child.Add(new StringField("field2", field2Values[childIDX], Field.Store.NO));
                child.Add(new StringField("filter_1", filterValues[childIDX], Field.Store.NO));
                block.Add(child);
            }

            // The parent goes last so it closes the block.
            Document parent = new Document();
            parent.Add(new StringField("__type", "parent", Field.Store.NO));
            parent.Add(new StringField("field1", parentField1, Field.Store.NO));
            block.Add(parent);

            w.AddDocuments(block);
        }

        /// <summary>
        /// Asserts the total hit count and, for each returned hit, its doc ID and the UTF-8
        /// decoded first sort value.
        /// </summary>
        private void AssertNestedSortingHits(TopDocs topDocs, int expectedTotalHits, int[] expectedDocs, string[] expectedValues)
        {
            assertEquals(expectedTotalHits, topDocs.TotalHits);
            assertEquals(expectedDocs.Length, topDocs.ScoreDocs.Length);
            for (int hitIDX = 0; hitIDX < expectedDocs.Length; hitIDX++)
            {
                assertEquals(expectedDocs[hitIDX], topDocs.ScoreDocs[hitIDX].Doc);
                assertEquals(expectedValues[hitIDX], ((BytesRef)((FieldDoc)topDocs.ScoreDocs[hitIDX]).Fields[0]).Utf8ToString());
            }
        }

 /// <summary>
 /// Typed equality check: two queries are equal when the base comparison succeeds and
 /// their parents filter, score mode and original child query all match.
 /// </summary>
 /// <param name="other"> Query to compare against </param>
 /// <returns> <c>true</c> if every compared member is equal </returns>
 protected bool Equals(ToParentBlockJoinQuery other)
 {
     // base.Equals runs first; the member comparisons only run when it succeeds.
     return base.Equals(other) &&
         Equals(_parentsFilter, other._parentsFilter) &&
         _scoreMode == other._scoreMode &&
         Equals(_origChildQuery, other._origChildQuery);
 }
 /// <summary>
 /// Rewrites the child query against <paramref name="reader"/>. When the rewrite yields a
 /// different query, returns a new <see cref="ToParentBlockJoinQuery"/> built from the
 /// rewritten child (keeping the original child query, parents filter, score mode and
 /// boost); otherwise returns this instance.
 /// </summary>
 /// <param name="reader"> Reader the child query is rewritten against </param>
 /// <returns> The rewritten query, or <c>this</c> if the child query did not change </returns>
 public override Query Rewrite(IndexReader reader)
 {
     Query childRewrite = _childQuery.Rewrite(reader);
     if (childRewrite != _childQuery)
     {
         Query rewritten = new ToParentBlockJoinQuery(_origChildQuery, childRewrite, _parentsFilter, _scoreMode);
         // Carry the boost over so the rewritten query scores like this one.
         rewritten.Boost = Boost;
         return rewritten;
     }

     return this;
 }
        /// <summary>
        /// Returns the <see cref="TopGroups{T}"/> for the specified
        /// BlockJoinQuery. The groupValue of each GroupDocs will
        /// be the parent docID for that group.
        /// The number of documents within each group is calculated as minimum of <paramref name="maxDocsPerGroup"/>
        /// and number of matched child documents for that group.
        /// Returns <c>null</c> if no groups matched.
        /// </summary>
        /// <param name="query"> Search query </param>
        /// <param name="withinGroupSort"> Sort criteria within groups </param>
        /// <param name="offset"> Parent docs offset </param>
        /// <param name="maxDocsPerGroup"> Upper bound of documents per group number </param>
        /// <param name="withinGroupOffset"> Offset within each group of child docs </param>
        /// <param name="fillSortFields"> Specifies whether to add sort fields or not </param>
        /// <returns> <see cref="TopGroups{T}"/> for specified query </returns>
        /// <exception cref="IOException"> if there is a low-level I/O error </exception>
        public virtual TopGroups<int> GetTopGroups(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int maxDocsPerGroup, int withinGroupOffset, bool fillSortFields)
        {
            int? slot;
            if (!joinQueryID.TryGetValue(query, out slot) && totalHitCount == 0)
            {
                // The query has no entry in joinQueryID and no hits were counted.
                return null;
            }

            if (sortedGroups == null)
            {
                if (offset >= queue.Size())
                {
                    return null;
                }
                // NOTE(review): nothing in this method populates sortedGroups before
                // AccumulateGroups runs; confirm that AccumulateGroups copes with it still
                // being null, or that a queue-sorting step belongs here.
            }
            else if (offset > sortedGroups.Length)
            {
                return null;
            }

            // A query without an entry in joinQueryID is passed on as slot -1.
            return AccumulateGroups(slot ?? -1, offset, maxDocsPerGroup, withinGroupOffset, withinGroupSort, fillSortFields);
        }
Beispiel #21
        /// <summary>
        /// Searches a <see cref="ToParentBlockJoinQuery"/> on an empty index, first on its own
        /// and then as a MUST clause of a boosted <see cref="BooleanQuery"/>. There are no
        /// result assertions beyond <c>QueryUtils.Check</c>; the searches simply must complete.
        /// </summary>
        public void TestBoostBug()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            IndexReader r = w.Reader;
            // The writer is not used after the reader has been obtained.
            w.Dispose();
            IndexSearcher s = NewSearcher(r);

            ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(new MatchAllDocsQuery(), new QueryWrapperFilter(new MatchAllDocsQuery()), ScoreMode.Avg);
            QueryUtils.Check(Random(), q, s, Similarity);
            s.Search(q, 10);
            BooleanQuery bq = new BooleanQuery();
            bq.Boost = 2f; // we boost the BQ
            bq.Add(q, BooleanClause.Occur.MUST);
            s.Search(bq, 10);

            r.Dispose();
            dir.Dispose();
        }
Beispiel #22
        /// <summary>
        /// Indexes a parent without children (doc 0) followed by a child/parent block
        /// (docs 1 and 2), then checks that advancing the join scorer to doc 0 lands on the
        /// second parent (doc 2).
        /// </summary>
        public void TestAdvanceSingleParentNoChild()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogDocMergePolicy()));
            Document parentDoc = new Document();
            parentDoc.Add(NewStringField("parent", "1", Field.Store.NO));
            parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
            // The childless parent must be indexed: it is doc 0, which pushes the block below
            // to docs 1-2 and makes the asserted Advance(0) result 2.
            w.AddDocuments(Arrays.AsList(parentDoc));

            // Add another doc so scorer is not null
            parentDoc = new Document();
            parentDoc.Add(NewStringField("parent", "2", Field.Store.NO));
            parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
            Document childDoc = new Document();
            childDoc.Add(NewStringField("child", "2", Field.Store.NO));
            w.AddDocuments(Arrays.AsList(childDoc, parentDoc));

            // Need single seg:
            w.ForceMerge(1);
            IndexReader r = w.Reader;
            w.Dispose();
            IndexSearcher s = NewSearcher(r);
            Query tq = new TermQuery(new Term("child", "2"));
            Filter parentFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));

            ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg);
            Weight weight = s.CreateNormalizedWeight(q);
            // Only the first leaf is inspected, hence the single-segment requirement above.
            DocIdSetIterator disi = weight.Scorer(s.IndexReader.Leaves.First(), null);
            assertEquals(2, disi.Advance(0));

            r.Dispose();
            dir.Dispose();
        }
Beispiel #23
        /// <summary>
        /// Indexes two job/resume blocks, checks the <see cref="ScoreMode.Total"/> parent
        /// scores, then deletes the "java" jobs and checks that the parent scores drop
        /// accordingly while both parents still match.
        /// </summary>
        public void TestNestedDocScoringWithDeletes()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            // Cannot assert this since we use NoMergePolicy:
            w.DoRandomForceMergeAssert = false;

            // First block: docs 0-2, resume (parent) last.
            IList<Document> docs = new List<Document>();
            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));
            w.AddDocuments(docs);

            // Second block: docs 3-6, resume (parent) last.
            docs.Clear();
            docs.Add(MakeJob("c", 1999));
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeResume("Frank", "United States"));
            w.AddDocuments(docs);

            // The reader below is opened from the directory, so the blocks must be committed first.
            w.Commit();
            IndexSearcher s = NewSearcher(DirectoryReader.Open(dir));

            ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(NumericRangeQuery.NewIntRange("year", 1990, 2010, true, true), new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))), ScoreMode.Total);

            TopDocs topDocs = s.Search(q, 10);
            assertEquals(2, topDocs.TotalHits);
            assertEquals(6, topDocs.ScoreDocs[0].Doc);
            assertEquals(3.0f, topDocs.ScoreDocs[0].Score, 0.0f);
            assertEquals(2, topDocs.ScoreDocs[1].Doc);
            assertEquals(2.0f, topDocs.ScoreDocs[1].Score, 0.0f);

            s.IndexReader.Dispose();
            w.DeleteDocuments(new Term("skill", "java"));
            // Disposing the writer makes the deletes visible to the reader re-opened below.
            w.Dispose();
            s = NewSearcher(DirectoryReader.Open(dir));

            topDocs = s.Search(q, 10);
            assertEquals(2, topDocs.TotalHits);
            assertEquals(6, topDocs.ScoreDocs[0].Doc);
            assertEquals(2.0f, topDocs.ScoreDocs[0].Score, 0.0f);
            assertEquals(2, topDocs.ScoreDocs[1].Doc);
            assertEquals(1.0f, topDocs.ScoreDocs[1].Score, 0.0f);

            s.IndexReader.Dispose();
            dir.Dispose();
        }

 /// <summary>
 /// Returns the TopGroups for the specified BlockJoinQuery. The groupValue of each
 /// GroupDocs will be the parent docID for that group. The number of documents within
 /// each group equals the total number of matched child documents for that group.
 /// Returns null if no groups matched.
 /// </summary>
 /// <param name="query">Search query</param>
 /// <param name="withinGroupSort">Sort criteria within groups</param>
 /// <param name="offset">Parent docs offset</param>
 /// <param name="withinGroupOffset">Offset within each group of child docs</param>
 /// <param name="fillSortFields">Specifies whether to add sort fields or not</param>
 /// <returns>TopGroups for specified query</returns>
 /// <exception cref="IOException"> if there is a low-level I/O error </exception>
 public virtual TopGroups<int> GetTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int withinGroupOffset, bool fillSortFields)
 {
     // int.MaxValue as the per-group cap means no child document is cut off.
     return GetTopGroups(query, withinGroupSort, offset, int.MaxValue, withinGroupOffset, fillSortFields);
 }
Beispiel #25
        /// <summary>
        /// Searches a multi-leaf reader with a <c>ToParentBlockJoinQuery</c> combined with a
        /// <c>MatchAllDocsQuery</c>, collects the hits with a <c>ToParentBlockJoinCollector</c>,
        /// and checks that exactly one group is returned whose child has skill "java" and
        /// whose parent is named "Lisa".
        /// </summary>
        // NOTE(review): in this excerpt "docs" is filled but no statement that passes it to the
        // writer (or clears it between blocks) is visible - confirm against the full source.
        public void TestEmptyChildFilter()
            Directory dir = NewDirectory();
            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            // we don't want to merge - since we rely on certain segment setup
            IndexWriter w = new IndexWriter(dir, config);

            IList<Document> docs = new List<Document>();

            // First block: two job documents followed by a resume document.
            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));

            // Second block: two job documents followed by a resume document.
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeResume("Frank", "United States"));
            int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field
            for (int i = 0; i < num; i++)
                docs.Add(MakeJob("java", 2007));

            // Open a reader on the writer; the test requires more than one leaf (segment).
            IndexReader r = DirectoryReader.Open(w, Random().NextBoolean());
            assertTrue(r.Leaves.size() > 1);
            IndexSearcher s = new IndexSearcher(r);
            // Parent documents are those whose docType term is "resume".
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            // Child criteria: skill "java" AND year within [2006, 2011] (both bounds inclusive).
            BooleanQuery childQuery = new BooleanQuery();
            childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            // Join the child matches up to their parent documents.
            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

            // The join query and a match-all query are both required clauses.
            BooleanQuery fullQuery = new BooleanQuery();
            fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));
            // NOTE(review): constructor arguments are presumably (sort, numParentHits, trackScores,
            // trackMaxScore) - confirm against ToParentBlockJoinCollector.
            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
            s.Search(fullQuery, c);
            // Arguments: no within-group sort, parent offset 0, at most 10 docs per group,
            // within-group offset 0, fill sort fields.
            TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
            assertEquals(1, results.TotalGroupedHitCount);
            assertEquals(1, results.Groups.Length);
            IGroupDocs<int> group = results.Groups[0];
            // The first child hit of the group must be a "java" job.
            Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
            assertEquals("java", childDoc.Get("skill"));
            // GroupValue is used as the doc ID to load the parent document.
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));

Beispiel #26
        /// <summary>
        /// Builds one parent query with two separate <c>ToParentBlockJoinQuery</c> clauses
        /// (jobs and qualifications), runs it through a single <c>ToParentBlockJoinCollector</c>,
        /// and checks that each join query yields exactly one group with one matching child
        /// under the parent named "Lisa".
        /// </summary>
        // NOTE(review): in this excerpt "docs" is filled but no statement that passes it to the
        // writer is visible - confirm against the full source.
        public void TestMultiChildTypes()
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            IList<Document> docs = new List<Document>();

            // One block: two jobs and one qualification, followed by the resume document.
            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeQualification("maths", 1999));
            docs.Add(MakeResume("Lisa", "United Kingdom"));

            IndexReader r = w.Reader;
            IndexSearcher s = NewSearcher(r);

            // Create a filter that defines "parent" documents in the index - in this case resumes
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            // Define child document criteria (finds an example of relevant work experience)
            // Both clauses are required: skill "java" AND year within [2006, 2011] inclusive.
            BooleanQuery childJobQuery = new BooleanQuery();
            childJobQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childJobQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            // Second child criteria: qualification "maths" AND year within [1980, 2000] inclusive.
            BooleanQuery childQualificationQuery = new BooleanQuery();
            childQualificationQuery.Add(new BooleanClause(new TermQuery(new Term("qualification", "maths")), BooleanClause.Occur.MUST));
            childQualificationQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 1980, 2000, true, true), BooleanClause.Occur.MUST));

            // Define parent document criteria (find a resident in the UK)
            Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

            // Wrap the child document query to 'join' any matches
            // up to corresponding parent:
            ToParentBlockJoinQuery childJobJoinQuery = new ToParentBlockJoinQuery(childJobQuery, parentsFilter, ScoreMode.Avg);
            ToParentBlockJoinQuery childQualificationJoinQuery = new ToParentBlockJoinQuery(childQualificationQuery, parentsFilter, ScoreMode.Avg);

            // Combine the parent and nested child queries into a single query for a candidate
            BooleanQuery fullQuery = new BooleanQuery();
            fullQuery.Add(new BooleanClause(parentQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(childJobJoinQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(childQualificationJoinQuery, BooleanClause.Occur.MUST));

            // Collects all job and qualification child docs for
            // each resume hit in the top N (sorted by score):
            // NOTE(review): constructor arguments are presumably (sort, numParentHits, trackScores,
            // trackMaxScore) - confirm against ToParentBlockJoinCollector.
            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);

            s.Search(fullQuery, c);

            // Examine "Job" children
            // Arguments: no within-group sort, parent offset 0, at most 10 docs per group,
            // within-group offset 0, fill sort fields.
            TopGroups<int> jobResults = c.GetTopGroups(childJobJoinQuery, null, 0, 10, 0, true);

            //assertEquals(1, results.totalHitCount);
            assertEquals(1, jobResults.TotalGroupedHitCount);
            assertEquals(1, jobResults.Groups.Length);

            IGroupDocs<int> group = jobResults.Groups[0];
            assertEquals(1, group.TotalHits);

            // The single job hit must be the "java" job, and its group's parent must be "Lisa".
            Document childJobDoc = s.Doc(group.ScoreDocs[0].Doc);
            //System.out.println("  doc=" + group.ScoreDocs[0].Doc);
            assertEquals("java", childJobDoc.Get("skill"));
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));

            // Now Examine qualification children
            // The same collector is queried again, this time with the qualification join query.
            TopGroups<int> qualificationResults = c.GetTopGroups(childQualificationJoinQuery, null, 0, 10, 0, true);

            assertEquals(1, qualificationResults.TotalGroupedHitCount);
            assertEquals(1, qualificationResults.Groups.Length);

            IGroupDocs<int> qGroup = qualificationResults.Groups[0];
            assertEquals(1, qGroup.TotalHits);

            // The single qualification hit must be "maths", again under the parent "Lisa".
            Document childQualificationDoc = s.Doc(qGroup.ScoreDocs[0].Doc);
            assertEquals("maths", childQualificationDoc.Get("qualification"));
            parentDoc = s.Doc(qGroup.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));

 /// <summary>
 /// Returns the <see cref="ITopGroups{T}"/> for the specified BlockJoinQuery. The groupValue of each
 /// GroupDocs will be the parent docID for that group. The number of documents within
 /// each group is equal to the total number of matched child documents for that group.
 /// Returns <c>null</c> if no groups matched.
 /// </summary>
 /// <param name="query">Search query</param>
 /// <param name="withinGroupSort">Sort criteria within groups</param>
 /// <param name="offset">Parent docs offset</param>
 /// <param name="withinGroupOffset">Offset within each group of child docs</param>
 /// <param name="fillSortFields">Specifies whether to add sort fields or not</param>
 /// <returns><see cref="ITopGroups{T}"/> for specified query</returns>
 /// <exception cref="IOException"> if there is a low-level I/O error </exception>
 public virtual ITopGroups<int> GetTopGroupsWithAllChildDocs(ToParentBlockJoinQuery query, Sort withinGroupSort, int offset, int withinGroupOffset, bool fillSortFields)
 {
     // int.MaxValue is passed as maxDocsPerGroup so that no per-group cap is applied.
     return GetTopGroups(query, withinGroupSort, offset, int.MaxValue, withinGroupOffset, fillSortFields);
 }
Beispiel #28
        public void TestAdvanceSingleParentSingleChild()
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document childDoc = new Document();
            childDoc.Add(NewStringField("child", "1", Field.Store.NO));
            Document parentDoc = new Document();
            parentDoc.Add(NewStringField("parent", "1", Field.Store.NO));
            w.AddDocuments(Arrays.AsList(childDoc, parentDoc));
            IndexReader r = w.Reader;
            IndexSearcher s = NewSearcher(r);
            Query tq = new TermQuery(new Term("child", "1"));
            Filter parentFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "1"))));

            ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg);
            Weight weight = s.CreateNormalizedWeight(q);
            DocIdSetIterator disi = weight.Scorer(s.IndexReader.Leaves.First(), null);
            assertEquals(1, disi.Advance(1));