Collects parent document hits for a Query containing one more more BlockJoinQuery clauses, sorted by the specified parent Sort. Note that this cannot perform arbitrary joins; rather, it requires that all joined documents are indexed as a doc block (using {@link IndexWriter#addDocuments} or {@link IndexWriter#updateDocuments}). Ie, the join is computed at index time.

The parent Sort must only use fields from the parent documents; sorting by field in the child documents is not supported.

You should only use this collector if one or more of the clauses in the query is a ToParentBlockJoinQuery. This collector will find those query clauses and record the matching child documents for the top scoring parent documents.

Multiple joins (star join) and nested joins and a mix of the two are allowed, as long as in all cases the documents corresponding to a single row of each joined parent table were indexed as a doc block.

For the simple star join you can retrieve the TopGroups instance containing each ToParentBlockJoinQuery's matching child documents for the top parent groups, using #getTopGroups. Ie, a single query, which will contain two or more ToParentBlockJoinQuery's as clauses representing the star join, can then retrieve two or more TopGroups instances.

For nested joins, the query will run correctly (ie, match the right parent and child documents), however, because TopGroups is currently unable to support nesting (each group is not able to hold another TopGroups), you are only able to retrieve the TopGroups of the first join. The TopGroups of the nested joins will not be correct. See org.apache.lucene.search.join for a code sample. @lucene.experimental

Наследование: Lucene.Net.Search.Collector
Пример #1
0
        public void TestEmptyChildFilter()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            config.SetMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
            // we don't want to merge - since we rely on certain segment setup
            IndexWriter w = new IndexWriter(dir, config);

            IList<Document> docs = new List<Document>();

            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));
            w.AddDocuments(docs);

            docs.Clear();
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeResume("Frank", "United States"));
            w.AddDocuments(docs);
            w.Commit();
            int num = AtLeast(10); // produce a segment that doesn't have a value in the docType field
            for (int i = 0; i < num; i++)
            {
                docs.Clear();
                docs.Add(MakeJob("java", 2007));
                w.AddDocuments(docs);
            }

            IndexReader r = DirectoryReader.Open(w, Random().NextBoolean());
            w.Dispose();
            assertTrue(r.Leaves.size() > 1);
            IndexSearcher s = new IndexSearcher(r);
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            BooleanQuery childQuery = new BooleanQuery();
            childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

            BooleanQuery fullQuery = new BooleanQuery();
            fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(new MatchAllDocsQuery(), BooleanClause.Occur.MUST));
            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
            s.Search(fullQuery, c);
            TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
            assertFalse(float.IsNaN(results.MaxScore));
            assertEquals(1, results.TotalGroupedHitCount);
            assertEquals(1, results.Groups.Length);
            IGroupDocs<int> group = results.Groups[0];
            Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
            assertEquals("java", childDoc.Get("skill"));
            assertNotNull(group.GroupValue);
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));

            r.Dispose();
            dir.Dispose();
        }
Пример #2
0
        public void TestBugCausedByRewritingTwice()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            IList<Document> docs = new List<Document>();

            for (int i = 0; i < 10; i++)
            {
                docs.Clear();
                docs.Add(MakeJob("ruby", i));
                docs.Add(MakeJob("java", 2007));
                docs.Add(MakeResume("Frank", "United States"));
                w.AddDocuments(docs);
            }

            IndexReader r = w.Reader;
            w.Dispose();
            IndexSearcher s = NewSearcher(r);

            MultiTermQuery qc = NumericRangeQuery.NewIntRange("year", 2007, 2007, true, true);
            // Hacky: this causes the query to need 2 rewrite
            // iterations: 
            qc.SetRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);

            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            int h1 = qc.GetHashCode();
            Query qw1 = qc.Rewrite(r);
            int h2 = qw1.GetHashCode();
            Query qw2 = qw1.Rewrite(r);
            int h3 = qw2.GetHashCode();

            assertTrue(h1 != h2);
            assertTrue(h2 != h3);
            assertTrue(h3 != h1);

            ToParentBlockJoinQuery qp = new ToParentBlockJoinQuery(qc, parentsFilter, ScoreMode.Max);
            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);

            s.Search(qp, c);
            TopGroups<int> groups = c.GetTopGroups(qp, Sort.INDEXORDER, 0, 10, 0, true);
            foreach (GroupDocs<int> group in groups.Groups)
            {
                assertEquals(1, group.TotalHits);
            }

            r.Dispose();
            dir.Dispose();
        }
Пример #3
0
        public void TestRandom()
        {
            // We build two indices at once: one normalized (which
            // ToParentBlockJoinQuery/Collector,
            // ToChildBlockJoinQuery can query) and the other w/
            // the same docs, just fully denormalized:
            Directory dir = NewDirectory();
            Directory joinDir = NewDirectory();

            int numParentDocs = TestUtil.NextInt(Random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
            //final int numParentDocs = 30;

            // Values for parent fields:
            string[][] parentFields = GetRandomFields(numParentDocs / 2);
            // Values for child fields:
            string[][] childFields = GetRandomFields(numParentDocs);

            bool doDeletes = Random().NextBoolean();
            IList<int> toDelete = new List<int>();

            // TODO: parallel star join, nested join cases too!
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            RandomIndexWriter joinW = new RandomIndexWriter(Random(), joinDir, Similarity, TimeZone);
            for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++)
            {
                Document parentDoc = new Document();
                Document parentJoinDoc = new Document();
                Field id = NewStringField("parentID", "" + parentDocID, Field.Store.YES);
                parentDoc.Add(id);
                parentJoinDoc.Add(id);
                parentJoinDoc.Add(NewStringField("isParent", "x", Field.Store.NO));
                for (int field = 0; field < parentFields.Length; field++)
                {
                    if (Random().NextDouble() < 0.9)
                    {
                        Field f = NewStringField("parent" + field, parentFields[field][Random().Next(parentFields[field].Length)], Field.Store.NO);
                        parentDoc.Add(f);
                        parentJoinDoc.Add(f);
                    }
                }

                if (doDeletes)
                {
                    parentDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
                    parentJoinDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
                }

                IList<Document> joinDocs = new List<Document>();

                if (VERBOSE)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.Append("parentID=").Append(parentDoc.Get("parentID"));
                    for (int fieldID = 0; fieldID < parentFields.Length; fieldID++)
                    {
                        string parent = parentDoc.Get("parent" + fieldID);
                        if (parent != null)
                        {
                            sb.Append(" parent" + fieldID + "=" + parent);
                        }
                    }
                    Console.WriteLine("  " + sb);
                }

                int numChildDocs = TestUtil.NextInt(Random(), 1, 20);
                for (int childDocID = 0; childDocID < numChildDocs; childDocID++)
                {
                    // Denormalize: copy all parent fields into child doc:
                    Document childDoc = TestUtil.CloneDocument(parentDoc);
                    Document joinChildDoc = new Document();
                    joinDocs.Add(joinChildDoc);

                    Field childID = NewStringField("childID", "" + childDocID, Field.Store.YES);
                    childDoc.Add(childID);
                    joinChildDoc.Add(childID);

                    for (int childFieldID = 0; childFieldID < childFields.Length; childFieldID++)
                    {
                        if (Random().NextDouble() < 0.9)
                        {
                            Field f = NewStringField("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)], Field.Store.NO);
                            childDoc.Add(f);
                            joinChildDoc.Add(f);
                        }
                    }

                    if (VERBOSE)
                    {
                        StringBuilder sb = new StringBuilder();
                        sb.Append("childID=").Append(joinChildDoc.Get("childID"));
                        for (int fieldID = 0; fieldID < childFields.Length; fieldID++)
                        {
                            string child = joinChildDoc.Get("child" + fieldID);
                            if (child != null)
                            {
                                sb.Append(" child" + fieldID + "=" + child);
                            }
                        }
                        Console.WriteLine("    " + sb);
                    }

                    if (doDeletes)
                    {
                        joinChildDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
                    }

                    w.AddDocument(childDoc);
                }

                // Parent last:
                joinDocs.Add(parentJoinDoc);
                joinW.AddDocuments(joinDocs);

                if (doDeletes && Random().Next(30) == 7)
                {
                    toDelete.Add(parentDocID);
                }
            }

            foreach (int deleteID in toDelete)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("DELETE parentID=" + deleteID);
                }
                w.DeleteDocuments(new Term("blockID", "" + deleteID));
                joinW.DeleteDocuments(new Term("blockID", "" + deleteID));
            }

            IndexReader r = w.Reader;
            w.Dispose();
            IndexReader joinR = joinW.Reader;
            joinW.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: reader=" + r);
                Console.WriteLine("TEST: joinReader=" + joinR);

                for (int docIDX = 0; docIDX < joinR.MaxDoc; docIDX++)
                {
                    Console.WriteLine("  docID=" + docIDX + " doc=" + joinR.Document(docIDX));
                }
            }

            IndexSearcher s = NewSearcher(r);

            IndexSearcher joinS = new IndexSearcher(joinR);

            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));

            int iters = 200 * RANDOM_MULTIPLIER;

            for (int iter = 0; iter < iters; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + (1 + iter) + " of " + iters);
                }

                Query childQuery;
                if (Random().Next(3) == 2)
                {
                    int childFieldID = Random().Next(childFields.Length);
                    childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
                }
                else if (Random().Next(3) == 2)
                {
                    BooleanQuery bq = new BooleanQuery();
                    childQuery = bq;
                    int numClauses = TestUtil.NextInt(Random(), 2, 4);
                    bool didMust = false;
                    for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
                    {
                        Query clause;
                        BooleanClause.Occur occur;
                        if (!didMust && Random().NextBoolean())
                        {
                            occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                            clause = new TermQuery(RandomChildTerm(childFields[0]));
                            didMust = true;
                        }
                        else
                        {
                            occur = BooleanClause.Occur.SHOULD;
                            int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
                            clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
                        }
                        bq.Add(clause, occur);
                    }
                }
                else
                {
                    BooleanQuery bq = new BooleanQuery();
                    childQuery = bq;

                    bq.Add(new TermQuery(RandomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
                    int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
                    bq.Add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
                }

                int x = Random().Next(4);
                ScoreMode agg;
                if (x == 0)
                {
                    agg = ScoreMode.None;
                }
                else if (x == 1)
                {
                    agg = ScoreMode.Max;
                }
                else if (x == 2)
                {
                    agg = ScoreMode.Total;
                }
                else
                {
                    agg = ScoreMode.Avg;
                }

                ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);

                // To run against the block-join index:
                Query parentJoinQuery;

                // Same query as parentJoinQuery, but to run against
                // the fully denormalized index (so we can compare
                // results):
                Query parentQuery;

                if (Random().NextBoolean())
                {
                    parentQuery = childQuery;
                    parentJoinQuery = childJoinQuery;
                }
                else
                {
                    // AND parent field w/ child field
                    BooleanQuery bq = new BooleanQuery();
                    parentJoinQuery = bq;
                    Term parentTerm = RandomParentTerm(parentFields[0]);
                    if (Random().NextBoolean())
                    {
                        bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
                        bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                    }
                    else
                    {
                        bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                        bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
                    }

                    BooleanQuery bq2 = new BooleanQuery();
                    parentQuery = bq2;
                    if (Random().NextBoolean())
                    {
                        bq2.Add(childQuery, BooleanClause.Occur.MUST);
                        bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                    }
                    else
                    {
                        bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                        bq2.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                }

                Sort parentSort = GetRandomSort("parent", parentFields.Length);
                Sort childSort = GetRandomSort("child", childFields.Length);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
                }

                // Merge both sorts:
                IList<SortField> sortFields = new List<SortField>(Arrays.AsList(parentSort.GetSort()));
                sortFields.AddRange(Arrays.AsList(childSort.GetSort()));
                Sort parentAndChildSort = new Sort(sortFields.ToArray());

                TopDocs results = s.Search(parentQuery, null, r.NumDocs, parentAndChildSort);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: normal index gets " + results.TotalHits + " hits");
                    ScoreDoc[] hits = results.ScoreDocs;
                    for (int hitIDX = 0; hitIDX < hits.Length; hitIDX++)
                    {
                        Document doc = s.Doc(hits[hitIDX].Doc);
                        //System.out.println("  score=" + hits[hitIDX].Score + " parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                        Console.WriteLine("  parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                        FieldDoc fd = (FieldDoc)hits[hitIDX];
                        if (fd.Fields != null)
                        {
                            Console.Write("    ");
                            foreach (object o in fd.Fields)
                            {
                                if (o is BytesRef)
                                {
                                    Console.Write(((BytesRef)o).Utf8ToString() + " ");
                                }
                                else
                                {
                                    Console.Write(o + " ");
                                }
                            }
                            Console.WriteLine();
                        }
                    }
                }

                bool trackScores;
                bool trackMaxScore;
                if (agg == ScoreMode.None)
                {
                    trackScores = false;
                    trackMaxScore = false;
                }
                else
                {
                    trackScores = Random().NextBoolean();
                    trackMaxScore = Random().NextBoolean();
                }
                ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);

                joinS.Search(parentJoinQuery, c);

                int hitsPerGroup = TestUtil.NextInt(Random(), 1, 20);
                //final int hitsPerGroup = 100;
                TopGroups<int> joinResults = c.GetTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.Groups.Length) + " groups; hitsPerGroup=" + hitsPerGroup);
                    if (joinResults != null)
                    {
                        IGroupDocs<int>[] groups = joinResults.Groups;
                        for (int groupIDX = 0; groupIDX < groups.Length; groupIDX++)
                        {
                            IGroupDocs<int> group = groups[groupIDX];
                            if (group.GroupSortValues != null)
                            {
                                Console.Write("  ");
                                foreach (object o in group.GroupSortValues)
                                {
                                    if (o is BytesRef)
                                    {
                                        Console.Write(((BytesRef)o).Utf8ToString() + " ");
                                    }
                                    else
                                    {
                                        Console.Write(o + " ");
                                    }
                                }
                                Console.WriteLine();
                            }

                            assertNotNull(group.GroupValue);
                            Document parentDoc = joinS.Doc(group.GroupValue);
                            Console.WriteLine("  group parentID=" + parentDoc.Get("parentID") + " (docID=" + group.GroupValue + ")");
                            for (int hitIDX = 0; hitIDX < group.ScoreDocs.Length; hitIDX++)
                            {
                                Document doc = joinS.Doc(group.ScoreDocs[hitIDX].Doc);
                                //System.out.println("    score=" + group.ScoreDocs[hitIDX].Score + " childID=" + doc.Get("childID") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                                Console.WriteLine("    childID=" + doc.Get("childID") + " child0=" + doc.Get("child0") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                            }
                        }
                    }
                }

                if (results.TotalHits == 0)
                {
                    assertNull(joinResults);
                }
                else
                {
                    CompareHits(r, joinR, results, joinResults);
                    TopDocs b = joinS.Search(childJoinQuery, 10);
                    foreach (ScoreDoc hit in b.ScoreDocs)
                    {
                        Explanation explanation = joinS.Explain(childJoinQuery, hit.Doc);
                        Document document = joinS.Doc(hit.Doc - 1);
                        int childId = Convert.ToInt32(document.Get("childID"));
                        assertTrue(explanation.IsMatch);
                        assertEquals(hit.Score, explanation.Value, 0.0f);
                        assertEquals(string.Format("Score based on child doc range from {0} to {1}", hit.Doc - 1 - childId, hit.Doc - 1), explanation.Description);
                    }
                }

                // Test joining in the opposite direction (parent to
                // child):

                // Get random query against parent documents:
                Query parentQuery2;
                if (Random().Next(3) == 2)
                {
                    int fieldID = Random().Next(parentFields.Length);
                    parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
                }
                else if (Random().Next(3) == 2)
                {
                    BooleanQuery bq = new BooleanQuery();
                    parentQuery2 = bq;
                    int numClauses = TestUtil.NextInt(Random(), 2, 4);
                    bool didMust = false;
                    for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
                    {
                        Query clause;
                        BooleanClause.Occur occur;
                        if (!didMust && Random().NextBoolean())
                        {
                            occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                            clause = new TermQuery(RandomParentTerm(parentFields[0]));
                            didMust = true;
                        }
                        else
                        {
                            occur = BooleanClause.Occur.SHOULD;
                            int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
                            clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
                        }
                        bq.Add(clause, occur);
                    }
                }
                else
                {
                    BooleanQuery bq = new BooleanQuery();
                    parentQuery2 = bq;

                    bq.Add(new TermQuery(RandomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
                    int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
                    bq.Add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: top down: parentQuery2=" + parentQuery2);
                }

                // Maps parent query to child docs:
                ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter, Random().NextBoolean());

                // To run against the block-join index:
                Query childJoinQuery2;

                // Same query as parentJoinQuery, but to run against
                // the fully denormalized index (so we can compare
                // results):
                Query childQuery2;

                // apply a filter to children
                Filter childFilter2, childJoinFilter2;

                if (Random().NextBoolean())
                {
                    childQuery2 = parentQuery2;
                    childJoinQuery2 = parentJoinQuery2;
                    childFilter2 = null;
                    childJoinFilter2 = null;
                }
                else
                {
                    Term childTerm = RandomChildTerm(childFields[0]);
                    if (Random().NextBoolean()) // filtered case
                    {
                        childJoinQuery2 = parentJoinQuery2;
                        Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                        childJoinFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
                    }
                    else
                    {
                        childJoinFilter2 = null;
                        // AND child field w/ parent query:
                        BooleanQuery bq = new BooleanQuery();
                        childJoinQuery2 = bq;
                        if (Random().NextBoolean())
                        {
                            bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                            bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                        }
                        else
                        {
                            bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                            bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                        }
                    }

                    if (Random().NextBoolean()) // filtered case
                    {
                        childQuery2 = parentQuery2;
                        Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                        childFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
                    }
                    else
                    {
                        childFilter2 = null;
                        BooleanQuery bq2 = new BooleanQuery();
                        childQuery2 = bq2;
                        if (Random().NextBoolean())
                        {
                            bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                            bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                        }
                        else
                        {
                            bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                            bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                        }
                    }
                }

                Sort childSort2 = GetRandomSort("child", childFields.Length);

                // Search denormalized index:
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: run top down query=" + childQuery2 + " filter=" + childFilter2 + " sort=" + childSort2);
                }
                TopDocs results2 = s.Search(childQuery2, childFilter2, r.NumDocs, childSort2);
                if (VERBOSE)
                {
                    Console.WriteLine("  " + results2.TotalHits + " totalHits:");
                    foreach (ScoreDoc sd in results2.ScoreDocs)
                    {
                        Document doc = s.Doc(sd.Doc);
                        Console.WriteLine("  childID=" + doc.Get("childID") + " parentID=" + doc.Get("parentID") + " docID=" + sd.Doc);
                    }
                }

                // Search join index:
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: run top down join query=" + childJoinQuery2 + " filter=" + childJoinFilter2 + " sort=" + childSort2);
                }
                TopDocs joinResults2 = joinS.Search(childJoinQuery2, childJoinFilter2, joinR.NumDocs, childSort2);
                if (VERBOSE)
                {
                    Console.WriteLine("  " + joinResults2.TotalHits + " totalHits:");
                    foreach (ScoreDoc sd in joinResults2.ScoreDocs)
                    {
                        Document doc = joinS.Doc(sd.Doc);
                        Document parentDoc = GetParentDoc(joinR, parentsFilter, sd.Doc);
                        Console.WriteLine("  childID=" + doc.Get("childID") + " parentID=" + parentDoc.Get("parentID") + " docID=" + sd.Doc);
                    }
                }

                CompareChildHits(r, joinR, results2, joinResults2);
            }

            r.Dispose();
            joinR.Dispose();
            dir.Dispose();
            joinDir.Dispose();
        }
Пример #4
0
        public void TestChildQueryMatchesParent()
        {
            Directory d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
            Document parent = new Document();
            parent.Add(new StoredField("parentID", "0"));
            parent.Add(NewTextField("parentText", "text", Field.Store.NO));
            parent.Add(NewStringField("isParent", "yes", Field.Store.NO));

            IList<Document> docs = new List<Document>();

            Document child = new Document();
            docs.Add(child);
            child.Add(new StoredField("childID", "0"));
            child.Add(NewTextField("childText", "text", Field.Store.NO));

            // parent last:
            docs.Add(parent);
            w.AddDocuments(docs);

            docs.Clear();

            parent = new Document();
            parent.Add(NewTextField("parentText", "text", Field.Store.NO));
            parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
            parent.Add(new StoredField("parentID", "1"));

            // parent last:
            docs.Add(parent);
            w.AddDocuments(docs);

            IndexReader r = w.Reader;
            w.Dispose();

            // illegally matches parent:
            Query childQuery = new TermQuery(new Term("parentText", "text"));
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
            BooleanQuery parentQuery = new BooleanQuery();
            parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
            parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);

            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);

            Assert.Throws<InvalidOperationException>(() => NewSearcher(r).Search(parentQuery, c));

            r.Dispose();
            d.Dispose();
        }
Пример #5
0
        public void TestChildQueryNeverMatches()
        {
            Directory d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
            Document parent = new Document();
            parent.Add(new StoredField("parentID", "0"));
            parent.Add(NewTextField("parentText", "text", Field.Store.NO));
            parent.Add(NewStringField("isParent", "yes", Field.Store.NO));

            IList<Document> docs = new List<Document>();

            Document child = new Document();
            docs.Add(child);
            child.Add(new StoredField("childID", "0"));
            child.Add(NewTextField("childText", "text", Field.Store.NO));

            // parent last:
            docs.Add(parent);
            w.AddDocuments(docs);

            docs.Clear();

            parent = new Document();
            parent.Add(NewTextField("parentText", "text", Field.Store.NO));
            parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
            parent.Add(new StoredField("parentID", "1"));

            // parent last:
            docs.Add(parent);
            w.AddDocuments(docs);

            IndexReader r = w.Reader;
            w.Dispose();

            // never matches:
            Query childQuery = new TermQuery(new Term("childText", "bogus"));
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
            BooleanQuery parentQuery = new BooleanQuery();
            parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
            parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);

            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);
            NewSearcher(r).Search(parentQuery, c);
            TopGroups<int> groups = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, false);

            // Two parents:
            assertEquals(2, (int)groups.TotalGroupCount);

            // One child docs:
            assertEquals(0, groups.TotalGroupedHitCount);

            IGroupDocs<int> group = groups.Groups[0];
            Document doc = r.Document((int)group.GroupValue);
            assertEquals("0", doc.Get("parentID"));

            group = groups.Groups[1];
            doc = r.Document((int)group.GroupValue);
            assertEquals("1", doc.Get("parentID"));

            r.Dispose();
            d.Dispose();
        }
Пример #6
0
        public void TestSimple()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            IList<Document> docs = new List<Document>();

            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));
            w.AddDocuments(docs);

            docs.Clear();
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeResume("Frank", "United States"));
            w.AddDocuments(docs);

            IndexReader r = w.Reader;
            w.Dispose();
            IndexSearcher s = NewSearcher(r);

            // Create a filter that defines "parent" documents in the index - in this case resumes
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            // Define child document criteria (finds an example of relevant work experience)
            BooleanQuery childQuery = new BooleanQuery();
            childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            // Define parent document criteria (find a resident in the UK)
            Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

            // Wrap the child document query to 'join' any matches
            // up to corresponding parent:
            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

            // Combine the parent and nested child queries into a single query for a candidate
            BooleanQuery fullQuery = new BooleanQuery();
            fullQuery.Add(new BooleanClause(parentQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));

            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);

            s.Search(fullQuery, c);

            TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
            assertFalse(float.IsNaN(results.MaxScore));

            //assertEquals(1, results.totalHitCount);
            assertEquals(1, results.TotalGroupedHitCount);
            assertEquals(1, results.Groups.Length);

            IGroupDocs<int> group = results.Groups[0];
            assertEquals(1, group.TotalHits);
            assertFalse(float.IsNaN(group.Score));

            Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
            //System.out.println("  doc=" + group.ScoreDocs[0].Doc);
            assertEquals("java", childDoc.Get("skill"));
            assertNotNull(group.GroupValue);
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));


            //System.out.println("TEST: now test up");

            // Now join "up" (map parent hits to child docs) instead...:
            ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, Random().NextBoolean());
            BooleanQuery fullChildQuery = new BooleanQuery();
            fullChildQuery.Add(new BooleanClause(parentJoinQuery, BooleanClause.Occur.MUST));
            fullChildQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));

            //System.out.println("FULL: " + fullChildQuery);
            TopDocs hits = s.Search(fullChildQuery, 10);
            assertEquals(1, hits.TotalHits);
            childDoc = s.Doc(hits.ScoreDocs[0].Doc);
            //System.out.println("CHILD = " + childDoc + " docID=" + hits.ScoreDocs[0].Doc);
            assertEquals("java", childDoc.Get("skill"));
            assertEquals(2007, childDoc.GetField("year").NumericValue);
            assertEquals("Lisa", GetParentDoc(r, parentsFilter, hits.ScoreDocs[0].Doc).Get("name"));

            // Test with filter on child docs:
            assertEquals(0, s.Search(fullChildQuery, new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))), 1).TotalHits);

            r.Dispose();
            dir.Dispose();
        }
Пример #7
0
        public void TestGetTopGroups()
        {

            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            IList<Document> docs = new List<Document>();
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeJob("java", 2010));
            docs.Add(MakeJob("java", 2012));
            CollectionsHelper.Shuffle(docs);
            docs.Add(MakeResume("Frank", "United States"));

            AddSkillless(w);
            w.AddDocuments(docs);
            AddSkillless(w);

            IndexReader r = w.Reader;
            w.Dispose();
            IndexSearcher s = new IndexSearcher(r);

            // Create a filter that defines "parent" documents in the index - in this case resumes
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            // Define child document criteria (finds an example of relevant work experience)
            BooleanQuery childQuery = new BooleanQuery();
            childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            // Wrap the child document query to 'join' any matches
            // up to corresponding parent:
            ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 2, true, true);
            s.Search(childJoinQuery, c);

            //Get all child documents within groups
            TopGroups<int>[] getTopGroupsResults = new TopGroups<int>[2];
            getTopGroupsResults[0] = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
            getTopGroupsResults[1] = c.GetTopGroupsWithAllChildDocs(childJoinQuery, null, 0, 0, true);

            foreach (TopGroups<int> results in getTopGroupsResults)
            {
                assertFalse(float.IsNaN(results.MaxScore));
                assertEquals(2, results.TotalGroupedHitCount);
                assertEquals(1, results.Groups.Length);

                IGroupDocs<int> resultGroup = results.Groups[0];
                assertEquals(2, resultGroup.TotalHits);
                assertFalse(float.IsNaN(resultGroup.Score));
                assertNotNull(resultGroup.GroupValue);
                Document parentDocument = s.Doc(resultGroup.GroupValue);
                assertEquals("Frank", parentDocument.Get("name"));

                assertEquals(2, resultGroup.ScoreDocs.Length); //all matched child documents collected

                foreach (ScoreDoc scoreDoc in resultGroup.ScoreDocs)
                {
                    Document childDoc = s.Doc(scoreDoc.Doc);
                    assertEquals("java", childDoc.Get("skill"));
                    int year = Convert.ToInt32(childDoc.Get("year"));
                    assertTrue(year >= 2006 && year <= 2011);
                }
            }

            //Get part of child documents
            TopGroups<int> boundedResults = c.GetTopGroups(childJoinQuery, null, 0, 1, 0, true);
            assertFalse(float.IsNaN(boundedResults.MaxScore));
            assertEquals(2, boundedResults.TotalGroupedHitCount);
            assertEquals(1, boundedResults.Groups.Length);

            IGroupDocs<int> group = boundedResults.Groups[0];
            assertEquals(2, group.TotalHits);
            assertFalse(float.IsNaN(group.Score));
            assertNotNull(group.GroupValue);
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Frank", parentDoc.Get("name"));

            assertEquals(1, group.ScoreDocs.Length); //not all matched child documents collected

            foreach (ScoreDoc scoreDoc in group.ScoreDocs)
            {
                Document childDoc = s.Doc(scoreDoc.Doc);
                assertEquals("java", childDoc.Get("skill"));
                int year = Convert.ToInt32(childDoc.Get("year"));
                assertTrue(year >= 2006 && year <= 2011);
            }

            r.Dispose();
            dir.Dispose();
        }
Пример #8
0
        public void TestMultiChildTypes()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            IList<Document> docs = new List<Document>();

            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeQualification("maths", 1999));
            docs.Add(MakeResume("Lisa", "United Kingdom"));
            w.AddDocuments(docs);

            IndexReader r = w.Reader;
            w.Dispose();
            IndexSearcher s = NewSearcher(r);

            // Create a filter that defines "parent" documents in the index - in this case resumes
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

            // Define child document criteria (finds an example of relevant work experience)
            BooleanQuery childJobQuery = new BooleanQuery();
            childJobQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
            childJobQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

            BooleanQuery childQualificationQuery = new BooleanQuery();
            childQualificationQuery.Add(new BooleanClause(new TermQuery(new Term("qualification", "maths")), BooleanClause.Occur.MUST));
            childQualificationQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 1980, 2000, true, true), BooleanClause.Occur.MUST));


            // Define parent document criteria (find a resident in the UK)
            Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

            // Wrap the child document query to 'join' any matches
            // up to corresponding parent:
            ToParentBlockJoinQuery childJobJoinQuery = new ToParentBlockJoinQuery(childJobQuery, parentsFilter, ScoreMode.Avg);
            ToParentBlockJoinQuery childQualificationJoinQuery = new ToParentBlockJoinQuery(childQualificationQuery, parentsFilter, ScoreMode.Avg);

            // Combine the parent and nested child queries into a single query for a candidate
            BooleanQuery fullQuery = new BooleanQuery();
            fullQuery.Add(new BooleanClause(parentQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(childJobJoinQuery, BooleanClause.Occur.MUST));
            fullQuery.Add(new BooleanClause(childQualificationJoinQuery, BooleanClause.Occur.MUST));

            // Collects all job and qualification child docs for
            // each resume hit in the top N (sorted by score):
            ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);

            s.Search(fullQuery, c);

            // Examine "Job" children
            TopGroups<int> jobResults = c.GetTopGroups(childJobJoinQuery, null, 0, 10, 0, true);

            //assertEquals(1, results.totalHitCount);
            assertEquals(1, jobResults.TotalGroupedHitCount);
            assertEquals(1, jobResults.Groups.Length);

            IGroupDocs<int> group = jobResults.Groups[0];
            assertEquals(1, group.TotalHits);

            Document childJobDoc = s.Doc(group.ScoreDocs[0].Doc);
            //System.out.println("  doc=" + group.ScoreDocs[0].Doc);
            assertEquals("java", childJobDoc.Get("skill"));
            assertNotNull(group.GroupValue);
            Document parentDoc = s.Doc(group.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));

            // Now Examine qualification children
            TopGroups<int> qualificationResults = c.GetTopGroups(childQualificationJoinQuery, null, 0, 10, 0, true);

            assertEquals(1, qualificationResults.TotalGroupedHitCount);
            assertEquals(1, qualificationResults.Groups.Length);

            IGroupDocs<int> qGroup = qualificationResults.Groups[0];
            assertEquals(1, qGroup.TotalHits);

            Document childQualificationDoc = s.Doc(qGroup.ScoreDocs[0].Doc);
            assertEquals("maths", childQualificationDoc.Get("qualification"));
            assertNotNull(qGroup.GroupValue);
            parentDoc = s.Doc(qGroup.GroupValue);
            assertEquals("Lisa", parentDoc.Get("name"));

            r.Dispose();
            dir.Dispose();
        }