/// <summary>
/// Regression test for ToChildBlockJoinQuery.Advance() when a parent block with no
/// children was deleted: parent "2" is first added child-less, deleted, then re-added
/// with one child. Only the child of the live (re-added) block should be returned.
/// </summary>
public void TestAdvanceSingleDeletedParentNoChild()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir);

    // First block: parent "1" with one child (children precede their parent in a block).
    Document parentDoc = new Document();
    parentDoc.Add(NewStringField("parent", "1", Field.Store.NO));
    parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
    Document childDoc = new Document();
    childDoc.Add(NewStringField("child", "1", Field.Store.NO));
    w.AddDocuments(Arrays.AsList(childDoc, parentDoc));

    // Second block: parent "2" with NO children, then deleted again.
    parentDoc = new Document();
    parentDoc.Add(NewStringField("parent", "2", Field.Store.NO));
    parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
    w.AddDocuments(Arrays.AsList(parentDoc));
    w.DeleteDocuments(new Term("parent", "2"));

    // Third block: parent "2" re-added, this time with one child.
    parentDoc = new Document();
    parentDoc.Add(NewStringField("parent", "2", Field.Store.NO));
    parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
    childDoc = new Document();
    childDoc.Add(NewStringField("child", "2", Field.Store.NO));
    w.AddDocuments(Arrays.AsList(childDoc, parentDoc));

    IndexReader r = w.Reader;
    w.Dispose();
    IndexSearcher s = NewSearcher(r);

    // Create a filter that defines "parent" documents in the index.
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));
    Query parentQuery = new TermQuery(new Term("parent", "2"));

    ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, Random().NextBoolean());
    TopDocs topdocs = s.Search(parentJoinQuery, 3);
    // Only the single child of the live parent-"2" block should match.
    assertEquals(1, topdocs.TotalHits);

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Verifies that per-parent join scores under ScoreMode.Total reflect child deletes:
/// after deleting the "java" child of each block, every parent's total score drops
/// by exactly one matching child. NoMergePolicy is used so the deletes are not
/// merged away and must be honored at search time.
/// </summary>
public void TestNestedDocScoringWithDeletes()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

    // Cannot assert this since we use NoMergePolicy:
    w.DoRandomForceMergeAssert = false;

    // Block 1 (Lisa): children java/2007, python/2010.
    IList<Document> docs = new List<Document>();
    docs.Add(MakeJob("java", 2007));
    docs.Add(MakeJob("python", 2010));
    docs.Add(MakeResume("Lisa", "United Kingdom"));
    w.AddDocuments(docs);

    // Block 2 (Frank): children c/1999, ruby/2005, java/2006.
    docs.Clear();
    docs.Add(MakeJob("c", 1999));
    docs.Add(MakeJob("ruby", 2005));
    docs.Add(MakeJob("java", 2006));
    docs.Add(MakeResume("Frank", "United States"));
    w.AddDocuments(docs);
    w.Commit();

    IndexSearcher s = NewSearcher(DirectoryReader.Open(dir));

    // Join children with year in [1990, 2010] up to their resume parent, summing child scores.
    ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(NumericRangeQuery.NewIntRange("year", 1990, 2010, true, true), new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))), ScoreMode.Total);

    TopDocs topDocs = s.Search(q, 10);
    assertEquals(2, topDocs.TotalHits);
    assertEquals(6, topDocs.ScoreDocs[0].Doc);
    assertEquals(3.0f, topDocs.ScoreDocs[0].Score, 0.0f); // Frank: all 3 children in range
    assertEquals(2, topDocs.ScoreDocs[1].Doc);
    assertEquals(2.0f, topDocs.ScoreDocs[1].Score, 0.0f); // Lisa: both children in range
    s.IndexReader.Dispose();

    // Delete the "java" child in each block; each parent's total should drop by 1.
    w.DeleteDocuments(new Term("skill", "java"));
    w.Dispose();

    s = NewSearcher(DirectoryReader.Open(dir));
    topDocs = s.Search(q, 10);
    assertEquals(2, topDocs.TotalHits);
    assertEquals(6, topDocs.ScoreDocs[0].Doc);
    assertEquals(2.0f, topDocs.ScoreDocs[0].Score, 0.0f);
    assertEquals(2, topDocs.ScoreDocs[1].Doc);
    assertEquals(1.0f, topDocs.ScoreDocs[1].Score, 0.0f);
    s.IndexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Exercises ToParentBlockJoinQuery / ToChildBlockJoinQuery combined with searcher-level
/// filters: a parent-side filter applied via IndexSearcher.Search must restrict which
/// joined parents (or children, for the to-child direction) are returned. Two resume
/// blocks (Lisa/UK, Frank/US) are indexed in random order with skill-less filler docs
/// between them.
/// </summary>
public virtual void TestSimpleFilter()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    // Block 1 (Lisa): job children in random order, parent resume last.
    IList<Document> docs = new List<Document>();
    docs.Add(MakeJob("java", 2007));
    docs.Add(MakeJob("python", 2010));
    CollectionsHelper.Shuffle(docs);
    docs.Add(MakeResume("Lisa", "United Kingdom"));

    // Block 2 (Frank): job children in random order, parent resume last.
    IList<Document> docs2 = new List<Document>();
    docs2.Add(MakeJob("ruby", 2005));
    docs2.Add(MakeJob("java", 2006));
    CollectionsHelper.Shuffle(docs2);
    docs2.Add(MakeResume("Frank", "United States"));

    // Interleave the two blocks (in random order) with skill-less filler docs.
    AddSkillless(w);
    bool turn = Random().NextBoolean();
    w.AddDocuments(turn ? docs : docs2);
    AddSkillless(w);
    w.AddDocuments(!turn ? docs : docs2);
    AddSkillless(w);

    IndexReader r = w.Reader;
    w.Dispose();
    IndexSearcher s = NewSearcher(r);

    // Create a filter that defines "parent" documents in the index - in this case resumes
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Define child document criteria (finds an example of relevant work experience)
    BooleanQuery childQuery = new BooleanQuery();
    childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
    childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

    // Define parent document criteria (find a resident in the UK)
    Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

    // Wrap the child document query to 'join' any matches
    // up to corresponding parent:
    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

    // Both resumes have a java child in range, so both parents match un-filtered.
    assertEquals("no filter - both passed", 2, s.Search(childJoinQuery, 10).TotalHits);
    // A filter matching all parents changes nothing.
    assertEquals("dummy filter passes everyone ", 2, s.Search(childJoinQuery, parentsFilter, 10).TotalHits);
    assertEquals("dummy filter passes everyone ", 2, s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))), 10).TotalHits);

    // not found test
    assertEquals("noone live there", 0, s.Search(childJoinQuery, new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("country", "Oz")))), 1).TotalHits);
    assertEquals("noone live there", 0, s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "Oz"))), 1).TotalHits);

    // apply the UK filter by the searcher
    TopDocs ukOnly = s.Search(childJoinQuery, new QueryWrapperFilter(parentQuery), 1);
    assertEquals("has filter - single passed", 1, ukOnly.TotalHits);
    assertEquals("Lisa", r.Document(ukOnly.ScoreDocs[0].Doc).Get("name"));

    // looking for US candidates
    TopDocs usThen = s.Search(childJoinQuery, new QueryWrapperFilter(new TermQuery(new Term("country", "United States"))), 1);
    assertEquals("has filter - single passed", 1, usThen.TotalHits);
    assertEquals("Frank", r.Document(usThen.ScoreDocs[0].Doc).Get("name"));

    // Now join in the other direction (parent -> children), with child-side filters.
    TermQuery us = new TermQuery(new Term("country", "United States"));
    assertEquals("@ US we have java and ruby", 2, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), 10).TotalHits);
    assertEquals("java skills in US", 1, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), Skill("java"), 10).TotalHits);

    BooleanQuery rubyPython = new BooleanQuery();
    rubyPython.Add(new TermQuery(new Term("skill", "ruby")), BooleanClause.Occur.SHOULD);
    rubyPython.Add(new TermQuery(new Term("skill", "python")), BooleanClause.Occur.SHOULD);
    assertEquals("ruby skills in US", 1, s.Search(new ToChildBlockJoinQuery(us, parentsFilter, Random().NextBoolean()), new QueryWrapperFilter(rubyPython), 10).TotalHits);

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Regression test for a bug triggered when the wrapped child query needs two
/// rewrite iterations: CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE forces each rewrite
/// step to produce a new (hash-distinct) query, and the collector must still be
/// able to retrieve groups keyed by the ORIGINAL (un-rewritten) join query.
/// </summary>
public void TestBugCausedByRewritingTwice()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    // 10 identical blocks, each with a java/2007 child, so every parent matches once.
    IList<Document> docs = new List<Document>();
    for (int i = 0; i < 10; i++)
    {
        docs.Clear();
        docs.Add(MakeJob("ruby", i));
        docs.Add(MakeJob("java", 2007));
        docs.Add(MakeResume("Frank", "United States"));
        w.AddDocuments(docs);
    }

    IndexReader r = w.Reader;
    w.Dispose();
    IndexSearcher s = NewSearcher(r);

    MultiTermQuery qc = NumericRangeQuery.NewIntRange("year", 2007, 2007, true, true);
    // Hacky: this causes the query to need 2 rewrite
    // iterations:
    qc.SetRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);

    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Sanity-check the precondition: each rewrite step yields a distinct query.
    int h1 = qc.GetHashCode();
    Query qw1 = qc.Rewrite(r);
    int h2 = qw1.GetHashCode();
    Query qw2 = qw1.Rewrite(r);
    int h3 = qw2.GetHashCode();
    assertTrue(h1 != h2);
    assertTrue(h2 != h3);
    assertTrue(h3 != h1);

    ToParentBlockJoinQuery qp = new ToParentBlockJoinQuery(qc, parentsFilter, ScoreMode.Max);
    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, true);

    s.Search(qp, c);
    // Groups are requested with the original query instance; each parent has
    // exactly one matching (java/2007) child.
    TopGroups<int> groups = c.GetTopGroups(qp, Sort.INDEXORDER, 0, 10, 0, true);
    foreach (GroupDocs<int> group in groups.Groups)
    {
        assertEquals(1, group.TotalHits);
    }

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Verifies that an illegal setup — a "child" query that also matches parent
/// documents (both query the parentText field) — is detected and rejected:
/// searching with the join collector must throw InvalidOperationException.
/// </summary>
public void TestChildQueryMatchesParent()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);

    // Block 1: one child then its parent (parent must come last in the block).
    Document parent = new Document();
    parent.Add(new StoredField("parentID", "0"));
    parent.Add(NewTextField("parentText", "text", Field.Store.NO));
    parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
    IList<Document> docs = new List<Document>();
    Document child = new Document();
    docs.Add(child);
    child.Add(new StoredField("childID", "0"));
    child.Add(NewTextField("childText", "text", Field.Store.NO));
    // parent last:
    docs.Add(parent);
    w.AddDocuments(docs);

    // Block 2: a parent with no children.
    docs.Clear();
    parent = new Document();
    parent.Add(NewTextField("parentText", "text", Field.Store.NO));
    parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
    parent.Add(new StoredField("parentID", "1"));
    // parent last:
    docs.Add(parent);
    w.AddDocuments(docs);

    IndexReader r = w.Reader;
    w.Dispose();

    // illegally matches parent:
    Query childQuery = new TermQuery(new Term("parentText", "text"));
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

    BooleanQuery parentQuery = new BooleanQuery();
    parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
    parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);

    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);
    // The misuse must surface as an exception rather than silent wrong results.
    Assert.Throws<InvalidOperationException>(() => NewSearcher(r).Search(parentQuery, c));

    r.Dispose();
    d.Dispose();
}
/// <summary>
/// Verifies collector behavior when the child query matches nothing: both parents
/// are still reported as groups (via the SHOULD parentText clause) but with zero
/// grouped child hits.
/// </summary>
public void TestChildQueryNeverMatches()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);

    // Block 1: one child then its parent (parent must come last in the block).
    Document parent = new Document();
    parent.Add(new StoredField("parentID", "0"));
    parent.Add(NewTextField("parentText", "text", Field.Store.NO));
    parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
    IList<Document> docs = new List<Document>();
    Document child = new Document();
    docs.Add(child);
    child.Add(new StoredField("childID", "0"));
    child.Add(NewTextField("childText", "text", Field.Store.NO));
    // parent last:
    docs.Add(parent);
    w.AddDocuments(docs);

    // Block 2: a parent with no children.
    docs.Clear();
    parent = new Document();
    parent.Add(NewTextField("parentText", "text", Field.Store.NO));
    parent.Add(NewStringField("isParent", "yes", Field.Store.NO));
    parent.Add(new StoredField("parentID", "1"));
    // parent last:
    docs.Add(parent);
    w.AddDocuments(docs);

    IndexReader r = w.Reader;
    w.Dispose();

    // never matches:
    Query childQuery = new TermQuery(new Term("childText", "bogus"));
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "yes"))));
    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

    BooleanQuery parentQuery = new BooleanQuery();
    parentQuery.Add(childJoinQuery, BooleanClause.Occur.SHOULD);
    parentQuery.Add(new TermQuery(new Term("parentText", "text")), BooleanClause.Occur.SHOULD);

    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(new Sort(new SortField("parentID", SortField.Type_e.STRING)), 10, true, true);
    NewSearcher(r).Search(parentQuery, c);
    TopGroups<int> groups = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, false);

    // Two parents:
    assertEquals(2, (int)groups.TotalGroupCount);
    // One child docs:
    assertEquals(0, groups.TotalGroupedHitCount);

    // Groups are sorted by parentID; the group value is the parent docID.
    IGroupDocs<int> group = groups.Groups[0];
    Document doc = r.Document((int)group.GroupValue);
    assertEquals("0", doc.Get("parentID"));

    group = groups.Groups[1];
    doc = r.Document((int)group.GroupValue);
    assertEquals("1", doc.Get("parentID"));

    r.Dispose();
    d.Dispose();
}
/// <summary>
/// Low-level Advance() test: with a leading child-less parent block followed by a
/// block whose child matches, advancing the join scorer from doc 0 must land on
/// the second parent (docID 2), skipping the child-less parent.
/// </summary>
public void TestAdvanceSingleParentNoChild()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(new LogDocMergePolicy()));

    // Block 1: parent "1" with no children.
    Document parentDoc = new Document();
    parentDoc.Add(NewStringField("parent", "1", Field.Store.NO));
    parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
    w.AddDocuments(Arrays.AsList(parentDoc));

    // Add another doc so scorer is not null
    parentDoc = new Document();
    parentDoc.Add(NewStringField("parent", "2", Field.Store.NO));
    parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
    Document childDoc = new Document();
    childDoc.Add(NewStringField("child", "2", Field.Store.NO));
    w.AddDocuments(Arrays.AsList(childDoc, parentDoc));

    // Need single seg:
    w.ForceMerge(1);
    IndexReader r = w.Reader;
    w.Dispose();
    IndexSearcher s = NewSearcher(r);

    Query tq = new TermQuery(new Term("child", "2"));
    Filter parentFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));

    ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg);
    Weight weight = s.CreateNormalizedWeight(q);
    // Drive the scorer directly on the (only) leaf to test Advance().
    DocIdSetIterator disi = weight.Scorer(s.IndexReader.Leaves.First(), null);
    // docIDs: 0 = parent "1", 1 = child "2", 2 = parent "2".
    assertEquals(2, disi.Advance(0));

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Randomized round-trip test for discrete multi-value highlighting: indexes docs
/// whose field F has several values built from a pool of random terms (with term
/// vectors + positions + offsets), then for random query terms rebuilds the expected
/// per-value fragments by hand ("&lt;b&gt;term&lt;/b&gt;" substitution) and checks
/// SimpleFragmentsBuilder produces exactly those fragments, one per value that hit.
/// </summary>
public void TestRandomDiscreteMultiValueHighlighting()
{
    // Pool of distinct-ish non-empty random terms to draw field values from.
    String[] randomValues = new String[3 + Random.nextInt(10 * RANDOM_MULTIPLIER)];
    for (int i = 0; i < randomValues.Length; i++)
    {
        String randomValue;
        do
        {
            randomValue = TestUtil.RandomSimpleString(Random);
        } while ("".Equals(randomValue, StringComparison.Ordinal));
        randomValues[i] = randomValue;
    }

    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        Random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));

    // The highlighter requires term vectors with positions and offsets.
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.StoreTermVectors = (true);
    customType.StoreTermVectorOffsets = (true);
    customType.StoreTermVectorPositions = (true);

    int numDocs = randomValues.Length * 5;
    int numFields = 2 + Random.nextInt(5);
    int numTerms = 2 + Random.nextInt(3);
    List<Doc> docs = new List<Doc>(numDocs);
    List<Document> documents = new List<Document>(numDocs);
    // Maps each term to the set of doc ids that contain it (filled by getRandomValue).
    IDictionary<String, ISet<int>> valueToDocId = new HashMap<String, ISet<int>>();
    for (int i = 0; i < numDocs; i++)
    {
        Document document = new Document();
        String[][] fields = RectangularArrays.ReturnRectangularArray<string>(numFields, numTerms); //new String[numFields][numTerms];
        for (int j = 0; j < numFields; j++)
        {
            // Each "field" is one more value of the multi-valued field F,
            // a space-joined string of numTerms random terms.
            String[] fieldValues = new String[numTerms];
            fieldValues[0] = getRandomValue(randomValues, valueToDocId, i);
            StringBuilder builder = new StringBuilder(fieldValues[0]);
            for (int k = 1; k < numTerms; k++)
            {
                fieldValues[k] = getRandomValue(randomValues, valueToDocId, i);
                builder.Append(' ').Append(fieldValues[k]);
            }
            document.Add(new Field(F, builder.ToString(), customType));
            fields[j] = fieldValues;
        }
        docs.Add(new Doc(fields));
        documents.Add(document);
    }
    writer.AddDocuments(documents);
    writer.Dispose();

    IndexReader reader = DirectoryReader.Open(dir);
    try
    {
        int highlightIters = 1 + Random.nextInt(120 * RANDOM_MULTIPLIER);
        for (int highlightIter = 0; highlightIter < highlightIters; highlightIter++)
        {
            // NOTE(review): this logs unconditionally every iteration; upstream
            // Lucene guards such output with VERBOSE — consider doing the same.
            Console.WriteLine($"Highlighter iter: {highlightIter}");
            String queryTerm = randomValues[Random.nextInt(randomValues.Length)];
            int randomHit = valueToDocId[queryTerm].First();

            // Rebuild the expected highlighted fragments by hand, one per field
            // value that actually contains the query term.
            List<StringBuilder> builders = new List<StringBuilder>();
            foreach (String[] fieldValues in docs[randomHit].fieldValues)
            {
                StringBuilder builder = new StringBuilder();
                bool hit = false;
                for (int i = 0; i < fieldValues.Length; i++)
                {
                    if (queryTerm.Equals(fieldValues[i], StringComparison.Ordinal))
                    {
                        builder.Append("<b>").Append(queryTerm).Append("</b>");
                        hit = true;
                    }
                    else
                    {
                        builder.Append(fieldValues[i]);
                    }
                    if (i != fieldValues.Length - 1)
                    {
                        builder.Append(' ');
                    }
                }
                if (hit)
                {
                    builders.Add(builder);
                }
            }

            // Run the actual highlighter pipeline.
            FieldQuery fq = new FieldQuery(tq(queryTerm), true, true);
            FieldTermStack stack = new FieldTermStack(reader, randomHit, F, fq);
            FieldPhraseList fpl = new FieldPhraseList(stack, fq);
            SimpleFragListBuilder sflb = new SimpleFragListBuilder(100);
            FieldFragList ffl = sflb.CreateFieldFragList(fpl, 300);
            SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
            sfb.IsDiscreteMultiValueHighlighting = (true);
            String[] actualFragments = sfb.CreateFragments(reader, randomHit, F, ffl, numFields);

            // One fragment per value that hit, matching the hand-built expectation.
            assertEquals(builders.Count, actualFragments.Length);
            for (int i = 0; i < actualFragments.Length; i++)
            {
                assertEquals(builders[i].ToString(), actualFragments[i]);
            }
        }
    }
    finally
    {
        reader.Dispose();
        dir.Dispose();
    }
}
/// <summary>
/// One parent block with two kinds of children ("job" and "qualification" docs):
/// joins both child types up to the resume parent in a single BooleanQuery, then
/// retrieves each child type's groups independently from the same collector.
/// </summary>
public void TestMultiChildTypes()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    // Single block: two job children, one qualification child, resume parent last.
    IList<Document> docs = new List<Document>();
    docs.Add(MakeJob("java", 2007));
    docs.Add(MakeJob("python", 2010));
    docs.Add(MakeQualification("maths", 1999));
    docs.Add(MakeResume("Lisa", "United Kingdom"));
    w.AddDocuments(docs);

    IndexReader r = w.Reader;
    w.Dispose();
    IndexSearcher s = NewSearcher(r);

    // Create a filter that defines "parent" documents in the index - in this case resumes
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Define child document criteria (finds an example of relevant work experience)
    BooleanQuery childJobQuery = new BooleanQuery();
    childJobQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
    childJobQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

    BooleanQuery childQualificationQuery = new BooleanQuery();
    childQualificationQuery.Add(new BooleanClause(new TermQuery(new Term("qualification", "maths")), BooleanClause.Occur.MUST));
    childQualificationQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 1980, 2000, true, true), BooleanClause.Occur.MUST));

    // Define parent document criteria (find a resident in the UK)
    Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

    // Wrap the child document query to 'join' any matches
    // up to corresponding parent:
    ToParentBlockJoinQuery childJobJoinQuery = new ToParentBlockJoinQuery(childJobQuery, parentsFilter, ScoreMode.Avg);
    ToParentBlockJoinQuery childQualificationJoinQuery = new ToParentBlockJoinQuery(childQualificationQuery, parentsFilter, ScoreMode.Avg);

    // Combine the parent and nested child queries into a single query for a candidate
    BooleanQuery fullQuery = new BooleanQuery();
    fullQuery.Add(new BooleanClause(parentQuery, BooleanClause.Occur.MUST));
    fullQuery.Add(new BooleanClause(childJobJoinQuery, BooleanClause.Occur.MUST));
    fullQuery.Add(new BooleanClause(childQualificationJoinQuery, BooleanClause.Occur.MUST));

    // Collects all job and qualification child docs for
    // each resume hit in the top N (sorted by score):
    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 10, true, false);
    s.Search(fullQuery, c);

    // Examine "Job" children
    TopGroups<int> jobResults = c.GetTopGroups(childJobJoinQuery, null, 0, 10, 0, true);
    //assertEquals(1, results.totalHitCount);
    assertEquals(1, jobResults.TotalGroupedHitCount);
    assertEquals(1, jobResults.Groups.Length);

    IGroupDocs<int> group = jobResults.Groups[0];
    assertEquals(1, group.TotalHits);

    Document childJobDoc = s.Doc(group.ScoreDocs[0].Doc);
    //System.out.println("  doc=" + group.ScoreDocs[0].Doc);
    assertEquals("java", childJobDoc.Get("skill"));
    assertNotNull(group.GroupValue);
    Document parentDoc = s.Doc(group.GroupValue);
    assertEquals("Lisa", parentDoc.Get("name"));

    // Now Examine qualification children
    TopGroups<int> qualificationResults = c.GetTopGroups(childQualificationJoinQuery, null, 0, 10, 0, true);
    assertEquals(1, qualificationResults.TotalGroupedHitCount);
    assertEquals(1, qualificationResults.Groups.Length);

    IGroupDocs<int> qGroup = qualificationResults.Groups[0];
    assertEquals(1, qGroup.TotalHits);

    Document childQualificationDoc = s.Doc(qGroup.ScoreDocs[0].Doc);
    assertEquals("maths", childQualificationDoc.Get("qualification"));
    assertNotNull(qGroup.GroupValue);
    parentDoc = s.Doc(qGroup.GroupValue);
    assertEquals("Lisa", parentDoc.Get("name"));

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Minimal Advance() test: one block of [child, parent]; advancing the join scorer
/// to target 1 must return the parent docID (1).
/// </summary>
public void TestAdvanceSingleParentSingleChild()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    // Single block: child (docID 0) then parent (docID 1).
    Document childDoc = new Document();
    childDoc.Add(NewStringField("child", "1", Field.Store.NO));
    Document parentDoc = new Document();
    parentDoc.Add(NewStringField("parent", "1", Field.Store.NO));
    w.AddDocuments(Arrays.AsList(childDoc, parentDoc));

    IndexReader r = w.Reader;
    w.Dispose();
    IndexSearcher s = NewSearcher(r);

    Query tq = new TermQuery(new Term("child", "1"));
    Filter parentFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "1"))));

    ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(tq, parentFilter, ScoreMode.Avg);
    Weight weight = s.CreateNormalizedWeight(q);
    // Drive the scorer directly on the (only) leaf to test Advance().
    DocIdSetIterator disi = weight.Scorer(s.IndexReader.Leaves.First(), null);
    assertEquals(1, disi.Advance(1));

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Tests the block-join sorter: builds many [children..., parent] blocks with
/// numeric doc values, sorts the (single-segment) reader by parent_val with a
/// child_val tie-break inside each block, and verifies via the resulting DocMap
/// that (a) each child group still precedes its own parent, (b) children within a
/// block are sorted (stably) by child_val, and (c) parents are sorted (stably) by
/// parent_val.
/// </summary>
public void Test()
{
    RandomIndexWriter writer;
    DirectoryReader indexReader;
    int numParents = AtLeast(200);
    IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    cfg.SetMergePolicy(NewLogMergePolicy());
    using (writer = new RandomIndexWriter(Random(), NewDirectory(), cfg))
    {
        // The same Document/field instances are reused for every parent; only the
        // parent_val payload changes per iteration before AddDocuments copies it.
        Document parentDoc = new Document();
        NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L);
        parentDoc.Add(parentVal);
        StringField parent = new StringField("parent", "true", Field.Store.YES);
        parentDoc.Add(parent);
        for (int i = 0; i < numParents; ++i)
        {
            // 0-9 children per block, each with a small random child_val.
            List<Document> documents = new List<Document>();
            int numChildren = Random().nextInt(10);
            for (int j = 0; j < numChildren; ++j)
            {
                Document childDoc = new Document();
                childDoc.Add(new NumericDocValuesField("child_val", Random().nextInt(5)));
                documents.Add(childDoc);
            }
            parentVal.SetInt64Value(Random().nextInt(50));
            documents.Add(parentDoc);
            writer.AddDocuments(documents);
        }
        // Single segment so GetOnlySegmentReader below succeeds.
        writer.ForceMerge(1);
        indexReader = writer.Reader;
    }

    AtomicReader reader = GetOnlySegmentReader(indexReader);
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
    FixedBitSet parentBits = (FixedBitSet)parentsFilter.GetDocIdSet(reader.AtomicContext, null);
    NumericDocValues parentValues = reader.GetNumericDocValues("parent_val");
    NumericDocValues childValues = reader.GetNumericDocValues("child_val");

    Sort parentSort = new Sort(new SortField("parent_val", SortFieldType.INT64));
    Sort childSort = new Sort(new SortField("child_val", SortFieldType.INT64));

    Sort sort = new Sort(new SortField("custom", new BlockJoinComparerSource(parentsFilter, parentSort, childSort)));
    Sorter sorter = new Sorter(sort);
    Sorter.DocMap docMap = sorter.Sort(reader);
    assertEquals(reader.MaxDoc, docMap.Count);

    // Walk the sorted order; 'children' accumulates the (old) docIDs of the
    // children seen since the previous parent.
    int[] children = new int[1];
    int numChildren2 = 0;
    int previousParent = -1;
    for (int i = 0; i < docMap.Count; ++i)
    {
        int oldID = docMap.NewToOld(i);
        if (parentBits.Get(oldID))
        {
            // check that we have the right children
            for (int j = 0; j < numChildren2; ++j)
            {
                assertEquals(oldID, parentBits.NextSetBit(children[j]));
            }
            // check that children are sorted
            for (int j = 1; j < numChildren2; ++j)
            {
                int doc1 = children[j - 1];
                int doc2 = children[j];
                if (childValues.Get(doc1) == childValues.Get(doc2))
                {
                    assertTrue(doc1 < doc2); // sort is stable
                }
                else
                {
                    assertTrue(childValues.Get(doc1) < childValues.Get(doc2));
                }
            }
            // check that parents are sorted
            if (previousParent != -1)
            {
                if (parentValues.Get(previousParent) == parentValues.Get(oldID))
                {
                    assertTrue(previousParent < oldID); // sort is stable
                }
                else
                {
                    assertTrue(parentValues.Get(previousParent) < parentValues.Get(oldID));
                }
            }
            // reset
            previousParent = oldID;
            numChildren2 = 0;
        }
        else
        {
            children = ArrayUtil.Grow(children, numChildren2 + 1);
            children[numChildren2++] = oldID;
        }
    }

    indexReader.Dispose();
    writer.w.Directory.Dispose();
}
/// <summary>
/// Basic GroupingSearch test: indexes 7 docs in 4 author groups (one doc has no
/// author), groups by the "author" field and verifies group order, per-group doc
/// order, and descending scores within groups; then re-runs grouping by doc-block
/// ("groupend" marker) and checks the same totals.
/// </summary>
public virtual void TestBasic()
{
    string groupField = "author";
    FieldType customType = new FieldType();
    customType.IsStored = (true);

    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        Random,
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
    // DocValues-based grouping is unavailable on the Lucene3x codec.
    bool canUseIDV = !"Lucene3x".Equals(w.IndexWriter.Config.Codec.Name, StringComparison.Ordinal);
    JCG.List<Document> documents = new JCG.List<Document>();
    // 0
    Document doc = new Document();
    AddGroupField(doc, groupField, "author1", canUseIDV);
    doc.Add(new TextField("content", "random text", Field.Store.YES));
    doc.Add(new Field("id", "1", customType));
    documents.Add(doc);
    // 1
    doc = new Document();
    AddGroupField(doc, groupField, "author1", canUseIDV);
    doc.Add(new TextField("content", "some more random text", Field.Store.YES));
    doc.Add(new Field("id", "2", customType));
    documents.Add(doc);
    // 2
    doc = new Document();
    AddGroupField(doc, groupField, "author1", canUseIDV);
    doc.Add(new TextField("content", "some more random textual data", Field.Store.YES));
    doc.Add(new Field("id", "3", customType));
    // "groupend" marks the last doc of a block for block-grouping below.
    doc.Add(new StringField("groupend", "x", Field.Store.NO));
    documents.Add(doc);
    w.AddDocuments(documents);
    documents.Clear();
    // 3
    doc = new Document();
    AddGroupField(doc, groupField, "author2", canUseIDV);
    doc.Add(new TextField("content", "some random text", Field.Store.YES));
    doc.Add(new Field("id", "4", customType));
    doc.Add(new StringField("groupend", "x", Field.Store.NO));
    w.AddDocument(doc);
    // 4
    doc = new Document();
    AddGroupField(doc, groupField, "author3", canUseIDV);
    doc.Add(new TextField("content", "some more random text", Field.Store.YES));
    doc.Add(new Field("id", "5", customType));
    documents.Add(doc);
    // 5
    doc = new Document();
    AddGroupField(doc, groupField, "author3", canUseIDV);
    doc.Add(new TextField("content", "random", Field.Store.YES));
    doc.Add(new Field("id", "6", customType));
    doc.Add(new StringField("groupend", "x", Field.Store.NO));
    documents.Add(doc);
    w.AddDocuments(documents);
    documents.Clear();
    // 6 -- no author field
    doc = new Document();
    doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    // NOTE(review): id "6" duplicates the previous doc's id; the asserts below
    // never read the id field, so this is harmless — confirm against upstream.
    doc.Add(new Field("id", "6", customType));
    doc.Add(new StringField("groupend", "x", Field.Store.NO));
    w.AddDocument(doc);

    IndexSearcher indexSearcher = NewSearcher(w.GetReader());
    w.Dispose();

    Sort groupSort = Sort.RELEVANCE;
    GroupingSearch groupingSearch = CreateRandomGroupingSearch(groupField, groupSort, 5, canUseIDV);

    ITopGroups<object> groups = groupingSearch.Search(indexSearcher, (Filter)null, new TermQuery(new Index.Term("content", "random")), 0, 10);

    assertEquals(7, groups.TotalHitCount);
    assertEquals(7, groups.TotalGroupedHitCount);
    assertEquals(4, groups.Groups.Length);

    // relevance order: 5, 0, 3, 4, 1, 2, 6
    // the later a document is added the higher this docId
    // value
    IGroupDocs<object> group = groups.Groups[0];
    CompareGroupValue("author3", group);
    assertEquals(2, group.ScoreDocs.Length);
    assertEquals(5, group.ScoreDocs[0].Doc);
    assertEquals(4, group.ScoreDocs[1].Doc);
    assertTrue(group.ScoreDocs[0].Score > group.ScoreDocs[1].Score);

    group = groups.Groups[1];
    CompareGroupValue("author1", group);
    assertEquals(3, group.ScoreDocs.Length);
    assertEquals(0, group.ScoreDocs[0].Doc);
    assertEquals(1, group.ScoreDocs[1].Doc);
    assertEquals(2, group.ScoreDocs[2].Doc);
    assertTrue(group.ScoreDocs[0].Score > group.ScoreDocs[1].Score);
    assertTrue(group.ScoreDocs[1].Score > group.ScoreDocs[2].Score);

    group = groups.Groups[2];
    CompareGroupValue("author2", group);
    assertEquals(1, group.ScoreDocs.Length);
    assertEquals(3, group.ScoreDocs[0].Doc);

    // The author-less doc forms its own null group.
    group = groups.Groups[3];
    CompareGroupValue(null, group);
    assertEquals(1, group.ScoreDocs.Length);
    assertEquals(6, group.ScoreDocs[0].Doc);

    // Now group by doc blocks, delimited by the "groupend" marker docs.
    Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Index.Term("groupend", "x"))));
    groupingSearch = new GroupingSearch(lastDocInBlock);
    groups = groupingSearch.Search(indexSearcher, null, new TermQuery(new Index.Term("content", "random")), 0, 10);

    assertEquals(7, groups.TotalHitCount);
    assertEquals(7, groups.TotalGroupedHitCount);
    assertEquals(4, groups.TotalGroupCount.GetValueOrDefault());
    assertEquals(4, groups.Groups.Length);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
public void TestNestedSorting()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

    // Each block below is three child docs (field2/filter_1) followed by the
    // parent doc (__type=parent/field1), added atomically via AddDocuments so
    // they stay contiguous. NoMergePolicy plus the explicit commits keeps the
    // segment layout fixed, so the absolute docIDs asserted further down are
    // deterministic.
    IList<Document> docs = new List<Document>();

    AddNestedSortingChildDoc(docs, "a", "T");
    AddNestedSortingChildDoc(docs, "b", "T");
    AddNestedSortingChildDoc(docs, "c", "T");
    AddNestedSortingParentDoc(docs, "a");
    w.AddDocuments(docs);
    w.Commit();

    docs.Clear();
    AddNestedSortingChildDoc(docs, "c", "T");
    AddNestedSortingChildDoc(docs, "d", "T");
    AddNestedSortingChildDoc(docs, "e", "T");
    AddNestedSortingParentDoc(docs, "b");
    w.AddDocuments(docs);

    docs.Clear();
    AddNestedSortingChildDoc(docs, "e", "T");
    AddNestedSortingChildDoc(docs, "f", "T");
    AddNestedSortingChildDoc(docs, "g", "T");
    AddNestedSortingParentDoc(docs, "c");
    w.AddDocuments(docs);

    docs.Clear();
    AddNestedSortingChildDoc(docs, "g", "T");
    AddNestedSortingChildDoc(docs, "h", "F");
    AddNestedSortingChildDoc(docs, "i", "F");
    AddNestedSortingParentDoc(docs, "d");
    w.AddDocuments(docs);
    w.Commit();

    docs.Clear();
    AddNestedSortingChildDoc(docs, "i", "F");
    AddNestedSortingChildDoc(docs, "j", "F");
    AddNestedSortingChildDoc(docs, "k", "F");
    AddNestedSortingParentDoc(docs, "f");
    w.AddDocuments(docs);

    docs.Clear();
    AddNestedSortingChildDoc(docs, "k", "T");
    AddNestedSortingChildDoc(docs, "l", "T");
    AddNestedSortingChildDoc(docs, "m", "T");
    AddNestedSortingParentDoc(docs, "g");
    w.AddDocuments(docs);

    // This doc will not be included, because it doesn't have nested docs
    Document document = new Document();
    document.Add(new StringField("__type", "parent", Field.Store.NO));
    document.Add(new StringField("field1", "h", Field.Store.NO));
    w.AddDocument(document);

    docs.Clear();
    AddNestedSortingChildDoc(docs, "m", "T");
    AddNestedSortingChildDoc(docs, "n", "F");
    AddNestedSortingChildDoc(docs, "o", "F");
    AddNestedSortingParentDoc(docs, "i");
    w.AddDocuments(docs);
    w.Commit();

    // Some garbage docs, just to check if the NestedFieldComparator can deal with this.
    for (int garbage = 0; garbage < 3; garbage++)
    {
        document = new Document();
        document.Add(new StringField("fieldXXX", "x", Field.Store.NO));
        w.AddDocument(document);
    }

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.Open(w.w, false));
    w.Dispose();
    Filter parentFilter = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent")));
    // Empty-prefix PrefixQuery: matches every doc carrying a "field2" value, i.e. all child docs.
    Filter childFilter = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
    ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);

    // Sort by field ascending, order first
    ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, false, Wrap(parentFilter), Wrap(childFilter));
    Sort sort = new Sort(sortField);
    TopFieldDocs topDocs = searcher.Search(query, 5, sort);
    assertEquals(7, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(3, topDocs.ScoreDocs[0].Doc);
    assertEquals("a", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(7, topDocs.ScoreDocs[1].Doc);
    assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[2].Doc);
    assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(19, topDocs.ScoreDocs[4].Doc);
    assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    // Sort by field ascending, order last
    sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, false, true, Wrap(parentFilter), Wrap(childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.Search(query, 5, sort);
    assertEquals(7, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(3, topDocs.ScoreDocs[0].Doc);
    assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(7, topDocs.ScoreDocs[1].Doc);
    assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[2].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(19, topDocs.ScoreDocs[4].Doc);
    assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    // Sort by field descending, order last
    sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, true, Wrap(parentFilter), Wrap(childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.Search(query, 5, sort);
    // Fixed argument order: expected value comes first, matching the other asserts.
    assertEquals(7, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(28, topDocs.ScoreDocs[0].Doc);
    assertEquals("o", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(23, topDocs.ScoreDocs[1].Doc);
    assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(19, topDocs.ScoreDocs[2].Doc);
    assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[4].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    // Sort by field descending, order last, sort filter (filter_1:T)
    childFilter = new QueryWrapperFilter(new TermQuery(new Term("filter_1", "T")));
    query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);
    sortField = new ToParentBlockJoinSortField("field2", SortField.Type_e.STRING, true, Wrap(parentFilter), Wrap(childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.Search(query, 5, sort);
    assertEquals(6, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(23, topDocs.ScoreDocs[0].Doc);
    assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(28, topDocs.ScoreDocs[1].Doc);
    assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[2].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(7, topDocs.ScoreDocs[4].Doc);
    assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    searcher.IndexReader.Dispose();
    dir.Dispose();
}

// Appends a child document carrying the given "field2" and "filter_1" values.
private static void AddNestedSortingChildDoc(IList<Document> docs, string field2, string filter1)
{
    Document document = new Document();
    document.Add(new StringField("field2", field2, Field.Store.NO));
    document.Add(new StringField("filter_1", filter1, Field.Store.NO));
    docs.Add(document);
}

// Appends a parent document (__type=parent) carrying the given "field1" value.
private static void AddNestedSortingParentDoc(IList<Document> docs, string field1)
{
    Document document = new Document();
    document.Add(new StringField("__type", "parent", Field.Store.NO));
    document.Add(new StringField("field1", field1, Field.Store.NO));
    docs.Add(document);
}
// Randomized end-to-end check: builds a block-join index and a fully
// denormalized index over the same random parent/child data, runs random
// child->parent and parent->child join queries against the join index, and
// verifies the hits agree with plain queries on the denormalized index.
public void TestRandom()
{
    // We build two indices at once: one normalized (which
    // ToParentBlockJoinQuery/Collector,
    // ToChildBlockJoinQuery can query) and the other w/
    // the same docs, just fully denormalized:
    Directory dir = NewDirectory();
    Directory joinDir = NewDirectory();
    int numParentDocs = TestUtil.NextInt(Random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
    //final int numParentDocs = 30;
    // Values for parent fields:
    string[][] parentFields = GetRandomFields(numParentDocs / 2);
    // Values for child fields:
    string[][] childFields = GetRandomFields(numParentDocs);
    bool doDeletes = Random().NextBoolean();
    IList<int> toDelete = new List<int>();
    // TODO: parallel star join, nested join cases too!
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
    RandomIndexWriter joinW = new RandomIndexWriter(Random(), joinDir, Similarity, TimeZone);
    for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++)
    {
        Document parentDoc = new Document();
        Document parentJoinDoc = new Document();
        Field id = NewStringField("parentID", "" + parentDocID, Field.Store.YES);
        parentDoc.Add(id);
        parentJoinDoc.Add(id);
        // Only the join index marks parents explicitly; the denormalized
        // index has no parent/child distinction.
        parentJoinDoc.Add(NewStringField("isParent", "x", Field.Store.NO));
        for (int field = 0; field < parentFields.Length; field++)
        {
            // Each parent field is present ~90% of the time.
            if (Random().NextDouble() < 0.9)
            {
                Field f = NewStringField("parent" + field, parentFields[field][Random().Next(parentFields[field].Length)], Field.Store.NO);
                parentDoc.Add(f);
                parentJoinDoc.Add(f);
            }
        }
        if (doDeletes)
        {
            // blockID lets us delete a whole parent+children block later.
            parentDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
            parentJoinDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
        }
        IList<Document> joinDocs = new List<Document>();
        if (VERBOSE)
        {
            StringBuilder sb = new StringBuilder();
            sb.Append("parentID=").Append(parentDoc.Get("parentID"));
            for (int fieldID = 0; fieldID < parentFields.Length; fieldID++)
            {
                string parent = parentDoc.Get("parent" + fieldID);
                if (parent != null)
                {
                    sb.Append(" parent" + fieldID + "=" + parent);
                }
            }
            Console.WriteLine(" " + sb);
        }
        int numChildDocs = TestUtil.NextInt(Random(), 1, 20);
        for (int childDocID = 0; childDocID < numChildDocs; childDocID++)
        {
            // Denormalize: copy all parent fields into child doc:
            Document childDoc = TestUtil.CloneDocument(parentDoc);
            Document joinChildDoc = new Document();
            joinDocs.Add(joinChildDoc);
            Field childID = NewStringField("childID", "" + childDocID, Field.Store.YES);
            childDoc.Add(childID);
            joinChildDoc.Add(childID);
            for (int childFieldID = 0; childFieldID < childFields.Length; childFieldID++)
            {
                // Each child field is present ~90% of the time.
                if (Random().NextDouble() < 0.9)
                {
                    Field f = NewStringField("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)], Field.Store.NO);
                    childDoc.Add(f);
                    joinChildDoc.Add(f);
                }
            }
            if (VERBOSE)
            {
                StringBuilder sb = new StringBuilder();
                sb.Append("childID=").Append(joinChildDoc.Get("childID"));
                for (int fieldID = 0; fieldID < childFields.Length; fieldID++)
                {
                    string child = joinChildDoc.Get("child" + fieldID);
                    if (child != null)
                    {
                        sb.Append(" child" + fieldID + "=" + child);
                    }
                }
                Console.WriteLine(" " + sb);
            }
            if (doDeletes)
            {
                joinChildDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
            }
            w.AddDocument(childDoc);
        }
        // Parent last:
        joinDocs.Add(parentJoinDoc);
        joinW.AddDocuments(joinDocs);
        // ~1 in 30 blocks gets scheduled for deletion.
        if (doDeletes && Random().Next(30) == 7)
        {
            toDelete.Add(parentDocID);
        }
    }
    foreach (int deleteID in toDelete)
    {
        if (VERBOSE)
        {
            Console.WriteLine("DELETE parentID=" + deleteID);
        }
        w.DeleteDocuments(new Term("blockID", "" + deleteID));
        joinW.DeleteDocuments(new Term("blockID", "" + deleteID));
    }
    IndexReader r = w.Reader;
    w.Dispose();
    IndexReader joinR = joinW.Reader;
    joinW.Dispose();
    if (VERBOSE)
    {
        Console.WriteLine("TEST: reader=" + r);
        Console.WriteLine("TEST: joinReader=" + joinR);
        for (int docIDX = 0; docIDX < joinR.MaxDoc; docIDX++)
        {
            Console.WriteLine(" docID=" + docIDX + " doc=" + joinR.Document(docIDX));
        }
    }
    IndexSearcher s = NewSearcher(r);
    IndexSearcher joinS = new IndexSearcher(joinR);
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));
    int iters = 200 * RANDOM_MULTIPLIER;
    for (int iter = 0; iter < iters; iter++)
    {
        if (VERBOSE)
        {
            Console.WriteLine("TEST: iter=" + (1 + iter) + " of " + iters);
        }
        // Build a random child query: single term, random BooleanQuery, or
        // MUST term + random second clause.
        Query childQuery;
        if (Random().Next(3) == 2)
        {
            int childFieldID = Random().Next(childFields.Length);
            childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
        }
        else if (Random().Next(3) == 2)
        {
            BooleanQuery bq = new BooleanQuery();
            childQuery = bq;
            int numClauses = TestUtil.NextInt(Random(), 2, 4);
            bool didMust = false;
            for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
            {
                Query clause;
                BooleanClause.Occur occur;
                if (!didMust && Random().NextBoolean())
                {
                    occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                    clause = new TermQuery(RandomChildTerm(childFields[0]));
                    didMust = true;
                }
                else
                {
                    occur = BooleanClause.Occur.SHOULD;
                    int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
                    clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
                }
                bq.Add(clause, occur);
            }
        }
        else
        {
            BooleanQuery bq = new BooleanQuery();
            childQuery = bq;
            bq.Add(new TermQuery(RandomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
            int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
            bq.Add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
        }
        // Random score aggregation mode for the join.
        int x = Random().Next(4);
        ScoreMode agg;
        if (x == 0)
        {
            agg = ScoreMode.None;
        }
        else if (x == 1)
        {
            agg = ScoreMode.Max;
        }
        else if (x == 2)
        {
            agg = ScoreMode.Total;
        }
        else
        {
            agg = ScoreMode.Avg;
        }
        ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);
        // To run against the block-join index:
        Query parentJoinQuery;
        // Same query as parentJoinQuery, but to run against
        // the fully denormalized index (so we can compare
        // results):
        Query parentQuery;
        if (Random().NextBoolean())
        {
            parentQuery = childQuery;
            parentJoinQuery = childJoinQuery;
        }
        else
        {
            // AND parent field w/ child field
            BooleanQuery bq = new BooleanQuery();
            parentJoinQuery = bq;
            Term parentTerm = RandomParentTerm(parentFields[0]);
            if (Random().NextBoolean())
            {
                bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
                bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
            }
            else
            {
                bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
            }
            BooleanQuery bq2 = new BooleanQuery();
            parentQuery = bq2;
            if (Random().NextBoolean())
            {
                bq2.Add(childQuery, BooleanClause.Occur.MUST);
                bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
            }
            else
            {
                bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                bq2.Add(childQuery, BooleanClause.Occur.MUST);
            }
        }
        Sort parentSort = GetRandomSort("parent", parentFields.Length);
        Sort childSort = GetRandomSort("child", childFields.Length);
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
        }
        // Merge both sorts:
        IList<SortField> sortFields = new List<SortField>(Arrays.AsList(parentSort.GetSort()));
        sortFields.AddRange(Arrays.AsList(childSort.GetSort()));
        Sort parentAndChildSort = new Sort(sortFields.ToArray());
        TopDocs results = s.Search(parentQuery, null, r.NumDocs, parentAndChildSort);
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: normal index gets " + results.TotalHits + " hits");
            ScoreDoc[] hits = results.ScoreDocs;
            for (int hitIDX = 0; hitIDX < hits.Length; hitIDX++)
            {
                Document doc = s.Doc(hits[hitIDX].Doc);
                //System.out.println("  score=" + hits[hitIDX].Score + " parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                Console.WriteLine(" parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                FieldDoc fd = (FieldDoc)hits[hitIDX];
                if (fd.Fields != null)
                {
                    Console.Write(" ");
                    foreach (object o in fd.Fields)
                    {
                        if (o is BytesRef)
                        {
                            Console.Write(((BytesRef)o).Utf8ToString() + " ");
                        }
                        else
                        {
                            Console.Write(o + " ");
                        }
                    }
                    Console.WriteLine();
                }
            }
        }
        // ScoreMode.None produces no meaningful scores, so do not track them.
        bool trackScores;
        bool trackMaxScore;
        if (agg == ScoreMode.None)
        {
            trackScores = false;
            trackMaxScore = false;
        }
        else
        {
            trackScores = Random().NextBoolean();
            trackMaxScore = Random().NextBoolean();
        }
        ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);
        joinS.Search(parentJoinQuery, c);
        int hitsPerGroup = TestUtil.NextInt(Random(), 1, 20);
        //final int hitsPerGroup = 100;
        TopGroups<int> joinResults = c.GetTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.Groups.Length) + " groups; hitsPerGroup=" + hitsPerGroup);
            if (joinResults != null)
            {
                IGroupDocs<int>[] groups = joinResults.Groups;
                for (int groupIDX = 0; groupIDX < groups.Length; groupIDX++)
                {
                    IGroupDocs<int> group = groups[groupIDX];
                    if (group.GroupSortValues != null)
                    {
                        Console.Write(" ");
                        foreach (object o in group.GroupSortValues)
                        {
                            if (o is BytesRef)
                            {
                                Console.Write(((BytesRef)o).Utf8ToString() + " ");
                            }
                            else
                            {
                                Console.Write(o + " ");
                            }
                        }
                        Console.WriteLine();
                    }
                    assertNotNull(group.GroupValue);
                    Document parentDoc = joinS.Doc(group.GroupValue);
                    Console.WriteLine(" group parentID=" + parentDoc.Get("parentID") + " (docID=" + group.GroupValue + ")");
                    for (int hitIDX = 0; hitIDX < group.ScoreDocs.Length; hitIDX++)
                    {
                        Document doc = joinS.Doc(group.ScoreDocs[hitIDX].Doc);
                        //System.out.println("    score=" + group.ScoreDocs[hitIDX].Score + " childID=" + doc.Get("childID") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                        Console.WriteLine(" childID=" + doc.Get("childID") + " child0=" + doc.Get("child0") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                    }
                }
            }
        }
        if (results.TotalHits == 0)
        {
            assertNull(joinResults);
        }
        else
        {
            CompareHits(r, joinR, results, joinResults);
            TopDocs b = joinS.Search(childJoinQuery, 10);
            foreach (ScoreDoc hit in b.ScoreDocs)
            {
                // hit.Doc is a parent; because children precede the parent in
                // each block (see "Parent last" above), its last child sits at
                // hit.Doc - 1.
                Explanation explanation = joinS.Explain(childJoinQuery, hit.Doc);
                Document document = joinS.Doc(hit.Doc - 1);
                int childId = Convert.ToInt32(document.Get("childID"));
                assertTrue(explanation.IsMatch);
                assertEquals(hit.Score, explanation.Value, 0.0f);
                assertEquals(string.Format("Score based on child doc range from {0} to {1}", hit.Doc - 1 - childId, hit.Doc - 1), explanation.Description);
            }
        }
        // Test joining in the opposite direction (parent to
        // child):
        // Get random query against parent documents:
        Query parentQuery2;
        if (Random().Next(3) == 2)
        {
            int fieldID = Random().Next(parentFields.Length);
            parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
        }
        else if (Random().Next(3) == 2)
        {
            BooleanQuery bq = new BooleanQuery();
            parentQuery2 = bq;
            int numClauses = TestUtil.NextInt(Random(), 2, 4);
            bool didMust = false;
            for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
            {
                Query clause;
                BooleanClause.Occur occur;
                if (!didMust && Random().NextBoolean())
                {
                    occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                    clause = new TermQuery(RandomParentTerm(parentFields[0]));
                    didMust = true;
                }
                else
                {
                    occur = BooleanClause.Occur.SHOULD;
                    int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
                    clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
                }
                bq.Add(clause, occur);
            }
        }
        else
        {
            BooleanQuery bq = new BooleanQuery();
            parentQuery2 = bq;
            bq.Add(new TermQuery(RandomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
            int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
            bq.Add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
        }
        if (VERBOSE)
        {
            Console.WriteLine("\nTEST: top down: parentQuery2=" + parentQuery2);
        }
        // Maps parent query to child docs:
        ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter, Random().NextBoolean());
        // To run against the block-join index:
        Query childJoinQuery2;
        // Same query as parentJoinQuery, but to run against
        // the fully denormalized index (so we can compare
        // results):
        Query childQuery2;
        // apply a filter to children
        Filter childFilter2, childJoinFilter2;
        if (Random().NextBoolean())
        {
            childQuery2 = parentQuery2;
            childJoinQuery2 = parentJoinQuery2;
            childFilter2 = null;
            childJoinFilter2 = null;
        }
        else
        {
            Term childTerm = RandomChildTerm(childFields[0]);
            if (Random().NextBoolean()) // filtered case
            {
                childJoinQuery2 = parentJoinQuery2;
                Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                childJoinFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
            }
            else
            {
                childJoinFilter2 = null;
                // AND child field w/ parent query:
                BooleanQuery bq = new BooleanQuery();
                childJoinQuery2 = bq;
                if (Random().NextBoolean())
                {
                    bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                    bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                }
                else
                {
                    bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                    bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                }
            }
            if (Random().NextBoolean()) // filtered case
            {
                childQuery2 = parentQuery2;
                Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                childFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
            }
            else
            {
                childFilter2 = null;
                BooleanQuery bq2 = new BooleanQuery();
                childQuery2 = bq2;
                if (Random().NextBoolean())
                {
                    bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                    bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                }
                else
                {
                    bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                    bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                }
            }
        }
        Sort childSort2 = GetRandomSort("child", childFields.Length);
        // Search denormalized index:
        if (VERBOSE)
        {
            Console.WriteLine("TEST: run top down query=" + childQuery2 + " filter=" + childFilter2 + " sort=" + childSort2);
        }
        TopDocs results2 = s.Search(childQuery2, childFilter2, r.NumDocs, childSort2);
        if (VERBOSE)
        {
            Console.WriteLine(" " + results2.TotalHits + " totalHits:");
            foreach (ScoreDoc sd in results2.ScoreDocs)
            {
                Document doc = s.Doc(sd.Doc);
                Console.WriteLine(" childID=" + doc.Get("childID") + " parentID=" + doc.Get("parentID") + " docID=" + sd.Doc);
            }
        }
        // Search join index:
        if (VERBOSE)
        {
            Console.WriteLine("TEST: run top down join query=" + childJoinQuery2 + " filter=" + childJoinFilter2 + " sort=" + childSort2);
        }
        TopDocs joinResults2 = joinS.Search(childJoinQuery2, childJoinFilter2, joinR.NumDocs, childSort2);
        if (VERBOSE)
        {
            Console.WriteLine(" " + joinResults2.TotalHits + " totalHits:");
            foreach (ScoreDoc sd in joinResults2.ScoreDocs)
            {
                Document doc = joinS.Doc(sd.Doc);
                Document parentDoc = GetParentDoc(joinR, parentsFilter, sd.Doc);
                Console.WriteLine(" childID=" + doc.Get("childID") + " parentID=" + parentDoc.Get("parentID") + " docID=" + sd.Doc);
            }
        }
        CompareChildHits(r, joinR, results2, joinResults2);
    }
    r.Dispose();
    joinR.Dispose();
    dir.Dispose();
    joinDir.Dispose();
}
public void TestGetTopGroups()
{
    // Index one resume ("Frank") whose four shuffled job docs form a block,
    // bracketed by skill-less filler docs, then verify that the block-join
    // collector surfaces the matching children both when all child docs are
    // requested and when the per-group hit count is capped at one.
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    IList<Document> block = new List<Document>();
    block.Add(MakeJob("ruby", 2005));
    block.Add(MakeJob("java", 2006));
    block.Add(MakeJob("java", 2010));
    block.Add(MakeJob("java", 2012));
    CollectionsHelper.Shuffle(block);
    block.Add(MakeResume("Frank", "United States"));

    AddSkillless(writer);
    writer.AddDocuments(block);
    AddSkillless(writer);

    IndexReader reader = writer.Reader;
    writer.Dispose();
    IndexSearcher searcher = new IndexSearcher(reader);

    // "parent" documents in this index are the resumes.
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Child criteria: java experience in 2006..2011 inclusive.
    BooleanQuery childQuery = new BooleanQuery();
    childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
    childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

    // Join each child match up to its parent resume.
    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);
    ToParentBlockJoinCollector collector = new ToParentBlockJoinCollector(Sort.RELEVANCE, 2, true, true);
    searcher.Search(childJoinQuery, collector);

    // Both retrieval styles must return every matching child document.
    TopGroups<int>[] allChildResults = new TopGroups<int>[2];
    allChildResults[0] = collector.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
    allChildResults[1] = collector.GetTopGroupsWithAllChildDocs(childJoinQuery, null, 0, 0, true);
    for (int resultIDX = 0; resultIDX < allChildResults.Length; resultIDX++)
    {
        TopGroups<int> results = allChildResults[resultIDX];
        assertFalse(float.IsNaN(results.MaxScore));
        assertEquals(2, results.TotalGroupedHitCount);
        assertEquals(1, results.Groups.Length);

        IGroupDocs<int> onlyGroup = results.Groups[0];
        assertEquals(2, onlyGroup.TotalHits);
        assertFalse(float.IsNaN(onlyGroup.Score));
        assertNotNull(onlyGroup.GroupValue);
        Document parent = searcher.Doc(onlyGroup.GroupValue);
        assertEquals("Frank", parent.Get("name"));
        // All matched child documents were collected.
        assertEquals(2, onlyGroup.ScoreDocs.Length);
        for (int hitIDX = 0; hitIDX < onlyGroup.ScoreDocs.Length; hitIDX++)
        {
            Document child = searcher.Doc(onlyGroup.ScoreDocs[hitIDX].Doc);
            assertEquals("java", child.Get("skill"));
            int year = Convert.ToInt32(child.Get("year"));
            assertTrue(year >= 2006 && year <= 2011);
        }
    }

    // With maxDocsPerGroup == 1, only part of the children comes back.
    TopGroups<int> boundedResults = collector.GetTopGroups(childJoinQuery, null, 0, 1, 0, true);
    assertFalse(float.IsNaN(boundedResults.MaxScore));
    assertEquals(2, boundedResults.TotalGroupedHitCount);
    assertEquals(1, boundedResults.Groups.Length);

    IGroupDocs<int> boundedGroup = boundedResults.Groups[0];
    assertEquals(2, boundedGroup.TotalHits);
    assertFalse(float.IsNaN(boundedGroup.Score));
    assertNotNull(boundedGroup.GroupValue);
    Document boundedParent = searcher.Doc(boundedGroup.GroupValue);
    assertEquals("Frank", boundedParent.Get("name"));
    // Not all matched child documents were collected.
    assertEquals(1, boundedGroup.ScoreDocs.Length);
    foreach (ScoreDoc scoreDoc in boundedGroup.ScoreDocs)
    {
        Document child = searcher.Doc(scoreDoc.Doc);
        assertEquals("java", child.Get("skill"));
        int year = Convert.ToInt32(child.Get("year"));
        assertTrue(year >= 2006 && year <= 2011);
    }

    reader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Tests ToParentBlockJoinSortField: sorting parent hits by a child ("nested")
/// field value — ascending/descending, taking the first or last child value per
/// block, and with a sort filter (filter_1:T) restricting which children
/// contribute to the sort value. Fixes the one assertion that had its
/// expected/actual arguments swapped relative to the rest of the file.
/// </summary>
public void TestNestedSorting()
{
    Directory dir = NewDirectory();
    // NoMergePolicy keeps the child/parent blocks in their original segments so
    // the hard-coded docIDs asserted below stay stable.
    RandomIndexWriter w = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
    IList<Document> docs = new List<Document>();

    // Block 1 (docs 0-3): children a/b/c (filter_1:T), parent field1:a
    Document document = new Document();
    document.Add(new StringField("field2", "a", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "b", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "c", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("__type", "parent", Field.Store.NO));
    document.Add(new StringField("field1", "a", Field.Store.NO));
    docs.Add(document);
    w.AddDocuments(docs);
    w.Commit();
    docs.Clear();

    // Block 2 (docs 4-7): children c/d/e (filter_1:T), parent field1:b
    document = new Document();
    document.Add(new StringField("field2", "c", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "d", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "e", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("__type", "parent", Field.Store.NO));
    document.Add(new StringField("field1", "b", Field.Store.NO));
    docs.Add(document);
    w.AddDocuments(docs);
    docs.Clear();

    // Block 3 (docs 8-11): children e/f/g (filter_1:T), parent field1:c
    document = new Document();
    document.Add(new StringField("field2", "e", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "f", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "g", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("__type", "parent", Field.Store.NO));
    document.Add(new StringField("field1", "c", Field.Store.NO));
    docs.Add(document);
    w.AddDocuments(docs);
    docs.Clear();

    // Block 4 (docs 12-15): children g (T) / h, i (F), parent field1:d
    document = new Document();
    document.Add(new StringField("field2", "g", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "h", Field.Store.NO));
    document.Add(new StringField("filter_1", "F", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "i", Field.Store.NO));
    document.Add(new StringField("filter_1", "F", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("__type", "parent", Field.Store.NO));
    document.Add(new StringField("field1", "d", Field.Store.NO));
    docs.Add(document);
    w.AddDocuments(docs);
    w.Commit();
    docs.Clear();

    // Block 5 (docs 16-19): children i/j/k (all filter_1:F), parent field1:f
    document = new Document();
    document.Add(new StringField("field2", "i", Field.Store.NO));
    document.Add(new StringField("filter_1", "F", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "j", Field.Store.NO));
    document.Add(new StringField("filter_1", "F", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "k", Field.Store.NO));
    document.Add(new StringField("filter_1", "F", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("__type", "parent", Field.Store.NO));
    document.Add(new StringField("field1", "f", Field.Store.NO));
    docs.Add(document);
    w.AddDocuments(docs);
    docs.Clear();

    // Block 6 (docs 20-23): children k/l/m (filter_1:T), parent field1:g
    document = new Document();
    document.Add(new StringField("field2", "k", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "l", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "m", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("__type", "parent", Field.Store.NO));
    document.Add(new StringField("field1", "g", Field.Store.NO));
    docs.Add(document);
    w.AddDocuments(docs);

    // This doc will not be included, because it doesn't have nested docs
    document = new Document();
    document.Add(new StringField("__type", "parent", Field.Store.NO));
    document.Add(new StringField("field1", "h", Field.Store.NO));
    w.AddDocument(document);
    docs.Clear();

    // Block 7 (docs 25-28): children m (T) / n, o (F), parent field1:i
    document = new Document();
    document.Add(new StringField("field2", "m", Field.Store.NO));
    document.Add(new StringField("filter_1", "T", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "n", Field.Store.NO));
    document.Add(new StringField("filter_1", "F", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("field2", "o", Field.Store.NO));
    document.Add(new StringField("filter_1", "F", Field.Store.NO));
    docs.Add(document);
    document = new Document();
    document.Add(new StringField("__type", "parent", Field.Store.NO));
    document.Add(new StringField("field1", "i", Field.Store.NO));
    docs.Add(document);
    w.AddDocuments(docs);
    w.Commit();

    // Some garbage docs, just to check if the NestedFieldComparer can deal with this.
    document = new Document();
    document.Add(new StringField("fieldXXX", "x", Field.Store.NO));
    w.AddDocument(document);
    document = new Document();
    document.Add(new StringField("fieldXXX", "x", Field.Store.NO));
    w.AddDocument(document);
    document = new Document();
    document.Add(new StringField("fieldXXX", "x", Field.Store.NO));
    w.AddDocument(document);

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.Open(w.IndexWriter, false));
    w.Dispose();
    Filter parentFilter = new QueryWrapperFilter(new TermQuery(new Term("__type", "parent")));
    // Term with no text: the prefix query matches every term in "field2",
    // i.e. every child document.
    Filter childFilter = new QueryWrapperFilter(new PrefixQuery(new Term("field2")));
    ToParentBlockJoinQuery query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);

    // Sort by field ascending, order first
    ToParentBlockJoinSortField sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, false, Wrap(parentFilter), Wrap(childFilter));
    Sort sort = new Sort(sortField);
    TopFieldDocs topDocs = searcher.Search(query, 5, sort);
    assertEquals(7, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(3, topDocs.ScoreDocs[0].Doc);
    assertEquals("a", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(7, topDocs.ScoreDocs[1].Doc);
    assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[2].Doc);
    assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(19, topDocs.ScoreDocs[4].Doc);
    assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    // Sort by field ascending, order last
    sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, false, true, Wrap(parentFilter), Wrap(childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.Search(query, 5, sort);
    assertEquals(7, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(3, topDocs.ScoreDocs[0].Doc);
    assertEquals("c", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(7, topDocs.ScoreDocs[1].Doc);
    assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[2].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(19, topDocs.ScoreDocs[4].Doc);
    assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    // Sort by field descending, order last
    sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, true, Wrap(parentFilter), Wrap(childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.Search(query, 5, sort);
    // Fixed: arguments were swapped (actual, expected); expected value first,
    // consistent with every other assertEquals in this file.
    assertEquals(7, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(28, topDocs.ScoreDocs[0].Doc);
    assertEquals("o", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(23, topDocs.ScoreDocs[1].Doc);
    assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(19, topDocs.ScoreDocs[2].Doc);
    assertEquals("k", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("i", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[4].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    // Sort by field descending, order last, sort filter (filter_1:T)
    childFilter = new QueryWrapperFilter(new TermQuery(new Term("filter_1", "T")));
    query = new ToParentBlockJoinQuery(new FilteredQuery(new MatchAllDocsQuery(), childFilter), new FixedBitSetCachingWrapperFilter(parentFilter), ScoreMode.None);
    sortField = new ToParentBlockJoinSortField("field2", SortFieldType.STRING, true, Wrap(parentFilter), Wrap(childFilter));
    sort = new Sort(sortField);
    topDocs = searcher.Search(query, 5, sort);
    // Block 5's children are all filter_1:F, so its parent (doc 19) drops out: 6 hits.
    assertEquals(6, topDocs.TotalHits);
    assertEquals(5, topDocs.ScoreDocs.Length);
    assertEquals(23, topDocs.ScoreDocs[0].Doc);
    assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[0]).Fields[0]).Utf8ToString());
    assertEquals(28, topDocs.ScoreDocs[1].Doc);
    assertEquals("m", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[1]).Fields[0]).Utf8ToString());
    assertEquals(11, topDocs.ScoreDocs[2].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[2]).Fields[0]).Utf8ToString());
    assertEquals(15, topDocs.ScoreDocs[3].Doc);
    assertEquals("g", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[3]).Fields[0]).Utf8ToString());
    assertEquals(7, topDocs.ScoreDocs[4].Doc);
    assertEquals("e", ((BytesRef)((FieldDoc)topDocs.ScoreDocs[4]).Fields[0]).Utf8ToString());

    searcher.IndexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Basic block-join round trip: joins child job docs "up" to their parent
/// resume (ToParentBlockJoinQuery), then joins a parent query "down" to child
/// docs (ToChildBlockJoinQuery), and finally applies a child-side filter.
/// </summary>
public void TestSimple()
{
    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

    // Two resume blocks: Lisa (UK) with java-2007/python-2010 jobs,
    // Frank (US) with ruby-2005/java-2006 jobs.
    IList<Document> docs = new List<Document>();
    docs.Add(MakeJob("java", 2007));
    docs.Add(MakeJob("python", 2010));
    docs.Add(MakeResume("Lisa", "United Kingdom"));
    w.AddDocuments(docs);

    docs.Clear();
    docs.Add(MakeJob("ruby", 2005));
    docs.Add(MakeJob("java", 2006));
    docs.Add(MakeResume("Frank", "United States"));
    w.AddDocuments(docs);

    IndexReader r = w.Reader;
    w.Dispose();
    IndexSearcher s = NewSearcher(r);

    // Create a filter that defines "parent" documents in the index - in this case resumes
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume"))));

    // Define child document criteria (finds an example of relevant work experience)
    BooleanQuery childQuery = new BooleanQuery();
    childQuery.Add(new BooleanClause(new TermQuery(new Term("skill", "java")), BooleanClause.Occur.MUST));
    childQuery.Add(new BooleanClause(NumericRangeQuery.NewIntRange("year", 2006, 2011, true, true), BooleanClause.Occur.MUST));

    // Define parent document criteria (find a resident in the UK)
    Query parentQuery = new TermQuery(new Term("country", "United Kingdom"));

    // Wrap the child document query to 'join' any matches
    // up to corresponding parent:
    ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, ScoreMode.Avg);

    // Combine the parent and nested child queries into a single query for a candidate
    BooleanQuery fullQuery = new BooleanQuery();
    fullQuery.Add(new BooleanClause(parentQuery, BooleanClause.Occur.MUST));
    fullQuery.Add(new BooleanClause(childJoinQuery, BooleanClause.Occur.MUST));

    ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(Sort.RELEVANCE, 1, true, true);
    s.Search(fullQuery, c);

    // Only Lisa matches both the UK parent criterion and a java-2006..2011 child.
    TopGroups<int> results = c.GetTopGroups(childJoinQuery, null, 0, 10, 0, true);
    assertFalse(float.IsNaN(results.MaxScore));
    //assertEquals(1, results.totalHitCount);
    assertEquals(1, results.TotalGroupedHitCount);
    assertEquals(1, results.Groups.Length);

    IGroupDocs<int> group = results.Groups[0];
    assertEquals(1, group.TotalHits);
    assertFalse(float.IsNaN(group.Score));

    Document childDoc = s.Doc(group.ScoreDocs[0].Doc);
    //System.out.println("  doc=" + group.ScoreDocs[0].Doc);
    assertEquals("java", childDoc.Get("skill"));
    assertNotNull(group.GroupValue);
    Document parentDoc = s.Doc(group.GroupValue);
    assertEquals("Lisa", parentDoc.Get("name"));

    //System.out.println("TEST: now test up");

    // Now join "up" (map parent hits to child docs) instead...:
    ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, Random().NextBoolean());
    BooleanQuery fullChildQuery = new BooleanQuery();
    fullChildQuery.Add(new BooleanClause(parentJoinQuery, BooleanClause.Occur.MUST));
    fullChildQuery.Add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));

    //System.out.println("FULL: " + fullChildQuery);
    TopDocs hits = s.Search(fullChildQuery, 10);
    assertEquals(1, hits.TotalHits);
    childDoc = s.Doc(hits.ScoreDocs[0].Doc);
    //System.out.println("CHILD = " + childDoc + " docID=" + hits.ScoreDocs[0].Doc);
    assertEquals("java", childDoc.Get("skill"));
    assertEquals(2007, childDoc.GetField("year").NumericValue);
    assertEquals("Lisa", GetParentDoc(r, parentsFilter, hits.ScoreDocs[0].Doc).Get("name"));

    // Test with filter on child docs:
    assertEquals(0, s.Search(fullChildQuery, new QueryWrapperFilter(new TermQuery(new Term("skill", "foosball"))), 1).TotalHits);

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Tests BlockJoinComparatorSource via Sorter: builds random parent/child
/// blocks, sorts the (force-merged) segment by parent value then child value,
/// and walks the resulting DocMap verifying (1) each child still precedes its
/// own parent, (2) children within a block are sorted stably by child_val, and
/// (3) parents are sorted stably by parent_val.
/// </summary>
public void Test()
{
    RandomIndexWriter writer;
    DirectoryReader indexReader;
    int numParents = AtLeast(200);
    IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    cfg.SetMergePolicy(NewLogMergePolicy());
    using (writer = new RandomIndexWriter(Random(), NewDirectory(), cfg))
    {
        // The same parentDoc instance is reused each iteration; only the
        // NumericDocValuesField's value is rewritten before each AddDocuments.
        Document parentDoc = new Document();
        NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L);
        parentDoc.Add(parentVal);
        StringField parent = new StringField("parent", "true", Field.Store.YES);
        parentDoc.Add(parent);
        for (int i = 0; i < numParents; ++i)
        {
            List<Document> documents = new List<Document>();
            // 0..9 children per block (possibly none).
            int numChildren = Random().nextInt(10);
            for (int j = 0; j < numChildren; ++j)
            {
                Document childDoc = new Document();
                childDoc.Add(new NumericDocValuesField("child_val", Random().nextInt(5)));
                documents.Add(childDoc);
            }
            parentVal.LongValue = (Random().nextInt(50));
            documents.Add(parentDoc);
            writer.AddDocuments(documents);
        }
        // Single segment so GetOnlySegmentReader below succeeds.
        writer.ForceMerge(1);
        indexReader = writer.Reader;
    }
    AtomicReader reader = GetOnlySegmentReader(indexReader);
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
    FixedBitSet parentBits = (FixedBitSet)parentsFilter.GetDocIdSet(reader.AtomicContext, null);
    NumericDocValues parentValues = reader.GetNumericDocValues("parent_val");
    NumericDocValues childValues = reader.GetNumericDocValues("child_val");
    Sort parentSort = new Sort(new SortField("parent_val", SortField.Type_e.LONG));
    Sort childSort = new Sort(new SortField("child_val", SortField.Type_e.LONG));
    Sort sort = new Sort(new SortField("custom", new BlockJoinComparatorSource(parentsFilter, parentSort, childSort)));
    Sorter sorter = new Sorter(sort);
    Sorter.DocMap docMap = sorter.Sort(reader);
    assertEquals(reader.MaxDoc, docMap.Count);

    // Walk the new doc order; `children` accumulates the (old) docIDs of the
    // children seen since the last parent.
    int[] children = new int[1];
    int numChildren2 = 0;
    int previousParent = -1;
    for (int i = 0; i < docMap.Count; ++i)
    {
        int oldID = docMap.NewToOld(i);
        if (parentBits.Get(oldID))
        {
            // check that we have the right children
            for (int j = 0; j < numChildren2; ++j)
            {
                // Each accumulated child's next parent bit must be this parent.
                assertEquals(oldID, parentBits.NextSetBit(children[j]));
            }
            // check that children are sorted
            for (int j = 1; j < numChildren2; ++j)
            {
                int doc1 = children[j - 1];
                int doc2 = children[j];
                if (childValues.Get(doc1) == childValues.Get(doc2))
                {
                    assertTrue(doc1 < doc2); // sort is stable
                }
                else
                {
                    assertTrue(childValues.Get(doc1) < childValues.Get(doc2));
                }
            }
            // check that parents are sorted
            if (previousParent != -1)
            {
                if (parentValues.Get(previousParent) == parentValues.Get(oldID))
                {
                    // Equal parent values: stability requires original order.
                    assertTrue(previousParent < oldID);
                }
                else
                {
                    assertTrue(parentValues.Get(previousParent) < parentValues.Get(oldID));
                }
            }
            // reset
            previousParent = oldID;
            numChildren2 = 0;
        }
        else
        {
            children = ArrayUtil.Grow(children, numChildren2 + 1);
            children[numChildren2++] = oldID;
        }
    }
    indexReader.Dispose();
    // Directory was created inline above, so dispose it through the writer.
    writer.w.Directory.Dispose();
}
/// <summary>
/// Tests GroupingSearch over the "author" field: verifies group order, per-group
/// docIDs and relative scores for a term query, then re-runs the search grouping
/// by document blocks (via a "groupend" marker filter) and checks the totals match.
/// </summary>
public virtual void TestBasic()
{
    string groupField = "author";
    FieldType customType = new FieldType();
    customType.Stored = (true);

    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        Random(),
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
    // DocValues-based grouping is unavailable on the legacy Lucene3x codec.
    bool canUseIDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);

    // Docs 0-2 form author1's block, doc 3 is author2's, docs 4-5 author3's,
    // doc 6 has no author; "groupend" marks the last doc of each block.
    List<Document> documents = new List<Document>();
    // 0
    Document doc = new Document();
    AddGroupField(doc, groupField, "author1", canUseIDV);
    doc.Add(new TextField("content", "random text", Field.Store.YES));
    doc.Add(new Field("id", "1", customType));
    documents.Add(doc);
    // 1
    doc = new Document();
    AddGroupField(doc, groupField, "author1", canUseIDV);
    doc.Add(new TextField("content", "some more random text", Field.Store.YES));
    doc.Add(new Field("id", "2", customType));
    documents.Add(doc);
    // 2
    doc = new Document();
    AddGroupField(doc, groupField, "author1", canUseIDV);
    doc.Add(new TextField("content", "some more random textual data", Field.Store.YES));
    doc.Add(new Field("id", "3", customType));
    doc.Add(new StringField("groupend", "x", Field.Store.NO));
    documents.Add(doc);
    w.AddDocuments(documents);
    documents.Clear();
    // 3
    doc = new Document();
    AddGroupField(doc, groupField, "author2", canUseIDV);
    doc.Add(new TextField("content", "some random text", Field.Store.YES));
    doc.Add(new Field("id", "4", customType));
    doc.Add(new StringField("groupend", "x", Field.Store.NO));
    w.AddDocument(doc);
    // 4
    doc = new Document();
    AddGroupField(doc, groupField, "author3", canUseIDV);
    doc.Add(new TextField("content", "some more random text", Field.Store.YES));
    doc.Add(new Field("id", "5", customType));
    documents.Add(doc);
    // 5
    doc = new Document();
    AddGroupField(doc, groupField, "author3", canUseIDV);
    doc.Add(new TextField("content", "random", Field.Store.YES));
    doc.Add(new Field("id", "6", customType));
    doc.Add(new StringField("groupend", "x", Field.Store.NO));
    documents.Add(doc);
    w.AddDocuments(documents);
    documents.Clear();
    // 6 -- no author field
    // NOTE(review): this doc reuses id "6" (same as doc 5) — looks accidental
    // but is never asserted on; confirm against the upstream Lucene test.
    doc = new Document();
    doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    doc.Add(new Field("id", "6", customType));
    doc.Add(new StringField("groupend", "x", Field.Store.NO));
    w.AddDocument(doc);

    IndexSearcher indexSearcher = NewSearcher(w.Reader);
    w.Dispose();

    Sort groupSort = Sort.RELEVANCE;
    GroupingSearch groupingSearch = CreateRandomGroupingSearch(groupField, groupSort, 5, canUseIDV);

    ITopGroups<object> groups = groupingSearch.Search(indexSearcher, (Filter)null, new TermQuery(new Index.Term("content", "random")), 0, 10);

    assertEquals(7, groups.TotalHitCount);
    assertEquals(7, groups.TotalGroupedHitCount);
    assertEquals(4, groups.Groups.Length);

    // relevance order: 5, 0, 3, 4, 1, 2, 6

    // the later a document is added the higher this docId
    // value
    IGroupDocs<object> group = groups.Groups[0];
    CompareGroupValue("author3", group);
    assertEquals(2, group.ScoreDocs.Length);
    assertEquals(5, group.ScoreDocs[0].Doc);
    assertEquals(4, group.ScoreDocs[1].Doc);
    assertTrue(group.ScoreDocs[0].Score > group.ScoreDocs[1].Score);

    group = groups.Groups[1];
    CompareGroupValue("author1", group);
    assertEquals(3, group.ScoreDocs.Length);
    assertEquals(0, group.ScoreDocs[0].Doc);
    assertEquals(1, group.ScoreDocs[1].Doc);
    assertEquals(2, group.ScoreDocs[2].Doc);
    assertTrue(group.ScoreDocs[0].Score > group.ScoreDocs[1].Score);
    assertTrue(group.ScoreDocs[1].Score > group.ScoreDocs[2].Score);

    group = groups.Groups[2];
    CompareGroupValue("author2", group);
    assertEquals(1, group.ScoreDocs.Length);
    assertEquals(3, group.ScoreDocs[0].Doc);

    group = groups.Groups[3];
    CompareGroupValue(null, group);
    assertEquals(1, group.ScoreDocs.Length);
    assertEquals(6, group.ScoreDocs[0].Doc);

    // Now group by doc blocks instead of a field: "groupend" marks block ends.
    Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Index.Term("groupend", "x"))));
    groupingSearch = new GroupingSearch(lastDocInBlock);
    groups = groupingSearch.Search(indexSearcher, null, new TermQuery(new Index.Term("content", "random")), 0, 10);

    assertEquals(7, groups.TotalHitCount);
    assertEquals(7, groups.TotalGroupedHitCount);
    assertEquals(4, groups.TotalGroupCount.GetValueOrDefault());
    assertEquals(4, groups.Groups.Length);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}