/// <summary>
/// Builds the <see cref="TopDocs"/> that a join query is expected to produce for
/// <paramref name="queryValue"/>, using the hits pre-computed in <paramref name="context"/>.
/// </summary>
/// <param name="queryValue">Key used to look up the pre-computed hits. The lookup uses the dictionary indexer.</param>
/// <param name="from">
/// When <c>true</c> the hits are read from <c>context.FromHitsToJoinScore</c>,
/// otherwise from <c>context.ToHitsToJoinScore</c>.
/// </param>
/// <param name="scoreMode">Passed to <c>JoinScore.Score</c> to obtain the score of every hit.</param>
/// <param name="context">Holder of the pre-computed hit maps.</param>
/// <returns>
/// A <see cref="TopDocs"/> carrying the total number of hits, at most the first 10 hits
/// (ordered by descending score, ties broken by ascending key) and the score of the first hit,
/// or <see cref="float.NaN"/> when there are no hits.
/// </returns>
private TopDocs CreateExpectedTopDocs(string queryValue, bool from, ScoreMode scoreMode, IndexIterationContext context)
{
    var hitsToJoinScores = @from
        ? context.FromHitsToJoinScore[queryValue]
        : context.ToHitsToJoinScore[queryValue];

    var hits = new JCG.List<KeyValuePair<int, JoinScore>>(hitsToJoinScores);
    hits.Sort(Comparer<KeyValuePair<int, JoinScore>>.Create((hit1, hit2) =>
    {
        float score1 = hit1.Value.Score(scoreMode);
        float score2 = hit2.Value.Score(scoreMode);

        // Operands are swapped on purpose: higher scores sort first.
        int cmp = score2.CompareTo(score1);
        if (cmp != 0)
        {
            return cmp;
        }

        // Tie-break on ascending key. CompareTo is used instead of subtraction
        // because "a - b" can overflow and report the wrong sign.
        return hit1.Key.CompareTo(hit2.Key);
    }));

    ScoreDoc[] scoreDocs = new ScoreDoc[Math.Min(10, hits.Count)];
    for (int i = 0; i < scoreDocs.Length; i++)
    {
        KeyValuePair<int, JoinScore> hit = hits[i];
        scoreDocs[i] = new ScoreDoc(hit.Key, hit.Value.Score(scoreMode));
    }

    // The list is sorted best-first, so the first entry carries the maximum score.
    float maxScore = hits.Count == 0 ? float.NaN : hits[0].Value.Score(scoreMode);
    return new TopDocs(hits.Count, scoreDocs, maxScore);
}
/// <summary>
/// Creates the collector and stores the owning test instance, the "to" field name and the two
/// score maps it is given.
/// </summary>
/// <param name="testJoinUtil">Owning test instance; stored in <c>OuterInstance</c>.</param>
/// <param name="context">Accepted for signature compatibility; this constructor does not store it.</param>
/// <param name="toField">Stored in <c>_toField</c>.</param>
/// <param name="joinValueToJoinScores">Stored in <c>_joinValueToJoinScores</c>.</param>
/// <param name="docToJoinScore">Stored in <c>_docToJoinScore</c>.</param>
public CollectorAnonymousInnerClassHelper5(
    TestJoinUtil testJoinUtil,
    IndexIterationContext context,
    string toField,
    IDictionary<BytesRef, JoinScore> joinValueToJoinScores,
    IDictionary<int, JoinScore> docToJoinScore)
{
    // Owner first, then the collaborating state.
    OuterInstance = testJoinUtil;

    _docToJoinScore = docToJoinScore;
    _joinValueToJoinScores = joinValueToJoinScores;
    _toField = toField;
}
/// <summary>
/// Creates the collector, storing every argument in the member of the matching name and
/// allocating a fresh <c>BytesRef</c> for <c>spare</c>.
/// </summary>
/// <param name="outerInstance">Owning test instance; stored in <c>OuterInstance</c>.</param>
/// <param name="context">Stored in <c>Context</c>.</param>
/// <param name="fromField">Stored in <c>FromField</c>.</param>
/// <param name="joinValueToJoinScores">Stored in <c>JoinValueToJoinScores</c>.</param>
public CollectorAnonymousInnerClassHelper4(
    TestJoinUtil outerInstance,
    IndexIterationContext context,
    string fromField,
    IDictionary<BytesRef, JoinScore> joinValueToJoinScores)
{
    // Scratch buffer owned by this instance.
    spare = new BytesRef();

    OuterInstance = outerInstance;
    Context = context;
    JoinValueToJoinScores = joinValueToJoinScores;
    FromField = fromField;
}
/// <summary>
/// Creates the collector, storing every argument in the member of the matching name.
/// </summary>
/// <param name="outerInstance">Owning test instance; stored in <c>OuterInstance</c>.</param>
/// <param name="scoreDocsInOrder">Stored in <c>ScoreDocsInOrder</c>.</param>
/// <param name="context">Stored in <c>Context</c>.</param>
/// <param name="actualResult">Stored in <c>ActualResult</c>.</param>
/// <param name="topScoreDocCollector">Stored in <c>TopScoreDocCollector</c>.</param>
public CollectorAnonymousInnerClassHelper2(
    TestJoinUtil outerInstance,
    bool scoreDocsInOrder,
    IndexIterationContext context,
    FixedBitSet actualResult,
    TopScoreDocCollector topScoreDocCollector)
{
    OuterInstance = outerInstance;
    Context = context;

    // Collection targets and ordering flag.
    TopScoreDocCollector = topScoreDocCollector;
    ActualResult = actualResult;
    ScoreDocsInOrder = scoreDocsInOrder;
}
/// <summary>
/// Computes the set of document numbers a join is expected to match for
/// <paramref name="queryValue"/>.
/// </summary>
/// <remarks>
/// Every document recorded under <paramref name="queryValue"/> contributes its link values; each
/// link value is looked up on the opposite side, and for every document found there the first
/// posting of its <c>"id"</c> term in <paramref name="topLevelReader"/> is set in the result.
/// </remarks>
/// <param name="queryValue">Value whose documents start the join.</param>
/// <param name="from">
/// When <c>true</c> the starting documents come from <c>context.RandomValueFromDocs</c> and are
/// linked through <c>context.ToDocuments</c>; otherwise <c>context.RandomValueToDocs</c> and
/// <c>context.FromDocuments</c> are used.
/// </param>
/// <param name="topLevelReader">Reader used to size the bit set and to resolve ids to document numbers.</param>
/// <param name="context">Holder of the document lookup maps.</param>
/// <returns>
/// A bit set of size <c>topLevelReader.MaxDoc</c>; empty when no documents are recorded for
/// <paramref name="queryValue"/>.
/// </returns>
private FixedBitSet CreateExpectedResult(string queryValue, bool from, IndexReader topLevelReader, IndexIterationContext context)
{
    IDictionary<string, IList<RandomDoc>> randomValueDocs;
    IDictionary<string, IList<RandomDoc>> linkValueDocuments;
    if (from)
    {
        randomValueDocs = context.RandomValueFromDocs;
        linkValueDocuments = context.ToDocuments;
    }
    else
    {
        randomValueDocs = context.RandomValueToDocs;
        linkValueDocuments = context.FromDocuments;
    }

    FixedBitSet expectedResult = new FixedBitSet(topLevelReader.MaxDoc);
    if (!randomValueDocs.TryGetValue(queryValue, out IList<RandomDoc> matchingDocs) || matchingDocs == null)
    {
        // Nothing to join from: hand back the (still empty) set instead of allocating another one.
        return expectedResult;
    }

    // Same reader for every lookup below, so the live docs are resolved once rather than per document.
    var liveDocs = MultiFields.GetLiveDocs(topLevelReader);

    foreach (RandomDoc matchingDoc in matchingDocs)
    {
        foreach (string linkValue in matchingDoc.linkValues)
        {
            if (!linkValueDocuments.TryGetValue(linkValue, out IList<RandomDoc> otherMatchingDocs) || otherMatchingDocs == null)
            {
                continue;
            }

            foreach (RandomDoc otherSideDoc in otherMatchingDocs)
            {
                DocsEnum docsEnum = MultiFields.GetTermDocsEnum(topLevelReader, liveDocs, "id", new BytesRef(otherSideDoc.id), 0);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(docsEnum != null);
                }

                // Only the first posting of the id term is used.
                int doc = docsEnum.NextDoc();
                expectedResult.Set(doc);
            }
        }
    }

    return expectedResult;
}
/// <summary>
/// Assembles the <see cref="TopDocs"/> expected for <paramref name="queryValue"/> from the hits
/// pre-computed in <paramref name="context"/>.
/// </summary>
/// <param name="queryValue">Key used to look up the pre-computed hits. The lookup uses the dictionary indexer.</param>
/// <param name="from">
/// Selects <c>context.FromHitsToJoinScore</c> when <c>true</c>, otherwise
/// <c>context.ToHitsToJoinScore</c>.
/// </param>
/// <param name="scoreMode">Passed to the comparer and to <c>JoinScore.Score</c>.</param>
/// <param name="context">Holder of the pre-computed hit maps.</param>
/// <returns>
/// The total number of hits, at most the first 10 hits in the order produced by
/// <c>ComparerAnonymousInnerClassHelper</c>, and the score of the first hit
/// (<see cref="float.NaN"/> when there are none).
/// </returns>
private TopDocs CreateExpectedTopDocs(string queryValue, bool from, ScoreMode scoreMode, IndexIterationContext context)
{
    var scoresByDoc = @from
        ? context.FromHitsToJoinScore[queryValue]
        : context.ToHitsToJoinScore[queryValue];

    // Ordering is delegated entirely to the comparer (defined elsewhere in the file).
    var rankedHits = new List<KeyValuePair<int, JoinScore>>(scoresByDoc.EntrySet());
    rankedHits.Sort(new ComparerAnonymousInnerClassHelper(this, scoreMode));

    int totalHits = rankedHits.Count;
    var topHits = new ScoreDoc[Math.Min(10, totalHits)];
    for (int rank = 0; rank < topHits.Length; rank++)
    {
        KeyValuePair<int, JoinScore> entry = rankedHits[rank];
        topHits[rank] = new ScoreDoc(entry.Key, entry.Value.Score(scoreMode));
    }

    float maxScore;
    if (totalHits == 0)
    {
        maxScore = float.NaN;
    }
    else
    {
        maxScore = rankedHits[0].Value.Score(scoreMode);
    }

    return new TopDocs(totalHits, topHits, maxScore);
}
/// <summary>
/// Indexes <paramref name="nDocs"/> random documents and pre-computes, for every unique random
/// value, the documents the join is expected to hit together with their join scores.
/// </summary>
/// <remarks>
/// <para>
/// Phase 1 generates <c>nDocs / 2</c> unique, non-empty random values and randomly marks each one
/// as a "from" or a "to" value.
/// </para>
/// <para>
/// Phase 2 builds each document with an <c>"id"</c>, a <c>"value"</c> and one or more link values
/// (field <c>"from"</c> or <c>"to"</c>), records it in the lookup maps of the context and adds it to
/// <paramref name="fromWriter"/> or <paramref name="toWriter"/>.
/// </para>
/// <para>
/// Phase 3 searches both indexes for every unique value and stores the resulting
/// doc-to-score map in <c>FromHitsToJoinScore</c> or <c>ToHitsToJoinScore</c>.
/// </para>
/// <para>
/// Statements that draw from <c>Random</c> are kept in their original order so a given seed
/// produces the same index. Documents pick their value from an array of <c>nDocs / 2</c> entries,
/// so a non-zero <paramref name="nDocs"/> is expected to be at least 2.
/// </para>
/// </remarks>
/// <param name="nDocs">Number of documents to index.</param>
/// <param name="fromWriter">Writer receiving documents whose value is marked as "from".</param>
/// <param name="toWriter">Writer receiving all other documents.</param>
/// <param name="multipleValuesPerDocument">
/// When <c>true</c> each document gets <c>2 + Random.Next(10)</c> link values, otherwise exactly one.
/// </param>
/// <param name="scoreDocsInOrder">
/// Only consulted when <paramref name="multipleValuesPerDocument"/> is <c>true</c>: selects the
/// terms-enumeration path instead of a collector for the "to" side.
/// </param>
/// <returns>The populated context.</returns>
private IndexIterationContext CreateContext(int nDocs, RandomIndexWriter fromWriter, RandomIndexWriter toWriter, bool multipleValuesPerDocument, bool scoreDocsInOrder)
{
    IndexIterationContext context = new IndexIterationContext();
    int numRandomValues = nDocs / 2;
    context.RandomUniqueValues = new string[numRandomValues];
    ISet<string> trackSet = new JCG.HashSet<string>();
    context.RandomFrom = new bool[numRandomValues];
    for (int i = 0; i < numRandomValues; i++)
    {
        string uniqueRandomValue;
        do
        {
            uniqueRandomValue = TestUtil.RandomRealisticUnicodeString(Random);
            // Disabled alternative generator:
            // uniqueRandomValue = TestUtil.randomSimpleString(random);
        } while ("".Equals(uniqueRandomValue, StringComparison.Ordinal) || trackSet.Contains(uniqueRandomValue));

        // Generate unique values and empty strings aren't allowed.
        trackSet.Add(uniqueRandomValue);
        context.RandomFrom[i] = Random.NextBoolean();
        context.RandomUniqueValues[i] = uniqueRandomValue;
    }

    RandomDoc[] docs = new RandomDoc[nDocs];
    for (int i = 0; i < nDocs; i++)
    {
        string id = Convert.ToString(i, CultureInfo.InvariantCulture);
        int randomI = Random.Next(context.RandomUniqueValues.Length);
        string value = context.RandomUniqueValues[randomI];
        Document document = new Document();
        document.Add(NewTextField(Random, "id", id, Field.Store.NO));
        document.Add(NewTextField(Random, "value", value, Field.Store.NO));

        // The side of a document is dictated by the side of the value it carries.
        bool from = context.RandomFrom[randomI];
        int numberOfLinkValues = multipleValuesPerDocument ? 2 + Random.Next(10) : 1;
        docs[i] = new RandomDoc(id, numberOfLinkValues, value, from);
        for (int j = 0; j < numberOfLinkValues; j++)
        {
            string linkValue = context.RandomUniqueValues[Random.Next(context.RandomUniqueValues.Length)];
            docs[i].linkValues.Add(linkValue);
            if (from)
            {
                if (!context.FromDocuments.TryGetValue(linkValue, out IList<RandomDoc> fromDocs))
                {
                    context.FromDocuments[linkValue] = fromDocs = new JCG.List<RandomDoc>();
                }
                if (!context.RandomValueFromDocs.TryGetValue(value, out IList<RandomDoc> randomValueFromDocs))
                {
                    context.RandomValueFromDocs[value] = randomValueFromDocs = new JCG.List<RandomDoc>();
                }

                fromDocs.Add(docs[i]);
                randomValueFromDocs.Add(docs[i]);
                document.Add(NewTextField(Random, "from", linkValue, Field.Store.NO));
            }
            else
            {
                if (!context.ToDocuments.TryGetValue(linkValue, out IList<RandomDoc> toDocuments))
                {
                    context.ToDocuments[linkValue] = toDocuments = new JCG.List<RandomDoc>();
                }
                if (!context.RandomValueToDocs.TryGetValue(value, out IList<RandomDoc> randomValueToDocs))
                {
                    context.RandomValueToDocs[value] = randomValueToDocs = new JCG.List<RandomDoc>();
                }

                toDocuments.Add(docs[i]);
                randomValueToDocs.Add(docs[i]);
                document.Add(NewTextField(Random, "to", linkValue, Field.Store.NO));
            }
        }

        RandomIndexWriter w;
        if (from)
        {
            w = fromWriter;
        }
        else
        {
            w = toWriter;
        }

        w.AddDocument(document);
        // Commit occasionally (one draw in ten).
        if (Random.Next(10) == 4)
        {
            w.Commit();
        }
        if (Verbose)
        {
            Console.WriteLine("Added document[" + docs[i].id + "]: " + document);
        }
    }

    // Pre-compute all possible hits for all unique random values. On top of this also compute all possible score for
    // any ScoreMode.
    IndexSearcher fromSearcher = NewSearcher(fromWriter.GetReader());
    IndexSearcher toSearcher = NewSearcher(toWriter.GetReader());
    for (int i = 0; i < context.RandomUniqueValues.Length; i++)
    {
        string uniqueRandomValue = context.RandomUniqueValues[i];
        string fromField;
        string toField;
        IDictionary<string, IDictionary<int, JoinScore>> queryVals;
        if (context.RandomFrom[i])
        {
            fromField = "from";
            toField = "to";
            queryVals = context.FromHitsToJoinScore;
        }
        else
        {
            fromField = "to";
            toField = "from";
            queryVals = context.ToHitsToJoinScore;
        }

        // Step 1: search the "from" side; the collector is handed the join-value map
        // (presumably to fill it - the collector classes are defined elsewhere).
        IDictionary<BytesRef, JoinScore> joinValueToJoinScores = new Dictionary<BytesRef, JoinScore>();
        if (multipleValuesPerDocument)
        {
            fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousClass3(fromField, joinValueToJoinScores));
        }
        else
        {
            fromSearcher.Search(new TermQuery(new Term("value", uniqueRandomValue)), new CollectorAnonymousClass4(fromField, joinValueToJoinScores));
        }

        // Step 2: map documents on the "to" side to the join score of their join value.
        IDictionary<int, JoinScore> docToJoinScore = new Dictionary<int, JoinScore>();
        if (multipleValuesPerDocument)
        {
            if (scoreDocsInOrder)
            {
                AtomicReader slowCompositeReader = SlowCompositeReaderWrapper.Wrap(toSearcher.IndexReader);
                Terms terms = slowCompositeReader.GetTerms(toField);
                if (terms != null)
                {
                    DocsEnum docsEnum = null;
                    TermsEnum termsEnum = null;
                    // Visit the join values in the order defined by UTF8SortedAsUnicodeComparer.
                    JCG.SortedSet<BytesRef> joinValues = new JCG.SortedSet<BytesRef>(BytesRef.UTF8SortedAsUnicodeComparer);
                    joinValues.UnionWith(joinValueToJoinScores.Keys);
                    foreach (BytesRef joinValue in joinValues)
                    {
                        termsEnum = terms.GetEnumerator(termsEnum);
                        if (termsEnum.SeekExact(joinValue))
                        {
                            docsEnum = termsEnum.Docs(slowCompositeReader.LiveDocs, docsEnum, DocsFlags.NONE);
                            JoinScore joinScore = joinValueToJoinScores[joinValue];

                            for (int doc = docsEnum.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.NextDoc())
                            {
                                // First encountered join value determines the score.
                                // Something to keep in mind for many-to-many relations.
                                if (!docToJoinScore.ContainsKey(doc))
                                {
                                    docToJoinScore[doc] = joinScore;
                                }
                            }
                        }
                    }
                }
            }
            else
            {
                toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousClass5(toField, joinValueToJoinScores, docToJoinScore));
            }
        }
        else
        {
            toSearcher.Search(new MatchAllDocsQuery(), new CollectorAnonymousClass6(toField, joinValueToJoinScores, docToJoinScore));
        }

        queryVals[uniqueRandomValue] = docToJoinScore;
    }

    fromSearcher.IndexReader.Dispose();
    toSearcher.IndexReader.Dispose();

    return context;
}
/// <summary>
/// Runs the randomized join test: for each index iteration a fresh random index is built, and for
/// each search iteration a random value is joined and the result is compared with the
/// pre-computed expectation.
/// </summary>
/// <remarks>
/// Each search iteration checks, in order: the set of matching documents, the total hit count and
/// number of returned hits, and - unless the score mode is <c>ScoreMode.None</c> - the maximum
/// score, every returned doc and score, and the value of the query explanation for each hit.
/// </remarks>
/// <param name="multipleValuesPerDocument">Forwarded to <c>CreateContext</c> and <c>JoinUtil.CreateJoinQuery</c>.</param>
/// <param name="maxIndexIter">Number of indexes to build (iterations are numbered from 1).</param>
/// <param name="maxSearchIter">Number of searches to run against every index.</param>
/// <param name="numberOfDocumentsToIndex">Number of documents passed to <c>CreateContext</c>.</param>
private void ExecuteRandomJoin(bool multipleValuesPerDocument, int maxIndexIter, int maxSearchIter, int numberOfDocumentsToIndex)
{
    for (int indexIter = 1; indexIter <= maxIndexIter; indexIter++)
    {
        if (Verbose)
        {
            Console.WriteLine("indexIter=" + indexIter);
        }

        Directory dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(
            Random,
            dir,
            NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false))
                .SetMergePolicy(NewLogMergePolicy()));
        bool scoreDocsInOrder = TestJoinUtil.Random.NextBoolean();
        IndexIterationContext context = CreateContext(numberOfDocumentsToIndex, w, multipleValuesPerDocument, scoreDocsInOrder);

        IndexReader topLevelReader = w.GetReader();
        w.Dispose();
        for (int searchIter = 1; searchIter <= maxSearchIter; searchIter++)
        {
            if (Verbose)
            {
                Console.WriteLine("searchIter=" + searchIter);
            }

            IndexSearcher indexSearcher = NewSearcher(topLevelReader);

            int r = Random.Next(context.RandomUniqueValues.Length);
            bool from = context.RandomFrom[r];
            string randomValue = context.RandomUniqueValues[r];
            FixedBitSet expectedResult = CreateExpectedResult(randomValue, from, indexSearcher.IndexReader, context);

            Query actualQuery = new TermQuery(new Term("value", randomValue));
            if (Verbose)
            {
                Console.WriteLine("actualQuery=" + actualQuery);
            }

            var scoreModeLength = Enum.GetNames(typeof(ScoreMode)).Length;
            ScoreMode scoreMode = (ScoreMode)Random.Next(scoreModeLength);
            if (Verbose)
            {
                Console.WriteLine("scoreMode=" + scoreMode);
            }

            Query joinQuery;
            if (from)
            {
                joinQuery = JoinUtil.CreateJoinQuery("from", multipleValuesPerDocument, "to", actualQuery, indexSearcher, scoreMode);
            }
            else
            {
                joinQuery = JoinUtil.CreateJoinQuery("to", multipleValuesPerDocument, "from", actualQuery, indexSearcher, scoreMode);
            }
            if (Verbose)
            {
                Console.WriteLine("joinQuery=" + joinQuery);
            }

            // Need to know all documents that have matches. TopDocs doesn't give me that and then I'd be also testing TopDocsCollector...
            FixedBitSet actualResult = new FixedBitSet(indexSearcher.IndexReader.MaxDoc);
            TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.Create(10, false);
            indexSearcher.Search(joinQuery, new CollectorAnonymousClass2(scoreDocsInOrder, actualResult, topScoreDocCollector));

            // Asserting bit set...
            if (Verbose)
            {
                Console.WriteLine("expected cardinality:" + expectedResult.Cardinality);
                DocIdSetIterator iterator = expectedResult.GetIterator();
                for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc())
                {
                    Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                }
                Console.WriteLine("actual cardinality:" + actualResult.Cardinality);
                iterator = actualResult.GetIterator();
                for (int doc = iterator.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.NextDoc())
                {
                    Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Actual doc[{0}] with id value {1}", doc, indexSearcher.Doc(doc).Get("id")));
                }
            }
            // This assertion must sit on its own line: it is the check the comment above announces.
            assertEquals(expectedResult, actualResult);

            // Asserting TopDocs...
            TopDocs expectedTopDocs = CreateExpectedTopDocs(randomValue, from, scoreMode, context);
            TopDocs actualTopDocs = topScoreDocCollector.GetTopDocs();
            assertEquals(expectedTopDocs.TotalHits, actualTopDocs.TotalHits);
            assertEquals(expectedTopDocs.ScoreDocs.Length, actualTopDocs.ScoreDocs.Length);
            if (scoreMode == ScoreMode.None)
            {
                // Scores and hit order are not compared for ScoreMode.None.
                continue;
            }

            assertEquals(expectedTopDocs.MaxScore, actualTopDocs.MaxScore, 0.0f);
            for (int i = 0; i < expectedTopDocs.ScoreDocs.Length; i++)
            {
                if (Verbose)
                {
                    Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected doc: {0} | Actual doc: {1}\n", expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc));
                    Console.WriteLine(string.Format(CultureInfo.InvariantCulture, "Expected score: {0} | Actual score: {1}\n", expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score));
                }
                assertEquals(expectedTopDocs.ScoreDocs[i].Doc, actualTopDocs.ScoreDocs[i].Doc);
                assertEquals(expectedTopDocs.ScoreDocs[i].Score, actualTopDocs.ScoreDocs[i].Score, 0.0f);
                Explanation explanation = indexSearcher.Explain(joinQuery, expectedTopDocs.ScoreDocs[i].Doc);
                assertEquals(expectedTopDocs.ScoreDocs[i].Score, explanation.Value, 0.0f);
            }
        }

        topLevelReader.Dispose();
        dir.Dispose();
    }
}