public void TestRandomIndex()
{
    Directory dir = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);
    CreateRandomIndex(AtLeast(50), w, Random().NextLong());
    DirectoryReader reader = w.Reader;
    AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader);
    string field = @"body";
    Terms terms = wrapper.GetTerms(field);
    var lowFreqQueue = new AnonymousPriorityQueue(this, 5);
    Util.PriorityQueue<TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5);
    try
    {
        TermsEnum iterator = terms.GetIterator(null);
        while (iterator.Next() != null)
        {
            if (highFreqQueue.Count < 5)
            {
                highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
            }
            else
            {
                if (highFreqQueue.Top.freq < iterator.DocFreq)
                {
                    highFreqQueue.Top.freq = iterator.DocFreq;
                    highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                    highFreqQueue.UpdateTop();
                }
                if (lowFreqQueue.Top.freq > iterator.DocFreq)
                {
                    lowFreqQueue.Top.freq = iterator.DocFreq;
                    lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                    lowFreqQueue.UpdateTop();
                }
            }
        }

        int lowFreq = lowFreqQueue.Top.freq;
        int highFreq = highFreqQueue.Top.freq;
        AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
        List<TermAndFreq> highTerms = QueueToList(highFreqQueue);
        List<TermAndFreq> lowTerms = QueueToList(lowFreqQueue);

        IndexSearcher searcher = NewSearcher(reader);
        Occur lowFreqOccur = RandomOccur(Random());
        BooleanQuery verifyQuery = new BooleanQuery();
        CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random()), lowFreqOccur,
            highFreq - 1, Random().NextBoolean());
        foreach (TermAndFreq termAndFreq in lowTerms)
        {
            cq.Add(new Term(field, termAndFreq.term));
            verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
        }
        foreach (TermAndFreq termAndFreq in highTerms)
        {
            cq.Add(new Term(field, termAndFreq.term));
        }

        TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc);
        TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
        assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
        var hits = new HashSet<int>();
        foreach (ScoreDoc doc in verifySearch.ScoreDocs)
        {
            hits.Add(doc.Doc);
        }
        foreach (ScoreDoc doc in cqSearch.ScoreDocs)
        {
            assertTrue(hits.Remove(doc.Doc));
        }
        assertTrue(hits.Count == 0);

        w.ForceMerge(1);
        DirectoryReader reader2 = w.Reader;
        QueryUtils.Check(Random(), cq, NewSearcher(reader2), Similarity);
        reader2.Dispose();
    }
    finally
    {
        reader.Dispose();
        wrapper.Dispose();
        w.Dispose();
        dir.Dispose();
    }
}
public void TestMoreLikeThisQuery()
{
    Query query = new MoreLikeThisQuery("this is a test", new[] { "text" },
        new MockAnalyzer(Random), "text");
    QueryUtils.Check(Random, query, searcher, Similarity);
}
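// The test above relies on a class-level `searcher` over an index with a
// "text" field, set up elsewhere in the fixture. A minimal sketch of what
// that setup could look like; the documents, field contents and exact
// fixture members are illustrative assumptions, not the suite's actual SetUp.
private IndexReader reader;
private IndexSearcher searcher;
private Directory directory;

public override void SetUp()
{
    base.SetUp();
    directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory, new MockAnalyzer(Random));
    foreach (string text in new[] { "this is a test", "just another test sentence" })
    {
        Document doc = new Document();
        doc.Add(NewTextField("text", text, Field.Store.YES));
        writer.AddDocument(doc);
    }
    reader = writer.GetReader();
    writer.Dispose();
    searcher = NewSearcher(reader);
}

public override void TearDown()
{
    reader.Dispose();
    directory.Dispose();
    base.TearDown();
}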
public void TestRandomIndex()
{
    Directory dir = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random);
    analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, dir, analyzer);
    CreateRandomIndex(AtLeast(50), w, Random.NextInt64());
    DirectoryReader reader = w.GetReader();
    AtomicReader wrapper = SlowCompositeReaderWrapper.Wrap(reader);
    string field = @"body";
    Terms terms = wrapper.GetTerms(field);
    var lowFreqQueue = new PriorityQueueAnonymousClass(5);
    var highFreqQueue = new PriorityQueueAnonymousClass1(5);
    try
    {
        TermsEnum iterator = terms.GetEnumerator();
        while (iterator.MoveNext())
        {
            if (highFreqQueue.Count < 5)
            {
                highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
            }
            else
            {
                if (highFreqQueue.Top.freq < iterator.DocFreq)
                {
                    highFreqQueue.Top.freq = iterator.DocFreq;
                    highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                    highFreqQueue.UpdateTop();
                }
                if (lowFreqQueue.Top.freq > iterator.DocFreq)
                {
                    lowFreqQueue.Top.freq = iterator.DocFreq;
                    lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                    lowFreqQueue.UpdateTop();
                }
            }
        }

        int lowFreq = lowFreqQueue.Top.freq;
        int highFreq = highFreqQueue.Top.freq;
        AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
        List<TermAndFreq> highTerms = QueueToList(highFreqQueue);
        List<TermAndFreq> lowTerms = QueueToList(lowFreqQueue);

        IndexSearcher searcher = NewSearcher(reader);
        Occur lowFreqOccur = RandomOccur(Random);
        BooleanQuery verifyQuery = new BooleanQuery();
        CommonTermsQuery cq = new CommonTermsQuery(RandomOccur(Random), lowFreqOccur,
            highFreq - 1, Random.NextBoolean());
        foreach (TermAndFreq termAndFreq in lowTerms)
        {
            cq.Add(new Term(field, termAndFreq.term));
            verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
        }
        foreach (TermAndFreq termAndFreq in highTerms)
        {
            cq.Add(new Term(field, termAndFreq.term));
        }

        TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc);
        TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
        assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
        var hits = new JCG.HashSet<int>();
        foreach (ScoreDoc doc in verifySearch.ScoreDocs)
        {
            hits.Add(doc.Doc);
        }
        foreach (ScoreDoc doc in cqSearch.ScoreDocs)
        {
            assertTrue(hits.Remove(doc.Doc));
        }
        assertTrue(hits.Count == 0);

        /*
         * need to force merge here since QueryUtils adds checks based
         * on leaf readers which have different statistics than the top
         * level reader if we have more than one segment. This could
         * result in different query results.
         */
        w.ForceMerge(1);
        DirectoryReader reader2 = w.GetReader();
        QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
            this,
#endif
            Random, cq, NewSearcher(reader2));
        reader2.Dispose();
    }
    finally
    {
        reader.Dispose();
        wrapper.Dispose();
        w.Dispose();
        dir.Dispose();
    }
}
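// The anonymous priority-queue classes and helpers referenced by the two
// TestRandomIndex variants above are not included in this excerpt. Below is a
// minimal sketch of what they could look like, assuming
// Lucene.Net.Util.PriorityQueue<T>'s LessThan ordering; member accessibility
// and exact bodies are assumptions for illustration, not the test's actual code.
internal class TermAndFreq
{
    internal BytesRef term;
    internal int freq;

    internal TermAndFreq(BytesRef term, int freq)
    {
        this.term = term;
        this.freq = freq;
    }
}

// Keeps the lowest-frequency terms: Top is the entry with the highest freq,
// so it is replaced whenever a rarer term shows up.
internal class PriorityQueueAnonymousClass : Util.PriorityQueue<TermAndFreq>
{
    internal PriorityQueueAnonymousClass(int maxSize)
        : base(maxSize)
    {
    }

    protected internal override bool LessThan(TermAndFreq a, TermAndFreq b)
    {
        return a.freq > b.freq;
    }
}

// Keeps the highest-frequency terms: Top is the entry with the lowest freq.
internal class PriorityQueueAnonymousClass1 : Util.PriorityQueue<TermAndFreq>
{
    internal PriorityQueueAnonymousClass1(int maxSize)
        : base(maxSize)
    {
    }

    protected internal override bool LessThan(TermAndFreq a, TermAndFreq b)
    {
        return a.freq < b.freq;
    }
}

// Drains a queue into a list by popping until it is empty.
private static List<TermAndFreq> QueueToList(Util.PriorityQueue<TermAndFreq> queue)
{
    var terms = new List<TermAndFreq>();
    while (queue.Count > 0)
    {
        terms.Add(queue.Pop());
    }
    return terms;
}

// Randomly picks one of the two occur modes the query under test supports.
private static Occur RandomOccur(Random random)
{
    return random.NextBoolean() ? Occur.MUST : Occur.SHOULD;
}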
// verify results are as expected.
private void VerifyResults(float boost, IndexSearcher s,
    IDictionary<int, float> h1, IDictionary<int, float> h2customNeutral,
    IDictionary<int, float> h3CustomMul, IDictionary<int, float> h4CustomAdd,
    IDictionary<int, float> h5CustomMulAdd,
    Query q1, Query q2, Query q3, Query q4, Query q5)
{
    // verify numbers of matches
    Log("#hits = " + h1.Count);
    assertEquals("queries should have same #hits", h1.Count, h2customNeutral.Count);
    assertEquals("queries should have same #hits", h1.Count, h3CustomMul.Count);
    assertEquals("queries should have same #hits", h1.Count, h4CustomAdd.Count);
    assertEquals("queries should have same #hits", h1.Count, h5CustomMulAdd.Count);

    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, q1, s, Rarely());
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, q2, s, Rarely());
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, q3, s, Rarely());
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, q4, s, Rarely());
    QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, q5, s, Rarely());

    // verify score ratios
    foreach (int doc in h1.Keys)
    {
        Log("doc = " + doc);

        float fieldScore = ExpectedFieldScore(s.IndexReader.Document(doc).Get(ID_FIELD));
        Log("fieldScore = " + fieldScore);
        assertTrue("fieldScore should not be 0", fieldScore > 0);

        float score1 = h1[doc];
        LogResult("score1=", s, q1, doc, score1);

        float score2 = h2customNeutral[doc];
        LogResult("score2=", s, q2, doc, score2);
        assertEquals("same score (just boosted) for neutral", boost * score1, score2,
            CheckHits.ExplainToleranceDelta(boost * score1, score2));

        float score3 = h3CustomMul[doc];
        LogResult("score3=", s, q3, doc, score3);
        assertEquals("new score for custom mul", boost * fieldScore * score1, score3,
            CheckHits.ExplainToleranceDelta(boost * fieldScore * score1, score3));

        float score4 = h4CustomAdd[doc];
        LogResult("score4=", s, q4, doc, score4);
        assertEquals("new score for custom add", boost * (fieldScore + score1), score4,
            CheckHits.ExplainToleranceDelta(boost * (fieldScore + score1), score4));

        float score5 = h5CustomMulAdd[doc];
        LogResult("score5=", s, q5, doc, score5);
        assertEquals("new score for custom mul add", boost * fieldScore * (score1 + fieldScore), score5,
            CheckHits.ExplainToleranceDelta(boost * fieldScore * (score1 + fieldScore), score5));
    }
}
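// Log, LogResult, ExpectedFieldScore and ID_FIELD come from the surrounding
// fixture and are not part of this excerpt. A plausible sketch of the two
// logging helpers, assuming LuceneTestCase's Verbose flag; ExpectedFieldScore
// is left out because it depends on how the fixture derives a score from the
// stored ID_FIELD value.
private void Log(object o)
{
    if (Verbose)
    {
        Console.WriteLine(o.ToString());
    }
}

private void LogResult(string msg, IndexSearcher s, Query q, int doc, float score1)
{
    Log(msg + " " + score1);
    Log("Explain by: " + q);
    Log(s.Explain(q, doc));
}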