Ejemplo n.º 1
0
        public void TestRandomIndex()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            analyzer.MaxTokenLength = TestUtil.NextInt(Random(), 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, analyzer, Similarity, TimeZone);

            CreateRandomIndex(AtLeast(50), w, Random().NextLong());
            DirectoryReader reader       = w.Reader;
            AtomicReader    wrapper      = SlowCompositeReaderWrapper.Wrap(reader);
            string          field        = @"body";
            Terms           terms        = wrapper.GetTerms(field);
            var             lowFreqQueue = new AnonymousPriorityQueue(this, 5);

            Util.PriorityQueue <TermAndFreq> highFreqQueue = new AnonymousPriorityQueue1(this, 5);
            try
            {
                TermsEnum iterator = terms.GetIterator(null);
                while (iterator.Next() != null)
                {
                    if (highFreqQueue.Count < 5)
                    {
                        highFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                        lowFreqQueue.Add(new TermAndFreq(BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                    }
                    else
                    {
                        if (highFreqQueue.Top.freq < iterator.DocFreq)
                        {
                            highFreqQueue.Top.freq = iterator.DocFreq;
                            highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            highFreqQueue.UpdateTop();
                        }

                        if (lowFreqQueue.Top.freq > iterator.DocFreq)
                        {
                            lowFreqQueue.Top.freq = iterator.DocFreq;
                            lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            lowFreqQueue.UpdateTop();
                        }
                    }
                }

                int lowFreq  = lowFreqQueue.Top.freq;
                int highFreq = highFreqQueue.Top.freq;
                AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
                List <TermAndFreq> highTerms  = QueueToList(highFreqQueue);
                List <TermAndFreq> lowTerms   = QueueToList(lowFreqQueue);
                IndexSearcher      searcher   = NewSearcher(reader);
                Occur            lowFreqOccur = RandomOccur(Random());
                BooleanQuery     verifyQuery  = new BooleanQuery();
                CommonTermsQuery cq           = new CommonTermsQuery(RandomOccur(Random()), lowFreqOccur, highFreq - 1, Random().NextBoolean());
                foreach (TermAndFreq termAndFreq in lowTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                    verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field, termAndFreq.term)), lowFreqOccur));
                }

                foreach (TermAndFreq termAndFreq in highTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                }

                TopDocs cqSearch     = searcher.Search(cq, reader.MaxDoc);
                TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
                assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
                var hits = new HashSet <int>();
                foreach (ScoreDoc doc in verifySearch.ScoreDocs)
                {
                    hits.Add(doc.Doc);
                }

                foreach (ScoreDoc doc in cqSearch.ScoreDocs)
                {
                    assertTrue(hits.Remove(doc.Doc));
                }

                assertTrue(hits.Count == 0);
                w.ForceMerge(1);
                DirectoryReader reader2 = w.Reader;
                QueryUtils.Check(Random(), cq, NewSearcher(reader2), Similarity);
                reader2.Dispose();
            }
            finally
            {
                reader.Dispose();
                wrapper.Dispose();
                w.Dispose();
                dir.Dispose();
            }
        }
Ejemplo n.º 2
0
        public void TestMoreLikeThisQuery()
        {
            Query query = new MoreLikeThisQuery("this is a test", new[] { "text" }, new MockAnalyzer(Random), "text");

            QueryUtils.Check(Random, query, searcher, Similarity);
        }
Ejemplo n.º 3
0
        public void TestRandomIndex()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir, analyzer);

            CreateRandomIndex(AtLeast(50), w, Random.NextInt64());
            DirectoryReader reader        = w.GetReader();
            AtomicReader    wrapper       = SlowCompositeReaderWrapper.Wrap(reader);
            string          field         = @"body";
            Terms           terms         = wrapper.GetTerms(field);
            var             lowFreqQueue  = new PriorityQueueAnonymousClass(5);
            var             highFreqQueue = new PriorityQueueAnonymousClass1(5);

            try
            {
                TermsEnum iterator = terms.GetEnumerator();
                while (iterator.MoveNext())
                {
                    if (highFreqQueue.Count < 5)
                    {
                        highFreqQueue.Add(new TermAndFreq(
                                              BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                        lowFreqQueue.Add(new TermAndFreq(
                                             BytesRef.DeepCopyOf(iterator.Term), iterator.DocFreq));
                    }
                    else
                    {
                        if (highFreqQueue.Top.freq < iterator.DocFreq)
                        {
                            highFreqQueue.Top.freq = iterator.DocFreq;
                            highFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            highFreqQueue.UpdateTop();
                        }

                        if (lowFreqQueue.Top.freq > iterator.DocFreq)
                        {
                            lowFreqQueue.Top.freq = iterator.DocFreq;
                            lowFreqQueue.Top.term = BytesRef.DeepCopyOf(iterator.Term);
                            lowFreqQueue.UpdateTop();
                        }
                    }
                }

                int lowFreq  = lowFreqQueue.Top.freq;
                int highFreq = highFreqQueue.Top.freq;
                AssumeTrue(@"unlucky index", highFreq - 1 > lowFreq);
                List <TermAndFreq> highTerms = QueueToList(highFreqQueue);
                List <TermAndFreq> lowTerms  = QueueToList(lowFreqQueue);

                IndexSearcher    searcher     = NewSearcher(reader);
                Occur            lowFreqOccur = RandomOccur(Random);
                BooleanQuery     verifyQuery  = new BooleanQuery();
                CommonTermsQuery cq           = new CommonTermsQuery(RandomOccur(Random),
                                                                     lowFreqOccur, highFreq - 1, Random.NextBoolean());
                foreach (TermAndFreq termAndFreq in lowTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                    verifyQuery.Add(new BooleanClause(new TermQuery(new Term(field,
                                                                             termAndFreq.term)), lowFreqOccur));
                }
                foreach (TermAndFreq termAndFreq in highTerms)
                {
                    cq.Add(new Term(field, termAndFreq.term));
                }

                TopDocs cqSearch = searcher.Search(cq, reader.MaxDoc);

                TopDocs verifySearch = searcher.Search(verifyQuery, reader.MaxDoc);
                assertEquals(verifySearch.TotalHits, cqSearch.TotalHits);
                var hits = new JCG.HashSet <int>();
                foreach (ScoreDoc doc in verifySearch.ScoreDocs)
                {
                    hits.Add(doc.Doc);
                }

                foreach (ScoreDoc doc in cqSearch.ScoreDocs)
                {
                    assertTrue(hits.Remove(doc.Doc));
                }

                assertTrue(hits.Count == 0);

                /*
                 *  need to force merge here since QueryUtils adds checks based
                 *  on leave readers which have different statistics than the top
                 *  level reader if we have more than one segment. This could
                 *  result in a different query / results.
                 */
                w.ForceMerge(1);
                DirectoryReader reader2 = w.GetReader();
                QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                    this,
#endif
                    Random, cq, NewSearcher(reader2));
                reader2.Dispose();
            }
            finally
            {
                reader.Dispose();
                wrapper.Dispose();
                w.Dispose();
                dir.Dispose();
            }
        }
Ejemplo n.º 4
0
        // verify results are as expected.
        private void VerifyResults(float boost, IndexSearcher s, IDictionary <int, float> h1, IDictionary <int, float> h2customNeutral, IDictionary <int, float> h3CustomMul, IDictionary <int, float> h4CustomAdd, IDictionary <int, float> h5CustomMulAdd, Query q1, Query q2, Query q3, Query q4, Query q5)
        {
            // verify numbers of matches
            Log("#hits = " + h1.Count);
            assertEquals("queries should have same #hits", h1.Count, h2customNeutral.Count);
            assertEquals("queries should have same #hits", h1.Count, h3CustomMul.Count);
            assertEquals("queries should have same #hits", h1.Count, h4CustomAdd.Count);
            assertEquals("queries should have same #hits", h1.Count, h5CustomMulAdd.Count);

            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, q1, s, Rarely());
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, q2, s, Rarely());
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, q3, s, Rarely());
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, q4, s, Rarely());
            QueryUtils.Check(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, q5, s, Rarely());

            // verify scores ratios
            foreach (int doc in h1.Keys)
            {
                Log("doc = " + doc);

                float fieldScore = ExpectedFieldScore(s.IndexReader.Document(doc).Get(ID_FIELD));
                Log("fieldScore = " + fieldScore);
                assertTrue("fieldScore should not be 0", fieldScore > 0);

                float score1 = h1[doc];
                LogResult("score1=", s, q1, doc, score1);

                float score2 = h2customNeutral[doc];
                LogResult("score2=", s, q2, doc, score2);
                assertEquals("same score (just boosted) for neutral", boost * score1, score2, CheckHits.ExplainToleranceDelta(boost * score1, score2));

                float score3 = h3CustomMul[doc];
                LogResult("score3=", s, q3, doc, score3);
                assertEquals("new score for custom mul", boost * fieldScore * score1, score3, CheckHits.ExplainToleranceDelta(boost * fieldScore * score1, score3));

                float score4 = h4CustomAdd[doc];
                LogResult("score4=", s, q4, doc, score4);
                assertEquals("new score for custom add", boost * (fieldScore + score1), score4, CheckHits.ExplainToleranceDelta(boost * (fieldScore + score1), score4));

                float score5 = h5CustomMulAdd[doc];
                LogResult("score5=", s, q5, doc, score5);
                assertEquals("new score for custom mul add", boost * fieldScore * (score1 + fieldScore), score5, CheckHits.ExplainToleranceDelta(boost * fieldScore * (score1 + fieldScore), score5));
            }
        }