public override void SetUp()
 {
     base.SetUp();
     Directory = NewDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
     for (int i = 0; i < DocFields.Length; i++)
     {
         Document doc = new Document();
         doc.Add(NewTextField(FIELD, DocFields[i], Field.Store.NO));
         writer.AddDocument(doc);
     }
     Reader = writer.Reader;
     writer.Dispose();
     Searcher = NewSearcher(Reader);
 }
Beispiel #2
0
        public override void SetUp()
        {
            base.SetUp();
            Directory = NewDirectory();

            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()).SetSimilarity(new DefaultSimilarity()));
            for (int i = 0; i < Values.Length; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField(FIELD, Values[i], Field.Store.YES));
                writer.AddDocument(doc);
            }
            IndexReader = SlowCompositeReaderWrapper.Wrap(writer.Reader);
            writer.Dispose();
            IndexSearcher = NewSearcher(IndexReader);
            IndexSearcher.Similarity = new DefaultSimilarity();
        }
 public override void SetUp()
 {
     base.SetUp();
     Dir = NewDirectory();
     Iw = new RandomIndexWriter(Random(), Dir);
     Document doc = new Document();
     Field idField = new StringField("id", "", Field.Store.NO);
     doc.Add(idField);
     // add 500 docs with id 0..499
     for (int i = 0; i < 500; i++)
     {
         idField.StringValue = Convert.ToString(i);
         Iw.AddDocument(doc);
     }
     // delete 20 of them
     for (int i = 0; i < 20; i++)
     {
         Iw.DeleteDocuments(new Term("id", Convert.ToString(Random().Next(Iw.MaxDoc()))));
     }
     Ir = Iw.Reader;
     @is = NewSearcher(Ir);
 }
        public virtual void TestChangeGaps()
        {
            // LUCENE-5324: check that it is possible to change the wrapper's gaps
            int positionGap = Random().Next(1000);
            int offsetGap = Random().Next(1000);
            Analyzer @delegate = new MockAnalyzer(Random());
            Analyzer a = new AnalyzerWrapperAnonymousInnerClassHelper2(this, @delegate.Strategy, positionGap, offsetGap, @delegate);

            RandomIndexWriter writer = new RandomIndexWriter(Random(), NewDirectory());
            Document doc = new Document();
            FieldType ft = new FieldType();
            ft.Indexed = true;
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            ft.Tokenized = true;
            ft.StoreTermVectors = true;
            ft.StoreTermVectorPositions = true;
            ft.StoreTermVectorOffsets = true;
            doc.Add(new Field("f", "a", ft));
            doc.Add(new Field("f", "a", ft));
            writer.AddDocument(doc, a);
            AtomicReader reader = GetOnlySegmentReader(writer.Reader);
            Fields fields = reader.GetTermVectors(0);
            Terms terms = fields.Terms("f");
            TermsEnum te = terms.Iterator(null);
            Assert.AreEqual(new BytesRef("a"), te.Next());
            DocsAndPositionsEnum dpe = te.DocsAndPositions(null, null);
            Assert.AreEqual(0, dpe.NextDoc());
            Assert.AreEqual(2, dpe.Freq());
            Assert.AreEqual(0, dpe.NextPosition());
            Assert.AreEqual(0, dpe.StartOffset());
            int endOffset = dpe.EndOffset();
            Assert.AreEqual(1 + positionGap, dpe.NextPosition());
            Assert.AreEqual(1 + endOffset + offsetGap, dpe.EndOffset());
            Assert.AreEqual(null, te.Next());
            reader.Dispose();
            writer.Dispose();
            writer.w.Directory.Dispose();
        }
        public virtual void TestSimple()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            Field field = NewTextField("foo", "", Field.Store.NO);
            doc.Add(field);
            Field dvField = new FloatDocValuesField("foo_boost", 0.0F);
            doc.Add(dvField);
            Field field2 = NewTextField("bar", "", Field.Store.NO);
            doc.Add(field2);

            field.StringValue = "quick brown fox";
            field2.StringValue = "quick brown fox";
            dvField.FloatValue = 2f; // boost x2
            iw.AddDocument(doc);
            field.StringValue = "jumps over lazy brown dog";
            field2.StringValue = "jumps over lazy brown dog";
            dvField.FloatValue = 4f; // boost x4
            iw.AddDocument(doc);
            IndexReader ir = iw.Reader;
            iw.Dispose();

            // no boosting
            IndexSearcher searcher1 = NewSearcher(ir, false);
            Similarity @base = searcher1.Similarity;
            // boosting
            IndexSearcher searcher2 = NewSearcher(ir, false);
            searcher2.Similarity = new PerFieldSimilarityWrapperAnonymousInnerClassHelper(this, field, @base);

            // in this case, we searched on field "foo". first document should have 2x the score.
            TermQuery tq = new TermQuery(new Term("foo", "quick"));
            QueryUtils.Check(Random(), tq, searcher1);
            QueryUtils.Check(Random(), tq, searcher2);

            TopDocs noboost = searcher1.Search(tq, 10);
            TopDocs boost = searcher2.Search(tq, 10);
            Assert.AreEqual(1, noboost.TotalHits);
            Assert.AreEqual(1, boost.TotalHits);

            //System.out.println(searcher2.Explain(tq, boost.ScoreDocs[0].Doc));
            Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 2f, SCORE_EPSILON);

            // this query matches only the second document, which should have 4x the score.
            tq = new TermQuery(new Term("foo", "jumps"));
            QueryUtils.Check(Random(), tq, searcher1);
            QueryUtils.Check(Random(), tq, searcher2);

            noboost = searcher1.Search(tq, 10);
            boost = searcher2.Search(tq, 10);
            Assert.AreEqual(1, noboost.TotalHits);
            Assert.AreEqual(1, boost.TotalHits);

            Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score * 4f, SCORE_EPSILON);

            // search on on field bar just for kicks, nothing should happen, since we setup
            // our sim provider to only use foo_boost for field foo.
            tq = new TermQuery(new Term("bar", "quick"));
            QueryUtils.Check(Random(), tq, searcher1);
            QueryUtils.Check(Random(), tq, searcher2);

            noboost = searcher1.Search(tq, 10);
            boost = searcher2.Search(tq, 10);
            Assert.AreEqual(1, noboost.TotalHits);
            Assert.AreEqual(1, boost.TotalHits);

            Assert.AreEqual(boost.ScoreDocs[0].Score, noboost.ScoreDocs[0].Score, SCORE_EPSILON);

            ir.Dispose();
            dir.Dispose();
        }
        private static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw)
        {
            LineFileDocs docs = new LineFileDocs(random);
            Document doc = null;
            Field field = null, currentField = null;
            StringReader bogus = new StringReader("");
            if (iw != null)
            {
                doc = new Document();
                FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
                if (random.NextBoolean())
                {
                    ft.StoreTermVectors = true;
                    ft.StoreTermVectorOffsets = random.NextBoolean();
                    ft.StoreTermVectorPositions = random.NextBoolean();
                    if (ft.StoreTermVectorPositions && !OLD_FORMAT_IMPERSONATION_IS_ACTIVE)
                    {
                        ft.StoreTermVectorPayloads = random.NextBoolean();
                    }
                }
                if (random.NextBoolean())
                {
                    ft.OmitNorms = true;
                }
                string pf = TestUtil.GetPostingsFormat("dummy");
                bool supportsOffsets = !DoesntSupportOffsets.Contains(pf);
                switch (random.Next(4))
                {
                    case 0:
                        ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
                        break;

                    case 1:
                        ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;
                        break;

                    case 2:
                        ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                        break;

                    default:
                        if (supportsOffsets && offsetsAreCorrect)
                        {
                            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
                        }
                        else
                        {
                            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
                        }
                        break;
                }
                currentField = field = new Field("dummy", bogus, ft);
                doc.Add(currentField);
            }

            try
            {
                for (int i = 0; i < iterations; i++)
                {
                    string text;

                    if (random.Next(10) == 7)
                    {
                        // real data from linedocs
                        text = docs.NextDoc().Get("body");
                        if (text.Length > maxWordLength)
                        {
                            // Take a random slice from the text...:
                            int startPos = random.Next(text.Length - maxWordLength);
                            if (startPos > 0 && char.IsLowSurrogate(text[startPos]))
                            {
                                // Take care not to split up a surrogate pair:
                                startPos--;
                                Assert.True(char.IsHighSurrogate(text[startPos]));
                            }
                            int endPos = startPos + maxWordLength - 1;
                            if (char.IsHighSurrogate(text[endPos]))
                            {
                                // Take care not to split up a surrogate pair:
                                endPos--;
                            }
                            text = text.Substring(startPos, 1 + endPos - startPos);
                        }
                    }
                    else
                    {
                        // synthetic
                        text = TestUtil.RandomAnalysisString(random, maxWordLength, simple);
                    }

                    try
                    {
                        CheckAnalysisConsistency(random, a, useCharFilter, text, offsetsAreCorrect, currentField);
                        if (iw != null)
                        {
                            if (random.Next(7) == 0)
                            {
                                // pile up a multivalued field
                                var ft = (FieldType)field.FieldType();
                                currentField = new Field("dummy", bogus, ft);
                                doc.Add(currentField);
                            }
                            else
                            {
                                iw.AddDocument(doc);
                                if (doc.Fields.Count > 1)
                                {
                                    // back to 1 field
                                    currentField = field;
                                    doc.RemoveFields("dummy");
                                    doc.Add(currentField);
                                }
                            }
                        }
                    }
                    catch (Exception t)
                    {
                        // TODO: really we should pass a random seed to
                        // checkAnalysisConsistency then print it here too:
                        Console.Error.WriteLine("TEST FAIL: useCharFilter=" + useCharFilter + " text='" + Escape(text) + "'");
                        throw;
                    }
                }
            }
            finally
            {
                IOUtils.CloseWhileHandlingException(docs);
            }
        }
        public static void CheckRandomData(Random random, Analyzer a, int iterations, int maxWordLength, bool simple, bool offsetsAreCorrect)
        {
            CheckResetException(a, "best effort");
            long seed = random.Next();
            bool useCharFilter = random.NextBoolean();
            Directory dir = null;
            RandomIndexWriter iw = null;
            string postingsFormat = TestUtil.GetPostingsFormat("dummy");
            bool codecOk = iterations * maxWordLength < 100000
                || !(postingsFormat.Equals("Memory") || postingsFormat.Equals("SimpleText"));
            if (Rarely(random) && codecOk)
            {
                dir = NewFSDirectory(CreateTempDir("bttc"));
                iw = new RandomIndexWriter(new Random((int)seed), dir, a);
            }

            bool success = false;
            try
            {
                CheckRandomData(new Random((int)seed), a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
                // now test with multiple threads: note we do the EXACT same thing we did before in each thread,
                // so this should only really fail from another thread if its an actual thread problem
                int numThreads = TestUtil.NextInt(random, 2, 4);
                var startingGun = new CountDownLatch(1);
                var threads = new AnalysisThread[numThreads];
                for (int i = 0; i < threads.Length; i++)
                {
                    threads[i] = new AnalysisThread(seed, /*startingGun,*/ a, iterations, maxWordLength, useCharFilter, simple, offsetsAreCorrect, iw);
                }

                Array.ForEach(threads, thread => thread.Start());

                startingGun.countDown();

                foreach (var t in threads)
                {
                    try
                    {
                        t.Join();
                    }
                    catch (ThreadInterruptedException e)
                    {
                        Fail("Thread interrupted");
                    }
                }

                if (threads.Any(x => x.Failed))
                    Fail("Thread interrupted");

                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(iw, dir);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(iw, dir); // checkindex
                }
            }
        }
 internal AnalysisThread(long seed, /*CountDownLatch latch,*/ Analyzer a, int iterations, int maxWordLength, bool useCharFilter, bool simple, bool offsetsAreCorrect, RandomIndexWriter iw)
 {
     this.Seed = seed;
     this.a = a;
     this.Iterations = iterations;
     this.MaxWordLength = maxWordLength;
     this.UseCharFilter = useCharFilter;
     this.Simple = simple;
     this.OffsetsAreCorrect = offsetsAreCorrect;
     this.Iw = iw;
     this._latch = null;
 }
        public virtual void Test2()
        {
            Random            random    = Random();
            int               NUM_DOCS  = AtLeast(100);
            Directory         dir       = NewDirectory();
            RandomIndexWriter writer    = new RandomIndexWriter(random, dir);
            bool              allowDups = random.NextBoolean();
            HashSet <string>  seen      = new HashSet <string>();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS + " allowDups=" + allowDups);
            }
            int numDocs = 0;
            IList <BytesRef> docValues = new List <BytesRef>();

            // TODO: deletions
            while (numDocs < NUM_DOCS)
            {
                string s;
                if (random.NextBoolean())
                {
                    s = TestUtil.RandomSimpleString(random);
                }
                else
                {
                    s = TestUtil.RandomUnicodeString(random);
                }
                BytesRef br = new BytesRef(s);

                if (!allowDups)
                {
                    if (seen.Contains(s))
                    {
                        continue;
                    }
                    seen.Add(s);
                }

                if (VERBOSE)
                {
                    Console.WriteLine("  " + numDocs + ": s=" + s);
                }

                Document doc = new Document();
                doc.Add(new SortedDocValuesField("stringdv", br));
                doc.Add(new NumericDocValuesField("id", numDocs));
                docValues.Add(br);
                writer.AddDocument(doc);
                numDocs++;

                if (random.Next(40) == 17)
                {
                    // force flush
                    writer.Reader.Dispose();
                }
            }

            writer.ForceMerge(1);
            DirectoryReader r = writer.Reader;

            writer.Dispose();

            AtomicReader sr = GetOnlySegmentReader(r);

            long END_TIME = DateTime.Now.Millisecond + (TEST_NIGHTLY ? 30 : 1);

            int NUM_THREADS = TestUtil.NextInt(Random(), 1, 10);

            ThreadClass[] threads = new ThreadClass[NUM_THREADS];
            for (int thread = 0; thread < NUM_THREADS; thread++)
            {
                threads[thread] = new ThreadAnonymousInnerClassHelper2(this, random, docValues, sr, END_TIME);
                threads[thread].Start();
            }

            foreach (ThreadClass thread in threads)
            {
                thread.Join();
            }

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestCachingWorks()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            writer.Dispose();

            IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            AtomicReaderContext context = (AtomicReaderContext)reader.Context;
            MockFilter filter = new MockFilter();
            CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

            // first time, nested filter is called
            DocIdSet strongRef = cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs);
            Assert.IsTrue(filter.WasCalled(), "first time");

            // make sure no exception if cache is holding the wrong docIdSet
            cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs);

            // second time, nested filter should not be called
            filter.Clear();
            cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs);
            Assert.IsFalse(filter.WasCalled(), "second time");

            reader.Dispose();
            dir.Dispose();
        }
        public static void BeforeClass()
        {
            Dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir);
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();

                AddSome(doc, AlwaysTerms);

                if (Random().Next(100) < 90)
                {
                    AddSome(doc, CommonTerms);
                }
                if (Random().Next(100) < 50)
                {
                    AddSome(doc, MediumTerms);
                }
                if (Random().Next(100) < 10)
                {
                    AddSome(doc, RareTerms);
                }
                iw.AddDocument(doc);
            }
            iw.ForceMerge(1);
            iw.Dispose();
            r = DirectoryReader.Open(Dir);
            atomicReader = GetOnlySegmentReader(r);
            Searcher = new IndexSearcher(atomicReader);
            Searcher.Similarity = new DefaultSimilarityAnonymousInnerClassHelper();
        }
Beispiel #12
0
        public virtual void TestEmptyTerm()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            doc.Add(NewTextField("foo", "bar", Field.Store.NO));
            iw.AddDocument(doc);
            IndexReader ir = iw.Reader;
            iw.Dispose();
            IndexSearcher @is = NewSearcher(ir);

            foreach (Similarity sim in Sims)
            {
                @is.Similarity = sim;
                BooleanQuery query = new BooleanQuery(true);
                query.Add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
                query.Add(new TermQuery(new Term("foo", "baz")), BooleanClause.Occur.SHOULD);
                Assert.AreEqual(1, @is.Search(query, 10).TotalHits);
            }
            ir.Dispose();
            dir.Dispose();
        }
Beispiel #13
0
        public virtual void TestEmptyIndex()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            IndexReader ir = iw.Reader;
            iw.Dispose();
            IndexSearcher @is = NewSearcher(ir);

            foreach (Similarity sim in Sims)
            {
                @is.Similarity = sim;
                Assert.AreEqual(0, @is.Search(new TermQuery(new Term("foo", "bar")), 10).TotalHits);
            }
            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestNullDocIdSetIterator()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            writer.Dispose();

            IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            AtomicReaderContext context = (AtomicReaderContext)reader.Context;

            Filter filter = new FilterAnonymousInnerClassHelper2(this, context);
            CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

            // the caching filter should return the empty set constant
            Assert.IsNull(cacher.GetDocIdSet(context, (context.AtomicReader).LiveDocs));

            reader.Dispose();
            dir.Dispose();
        }
        public virtual void TestIsCacheAble()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir);
            writer.AddDocument(new Document());
            writer.Dispose();

            IndexReader reader = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));

            // not cacheable:
            AssertDocIdSetCacheable(reader, new QueryWrapperFilter(new TermQuery(new Term("test", "value"))), false);
            // returns default empty docidset, always cacheable:
            AssertDocIdSetCacheable(reader, NumericRangeFilter.NewIntRange("test", Convert.ToInt32(10000), Convert.ToInt32(-10000), true, true), true);
            // is cacheable:
            AssertDocIdSetCacheable(reader, FieldCacheRangeFilter.NewIntRange("test", Convert.ToInt32(10), Convert.ToInt32(20), true, true), true);
            // a fixedbitset filter is always cacheable
            AssertDocIdSetCacheable(reader, new FilterAnonymousInnerClassHelper3(this), true);

            reader.Dispose();
            dir.Dispose();
        }
        public virtual void TestEnforceDeletions()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy(10)));
            // asserts below requires no unexpected merges:

            // NOTE: cannot use writer.getReader because RIW (on
            // flipping a coin) may give us a newly opened reader,
            // but we use .reopen on this reader below and expect to
            // (must) get an NRT reader:
            DirectoryReader reader = DirectoryReader.Open(writer.w, true);
            // same reason we don't wrap?
            IndexSearcher searcher = NewSearcher(reader, false);

            // add a doc, refresh the reader, and check that it's there
            Document doc = new Document();
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);

            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1);
            Assert.AreEqual(1, docs.TotalHits, "Should find a hit...");

            Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1")));

            CachingWrapperFilter filter = new CachingWrapperFilter(startFilter);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.IsTrue(filter.SizeInBytes() > 0);

            Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");

            Query constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

            // make sure we get a cache hit when we reopen reader
            // that had no change to deletions

            // fake delete (deletes nothing):
            writer.DeleteDocuments(new Term("foo", "bar"));

            IndexReader oldReader = reader;
            reader = RefreshReader(reader);
            Assert.IsTrue(reader == oldReader);
            int missCount = filter.MissCount;
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

            // cache hit:
            Assert.AreEqual(missCount, filter.MissCount);

            // now delete the doc, refresh the reader, and see that it's not there
            writer.DeleteDocuments(new Term("id", "1"));

            // NOTE: important to hold ref here so GC doesn't clear
            // the cache entry!  Else the assert below may sometimes
            // fail:
            oldReader = reader;
            reader = RefreshReader(reader);

            searcher = NewSearcher(reader, false);

            missCount = filter.MissCount;
            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");

            // cache hit
            Assert.AreEqual(missCount, filter.MissCount);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");

            // apply deletes dynamically:
            filter = new CachingWrapperFilter(startFilter);
            writer.AddDocument(doc);
            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");
            missCount = filter.MissCount;
            Assert.IsTrue(missCount > 0);
            constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");
            Assert.AreEqual(missCount, filter.MissCount);

            writer.AddDocument(doc);

            // NOTE: important to hold ref here so GC doesn't clear
            // the cache entry!  Else the assert below may sometimes
            // fail:
            oldReader = reader;

            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(2, docs.TotalHits, "[query + filter] Should find 2 hits...");
            Assert.IsTrue(filter.MissCount > missCount);
            missCount = filter.MissCount;

            constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(2, docs.TotalHits, "[just filter] Should find a hit...");
            Assert.AreEqual(missCount, filter.MissCount);

            // now delete the doc, refresh the reader, and see that it's not there
            writer.DeleteDocuments(new Term("id", "1"));

            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");
            // CWF reused the same entry (it dynamically applied the deletes):
            Assert.AreEqual(missCount, filter.MissCount);

            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");
            // CWF reused the same entry (it dynamically applied the deletes):
            Assert.AreEqual(missCount, filter.MissCount);

            // NOTE: silliness to make sure JRE does not eliminate
            // our holding onto oldReader to prevent
            // CachingWrapperFilter's WeakHashMap from dropping the
            // entry:
            Assert.IsTrue(oldReader != null);

            reader.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
        public static void BeforeClass()
        {
            NoDocs = AtLeast(4096);
            Distance = (1L << 60) / NoDocs;
            Directory = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Directory, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(TestUtil.NextInt(Random(), 100, 1000)).SetMergePolicy(NewLogMergePolicy()));

            FieldType storedLong = new FieldType(LongField.TYPE_NOT_STORED);
            storedLong.Stored = true;
            storedLong.Freeze();

            FieldType storedLong8 = new FieldType(storedLong);
            storedLong8.NumericPrecisionStep = 8;

            FieldType storedLong4 = new FieldType(storedLong);
            storedLong4.NumericPrecisionStep = 4;

            FieldType storedLong6 = new FieldType(storedLong);
            storedLong6.NumericPrecisionStep = 6;

            FieldType storedLong2 = new FieldType(storedLong);
            storedLong2.NumericPrecisionStep = 2;

            FieldType storedLongNone = new FieldType(storedLong);
            storedLongNone.NumericPrecisionStep = int.MaxValue;

            FieldType unstoredLong = LongField.TYPE_NOT_STORED;

            FieldType unstoredLong8 = new FieldType(unstoredLong);
            unstoredLong8.NumericPrecisionStep = 8;

            FieldType unstoredLong6 = new FieldType(unstoredLong);
            unstoredLong6.NumericPrecisionStep = 6;

            FieldType unstoredLong4 = new FieldType(unstoredLong);
            unstoredLong4.NumericPrecisionStep = 4;

            FieldType unstoredLong2 = new FieldType(unstoredLong);
            unstoredLong2.NumericPrecisionStep = 2;

            LongField field8 = new LongField("field8", 0L, storedLong8), field6 = new LongField("field6", 0L, storedLong6), field4 = new LongField("field4", 0L, storedLong4), field2 = new LongField("field2", 0L, storedLong2), fieldNoTrie = new LongField("field" + int.MaxValue, 0L, storedLongNone), ascfield8 = new LongField("ascfield8", 0L, unstoredLong8), ascfield6 = new LongField("ascfield6", 0L, unstoredLong6), ascfield4 = new LongField("ascfield4", 0L, unstoredLong4), ascfield2 = new LongField("ascfield2", 0L, unstoredLong2);

            Document doc = new Document();
            // add fields, that have a distance to test general functionality
            doc.Add(field8);
            doc.Add(field6);
            doc.Add(field4);
            doc.Add(field2);
            doc.Add(fieldNoTrie);
            // add ascending fields with a distance of 1, beginning at -noDocs/2 to test the correct splitting of range and inclusive/exclusive
            doc.Add(ascfield8);
            doc.Add(ascfield6);
            doc.Add(ascfield4);
            doc.Add(ascfield2);

            // Add a series of noDocs docs with increasing long values, by updating the fields
            for (int l = 0; l < NoDocs; l++)
            {
                long val = Distance * l + StartOffset;
                field8.LongValue = val;
                field6.LongValue = val;
                field4.LongValue = val;
                field2.LongValue = val;
                fieldNoTrie.LongValue = val;

                val = l - (NoDocs / 2);
                ascfield8.LongValue = val;
                ascfield6.LongValue = val;
                ascfield4.LongValue = val;
                ascfield2.LongValue = val;
                writer.AddDocument(doc);
            }
            Reader = writer.Reader;
            Searcher = NewSearcher(Reader);
            writer.Dispose();
        }
        public virtual void TestInfiniteValues()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));
            Document doc = new Document();
            doc.Add(new DoubleField("double", double.NegativeInfinity, Field.Store.NO));
            doc.Add(new LongField("long", long.MinValue, Field.Store.NO));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new DoubleField("double", double.PositiveInfinity, Field.Store.NO));
            doc.Add(new LongField("long", long.MaxValue, Field.Store.NO));
            writer.AddDocument(doc);

            doc = new Document();
            doc.Add(new DoubleField("double", 0.0, Field.Store.NO));
            doc.Add(new LongField("long", 0L, Field.Store.NO));
            writer.AddDocument(doc);

            foreach (double d in TestNumericUtils.DOUBLE_NANs)
            {
                doc = new Document();
                doc.Add(new DoubleField("double", d, Field.Store.NO));
                writer.AddDocument(doc);
            }

            writer.Dispose();

            IndexReader r = DirectoryReader.Open(dir);
            IndexSearcher s = NewSearcher(r);

            Query q = NumericRangeQuery.NewLongRange("long", null, null, true, true);
            TopDocs topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewLongRange("long", null, null, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewLongRange("long", long.MinValue, long.MaxValue, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewLongRange("long", long.MinValue, long.MaxValue, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewDoubleRange("double", null, null, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewDoubleRange("double", null, null, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(3, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewDoubleRange("double", double.NegativeInfinity, double.PositiveInfinity, false, false);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(1, topDocs.ScoreDocs.Length, "Score doc count");

            q = NumericRangeQuery.NewDoubleRange("double", double.NaN, double.NaN, true, true);
            topDocs = s.Search(q, 10);
            Assert.AreEqual(TestNumericUtils.DOUBLE_NANs.Length, topDocs.ScoreDocs.Length, "Score doc count");

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestPostings()
        {
            Directory dir = NewFSDirectory(CreateTempDir("postings"));
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetCodec(Codec.ForName("Lucene40"));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            Document doc = new Document();

            // id field
            FieldType idType = new FieldType(StringField.TYPE_NOT_STORED);
            idType.StoreTermVectors = true;
            Field idField = new Field("id", "", idType);
            doc.Add(idField);

            // title field: short text field
            FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED);
            titleType.StoreTermVectors = true;
            titleType.StoreTermVectorPositions = true;
            titleType.StoreTermVectorOffsets = true;
            titleType.IndexOptions = IndexOptions();
            Field titleField = new Field("title", "", titleType);
            doc.Add(titleField);

            // body field: long text field
            FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED);
            bodyType.StoreTermVectors = true;
            bodyType.StoreTermVectorPositions = true;
            bodyType.StoreTermVectorOffsets = true;
            bodyType.IndexOptions = IndexOptions();
            Field bodyField = new Field("body", "", bodyType);
            doc.Add(bodyField);

            int numDocs = AtLeast(1000);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                titleField.StringValue = FieldValue(1);
                bodyField.StringValue = FieldValue(3);
                iw.AddDocument(doc);
                if (Random().Next(20) == 0)
                {
                    iw.DeleteDocuments(new Term("id", Convert.ToString(i)));
                }
            }
            if (Random().NextBoolean())
            {
                // delete 1-100% of docs
                iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)]));
            }
            iw.Dispose();
            dir.Dispose(); // checkindex
        }
Beispiel #20
0
        public virtual void TestOmitTFAndNorms()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            ft.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;
            ft.OmitNorms = true;
            ft.Freeze();
            Field f = NewField("foo", "bar", ft);
            doc.Add(f);
            iw.AddDocument(doc);
            IndexReader ir = iw.Reader;
            iw.Dispose();
            IndexSearcher @is = NewSearcher(ir);

            foreach (Similarity sim in Sims)
            {
                @is.Similarity = sim;
                BooleanQuery query = new BooleanQuery(true);
                query.Add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
                Assert.AreEqual(1, @is.Search(query, 10).TotalHits);
            }
            ir.Dispose();
            dir.Dispose();
        }
Beispiel #21
0
        public virtual void TestTermUTF16SortOrder()
        {
            Random            rnd    = Random;
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                rnd, dir);
            Document d = new Document();
            // Single segment
            Field f = NewStringField("f", "", Field.Store.NO);

            d.Add(f);
            char[]        chars    = new char[2];
            ISet <string> allTerms = new JCG.HashSet <string>();

            int num = AtLeast(200);

            for (int i = 0; i < num; i++)
            {
                string s;
                if (rnd.NextBoolean())
                {
                    // Single char
                    if (rnd.NextBoolean())
                    {
                        // Above surrogates
                        chars[0] = (char)GetInt(rnd, 1 + UnicodeUtil.UNI_SUR_LOW_END, 0xffff);
                    }
                    else
                    {
                        // Below surrogates
                        chars[0] = (char)GetInt(rnd, 0, UnicodeUtil.UNI_SUR_HIGH_START - 1);
                    }
                    s = new string(chars, 0, 1);
                }
                else
                {
                    // Surrogate pair
                    chars[0] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_HIGH_START, UnicodeUtil.UNI_SUR_HIGH_END);
                    Assert.IsTrue(((int)chars[0]) >= UnicodeUtil.UNI_SUR_HIGH_START && ((int)chars[0]) <= UnicodeUtil.UNI_SUR_HIGH_END);
                    chars[1] = (char)GetInt(rnd, UnicodeUtil.UNI_SUR_LOW_START, UnicodeUtil.UNI_SUR_LOW_END);
                    s        = new string(chars, 0, 2);
                }
                allTerms.Add(s);
                f.SetStringValue(s);

                writer.AddDocument(d);

                if ((1 + i) % 42 == 0)
                {
                    writer.Commit();
                }
            }

            IndexReader r = writer.GetReader();

            // Test each sub-segment
            foreach (AtomicReaderContext ctx in r.Leaves)
            {
                CheckTermsOrder(ctx.Reader, allTerms, false);
            }
            CheckTermsOrder(r, allTerms, true);

            // Test multi segment
            r.Dispose();

            writer.ForceMerge(1);

            // Test single segment
            r = writer.GetReader();
            CheckTermsOrder(r, allTerms, true);
            r.Dispose();

            writer.Dispose();
            dir.Dispose();
        }
        public virtual void Test()
        {
            Directory dir = NewDirectory();
            Analyzer analyzer = new AnalyzerAnonymousInnerClassHelper(this, Analyzer.PER_FIELD_REUSE_STRATEGY);
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
            // TODO we could actually add more fields implemented with different PFs
            // or, just put this test into the usual rotation?
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, (IndexWriterConfig)iwc.Clone());
            Document doc = new Document();
            FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);
            // turn this on for a cross-check
            docsOnlyType.StoreTermVectors = true;
            docsOnlyType.IndexOptions = FieldInfo.IndexOptions.DOCS_ONLY;

            FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);
            // turn this on for a cross-check
            docsAndFreqsType.StoreTermVectors = true;
            docsAndFreqsType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS;

            FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
            // turn these on for a cross-check
            positionsType.StoreTermVectors = true;
            positionsType.StoreTermVectorPositions = true;
            positionsType.StoreTermVectorOffsets = true;
            positionsType.StoreTermVectorPayloads = true;
            FieldType offsetsType = new FieldType(positionsType);
            offsetsType.IndexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
            Field field1 = new Field("field1docs", "", docsOnlyType);
            Field field2 = new Field("field2freqs", "", docsAndFreqsType);
            Field field3 = new Field("field3positions", "", positionsType);
            Field field4 = new Field("field4offsets", "", offsetsType);
            Field field5 = new Field("field5payloadsFixed", "", positionsType);
            Field field6 = new Field("field6payloadsVariable", "", positionsType);
            Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType);
            Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType);
            doc.Add(field1);
            doc.Add(field2);
            doc.Add(field3);
            doc.Add(field4);
            doc.Add(field5);
            doc.Add(field6);
            doc.Add(field7);
            doc.Add(field8);
            for (int i = 0; i < MAXDOC; i++)
            {
                string stringValue = Convert.ToString(i) + " verycommon " + English.IntToEnglish(i).Replace('-', ' ') + " " + TestUtil.RandomSimpleString(Random());
                field1.StringValue = stringValue;
                field2.StringValue = stringValue;
                field3.StringValue = stringValue;
                field4.StringValue = stringValue;
                field5.StringValue = stringValue;
                field6.StringValue = stringValue;
                field7.StringValue = stringValue;
                field8.StringValue = stringValue;
                iw.AddDocument(doc);
            }
            iw.Dispose();
            Verify(dir);
            TestUtil.CheckIndex(dir); // for some extra coverage, checkIndex before we forceMerge
            iwc.SetOpenMode(OpenMode_e.APPEND);
            IndexWriter iw2 = new IndexWriter(dir, (IndexWriterConfig)iwc.Clone());
            iw2.ForceMerge(1);
            iw2.Dispose();
            Verify(dir);
            dir.Dispose();
        }
Beispiel #23
0
        public virtual void TestCrazySpans()
        {
            // The problem: "normal" lucene queries create scorers, returning null if terms dont exist
            // this means they never score a term that does not exist.
            // however with spans, there is only one scorer for the whole hierarchy:
            // inner queries are not real queries, their boosts are ignored, etc.
            Directory dir = NewDirectory();
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir);
            Document doc = new Document();
            FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
            doc.Add(NewField("foo", "bar", ft));
            iw.AddDocument(doc);
            IndexReader ir = iw.Reader;
            iw.Dispose();
            IndexSearcher @is = NewSearcher(ir);

            foreach (Similarity sim in Sims)
            {
                @is.Similarity = sim;
                SpanTermQuery s1 = new SpanTermQuery(new Term("foo", "bar"));
                SpanTermQuery s2 = new SpanTermQuery(new Term("foo", "baz"));
                Query query = new SpanOrQuery(s1, s2);
                TopDocs td = @is.Search(query, 10);
                Assert.AreEqual(1, td.TotalHits);
                float score = td.ScoreDocs[0].Score;
                Assert.IsTrue(score >= 0.0f);
                Assert.IsFalse(float.IsInfinity(score), "inf score for " + sim);
            }
            ir.Dispose();
            dir.Dispose();
        }
        public override void SetUp()
        {
            base.SetUp();

            Index = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), Index, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetSimilarity(Sim).SetMergePolicy(NewLogMergePolicy()));

            // hed is the most important field, dek is secondary

            // d1 is an "ok" match for: albino elephant
            {
                Document d1 = new Document();
                d1.Add(NewField("id", "d1", NonAnalyzedType)); // Field.Keyword("id",
                // "d1"));
                d1.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant"));
                d1.Add(NewTextField("dek", "elephant", Field.Store.YES)); // Field.Text("dek", "elephant"));
                writer.AddDocument(d1);
            }

            // d2 is a "good" match for: albino elephant
            {
                Document d2 = new Document();
                d2.Add(NewField("id", "d2", NonAnalyzedType)); // Field.Keyword("id",
                // "d2"));
                d2.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant"));
                d2.Add(NewTextField("dek", "albino", Field.Store.YES)); // Field.Text("dek",
                // "albino"));
                d2.Add(NewTextField("dek", "elephant", Field.Store.YES)); // Field.Text("dek", "elephant"));
                writer.AddDocument(d2);
            }

            // d3 is a "better" match for: albino elephant
            {
                Document d3 = new Document();
                d3.Add(NewField("id", "d3", NonAnalyzedType)); // Field.Keyword("id",
                // "d3"));
                d3.Add(NewTextField("hed", "albino", Field.Store.YES)); // Field.Text("hed",
                // "albino"));
                d3.Add(NewTextField("hed", "elephant", Field.Store.YES)); // Field.Text("hed", "elephant"));
                writer.AddDocument(d3);
            }

            // d4 is the "best" match for: albino elephant
            {
                Document d4 = new Document();
                d4.Add(NewField("id", "d4", NonAnalyzedType)); // Field.Keyword("id",
                // "d4"));
                d4.Add(NewTextField("hed", "albino", Field.Store.YES)); // Field.Text("hed",
                // "albino"));
                d4.Add(NewField("hed", "elephant", NonAnalyzedType)); // Field.Text("hed", "elephant"));
                d4.Add(NewTextField("dek", "albino", Field.Store.YES)); // Field.Text("dek",
                // "albino"));
                writer.AddDocument(d4);
            }

            r = SlowCompositeReaderWrapper.Wrap(writer.Reader);
            writer.Dispose();
            s = NewSearcher(r);
            s.Similarity = Sim;
        }
Beispiel #25
0
 public IterableAnonymousInnerClassHelper2(RandomIndexWriter outerInstance, IEnumerable <IndexableField> doc)
 {
     this.OuterInstance = outerInstance;
     this.Doc           = doc;
 }