Example #1
        public override void BeforeClass()
        {
            base.BeforeClass();


            Random random = Random;

            m_directory = NewDirectory();
            m_stopword  = "" + GetRandomChar();
            CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.MakeString(m_stopword));

            m_analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
            RandomIndexWriter iw    = new RandomIndexWriter(random, m_directory, m_analyzer);
            Document          doc   = new Document();
            Field             id    = new StringField("id", "", Field.Store.NO);
            Field             field = new TextField("field", "", Field.Store.NO);

            doc.Add(id);
            doc.Add(field);

            // index some docs
            int numDocs = AtLeast(1000);

            for (int i = 0; i < numDocs; i++)
            {
                id.SetStringValue(Convert.ToString(i, CultureInfo.InvariantCulture));
                field.SetStringValue(RandomFieldContents());
                iw.AddDocument(doc);
            }

            // delete some docs
            int numDeletes = numDocs / 20;

            for (int i = 0; i < numDeletes; i++)
            {
                Term toDelete = new Term("id", Convert.ToString(random.Next(numDocs), CultureInfo.InvariantCulture));
                if (random.NextBoolean())
                {
                    iw.DeleteDocuments(toDelete);
                }
                else
                {
                    iw.DeleteDocuments(new TermQuery(toDelete));
                }
            }

            m_reader = iw.GetReader();
            m_s1     = NewSearcher(m_reader);
            m_s2     = NewSearcher(m_reader);
            iw.Dispose();
        }
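Distilled from the example above: the pattern every snippet on this page shares is to add documents through a RandomIndexWriter, delete a subset either by a Term or by wrapping the same Term in a TermQuery (both DeleteDocuments overloads remove the same documents for a single-term delete), and then open a near-real-time reader before disposing the writer. Below is a minimal sketch of just that sequence; it assumes it runs inside a LuceneTestCase subclass from the Lucene.NET 4.8 test framework, which supplies Random, NewDirectory() and NewSearcher().

        // Minimal sketch (hypothetical method body, not taken from the test suite).
        Directory dir = NewDirectory();
        RandomIndexWriter iw = new RandomIndexWriter(Random, dir);
        Document doc = new Document();
        Field id = new StringField("id", "", Field.Store.NO);
        doc.Add(id);
        for (int i = 0; i < 100; i++)
        {
            id.SetStringValue(Convert.ToString(i, CultureInfo.InvariantCulture));
            iw.AddDocument(doc);
        }

        Term toDelete = new Term("id", "7");
        if (Random.NextBoolean())
        {
            iw.DeleteDocuments(toDelete);                // delete by Term
        }
        else
        {
            iw.DeleteDocuments(new TermQuery(toDelete)); // delete by Query
        }

        IndexReader reader = iw.GetReader(); // NRT reader; already sees the delete
        IndexSearcher searcher = NewSearcher(reader);
        // searcher.Search(new TermQuery(toDelete), 1).TotalHits is now 0
        iw.Dispose();
        reader.Dispose();
        dir.Dispose();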
        public static void BeforeClass()
        {
            Random random = Random();

            Directory = NewDirectory();
            Stopword  = "" + RandomChar();
            CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.MakeString(Stopword));

            Analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
            RandomIndexWriter iw    = new RandomIndexWriter(random, Directory, Analyzer);
            Document          doc   = new Document();
            Field             id    = new StringField("id", "", Field.Store.NO);
            Field             field = new TextField("field", "", Field.Store.NO);

            doc.Add(id);
            doc.Add(field);

            // index some docs
            int numDocs = AtLeast(1000);

            for (int i = 0; i < numDocs; i++)
            {
                id.StringValue    = Convert.ToString(i);
                field.StringValue = RandomFieldContents();
                iw.AddDocument(doc);
            }

            // delete some docs
            int numDeletes = numDocs / 20;

            for (int i = 0; i < numDeletes; i++)
            {
                Term toDelete = new Term("id", Convert.ToString(random.Next(numDocs)));
                if (random.NextBoolean())
                {
                    iw.DeleteDocuments(toDelete);
                }
                else
                {
                    iw.DeleteDocuments(new TermQuery(toDelete));
                }
            }

            Reader = iw.Reader;
            S1     = NewSearcher(Reader);
            S2     = NewSearcher(Reader);
            iw.Dispose();
        }
 private void CreateRandomIndexes(int maxSegments)
 {
     dir = NewDirectory();
     numDocs = AtLeast(150);
     int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5);
     ISet<string> randomTerms = new HashSet<string>();
     while (randomTerms.Count < numTerms)
     {
         randomTerms.Add(TestUtil.RandomSimpleString(Random()));
     }
     terms = new List<string>(randomTerms);
     int seed = Random().Next();
     IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random(seed)));
     iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort));
     iw = new RandomIndexWriter(new Random(seed), dir, iwc);
     for (int i = 0; i < numDocs; ++i)
     {
         Document doc = RandomDocument();
         iw.AddDocument(doc);
         if (i == numDocs / 2 || (i != numDocs - 1 && Random().Next(8) == 0))
         {
             iw.Commit();
         }
         if (Random().Next(15) == 0)
         {
             string term = RandomInts.RandomFrom(Random(), terms);
             iw.DeleteDocuments(new Term("s", term));
         }
     }
     reader = iw.Reader;
 }
Example #4
        private void CreateRandomIndexes(int maxSegments)
        {
            dir     = NewDirectory();
            numDocs = AtLeast(150);
            int           numTerms    = TestUtil.NextInt32(Random, 1, numDocs / 5);
            ISet <string> randomTerms = new JCG.HashSet <string>();

            while (randomTerms.Count < numTerms)
            {
                randomTerms.Add(TestUtil.RandomSimpleString(Random));
            }
            terms = new JCG.List <string>(randomTerms);
            long seed             = Random.NextInt64();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new J2N.Randomizer(seed)));

            iwc.SetMergePolicy(TestSortingMergePolicy.NewSortingMergePolicy(sort));
            iw = new RandomIndexWriter(new J2N.Randomizer(seed), dir, iwc);
            for (int i = 0; i < numDocs; ++i)
            {
                Document doc = RandomDocument();
                iw.AddDocument(doc);
                if (i == numDocs / 2 || (i != numDocs - 1 && Random.Next(8) == 0))
                {
                    iw.Commit();
                }
                if (Random.Next(15) == 0)
                {
                    string term = RandomPicks.RandomFrom(Random, terms);
                    iw.DeleteDocuments(new Term("s", term));
                }
            }
            reader = iw.GetReader();
        }
 public virtual void Test()
 {
     Directory dir = NewDirectory();
     IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
     conf.SetCodec(new Lucene46Codec());
     RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf);
     Document doc = new Document();
     // these fields should sometimes get term vectors, etc
     Field idField = NewStringField("id", "", Field.Store.NO);
     Field bodyField = NewTextField("body", "", Field.Store.NO);
     Field dvField = new NumericDocValuesField("dv", 5);
     doc.Add(idField);
     doc.Add(bodyField);
     doc.Add(dvField);
     for (int i = 0; i < 100; i++)
     {
         idField.StringValue = Convert.ToString(i);
         bodyField.StringValue = TestUtil.RandomUnicodeString(Random());
         riw.AddDocument(doc);
         if (Random().Next(7) == 0)
         {
             riw.Commit();
         }
         if (Random().Next(20) == 0)
         {
             riw.DeleteDocuments(new Term("id", Convert.ToString(i)));
         }
     }
     riw.Dispose();
     CheckHeaders(dir);
     dir.Dispose();
 }
Example #6
        public override void SetUp()
        {
            base.SetUp();
            dir = NewDirectory();
            iw  = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, dir);
            Document doc     = new Document();
            Field    idField = new StringField("id", "", Field.Store.NO);
            doc.Add(idField);
            // add 500 docs with id 0..499
            for (int i = 0; i < 500; i++)
            {
                idField.SetStringValue(Convert.ToString(i));
                iw.AddDocument(doc);
            }
            // delete 20 of them
            for (int i = 0; i < 20; i++)
            {
                iw.DeleteDocuments(new Term("id", Convert.ToString(Random.Next(iw.MaxDoc))));
            }
            ir  = iw.GetReader();
            @is = NewSearcher(ir);
        }
        private int[] BuildIndex(RandomIndexWriter writer, int docs)
        {
            int[] docStates = new int[docs];
            for (int i = 0; i < docs; i++)
            {
                Document doc = new Document();
                if (Random.NextBoolean())
                {
                    docStates[i] = 1;
                    doc.Add(NewTextField("some", "value", Field.Store.YES));
                }
                doc.Add(NewTextField("all", "test", Field.Store.NO));
                doc.Add(NewTextField("id", "" + i, Field.Store.YES));
                writer.AddDocument(doc);
            }
            writer.Commit();
            int numDeletes = Random.Next(docs);

            for (int i = 0; i < numDeletes; i++)
            {
                int docID = Random.Next(docs);
                writer.DeleteDocuments(new Term("id", "" + docID));
                docStates[docID] = 2;
            }
            writer.Dispose();
            return(docStates);
        }
        public virtual void TestRandom()
        {
            Directory         d = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                Random, d);

            w.IndexWriter.Config.SetMaxBufferedDocs(17);
            int           numDocs = AtLeast(100);
            ISet <string> aDocs   = new JCG.HashSet <string>();

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                string   v;
                if (Random.Next(5) == 4)
                {
                    v = "a";
                    aDocs.Add("" + i);
                }
                else
                {
                    v = "b";
                }
                Field f = NewStringField("field", v, Field.Store.NO);
                doc.Add(f);
                doc.Add(NewStringField("id", "" + i, Field.Store.YES));
                w.AddDocument(doc);
            }

            int numDelDocs = AtLeast(10);

            for (int i = 0; i < numDelDocs; i++)
            {
                string delID = "" + Random.Next(numDocs);
                w.DeleteDocuments(new Term("id", delID));
                aDocs.Remove(delID);
            }

            IndexReader r = w.GetReader();

            w.Dispose();
            TopDocs hits = NewSearcher(r).Search(new MatchAllDocsQuery(), new QueryWrapperFilter(new TermQuery(new Term("field", "a"))), numDocs);

            Assert.AreEqual(aDocs.Count, hits.TotalHits);
            foreach (ScoreDoc sd in hits.ScoreDocs)
            {
                Assert.IsTrue(aDocs.Contains(r.Document(sd.Doc).Get("id")));
            }
            r.Dispose();
            d.Dispose();
        }
Example #9
        public override void SetUp()
        {
            base.SetUp();
            dir       = NewDirectory();
            fieldName = Random.NextBoolean() ? "field" : ""; // sometimes use an empty string as field name
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random, MockTokenizer.KEYWORD, false)).SetMaxBufferedDocs(TestUtil.NextInt32(Random, 50, 1000)));

            JCG.List <string> terms = new JCG.List <string>();
            int num = AtLeast(200);

            for (int i = 0; i < num; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("id", Convert.ToString(i), Field.Store.NO));
                int numTerms = Random.Next(4);
                for (int j = 0; j < numTerms; j++)
                {
                    string s = TestUtil.RandomUnicodeString(Random);
                    doc.Add(NewStringField(fieldName, s, Field.Store.NO));
                    // if the default codec doesn't support sortedset, we will uninvert at search time
                    if (DefaultCodecSupportsSortedSet)
                    {
                        doc.Add(new SortedSetDocValuesField(fieldName, new BytesRef(s)));
                    }
                    terms.Add(s);
                }
                writer.AddDocument(doc);
            }

            if (Verbose)
            {
                // utf16 order
                terms.Sort(StringComparer.Ordinal);
                Console.WriteLine("UTF16 order:");
                foreach (string s in terms)
                {
                    Console.WriteLine("  " + UnicodeUtil.ToHexString(s));
                }
            }

            int numDeletions = Random.Next(num / 10);

            for (int i = 0; i < numDeletions; i++)
            {
                writer.DeleteDocuments(new Term("id", Convert.ToString(Random.Next(num))));
            }

            reader    = writer.GetReader();
            searcher1 = NewSearcher(reader);
            searcher2 = NewSearcher(reader);
            writer.Dispose();
        }
        public virtual void TestSumDocFreq_Mem()
        {
            int numDocs = AtLeast(500);

            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);

            Document doc = new Document();
            Field id = NewStringField("id", "", Field.Store.NO);
            Field field1 = NewTextField("foo", "", Field.Store.NO);
            Field field2 = NewTextField("bar", "", Field.Store.NO);
            doc.Add(id);
            doc.Add(field1);
            doc.Add(field2);
            for (int i = 0; i < numDocs; i++)
            {
                id.StringValue = "" + i;
                char ch1 = (char)TestUtil.NextInt(Random(), 'a', 'z');
                char ch2 = (char)TestUtil.NextInt(Random(), 'a', 'z');
                field1.StringValue = "" + ch1 + " " + ch2;
                ch1 = (char)TestUtil.NextInt(Random(), 'a', 'z');
                ch2 = (char)TestUtil.NextInt(Random(), 'a', 'z');
                field2.StringValue = "" + ch1 + " " + ch2;
                writer.AddDocument(doc);
            }

            IndexReader ir = writer.Reader;

            AssertSumDocFreq(ir);
            ir.Dispose();

            int numDeletions = AtLeast(20);
            for (int i = 0; i < numDeletions; i++)
            {
                writer.DeleteDocuments(new Term("id", "" + Random().Next(numDocs)));
            }
            writer.ForceMerge(1);
            writer.Dispose();

            ir = DirectoryReader.Open(dir);
            AssertSumDocFreq(ir);
            ir.Dispose();
            dir.Dispose();
        }
Example #11
        /// <summary>
        /// Creates an index for sorting.
        /// </summary>
        public void CreateIndex(Directory dir, int numDocs, Random random)
        {
            IList <int> ids = new List <int>();

            for (int i = 0; i < numDocs; i++)
            {
                ids.Add(i * 10);
            }
            // shuffle them for indexing
            // LUCENENET NOTE: Using LINQ, so we need to reassign the variable with the result
            ids = CollectionsHelper.Shuffle(ids);

            if (VERBOSE)
            {
                Console.WriteLine("Shuffled IDs for indexing: " + Arrays.ToString(ids.ToArray()));
            }

            PositionsTokenStream positions = new PositionsTokenStream();
            IndexWriterConfig    conf      = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));

            conf.SetMaxBufferedDocs(4);                               // create some segments
            conf.SetSimilarity(new NormsSimilarity(conf.Similarity)); // for testing norms field
            using (RandomIndexWriter writer = new RandomIndexWriter(random, dir, conf))
            {
                writer.RandomForceMerge = false;
                foreach (int id in ids)
                {
                    writer.AddDocument(Doc(id, positions));
                }
                // delete some documents
                writer.Commit();
                foreach (int id in ids)
                {
                    if (random.NextDouble() < 0.2)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("delete doc_id " + id);
                        }
                        writer.DeleteDocuments(new Term(ID_FIELD, id.ToString()));
                    }
                }
            }
        }
Example #12
        public void TestAdvanceSingleDeletedParentNoChild()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir);

            // First doc with 1 child
            Document parentDoc = new Document();
            parentDoc.Add(NewStringField("parent", "1", Field.Store.NO));
            parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
            Document childDoc = new Document();
            childDoc.Add(NewStringField("child", "1", Field.Store.NO));
            w.AddDocuments(Arrays.AsList(childDoc, parentDoc));

            parentDoc = new Document();
            parentDoc.Add(NewStringField("parent", "2", Field.Store.NO));
            parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
            w.AddDocuments(Arrays.AsList(parentDoc));

            w.DeleteDocuments(new Term("parent", "2"));

            parentDoc = new Document();
            parentDoc.Add(NewStringField("parent", "2", Field.Store.NO));
            parentDoc.Add(NewStringField("isparent", "yes", Field.Store.NO));
            childDoc = new Document();
            childDoc.Add(NewStringField("child", "2", Field.Store.NO));
            w.AddDocuments(Arrays.AsList(childDoc, parentDoc));

            IndexReader r = w.Reader;
            w.Dispose();
            IndexSearcher s = NewSearcher(r);

            // Create a filter that defines "parent" documents in the index - in this case resumes
            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isparent", "yes"))));

            Query parentQuery = new TermQuery(new Term("parent", "2"));

            ToChildBlockJoinQuery parentJoinQuery = new ToChildBlockJoinQuery(parentQuery, parentsFilter, Random().NextBoolean());
            TopDocs topdocs = s.Search(parentJoinQuery, 3);
            assertEquals(1, topdocs.TotalHits);

            r.Dispose();
            dir.Dispose();
        }
 public override void SetUp()
 {
     base.SetUp();
     Dir = NewDirectory();
     Iw = new RandomIndexWriter(Random(), Dir);
     Document doc = new Document();
     Field idField = new StringField("id", "", Field.Store.NO);
     doc.Add(idField);
     // add 500 docs with id 0..499
     for (int i = 0; i < 500; i++)
     {
         idField.StringValue = Convert.ToString(i);
         Iw.AddDocument(doc);
     }
     // delete 20 of them
     for (int i = 0; i < 20; i++)
     {
         Iw.DeleteDocuments(new Term("id", Convert.ToString(Random().Next(Iw.MaxDoc()))));
     }
     Ir = Iw.Reader;
     @is = NewSearcher(Ir);
 }
Example #14
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();
            Iw  = new RandomIndexWriter(Random(), Dir, Similarity, TimeZone);
            Document doc     = new Document();
            Field    idField = new StringField("id", "", Field.Store.NO);

            doc.Add(idField);
            // add 500 docs with id 0..499
            for (int i = 0; i < 500; i++)
            {
                idField.SetStringValue(Convert.ToString(i));
                Iw.AddDocument(doc);
            }
            // delete 20 of them
            for (int i = 0; i < 20; i++)
            {
                Iw.DeleteDocuments(new Term("id", Convert.ToString(Random().Next(Iw.MaxDoc))));
            }
            Ir  = Iw.Reader;
            @is = NewSearcher(Ir);
        }
        /// <summary>
        /// Tests dv against stored fields with threads (all types + missing)
        /// </summary>
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestThreads2()
        {
            AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField());
            AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
            Field idField = new StringField("id", "", Field.Store.NO);
            Field storedBinField = new StoredField("storedBin", new byte[0]);
            Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
            Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
            Field storedNumericField = new StoredField("storedNum", "");
            Field dvNumericField = new NumericDocValuesField("dvNum", 0);

            // index some docs
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                int length = TestUtil.NextInt(Random(), 0, 8);
                var buffer = new byte[length];
                Random().NextBytes(buffer);
                storedBinField.BytesValue = new BytesRef(buffer);
                dvBinField.BytesValue = new BytesRef(buffer);
                dvSortedField.BytesValue = new BytesRef(buffer);
                long numericValue = Random().NextLong();
                storedNumericField.StringValue = Convert.ToString(numericValue);
                dvNumericField.LongValue = numericValue;
                Document doc = new Document();
                doc.Add(idField);
                if (Random().Next(4) > 0)
                {
                    doc.Add(storedBinField);
                    doc.Add(dvBinField);
                    doc.Add(dvSortedField);
                }
                if (Random().Next(4) > 0)
                {
                    doc.Add(storedNumericField);
                    doc.Add(dvNumericField);
                }
                int numSortedSetFields = Random().Next(3);
                SortedSet<string> values = new SortedSet<string>();
                for (int j = 0; j < numSortedSetFields; j++)
                {
                    values.Add(TestUtil.RandomSimpleString(Random()));
                }
                foreach (string v in values)
                {
                    doc.Add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v)));
                    doc.Add(new StoredField("storedSortedSet", v));
                }
                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }
            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            int numThreads = TestUtil.NextInt(Random(), 2, 7);
            ThreadClass[] threads = new ThreadClass[numThreads];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int i = 0; i < threads.Length; i++)
            {
                threads[i] = new ThreadAnonymousInnerClassHelper2(this, ir, startingGun);
                threads[i].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }
            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestDeleteAllSlowly()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir);
            int NUM_DOCS = AtLeast(1000);
            IList<int?> ids = new List<int?>(NUM_DOCS);
            for (int id = 0; id < NUM_DOCS; id++)
            {
                ids.Add(id);
            }
            ids = CollectionsHelper.Shuffle(ids);
            foreach (int id in ids)
            {
                Document doc = new Document();
                doc.Add(NewStringField("id", "" + id, Field.Store.NO));
                w.AddDocument(doc);
            }
            ids = CollectionsHelper.Shuffle(ids);
            int upto = 0;
            while (upto < ids.Count)
            {
                int left = ids.Count - upto;
                int inc = Math.Min(left, TestUtil.NextInt(Random(), 1, 20));
                int limit = upto + inc;
                while (upto < limit)
                {
                    w.DeleteDocuments(new Term("id", "" + ids[upto++]));
                }
                IndexReader r = w.Reader;
                Assert.AreEqual(NUM_DOCS - upto, r.NumDocs);
                r.Dispose();
            }

            w.Dispose();
            dir.Dispose();
        }
        private void DoTestSortedVsFieldCache(int minLength, int maxLength)
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
            Document doc = new Document();
            Field idField = new StringField("id", "", Field.Store.NO);
            Field indexedField = new StringField("indexed", "", Field.Store.NO);
            Field dvField = new SortedDocValuesField("dv", new BytesRef());
            doc.Add(idField);
            doc.Add(indexedField);
            doc.Add(dvField);

            // index some docs
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                int length;
                if (minLength == maxLength)
                {
                    length = minLength; // fixed length
                }
                else
                {
                    length = TestUtil.NextInt(Random(), minLength, maxLength);
                }
                string value = TestUtil.RandomSimpleString(Random(), length);
                indexedField.StringValue = value;
                dvField.BytesValue = new BytesRef(value);
                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }
            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            foreach (AtomicReaderContext context in ir.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                SortedDocValues expected = FieldCache.DEFAULT.GetTermsIndex(r, "indexed");
                SortedDocValues actual = r.GetSortedDocValues("dv");
                AssertEquals(r.MaxDoc, expected, actual);
            }
            ir.Dispose();
            dir.Dispose();
        }
        private void DoTestSortedVsStoredFields(int minLength, int maxLength)
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
            Document doc = new Document();
            Field idField = new StringField("id", "", Field.Store.NO);
            Field storedField = new StoredField("stored", new byte[0]);
            Field dvField = new SortedDocValuesField("dv", new BytesRef());
            doc.Add(idField);
            doc.Add(storedField);
            doc.Add(dvField);

            // index some docs
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                int length;
                if (minLength == maxLength)
                {
                    length = minLength; // fixed length
                }
                else
                {
                    length = TestUtil.NextInt(Random(), minLength, maxLength);
                }
                var buffer = new byte[length];
                Random().NextBytes(buffer);
                storedField.BytesValue = new BytesRef(buffer);
                dvField.BytesValue = new BytesRef(buffer);
                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }
            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            foreach (AtomicReaderContext context in ir.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                BinaryDocValues docValues = r.GetSortedDocValues("dv");
                for (int i = 0; i < r.MaxDoc; i++)
                {
                    BytesRef binaryValue = r.Document(i).GetBinaryValue("stored");
                    BytesRef scratch = new BytesRef();
                    docValues.Get(i, scratch);
                    Assert.AreEqual(binaryValue, scratch);
                }
            }
            ir.Dispose();
            dir.Dispose();
        }
        public void BeforeClass()
        {
            Random random = Random();
            Directory = NewDirectory();
            Stopword = "" + RandomChar();
            CharacterRunAutomaton stopset = new CharacterRunAutomaton(BasicAutomata.MakeString(Stopword));
            Analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
            RandomIndexWriter iw = new RandomIndexWriter(random, Directory, Analyzer, ClassEnvRule.Similarity, ClassEnvRule.TimeZone);
            Document doc = new Document();
            Field id = new StringField("id", "", Field.Store.NO);
            Field field = new TextField("field", "", Field.Store.NO);
            doc.Add(id);
            doc.Add(field);

            // index some docs
            int numDocs = AtLeast(1000);
            for (int i = 0; i < numDocs; i++)
            {
                id.StringValue = Convert.ToString(i);
                field.StringValue = RandomFieldContents();
                iw.AddDocument(doc);
            }

            // delete some docs
            int numDeletes = numDocs / 20;
            for (int i = 0; i < numDeletes; i++)
            {
                Term toDelete = new Term("id", Convert.ToString(random.Next(numDocs)));
                if (random.NextBoolean())
                {
                    iw.DeleteDocuments(toDelete);
                }
                else
                {
                    iw.DeleteDocuments(new TermQuery(toDelete));
                }
            }

            Reader = iw.Reader;
            S1 = NewSearcher(Reader);
            S2 = NewSearcher(Reader);
            iw.Dispose();
        }
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestSortedSetMergeAwayAllValues()
        {
            AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Directory directory = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig);

            Document doc = new Document();
            doc.Add(new StringField("id", "0", Field.Store.NO));
            iwriter.AddDocument(doc);
            doc = new Document();
            doc.Add(new StringField("id", "1", Field.Store.NO));
            doc.Add(new SortedSetDocValuesField("field", new BytesRef("hello")));
            iwriter.AddDocument(doc);
            iwriter.Commit();
            iwriter.DeleteDocuments(new Term("id", "1"));
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.Reader;
            iwriter.Dispose();

            SortedSetDocValues dv = GetOnlySegmentReader(ireader).GetSortedSetDocValues("field");
            Assert.AreEqual(0, dv.ValueCount);

            ireader.Dispose();
            directory.Dispose();
        }
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestSortedMergeAwayAllValues()
        {
            Directory directory = NewDirectory();
            Analyzer analyzer = new MockAnalyzer(Random());
            IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random(), directory, iwconfig);

            Document doc = new Document();
            doc.Add(new StringField("id", "0", Field.Store.NO));
            iwriter.AddDocument(doc);
            doc = new Document();
            doc.Add(new StringField("id", "1", Field.Store.NO));
            doc.Add(new SortedDocValuesField("field", new BytesRef("hello")));
            iwriter.AddDocument(doc);
            iwriter.Commit();
            iwriter.DeleteDocuments(new Term("id", "1"));
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.Reader;
            iwriter.Dispose();

            SortedDocValues dv = GetOnlySegmentReader(ireader).GetSortedDocValues("field");
            if (DefaultCodecSupportsDocsWithField())
            {
                Assert.AreEqual(-1, dv.GetOrd(0));
                Assert.AreEqual(0, dv.ValueCount);
            }
            else
            {
                Assert.AreEqual(0, dv.GetOrd(0));
                Assert.AreEqual(1, dv.ValueCount);
                BytesRef @ref = new BytesRef();
                dv.LookupOrd(0, @ref);
                Assert.AreEqual(new BytesRef(), @ref);
            }

            ireader.Dispose();
            directory.Dispose();
        }
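The two merge-away tests above reduce to the same sequence: delete every document that carried the doc-values field, force-merge down to one segment, and the merge drops the now-unreferenced values (on codecs that support docs-with-field the resulting ValueCount is 0, as asserted above). A minimal sketch of that sequence, under the same assumptions as the tests (a LuceneTestCase subclass providing Random, NewDirectory() and GetOnlySegmentReader()):

        // Minimal sketch (hypothetical; condensed from the two tests above).
        Directory dir = NewDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(Random, dir);

        Document doc = new Document();
        doc.Add(new StringField("id", "0", Field.Store.NO));
        writer.AddDocument(doc); // survives, carries no "field" value

        doc = new Document();
        doc.Add(new StringField("id", "1", Field.Store.NO));
        doc.Add(new SortedDocValuesField("field", new BytesRef("hello")));
        writer.AddDocument(doc); // the only document carrying "field"
        writer.Commit();

        writer.DeleteDocuments(new Term("id", "1"));
        writer.ForceMerge(1); // one segment; the deleted doc's values are merged away

        DirectoryReader reader = writer.GetReader();
        writer.Dispose();
        SortedDocValues dv = GetOnlySegmentReader(reader).GetSortedDocValues("field");
        // dv.ValueCount == 0 when the codec supports docs-with-field
        reader.Dispose();
        dir.Dispose();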
Example #22
        public void TestNestedDocScoringWithDeletes()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));

            // Cannot assert this since we use NoMergePolicy:
            w.DoRandomForceMergeAssert = false;

            IList<Document> docs = new List<Document>();
            docs.Add(MakeJob("java", 2007));
            docs.Add(MakeJob("python", 2010));
            docs.Add(MakeResume("Lisa", "United Kingdom"));
            w.AddDocuments(docs);

            docs.Clear();
            docs.Add(MakeJob("c", 1999));
            docs.Add(MakeJob("ruby", 2005));
            docs.Add(MakeJob("java", 2006));
            docs.Add(MakeResume("Frank", "United States"));
            w.AddDocuments(docs);

            w.Commit();
            IndexSearcher s = NewSearcher(DirectoryReader.Open(dir));

            ToParentBlockJoinQuery q = new ToParentBlockJoinQuery(NumericRangeQuery.NewIntRange("year", 1990, 2010, true, true), new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("docType", "resume")))), ScoreMode.Total);

            TopDocs topDocs = s.Search(q, 10);
            assertEquals(2, topDocs.TotalHits);
            assertEquals(6, topDocs.ScoreDocs[0].Doc);
            assertEquals(3.0f, topDocs.ScoreDocs[0].Score, 0.0f);
            assertEquals(2, topDocs.ScoreDocs[1].Doc);
            assertEquals(2.0f, topDocs.ScoreDocs[1].Score, 0.0f);

            s.IndexReader.Dispose();
            w.DeleteDocuments(new Term("skill", "java"));
            w.Dispose();
            s = NewSearcher(DirectoryReader.Open(dir));

            topDocs = s.Search(q, 10);
            assertEquals(2, topDocs.TotalHits);
            assertEquals(6, topDocs.ScoreDocs[0].Doc);
            assertEquals(2.0f, topDocs.ScoreDocs[0].Score, 0.0f);
            assertEquals(2, topDocs.ScoreDocs[1].Doc);
            assertEquals(1.0f, topDocs.ScoreDocs[1].Score, 0.0f);

            s.IndexReader.Dispose();
            dir.Dispose();
        }
        private void DoTestSortedSetVsStoredFields(int minLength, int maxLength, int maxValuesPerDoc)
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);

            // index some docs
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                Field idField = new StringField("id", Convert.ToString(i), Field.Store.NO);
                doc.Add(idField);
                int length;
                if (minLength == maxLength)
                {
                    length = minLength; // fixed length
                }
                else
                {
                    length = TestUtil.NextInt(Random(), minLength, maxLength);
                }
                int numValues = TestUtil.NextInt(Random(), 0, maxValuesPerDoc);
                // create a random set of strings
                SortedSet<string> values = new SortedSet<string>();
                for (int v = 0; v < numValues; v++)
                {
                    values.Add(TestUtil.RandomSimpleString(Random(), length));
                }

                // add ordered to the stored field
                foreach (string v in values)
                {
                    doc.Add(new StoredField("stored", v));
                }

                // add in any order to the dv field
                IList<string> unordered = new List<string>(values);
                unordered = CollectionsHelper.Shuffle(unordered);
                foreach (string v in unordered)
                {
                    doc.Add(new SortedSetDocValuesField("dv", new BytesRef(v)));
                }

                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }
            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            foreach (AtomicReaderContext context in ir.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                SortedSetDocValues docValues = r.GetSortedSetDocValues("dv");
                BytesRef scratch = new BytesRef();
                for (int i = 0; i < r.MaxDoc; i++)
                {
                    string[] stringValues = r.Document(i).GetValues("stored");
                    if (docValues != null)
                    {
                        docValues.Document = i;
                    }
                    for (int j = 0; j < stringValues.Length; j++)
                    {
                        Debug.Assert(docValues != null);
                        long ord = docValues.NextOrd();
                        Debug.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
                        docValues.LookupOrd(ord, scratch);
                        Assert.AreEqual(stringValues[j], scratch.Utf8ToString());
                    }
                    Debug.Assert(docValues == null || docValues.NextOrd() == SortedSetDocValues.NO_MORE_ORDS);
                }
            }
            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestEnforceDeletions()
        {
            Directory dir = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy(10)));
            // asserts below requires no unexpected merges:

            // NOTE: cannot use writer.getReader because RIW (on
            // flipping a coin) may give us a newly opened reader,
            // but we use .reopen on this reader below and expect to
            // (must) get an NRT reader:
            DirectoryReader reader = DirectoryReader.Open(writer.w, true);
            // same reason we don't wrap?
            IndexSearcher searcher = NewSearcher(reader, false);

            // add a doc, refresh the reader, and check that it's there
            Document doc = new Document();
            doc.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);

            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1);
            Assert.AreEqual(1, docs.TotalHits, "Should find a hit...");

            Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1")));

            CachingWrapperFilter filter = new CachingWrapperFilter(startFilter);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.IsTrue(filter.SizeInBytes() > 0);

            Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");

            Query constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

            // make sure we get a cache hit when we reopen reader
            // that had no change to deletions

            // fake delete (deletes nothing):
            writer.DeleteDocuments(new Term("foo", "bar"));

            IndexReader oldReader = reader;
            reader = RefreshReader(reader);
            Assert.IsTrue(reader == oldReader);
            int missCount = filter.MissCount;
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

            // cache hit:
            Assert.AreEqual(missCount, filter.MissCount);

            // now delete the doc, refresh the reader, and see that it's not there
            writer.DeleteDocuments(new Term("id", "1"));

            // NOTE: important to hold ref here so GC doesn't clear
            // the cache entry!  Else the assert below may sometimes
            // fail:
            oldReader = reader;
            reader = RefreshReader(reader);

            searcher = NewSearcher(reader, false);

            missCount = filter.MissCount;
            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");

            // cache hit
            Assert.AreEqual(missCount, filter.MissCount);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");

            // apply deletes dynamically:
            filter = new CachingWrapperFilter(startFilter);
            writer.AddDocument(doc);
            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");
            missCount = filter.MissCount;
            Assert.IsTrue(missCount > 0);
            constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");
            Assert.AreEqual(missCount, filter.MissCount);

            writer.AddDocument(doc);

            // NOTE: important to hold ref here so GC doesn't clear
            // the cache entry!  Else the assert below may sometimes
            // fail:
            oldReader = reader;

            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(2, docs.TotalHits, "[query + filter] Should find 2 hits...");
            Assert.IsTrue(filter.MissCount > missCount);
            missCount = filter.MissCount;

            constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(2, docs.TotalHits, "[just filter] Should find a hit...");
            Assert.AreEqual(missCount, filter.MissCount);

            // now delete the doc, refresh the reader, and see that it's not there
            writer.DeleteDocuments(new Term("id", "1"));

            reader = RefreshReader(reader);
            searcher = NewSearcher(reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");
            // CWF reused the same entry (it dynamically applied the deletes):
            Assert.AreEqual(missCount, filter.MissCount);

            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");
            // CWF reused the same entry (it dynamically applied the deletes):
            Assert.AreEqual(missCount, filter.MissCount);

            // NOTE: silliness to make sure JRE does not eliminate
            // our holding onto oldReader to prevent
            // CachingWrapperFilter's WeakHashMap from dropping the
            // entry:
            Assert.IsTrue(oldReader != null);

            reader.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Example #25
 public virtual void TestZeroTerms()
 {
     var d = NewDirectory();
     RandomIndexWriter w = new RandomIndexWriter(Random(), d);
     Document doc = new Document();
     doc.Add(NewTextField("field", "one two three", Field.Store.NO));
     doc = new Document();
     doc.Add(NewTextField("field2", "one two three", Field.Store.NO));
     w.AddDocument(doc);
     w.Commit();
     w.DeleteDocuments(new Term("field", "one"));
     w.ForceMerge(1);
     IndexReader r = w.Reader;
     w.Dispose();
     Assert.AreEqual(1, r.NumDocs);
     Assert.AreEqual(1, r.MaxDoc);
     Terms terms = MultiFields.GetTerms(r, "field");
     if (terms != null)
     {
         Assert.IsNull(terms.Iterator(null).Next());
     }
     r.Dispose();
     d.Dispose();
 }
        public void TestWithDeletions()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            IDictionary<string, Document> docs = GenerateIndexDocuments(AtLeast(100));
            Random rand = Random();
            List<string> termsToDel = new List<string>();
            foreach (Document doc in docs.Values)
            {
                if (rand.NextBoolean() && termsToDel.Count < docs.Count - 1)
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            Term[] delTerms = new Term[termsToDel.Count];
            for (int i = 0; i < termsToDel.Count; i++)
            {
                delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
            }

            foreach (Term delTerm in delTerms)
            {
                writer.DeleteDocuments(delTerm);
            }
            writer.Commit();
            writer.Dispose();

            foreach (string termToDel in termsToDel)
            {
                var toDel = docs[termToDel];
                docs.Remove(termToDel);
                assertTrue(null != toDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);
            assertTrue("NumDocs should be > 0 but was " + ir.NumDocs, ir.NumDocs > 0);
            assertEquals(ir.NumDocs, docs.Count);
            ValueSource[] toAdd = new ValueSource[] { new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2) };

            IDictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.EntryIterator;
            BytesRef f;
            while ((f = inputIterator.Next()) != null)
            {
                string field = f.Utf8ToString();
                Document doc = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                long w1 = Convert.ToInt64(doc.GetField(WEIGHT_FIELD_NAME_1).NumericValue);
                long w2 = Convert.ToInt64(doc.GetField(WEIGHT_FIELD_NAME_2).NumericValue);
                assertTrue(f.Equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, w2 + w1);
                assertTrue(inputIterator.Payload.Equals(doc.GetField(PAYLOAD_FIELD_NAME).BinaryValue));
            }
            assertTrue(!docs.Any());
            ir.Dispose();
            dir.Dispose();
        }
        public void TestWithDeletions()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            KeyValuePair<List<string>, IDictionary<string, Document>> res = GenerateIndexDocuments(AtLeast(1000), false, false);
            IDictionary<string, Document> docs = res.Value;
            List<string> invalidDocTerms = res.Key;
            Random rand = Random();
            List<string> termsToDel = new List<string>();
            foreach (Document doc in docs.Values)
            {
                IndexableField f2 = doc.GetField(FIELD_NAME);
                if (rand.NextBoolean() && f2 != null && !invalidDocTerms.Contains(f2.StringValue))
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            Term[] delTerms = new Term[termsToDel.Count];
            for (int i = 0; i < termsToDel.Count; i++)
            {
                delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
            }

            foreach (Term delTerm in delTerms)
            {
                writer.DeleteDocuments(delTerm);
            }
            writer.Commit();
            writer.Dispose();

            foreach (string termToDel in termsToDel)
            {
                var toDel = docs[termToDel];
                assertTrue(toDel != null);
                docs.Remove(termToDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);
            assertEquals(ir.NumDocs, docs.Count);
            IDictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.EntryIterator;
            BytesRef f;
            while ((f = inputIterator.Next()) != null)
            {
                var field = f.Utf8ToString();
                Document doc = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                assertTrue(f.Equals(new BytesRef(doc.Get(FIELD_NAME))));
                IndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.NumericValue) : 0);
                assertEquals(inputIterator.Payload, null);
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }
            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
        public void TestMVGroupedFacetingWithDeletes()
        {
            string    groupField = "hotel";
            FieldType customType = new FieldType();

            customType.IsStored = true;

            Directory         dir = NewDirectory();
            RandomIndexWriter w   = new RandomIndexWriter(
                Random,
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                                     new MockAnalyzer(Random)).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
            bool useDv = false;

            // Cannot assert this since we use NoMergePolicy:
            w.DoRandomForceMergeAssert = false;

            // 0
            Document doc = new Document();

            doc.Add(new StringField("x", "x", Field.Store.NO));
            w.AddDocument(doc);

            // 1
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            w.Commit();
            w.DeleteDocuments(new TermQuery(new Term("airport", "ams")));

            // 2
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            // 3
            doc = new Document();
            AddField(doc, groupField, "a", useDv);
            doc.Add(new StringField("airport", "dus", Field.Store.NO));

            w.AddDocument(doc);

            // 4
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            // 5
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);

            // 6
            doc = new Document();
            AddField(doc, groupField, "b", useDv);
            doc.Add(new StringField("airport", "ams", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            // 7
            doc = new Document();
            doc.Add(new StringField("x", "x", Field.Store.NO));
            w.AddDocument(doc);
            w.Commit();

            w.Dispose();
            IndexSearcher indexSearcher = NewSearcher(DirectoryReader.Open(dir));
            AbstractGroupFacetCollector groupedAirportFacetCollector = CreateRandomCollector(groupField, "airport", null, true);

            indexSearcher.Search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
            TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.MergeSegmentResults(10, 0, false);
            assertEquals(3, airportResult.TotalCount);
            assertEquals(1, airportResult.TotalMissingCount);

            IList <TermGroupFacetCollector.FacetEntry> entries = airportResult.GetFacetEntries(0, 10);

            assertEquals(2, entries.Count);
            assertEquals("ams", entries[0].Value.Utf8ToString());
            assertEquals(2, entries[0].Count);
            assertEquals("dus", entries[1].Value.Utf8ToString());
            assertEquals(1, entries[1].Count);

            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
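        // CreateRandomCollector is a helper not shown in this snippet. A minimal,
        // non-random stand-in might look like this (factory signature assumed from
        // the Java Lucene 4.x original):
        private static AbstractGroupFacetCollector CreateSimpleCollector(string groupField, string facetField)
        {
            // count facets per group; multi-valued facet field, no prefix filter
            return TermGroupFacetCollector.CreateTermGroupFacetCollector(
                groupField, facetField, true, null, 128);
        }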
        public void TestThreads()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
            Document doc = new Document();
            Field idField = new StringField("id", "", Field.Store.NO);
            Field storedBinField = new StoredField("storedBin", new byte[0]);
            Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
            Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
            Field storedNumericField = new StoredField("storedNum", "");
            Field dvNumericField = new NumericDocValuesField("dvNum", 0);
            doc.Add(idField);
            doc.Add(storedBinField);
            doc.Add(dvBinField);
            doc.Add(dvSortedField);
            doc.Add(storedNumericField);
            doc.Add(dvNumericField);

            // index some docs
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                int length = TestUtil.NextInt(Random(), 0, 8);
                var buffer = new byte[length];
                Random().NextBytes(buffer);
                storedBinField.BytesValue = new BytesRef(buffer);
                dvBinField.BytesValue = new BytesRef(buffer);
                dvSortedField.BytesValue = new BytesRef(buffer);
                long numericValue = Random().NextLong();
                storedNumericField.StringValue = Convert.ToString(numericValue);
                dvNumericField.LongValue = numericValue;
                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }
            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            int numThreads = TestUtil.NextInt(Random(), 2, 7);
            ThreadClass[] threads = new ThreadClass[numThreads];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int i = 0; i < threads.Length; i++)
            {
                threads[i] = new ThreadAnonymousInnerClassHelper(this, ir, startingGun);
                threads[i].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }
            ir.Dispose();
            dir.Dispose();
        }
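        // ThreadAnonymousInnerClassHelper is not included in this snippet; a hedged
        // sketch of what each reader thread plausibly does (verify that stored
        // fields and doc values, written from the same buffer, still agree):
        private sealed class ReaderThread : ThreadClass // hypothetical name
        {
            private readonly DirectoryReader ir;
            private readonly CountdownEvent startingGun;

            public ReaderThread(DirectoryReader ir, CountdownEvent startingGun)
            {
                this.ir = ir;
                this.startingGun = startingGun;
            }

            public override void Run()
            {
                startingGun.Wait(); // all threads begin reading at once
                foreach (var context in ir.Leaves)
                {
                    AtomicReader r = context.AtomicReader;
                    BinaryDocValues dv = r.GetBinaryDocValues("dvBin");
                    BytesRef scratch = new BytesRef();
                    for (int docID = 0; docID < r.MaxDoc; docID++)
                    {
                        dv.Get(docID, scratch);
                        // stored field and doc values were written from the same buffer
                        BytesRef stored = r.Document(docID).GetBinaryValue("storedBin");
                        Assert.AreEqual(stored, scratch);
                    }
                }
            }
        }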
        public virtual void TestForceMergeDeletesMaxSegSize()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            TieredMergePolicy tmp = new TieredMergePolicy();
            tmp.MaxMergedSegmentMB = 0.01;
            tmp.ForceMergeDeletesPctAllowed = 0.0;
            conf.SetMergePolicy(tmp);

            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, conf);
            w.RandomForceMerge = false;

            int numDocs = AtLeast(200);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(NewStringField("id", "" + i, Field.Store.NO));
                doc.Add(NewTextField("content", "aaa " + i, Field.Store.NO));
                w.AddDocument(doc);
            }

            w.ForceMerge(1);
            IndexReader r = w.Reader;
            Assert.AreEqual(numDocs, r.MaxDoc);
            Assert.AreEqual(numDocs, r.NumDocs);
            r.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: delete doc");
            }

            w.DeleteDocuments(new Term("id", "" + (42 + 17)));

            r = w.Reader;
            Assert.AreEqual(numDocs, r.MaxDoc);
            Assert.AreEqual(numDocs - 1, r.NumDocs);
            r.Dispose();

            w.ForceMergeDeletes();

            r = w.Reader;
            Assert.AreEqual(numDocs - 1, r.MaxDoc);
            Assert.AreEqual(numDocs - 1, r.NumDocs);
            r.Dispose();

            w.Dispose();

            dir.Dispose();
        }
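        // The assertions above hinge on the MaxDoc/NumDocs distinction: MaxDoc counts
        // every doc slot including deleted ones, while NumDocs counts only live docs.
        // A small helper capturing the invariant (a sketch, not part of the original
        // test): after ForceMergeDeletes, the two converge.
        private static int DeletedCount(IndexReader r)
        {
            return r.MaxDoc - r.NumDocs; // 0 once deletes have been merged away
        }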
 // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
 public virtual void TestMerge()
 {
     RandomDocumentFactory docFactory = new RandomDocumentFactory(this, 5, 20);
     int numDocs = AtLeast(100);
     int numDeletes = Random().Next(numDocs);
     HashSet<int?> deletes = new HashSet<int?>();
     while (deletes.Count < numDeletes)
     {
         deletes.Add(Random().Next(numDocs));
     }
     foreach (Options options in ValidOptions())
     {
         RandomDocument[] docs = new RandomDocument[numDocs];
         for (int i = 0; i < numDocs; ++i)
         {
             docs[i] = docFactory.NewDocument(TestUtil.NextInt(Random(), 1, 3), AtLeast(10), options);
         }
         Directory dir = NewDirectory();
         RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, ClassEnvRule.Similarity, ClassEnvRule.TimeZone);
         for (int i = 0; i < numDocs; ++i)
         {
             writer.AddDocument(AddId(docs[i].ToDocument(), "" + i));
             if (Rarely())
             {
                 writer.Commit();
             }
         }
         foreach (int delete in deletes)
         {
             writer.DeleteDocuments(new Term("id", "" + delete));
         }
         // merge with deletes
         writer.ForceMerge(1);
         IndexReader reader = writer.Reader;
         for (int i = 0; i < numDocs; ++i)
         {
             if (!deletes.Contains(i))
             {
                 int docID = DocID(reader, "" + i);
                 AssertEquals(docs[i], reader.GetTermVectors(docID));
             }
         }
         reader.Dispose();
         writer.Dispose();
         dir.Dispose();
     }
 }
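 // AddId and DocID are helpers not shown in this snippet; plausible sketches,
 // consistent with how TestMerge uses them above:
 private static Document AddId(Document doc, string id)
 {
     // index (but don't store) an id so individual docs can be deleted by term
     doc.Add(new StringField("id", id, Field.Store.NO));
     return doc;
 }

 private static int DocID(IndexReader reader, string id)
 {
     // resolve the docID of the (single, live) document carrying this id
     IndexSearcher searcher = new IndexSearcher(reader);
     TopDocs topDocs = searcher.Search(new TermQuery(new Term("id", id)), 1);
     return topDocs.ScoreDocs[0].Doc;
 }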
Example #32
        public void TestRandom()
        {
            // We build two indices at once: one normalized (which
            // ToParentBlockJoinQuery/Collector,
            // ToChildBlockJoinQuery can query) and the other w/
            // the same docs, just fully denormalized:
            Directory dir = NewDirectory();
            Directory joinDir = NewDirectory();

            int numParentDocs = TestUtil.NextInt(Random(), 100 * RANDOM_MULTIPLIER, 300 * RANDOM_MULTIPLIER);
            //final int numParentDocs = 30;

            // Values for parent fields:
            string[][] parentFields = GetRandomFields(numParentDocs / 2);
            // Values for child fields:
            string[][] childFields = GetRandomFields(numParentDocs);

            bool doDeletes = Random().NextBoolean();
            IList<int> toDelete = new List<int>();

            // TODO: parallel star join, nested join cases too!
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            RandomIndexWriter joinW = new RandomIndexWriter(Random(), joinDir, Similarity, TimeZone);
            for (int parentDocID = 0; parentDocID < numParentDocs; parentDocID++)
            {
                Document parentDoc = new Document();
                Document parentJoinDoc = new Document();
                Field id = NewStringField("parentID", "" + parentDocID, Field.Store.YES);
                parentDoc.Add(id);
                parentJoinDoc.Add(id);
                parentJoinDoc.Add(NewStringField("isParent", "x", Field.Store.NO));
                for (int field = 0; field < parentFields.Length; field++)
                {
                    if (Random().NextDouble() < 0.9)
                    {
                        Field f = NewStringField("parent" + field, parentFields[field][Random().Next(parentFields[field].Length)], Field.Store.NO);
                        parentDoc.Add(f);
                        parentJoinDoc.Add(f);
                    }
                }

                if (doDeletes)
                {
                    parentDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
                    parentJoinDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
                }

                IList<Document> joinDocs = new List<Document>();

                if (VERBOSE)
                {
                    StringBuilder sb = new StringBuilder();
                    sb.Append("parentID=").Append(parentDoc.Get("parentID"));
                    for (int fieldID = 0; fieldID < parentFields.Length; fieldID++)
                    {
                        string parent = parentDoc.Get("parent" + fieldID);
                        if (parent != null)
                        {
                            sb.Append(" parent" + fieldID + "=" + parent);
                        }
                    }
                    Console.WriteLine("  " + sb);
                }

                int numChildDocs = TestUtil.NextInt(Random(), 1, 20);
                for (int childDocID = 0; childDocID < numChildDocs; childDocID++)
                {
                    // Denormalize: copy all parent fields into child doc:
                    Document childDoc = TestUtil.CloneDocument(parentDoc);
                    Document joinChildDoc = new Document();
                    joinDocs.Add(joinChildDoc);

                    Field childID = NewStringField("childID", "" + childDocID, Field.Store.YES);
                    childDoc.Add(childID);
                    joinChildDoc.Add(childID);

                    for (int childFieldID = 0; childFieldID < childFields.Length; childFieldID++)
                    {
                        if (Random().NextDouble() < 0.9)
                        {
                            Field f = NewStringField("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)], Field.Store.NO);
                            childDoc.Add(f);
                            joinChildDoc.Add(f);
                        }
                    }

                    if (VERBOSE)
                    {
                        StringBuilder sb = new StringBuilder();
                        sb.Append("childID=").Append(joinChildDoc.Get("childID"));
                        for (int fieldID = 0; fieldID < childFields.Length; fieldID++)
                        {
                            string child = joinChildDoc.Get("child" + fieldID);
                            if (child != null)
                            {
                                sb.Append(" child" + fieldID + "=" + child);
                            }
                        }
                        Console.WriteLine("    " + sb);
                    }

                    if (doDeletes)
                    {
                        joinChildDoc.Add(NewStringField("blockID", "" + parentDocID, Field.Store.NO));
                    }

                    w.AddDocument(childDoc);
                }

                // Parent last:
                joinDocs.Add(parentJoinDoc);
                joinW.AddDocuments(joinDocs);

                if (doDeletes && Random().Next(30) == 7)
                {
                    toDelete.Add(parentDocID);
                }
            }

            foreach (int deleteID in toDelete)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("DELETE parentID=" + deleteID);
                }
                w.DeleteDocuments(new Term("blockID", "" + deleteID));
                joinW.DeleteDocuments(new Term("blockID", "" + deleteID));
            }

            IndexReader r = w.Reader;
            w.Dispose();
            IndexReader joinR = joinW.Reader;
            joinW.Dispose();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: reader=" + r);
                Console.WriteLine("TEST: joinReader=" + joinR);

                for (int docIDX = 0; docIDX < joinR.MaxDoc; docIDX++)
                {
                    Console.WriteLine("  docID=" + docIDX + " doc=" + joinR.Document(docIDX));
                }
            }

            IndexSearcher s = NewSearcher(r);

            IndexSearcher joinS = new IndexSearcher(joinR);

            Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("isParent", "x"))));

            int iters = 200 * RANDOM_MULTIPLIER;

            for (int iter = 0; iter < iters; iter++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: iter=" + (1 + iter) + " of " + iters);
                }

                Query childQuery;
                if (Random().Next(3) == 2)
                {
                    int childFieldID = Random().Next(childFields.Length);
                    childQuery = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
                }
                else if (Random().Next(3) == 2)
                {
                    BooleanQuery bq = new BooleanQuery();
                    childQuery = bq;
                    int numClauses = TestUtil.NextInt(Random(), 2, 4);
                    bool didMust = false;
                    for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
                    {
                        Query clause;
                        BooleanClause.Occur occur;
                        if (!didMust && Random().NextBoolean())
                        {
                            occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                            clause = new TermQuery(RandomChildTerm(childFields[0]));
                            didMust = true;
                        }
                        else
                        {
                            occur = BooleanClause.Occur.SHOULD;
                            int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
                            clause = new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)]));
                        }
                        bq.Add(clause, occur);
                    }
                }
                else
                {
                    BooleanQuery bq = new BooleanQuery();
                    childQuery = bq;

                    bq.Add(new TermQuery(RandomChildTerm(childFields[0])), BooleanClause.Occur.MUST);
                    int childFieldID = TestUtil.NextInt(Random(), 1, childFields.Length - 1);
                    bq.Add(new TermQuery(new Term("child" + childFieldID, childFields[childFieldID][Random().Next(childFields[childFieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
                }

                int x = Random().Next(4);
                ScoreMode agg;
                if (x == 0)
                {
                    agg = ScoreMode.None;
                }
                else if (x == 1)
                {
                    agg = ScoreMode.Max;
                }
                else if (x == 2)
                {
                    agg = ScoreMode.Total;
                }
                else
                {
                    agg = ScoreMode.Avg;
                }

                ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery, parentsFilter, agg);

                // To run against the block-join index:
                Query parentJoinQuery;

                // Same query as parentJoinQuery, but to run against
                // the fully denormalized index (so we can compare
                // results):
                Query parentQuery;

                if (Random().NextBoolean())
                {
                    parentQuery = childQuery;
                    parentJoinQuery = childJoinQuery;
                }
                else
                {
                    // AND parent field w/ child field
                    BooleanQuery bq = new BooleanQuery();
                    parentJoinQuery = bq;
                    Term parentTerm = RandomParentTerm(parentFields[0]);
                    if (Random().NextBoolean())
                    {
                        bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
                        bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                    }
                    else
                    {
                        bq.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                        bq.Add(childJoinQuery, BooleanClause.Occur.MUST);
                    }

                    BooleanQuery bq2 = new BooleanQuery();
                    parentQuery = bq2;
                    if (Random().NextBoolean())
                    {
                        bq2.Add(childQuery, BooleanClause.Occur.MUST);
                        bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                    }
                    else
                    {
                        bq2.Add(new TermQuery(parentTerm), BooleanClause.Occur.MUST);
                        bq2.Add(childQuery, BooleanClause.Occur.MUST);
                    }
                }

                Sort parentSort = GetRandomSort("parent", parentFields.Length);
                Sort childSort = GetRandomSort("child", childFields.Length);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: query=" + parentQuery + " joinQuery=" + parentJoinQuery + " parentSort=" + parentSort + " childSort=" + childSort);
                }

                // Merge both sorts:
                IList<SortField> sortFields = new List<SortField>(Arrays.AsList(parentSort.GetSort()));
                sortFields.AddRange(Arrays.AsList(childSort.GetSort()));
                Sort parentAndChildSort = new Sort(sortFields.ToArray());

                TopDocs results = s.Search(parentQuery, null, r.NumDocs, parentAndChildSort);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: normal index gets " + results.TotalHits + " hits");
                    ScoreDoc[] hits = results.ScoreDocs;
                    for (int hitIDX = 0; hitIDX < hits.Length; hitIDX++)
                    {
                        Document doc = s.Doc(hits[hitIDX].Doc);
                        //System.out.println("  score=" + hits[hitIDX].Score + " parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                        Console.WriteLine("  parentID=" + doc.Get("parentID") + " childID=" + doc.Get("childID") + " (docID=" + hits[hitIDX].Doc + ")");
                        FieldDoc fd = (FieldDoc)hits[hitIDX];
                        if (fd.Fields != null)
                        {
                            Console.Write("    ");
                            foreach (object o in fd.Fields)
                            {
                                if (o is BytesRef)
                                {
                                    Console.Write(((BytesRef)o).Utf8ToString() + " ");
                                }
                                else
                                {
                                    Console.Write(o + " ");
                                }
                            }
                            Console.WriteLine();
                        }
                    }
                }

                bool trackScores;
                bool trackMaxScore;
                if (agg == ScoreMode.None)
                {
                    trackScores = false;
                    trackMaxScore = false;
                }
                else
                {
                    trackScores = Random().NextBoolean();
                    trackMaxScore = Random().NextBoolean();
                }
                ToParentBlockJoinCollector c = new ToParentBlockJoinCollector(parentSort, 10, trackScores, trackMaxScore);

                joinS.Search(parentJoinQuery, c);

                int hitsPerGroup = TestUtil.NextInt(Random(), 1, 20);
                //final int hitsPerGroup = 100;
                TopGroups<int> joinResults = c.GetTopGroups(childJoinQuery, childSort, 0, hitsPerGroup, 0, true);

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: block join index gets " + (joinResults == null ? 0 : joinResults.Groups.Length) + " groups; hitsPerGroup=" + hitsPerGroup);
                    if (joinResults != null)
                    {
                        IGroupDocs<int>[] groups = joinResults.Groups;
                        for (int groupIDX = 0; groupIDX < groups.Length; groupIDX++)
                        {
                            IGroupDocs<int> group = groups[groupIDX];
                            if (group.GroupSortValues != null)
                            {
                                Console.Write("  ");
                                foreach (object o in group.GroupSortValues)
                                {
                                    if (o is BytesRef)
                                    {
                                        Console.Write(((BytesRef)o).Utf8ToString() + " ");
                                    }
                                    else
                                    {
                                        Console.Write(o + " ");
                                    }
                                }
                                Console.WriteLine();
                            }

                            assertNotNull(group.GroupValue);
                            Document parentDoc = joinS.Doc(group.GroupValue);
                            Console.WriteLine("  group parentID=" + parentDoc.Get("parentID") + " (docID=" + group.GroupValue + ")");
                            for (int hitIDX = 0; hitIDX < group.ScoreDocs.Length; hitIDX++)
                            {
                                Document doc = joinS.Doc(group.ScoreDocs[hitIDX].Doc);
                                //System.out.println("    score=" + group.ScoreDocs[hitIDX].Score + " childID=" + doc.Get("childID") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                                Console.WriteLine("    childID=" + doc.Get("childID") + " child0=" + doc.Get("child0") + " (docID=" + group.ScoreDocs[hitIDX].Doc + ")");
                            }
                        }
                    }
                }

                if (results.TotalHits == 0)
                {
                    assertNull(joinResults);
                }
                else
                {
                    CompareHits(r, joinR, results, joinResults);
                    TopDocs b = joinS.Search(childJoinQuery, 10);
                    foreach (ScoreDoc hit in b.ScoreDocs)
                    {
                        Explanation explanation = joinS.Explain(childJoinQuery, hit.Doc);
                        Document document = joinS.Doc(hit.Doc - 1);
                        int childId = Convert.ToInt32(document.Get("childID"));
                        assertTrue(explanation.IsMatch);
                        assertEquals(hit.Score, explanation.Value, 0.0f);
                        assertEquals(string.Format("Score based on child doc range from {0} to {1}", hit.Doc - 1 - childId, hit.Doc - 1), explanation.Description);
                    }
                }

                // Test joining in the opposite direction (parent to
                // child):

                // Get random query against parent documents:
                Query parentQuery2;
                if (Random().Next(3) == 2)
                {
                    int fieldID = Random().Next(parentFields.Length);
                    parentQuery2 = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
                }
                else if (Random().Next(3) == 2)
                {
                    BooleanQuery bq = new BooleanQuery();
                    parentQuery2 = bq;
                    int numClauses = TestUtil.NextInt(Random(), 2, 4);
                    bool didMust = false;
                    for (int clauseIDX = 0; clauseIDX < numClauses; clauseIDX++)
                    {
                        Query clause;
                        BooleanClause.Occur occur;
                        if (!didMust && Random().NextBoolean())
                        {
                            occur = Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
                            clause = new TermQuery(RandomParentTerm(parentFields[0]));
                            didMust = true;
                        }
                        else
                        {
                            occur = BooleanClause.Occur.SHOULD;
                            int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
                            clause = new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)]));
                        }
                        bq.Add(clause, occur);
                    }
                }
                else
                {
                    BooleanQuery bq = new BooleanQuery();
                    parentQuery2 = bq;

                    bq.Add(new TermQuery(RandomParentTerm(parentFields[0])), BooleanClause.Occur.MUST);
                    int fieldID = TestUtil.NextInt(Random(), 1, parentFields.Length - 1);
                    bq.Add(new TermQuery(new Term("parent" + fieldID, parentFields[fieldID][Random().Next(parentFields[fieldID].Length)])), Random().NextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: top down: parentQuery2=" + parentQuery2);
                }

                // Maps parent query to child docs:
                ToChildBlockJoinQuery parentJoinQuery2 = new ToChildBlockJoinQuery(parentQuery2, parentsFilter, Random().NextBoolean());

                // To run against the block-join index:
                Query childJoinQuery2;

                // Same query as parentJoinQuery, but to run against
                // the fully denormalized index (so we can compare
                // results):
                Query childQuery2;

                // apply a filter to children
                Filter childFilter2, childJoinFilter2;

                if (Random().NextBoolean())
                {
                    childQuery2 = parentQuery2;
                    childJoinQuery2 = parentJoinQuery2;
                    childFilter2 = null;
                    childJoinFilter2 = null;
                }
                else
                {
                    Term childTerm = RandomChildTerm(childFields[0]);
                    if (Random().NextBoolean()) // filtered case
                    {
                        childJoinQuery2 = parentJoinQuery2;
                        Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                        childJoinFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
                    }
                    else
                    {
                        childJoinFilter2 = null;
                        // AND child field w/ parent query:
                        BooleanQuery bq = new BooleanQuery();
                        childJoinQuery2 = bq;
                        if (Random().NextBoolean())
                        {
                            bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                            bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                        }
                        else
                        {
                            bq.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                            bq.Add(parentJoinQuery2, BooleanClause.Occur.MUST);
                        }
                    }

                    if (Random().NextBoolean()) // filtered case
                    {
                        childQuery2 = parentQuery2;
                        Filter f = new QueryWrapperFilter(new TermQuery(childTerm));
                        childFilter2 = Random().NextBoolean() ? new FixedBitSetCachingWrapperFilter(f) : f;
                    }
                    else
                    {
                        childFilter2 = null;
                        BooleanQuery bq2 = new BooleanQuery();
                        childQuery2 = bq2;
                        if (Random().NextBoolean())
                        {
                            bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                            bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                        }
                        else
                        {
                            bq2.Add(new TermQuery(childTerm), BooleanClause.Occur.MUST);
                            bq2.Add(parentQuery2, BooleanClause.Occur.MUST);
                        }
                    }
                }

                Sort childSort2 = GetRandomSort("child", childFields.Length);

                // Search denormalized index:
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: run top down query=" + childQuery2 + " filter=" + childFilter2 + " sort=" + childSort2);
                }
                TopDocs results2 = s.Search(childQuery2, childFilter2, r.NumDocs, childSort2);
                if (VERBOSE)
                {
                    Console.WriteLine("  " + results2.TotalHits + " totalHits:");
                    foreach (ScoreDoc sd in results2.ScoreDocs)
                    {
                        Document doc = s.Doc(sd.Doc);
                        Console.WriteLine("  childID=" + doc.Get("childID") + " parentID=" + doc.Get("parentID") + " docID=" + sd.Doc);
                    }
                }

                // Search join index:
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: run top down join query=" + childJoinQuery2 + " filter=" + childJoinFilter2 + " sort=" + childSort2);
                }
                TopDocs joinResults2 = joinS.Search(childJoinQuery2, childJoinFilter2, joinR.NumDocs, childSort2);
                if (VERBOSE)
                {
                    Console.WriteLine("  " + joinResults2.TotalHits + " totalHits:");
                    foreach (ScoreDoc sd in joinResults2.ScoreDocs)
                    {
                        Document doc = joinS.Doc(sd.Doc);
                        Document parentDoc = GetParentDoc(joinR, parentsFilter, sd.Doc);
                        Console.WriteLine("  childID=" + doc.Get("childID") + " parentID=" + parentDoc.Get("parentID") + " docID=" + sd.Doc);
                    }
                }

                CompareChildHits(r, joinR, results2, joinResults2);
            }

            r.Dispose();
            joinR.Dispose();
            dir.Dispose();
            joinDir.Dispose();
        }
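        // The block-join contract this test exercises, distilled into a sketch (not
        // code from the original test): children go first and the parent last within
        // a single AddDocuments call, so ToParentBlockJoinQuery can roll child
        // matches up onto their enclosing parent via the parents filter.
        private static void IndexBlock(RandomIndexWriter joinW, Document parentDoc, params Document[] children)
        {
            // children first, parent last: the block-join queries rely on this order
            var block = new List<Document>(children) { parentDoc };
            joinW.AddDocuments(block);
        }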
        public virtual void TestPostings()
        {
            Directory dir = NewFSDirectory(CreateTempDir("postings"));
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetCodec(Codec.ForName("Lucene40"));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            Document doc = new Document();

            // id field
            FieldType idType = new FieldType(StringField.TYPE_NOT_STORED);
            idType.StoreTermVectors = true;
            Field idField = new Field("id", "", idType);
            doc.Add(idField);

            // title field: short text field
            FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED);
            titleType.StoreTermVectors = true;
            titleType.StoreTermVectorPositions = true;
            titleType.StoreTermVectorOffsets = true;
            titleType.IndexOptions = IndexOptions();
            Field titleField = new Field("title", "", titleType);
            doc.Add(titleField);

            // body field: long text field
            FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED);
            bodyType.StoreTermVectors = true;
            bodyType.StoreTermVectorPositions = true;
            bodyType.StoreTermVectorOffsets = true;
            bodyType.IndexOptions = IndexOptions();
            Field bodyField = new Field("body", "", bodyType);
            doc.Add(bodyField);

            int numDocs = AtLeast(1000);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                titleField.StringValue = FieldValue(1);
                bodyField.StringValue = FieldValue(3);
                iw.AddDocument(doc);
                if (Random().Next(20) == 0)
                {
                    iw.DeleteDocuments(new Term("id", Convert.ToString(i)));
                }
            }
            if (Random().NextBoolean())
            {
                // delete 1-100% of docs
                iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)]));
            }
            iw.Dispose();
            dir.Dispose(); // checkindex
        }
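        // FieldValue and Terms are helpers not shown in this snippet. A plausible
        // sketch of FieldValue(maxTF): emit a random subset of Terms, repeating each
        // up to maxTF times, so postings see varied term frequencies:
        private string FieldValue(int maxTF)
        {
            var sb = new StringBuilder();
            foreach (string term in Terms)
            {
                if (Random().NextBoolean())
                {
                    continue; // skip some terms entirely
                }
                int tf = TestUtil.NextInt(Random(), 1, maxTF); // term frequency
                for (int j = 0; j < tf; j++)
                {
                    sb.Append(term).Append(' ');
                }
            }
            return sb.ToString();
        }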
        private void CreateRandomIndexes()
        {
            dir1 = NewDirectory();
            dir2 = NewDirectory();
            int numDocs = AtLeast(150);
            int numTerms = TestUtil.NextInt(Random(), 1, numDocs / 5);
            ISet<string> randomTerms = new HashSet<string>();
            while (randomTerms.size() < numTerms)
            {
                randomTerms.add(TestUtil.RandomSimpleString(Random()));
            }
            terms = new List<string>(randomTerms);
            long seed = Random().NextLong();
            IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
            IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
            iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
            RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1);
            RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2);
            for (int i = 0; i < numDocs; ++i)
            {
                if (Random().nextInt(5) == 0 && i != numDocs - 1)
                {
                    string term = RandomInts.RandomFrom(Random(), terms);
                    iw1.DeleteDocuments(new Term("s", term));
                    iw2.DeleteDocuments(new Term("s", term));
                }
                Document doc = randomDocument();
                iw1.AddDocument(doc);
                iw2.AddDocument(doc);
                if (Random().nextInt(8) == 0)
                {
                    iw1.Commit();
                    iw2.Commit();
                }
            }
            // Make sure we have something to merge
            iw1.Commit();
            iw2.Commit();
            Document doc2 = randomDocument();
            // NOTE: don't use RIW.addDocument directly, since it sometimes commits,
            // which may trigger a merge, in which case forceMerge may not do anything.
            // With field updates this is a problem, since the updates can go into the
            // single segment in the index, and therefore the index won't be sorted.
            // This hurts the assumption of the test later on, that the index is sorted
            // by SortingMP.
            iw1.w.AddDocument(doc2);
            iw2.w.AddDocument(doc2);

            if (DefaultCodecSupportsFieldUpdates())
            {
                // update NDV of docs belonging to one term (covers many documents)
                long value = Random().NextLong();
                string term = RandomInts.RandomFrom(Random(), terms);
                iw1.w.UpdateNumericDocValue(new Term("s", term), "ndv", value);
                iw2.w.UpdateNumericDocValue(new Term("s", term), "ndv", value);
            }

            iw1.ForceMerge(1);
            iw2.ForceMerge(1);
            iw1.Dispose();
            iw2.Dispose();
            reader = DirectoryReader.Open(dir1);
            sortedReader = DirectoryReader.Open(dir2);
        }
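        // A hedged sketch of how the "index is sorted" assumption could later be
        // spot-checked, assuming the sort key is the numeric "ndv" field (an
        // assumption; the actual Sort object isn't shown in this snippet):
        private static void AssertSortedByNdv(DirectoryReader sortedReader)
        {
            AtomicReader leaf = SlowCompositeReaderWrapper.Wrap(sortedReader);
            NumericDocValues ndv = leaf.GetNumericDocValues("ndv");
            long prev = long.MinValue;
            for (int i = 0; i < leaf.MaxDoc; i++)
            {
                long v = ndv.Get(i);
                Debug.Assert(v >= prev, "index is not sorted by ndv");
                prev = v;
            }
        }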
Example #35
        private void CreateRandomIndexes()
        {
            dir1 = NewDirectory();
            dir2 = NewDirectory();
            int           numDocs     = AtLeast(150);
            int           numTerms    = TestUtil.NextInt32(Random, 1, numDocs / 5);
            ISet <string> randomTerms = new JCG.HashSet <string>();

            while (randomTerms.size() < numTerms)
            {
                randomTerms.add(TestUtil.RandomSimpleString(Random));
            }
            terms = new JCG.List <string>(randomTerms);
            long seed = Random.NextInt64();
            IndexWriterConfig iwc1 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));
            IndexWriterConfig iwc2 = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(new Random((int)seed)));

            iwc2.SetMergePolicy(NewSortingMergePolicy(sort));
            RandomIndexWriter iw1 = new RandomIndexWriter(new Random((int)seed), dir1, iwc1);
            RandomIndexWriter iw2 = new RandomIndexWriter(new Random((int)seed), dir2, iwc2);

            for (int i = 0; i < numDocs; ++i)
            {
                if (Random.nextInt(5) == 0 && i != numDocs - 1)
                {
                    string term = RandomPicks.RandomFrom(Random, terms);
                    iw1.DeleteDocuments(new Term("s", term));
                    iw2.DeleteDocuments(new Term("s", term));
                }
                Document doc = randomDocument();
                iw1.AddDocument(doc);
                iw2.AddDocument(doc);
                if (Random.nextInt(8) == 0)
                {
                    iw1.Commit();
                    iw2.Commit();
                }
            }
            // Make sure we have something to merge
            iw1.Commit();
            iw2.Commit();
            Document doc2 = randomDocument();

            // NOTE: don't use RIW.addDocument directly, since it sometimes commits,
            // which may trigger a merge, in which case forceMerge may not do anything.
            // With field updates this is a problem, since the updates can go into the
            // single segment in the index, and therefore the index won't be sorted.
            // This hurts the assumption of the test later on, that the index is sorted
            // by SortingMP.
            iw1.IndexWriter.AddDocument(doc2);
            iw2.IndexWriter.AddDocument(doc2);

            if (DefaultCodecSupportsFieldUpdates)
            {
                // update NDV of docs belonging to one term (covers many documents)
                long   value = Random.NextInt64();
                string term  = RandomPicks.RandomFrom(Random, terms);
                iw1.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
                iw2.IndexWriter.UpdateNumericDocValue(new Term("s", term), "ndv", value);
            }

            iw1.ForceMerge(1);
            iw2.ForceMerge(1);
            iw1.Dispose();
            iw2.Dispose();
            reader       = DirectoryReader.Open(dir1);
            sortedReader = DirectoryReader.Open(dir2);
        }
        public void TestWithDeletions()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer            = new RandomIndexWriter(Random(), dir, iwc);
            IDictionary <string, Document> docs = GenerateIndexDocuments(AtLeast(100));
            Random        rand       = Random();
            List <string> termsToDel = new List <string>();

            foreach (Document doc in docs.Values)
            {
                if (rand.nextBoolean() && termsToDel.size() < docs.size() - 1)
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            Term[] delTerms = new Term[termsToDel.size()];
            for (int i = 0; i < termsToDel.size(); i++)
            {
                delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
            }

            foreach (Term delTerm in delTerms)
            {
                writer.DeleteDocuments(delTerm);
            }
            writer.Commit();
            writer.Dispose();

            foreach (string termToDel in termsToDel)
            {
                var toDel = docs[termToDel];
                docs.Remove(termToDel);
                assertTrue(null != toDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);

            assertTrue("NumDocs should be > 0 but was " + ir.NumDocs, ir.NumDocs > 0);
            assertEquals(ir.NumDocs, docs.size());
            ValueSource[] toAdd = new ValueSource[] { new Int64FieldSource(WEIGHT_FIELD_NAME_1), new Int64FieldSource(WEIGHT_FIELD_NAME_2) };

            IDictionary    dictionary    = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumSingleFunction(toAdd), PAYLOAD_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                string   field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                long w1 = doc.GetField(WEIGHT_FIELD_NAME_1).GetInt64ValueOrDefault();
                long w2 = doc.GetField(WEIGHT_FIELD_NAME_2).GetInt64ValueOrDefault();
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                assertEquals(inputIterator.Weight, w2 + w1);
                assertTrue(inputIterator.Payload.equals(doc.GetField(PAYLOAD_FIELD_NAME).GetBinaryValue()));
            }
            assertTrue(!docs.Any());
            ir.Dispose();
            dir.Dispose();
        }
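        // GenerateIndexDocuments is not shown in this snippet; a document compatible
        // with the assertions above would plausibly be built like this (the helper
        // and its concrete values are hypothetical):
        private static Document MakeSuggestionDoc(string text, long w1, long w2)
        {
            Document doc = new Document();
            doc.Add(new TextField(FIELD_NAME, text, Field.Store.YES));        // suggestion text
            doc.Add(new StoredField(PAYLOAD_FIELD_NAME, new BytesRef(text))); // payload
            doc.Add(new NumericDocValuesField(WEIGHT_FIELD_NAME_1, w1));      // first weight
            doc.Add(new NumericDocValuesField(WEIGHT_FIELD_NAME_2, w2));      // second weight
            // the SumSingleFunction dictionary then reports Weight == w1 + w2
            return doc;
        }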
        private void DoTestMissingVsFieldCache(LongProducer longs)
        {
            AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField());
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
            Field idField = new StringField("id", "", Field.Store.NO);
            Field indexedField = NewStringField("indexed", "", Field.Store.NO);
            Field dvField = new NumericDocValuesField("dv", 0);

            // index some docs
            int numDocs = AtLeast(300);
            // numDocs should always be > 256 so that in case of a codec that optimizes
            // for numbers of values <= 256, all storage layouts are tested
            Debug.Assert(numDocs > 256);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                long value = longs.Next();
                indexedField.StringValue = Convert.ToString(value);
                dvField.LongValue = value;
                Document doc = new Document();
                doc.Add(idField);
                // 1/4 of the time we neglect to add the fields
                if (Random().Next(4) > 0)
                {
                    doc.Add(indexedField);
                    doc.Add(dvField);
                }
                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }

            // merge some segments and ensure that at least one of them has more than
            // 256 values
            writer.ForceMerge(numDocs / 256);

            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            foreach (var context in ir.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                Bits expected = FieldCache.DEFAULT.GetDocsWithField(r, "indexed");
                Bits actual = FieldCache.DEFAULT.GetDocsWithField(r, "dv");
                AssertEquals(expected, actual);
            }
            ir.Dispose();
            dir.Dispose();
        }
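        // LongProducer is not defined in this snippet; its shape follows from how it
        // is consumed above (longs.Next() yields the next test value). A sketch, with
        // a hypothetical example producer:
        internal abstract class LongProducer
        {
            public abstract long Next();
        }

        internal sealed class RandomLongProducer : LongProducer
        {
            private readonly Random random;

            public RandomLongProducer(Random random)
            {
                this.random = random;
            }

            public override long Next()
            {
                // compose a uniformly random 64-bit value from two 32-bit draws
                return ((long)random.Next() << 32) | (uint)random.Next();
            }
        }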
        public void TestWithDeletions()
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, iwc);
            KeyValuePair <List <string>, IDictionary <string, Document> > res = GenerateIndexDocuments(AtLeast(1000), false, false);
            IDictionary <string, Document> docs = res.Value;
            List <String> invalidDocTerms       = res.Key;
            Random        rand       = Random();
            List <string> termsToDel = new List <string>();

            foreach (Document doc in docs.Values)
            {
                IIndexableField f2 = doc.GetField(FIELD_NAME);
                if (rand.nextBoolean() && f2 != null && !invalidDocTerms.Contains(f2.GetStringValue()))
                {
                    termsToDel.Add(doc.Get(FIELD_NAME));
                }
                writer.AddDocument(doc);
            }
            writer.Commit();

            Term[] delTerms = new Term[termsToDel.size()];
            for (int i = 0; i < termsToDel.size(); i++)
            {
                delTerms[i] = new Term(FIELD_NAME, termsToDel[i]);
            }

            foreach (Term delTerm in delTerms)
            {
                writer.DeleteDocuments(delTerm);
            }
            writer.Commit();
            writer.Dispose();

            foreach (string termToDel in termsToDel)
            {
                var toDel = docs[termToDel];
                assertTrue(toDel != null);
                docs.Remove(termToDel);
            }

            IndexReader ir = DirectoryReader.Open(dir);

            assertEquals(ir.NumDocs, docs.size());
            IDictionary    dictionary    = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME);
            IInputIterator inputIterator = dictionary.GetEntryIterator();
            BytesRef       f;

            while ((f = inputIterator.Next()) != null)
            {
                var      field = f.Utf8ToString();
                Document doc   = docs.ContainsKey(field) ? docs[field] : null;
                docs.Remove(field);
                assertTrue(f.equals(new BytesRef(doc.Get(FIELD_NAME))));
                IIndexableField weightField = doc.GetField(WEIGHT_FIELD_NAME);
                assertEquals(inputIterator.Weight, (weightField != null) ? Convert.ToInt64(weightField.GetNumericValue()) : 0);
                assertEquals(inputIterator.Payload, null);
            }

            foreach (string invalidTerm in invalidDocTerms)
            {
                var invalid = docs[invalidTerm];
                docs.Remove(invalidTerm);
                assertNotNull(invalid);
            }
            assertTrue(!docs.Any());

            ir.Dispose();
            dir.Dispose();
        }
        public virtual void TestPostings()
        {
            Directory         dir = NewFSDirectory(CreateTempDir("postings"));
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));

            iwc.SetCodec(Codec.ForName("Lucene40"));
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            Document doc = new Document();

            // id field
            FieldType idType = new FieldType(StringField.TYPE_NOT_STORED);

            idType.StoreTermVectors = true;
            Field idField = new Field("id", "", idType);

            doc.Add(idField);

            // title field: short text field
            FieldType titleType = new FieldType(TextField.TYPE_NOT_STORED);

            titleType.StoreTermVectors         = true;
            titleType.StoreTermVectorPositions = true;
            titleType.StoreTermVectorOffsets   = true;
            titleType.IndexOptions             = IndexOptions();
            Field titleField = new Field("title", "", titleType);

            doc.Add(titleField);

            // body field: long text field
            FieldType bodyType = new FieldType(TextField.TYPE_NOT_STORED);

            bodyType.StoreTermVectors         = true;
            bodyType.StoreTermVectorPositions = true;
            bodyType.StoreTermVectorOffsets   = true;
            bodyType.IndexOptions             = IndexOptions();
            Field bodyField = new Field("body", "", bodyType);

            doc.Add(bodyField);

            int numDocs = AtLeast(1000);

            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue    = Convert.ToString(i);
                titleField.StringValue = FieldValue(1);
                bodyField.StringValue  = FieldValue(3);
                iw.AddDocument(doc);
                if (Random().Next(20) == 0)
                {
                    iw.DeleteDocuments(new Term("id", Convert.ToString(i)));
                }
            }
            if (Random().NextBoolean())
            {
                // delete 1-100% of docs
                iw.DeleteDocuments(new Term("title", Terms[Random().Next(Terms.Length)]));
            }
            iw.Dispose();
            dir.Dispose(); // checkindex
        }
        public virtual void TestTotalBytesSize()
        {
            Directory d = NewDirectory();
            if (d is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)d).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            iwc.SetMaxBufferedDocs(5);
            iwc.SetMergeScheduler(new TrackingCMS());
            if (TestUtil.GetPostingsFormat("id").Equals("SimpleText"))
            {
                // no
                iwc.SetCodec(TestUtil.AlwaysPostingsFormat(new Lucene41PostingsFormat()));
            }
            RandomIndexWriter w = new RandomIndexWriter(Random(), d, iwc);
            for (int i = 0; i < 1000; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("id", "" + i, Field.Store.NO));
                w.AddDocument(doc);

                if (Random().NextBoolean())
                {
                    w.DeleteDocuments(new Term("id", "" + Random().Next(i + 1)));
                }
            }
            Assert.IsTrue(((TrackingCMS)w.w.Config.MergeScheduler).TotMergedBytes != 0);
            w.Dispose();
            d.Dispose();
        }
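        // TrackingCMS is referenced above but not shown; presumably a
        // ConcurrentMergeScheduler subclass that totals the bytes of every merge
        // (member names assumed from the Java 4.x original):
        private class TrackingCMS : ConcurrentMergeScheduler
        {
            public long TotMergedBytes;

            protected override void DoMerge(MergePolicy.OneMerge merge)
            {
                TotMergedBytes += merge.TotalBytesSize; // size of the segments being merged
                base.DoMerge(merge);
            }
        }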
 public void TestBulkMergeWithDeletes()
 {
     int numDocs = AtLeast(200);
     Directory dir = NewDirectory();
     RandomIndexWriter w = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NoMergePolicy.COMPOUND_FILES));
     for (int i = 0; i < numDocs; ++i)
     {
         Document doc = new Document();
         doc.Add(new StringField("id", Convert.ToString(i), Field.Store.YES));
         doc.Add(new StoredField("f", TestUtil.RandomSimpleString(Random())));
         w.AddDocument(doc);
     }
     int deleteCount = TestUtil.NextInt(Random(), 5, numDocs);
     for (int i = 0; i < deleteCount; ++i)
     {
         int id = Random().Next(numDocs);
         w.DeleteDocuments(new Term("id", Convert.ToString(id)));
     }
     w.Commit();
     w.Dispose();
     w = new RandomIndexWriter(Random(), dir);
     w.ForceMerge(TestUtil.NextInt(Random(), 1, 3));
     w.Commit();
     w.Dispose();
     TestUtil.CheckIndex(dir);
     dir.Dispose();
 }
Example #42
        public virtual void TestEnforceDeletions()
        {
            Directory         dir    = NewDirectory();
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergeScheduler(new SerialMergeScheduler()).SetMergePolicy(NewLogMergePolicy(10)));
            // asserts below require no unexpected merges:

            // NOTE: cannot use writer.getReader because RIW (on
            // flipping a coin) may give us a newly opened reader,
            // but we use .reopen on this reader below and expect to
            // (must) get an NRT reader:
            DirectoryReader reader = DirectoryReader.Open(writer.IndexWriter, true);
            // same reason we don't wrap?
            IndexSearcher searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            // add a doc, refresh the reader, and check that it's there
            Document doc = new Document();

            doc.Add(NewStringField("id", "1", Field.Store.YES));
            writer.AddDocument(doc);

            reader   = RefreshReader(reader);
            searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            TopDocs docs = searcher.Search(new MatchAllDocsQuery(), 1);
            Assert.AreEqual(1, docs.TotalHits, "Should find a hit...");

            Filter startFilter = new QueryWrapperFilter(new TermQuery(new Term("id", "1")));

            CachingWrapperFilter filter = new CachingWrapperFilter(startFilter);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.IsTrue(filter.GetSizeInBytes() > 0);

            Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");

            Query constantScore = new ConstantScoreQuery(filter);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

            // make sure we get a cache hit when we reopen reader
            // that had no change to deletions

            // fake delete (deletes nothing):
            writer.DeleteDocuments(new Term("foo", "bar"));

            IndexReader oldReader = reader;
            reader = RefreshReader(reader);
            Assert.IsTrue(reader == oldReader);
            int missCount = filter.missCount;
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");

            // cache hit:
            Assert.AreEqual(missCount, filter.missCount);

            // now delete the doc, refresh the reader, and see that it's not there
            writer.DeleteDocuments(new Term("id", "1"));

            // NOTE: important to hold ref here so GC doesn't clear
            // the cache entry!  Else the assert below may sometimes
            // fail:
            oldReader = reader;
            reader    = RefreshReader(reader);

            searcher = NewSearcher(reader, false);

            missCount = filter.missCount;
            docs      = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");

            // cache hit
            Assert.AreEqual(missCount, filter.missCount);
            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");

            // apply deletes dynamically:
            filter = new CachingWrapperFilter(startFilter);
            writer.AddDocument(doc);
            reader   = RefreshReader(reader);
            searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(1, docs.TotalHits, "[query + filter] Should find a hit...");
            missCount = filter.missCount;
            Assert.IsTrue(missCount > 0);
            constantScore = new ConstantScoreQuery(filter);
            docs          = searcher.Search(constantScore, 1);
            Assert.AreEqual(1, docs.TotalHits, "[just filter] Should find a hit...");
            Assert.AreEqual(missCount, filter.missCount);

            writer.AddDocument(doc);

            // NOTE: important to hold ref here so GC doesn't clear
            // the cache entry!  Else the assert below may sometimes
            // fail:
            oldReader = reader;

            reader   = RefreshReader(reader);
            searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(2, docs.TotalHits, "[query + filter] Should find 2 hits...");
            Assert.IsTrue(filter.missCount > missCount);
            missCount = filter.missCount;

            constantScore = new ConstantScoreQuery(filter);
            docs          = searcher.Search(constantScore, 1);
            Assert.AreEqual(2, docs.TotalHits, "[just filter] Should find 2 hits...");
            Assert.AreEqual(missCount, filter.missCount);

            // now delete the doc, refresh the reader, and see that it's not there
            writer.DeleteDocuments(new Term("id", "1"));

            reader   = RefreshReader(reader);
            searcher = NewSearcher(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
                this,
#endif
                reader, false);

            docs = searcher.Search(new MatchAllDocsQuery(), filter, 1);
            Assert.AreEqual(0, docs.TotalHits, "[query + filter] Should *not* find a hit...");
            // CWF reused the same entry (it dynamically applied the deletes):
            Assert.AreEqual(missCount, filter.missCount);

            docs = searcher.Search(constantScore, 1);
            Assert.AreEqual(0, docs.TotalHits, "[just filter] Should *not* find a hit...");
            // CWF reused the same entry (it dynamically applied the deletes):
            Assert.AreEqual(missCount, filter.missCount);

            // NOTE: silliness to make sure the runtime does not
            // optimize away our holding onto oldReader, which keeps
            // CachingWrapperFilter's weak-keyed cache from dropping
            // the entry:
            Assert.IsTrue(oldReader != null);

            reader.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
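        // RefreshReader is not shown in this excerpt. A minimal sketch, assuming it
        // simply reopens the near-real-time reader via DirectoryReader.OpenIfChanged
        // and disposes the old instance only when a new one was actually opened
        // (which is what the reader == oldReader assertion above relies on):
        private static DirectoryReader RefreshReader(DirectoryReader reader)
        {
            DirectoryReader oldReader = reader;
            reader = DirectoryReader.OpenIfChanged(reader);
            if (reader != null)
            {
                // a new NRT reader was opened; release the old one
                oldReader.Dispose();
                return reader;
            }
            else
            {
                // nothing changed (e.g. after the fake delete); keep the old reader
                return oldReader;
            }
        }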
        public virtual void TestRandomStoredFields()
        {
            Directory dir = NewDirectory();
            Random rand = Random();
            RandomIndexWriter w = new RandomIndexWriter(
                rand, dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                    .SetMaxBufferedDocs(TestUtil.NextInt(rand, 5, 20)));
            //w.w.setNoCFSRatio(0.0);
            int docCount = AtLeast(200);
            int fieldCount = TestUtil.NextInt(rand, 1, 5);

            IList<int?> fieldIDs = new List<int?>();

            FieldType customType = new FieldType(TextField.TYPE_STORED);
            customType.Tokenized = false;
            Field idField = NewField("id", "", customType);

            for (int i = 0; i < fieldCount; i++)
            {
                fieldIDs.Add(i);
            }

            IDictionary<string, Document> docs = new Dictionary<string, Document>();

            if (VERBOSE)
            {
                Console.WriteLine("TEST: build index docCount=" + docCount);
            }

            FieldType customType2 = new FieldType();
            customType2.Stored = true;
            for (int i = 0; i < docCount; i++)
            {
                Document doc = new Document();
                doc.Add(idField);
                string id = "" + i;
                idField.StringValue = id;
                docs[id] = doc;
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: add doc id=" + id);
                }

                foreach (int field in fieldIDs)
                {
                    // ~3/4 of the time, store a random unicode string in this
                    // field; otherwise omit the field from this doc entirely
                    if (rand.Next(4) != 3)
                    {
                        doc.Add(NewField("f" + field, TestUtil.RandomUnicodeString(rand, 1000), customType2));
                    }
                }
                w.AddDocument(doc);
                if (rand.Next(50) == 17)
                {
                    // mix up the binding of field name -> field number every so often
                    fieldIDs = CollectionsHelper.Shuffle(fieldIDs);
                }
                if (rand.Next(5) == 3 && i > 0)
                {
                    string delID = "" + rand.Next(i);
                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: delete doc id=" + delID);
                    }
                    w.DeleteDocuments(new Term("id", delID));
                    docs.Remove(delID);
                }
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: " + docs.Count + " docs in index; now load fields");
            }
            if (docs.Count > 0)
            {
                string[] idsList = docs.Keys.ToArray();

                for (int x = 0; x < 2; x++)
                {
                    IndexReader r = w.Reader;
                    IndexSearcher s = NewSearcher(r);

                    if (VERBOSE)
                    {
                        Console.WriteLine("TEST: cycle x=" + x + " r=" + r);
                    }

                    int num = AtLeast(1000);
                    for (int iter = 0; iter < num; iter++)
                    {
                        string testID = idsList[rand.Next(idsList.Length)];
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: test id=" + testID);
                        }
                        TopDocs hits = s.Search(new TermQuery(new Term("id", testID)), 1);
                        Assert.AreEqual(1, hits.TotalHits);
                        Document doc = r.Document(hits.ScoreDocs[0].Doc);
                        Document docExp = docs[testID];
                        for (int i = 0; i < fieldCount; i++)
                        {
                            Assert.AreEqual("doc " + testID + ", field f" + fieldCount + " is wrong", docExp.Get("f" + i), doc.Get("f" + i));
                        }
                    }
                    r.Dispose();
                    w.ForceMerge(1);
                }
            }
            w.Dispose();
            dir.Dispose();
        }
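        // LongProducer is not defined in this excerpt. A minimal sketch matching its
        // use in DoTestNumericsVsStoredFields below: each call to Next() yields the
        // next value to index into both the stored field and the doc-values field.
        internal abstract class LongProducer
        {
            internal abstract long Next();
        }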
        private void DoTestNumericsVsStoredFields(LongProducer longs)
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
            Document doc = new Document();
            Field idField = new StringField("id", "", Field.Store.NO);
            Field storedField = NewStringField("stored", "", Field.Store.YES);
            Field dvField = new NumericDocValuesField("dv", 0);
            doc.Add(idField);
            doc.Add(storedField);
            doc.Add(dvField);

            // index some docs
            int numDocs = AtLeast(300);
            // numDocs should always be > 256 so that, for a codec that optimizes
            // the case of <= 256 distinct values, all storage layouts are tested
            Debug.Assert(numDocs > 256);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                long value = longs.Next();
                storedField.StringValue = Convert.ToString(value);
                dvField.LongValue = value;
                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }

            // merge some segments and ensure that at least one of them has more than
            // 256 values
            writer.ForceMerge(numDocs / 256);

            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            foreach (AtomicReaderContext context in ir.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                NumericDocValues docValues = r.GetNumericDocValues("dv");
                for (int i = 0; i < r.MaxDoc; i++)
                {
                    long storedValue = Convert.ToInt64(r.Document(i).Get("stored"));
                    Assert.AreEqual(storedValue, docValues.Get(i));
                }
            }
            ir.Dispose();
            dir.Dispose();
        }
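        // A hypothetical caller of DoTestNumericsVsStoredFields (the names
        // UniformLongProducer and TestUniformLongs are illustrative, not from this
        // class): values are drawn uniformly from the full long range, assuming the
        // test framework's TestUtil.NextLong helper.
        private class UniformLongProducer : LongProducer
        {
            internal override long Next()
            {
                return TestUtil.NextLong(Random(), long.MinValue, long.MaxValue);
            }
        }

        public virtual void TestUniformLongs()
        {
            DoTestNumericsVsStoredFields(new UniformLongProducer());
        }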
        private void DoTestSortedSetVsUninvertedField(int minLength, int maxLength)
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);

            // index some docs
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                Field idField = new StringField("id", Convert.ToString(i), Field.Store.NO);
                doc.Add(idField);
                int length;
                if (minLength == maxLength)
                {
                    length = minLength; // fixed length
                }
                else
                {
                    length = TestUtil.NextInt(Random(), minLength, maxLength);
                }
                int numValues = Random().Next(17);
                // create a random list of strings
                IList<string> values = new List<string>();
                for (int v = 0; v < numValues; v++)
                {
                    values.Add(TestUtil.RandomSimpleString(Random(), length));
                }

                // add in any order to the indexed field
                IList<string> unordered = new List<string>(values);
                unordered = CollectionsHelper.Shuffle(unordered);
                foreach (string v in unordered)
                {
                    doc.Add(NewStringField("indexed", v, Field.Store.NO));
                }

                // add in any order to the dv field
                IList<string> unordered2 = new List<string>(values);
                unordered2 = CollectionsHelper.Shuffle(unordered2);
                foreach (string v in unordered2)
                {
                    doc.Add(new SortedSetDocValuesField("dv", new BytesRef(v)));
                }

                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }

            // compare per-segment
            DirectoryReader ir = writer.Reader;
            foreach (AtomicReaderContext context in ir.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                SortedSetDocValues expected = FieldCache.DEFAULT.GetDocTermOrds(r, "indexed");
                SortedSetDocValues actual = r.GetSortedSetDocValues("dv");
                AssertEquals(r.MaxDoc, expected, actual);
            }
            ir.Dispose();

            writer.ForceMerge(1);

            // now compare again after the merge
            ir = writer.Reader;
            AtomicReader ar = GetOnlySegmentReader(ir);
            SortedSetDocValues expectedMerged = FieldCache.DEFAULT.GetDocTermOrds(ar, "indexed");
            SortedSetDocValues actualMerged = ar.GetSortedSetDocValues("dv");
            AssertEquals(ir.MaxDoc, expectedMerged, actualMerged);
            ir.Dispose();

            writer.Dispose();
            dir.Dispose();
        }
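        // The AssertEquals overload used above for two SortedSetDocValues instances
        // is not shown in this excerpt. A minimal sketch: walk each document's ords
        // and compare the looked-up terms rather than the raw ords, since ordinals
        // need not line up between the uninverted field and the dv field.
        private void AssertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual)
        {
            BytesRef expectedBytes = new BytesRef();
            BytesRef actualBytes = new BytesRef();
            for (int i = 0; i < maxDoc; i++)
            {
                // position both iterators on the same document
                expected.SetDocument(i);
                actual.SetDocument(i);
                long expectedOrd;
                while ((expectedOrd = expected.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    long actualOrd = actual.NextOrd();
                    Assert.AreNotEqual(SortedSetDocValues.NO_MORE_ORDS, actualOrd, "dv field is missing a value");
                    // compare the terms the ords refer to, not the ords themselves
                    expected.LookupOrd(expectedOrd, expectedBytes);
                    actual.LookupOrd(actualOrd, actualBytes);
                    Assert.AreEqual(expectedBytes, actualBytes);
                }
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, actual.NextOrd(), "dv field has an unexpected extra value");
            }
        }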