Пример #1
0
        /// <summary>
        /// NOTE: This was getLongEnumerable() in Lucene
        /// </summary>
        private IEnumerable <long?> GetInt64Enumerable(SegmentReader reader, string field, NumericDocValuesFieldUpdates fieldUpdates)
        {
            int              maxDoc        = reader.MaxDoc;
            IBits            DocsWithField = reader.GetDocsWithField(field);
            NumericDocValues currentValues = reader.GetNumericDocValues(field);

            NumericDocValuesFieldUpdates.Iterator iter = (NumericDocValuesFieldUpdates.Iterator)fieldUpdates.GetIterator();
            int updateDoc = iter.NextDoc();

            for (int curDoc = 0; curDoc < maxDoc; ++curDoc)
            {
                if (curDoc == updateDoc)        //document has an updated value
                {
                    long?value = iter.Value;    // either null or updated
                    updateDoc = iter.NextDoc(); //prepare for next round
                    yield return(value);
                }
                else
                {   // no update for this document
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(curDoc < updateDoc);
                    }
                    if (currentValues != null && DocsWithField.Get(curDoc))
                    {
                        // only read the current value if the document had a value before
                        yield return(currentValues.Get(curDoc));
                    }
                    else
                    {
                        yield return(null);
                    }
                }
            }
        }
Пример #2
0
        public virtual void TestDocValuesUnstored()
        {
            Directory         dir      = NewDirectory();
            IndexWriterConfig iwconfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            iwconfig.SetMergePolicy(NewLogMergePolicy());
            IndexWriter writer = new IndexWriter(dir, iwconfig);

            for (int i = 0; i < 50; i++)
            {
                Document doc = new Document();
                doc.Add(new NumericDocValuesField("dv", i));
                doc.Add(new TextField("docId", "" + i, Field.Store.YES));
                writer.AddDocument(doc);
            }
            DirectoryReader r      = writer.GetReader();
            AtomicReader    slow   = SlowCompositeReaderWrapper.Wrap(r);
            FieldInfos      fi     = slow.FieldInfos;
            FieldInfo       dvInfo = fi.FieldInfo("dv");

            Assert.IsTrue(dvInfo.HasDocValues);
            NumericDocValues dv = slow.GetNumericDocValues("dv");

            for (int i = 0; i < 50; i++)
            {
                Assert.AreEqual(i, dv.Get(i));
                Document d = slow.Document(i);
                // cannot use d.Get("dv") due to another bug!
                Assert.IsNull(d.GetField("dv"));
                Assert.AreEqual(Convert.ToString(i), d.Get("docId"));
            }
            slow.Dispose();
            writer.Dispose();
            dir.Dispose();
        }
Пример #3
0
 public override long Get(int docID)
 {
     if (Debugging.AssertsEnabled)
     {
         Debugging.Assert(docID >= 0 && docID < maxDoc);
     }
     return(@in.Get(docID));
 }
Пример #4
0
        public virtual void Test()
        {
            NumericDocValues fooNorms = MultiDocValues.GetNormValues(reader, "foo");

            for (int i = 0; i < reader.MaxDoc; i++)
            {
                Assert.AreEqual((int)expected[i], fooNorms.Get(i) & 0xff);
            }
        }
Пример #5
0
        public virtual void TestNumerics()
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BNumerics"));

            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
            }

            IndexWriter w = new IndexWriter(dir,
                                            new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
                                            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                            .SetRAMBufferSizeMB(256.0)
                                            .SetMergeScheduler(new ConcurrentMergeScheduler())
                                            .SetMergePolicy(NewLogMergePolicy(false, 10))
                                            .SetOpenMode(OpenMode.CREATE));

            Document doc = new Document();
            NumericDocValuesField dvField = new NumericDocValuesField("dv", 0);

            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                dvField.SetInt64Value(i);
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r             = DirectoryReader.Open(dir);
            long            expectedValue = 0;

            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader     reader = context.AtomicReader;
                NumericDocValues dv     = reader.GetNumericDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    Assert.AreEqual(expectedValue, dv.Get(i));
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
Пример #6
0
        public void Test()
        {
            NumericDocValues fooNorms = MultiDocValues.GetNormValues(reader, "foo");

            assertNotNull(fooNorms);
            for (int i = 0; i < reader.MaxDoc; i++)
            {
                assertEquals(expected[i], fooNorms.Get(i));
            }
        }
Пример #7
0
        /// <summary>
        /// Indexes at least 1 document with f1, and at least 1 document with f2.
        /// returns the norms for "field".
        /// </summary>
        internal virtual NumericDocValues GetNorms(string field, Field f1, Field f2)
        {
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter riw = new RandomIndexWriter(Random, dir, iwc);

            // add f1
            Document d = new Document();

            d.Add(f1);
            riw.AddDocument(d);

            // add f2
            d = new Document();
            d.Add(f2);
            riw.AddDocument(d);

            // add a mix of f1's and f2's
            int numExtraDocs = TestUtil.NextInt32(Random, 1, 1000);

            for (int i = 0; i < numExtraDocs; i++)
            {
                d = new Document();
                d.Add(Random.NextBoolean() ? f1 : f2);
                riw.AddDocument(d);
            }

            IndexReader ir1 = riw.GetReader();
            // todo: generalize
            NumericDocValues norms1 = MultiDocValues.GetNormValues(ir1, field);

            // fully merge and validate MultiNorms against single segment.
            riw.ForceMerge(1);
            DirectoryReader  ir2    = riw.GetReader();
            NumericDocValues norms2 = GetOnlySegmentReader(ir2).GetNormValues(field);

            if (norms1 == null)
            {
                Assert.IsNull(norms2);
            }
            else
            {
                for (int docID = 0; docID < ir1.MaxDoc; docID++)
                {
                    Assert.AreEqual(norms1.Get(docID), norms2.Get(docID));
                }
            }
            ir1.Dispose();
            ir2.Dispose();
            riw.Dispose();
            dir.Dispose();
            return(norms1);
        }
Пример #8
0
        public virtual void TestFloatNorms()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random);

            analyzer.MaxTokenLength = TestUtil.NextInt32(Random, 1, IndexWriter.MAX_TERM_LENGTH);

            IndexWriterConfig config   = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
            Similarity        provider = new MySimProvider(this);

            config.SetSimilarity(provider);
            RandomIndexWriter writer = new RandomIndexWriter(Random, dir, config);
            LineFileDocs      docs   = new LineFileDocs(Random);
            int num = AtLeast(100);

            for (int i = 0; i < num; i++)
            {
                Document doc       = docs.NextDoc();
                float    nextFloat = Random.nextFloat();
                // Cast to a double to get more precision output to the string.
                Field f = new TextField(floatTestField, "" + ((double)nextFloat).ToString(CultureInfo.InvariantCulture), Field.Store.YES);
                f.Boost = nextFloat;

                doc.Add(f);
                writer.AddDocument(doc);
                doc.RemoveField(floatTestField);
                if (Rarely())
                {
                    writer.Commit();
                }
            }
            writer.Commit();
            writer.Dispose();
            AtomicReader     open  = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            NumericDocValues norms = open.GetNormValues(floatTestField);

            Assert.IsNotNull(norms);
            for (int i = 0; i < open.MaxDoc; i++)
            {
                Document document = open.Document(i);
                float    expected = Convert.ToSingle(document.Get(floatTestField), CultureInfo.InvariantCulture);
                Assert.AreEqual(expected, J2N.BitConversion.Int32BitsToSingle((int)norms.Get(i)), 0.0f);
            }
            open.Dispose();
            dir.Dispose();
            docs.Dispose();
        }
Пример #9
0
        public virtual void TestUpdateDifferentDocsInDifferentGens()
        {
            // update same document multiple times across generations
            Directory         dir  = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));

            conf.SetMaxBufferedDocs(4);
            IndexWriter writer  = new IndexWriter(dir, conf);
            int         numDocs = AtLeast(10);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("id", "doc" + i, Store.NO));
                long value = Random.Next();
                doc.Add(new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.ToBytes(value)));
                doc.Add(new NumericDocValuesField("cf", value * 2));
                writer.AddDocument(doc);
            }

            int      numGens = AtLeast(5);
            BytesRef scratch = new BytesRef();

            for (int i = 0; i < numGens; i++)
            {
                int  doc   = Random.Next(numDocs);
                Term t     = new Term("id", "doc" + doc);
                long value = Random.NextInt64();
                writer.UpdateBinaryDocValue(t, "f", TestBinaryDocValuesUpdates.ToBytes(value));
                writer.UpdateNumericDocValue(t, "cf", value * 2);
                DirectoryReader reader = DirectoryReader.Open(writer, true);
                foreach (AtomicReaderContext context in reader.Leaves)
                {
                    AtomicReader     r     = context.AtomicReader;
                    BinaryDocValues  fbdv  = r.GetBinaryDocValues("f");
                    NumericDocValues cfndv = r.GetNumericDocValues("cf");
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        Assert.AreEqual(cfndv.Get(j), TestBinaryDocValuesUpdates.GetValue(fbdv, j, scratch) * 2);
                    }
                }
                reader.Dispose();
            }
            writer.Dispose();
            dir.Dispose();
        }
Пример #10
0
        public virtual void TestMaxByteNorms()
        {
            Directory dir = NewFSDirectory(CreateTempDir("TestNorms.testMaxByteNorms"));

            BuildIndex(dir);
            AtomicReader     open       = SlowCompositeReaderWrapper.Wrap(DirectoryReader.Open(dir));
            NumericDocValues normValues = open.GetNormValues(ByteTestField);

            Assert.IsNotNull(normValues);
            for (int i = 0; i < open.MaxDoc; i++)
            {
                Document document = open.Document(i);
                int      expected = Convert.ToInt32(document.Get(ByteTestField));
                Assert.AreEqual(expected, normValues.Get(i) & 0xff);
            }
            open.Dispose();
            dir.Dispose();
        }
Пример #11
0
        public virtual void TestCustomEncoder()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            config.SetSimilarity(new CustomNormEncodingSimilarity(this));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
            Document          doc    = new Document();
            Field             foo    = NewTextField("foo", "", Field.Store.NO);
            Field             bar    = NewTextField("bar", "", Field.Store.NO);

            doc.Add(foo);
            doc.Add(bar);

            for (int i = 0; i < 100; i++)
            {
                bar.SetStringValue("singleton");
                writer.AddDocument(doc);
            }

            IndexReader reader = writer.Reader;

            writer.Dispose();

            NumericDocValues fooNorms = MultiDocValues.GetNormValues(reader, "foo");

            for (int i = 0; i < reader.MaxDoc; i++)
            {
                Assert.AreEqual(0, fooNorms.Get(i));
            }

            NumericDocValues barNorms = MultiDocValues.GetNormValues(reader, "bar");

            for (int i = 0; i < reader.MaxDoc; i++)
            {
                Assert.AreEqual(1, barNorms.Get(i));
            }

            reader.Dispose();
            dir.Dispose();
        }
Пример #12
0
        public virtual void TestNumerics()
        {
            Directory dir   = NewDirectory();
            Document  doc   = new Document();
            Field     field = new NumericDocValuesField("numbers", 0);

            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                field.SetInt64Value(Random().NextLong());
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.Reader;
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            NumericDocValues multi  = MultiDocValues.GetNumericValues(ir, "numbers");
            NumericDocValues single = merged.GetNumericDocValues("numbers");

            for (int i = 0; i < numDocs; i++)
            {
                Assert.AreEqual(single.Get(i), multi.Get(i));
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
Пример #13
0
 public override long Get(int docID)
 {
     Debug.Assert(docID >= 0 && docID < maxDoc);
     return(@in.Get(docID));
 }
Пример #14
0
        public virtual void TestManyReopensAndFields()
        {
            Directory         dir    = NewDirectory();
            Random            random = Random;
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
            LogMergePolicy    lmp    = NewLogMergePolicy();

            lmp.MergeFactor = 3; // merge often
            conf.SetMergePolicy(lmp);
            IndexWriter writer = new IndexWriter(dir, conf);

            bool            isNRT = random.NextBoolean();
            DirectoryReader reader;

            if (isNRT)
            {
                reader = DirectoryReader.Open(writer, true);
            }
            else
            {
                writer.Commit();
                reader = DirectoryReader.Open(dir);
            }

            int numFields    = random.Next(4) + 3;             // 3-7
            int numNDVFields = random.Next(numFields / 2) + 1; // 1-3

            long[] fieldValues   = new long[numFields];
            bool[] fieldHasValue = new bool[numFields];
            Arrays.Fill(fieldHasValue, true);
            for (int i = 0; i < fieldValues.Length; i++)
            {
                fieldValues[i] = 1;
            }

            int numRounds = AtLeast(15);
            int docID     = 0;

            for (int i = 0; i < numRounds; i++)
            {
                int numDocs = AtLeast(5);
                //      System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
                for (int j = 0; j < numDocs; j++)
                {
                    Document doc = new Document();
                    doc.Add(new StringField("id", "doc-" + docID, Store.NO));
                    doc.Add(new StringField("key", "all", Store.NO)); // update key
                    // add all fields with their current value
                    for (int f = 0; f < fieldValues.Length; f++)
                    {
                        if (f < numNDVFields)
                        {
                            doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
                        }
                        else
                        {
                            doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
                        }
                    }
                    writer.AddDocument(doc);
                    ++docID;
                }

                // if field's value was unset before, unset it from all new added documents too
                for (int field = 0; field < fieldHasValue.Length; field++)
                {
                    if (!fieldHasValue[field])
                    {
                        if (field < numNDVFields)
                        {
                            writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
                        }
                        else
                        {
                            writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
                        }
                    }
                }

                int    fieldIdx    = random.Next(fieldValues.Length);
                string updateField = "f" + fieldIdx;
                if (random.NextBoolean())
                {
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
                    fieldHasValue[fieldIdx] = false;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
                    }
                }
                else
                {
                    fieldHasValue[fieldIdx] = true;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
                    }
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
                }

                if (random.NextDouble() < 0.2)
                {
                    int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok!
                    writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
                }

                // verify reader
                if (!isNRT)
                {
                    writer.Commit();
                }

                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
                DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
                Assert.IsNotNull(newReader);
                reader.Dispose();
                reader = newReader;
                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
                Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
                BytesRef scratch = new BytesRef();
                foreach (AtomicReaderContext context in reader.Leaves)
                {
                    AtomicReader r = context.AtomicReader;
                    //        System.out.println(((SegmentReader) r).getSegmentName());
                    IBits liveDocs = r.LiveDocs;
                    for (int field = 0; field < fieldValues.Length; field++)
                    {
                        string           f             = "f" + field;
                        BinaryDocValues  bdv           = r.GetBinaryDocValues(f);
                        NumericDocValues ndv           = r.GetNumericDocValues(f);
                        IBits            docsWithField = r.GetDocsWithField(f);
                        if (field < numNDVFields)
                        {
                            Assert.IsNotNull(ndv);
                            Assert.IsNull(bdv);
                        }
                        else
                        {
                            Assert.IsNull(ndv);
                            Assert.IsNotNull(bdv);
                        }
                        int maxDoc = r.MaxDoc;
                        for (int doc = 0; doc < maxDoc; doc++)
                        {
                            if (liveDocs is null || liveDocs.Get(doc))
                            {
                                //              System.out.println("doc=" + (doc + context.docBase) + " f='" + f + "' vslue=" + getValue(bdv, doc, scratch));
                                if (fieldHasValue[field])
                                {
                                    Assert.IsTrue(docsWithField.Get(doc));
                                    if (field < numNDVFields)
                                    {
                                        Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                    else
                                    {
                                        Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                }
                                else
                                {
                                    Assert.IsFalse(docsWithField.Get(doc));
                                }
                            }
                        }
                    }
                }
                //      System.out.println();
            }

            IOUtils.Dispose(writer, reader, dir);
        }
Пример #15
0
        public virtual void TestTonsOfUpdates()
        {
            // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
            Directory         dir    = NewDirectory();
            Random            random = Random;
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
            IndexWriter writer = new IndexWriter(dir, conf);

            // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
            int           numDocs         = AtLeast(20000);
            int           numBinaryFields = AtLeast(5);
            int           numTerms        = TestUtil.NextInt32(random, 10, 100); // terms should affect many docs
            ISet <string> updateTerms     = new JCG.HashSet <string>();

            while (updateTerms.Count < numTerms)
            {
                updateTerms.Add(TestUtil.RandomSimpleString(random));
            }

            //    System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

            // build a large index with many BDV fields and update terms
            for (int i = 0; i < numDocs; i++)
            {
                Document doc            = new Document();
                int      numUpdateTerms = TestUtil.NextInt32(random, 1, numTerms / 10);
                for (int j = 0; j < numUpdateTerms; j++)
                {
                    doc.Add(new StringField("upd", RandomPicks.RandomFrom(random, updateTerms), Store.NO));
                }
                for (int j = 0; j < numBinaryFields; j++)
                {
                    long val = random.Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
                    doc.Add(new NumericDocValuesField("cf" + j, val * 2));
                }
                writer.AddDocument(doc);
            }

            writer.Commit(); // commit so there's something to apply to

            // set to flush every 2048 bytes (approximately every 12 updates), so we get
            // many flushes during binary updates
            writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
            int numUpdates = AtLeast(100);

            //    System.out.println("numUpdates=" + numUpdates);
            for (int i = 0; i < numUpdates; i++)
            {
                int  field      = random.Next(numBinaryFields);
                Term updateTerm = new Term("upd", RandomPicks.RandomFrom(random, updateTerms));
                long value      = random.Next();
                writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
                writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
            }

            writer.Dispose();

            DirectoryReader reader  = DirectoryReader.Open(dir);
            BytesRef        scratch = new BytesRef();

            foreach (AtomicReaderContext context in reader.Leaves)
            {
                for (int i = 0; i < numBinaryFields; i++)
                {
                    AtomicReader     r  = context.AtomicReader;
                    BinaryDocValues  f  = r.GetBinaryDocValues("f" + i);
                    NumericDocValues cf = r.GetNumericDocValues("cf" + i);
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
Пример #16
0
        public virtual void TestStressMultiThreading()
        {
            Directory         dir    = NewDirectory();
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            IndexWriter       writer = new IndexWriter(dir, conf);

            // create index
            int numThreads = TestUtil.NextInt32(Random, 3, 6);
            int numDocs    = AtLeast(2000);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("id", "doc" + i, Store.NO));
                double group = Random.NextDouble();
                string g;
                if (group < 0.1)
                {
                    g = "g0";
                }
                else if (group < 0.5)
                {
                    g = "g1";
                }
                else if (group < 0.8)
                {
                    g = "g2";
                }
                else
                {
                    g = "g3";
                }
                doc.Add(new StringField("updKey", g, Store.NO));
                for (int j = 0; j < numThreads; j++)
                {
                    long value = Random.Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value)));
                    doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
                }
                writer.AddDocument(doc);
            }

            CountdownEvent done       = new CountdownEvent(numThreads);
            AtomicInt32    numUpdates = new AtomicInt32(AtLeast(100));

            // same thread updates a field as well as reopens
            ThreadJob[] threads = new ThreadJob[numThreads];
            for (int i = 0; i < threads.Length; i++)
            {
                string f  = "f" + i;
                string cf = "cf" + i;
                threads[i] = new ThreadAnonymousClass(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf);
            }

            foreach (ThreadJob t in threads)
            {
                t.Start();
            }
            done.Wait();
            writer.Dispose();

            DirectoryReader reader  = DirectoryReader.Open(dir);
            BytesRef        scratch = new BytesRef();

            foreach (AtomicReaderContext context in reader.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                for (int i = 0; i < numThreads; i++)
                {
                    BinaryDocValues  bdv             = r.GetBinaryDocValues("f" + i);
                    NumericDocValues control         = r.GetNumericDocValues("cf" + i);
                    IBits            docsWithBdv     = r.GetDocsWithField("f" + i);
                    IBits            docsWithControl = r.GetDocsWithField("cf" + i);
                    IBits            liveDocs        = r.LiveDocs;
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        if (liveDocs is null || liveDocs.Get(j))
                        {
                            Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j));
                            if (docsWithBdv.Get(j))
                            {
                                long ctrlValue = control.Get(j);
                                long bdvValue  = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2;
                                //              if (ctrlValue != bdvValue) {
                                //                System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
                                //              }
                                Assert.AreEqual(ctrlValue, bdvValue);
                            }
                        }
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
        public virtual void SearchIndex(Directory dir, string oldName)
        {
            //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
            //Query query = parser.parse("handle:1");

            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            TestUtil.CheckIndex(dir);

            // true if this is a 4.0+ index
            bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
            // true if this is a 4.2+ index
            bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;

            Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk!

            Bits liveDocs = MultiFields.GetLiveDocs(reader);

            for (int i = 0; i < 35; i++)
            {
                if (liveDocs.Get(i))
                {
                    Document d = reader.Document(i);
                    IList <IndexableField> fields = d.Fields;
                    bool isProxDoc = d.GetField("content3") == null;
                    if (isProxDoc)
                    {
                        int numFields = is40Index ? 7 : 5;
                        Assert.AreEqual(numFields, fields.Count);
                        IndexableField f = d.GetField("id");
                        Assert.AreEqual("" + i, f.StringValue);

                        f = d.GetField("utf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = d.GetField("autf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = d.GetField("content2");
                        Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                        f = d.GetField("fie\u2C77ld");
                        Assert.AreEqual("field with non-ascii name", f.StringValue);
                    }

                    Fields tfvFields = reader.GetTermVectors(i);
                    Assert.IsNotNull(tfvFields, "i=" + i);
                    Terms tfv = tfvFields.Terms("utf8");
                    Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
                }
                else
                {
                    // Only ID 7 is deleted
                    Assert.AreEqual(7, i);
                }
            }

            if (is40Index)
            {
                // check docvalues fields
                NumericDocValues   dvByte               = MultiDocValues.GetNumericValues(reader, "dvByte");
                BinaryDocValues    dvBytesDerefFixed    = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
                BinaryDocValues    dvBytesDerefVar      = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
                SortedDocValues    dvBytesSortedFixed   = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
                SortedDocValues    dvBytesSortedVar     = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
                BinaryDocValues    dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
                BinaryDocValues    dvBytesStraightVar   = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
                NumericDocValues   dvDouble             = MultiDocValues.GetNumericValues(reader, "dvDouble");
                NumericDocValues   dvFloat              = MultiDocValues.GetNumericValues(reader, "dvFloat");
                NumericDocValues   dvInt       = MultiDocValues.GetNumericValues(reader, "dvInt");
                NumericDocValues   dvLong      = MultiDocValues.GetNumericValues(reader, "dvLong");
                NumericDocValues   dvPacked    = MultiDocValues.GetNumericValues(reader, "dvPacked");
                NumericDocValues   dvShort     = MultiDocValues.GetNumericValues(reader, "dvShort");
                SortedSetDocValues dvSortedSet = null;
                if (is42Index)
                {
                    dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet");
                }

                for (int i = 0; i < 35; i++)
                {
                    int id = Convert.ToInt32(reader.Document(i).Get("id"));
                    Assert.AreEqual(id, dvByte.Get(i));

                    sbyte[]  bytes       = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
                    BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
                    BytesRef scratch     = new BytesRef();

                    dvBytesDerefFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesDerefVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);

                    Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
                    Assert.AreEqual((float)id, Number.IntBitsToFloat((int)dvFloat.Get(i)), 0F);
                    Assert.AreEqual(id, dvInt.Get(i));
                    Assert.AreEqual(id, dvLong.Get(i));
                    Assert.AreEqual(id, dvPacked.Get(i));
                    Assert.AreEqual(id, dvShort.Get(i));
                    if (is42Index)
                    {
                        dvSortedSet.Document = i;
                        long ord = dvSortedSet.NextOrd();
                        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd());
                        dvSortedSet.LookupOrd(ord, scratch);
                        Assert.AreEqual(expectedRef, scratch);
                    }
                }
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #0
            Document doc = searcher.IndexReader.Document(hits[0].Doc);

            assertEquals("didn't get the right document first", "0", doc.Get("id"));

            DoTestHits(hits, 34, searcher.IndexReader);

            if (is40Index)
            {
                hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);

                hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);
            }

            hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);

            reader.Dispose();
        }
Пример #18
0
 public void AssertDocValuesEquals(string info, int num, NumericDocValues leftDocValues, NumericDocValues rightDocValues)
 {
     Assert.IsNotNull(leftDocValues, info);
     Assert.IsNotNull(rightDocValues, info);
     for (int docID = 0; docID < num; docID++)
     {
         Assert.AreEqual(leftDocValues.Get(docID), rightDocValues.Get(docID));
     }
 }
Пример #19
0
        private Explanation ExplainScore(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms)
        {
            Explanation result = new Explanation();
            result.Description = "score(doc=" + doc + ",freq=" + freq + "), product of:";

            Explanation boostExpl = new Explanation(stats.QueryBoost * stats.TopLevelBoost, "boost");
            if (boostExpl.Value != 1.0f)
            {
                result.AddDetail(boostExpl);
            }

            result.AddDetail(stats.Idf);

            Explanation tfNormExpl = new Explanation();
            tfNormExpl.Description = "tfNorm, computed from:";
            tfNormExpl.AddDetail(freq);
            tfNormExpl.AddDetail(new Explanation(K1_Renamed, "parameter k1"));
            if (norms == null)
            {
                tfNormExpl.AddDetail(new Explanation(0, "parameter b (norms omitted for field)"));
                tfNormExpl.Value = (freq.Value * (K1_Renamed + 1)) / (freq.Value + K1_Renamed);
            }
            else
            {
                float doclen = DecodeNormValue((sbyte)norms.Get(doc));
                tfNormExpl.AddDetail(new Explanation(b, "parameter b"));
                tfNormExpl.AddDetail(new Explanation(stats.Avgdl, "avgFieldLength"));
                tfNormExpl.AddDetail(new Explanation(doclen, "fieldLength"));
                tfNormExpl.Value = (freq.Value * (K1_Renamed + 1)) / (freq.Value + K1_Renamed * (1 - b + b * doclen / stats.Avgdl));
            }
            result.AddDetail(tfNormExpl);
            result.Value = boostExpl.Value * stats.Idf.Value * tfNormExpl.Value;
            return result;
        }
Пример #20
0
 private static void CheckNumericDocValues(string fieldName, AtomicReader reader, NumericDocValues ndv, Bits docsWithField)
 {
     for (int i = 0; i < reader.MaxDoc; i++)
     {
         long value = ndv.Get(i);
         if (docsWithField.Get(i) == false && value != 0)
         {
             throw new Exception("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i);
         }
     }
 }