Field that stores a per-document BytesRef value, indexed for sorting. Here's an example usage:

 document.add(new SortedDocValuesField(name, new BytesRef("hello"))); 

If you also need to store the value, you should add a separate StoredField instance.

Inheritance: Field
Ejemplo n.º 1
0
        public virtual void TestFixedSorted([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
        {
            BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedSorted"));
            if (dir is MockDirectoryWrapper)
            {
                ((MockDirectoryWrapper)dir).Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
            }

            IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
                                .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
                                .SetRAMBufferSizeMB(256.0)
                                .SetMergeScheduler(scheduler)
                                .SetMergePolicy(NewLogMergePolicy(false, 10))
                                .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE));

            Document doc = new Document();
            var bytes = new byte[2];
            BytesRef data = new BytesRef(bytes);
            SortedDocValuesField dvField = new SortedDocValuesField("dv", data);
            doc.Add(dvField);

            for (int i = 0; i < int.MaxValue; i++)
            {
                bytes[0] = (byte)(i >> 8);
                bytes[1] = (byte)i;
                w.AddDocument(doc);
                if (i % 100000 == 0)
                {
                    Console.WriteLine("indexed: " + i);
                    Console.Out.Flush();
                }
            }

            w.ForceMerge(1);
            w.Dispose();

            Console.WriteLine("verifying...");
            Console.Out.Flush();

            DirectoryReader r = DirectoryReader.Open(dir);
            int expectedValue = 0;
            foreach (AtomicReaderContext context in r.Leaves)
            {
                AtomicReader reader = context.AtomicReader;
                BytesRef scratch = new BytesRef();
                BinaryDocValues dv = reader.GetSortedDocValues("dv");
                for (int i = 0; i < reader.MaxDoc; i++)
                {
                    bytes[0] = (byte)(expectedValue >> 8);
                    bytes[1] = (byte)expectedValue;
                    dv.Get(i, scratch);
                    Assert.AreEqual(data, scratch);
                    expectedValue++;
                }
            }

            r.Dispose();
            dir.Dispose();
        }
        public virtual void TestMixedTypesDifferentThreads()
        {
            Directory dir = NewDirectory();
            IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

            CountDownLatch startingGun = new CountDownLatch(1);
            AtomicBoolean hitExc = new AtomicBoolean();
            ThreadClass[] threads = new ThreadClass[3];
            for (int i = 0; i < 3; i++)
            {
                Field field;
                if (i == 0)
                {
                    field = new SortedDocValuesField("foo", new BytesRef("hello"));
                }
                else if (i == 1)
                {
                    field = new NumericDocValuesField("foo", 0);
                }
                else
                {
                    field = new BinaryDocValuesField("foo", new BytesRef("bazz"));
                }
                Document doc = new Document();
                doc.Add(field);

                threads[i] = new ThreadAnonymousInnerClassHelper(this, w, startingGun, hitExc, doc);
                threads[i].Start();
            }

            startingGun.countDown();

            foreach (ThreadClass t in threads)
            {
                t.Join();
            }
            Assert.IsTrue(hitExc.Get());
            w.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 3
0
        public virtual void TestSortedBytesDocValuesField()
        {
            SortedDocValuesField field = new SortedDocValuesField("foo", new BytesRef("bar"));

            TrySetBoost(field);
            TrySetByteValue(field);
            field.BytesValue = "fubar".ToBytesRefArray(Encoding.UTF8);
            field.BytesValue = new BytesRef("baz");
            TrySetDoubleValue(field);
            TrySetIntValue(field);
            TrySetFloatValue(field);
            TrySetLongValue(field);
            TrySetReaderValue(field);
            TrySetShortValue(field);
            TrySetStringValue(field);
            TrySetTokenStreamValue(field);

            Assert.AreEqual(new BytesRef("baz"), field.BinaryValue);
        }
        public virtual void TestSortedWithLotsOfDups()
        {
            Directory dir = NewDirectory();
            Document doc = new Document();
            BytesRef @ref = new BytesRef();
            Field field = new SortedDocValuesField("bytes", @ref);
            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                @ref.CopyChars(TestUtil.RandomSimpleString(Random(), 2));
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            SortedDocValues multi = MultiDocValues.GetSortedValues(ir, "bytes");
            SortedDocValues single = merged.GetSortedDocValues("bytes");
            Assert.AreEqual(single.ValueCount, multi.ValueCount);
            BytesRef actual = new BytesRef();
            BytesRef expected = new BytesRef();
            for (int i = 0; i < numDocs; i++)
            {
                // check ord
                Assert.AreEqual(single.GetOrd(i), multi.GetOrd(i));
                // check ord value
                single.Get(i, expected);
                multi.Get(i, actual);
                Assert.AreEqual(expected, actual);
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
        private void DoTest(FieldInfo.DocValuesType_e type)
        {
            Directory d = NewDirectory();
            IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            int nDocs = AtLeast(50);
            Field id = new NumericDocValuesField("id", 0);
            Field f;
            switch (type)
            {
                case FieldInfo.DocValuesType_e.BINARY:
                    f = new BinaryDocValuesField("dv", new BytesRef());
                    break;
                case FieldInfo.DocValuesType_e.SORTED:
                    f = new SortedDocValuesField("dv", new BytesRef());
                    break;
                case FieldInfo.DocValuesType_e.NUMERIC:
                    f = new NumericDocValuesField("dv", 0);
                    break;
                default:
                    throw new InvalidOperationException();
            }
            Document document = new Document();
            document.Add(id);
            document.Add(f);

            object[] vals = new object[nDocs];

            RandomIndexWriter iw = new RandomIndexWriter(Random(), d, iwConfig);
            for (int i = 0; i < nDocs; ++i)
            {
                id.LongValue = i;
                switch (type)
                {
                    case FieldInfo.DocValuesType_e.SORTED:
                    case FieldInfo.DocValuesType_e.BINARY:
                        do
                        {
                            vals[i] = TestUtil.RandomSimpleString(Random(), 20);
                        } while (((string)vals[i]).Length == 0);
                        f.BytesValue = new BytesRef((string)vals[i]);
                        break;
                    case FieldInfo.DocValuesType_e.NUMERIC:
                        int bitsPerValue = Random().NextIntBetween(1, 31); // keep it an int
                        vals[i] = (long)Random().Next((int)PackedInts.MaxValue(bitsPerValue));
                        f.LongValue = (long) vals[i];
                        break;
                }
                iw.AddDocument(document);
                if (Random().NextBoolean() && i % 10 == 9)
                {
                    iw.Commit();
                }
            }
            iw.Dispose();

            DirectoryReader rd = DirectoryReader.Open(d);
            foreach (AtomicReaderContext leave in rd.Leaves)
            {
                FunctionValues ids = (new LongFieldSource("id")).GetValues(null, leave);
                ValueSource vs;
                switch (type)
                {
                    case FieldInfo.DocValuesType_e.BINARY:
                    case FieldInfo.DocValuesType_e.SORTED:
                        vs = new BytesRefFieldSource("dv");
                        break;
                    case FieldInfo.DocValuesType_e.NUMERIC:
                        vs = new LongFieldSource("dv");
                        break;
                    default:
                        throw new InvalidOperationException();
                }
                FunctionValues values = vs.GetValues(null, leave);
                BytesRef bytes = new BytesRef();
                for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
                {
                    assertTrue(values.Exists(i));
                    if (vs is BytesRefFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is string);
                    }
                    else if (vs is LongFieldSource)
                    {
                        assertTrue(values.ObjectVal(i) is long?);
                        assertTrue(values.BytesVal(i, bytes));
                    }
                    else
                    {
                        throw new InvalidOperationException();
                    }

                    object expected = vals[ids.IntVal(i)];
                    switch (type)
                    {
                        case FieldInfo.DocValuesType_e.SORTED:
                            values.OrdVal(i); // no exception
                            assertTrue(values.NumOrd() >= 1);
                            goto case FieldInfo.DocValuesType_e.BINARY;
                        case FieldInfo.DocValuesType_e.BINARY:
                            assertEquals(expected, values.ObjectVal(i));
                            assertEquals(expected, values.StrVal(i));
                            assertEquals(expected, values.ObjectVal(i));
                            assertEquals(expected, values.StrVal(i));
                            assertTrue(values.BytesVal(i, bytes));
                            assertEquals(new BytesRef((string)expected), bytes);
                            break;
                        case FieldInfo.DocValuesType_e.NUMERIC:
                            assertEquals(Number.ToInt64(expected.ToString()), values.LongVal(i));
                            break;
                    }
                }
            }
            rd.Dispose();
            d.Dispose();
        }
        /// <summary>
        /// Tests dv against stored fields with threads (all types + missing)
        /// </summary>
        // [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
        public virtual void TestThreads2()
        {
            AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField());
            AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
            Field idField = new StringField("id", "", Field.Store.NO);
            Field storedBinField = new StoredField("storedBin", new byte[0]);
            Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
            Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
            Field storedNumericField = new StoredField("storedNum", "");
            Field dvNumericField = new NumericDocValuesField("dvNum", 0);

            // index some docs
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                int length = TestUtil.NextInt(Random(), 0, 8);
                var buffer = new byte[length];
                Random().NextBytes(buffer);
                storedBinField.BytesValue = new BytesRef(buffer);
                dvBinField.BytesValue = new BytesRef(buffer);
                dvSortedField.BytesValue = new BytesRef(buffer);
                long numericValue = Random().NextLong();
                storedNumericField.StringValue = Convert.ToString(numericValue);
                dvNumericField.LongValue = numericValue;
                Document doc = new Document();
                doc.Add(idField);
                if (Random().Next(4) > 0)
                {
                    doc.Add(storedBinField);
                    doc.Add(dvBinField);
                    doc.Add(dvSortedField);
                }
                if (Random().Next(4) > 0)
                {
                    doc.Add(storedNumericField);
                    doc.Add(dvNumericField);
                }
                int numSortedSetFields = Random().Next(3);
                SortedSet<string> values = new SortedSet<string>();
                for (int j = 0; j < numSortedSetFields; j++)
                {
                    values.Add(TestUtil.RandomSimpleString(Random()));
                }
                foreach (string v in values)
                {
                    doc.Add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(v)));
                    doc.Add(new StoredField("storedSortedSet", v));
                }
                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }
            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            int numThreads = TestUtil.NextInt(Random(), 2, 7);
            ThreadClass[] threads = new ThreadClass[numThreads];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int i = 0; i < threads.Length; i++)
            {
                threads[i] = new ThreadAnonymousInnerClassHelper2(this, ir, startingGun);
                threads[i].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }
            ir.Dispose();
            dir.Dispose();
        }
        private void DoTestSortedVsFieldCache(int minLength, int maxLength)
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
            Document doc = new Document();
            Field idField = new StringField("id", "", Field.Store.NO);
            Field indexedField = new StringField("indexed", "", Field.Store.NO);
            Field dvField = new SortedDocValuesField("dv", new BytesRef());
            doc.Add(idField);
            doc.Add(indexedField);
            doc.Add(dvField);

            // index some docs
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                int length;
                if (minLength == maxLength)
                {
                    length = minLength; // fixed length
                }
                else
                {
                    length = TestUtil.NextInt(Random(), minLength, maxLength);
                }
                string value = TestUtil.RandomSimpleString(Random(), length);
                indexedField.StringValue = value;
                dvField.BytesValue = new BytesRef(value);
                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }
            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            foreach (AtomicReaderContext context in ir.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                SortedDocValues expected = FieldCache.DEFAULT.GetTermsIndex(r, "indexed");
                SortedDocValues actual = r.GetSortedDocValues("dv");
                AssertEquals(r.MaxDoc, expected, actual);
            }
            ir.Dispose();
            dir.Dispose();
        }
        private void DoTestSortedVsStoredFields(int minLength, int maxLength)
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
            Document doc = new Document();
            Field idField = new StringField("id", "", Field.Store.NO);
            Field storedField = new StoredField("stored", new byte[0]);
            Field dvField = new SortedDocValuesField("dv", new BytesRef());
            doc.Add(idField);
            doc.Add(storedField);
            doc.Add(dvField);

            // index some docs
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                int length;
                if (minLength == maxLength)
                {
                    length = minLength; // fixed length
                }
                else
                {
                    length = TestUtil.NextInt(Random(), minLength, maxLength);
                }
                var buffer = new byte[length];
                Random().NextBytes(buffer);
                storedField.BytesValue = new BytesRef(buffer);
                dvField.BytesValue = new BytesRef(buffer);
                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }
            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            foreach (AtomicReaderContext context in ir.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                BinaryDocValues docValues = r.GetSortedDocValues("dv");
                for (int i = 0; i < r.MaxDoc; i++)
                {
                    BytesRef binaryValue = r.Document(i).GetBinaryValue("stored");
                    BytesRef scratch = new BytesRef();
                    docValues.Get(i, scratch);
                    Assert.AreEqual(binaryValue, scratch);
                }
            }
            ir.Dispose();
            dir.Dispose();
        }
        public void TestThreads()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);
            Document doc = new Document();
            Field idField = new StringField("id", "", Field.Store.NO);
            Field storedBinField = new StoredField("storedBin", new byte[0]);
            Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
            Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
            Field storedNumericField = new StoredField("storedNum", "");
            Field dvNumericField = new NumericDocValuesField("dvNum", 0);
            doc.Add(idField);
            doc.Add(storedBinField);
            doc.Add(dvBinField);
            doc.Add(dvSortedField);
            doc.Add(storedNumericField);
            doc.Add(dvNumericField);

            // index some docs
            int numDocs = AtLeast(300);
            for (int i = 0; i < numDocs; i++)
            {
                idField.StringValue = Convert.ToString(i);
                int length = TestUtil.NextInt(Random(), 0, 8);
                var buffer = new byte[length];
                Random().NextBytes(buffer);
                storedBinField.BytesValue = new BytesRef(buffer);
                dvBinField.BytesValue = new BytesRef(buffer);
                dvSortedField.BytesValue = new BytesRef(buffer);
                long numericValue = Random().NextLong();
                storedNumericField.StringValue = Convert.ToString(numericValue);
                dvNumericField.LongValue = numericValue;
                writer.AddDocument(doc);
                if (Random().Next(31) == 0)
                {
                    writer.Commit();
                }
            }

            // delete some docs
            int numDeletions = Random().Next(numDocs / 10);
            for (int i = 0; i < numDeletions; i++)
            {
                int id = Random().Next(numDocs);
                writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
            }
            writer.Dispose();

            // compare
            DirectoryReader ir = DirectoryReader.Open(dir);
            int numThreads = TestUtil.NextInt(Random(), 2, 7);
            ThreadClass[] threads = new ThreadClass[numThreads];
            CountdownEvent startingGun = new CountdownEvent(1);

            for (int i = 0; i < threads.Length; i++)
            {
                threads[i] = new ThreadAnonymousInnerClassHelper(this, ir, startingGun);
                threads[i].Start();
            }
            startingGun.Signal();
            foreach (ThreadClass t in threads)
            {
                t.Join();
            }
            ir.Dispose();
            dir.Dispose();
        }