Field that stores a per-document BytesRef value, indexed for sorting. Here's an example usage:
document.add(new SortedDocValuesField(name, new BytesRef("hello")));
If you also need to store the value, you should add a separate StoredField instance.
/// <summary>
/// Indexes <c>int.MaxValue</c> documents that each carry a two-byte sorted doc value
/// derived from the running document counter, force-merges down to one segment and
/// then checks that every document reads back the two bytes that were written for it.
/// </summary>
/// <param name="scheduler">Merge scheduler installed on the writer configuration.</param>
public virtual void TestFixedSorted([ValueSource(typeof(ConcurrentMergeSchedulers), "Values")]IConcurrentMergeScheduler scheduler)
{
    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BFixedSorted"));
    var mockDir = dir as MockDirectoryWrapper;
    if (mockDir != null)
    {
        mockDir.Throttling = MockDirectoryWrapper.Throttling_e.NEVER;
    }

    var config = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))
        .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
        .SetRAMBufferSizeMB(256.0)
        .SetMergeScheduler(scheduler)
        .SetMergePolicy(NewLogMergePolicy(false, 10))
        .SetOpenMode(IndexWriterConfig.OpenMode_e.CREATE);
    IndexWriter w = new IndexWriter(dir, config);

    // One document instance is reused for every add; only the two backing bytes change.
    // NOTE(review): the test relies on `data` observing writes to `bytes` - confirm BytesRef wraps the array.
    var bytes = new byte[2];
    BytesRef data = new BytesRef(bytes);
    Document doc = new Document();
    doc.Add(new SortedDocValuesField("dv", data));

    for (int docId = 0; docId < int.MaxValue; docId++)
    {
        bytes[0] = (byte)(docId >> 8);
        bytes[1] = (byte)docId;
        w.AddDocument(doc);

        // Progress output every 100000 documents.
        if (docId % 100000 == 0)
        {
            Console.WriteLine("indexed: " + docId);
            Console.Out.Flush();
        }
    }

    w.ForceMerge(1);
    w.Dispose();

    Console.WriteLine("verifying...");
    Console.Out.Flush();

    DirectoryReader r = DirectoryReader.Open(dir);
    // Counter that keeps running across leaves, mirroring the counter used while indexing.
    int expectedValue = 0;
    foreach (AtomicReaderContext leaf in r.Leaves)
    {
        AtomicReader leafReader = leaf.AtomicReader;
        BytesRef scratch = new BytesRef();
        BinaryDocValues dv = leafReader.GetSortedDocValues("dv");
        int maxDoc = leafReader.MaxDoc;
        for (int docId = 0; docId < maxDoc; docId++)
        {
            bytes[0] = (byte)(expectedValue >> 8);
            bytes[1] = (byte)expectedValue;
            dv.Get(docId, scratch);
            Assert.AreEqual(data, scratch);
            expectedValue++;
        }
    }

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Starts three threads, each holding a document whose field "foo" uses a different
/// doc-values type (sorted, numeric, binary), releases them together and asserts that
/// the shared <c>hitExc</c> flag has been set once all of them have finished.
/// </summary>
public virtual void TestMixedTypesDifferentThreads()
{
    Directory dir = NewDirectory();
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())));

    CountDownLatch startingGun = new CountDownLatch(1);
    AtomicBoolean hitExc = new AtomicBoolean();
    ThreadClass[] threads = new ThreadClass[3];

    for (int slot = 0; slot < 3; slot++)
    {
        // Each slot gets the same field name with a different doc-values type.
        Field field;
        switch (slot)
        {
            case 0:
                field = new SortedDocValuesField("foo", new BytesRef("hello"));
                break;
            case 1:
                field = new NumericDocValuesField("foo", 0);
                break;
            default:
                field = new BinaryDocValuesField("foo", new BytesRef("bazz"));
                break;
        }

        Document doc = new Document();
        doc.Add(field);

        ThreadClass worker = new ThreadAnonymousInnerClassHelper(this, w, startingGun, hitExc, doc);
        threads[slot] = worker;
        worker.Start();
    }

    // Release all workers at once, then wait for them.
    startingGun.countDown();
    for (int slot = 0; slot < threads.Length; slot++)
    {
        threads[slot].Join();
    }

    // The flag is presumably set by a worker whose add is rejected; the helper class is not visible here.
    Assert.IsTrue(hitExc.Get());
    w.Dispose();
    dir.Dispose();
}
/// <summary>
/// Runs the <c>TrySet*</c> helpers against a <c>SortedDocValuesField</c>, assigns its
/// <c>BytesValue</c> twice and asserts that <c>BinaryValue</c> equals the last value assigned.
/// </summary>
public virtual void TestSortedBytesDocValuesField()
{
    BytesRef expected = new BytesRef("baz");
    SortedDocValuesField field = new SortedDocValuesField("foo", new BytesRef("bar"));

    // NOTE(review): the TrySet* helpers are defined elsewhere; presumably they check that the setter is rejected.
    TrySetBoost(field);
    TrySetByteValue(field);

    // Bytes values are the only assignments made directly in this test.
    field.BytesValue = "fubar".ToBytesRefArray(Encoding.UTF8);
    field.BytesValue = new BytesRef("baz");

    TrySetDoubleValue(field);
    TrySetIntValue(field);
    TrySetFloatValue(field);
    TrySetLongValue(field);
    TrySetReaderValue(field);
    TrySetShortValue(field);
    TrySetStringValue(field);
    TrySetTokenStreamValue(field);

    Assert.AreEqual(expected, field.BinaryValue);
}
/// <summary>
/// Indexes many documents whose sorted doc value is a random simple string generated
/// with a length argument of 2, then compares the ords and values seen through the
/// multi-segment view against those of the force-merged single segment.
/// </summary>
public virtual void TestSortedWithLotsOfDups()
{
    Directory dir = NewDirectory();

    // The same document and BytesRef instance are reused; only the BytesRef content changes per add.
    Document doc = new Document();
    BytesRef @ref = new BytesRef();
    doc.Add(new SortedDocValuesField("bytes", @ref));

    IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

    int numDocs = AtLeast(500);
    for (int n = 0; n < numDocs; n++)
    {
        @ref.CopyChars(TestUtil.RandomSimpleString(Random(), 2));
        iw.AddDocument(doc);
        if (Random().Next(17) == 0)
        {
            iw.Commit();
        }
    }

    // Reader taken before the merge, then a second one after merging to one segment.
    DirectoryReader ir = iw.Reader;
    iw.ForceMerge(1);
    DirectoryReader ir2 = iw.Reader;
    AtomicReader merged = GetOnlySegmentReader(ir2);
    iw.Dispose();

    SortedDocValues multi = MultiDocValues.GetSortedValues(ir, "bytes");
    SortedDocValues single = merged.GetSortedDocValues("bytes");
    Assert.AreEqual(single.ValueCount, multi.ValueCount);

    BytesRef actual = new BytesRef();
    BytesRef expected = new BytesRef();
    for (int docId = 0; docId < numDocs; docId++)
    {
        // check ord
        var singleOrd = single.GetOrd(docId);
        var multiOrd = multi.GetOrd(docId);
        Assert.AreEqual(singleOrd, multiOrd);

        // check ord value
        single.Get(docId, expected);
        multi.Get(docId, actual);
        Assert.AreEqual(expected, actual);
    }

    ir.Dispose();
    ir2.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes random values into a doc-values field "dv" of the requested type next to a
/// numeric "id" field, then reads both back through value sources and checks that the
/// function values for each document match what was indexed under its id.
/// </summary>
/// <param name="type">Doc-values type to exercise: BINARY, SORTED or NUMERIC.</param>
/// <exception cref="InvalidOperationException">Thrown for any other doc-values type.</exception>
private void DoTest(FieldInfo.DocValuesType_e type)
{
    Directory d = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    int nDocs = AtLeast(50);
    Field id = new NumericDocValuesField("id", 0);

    Field f;
    if (type == FieldInfo.DocValuesType_e.BINARY)
    {
        f = new BinaryDocValuesField("dv", new BytesRef());
    }
    else if (type == FieldInfo.DocValuesType_e.SORTED)
    {
        f = new SortedDocValuesField("dv", new BytesRef());
    }
    else if (type == FieldInfo.DocValuesType_e.NUMERIC)
    {
        f = new NumericDocValuesField("dv", 0);
    }
    else
    {
        throw new InvalidOperationException();
    }

    Document document = new Document();
    document.Add(id);
    document.Add(f);

    // vals[n] remembers what was indexed for the document whose "id" is n.
    object[] vals = new object[nDocs];
    RandomIndexWriter iw = new RandomIndexWriter(Random(), d, iwConfig);
    for (int n = 0; n < nDocs; ++n)
    {
        id.LongValue = n;

        if (type == FieldInfo.DocValuesType_e.SORTED || type == FieldInfo.DocValuesType_e.BINARY)
        {
            // Retry until the random string is non-empty.
            string text;
            do
            {
                text = TestUtil.RandomSimpleString(Random(), 20);
            } while (text.Length == 0);
            vals[n] = text;
            f.BytesValue = new BytesRef(text);
        }
        else if (type == FieldInfo.DocValuesType_e.NUMERIC)
        {
            int bitsPerValue = Random().NextIntBetween(1, 31); // keep it an int
            long number = (long)Random().Next((int)PackedInts.MaxValue(bitsPerValue));
            vals[n] = number;
            f.LongValue = number;
        }

        iw.AddDocument(document);
        if (Random().NextBoolean() && n % 10 == 9)
        {
            iw.Commit();
        }
    }
    iw.Dispose();

    DirectoryReader rd = DirectoryReader.Open(d);
    foreach (AtomicReaderContext leave in rd.Leaves)
    {
        FunctionValues ids = (new LongFieldSource("id")).GetValues(null, leave);

        ValueSource vs;
        if (type == FieldInfo.DocValuesType_e.BINARY || type == FieldInfo.DocValuesType_e.SORTED)
        {
            vs = new BytesRefFieldSource("dv");
        }
        else if (type == FieldInfo.DocValuesType_e.NUMERIC)
        {
            vs = new LongFieldSource("dv");
        }
        else
        {
            throw new InvalidOperationException();
        }

        FunctionValues values = vs.GetValues(null, leave);
        BytesRef bytes = new BytesRef();
        int maxDoc = leave.AtomicReader.MaxDoc;
        for (int docId = 0; docId < maxDoc; ++docId)
        {
            assertTrue(values.Exists(docId));

            if (vs is BytesRefFieldSource)
            {
                assertTrue(values.ObjectVal(docId) is string);
            }
            else if (vs is LongFieldSource)
            {
                assertTrue(values.ObjectVal(docId) is long?);
                assertTrue(values.BytesVal(docId, bytes));
            }
            else
            {
                throw new InvalidOperationException();
            }

            object expected = vals[ids.IntVal(docId)];

            if (type == FieldInfo.DocValuesType_e.SORTED)
            {
                values.OrdVal(docId); // no exception
                assertTrue(values.NumOrd() >= 1);
            }

            // SORTED shares every BINARY check after its ord checks above.
            if (type == FieldInfo.DocValuesType_e.SORTED || type == FieldInfo.DocValuesType_e.BINARY)
            {
                // The object/string checks are performed twice, as in the original test.
                assertEquals(expected, values.ObjectVal(docId));
                assertEquals(expected, values.StrVal(docId));
                assertEquals(expected, values.ObjectVal(docId));
                assertEquals(expected, values.StrVal(docId));
                assertTrue(values.BytesVal(docId, bytes));
                assertEquals(new BytesRef((string)expected), bytes);
            }
            else if (type == FieldInfo.DocValuesType_e.NUMERIC)
            {
                assertEquals(Number.ToInt64(expected.ToString()), values.LongVal(docId));
            }
        }
    }

    rd.Dispose();
    d.Dispose();
}
/// <summary>
/// Tests dv against stored fields with threads (all types + missing).
/// Each document randomly includes or omits the binary/sorted group, the numeric
/// group and up to two sorted-set values; some documents are then deleted and
/// several threads are started together against one shared reader.
/// </summary>
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestThreads2()
{
    AssumeTrue("Codec does not support GetDocsWithField", DefaultCodecSupportsDocsWithField());
    AssumeTrue("Codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());

    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);

    // Field instances are created once and re-populated for every document.
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedBinField = new StoredField("storedBin", new byte[0]);
    Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
    Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
    Field storedNumericField = new StoredField("storedNum", "");
    Field dvNumericField = new NumericDocValuesField("dvNum", 0);

    // index some docs
    int numDocs = AtLeast(300);
    for (int n = 0; n < numDocs; n++)
    {
        idField.StringValue = Convert.ToString(n);

        int length = TestUtil.NextInt(Random(), 0, 8);
        var payload = new byte[length];
        Random().NextBytes(payload);
        storedBinField.BytesValue = new BytesRef(payload);
        dvBinField.BytesValue = new BytesRef(payload);
        dvSortedField.BytesValue = new BytesRef(payload);

        long numericValue = Random().NextLong();
        storedNumericField.StringValue = Convert.ToString(numericValue);
        dvNumericField.LongValue = numericValue;

        Document doc = new Document();
        doc.Add(idField);

        // Each optional group is skipped when Next(4) returns 0.
        bool includeBinary = Random().Next(4) > 0;
        if (includeBinary)
        {
            doc.Add(storedBinField);
            doc.Add(dvBinField);
            doc.Add(dvSortedField);
        }

        bool includeNumeric = Random().Next(4) > 0;
        if (includeNumeric)
        {
            doc.Add(storedNumericField);
            doc.Add(dvNumericField);
        }

        // Collect the sorted-set values in a SortedSet first, so duplicates collapse
        // and the doc-values and stored fields are added in sorted order.
        int numSortedSetFields = Random().Next(3);
        SortedSet<string> values = new SortedSet<string>();
        for (int k = 0; k < numSortedSetFields; k++)
        {
            values.Add(TestUtil.RandomSimpleString(Random()));
        }
        foreach (string value in values)
        {
            doc.Add(new SortedSetDocValuesField("dvSortedSet", new BytesRef(value)));
            doc.Add(new StoredField("storedSortedSet", value));
        }

        writer.AddDocument(doc);
        if (Random().Next(31) == 0)
        {
            writer.Commit();
        }
    }

    // delete some docs
    int numDeletions = Random().Next(numDocs / 10);
    for (int remaining = numDeletions; remaining > 0; remaining--)
    {
        int id = Random().Next(numDocs);
        writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
    }
    writer.Dispose();

    // compare
    DirectoryReader ir = DirectoryReader.Open(dir);
    int numThreads = TestUtil.NextInt(Random(), 2, 7);
    ThreadClass[] threads = new ThreadClass[numThreads];
    CountdownEvent startingGun = new CountdownEvent(1);
    for (int t = 0; t < threads.Length; t++)
    {
        ThreadClass worker = new ThreadAnonymousInnerClassHelper2(this, ir, startingGun);
        threads[t] = worker;
        worker.Start();
    }

    // Signal once all workers have been started, then wait for each of them.
    startingGun.Signal();
    for (int t = 0; t < threads.Length; t++)
    {
        threads[t].Join();
    }

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes the same random string both as an indexed string field and as a sorted
/// doc-values field, deletes some documents, and then compares, per leaf, the terms
/// index obtained from the field cache with the sorted doc values.
/// </summary>
/// <param name="minLength">Lower bound passed to the random length selection.</param>
/// <param name="maxLength">Upper bound passed to the random length selection; when equal to <paramref name="minLength"/> the length is fixed.</param>
private void DoTestSortedVsFieldCache(int minLength, int maxLength)
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);

    // One document is reused; its three fields are re-populated before each add.
    Field idField = new StringField("id", "", Field.Store.NO);
    Field indexedField = new StringField("indexed", "", Field.Store.NO);
    Field dvField = new SortedDocValuesField("dv", new BytesRef());
    Document doc = new Document();
    doc.Add(idField);
    doc.Add(indexedField);
    doc.Add(dvField);

    // index some docs
    int numDocs = AtLeast(300);
    for (int n = 0; n < numDocs; n++)
    {
        idField.StringValue = Convert.ToString(n);

        // Fixed length when both bounds agree (no random draw); otherwise a random length.
        int length = (minLength == maxLength)
            ? minLength
            : TestUtil.NextInt(Random(), minLength, maxLength);

        string value = TestUtil.RandomSimpleString(Random(), length);
        indexedField.StringValue = value;
        dvField.BytesValue = new BytesRef(value);
        writer.AddDocument(doc);

        if (Random().Next(31) == 0)
        {
            writer.Commit();
        }
    }

    // delete some docs
    int numDeletions = Random().Next(numDocs / 10);
    for (int remaining = numDeletions; remaining > 0; remaining--)
    {
        int id = Random().Next(numDocs);
        writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
    }
    writer.Dispose();

    // compare
    DirectoryReader ir = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext leaf in ir.Leaves)
    {
        AtomicReader r = leaf.AtomicReader;
        SortedDocValues expected = FieldCache.DEFAULT.GetTermsIndex(r, "indexed");
        SortedDocValues actual = r.GetSortedDocValues("dv");
        AssertEquals(r.MaxDoc, expected, actual);
    }

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes the same random byte array both as a stored field and as a sorted
/// doc-values field, deletes some documents, and then checks for every document
/// slot of every leaf that the doc value equals the stored binary value.
/// </summary>
/// <param name="minLength">Lower bound passed to the random length selection.</param>
/// <param name="maxLength">Upper bound passed to the random length selection; when equal to <paramref name="minLength"/> the length is fixed.</param>
private void DoTestSortedVsStoredFields(int minLength, int maxLength)
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);

    // One document is reused; its three fields are re-populated before each add.
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedField = new StoredField("stored", new byte[0]);
    Field dvField = new SortedDocValuesField("dv", new BytesRef());
    Document doc = new Document();
    doc.Add(idField);
    doc.Add(storedField);
    doc.Add(dvField);

    // index some docs
    int numDocs = AtLeast(300);
    for (int n = 0; n < numDocs; n++)
    {
        idField.StringValue = Convert.ToString(n);

        // Fixed length when both bounds agree (no random draw); otherwise a random length.
        int length = (minLength == maxLength)
            ? minLength
            : TestUtil.NextInt(Random(), minLength, maxLength);

        var payload = new byte[length];
        Random().NextBytes(payload);
        storedField.BytesValue = new BytesRef(payload);
        dvField.BytesValue = new BytesRef(payload);
        writer.AddDocument(doc);

        if (Random().Next(31) == 0)
        {
            writer.Commit();
        }
    }

    // delete some docs
    int numDeletions = Random().Next(numDocs / 10);
    for (int remaining = numDeletions; remaining > 0; remaining--)
    {
        int id = Random().Next(numDocs);
        writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
    }
    writer.Dispose();

    // compare
    DirectoryReader ir = DirectoryReader.Open(dir);
    foreach (AtomicReaderContext leaf in ir.Leaves)
    {
        AtomicReader r = leaf.AtomicReader;
        BinaryDocValues docValues = r.GetSortedDocValues("dv");
        for (int docId = 0; docId < r.MaxDoc; docId++)
        {
            BytesRef binaryValue = r.Document(docId).GetBinaryValue("stored");
            BytesRef scratch = new BytesRef();
            docValues.Get(docId, scratch);
            Assert.AreEqual(binaryValue, scratch);
        }
    }

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes documents that carry binary, sorted and numeric doc values alongside
/// stored copies of the same data, deletes some of them, and then starts several
/// threads together against one shared reader and waits for them to finish.
/// </summary>
public void TestThreads()
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, conf);

    // One document is reused; its fields are re-populated before each add.
    Field idField = new StringField("id", "", Field.Store.NO);
    Field storedBinField = new StoredField("storedBin", new byte[0]);
    Field dvBinField = new BinaryDocValuesField("dvBin", new BytesRef());
    Field dvSortedField = new SortedDocValuesField("dvSorted", new BytesRef());
    Field storedNumericField = new StoredField("storedNum", "");
    Field dvNumericField = new NumericDocValuesField("dvNum", 0);

    Document doc = new Document();
    doc.Add(idField);
    doc.Add(storedBinField);
    doc.Add(dvBinField);
    doc.Add(dvSortedField);
    doc.Add(storedNumericField);
    doc.Add(dvNumericField);

    // index some docs
    int numDocs = AtLeast(300);
    for (int n = 0; n < numDocs; n++)
    {
        idField.StringValue = Convert.ToString(n);

        int length = TestUtil.NextInt(Random(), 0, 8);
        var payload = new byte[length];
        Random().NextBytes(payload);
        storedBinField.BytesValue = new BytesRef(payload);
        dvBinField.BytesValue = new BytesRef(payload);
        dvSortedField.BytesValue = new BytesRef(payload);

        long numericValue = Random().NextLong();
        storedNumericField.StringValue = Convert.ToString(numericValue);
        dvNumericField.LongValue = numericValue;

        writer.AddDocument(doc);
        if (Random().Next(31) == 0)
        {
            writer.Commit();
        }
    }

    // delete some docs
    int numDeletions = Random().Next(numDocs / 10);
    for (int remaining = numDeletions; remaining > 0; remaining--)
    {
        int id = Random().Next(numDocs);
        writer.DeleteDocuments(new Term("id", Convert.ToString(id)));
    }
    writer.Dispose();

    // compare
    DirectoryReader ir = DirectoryReader.Open(dir);
    int numThreads = TestUtil.NextInt(Random(), 2, 7);
    ThreadClass[] threads = new ThreadClass[numThreads];
    CountdownEvent startingGun = new CountdownEvent(1);
    for (int t = 0; t < threads.Length; t++)
    {
        ThreadClass worker = new ThreadAnonymousInnerClassHelper(this, ir, startingGun);
        threads[t] = worker;
        worker.Start();
    }

    // Signal once all workers have been started, then wait for each of them.
    startingGun.Signal();
    for (int t = 0; t < threads.Length; t++)
    {
        threads[t].Join();
    }

    ir.Dispose();
    dir.Dispose();
}