public virtual void TestMultiValuedDocValuesField()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d);
    Document doc = new Document();
    Field f = new NumericDocValuesField("field", 17);
    // Index doc values are single-valued so we should not
    // be able to add the same field more than once:
    doc.Add(f);
    doc.Add(f);
    try
    {
        w.AddDocument(doc);
        Assert.Fail("didn't hit expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    doc = new Document();
    doc.Add(f);
    w.AddDocument(doc);
    w.ForceMerge(1);
    DirectoryReader r = w.Reader;
    w.Dispose();
    Assert.AreEqual(17, FieldCache.DEFAULT.GetInts(GetOnlySegmentReader(r), "field", false).Get(0));
    r.Dispose();
    d.Dispose();
}
private void AddField(Document doc, string field, string value, DocValuesType type)
{
    doc.Add(new StringField(field, value, Field.Store.YES));
    if (type == DocValuesType.NONE)
    {
        return;
    }
    string dvField = field + "_dv";
    Field valuesField = null;
    switch (type)
    {
        case DocValuesType.NUMERIC:
            valuesField = new NumericDocValuesField(dvField, int.Parse(value, CultureInfo.InvariantCulture));
            break;

        case DocValuesType.BINARY:
            valuesField = new BinaryDocValuesField(dvField, new BytesRef(value));
            break;

        case DocValuesType.SORTED:
            valuesField = new SortedDocValuesField(dvField, new BytesRef(value));
            break;
    }
    doc.Add(valuesField);
}
public virtual void Test()
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    conf.SetCodec(new Lucene46Codec());
    RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, conf);
    Document doc = new Document();
    // these fields should sometimes get term vectors, etc.
    Field idField = NewStringField("id", "", Field.Store.NO);
    Field bodyField = NewTextField("body", "", Field.Store.NO);
    Field dvField = new NumericDocValuesField("dv", 5);
    doc.Add(idField);
    doc.Add(bodyField);
    doc.Add(dvField);
    for (int i = 0; i < 100; i++)
    {
        idField.SetStringValue(Convert.ToString(i));
        bodyField.SetStringValue(TestUtil.RandomUnicodeString(Random()));
        riw.AddDocument(doc);
        if (Random().Next(7) == 0)
        {
            riw.Commit();
        }
        if (Random().Next(20) == 0)
        {
            riw.DeleteDocuments(new Term("id", Convert.ToString(i)));
        }
    }
    riw.Dispose();
    CheckHeaders(dir);
    dir.Dispose();
}
public virtual void TestLongMinMax()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
    Document doc = new Document();
    NumericDocValuesField field = new NumericDocValuesField("field", 0L);
    doc.Add(field);
    field.SetInt64Value(long.MinValue);
    w.AddDocument(doc);
    field.SetInt64Value(0);
    w.AddDocument(doc);
    field.SetInt64Value(long.MaxValue);
    w.AddDocument(doc);

    IndexReader r = w.Reader;
    w.Dispose();

    FacetsCollector fc = new FacetsCollector();
    IndexSearcher s = NewSearcher(r);
    s.Search(new MatchAllDocsQuery(), fc);

    Facets facets = new Int64RangeFacetCounts("field", fc,
        new Int64Range("min", long.MinValue, true, long.MinValue, true),
        new Int64Range("max", long.MaxValue, true, long.MaxValue, true),
        new Int64Range("all0", long.MinValue, true, long.MaxValue, true),
        new Int64Range("all1", long.MinValue, false, long.MaxValue, true),
        new Int64Range("all2", long.MinValue, true, long.MaxValue, false),
        new Int64Range("all3", long.MinValue, false, long.MaxValue, false));

    FacetResult result = facets.GetTopChildren(10, "field");
    Assert.AreEqual("dim=field path=[] value=3 childCount=6\n  min (1)\n  max (1)\n  all0 (3)\n  all1 (2)\n  all2 (2)\n  all3 (1)\n", result.ToString());

    r.Dispose();
    d.Dispose();
}
public static IndexableField InstantiateSortField(string key, object value)
{
    IndexableField field;
    if (value is Number)
    {
        Number number = (Number)value;
        if (value is float?)
        {
            field = new SortedNumericDocValuesField(key, NumericUtils.floatToSortableInt(number.floatValue()));
        }
        else if (value is double?)
        {
            field = new SortedNumericDocValuesField(key, NumericUtils.doubleToSortableLong(number.doubleValue()));
        }
        else
        {
            field = new SortedNumericDocValuesField(key, number.longValue());
        }
    }
    else
    {
        if (LuceneExplicitIndex.KEY_DOC_ID.Equals(key))
        {
            field = new NumericDocValuesField(key, long.Parse(value.ToString()));
        }
        else
        {
            field = new SortedSetDocValuesField(key, new BytesRef(value.ToString()));
        }
    }
    return field;
}
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestDateCompression()
{
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    IndexWriter iwriter = new IndexWriter(dir, iwc);

    const long @base = 13; // prime
    long day = 1000L * 60 * 60 * 24;

    Document doc = new Document();
    NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.Add(dvf);
    for (int i = 0; i < 300; ++i)
    {
        dvf.LongValue = @base + Random().Next(1000) * day;
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size1 = DirSize(dir);

    for (int i = 0; i < 50; ++i)
    {
        dvf.LongValue = @base + Random().Next(1000) * day;
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size2 = DirSize(dir);

    // make sure the new longs cost less than if they had only been packed
    Assert.IsTrue(size2 < size1 + (PackedInts.BitsRequired(day) * 50) / 8);
}
/// <summary>
/// Adds a new <see cref="NumericDocValuesField"/> field with the specified 64-bit <see cref="long"/> value.
/// </summary>
/// <remarks>
/// If you also need to store the value, you should add a
/// separate <see cref="StoredField"/> instance.
/// </remarks>
/// <param name="document">This <see cref="Document"/>.</param>
/// <param name="name">field name</param>
/// <param name="value">64-bit <see cref="long"/> value</param>
/// <returns>The field that was added to this <see cref="Document"/>.</returns>
/// <exception cref="System.ArgumentNullException">if the field <paramref name="name"/> is <c>null</c></exception>
public static NumericDocValuesField AddNumericDocValuesField(this Document document, string name, long value)
{
    var field = new NumericDocValuesField(name, value);
    document.Add(field);
    return field;
}
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestSingleBigValueCompression()
{
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    IndexWriter iwriter = new IndexWriter(dir, iwc);

    Document doc = new Document();
    NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.Add(dvf);
    for (int i = 0; i < 20000; ++i)
    {
        dvf.LongValue = i & 1023;
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size1 = DirSize(dir);

    dvf.LongValue = long.MaxValue;
    iwriter.AddDocument(doc);
    iwriter.ForceMerge(1);
    long size2 = DirSize(dir);

    // make sure the new value did not grow the bpv for every other value
    Assert.IsTrue(size2 < size1 + (20000 * (63 - 10)) / 8);
}
public virtual void TestOverlappedEndStart()
{
    Directory d = NewDirectory();
    var w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
    Document doc = new Document();
    NumericDocValuesField field = new NumericDocValuesField("field", 0L);
    doc.Add(field);
    for (long l = 0; l < 100; l++)
    {
        field.SetInt64Value(l);
        w.AddDocument(doc);
    }
    field.SetInt64Value(long.MaxValue);
    w.AddDocument(doc);

    IndexReader r = w.Reader;
    w.Dispose();

    FacetsCollector fc = new FacetsCollector();
    IndexSearcher s = NewSearcher(r);
    s.Search(new MatchAllDocsQuery(), fc);

    Facets facets = new Int64RangeFacetCounts("field", fc,
        new Int64Range("0-10", 0L, true, 10L, true),
        new Int64Range("10-20", 10L, true, 20L, true),
        new Int64Range("20-30", 20L, true, 30L, true),
        new Int64Range("30-40", 30L, true, 40L, true));

    FacetResult result = facets.GetTopChildren(10, "field");
    Assert.AreEqual("dim=field path=[] value=41 childCount=4\n  0-10 (11)\n  10-20 (11)\n  20-30 (11)\n  30-40 (11)\n", result.ToString());

    r.Dispose();
    d.Dispose();
}
private IDictionary<string, Document> GenerateIndexDocuments(int ndocs)
{
    IDictionary<string, Document> docs = new HashMap<string, Document>();
    for (int i = 0; i < ndocs; i++)
    {
        Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
        Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
        Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
        Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
        Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
        Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_0"));
        Document doc = new Document();
        doc.Add(field);
        doc.Add(payload);
        doc.Add(weight1);
        doc.Add(weight2);
        doc.Add(weight3);
        doc.Add(contexts);
        for (int j = 1; j < AtLeast(3); j++)
        {
            contexts.SetBytesValue(new BytesRef("ctx_" + i + "_" + j));
            doc.Add(contexts);
        }
        docs.Put(field.GetStringValue(), doc);
    }
    return docs;
}
public virtual void TestMissingValues()
{
    AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField());
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
    Document doc = new Document();
    NumericDocValuesField field = new NumericDocValuesField("field", 0L);
    doc.Add(field);
    for (long l = 0; l < 100; l++)
    {
        if (l % 5 == 0)
        {
            // Every 5th doc is missing the value:
            w.AddDocument(new Document());
            continue;
        }
        field.SetInt64Value(l);
        w.AddDocument(doc);
    }

    IndexReader r = w.Reader;

    FacetsCollector fc = new FacetsCollector();
    IndexSearcher s = NewSearcher(r);
    s.Search(new MatchAllDocsQuery(), fc);

    Facets facets = new Int64RangeFacetCounts("field", fc,
        new Int64Range("less than 10", 0L, true, 10L, false),
        new Int64Range("less than or equal to 10", 0L, true, 10L, true),
        new Int64Range("over 90", 90L, false, 100L, false),
        new Int64Range("90 or above", 90L, true, 100L, false),
        new Int64Range("over 1000", 1000L, false, long.MaxValue, false));

    Assert.AreEqual("dim=field path=[] value=16 childCount=5\n  less than 10 (8)\n  less than or equal to 10 (8)\n  over 90 (8)\n  90 or above (8)\n  over 1000 (0)\n", facets.GetTopChildren(10, "field").ToString());

    IOUtils.Close(w, r, d);
}
public virtual void TestDifferentTypedDocValuesField2()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, d);
    Document doc = new Document();
    // Index doc values are single-valued so we should not
    // be able to add the same field more than once:
    Field f = new NumericDocValuesField("field", 17);
    doc.Add(f);
    doc.Add(new SortedDocValuesField("field", new BytesRef("hello")));
    try
    {
        w.AddDocument(doc);
        Assert.Fail("didn't hit expected exception");
    }
    catch (Exception iae) when (iae.IsIllegalArgumentException())
    {
        // expected
    }

    doc = new Document();
    doc.Add(f);
    w.AddDocument(doc);
    w.ForceMerge(1);
    DirectoryReader r = w.GetReader();
    Assert.AreEqual(17, GetOnlySegmentReader(r).GetNumericDocValues("field").Get(0));
    r.Dispose();
    w.Dispose();
    d.Dispose();
}
public virtual void TestDifferentTypedDocValuesField()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone);
    Document doc = new Document();
    // Index doc values are single-valued so we should not
    // be able to add the same field more than once:
    Field f;
    doc.Add(f = new NumericDocValuesField("field", 17));
    doc.Add(new BinaryDocValuesField("field", new BytesRef("blah")));
    try
    {
        w.AddDocument(doc);
        Assert.Fail("didn't hit expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    doc = new Document();
    doc.Add(f);
    w.AddDocument(doc);
    w.ForceMerge(1);
    DirectoryReader r = w.Reader;
    w.Dispose();
    Assert.AreEqual(17, FieldCache.DEFAULT.GetInt32s(GetOnlySegmentReader(r), "field", false).Get(0));
    r.Dispose();
    d.Dispose();
}
public virtual void TestDifferentTypedDocValuesField2()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(Random(), d);
    Document doc = new Document();
    // Index doc values are single-valued so we should not
    // be able to add the same field more than once:
    Field f = new NumericDocValuesField("field", 17);
    doc.Add(f);
    doc.Add(new SortedDocValuesField("field", new BytesRef("hello")));
    try
    {
        w.AddDocument(doc);
        Assert.Fail("didn't hit expected exception");
    }
    catch (System.ArgumentException)
    {
        // expected
    }

    doc = new Document();
    doc.Add(f);
    w.AddDocument(doc);
    w.ForceMerge(1);
    DirectoryReader r = w.Reader;
    Assert.AreEqual(17, GetOnlySegmentReader(r).GetNumericDocValues("field").Get(0));
    r.Dispose();
    w.Dispose();
    d.Dispose();
}
public void TestAddNumericDocValuesField()
{
    NumericDocValuesField field = null;
    long value = 123;
    AssertDocumentExtensionAddsToDocument(document => field = document.AddNumericDocValuesField("theName", value));
    Assert.AreEqual("theName", field.Name);
    Assert.AreEqual(value, field.GetInt64ValueOrDefault());
}
public virtual void TestNumerics()
{
    BaseDirectoryWrapper dir = NewFSDirectory(CreateTempDir("2BNumerics"));
    if (dir is MockDirectoryWrapper)
    {
        ((MockDirectoryWrapper)dir).Throttling = Throttling.NEVER;
    }

    IndexWriter w = new IndexWriter(dir,
        new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random))
            .SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
            .SetRAMBufferSizeMB(256.0)
            .SetMergeScheduler(new ConcurrentMergeScheduler())
            .SetMergePolicy(NewLogMergePolicy(false, 10))
            .SetOpenMode(OpenMode.CREATE));

    Document doc = new Document();
    NumericDocValuesField dvField = new NumericDocValuesField("dv", 0);
    doc.Add(dvField);

    for (int i = 0; i < int.MaxValue; i++)
    {
        dvField.SetInt64Value(i);
        w.AddDocument(doc);
        if (i % 100000 == 0)
        {
            Console.WriteLine("indexed: " + i);
            Console.Out.Flush();
        }
    }

    w.ForceMerge(1);
    w.Dispose();

    Console.WriteLine("verifying...");
    Console.Out.Flush();

    DirectoryReader r = DirectoryReader.Open(dir);
    long expectedValue = 0;
    foreach (AtomicReaderContext context in r.Leaves)
    {
        AtomicReader reader = context.AtomicReader;
        NumericDocValues dv = reader.GetNumericDocValues("dv");
        for (int i = 0; i < reader.MaxDoc; i++)
        {
            Assert.AreEqual(expectedValue, dv.Get(i));
            expectedValue++;
        }
    }

    r.Dispose();
    dir.Dispose();
}
/// <summary>
/// Adds a new <see cref="NumericDocValuesField"/> field with the specified 64-bit <see cref="long"/> value.
/// </summary>
/// <remarks>
/// If you also need to store the value, you should add a
/// separate <see cref="StoredField"/> instance.
/// </remarks>
/// <param name="document">This <see cref="Document"/>.</param>
/// <param name="name">field name</param>
/// <param name="value">64-bit <see cref="long"/> value</param>
/// <returns>The field that was added to this <see cref="Document"/>.</returns>
/// <exception cref="ArgumentNullException">if this <paramref name="document"/> or the field <paramref name="name"/> is <c>null</c>.</exception>
public static NumericDocValuesField AddNumericDocValuesField(this Document document, string name, long value)
{
    if (document is null)
    {
        throw new ArgumentNullException(nameof(document));
    }
    var field = new NumericDocValuesField(name, value);
    document.Add(field);
    return field;
}
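// A minimal usage sketch for the AddNumericDocValuesField extension above. This is a
// hypothetical example, not part of the original sources: the method name and the
// "timestamp" field are illustrative only. As the remarks note, doc values are not
// stored fields, so a separate StoredField with the same name is added when the
// value must also be retrievable from search results.
public static void AddNumericDocValuesFieldUsageSketch()
{
    var document = new Document();
    // sortable/aggregatable doc-values representation
    NumericDocValuesField dv = document.AddNumericDocValuesField("timestamp", 1234567890L);
    // retrievable stored copy of the same value
    document.Add(new StoredField("timestamp", 1234567890L));
}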
public virtual void TestNumericDocValuesField()
{
    NumericDocValuesField field = new NumericDocValuesField("foo", 5L);
    TrySetBoost(field);
    TrySetByteValue(field);
    TrySetBytesValue(field);
    TrySetBytesRefValue(field);
    TrySetDoubleValue(field);
    TrySetIntValue(field);
    TrySetFloatValue(field);
    field.LongValue = 6; // ok
    TrySetReaderValue(field);
    TrySetShortValue(field);
    TrySetStringValue(field);
    TrySetTokenStreamValue(field);

    Assert.AreEqual(6L, (long)field.NumericValue);
}
public virtual void TestNumerics()
{
    Directory dir = NewDirectory();
    Document doc = new Document();
    Field field = new NumericDocValuesField("numbers", 0);
    doc.Add(field);

    IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

    int numDocs = AtLeast(500);
    for (int i = 0; i < numDocs; i++)
    {
        field.SetInt64Value(Random().NextLong());
        iw.AddDocument(doc);
        if (Random().Next(17) == 0)
        {
            iw.Commit();
        }
    }
    DirectoryReader ir = iw.Reader;
    iw.ForceMerge(1);
    DirectoryReader ir2 = iw.Reader;
    AtomicReader merged = GetOnlySegmentReader(ir2);
    iw.Dispose();

    NumericDocValues multi = MultiDocValues.GetNumericValues(ir, "numbers");
    NumericDocValues single = merged.GetNumericDocValues("numbers");
    for (int i = 0; i < numDocs; i++)
    {
        Assert.AreEqual(single.Get(i), multi.Get(i));
    }
    ir.Dispose();
    ir2.Dispose();
    dir.Dispose();
}
// [Test] // LUCENENET NOTE: For now, we are overriding this test in every subclass to pull it into the right context for the subclass
public virtual void TestUniqueValuesCompression()
{
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    IndexWriter iwriter = new IndexWriter(dir, iwc);

    int uniqueValueCount = TestUtil.NextInt(Random(), 1, 256);
    IList<long> values = new List<long>();

    Document doc = new Document();
    NumericDocValuesField dvf = new NumericDocValuesField("dv", 0);
    doc.Add(dvf);
    for (int i = 0; i < 300; ++i)
    {
        long value;
        if (values.Count < uniqueValueCount)
        {
            value = Random().NextLong();
            values.Add(value);
        }
        else
        {
            value = RandomInts.RandomFrom(Random(), values);
        }
        dvf.LongValue = value;
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size1 = DirSize(dir);

    for (int i = 0; i < 20; ++i)
    {
        dvf.LongValue = RandomInts.RandomFrom(Random(), values);
        iwriter.AddDocument(doc);
    }
    iwriter.ForceMerge(1);
    long size2 = DirSize(dir);

    // make sure the new longs did not cost 8 bytes each
    Assert.IsTrue(size2 < size1 + 8 * 20);
}
public virtual void TestMixedTypesDifferentThreads()
{
    Directory dir = NewDirectory();
    IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)));

    CountdownEvent startingGun = new CountdownEvent(1);
    AtomicBoolean hitExc = new AtomicBoolean();
    ThreadJob[] threads = new ThreadJob[3];
    for (int i = 0; i < 3; i++)
    {
        Field field;
        if (i == 0)
        {
            field = new SortedDocValuesField("foo", new BytesRef("hello"));
        }
        else if (i == 1)
        {
            field = new NumericDocValuesField("foo", 0);
        }
        else
        {
            field = new BinaryDocValuesField("foo", new BytesRef("bazz"));
        }
        Document doc = new Document();
        doc.Add(field);

        threads[i] = new ThreadAnonymousClass(this, w, startingGun, hitExc, doc);
        threads[i].Start();
    }
    startingGun.Signal();
    foreach (ThreadJob t in threads)
    {
        t.Join();
    }
    Assert.IsTrue(hitExc);
    w.Dispose();
    dir.Dispose();
}
public virtual void TestBasicLong()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, d);
    Document doc = new Document();
    NumericDocValuesField field = new NumericDocValuesField("field", 0L);
    doc.Add(field);
    for (long l = 0; l < 100; l++)
    {
        field.SetInt64Value(l);
        w.AddDocument(doc);
    }

    // Also add Long.MAX_VALUE
    field.SetInt64Value(long.MaxValue);
    w.AddDocument(doc);

    IndexReader r = w.GetReader();
    w.Dispose();

    FacetsCollector fc = new FacetsCollector();
    IndexSearcher s = NewSearcher(r);
    s.Search(new MatchAllDocsQuery(), fc);

    Facets facets = new Int64RangeFacetCounts("field", fc,
        new Int64Range("less than 10", 0L, true, 10L, false),
        new Int64Range("less than or equal to 10", 0L, true, 10L, true),
        new Int64Range("over 90", 90L, false, 100L, false),
        new Int64Range("90 or above", 90L, true, 100L, false),
        new Int64Range("over 1000", 1000L, false, long.MaxValue, true));

    FacetResult result = facets.GetTopChildren(10, "field");
    Assert.AreEqual("dim=field path=[] value=22 childCount=5\n  less than 10 (10)\n  less than or equal to 10 (11)\n  over 90 (9)\n  90 or above (10)\n  over 1000 (1)\n", result.ToString());

    r.Dispose();
    d.Dispose();
}
public virtual void TestMultiValuedDocValuesField()
{
    Directory d = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, d);
    Document doc = new Document();
    Field f = new NumericDocValuesField("field", 17);
    // Index doc values are single-valued so we should not
    // be able to add the same field more than once:
    doc.Add(f);
    doc.Add(f);
    try
    {
        w.AddDocument(doc);
        Assert.Fail("didn't hit expected exception");
    }
    catch (ArgumentException)
    {
        // expected
    }

    doc = new Document();
    doc.Add(f);
    w.AddDocument(doc);
    w.ForceMerge(1);
    DirectoryReader r = w.GetReader();
    w.Dispose();
    Assert.AreEqual(17, FieldCache.DEFAULT.GetInt32s(GetOnlySegmentReader(r), "field", false).Get(0));
    r.Dispose();
    d.Dispose();
}
// NOTE: this is likely buggy, and cannot clone fields
// with tokenStreamValues, etc. Use at your own risk!!
// TODO: is there a pre-existing way to do this!!!
public static Document CloneDocument(Document doc1)
{
    Document doc2 = new Document();
    foreach (IIndexableField f in doc1.Fields)
    {
        Field field1 = (Field)f;
        Field field2;
        DocValuesType dvType = field1.FieldType.DocValueType;
        NumericType numType = field1.FieldType.NumericType;
        if (dvType != DocValuesType.NONE)
        {
            switch (dvType)
            {
                case DocValuesType.NUMERIC:
                    field2 = new NumericDocValuesField(field1.Name, field1.GetInt64Value().Value);
                    break;

                case DocValuesType.BINARY:
                    field2 = new BinaryDocValuesField(field1.Name, field1.GetBinaryValue());
                    break;

                case DocValuesType.SORTED:
                    field2 = new SortedDocValuesField(field1.Name, field1.GetBinaryValue());
                    break;

                default:
                    throw IllegalStateException.Create("unknown Type: " + dvType);
            }
        }
        else if (numType != NumericType.NONE)
        {
            // use the getter that matches each numeric type, so values are not truncated
            switch (numType)
            {
                case NumericType.INT32:
                    field2 = new Int32Field(field1.Name, field1.GetInt32Value().Value, field1.FieldType);
                    break;

                case NumericType.SINGLE:
                    field2 = new SingleField(field1.Name, field1.GetSingleValue().Value, field1.FieldType);
                    break;

                case NumericType.INT64:
                    field2 = new Int64Field(field1.Name, field1.GetInt64Value().Value, field1.FieldType);
                    break;

                case NumericType.DOUBLE:
                    field2 = new DoubleField(field1.Name, field1.GetDoubleValue().Value, field1.FieldType);
                    break;

                default:
                    throw IllegalStateException.Create("unknown Type: " + numType);
            }
        }
        else
        {
            field2 = new Field(field1.Name, field1.GetStringValue(), field1.FieldType);
        }
        doc2.Add(field2);
    }
    return doc2;
}
private void DoTest(DocValuesType type)
{
    Directory d = NewDirectory();
    IndexWriterConfig iwConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    int nDocs = AtLeast(50);
    Field id = new NumericDocValuesField("id", 0);
    Field f;
    switch (type)
    {
        case DocValuesType.BINARY:
            f = new BinaryDocValuesField("dv", new BytesRef());
            break;

        case DocValuesType.SORTED:
            f = new SortedDocValuesField("dv", new BytesRef());
            break;

        case DocValuesType.NUMERIC:
            f = new NumericDocValuesField("dv", 0);
            break;

        default:
            throw AssertionError.Create();
    }
    Document document = new Document();
    document.Add(id);
    document.Add(f);

    object[] vals = new object[nDocs];

    RandomIndexWriter iw = new RandomIndexWriter(Random, d, iwConfig);
    for (int i = 0; i < nDocs; ++i)
    {
        id.SetInt64Value(i);
        switch (type)
        {
            case DocValuesType.SORTED:
            case DocValuesType.BINARY:
                do
                {
                    vals[i] = TestUtil.RandomSimpleString(Random, 20);
                } while (((string)vals[i]).Length == 0);
                f.SetBytesValue(new BytesRef((string)vals[i]));
                break;

            case DocValuesType.NUMERIC:
                int bitsPerValue = RandomInts.RandomInt32Between(Random, 1, 31); // keep it an int
                vals[i] = (long)Random.Next((int)PackedInt32s.MaxValue(bitsPerValue));
                f.SetInt64Value((long)vals[i]);
                break;
        }
        iw.AddDocument(document);
        if (Random.NextBoolean() && i % 10 == 9)
        {
            iw.Commit();
        }
    }
    iw.Dispose();

    DirectoryReader rd = DirectoryReader.Open(d);
    foreach (AtomicReaderContext leave in rd.Leaves)
    {
        FunctionValues ids = (new Int64FieldSource("id")).GetValues(null, leave);
        ValueSource vs;
        switch (type)
        {
            case DocValuesType.BINARY:
            case DocValuesType.SORTED:
                vs = new BytesRefFieldSource("dv");
                break;

            case DocValuesType.NUMERIC:
                vs = new Int64FieldSource("dv");
                break;

            default:
                throw AssertionError.Create();
        }
        FunctionValues values = vs.GetValues(null, leave);
        BytesRef bytes = new BytesRef();
        for (int i = 0; i < leave.AtomicReader.MaxDoc; ++i)
        {
            assertTrue(values.Exists(i));
            if (vs is BytesRefFieldSource)
            {
                assertTrue(values.ObjectVal(i) is string);
            }
            else if (vs is Int64FieldSource)
            {
                assertTrue(values.ObjectVal(i) is J2N.Numerics.Int64);
                assertTrue(values.BytesVal(i, bytes));
            }
            else
            {
                throw AssertionError.Create();
            }

            object expected = vals[ids.Int32Val(i)];
            switch (type)
            {
                case DocValuesType.SORTED:
                    values.OrdVal(i); // no exception
                    assertTrue(values.NumOrd >= 1);
                    goto case DocValuesType.BINARY;

                case DocValuesType.BINARY:
                    assertEquals(expected, values.ObjectVal(i));
                    assertEquals(expected, values.StrVal(i));
                    assertEquals(expected, values.ObjectVal(i));
                    assertEquals(expected, values.StrVal(i));
                    assertTrue(values.BytesVal(i, bytes));
                    assertEquals(new BytesRef((string)expected), bytes);
                    break;

                case DocValuesType.NUMERIC:
                    assertEquals(Convert.ToInt64(expected, CultureInfo.InvariantCulture), values.Int64Val(i));
                    break;
            }
        }
    }
    rd.Dispose();
    d.Dispose();
}
public void Test()
{
    RandomIndexWriter writer;
    DirectoryReader indexReader;
    int numParents = AtLeast(200);
    IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    cfg.SetMergePolicy(NewLogMergePolicy());
    using (writer = new RandomIndexWriter(Random(), NewDirectory(), cfg))
    {
        Document parentDoc = new Document();
        NumericDocValuesField parentVal = new NumericDocValuesField("parent_val", 0L);
        parentDoc.Add(parentVal);
        StringField parent = new StringField("parent", "true", Field.Store.YES);
        parentDoc.Add(parent);
        for (int i = 0; i < numParents; ++i)
        {
            List<Document> documents = new List<Document>();
            int numChildren = Random().nextInt(10);
            for (int j = 0; j < numChildren; ++j)
            {
                Document childDoc = new Document();
                childDoc.Add(new NumericDocValuesField("child_val", Random().nextInt(5)));
                documents.Add(childDoc);
            }
            parentVal.SetInt64Value(Random().nextInt(50));
            documents.Add(parentDoc);
            writer.AddDocuments(documents);
        }
        writer.ForceMerge(1);
        indexReader = writer.Reader;
    }

    AtomicReader reader = GetOnlySegmentReader(indexReader);
    Filter parentsFilter = new FixedBitSetCachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("parent", "true"))));
    FixedBitSet parentBits = (FixedBitSet)parentsFilter.GetDocIdSet(reader.AtomicContext, null);
    NumericDocValues parentValues = reader.GetNumericDocValues("parent_val");
    NumericDocValues childValues = reader.GetNumericDocValues("child_val");

    Sort parentSort = new Sort(new SortField("parent_val", SortFieldType.INT64));
    Sort childSort = new Sort(new SortField("child_val", SortFieldType.INT64));

    Sort sort = new Sort(new SortField("custom", new BlockJoinComparerSource(parentsFilter, parentSort, childSort)));
    Sorter sorter = new Sorter(sort);
    Sorter.DocMap docMap = sorter.Sort(reader);
    assertEquals(reader.MaxDoc, docMap.Count);

    int[] children = new int[1];
    int numChildren2 = 0;
    int previousParent = -1;
    for (int i = 0; i < docMap.Count; ++i)
    {
        int oldID = docMap.NewToOld(i);
        if (parentBits.Get(oldID))
        {
            // check that we have the right children
            for (int j = 0; j < numChildren2; ++j)
            {
                assertEquals(oldID, parentBits.NextSetBit(children[j]));
            }
            // check that children are sorted
            for (int j = 1; j < numChildren2; ++j)
            {
                int doc1 = children[j - 1];
                int doc2 = children[j];
                if (childValues.Get(doc1) == childValues.Get(doc2))
                {
                    assertTrue(doc1 < doc2); // sort is stable
                }
                else
                {
                    assertTrue(childValues.Get(doc1) < childValues.Get(doc2));
                }
            }
            // check that parents are sorted
            if (previousParent != -1)
            {
                if (parentValues.Get(previousParent) == parentValues.Get(oldID))
                {
                    assertTrue(previousParent < oldID);
                }
                else
                {
                    assertTrue(parentValues.Get(previousParent) < parentValues.Get(oldID));
                }
            }
            // reset
            previousParent = oldID;
            numChildren2 = 0;
        }
        else
        {
            children = ArrayUtil.Grow(children, numChildren2 + 1);
            children[numChildren2++] = oldID;
        }
    }
    indexReader.Dispose();
    writer.w.Directory.Dispose();
}