public FloatDocValuesAnonymousInnerClassHelper(NormValueSource outerInstance, NormValueSource @this, TFIDFSimilarity similarity, NumericDocValues norms)
    : base(@this)
{
    this.outerInstance = outerInstance;
    this.similarity = similarity;
    this.norms = norms;
}
/// <summary>
/// Returns a <see cref="NumericDocValues"/> for a reader's docvalues (potentially merging on-the-fly).
/// <para>
/// This is a slow way to access numeric values. Instead, access them per-segment
/// with <see cref="AtomicReader.GetNumericDocValues(string)"/>.
/// </para>
/// </summary>
public static NumericDocValues GetNumericValues(IndexReader r, string field)
{
    IList<AtomicReaderContext> leaves = r.Leaves;
    int size = leaves.Count;
    if (size == 0)
    {
        return null;
    }
    else if (size == 1)
    {
        return leaves[0].AtomicReader.GetNumericDocValues(field);
    }

    bool anyReal = false;
    NumericDocValues[] values = new NumericDocValues[size];
    int[] starts = new int[size + 1];
    for (int i = 0; i < size; i++)
    {
        AtomicReaderContext context = leaves[i];
        NumericDocValues v = context.AtomicReader.GetNumericDocValues(field);
        if (v == null)
        {
            v = DocValues.EMPTY_NUMERIC;
        }
        else
        {
            anyReal = true;
        }
        values[i] = v;
        starts[i] = context.DocBase;
    }
    starts[size] = r.MaxDoc;

    if (!anyReal)
    {
        return null;
    }
    else
    {
        return new NumericDocValuesAnonymousInnerClassHelper2(values, starts);
    }
}
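// For contrast, a minimal sketch (not from the Lucene.NET sources) of the per-segment
// access pattern the summary above recommends; the method name and the "numbers"
// field are hypothetical placeholders.
static void ReadNumbersPerSegment(IndexReader reader)
{
    foreach (AtomicReaderContext context in reader.Leaves)
    {
        AtomicReader leaf = context.AtomicReader;
        NumericDocValues values = leaf.GetNumericDocValues("numbers");
        if (values == null)
        {
            continue; // this segment has no values for the field
        }
        for (int doc = 0; doc < leaf.MaxDoc; doc++)
        {
            long value = values.Get(doc);
            int topLevelDocID = context.DocBase + doc; // map back into the composite reader's doc space
            // consume (topLevelDocID, value) ...
        }
    }
}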
public static void CheckNorms(AtomicReader reader)
{
    // test omit norms
    for (int i = 0; i < DocHelper.Fields.Length; i++)
    {
        IIndexableField f = DocHelper.Fields[i];
        if (f.IndexableFieldType.IsIndexed)
        {
            Assert.AreEqual(reader.GetNormValues(f.Name) != null, !f.IndexableFieldType.OmitNorms);
            Assert.AreEqual(reader.GetNormValues(f.Name) != null, !DocHelper.NoNorms.ContainsKey(f.Name));
            if (reader.GetNormValues(f.Name) == null)
            {
                // test for norms of null
                NumericDocValues norms = MultiDocValues.GetNormValues(reader, f.Name);
                Assert.IsNull(norms);
            }
        }
    }
}
public virtual void TestCustomEncoder()
{
    Directory dir = NewDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(Random());
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);
    config.SetSimilarity(new CustomNormEncodingSimilarity(this));
    RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
    Document doc = new Document();
    Field foo = NewTextField("foo", "", Field.Store.NO);
    Field bar = NewTextField("bar", "", Field.Store.NO);
    doc.Add(foo);
    doc.Add(bar);

    for (int i = 0; i < 100; i++)
    {
        bar.SetStringValue("singleton");
        writer.AddDocument(doc);
    }

    IndexReader reader = writer.Reader;
    writer.Dispose();

    NumericDocValues fooNorms = MultiDocValues.GetNormValues(reader, "foo");
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        Assert.AreEqual(0, fooNorms.Get(i));
    }

    NumericDocValues barNorms = MultiDocValues.GetNormValues(reader, "bar");
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        Assert.AreEqual(1, barNorms.Get(i));
    }

    reader.Dispose();
    dir.Dispose();
}
public virtual void TestNumerics()
{
    Directory dir = NewDirectory();
    Document doc = new Document();
    Field field = new NumericDocValuesField("numbers", 0);
    doc.Add(field);

    IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
    iwc.SetMergePolicy(NewLogMergePolicy());
    RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

    int numDocs = AtLeast(500);
    for (int i = 0; i < numDocs; i++)
    {
        field.SetInt64Value(Random().NextLong());
        iw.AddDocument(doc);
        if (Random().Next(17) == 0)
        {
            iw.Commit();
        }
    }
    DirectoryReader ir = iw.Reader;
    iw.ForceMerge(1);
    DirectoryReader ir2 = iw.Reader;
    AtomicReader merged = GetOnlySegmentReader(ir2);
    iw.Dispose();

    NumericDocValues multi = MultiDocValues.GetNumericValues(ir, "numbers");
    NumericDocValues single = merged.GetNumericDocValues("numbers");
    for (int i = 0; i < numDocs; i++)
    {
        Assert.AreEqual(single.Get(i), multi.Get(i));
    }
    ir.Dispose();
    ir2.Dispose();
    dir.Dispose();
}
/// <summary>
/// Returns a <see cref="NumericDocValues"/> for a reader's norms (potentially merging on-the-fly).
/// <para>
/// This is a slow way to access normalization values. Instead, access them per-segment
/// with <see cref="AtomicReader.GetNormValues(string)"/>.
/// </para>
/// </summary>
public static NumericDocValues GetNormValues(IndexReader r, string field)
{
    IList<AtomicReaderContext> leaves = r.Leaves;
    int size = leaves.Count;
    if (size == 0)
    {
        return null;
    }
    else if (size == 1)
    {
        return leaves[0].AtomicReader.GetNormValues(field);
    }
    FieldInfo fi = MultiFields.GetMergedFieldInfos(r).FieldInfo(field);
    if (fi == null || !fi.HasNorms())
    {
        return null;
    }

    bool anyReal = false;
    NumericDocValues[] values = new NumericDocValues[size];
    int[] starts = new int[size + 1];
    for (int i = 0; i < size; i++)
    {
        AtomicReaderContext context = leaves[i];
        NumericDocValues v = context.AtomicReader.GetNormValues(field);
        if (v == null)
        {
            v = DocValues.EMPTY_NUMERIC;
        }
        else
        {
            anyReal = true;
        }
        values[i] = v;
        starts[i] = context.DocBase;
    }
    starts[size] = r.MaxDoc;

    Debug.Assert(anyReal);
    return new NumericDocValuesAnonymousInnerClassHelper(values, starts);
}
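// The NumericDocValuesAnonymousInnerClassHelper returned above is not part of this
// listing; presumably it dispatches a top-level docID to the owning leaf through the
// starts array, roughly along these lines (a hedged sketch, not the actual class):
private sealed class MergedNumericDocValuesSketch : NumericDocValues
{
    private readonly NumericDocValues[] values;
    private readonly int[] starts; // starts[i] = docBase of leaf i; starts[size] = maxDoc

    public MergedNumericDocValuesSketch(NumericDocValues[] values, int[] starts)
    {
        this.values = values;
        this.starts = starts;
    }

    public override long Get(int docID)
    {
        int subIndex = ReaderUtil.SubIndex(docID, starts); // binary search for the owning leaf
        return values[subIndex].Get(docID - starts[subIndex]);
    }
}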
private void MergeNorms(SegmentWriteState segmentWriteState)
{
    DocValuesConsumer consumer = codec.NormsFormat.NormsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in mergeState.FieldInfos)
        {
            if (field.HasNorms)
            {
                IList<NumericDocValues> toMerge = new JCG.List<NumericDocValues>();
                IList<IBits> docsWithField = new JCG.List<IBits>();
                foreach (AtomicReader reader in mergeState.Readers)
                {
                    NumericDocValues norms = reader.GetNormValues(field.Name);
                    if (norms == null)
                    {
                        norms = DocValues.EMPTY_NUMERIC;
                    }
                    toMerge.Add(norms);
                    docsWithField.Add(new Lucene.Net.Util.Bits.MatchAllBits(reader.MaxDoc));
                }
                consumer.MergeNumericField(field, mergeState, toMerge, docsWithField);
            }
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(consumer);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(consumer);
        }
    }
}
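// Note the contrast with MergeDocValues below: norms are dense, so every reader
// contributes a MatchAllBits set (each document has a norm, with EMPTY_NUMERIC
// standing in when a reader lacks them), whereas a segment that is missing a
// doc-values field contributes MatchNoBits for its documents.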
public void AssertDocValuesEquals(string info, int num, NumericDocValues leftDocValues, NumericDocValues rightDocValues)
{
    Assert.IsNotNull(leftDocValues, info);
    Assert.IsNotNull(rightDocValues, info);
    for (int docID = 0; docID < num; docID++)
    {
        Assert.AreEqual(leftDocValues.Get(docID), rightDocValues.Get(docID), info);
    }
}
private void MergeDocValues(SegmentWriteState segmentWriteState)
{
    DocValuesConsumer consumer = codec.DocValuesFormat.FieldsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in mergeState.FieldInfos)
        {
            DocValuesType type = field.DocValuesType;
            if (type != DocValuesType.NONE)
            {
                if (type == DocValuesType.NUMERIC)
                {
                    IList<NumericDocValues> toMerge = new JCG.List<NumericDocValues>();
                    IList<IBits> docsWithField = new JCG.List<IBits>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        NumericDocValues values = reader.GetNumericDocValues(field.Name);
                        IBits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_NUMERIC;
                            bits = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                        }
                        toMerge.Add(values);
                        docsWithField.Add(bits);
                    }
                    consumer.MergeNumericField(field, mergeState, toMerge, docsWithField);
                }
                else if (type == DocValuesType.BINARY)
                {
                    IList<BinaryDocValues> toMerge = new JCG.List<BinaryDocValues>();
                    IList<IBits> docsWithField = new JCG.List<IBits>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                        IBits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_BINARY;
                            bits = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                        }
                        toMerge.Add(values);
                        docsWithField.Add(bits);
                    }
                    consumer.MergeBinaryField(field, mergeState, toMerge, docsWithField);
                }
                else if (type == DocValuesType.SORTED)
                {
                    IList<SortedDocValues> toMerge = new JCG.List<SortedDocValues>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        SortedDocValues values = reader.GetSortedDocValues(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_SORTED;
                        }
                        toMerge.Add(values);
                    }
                    consumer.MergeSortedField(field, mergeState, toMerge);
                }
                else if (type == DocValuesType.SORTED_SET)
                {
                    IList<SortedSetDocValues> toMerge = new JCG.List<SortedSetDocValues>();
                    foreach (AtomicReader reader in mergeState.Readers)
                    {
                        SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_SORTED_SET;
                        }
                        toMerge.Add(values);
                    }
                    consumer.MergeSortedSetField(field, mergeState, toMerge);
                }
                else
                {
                    throw AssertionError.Create("type=" + type);
                }
            }
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Dispose(consumer);
        }
        else
        {
            IOUtils.DisposeWhileHandlingException(consumer);
        }
    }
}
public AssertingNumericDocValues(NumericDocValues @in, int maxDoc) { this.@in = @in; this.maxDoc = maxDoc; }
public virtual void TestTonsOfUpdates()
{
    // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
    Directory dir = NewDirectory();
    Random random = Random;
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
    conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
    IndexWriter writer = new IndexWriter(dir, conf);

    // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
    int numDocs = AtLeast(20000);
    int numBinaryFields = AtLeast(5);
    int numTerms = TestUtil.NextInt32(random, 10, 100); // terms should affect many docs
    ISet<string> updateTerms = new JCG.HashSet<string>();
    while (updateTerms.Count < numTerms)
    {
        updateTerms.Add(TestUtil.RandomSimpleString(random));
    }

    // System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

    // build a large index with many BDV fields and update terms
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        int numUpdateTerms = TestUtil.NextInt32(random, 1, numTerms / 10);
        for (int j = 0; j < numUpdateTerms; j++)
        {
            doc.Add(new StringField("upd", RandomPicks.RandomFrom(random, updateTerms), Store.NO));
        }
        for (int j = 0; j < numBinaryFields; j++)
        {
            long val = random.Next();
            doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
            doc.Add(new NumericDocValuesField("cf" + j, val * 2));
        }
        writer.AddDocument(doc);
    }

    writer.Commit(); // commit so there's something to apply to

    // set to flush every 2048 bytes (approximately every 12 updates), so we get
    // many flushes during binary updates
    writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
    int numUpdates = AtLeast(100);
    // System.out.println("numUpdates=" + numUpdates);
    for (int i = 0; i < numUpdates; i++)
    {
        int field = random.Next(numBinaryFields);
        Term updateTerm = new Term("upd", RandomPicks.RandomFrom(random, updateTerms));
        long value = random.Next();
        writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
        writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
    }

    writer.Dispose();

    DirectoryReader reader = DirectoryReader.Open(dir);
    BytesRef scratch = new BytesRef();
    foreach (AtomicReaderContext context in reader.Leaves)
    {
        for (int i = 0; i < numBinaryFields; i++)
        {
            AtomicReader r = context.AtomicReader;
            BinaryDocValues f = r.GetBinaryDocValues("f" + i);
            NumericDocValues cf = r.GetNumericDocValues("cf" + i);
            for (int j = 0; j < r.MaxDoc; j++)
            {
                Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
            }
        }
    }

    reader.Dispose();
    dir.Dispose();
}
private static void CheckNumericDocValues(string fieldName, AtomicReader reader, NumericDocValues ndv, Bits docsWithField)
{
    for (int i = 0; i < reader.MaxDoc; i++)
    {
        long value = ndv.Get(i);
        if (docsWithField.Get(i) == false && value != 0)
        {
            throw new Exception("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i);
        }
    }
}
private Explanation ExplainScore(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms)
{
    Explanation result = new Explanation();
    result.Description = "score(doc=" + doc + ",freq=" + freq + "), product of:";
    Explanation boostExpl = new Explanation(stats.QueryBoost * stats.TopLevelBoost, "boost");
    if (boostExpl.Value != 1.0f)
    {
        result.AddDetail(boostExpl);
    }
    result.AddDetail(stats.Idf);
    Explanation tfNormExpl = new Explanation();
    tfNormExpl.Description = "tfNorm, computed from:";
    tfNormExpl.AddDetail(freq);
    tfNormExpl.AddDetail(new Explanation(K1_Renamed, "parameter k1"));
    if (norms == null)
    {
        tfNormExpl.AddDetail(new Explanation(0, "parameter b (norms omitted for field)"));
        tfNormExpl.Value = (freq.Value * (K1_Renamed + 1)) / (freq.Value + K1_Renamed);
    }
    else
    {
        float doclen = DecodeNormValue((sbyte)norms.Get(doc));
        tfNormExpl.AddDetail(new Explanation(b, "parameter b"));
        tfNormExpl.AddDetail(new Explanation(stats.Avgdl, "avgFieldLength"));
        tfNormExpl.AddDetail(new Explanation(doclen, "fieldLength"));
        tfNormExpl.Value = (freq.Value * (K1_Renamed + 1)) / (freq.Value + K1_Renamed * (1 - b + b * doclen / stats.Avgdl));
    }
    result.AddDetail(tfNormExpl);
    result.Value = boostExpl.Value * stats.Idf.Value * tfNormExpl.Value;
    return result;
}
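// As a reference, the arithmetic ExplainScore reconstructs is the standard BM25
// term score; a self-contained sketch (parameter names are illustrative, they are
// not the BM25Similarity fields):
static float Bm25TermScore(float freq, float k1, float b, float docLen, float avgDocLen, float idf, float boost)
{
    // when norms are omitted the length ratio drops out, i.e. b is effectively 0
    float tfNorm = (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * docLen / avgDocLen));
    return boost * idf * tfNorm; // matches result.Value = boostExpl.Value * stats.Idf.Value * tfNormExpl.Value
}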
internal BM25DocScorer(BM25Similarity outerInstance, BM25Stats stats, NumericDocValues norms)
{
    this.OuterInstance = outerInstance;
    this.Stats = stats;
    this.WeightValue = stats.Weight * (outerInstance.K1_Renamed + 1);
    this.Cache = stats.Cache;
    this.Norms = norms;
}
public AssertingNumericDocValues(NumericDocValues @in, int maxDoc) { this.@in = @in; this.MaxDoc = maxDoc; }
private void MergeDocValues(SegmentWriteState segmentWriteState)
{
    DocValuesConsumer consumer = Codec.DocValuesFormat().FieldsConsumer(segmentWriteState);
    bool success = false;
    try
    {
        foreach (FieldInfo field in MergeState.FieldInfos)
        {
            DocValuesType_e? type = field.DocValuesType;
            if (type != null)
            {
                if (type == DocValuesType_e.NUMERIC)
                {
                    IList<NumericDocValues> toMerge = new List<NumericDocValues>();
                    //IList<Bits> docsWithField = new List<Bits>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        NumericDocValues values = reader.GetNumericDocValues(field.Name);
                        Bits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_NUMERIC;
                            bits = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                        }
                        toMerge.Add(values);
                        //docsWithField.Add(bits);
                    }
                    consumer.MergeNumericField(field, MergeState, toMerge /*, docsWithField*/);
                }
                else if (type == DocValuesType_e.BINARY)
                {
                    IList<BinaryDocValues> toMerge = new List<BinaryDocValues>();
                    //IList<Bits> docsWithField = new List<Bits>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                        Bits bits = reader.GetDocsWithField(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_BINARY;
                            bits = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                        }
                        toMerge.Add(values);
                        //docsWithField.Add(bits);
                    }
                    consumer.MergeBinaryField(field, MergeState, toMerge /*, docsWithField*/);
                }
                else if (type == DocValuesType_e.SORTED)
                {
                    IList<SortedDocValues> toMerge = new List<SortedDocValues>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        SortedDocValues values = reader.GetSortedDocValues(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_SORTED;
                        }
                        toMerge.Add(values);
                    }
                    consumer.MergeSortedField(field, MergeState, toMerge);
                }
                else if (type == DocValuesType_e.SORTED_SET)
                {
                    IList<SortedSetDocValues> toMerge = new List<SortedSetDocValues>();
                    foreach (AtomicReader reader in MergeState.Readers)
                    {
                        SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                        if (values == null)
                        {
                            values = DocValues.EMPTY_SORTED_SET;
                        }
                        toMerge.Add(values);
                    }
                    consumer.MergeSortedSetField(field, MergeState, toMerge);
                }
                else
                {
                    throw new InvalidOperationException("type=" + type);
                }
            }
        }
        success = true;
    }
    finally
    {
        if (success)
        {
            IOUtils.Close(consumer);
        }
        else
        {
            IOUtils.CloseWhileHandlingException(consumer);
        }
    }
}
public SortedDocValuesAnonymousInnerClassHelper(FSTEntry fstEntry, NumericDocValues numericDocValues, FST<long?> fst1, FST.BytesReader @in, FST.Arc<long?> arc, FST.Arc<long?> scratchArc1, IntsRef intsRef, BytesRefFSTEnum<long?> bytesRefFstEnum)
{
    entry = fstEntry;
    docToOrd = numericDocValues;
    fst = fst1;
    this.@in = @in;
    firstArc = arc;
    scratchArc = scratchArc1;
    scratchInts = intsRef;
    fstEnum = bytesRefFstEnum;
}
public virtual void SearchIndex(Directory dir, string oldName)
{
    //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
    //Query query = parser.parse("handle:1");

    IndexReader reader = DirectoryReader.Open(dir);
    IndexSearcher searcher = NewSearcher(reader);

    TestUtil.CheckIndex(dir);

    // true if this is a 4.0+ index
    bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
    // true if this is a 4.2+ index
    bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;

    Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk!

    Bits liveDocs = MultiFields.GetLiveDocs(reader);

    for (int i = 0; i < 35; i++)
    {
        if (liveDocs.Get(i))
        {
            Document d = reader.Document(i);
            IList<IndexableField> fields = d.Fields;
            bool isProxDoc = d.GetField("content3") == null;
            if (isProxDoc)
            {
                int numFields = is40Index ? 7 : 5;
                Assert.AreEqual(numFields, fields.Count);

                IndexableField f = d.GetField("id");
                Assert.AreEqual("" + i, f.StringValue);

                f = d.GetField("utf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                f = d.GetField("autf8");
                Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                f = d.GetField("content2");
                Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                f = d.GetField("fie\u2C77ld");
                Assert.AreEqual("field with non-ascii name", f.StringValue);
            }

            Fields tfvFields = reader.GetTermVectors(i);
            Assert.IsNotNull(tfvFields, "i=" + i);
            Terms tfv = tfvFields.Terms("utf8");
            Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
        }
        else
        {
            // Only ID 7 is deleted
            Assert.AreEqual(7, i);
        }
    }

    if (is40Index)
    {
        // check docvalues fields
        NumericDocValues dvByte = MultiDocValues.GetNumericValues(reader, "dvByte");
        BinaryDocValues dvBytesDerefFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
        BinaryDocValues dvBytesDerefVar = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
        SortedDocValues dvBytesSortedFixed = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
        SortedDocValues dvBytesSortedVar = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
        BinaryDocValues dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
        BinaryDocValues dvBytesStraightVar = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
        NumericDocValues dvDouble = MultiDocValues.GetNumericValues(reader, "dvDouble");
        NumericDocValues dvFloat = MultiDocValues.GetNumericValues(reader, "dvFloat");
        NumericDocValues dvInt = MultiDocValues.GetNumericValues(reader, "dvInt");
        NumericDocValues dvLong = MultiDocValues.GetNumericValues(reader, "dvLong");
        NumericDocValues dvPacked = MultiDocValues.GetNumericValues(reader, "dvPacked");
        NumericDocValues dvShort = MultiDocValues.GetNumericValues(reader, "dvShort");
        SortedSetDocValues dvSortedSet = null;
        if (is42Index)
        {
            dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet");
        }

        for (int i = 0; i < 35; i++)
        {
            int id = Convert.ToInt32(reader.Document(i).Get("id"));
            Assert.AreEqual(id, dvByte.Get(i));

            sbyte[] bytes = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
            BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
            BytesRef scratch = new BytesRef();

            dvBytesDerefFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesDerefVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesSortedVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightFixed.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);
            dvBytesStraightVar.Get(i, scratch);
            Assert.AreEqual(expectedRef, scratch);

            Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
            Assert.AreEqual((float)id, Number.IntBitsToFloat((int)dvFloat.Get(i)), 0F);
            Assert.AreEqual(id, dvInt.Get(i));
            Assert.AreEqual(id, dvLong.Get(i));
            Assert.AreEqual(id, dvPacked.Get(i));
            Assert.AreEqual(id, dvShort.Get(i));

            if (is42Index)
            {
                dvSortedSet.Document = i;
                long ord = dvSortedSet.NextOrd();
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd());
                dvSortedSet.LookupOrd(ord, scratch);
                Assert.AreEqual(expectedRef, scratch);
            }
        }
    }

    ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

    // First document should be #0
    Document doc = searcher.IndexReader.Document(hits[0].Doc);
    assertEquals("didn't get the right document first", "0", doc.Get("id"));

    DoTestHits(hits, 34, searcher.IndexReader);

    if (is40Index)
    {
        hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;
        DoTestHits(hits, 34, searcher.IndexReader);

        hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;
        DoTestHits(hits, 34, searcher.IndexReader);
    }

    hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    hits = searcher.Search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);
    hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
    Assert.AreEqual(34, hits.Length);

    reader.Dispose();
}
public virtual void TestStressMultiThreading()
{
    Directory dir = NewDirectory();
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
    IndexWriter writer = new IndexWriter(dir, conf);

    // create index
    int numThreads = TestUtil.NextInt32(Random, 3, 6);
    int numDocs = AtLeast(2000);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(new StringField("id", "doc" + i, Store.NO));
        double group = Random.NextDouble();
        string g;
        if (group < 0.1)
        {
            g = "g0";
        }
        else if (group < 0.5)
        {
            g = "g1";
        }
        else if (group < 0.8)
        {
            g = "g2";
        }
        else
        {
            g = "g3";
        }
        doc.Add(new StringField("updKey", g, Store.NO));
        for (int j = 0; j < numThreads; j++)
        {
            long value = Random.Next();
            doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value)));
            doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
        }
        writer.AddDocument(doc);
    }

    CountdownEvent done = new CountdownEvent(numThreads);
    AtomicInt32 numUpdates = new AtomicInt32(AtLeast(100));

    // same thread updates a field as well as reopens
    ThreadJob[] threads = new ThreadJob[numThreads];
    for (int i = 0; i < threads.Length; i++)
    {
        string f = "f" + i;
        string cf = "cf" + i;
        threads[i] = new ThreadAnonymousClass(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf);
    }

    foreach (ThreadJob t in threads)
    {
        t.Start();
    }
    done.Wait();
    writer.Dispose();

    DirectoryReader reader = DirectoryReader.Open(dir);
    BytesRef scratch = new BytesRef();
    foreach (AtomicReaderContext context in reader.Leaves)
    {
        AtomicReader r = context.AtomicReader;
        for (int i = 0; i < numThreads; i++)
        {
            BinaryDocValues bdv = r.GetBinaryDocValues("f" + i);
            NumericDocValues control = r.GetNumericDocValues("cf" + i);
            IBits docsWithBdv = r.GetDocsWithField("f" + i);
            IBits docsWithControl = r.GetDocsWithField("cf" + i);
            IBits liveDocs = r.LiveDocs;
            for (int j = 0; j < r.MaxDoc; j++)
            {
                if (liveDocs is null || liveDocs.Get(j))
                {
                    Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j));
                    if (docsWithBdv.Get(j))
                    {
                        long ctrlValue = control.Get(j);
                        long bdvValue = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2;
                        // if (ctrlValue != bdvValue) {
                        //     System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
                        // }
                        Assert.AreEqual(ctrlValue, bdvValue);
                    }
                }
            }
        }
    }

    reader.Dispose();
    dir.Dispose();
}
public SortedDocValuesAnonymousInnerClassHelper(DirectDocValuesProducer outerInstance, SortedEntry entry, NumericDocValues docToOrd, BinaryDocValues values)
{
    this.outerInstance = outerInstance;
    this.entry = entry;
    this.docToOrd = docToOrd;
    this.values = values;
}
public virtual void TestManyReopensAndFields()
{
    Directory dir = NewDirectory();
    Random random = Random;
    IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    LogMergePolicy lmp = NewLogMergePolicy();
    lmp.MergeFactor = 3; // merge often
    conf.SetMergePolicy(lmp);
    IndexWriter writer = new IndexWriter(dir, conf);

    bool isNRT = random.NextBoolean();
    DirectoryReader reader;
    if (isNRT)
    {
        reader = DirectoryReader.Open(writer, true);
    }
    else
    {
        writer.Commit();
        reader = DirectoryReader.Open(dir);
    }

    int numFields = random.Next(4) + 3; // 3-7
    int numNDVFields = random.Next(numFields / 2) + 1; // 1-3
    long[] fieldValues = new long[numFields];
    bool[] fieldHasValue = new bool[numFields];
    Arrays.Fill(fieldHasValue, true);
    for (int i = 0; i < fieldValues.Length; i++)
    {
        fieldValues[i] = 1;
    }

    int numRounds = AtLeast(15);
    int docID = 0;
    for (int i = 0; i < numRounds; i++)
    {
        int numDocs = AtLeast(5);
        // System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
        for (int j = 0; j < numDocs; j++)
        {
            Document doc = new Document();
            doc.Add(new StringField("id", "doc-" + docID, Store.NO));
            doc.Add(new StringField("key", "all", Store.NO)); // update key
            // add all fields with their current value
            for (int f = 0; f < fieldValues.Length; f++)
            {
                if (f < numNDVFields)
                {
                    doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
                }
                else
                {
                    doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
                }
            }
            writer.AddDocument(doc);
            ++docID;
        }

        // if field's value was unset before, unset it from all new added documents too
        for (int field = 0; field < fieldHasValue.Length; field++)
        {
            if (!fieldHasValue[field])
            {
                if (field < numNDVFields)
                {
                    writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
                }
                else
                {
                    writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
                }
            }
        }

        int fieldIdx = random.Next(fieldValues.Length);
        string updateField = "f" + fieldIdx;
        if (random.NextBoolean())
        {
            // System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
            fieldHasValue[fieldIdx] = false;
            if (fieldIdx < numNDVFields)
            {
                writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
            }
            else
            {
                writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
            }
        }
        else
        {
            fieldHasValue[fieldIdx] = true;
            if (fieldIdx < numNDVFields)
            {
                writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
            }
            else
            {
                writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
            }
            // System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
        }

        if (random.NextDouble() < 0.2)
        {
            int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok!
            writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
            // System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
        }

        // verify reader
        if (!isNRT)
        {
            writer.Commit();
        }

        // System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
        DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
        Assert.IsNotNull(newReader);
        reader.Dispose();
        reader = newReader;
        // System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
        Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
        BytesRef scratch = new BytesRef();
        foreach (AtomicReaderContext context in reader.Leaves)
        {
            AtomicReader r = context.AtomicReader;
            // System.out.println(((SegmentReader) r).getSegmentName());
            IBits liveDocs = r.LiveDocs;
            for (int field = 0; field < fieldValues.Length; field++)
            {
                string f = "f" + field;
                BinaryDocValues bdv = r.GetBinaryDocValues(f);
                NumericDocValues ndv = r.GetNumericDocValues(f);
                IBits docsWithField = r.GetDocsWithField(f);
                if (field < numNDVFields)
                {
                    Assert.IsNotNull(ndv);
                    Assert.IsNull(bdv);
                }
                else
                {
                    Assert.IsNull(ndv);
                    Assert.IsNotNull(bdv);
                }
                int maxDoc = r.MaxDoc;
                for (int doc = 0; doc < maxDoc; doc++)
                {
                    if (liveDocs is null || liveDocs.Get(doc))
                    {
                        // System.out.println("doc=" + (doc + context.docBase) + " f='" + f + "' value=" + getValue(bdv, doc, scratch));
                        if (fieldHasValue[field])
                        {
                            Assert.IsTrue(docsWithField.Get(doc));
                            if (field < numNDVFields)
                            {
                                Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                            }
                            else
                            {
                                Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                            }
                        }
                        else
                        {
                            Assert.IsFalse(docsWithField.Get(doc));
                        }
                    }
                }
            }
        }
        // System.out.println();
    }

    IOUtils.Dispose(writer, reader, dir);
}
/// <summary>
/// Creates an iterator over term, weight, and payload fields from the Lucene
/// index. Setting <paramref name="hasPayloads"/> to <c>false</c> implies an
/// iterator over only term and weight.
/// </summary>
public DocumentInputIterator(DocumentDictionary outerInstance, bool hasPayloads, bool hasContexts)
{
    this.outerInstance = outerInstance;
    this.hasPayloads = hasPayloads;
    this.hasContexts = hasContexts;
    docCount = outerInstance.reader.MaxDoc() - 1;
    weightValues = (outerInstance.weightField != null) ? MultiDocValues.GetNumericValues(outerInstance.reader, outerInstance.weightField) : null;
    liveDocs = (outerInstance.reader.Leaves().Count > 0) ? MultiFields.GetLiveDocs(outerInstance.reader) : null;
    relevantFields = GetRelevantFields(new string[] { outerInstance.field, outerInstance.weightField, outerInstance.payloadField, outerInstance.contextsField });
}
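// A hedged usage sketch for the dictionary that owns this iterator. It assumes the
// older Lucene.NET suggest API implied by this snippet (GetEntryIterator() returning
// an IInputIterator with Next()/Weight); member names may differ between releases,
// and "suggest"/"weight" are hypothetical field names.
static void DumpSuggestions(Directory dir)
{
    IndexReader reader = DirectoryReader.Open(dir);
    DocumentDictionary dictionary = new DocumentDictionary(reader, "suggest", "weight");
    IInputIterator iter = dictionary.GetEntryIterator();
    BytesRef term;
    while ((term = iter.Next()) != null)
    {
        long weight = iter.Weight; // read from the "weight" field's numeric doc values
        // feed (term, weight) into a suggester ...
    }
    reader.Dispose();
}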
public RandomAccessOrdsAnonymousInnerClassHelper(DirectDocValuesProducer outerInstance, SortedSetEntry entry, NumericDocValues docToOrdAddress, NumericDocValues ords, BinaryDocValues values)
{
    this.entry = entry;
    this.docToOrdAddress = docToOrdAddress;
    this.ords = ords;
    this.values = values;
}