private AppendingDeltaPackedLongBuffer PendingCounts; // termIDs per doc

#endregion Fields

#region Constructors

public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
{
    this.FieldInfo = fieldInfo;
    this.IwBytesUsed = iwBytesUsed;
    // Dedupes term bytes; the tracking allocator charges the pool's blocks
    // against the IndexWriter's RAM counter.
    Hash = new BytesRefHash(
        new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)),
        BytesRefHash.DEFAULT_CAPACITY,
        new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
    Pending = new AppendingPackedLongBuffer(PackedInts.COMPACT); // stream of all termIDs
    PendingCounts = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT); // termIDs per doc
    BytesUsed = Pending.RamBytesUsed() + PendingCounts.RamBytesUsed();
    iwBytesUsed.AddAndGet(BytesUsed);
}
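// The constructor above registers only the buffers' initial footprint. As
// documents add values, the buffers grow, so the writer must re-report the
// difference to the same Counter. A minimal sketch of that delta-accounting
// step, assuming a helper named UpdateBytesUsed() invoked after each
// mutation (the helper's name and placement are assumptions, not quoted source):
private void UpdateBytesUsed()
{
    long newBytesUsed = Pending.RamBytesUsed() + PendingCounts.RamBytesUsed();
    IwBytesUsed.AddAndGet(newBytesUsed - BytesUsed); // charge only the growth
    BytesUsed = newBytesUsed;
}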
public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
{
    // Pools are shared with the parent TermsHash so all fields of a document
    // write into the same int/byte block storage.
    IntPool = termsHash.IntPool;
    BytePool = termsHash.BytePool;
    TermBytePool = termsHash.TermBytePool;
    DocState = termsHash.DocState;
    this.TermsHash = termsHash;
    BytesUsed = termsHash.BytesUsed;
    FieldState = docInverterPerField.FieldState;
    this.Consumer = termsHash.Consumer.AddField(this, fieldInfo);
    PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, BytesUsed);
    BytesHash = new BytesRefHash(TermBytePool, HASH_INIT_SIZE, byteStarts);
    StreamCount = Consumer.StreamCount;
    NumPostingInt = 2 * StreamCount;
    this.FieldInfo = fieldInfo;
    if (nextTermsHash != null)
    {
        // Chain to the secondary consumer (e.g. term vectors) for this field.
        NextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
    }
    else
    {
        NextPerField = null;
    }
}
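// The nextTermsHash parameter is what links the two indexing consumers: the
// primary chain (freq/prox postings) processes a field first and then hands
// the same tokens to the secondary chain (term vectors). A hedged sketch of
// how the parent TermsHash could build that per-field chain, reusing this
// file's names (the factory body and the NextTermsHash field are assumptions,
// not quoted source):
public override InvertedDocConsumerPerField AddField(DocInverterPerField docInverterPerField, FieldInfo fieldInfo)
{
    // Pass ourselves as the head of the chain and NextTermsHash as its tail.
    return new TermsHashPerField(docInverterPerField, this, NextTermsHash, fieldInfo);
}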
public void TestRandomSortedBytes()
{
    Directory dir = NewDirectory();
    IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
    if (!DefaultCodecSupportsDocsWithField())
    {
        // if the codec doesn't support missing values, we expect missing to be
        // mapped to byte[] by the impersonator, but we have to give it a chance
        // to merge them to this
        cfg.SetMergePolicy(NewLogMergePolicy());
    }
    RandomIndexWriter w = new RandomIndexWriter(Random(), dir, cfg);
    int numDocs = AtLeast(100);
    BytesRefHash hash = new BytesRefHash();
    IDictionary<string, string> docToString = new Dictionary<string, string>();
    int maxLength = TestUtil.NextInt(Random(), 1, 50);
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("id", "" + i, Field.Store.YES));
        string @string = TestUtil.RandomRealisticUnicodeString(Random(), 1, maxLength);
        BytesRef br = new BytesRef(@string);
        doc.Add(new SortedDocValuesField("field", br));
        hash.Add(br);
        docToString["" + i] = @string;
        w.AddDocument(doc);
    }
    if (Rarely())
    {
        w.Commit();
    }
    int numDocsNoValue = AtLeast(10);
    for (int i = 0; i < numDocsNoValue; i++)
    {
        Document doc = new Document();
        doc.Add(NewTextField("id", "noValue", Field.Store.YES));
        w.AddDocument(doc);
    }
    if (!DefaultCodecSupportsDocsWithField())
    {
        BytesRef bytesRef = new BytesRef();
        hash.Add(bytesRef); // add empty value for the gaps
    }
    if (Rarely())
    {
        w.Commit();
    }
    if (!DefaultCodecSupportsDocsWithField())
    {
        // if the codec doesn't support missing values, we expect missing to be
        // mapped to byte[] by the impersonator, but we have to give it a chance
        // to merge them to this
        w.ForceMerge(1);
    }
    for (int i = 0; i < numDocs; i++)
    {
        Document doc = new Document();
        string id = "" + i + numDocs;
        doc.Add(NewTextField("id", id, Field.Store.YES));
        string @string = TestUtil.RandomRealisticUnicodeString(Random(), 1, maxLength);
        BytesRef br = new BytesRef(@string);
        hash.Add(br);
        docToString[id] = @string;
        doc.Add(new SortedDocValuesField("field", br));
        w.AddDocument(doc);
    }
    w.Commit();
    IndexReader reader = w.Reader;
    SortedDocValues docValues = MultiDocValues.GetSortedValues(reader, "field");
    int[] sort = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
    BytesRef expected = new BytesRef();
    BytesRef actual = new BytesRef();
    Assert.AreEqual(hash.Size(), docValues.ValueCount);
    for (int i = 0; i < hash.Size(); i++)
    {
        hash.Get(sort[i], expected);
        docValues.LookupOrd(i, actual);
        Assert.AreEqual(expected.Utf8ToString(), actual.Utf8ToString());
        int ord = docValues.LookupTerm(expected);
        Assert.AreEqual(i, ord);
    }
    AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(reader);
    ISet<KeyValuePair<string, string>> entrySet = docToString.EntrySet();
    foreach (KeyValuePair<string, string> entry in entrySet)
    {
        // pk lookup
        DocsEnum termDocsEnum = slowR.TermDocsEnum(new Term("id", entry.Key));
        int docId = termDocsEnum.NextDoc();
        expected = new BytesRef(entry.Value);
        docValues.Get(docId, actual);
        Assert.AreEqual(expected, actual);
    }
    reader.Dispose();
    w.Dispose();
    dir.Dispose();
}
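// The assertion loop above relies on a BytesRefHash contract: Add() assigns
// ids in insertion order, while Sort() returns those ids permuted into
// unicode byte order, which must line up one-to-one with the ord space that
// SortedDocValues exposes. A minimal standalone illustration of that
// contract (the values are hypothetical):
BytesRefHash h = new BytesRefHash();
h.Add(new BytesRef("b")); // assigned id 0
h.Add(new BytesRef("a")); // assigned id 1
int[] ordered = h.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
// ordered[0] == 1 ("a") and ordered[1] == 0 ("b"): the same ascending order
// the test expects LookupOrd/LookupTerm to follow.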