private AppendingDeltaPackedLongBuffer PendingCounts; // termIDs per doc

        #endregion Fields

        #region Constructors

        public SortedSetDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed)
        {
            this.FieldInfo = fieldInfo;
            this.IwBytesUsed = iwBytesUsed;
            Hash = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectTrackingAllocator(iwBytesUsed)), BytesRefHash.DEFAULT_CAPACITY, new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
            Pending = new AppendingPackedLongBuffer(PackedInts.COMPACT);
            PendingCounts = new AppendingDeltaPackedLongBuffer(PackedInts.COMPACT);
            BytesUsed = Pending.RamBytesUsed() + PendingCounts.RamBytesUsed();
            iwBytesUsed.AddAndGet(BytesUsed);
        }
示例#2
0
 public TermsHashPerField(DocInverterPerField docInverterPerField, TermsHash termsHash, TermsHash nextTermsHash, FieldInfo fieldInfo)
 {
     IntPool = termsHash.IntPool;
     BytePool = termsHash.BytePool;
     TermBytePool = termsHash.TermBytePool;
     DocState = termsHash.DocState;
     this.TermsHash = termsHash;
     BytesUsed = termsHash.BytesUsed;
     FieldState = docInverterPerField.FieldState;
     this.Consumer = termsHash.Consumer.AddField(this, fieldInfo);
     PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, BytesUsed);
     BytesHash = new BytesRefHash(TermBytePool, HASH_INIT_SIZE, byteStarts);
     StreamCount = Consumer.StreamCount;
     NumPostingInt = 2 * StreamCount;
     this.FieldInfo = fieldInfo;
     if (nextTermsHash != null)
     {
         NextPerField = (TermsHashPerField)nextTermsHash.AddField(docInverterPerField, fieldInfo);
     }
     else
     {
         NextPerField = null;
     }
 }
        public void TestRandomSortedBytes()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig cfg = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            if (!DefaultCodecSupportsDocsWithField())
            {
                // if the codec doesnt support missing, we expect missing to be mapped to byte[]
                // by the impersonator, but we have to give it a chance to merge them to this
                cfg.SetMergePolicy(NewLogMergePolicy());
            }
            RandomIndexWriter w = new RandomIndexWriter(Random(), dir, cfg);
            int numDocs = AtLeast(100);
            BytesRefHash hash = new BytesRefHash();
            IDictionary<string, string> docToString = new Dictionary<string, string>();
            int maxLength = TestUtil.NextInt(Random(), 1, 50);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("id", "" + i, Field.Store.YES));
                string @string = TestUtil.RandomRealisticUnicodeString(Random(), 1, maxLength);
                BytesRef br = new BytesRef(@string);
                doc.Add(new SortedDocValuesField("field", br));
                hash.Add(br);
                docToString["" + i] = @string;
                w.AddDocument(doc);
            }
            if (Rarely())
            {
                w.Commit();
            }
            int numDocsNoValue = AtLeast(10);
            for (int i = 0; i < numDocsNoValue; i++)
            {
                Document doc = new Document();
                doc.Add(NewTextField("id", "noValue", Field.Store.YES));
                w.AddDocument(doc);
            }
            if (!DefaultCodecSupportsDocsWithField())
            {
                BytesRef bytesRef = new BytesRef();
                hash.Add(bytesRef); // add empty value for the gaps
            }
            if (Rarely())
            {
                w.Commit();
            }
            if (!DefaultCodecSupportsDocsWithField())
            {
                // if the codec doesnt support missing, we expect missing to be mapped to byte[]
                // by the impersonator, but we have to give it a chance to merge them to this
                w.ForceMerge(1);
            }
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                string id = "" + i + numDocs;
                doc.Add(NewTextField("id", id, Field.Store.YES));
                string @string = TestUtil.RandomRealisticUnicodeString(Random(), 1, maxLength);
                BytesRef br = new BytesRef(@string);
                hash.Add(br);
                docToString[id] = @string;
                doc.Add(new SortedDocValuesField("field", br));
                w.AddDocument(doc);
            }
            w.Commit();
            IndexReader reader = w.Reader;
            SortedDocValues docValues = MultiDocValues.GetSortedValues(reader, "field");
            int[] sort = hash.Sort(BytesRef.UTF8SortedAsUnicodeComparer);
            BytesRef expected = new BytesRef();
            BytesRef actual = new BytesRef();
            Assert.AreEqual(hash.Size(), docValues.ValueCount);
            for (int i = 0; i < hash.Size(); i++)
            {
                hash.Get(sort[i], expected);
                docValues.LookupOrd(i, actual);
                Assert.AreEqual(expected.Utf8ToString(), actual.Utf8ToString());
                int ord = docValues.LookupTerm(expected);
                Assert.AreEqual(i, ord);
            }
            AtomicReader slowR = SlowCompositeReaderWrapper.Wrap(reader);
            ISet<KeyValuePair<string, string>> entrySet = docToString.EntrySet();

            foreach (KeyValuePair<string, string> entry in entrySet)
            {
                // pk lookup
                DocsEnum termDocsEnum = slowR.TermDocsEnum(new Term("id", entry.Key));
                int docId = termDocsEnum.NextDoc();
                expected = new BytesRef(entry.Value);
                docValues.Get(docId, actual);
                Assert.AreEqual(expected, actual);
            }

            reader.Dispose();
            w.Dispose();
            dir.Dispose();
        }