Пример #1
0
 public FloatDocValuesAnonymousInnerClassHelper(NormValueSource outerInstance, NormValueSource @this, TFIDFSimilarity similarity, NumericDocValues norms)
     : base(@this)
 {
     this.outerInstance = outerInstance;
     this.similarity = similarity;
     this.norms = norms;
 }
Пример #2
0
        /// <summary>
        /// Returns a <see cref="NumericDocValues"/> for a reader's docvalues (potentially merging on-the-fly)
        /// <para>
        /// This is a slow way to access numeric values. Instead, access them per-segment
        /// with <see cref="AtomicReader.GetNumericDocValues(string)"/>
        /// </para>
        /// </summary>
        public static NumericDocValues GetNumericValues(IndexReader r, string field)
        {
            IList <AtomicReaderContext> leaves = r.Leaves;
            int size = leaves.Count;

            if (size == 0)
            {
                return(null);
            }
            else if (size == 1)
            {
                return(leaves[0].AtomicReader.GetNumericDocValues(field));
            }

            bool anyReal = false;

            NumericDocValues[] values = new NumericDocValues[size];
            int[] starts = new int[size + 1];
            for (int i = 0; i < size; i++)
            {
                AtomicReaderContext context = leaves[i];
                NumericDocValues    v       = context.AtomicReader.GetNumericDocValues(field);
                if (v == null)
                {
                    v = DocValues.EMPTY_NUMERIC;
                }
                else
                {
                    anyReal = true;
                }
                values[i] = v;
                starts[i] = context.DocBase;
            }
            starts[size] = r.MaxDoc;

            if (!anyReal)
            {
                return(null);
            }
            else
            {
                return(new NumericDocValuesAnonymousInnerClassHelper2(values, starts));
            }
        }
Пример #3
0
 public static void CheckNorms(AtomicReader reader)
 {
     // test omit norms
     for (int i = 0; i < DocHelper.Fields.Length; i++)
     {
         IIndexableField f = DocHelper.Fields[i];
         if (f.IndexableFieldType.IsIndexed)
         {
             Assert.AreEqual(reader.GetNormValues(f.Name) != null, !f.IndexableFieldType.OmitNorms);
             Assert.AreEqual(reader.GetNormValues(f.Name) != null, !DocHelper.NoNorms.ContainsKey(f.Name));
             if (reader.GetNormValues(f.Name) == null)
             {
                 // test for norms of null
                 NumericDocValues norms = MultiDocValues.GetNormValues(reader, f.Name);
                 Assert.IsNull(norms);
             }
         }
     }
 }
Пример #4
0
        public virtual void TestCustomEncoder()
        {
            Directory    dir      = NewDirectory();
            MockAnalyzer analyzer = new MockAnalyzer(Random());

            IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            config.SetSimilarity(new CustomNormEncodingSimilarity(this));
            RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, config);
            Document          doc    = new Document();
            Field             foo    = NewTextField("foo", "", Field.Store.NO);
            Field             bar    = NewTextField("bar", "", Field.Store.NO);

            doc.Add(foo);
            doc.Add(bar);

            for (int i = 0; i < 100; i++)
            {
                bar.SetStringValue("singleton");
                writer.AddDocument(doc);
            }

            IndexReader reader = writer.Reader;

            writer.Dispose();

            NumericDocValues fooNorms = MultiDocValues.GetNormValues(reader, "foo");

            for (int i = 0; i < reader.MaxDoc; i++)
            {
                Assert.AreEqual(0, fooNorms.Get(i));
            }

            NumericDocValues barNorms = MultiDocValues.GetNormValues(reader, "bar");

            for (int i = 0; i < reader.MaxDoc; i++)
            {
                Assert.AreEqual(1, barNorms.Get(i));
            }

            reader.Dispose();
            dir.Dispose();
        }
Пример #5
0
        public virtual void TestNumerics()
        {
            Directory dir   = NewDirectory();
            Document  doc   = new Document();
            Field     field = new NumericDocValuesField("numbers", 0);

            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                field.SetInt64Value(Random().NextLong());
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.Reader;
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            NumericDocValues multi  = MultiDocValues.GetNumericValues(ir, "numbers");
            NumericDocValues single = merged.GetNumericDocValues("numbers");

            for (int i = 0; i < numDocs; i++)
            {
                Assert.AreEqual(single.Get(i), multi.Get(i));
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
Пример #6
0
        /// <summary>
        /// Returns a NumericDocValues for a reader's norms (potentially merging on-the-fly).
        /// <p>
        /// this is a slow way to access normalization values. Instead, access them per-segment
        /// with <seealso cref="AtomicReader#getNormValues(String)"/>
        /// </p>
        /// </summary>
        public static NumericDocValues GetNormValues(IndexReader r, string field)
        {
            IList<AtomicReaderContext> leaves = r.Leaves;
            int size = leaves.Count;
            if (size == 0)
            {
                return null;
            }
            else if (size == 1)
            {
                return leaves[0].AtomicReader.GetNormValues(field);
            }
            FieldInfo fi = MultiFields.GetMergedFieldInfos(r).FieldInfo(field);
            if (fi == null || fi.HasNorms() == false)
            {
                return null;
            }

            bool anyReal = false;
            NumericDocValues[] values = new NumericDocValues[size];
            int[] starts = new int[size + 1];
            for (int i = 0; i < size; i++)
            {
                AtomicReaderContext context = leaves[i];
                NumericDocValues v = context.AtomicReader.GetNormValues(field);
                if (v == null)
                {
                    v = DocValues.EMPTY_NUMERIC;
                }
                else
                {
                    anyReal = true;
                }
                values[i] = v;
                starts[i] = context.DocBase;
            }
            starts[size] = r.MaxDoc;

            Debug.Assert(anyReal);

            return new NumericDocValuesAnonymousInnerClassHelper(values, starts);
        }
Пример #7
0
        private void MergeNorms(SegmentWriteState segmentWriteState)
        {
            DocValuesConsumer consumer = codec.NormsFormat.NormsConsumer(segmentWriteState);
            bool success = false;

            try
            {
                foreach (FieldInfo field in mergeState.FieldInfos)
                {
                    if (field.HasNorms)
                    {
                        IList <NumericDocValues> toMerge       = new JCG.List <NumericDocValues>();
                        IList <IBits>            docsWithField = new JCG.List <IBits>();
                        foreach (AtomicReader reader in mergeState.Readers)
                        {
                            NumericDocValues norms = reader.GetNormValues(field.Name);
                            if (norms == null)
                            {
                                norms = DocValues.EMPTY_NUMERIC;
                            }
                            toMerge.Add(norms);
                            docsWithField.Add(new Lucene.Net.Util.Bits.MatchAllBits(reader.MaxDoc));
                        }
                        consumer.MergeNumericField(field, mergeState, toMerge, docsWithField);
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(consumer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(consumer);
                }
            }
        }
Пример #8
0
 public void AssertDocValuesEquals(string info, int num, NumericDocValues leftDocValues, NumericDocValues rightDocValues)
 {
     Assert.IsNotNull(leftDocValues, info);
     Assert.IsNotNull(rightDocValues, info);
     for (int docID = 0; docID < num; docID++)
     {
         Assert.AreEqual(leftDocValues.Get(docID), rightDocValues.Get(docID));
     }
 }
Пример #9
0
        private void MergeDocValues(SegmentWriteState segmentWriteState)
        {
            DocValuesConsumer consumer = codec.DocValuesFormat.FieldsConsumer(segmentWriteState);
            bool success = false;

            try
            {
                foreach (FieldInfo field in mergeState.FieldInfos)
                {
                    DocValuesType type = field.DocValuesType;
                    if (type != DocValuesType.NONE)
                    {
                        if (type == DocValuesType.NUMERIC)
                        {
                            IList <NumericDocValues> toMerge       = new JCG.List <NumericDocValues>();
                            IList <IBits>            docsWithField = new JCG.List <IBits>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                NumericDocValues values = reader.GetNumericDocValues(field.Name);
                                IBits            bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_NUMERIC;
                                    bits   = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                docsWithField.Add(bits);
                            }
                            consumer.MergeNumericField(field, mergeState, toMerge, docsWithField);
                        }
                        else if (type == DocValuesType.BINARY)
                        {
                            IList <BinaryDocValues> toMerge       = new JCG.List <BinaryDocValues>();
                            IList <IBits>           docsWithField = new JCG.List <IBits>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                                IBits           bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_BINARY;
                                    bits   = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                docsWithField.Add(bits);
                            }
                            consumer.MergeBinaryField(field, mergeState, toMerge, docsWithField);
                        }
                        else if (type == DocValuesType.SORTED)
                        {
                            IList <SortedDocValues> toMerge = new JCG.List <SortedDocValues>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                SortedDocValues values = reader.GetSortedDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedField(field, mergeState, toMerge);
                        }
                        else if (type == DocValuesType.SORTED_SET)
                        {
                            IList <SortedSetDocValues> toMerge = new JCG.List <SortedSetDocValues>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED_SET;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedSetField(field, mergeState, toMerge);
                        }
                        else
                        {
                            throw AssertionError.Create("type=" + type);
                        }
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(consumer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(consumer);
                }
            }
        }
Пример #10
0
 public AssertingNumericDocValues(NumericDocValues @in, int maxDoc)
 {
     this.@in    = @in;
     this.maxDoc = maxDoc;
 }
Пример #11
0
        public virtual void TestTonsOfUpdates()
        {
            // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
            Directory         dir    = NewDirectory();
            Random            random = Random;
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));

            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
            IndexWriter writer = new IndexWriter(dir, conf);

            // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
            int           numDocs         = AtLeast(20000);
            int           numBinaryFields = AtLeast(5);
            int           numTerms        = TestUtil.NextInt32(random, 10, 100); // terms should affect many docs
            ISet <string> updateTerms     = new JCG.HashSet <string>();

            while (updateTerms.Count < numTerms)
            {
                updateTerms.Add(TestUtil.RandomSimpleString(random));
            }

            //    System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

            // build a large index with many BDV fields and update terms
            for (int i = 0; i < numDocs; i++)
            {
                Document doc            = new Document();
                int      numUpdateTerms = TestUtil.NextInt32(random, 1, numTerms / 10);
                for (int j = 0; j < numUpdateTerms; j++)
                {
                    doc.Add(new StringField("upd", RandomPicks.RandomFrom(random, updateTerms), Store.NO));
                }
                for (int j = 0; j < numBinaryFields; j++)
                {
                    long val = random.Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
                    doc.Add(new NumericDocValuesField("cf" + j, val * 2));
                }
                writer.AddDocument(doc);
            }

            writer.Commit(); // commit so there's something to apply to

            // set to flush every 2048 bytes (approximately every 12 updates), so we get
            // many flushes during binary updates
            writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
            int numUpdates = AtLeast(100);

            //    System.out.println("numUpdates=" + numUpdates);
            for (int i = 0; i < numUpdates; i++)
            {
                int  field      = random.Next(numBinaryFields);
                Term updateTerm = new Term("upd", RandomPicks.RandomFrom(random, updateTerms));
                long value      = random.Next();
                writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
                writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
            }

            writer.Dispose();

            DirectoryReader reader  = DirectoryReader.Open(dir);
            BytesRef        scratch = new BytesRef();

            foreach (AtomicReaderContext context in reader.Leaves)
            {
                for (int i = 0; i < numBinaryFields; i++)
                {
                    AtomicReader     r  = context.AtomicReader;
                    BinaryDocValues  f  = r.GetBinaryDocValues("f" + i);
                    NumericDocValues cf = r.GetNumericDocValues("cf" + i);
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
Пример #12
0
 private static void CheckNumericDocValues(string fieldName, AtomicReader reader, NumericDocValues ndv, Bits docsWithField)
 {
     for (int i = 0; i < reader.MaxDoc; i++)
     {
         long value = ndv.Get(i);
         if (docsWithField.Get(i) == false && value != 0)
         {
             throw new Exception("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i);
         }
     }
 }
Пример #13
0
        private Explanation ExplainScore(int doc, Explanation freq, BM25Stats stats, NumericDocValues norms)
        {
            Explanation result = new Explanation();
            result.Description = "score(doc=" + doc + ",freq=" + freq + "), product of:";

            Explanation boostExpl = new Explanation(stats.QueryBoost * stats.TopLevelBoost, "boost");
            if (boostExpl.Value != 1.0f)
            {
                result.AddDetail(boostExpl);
            }

            result.AddDetail(stats.Idf);

            Explanation tfNormExpl = new Explanation();
            tfNormExpl.Description = "tfNorm, computed from:";
            tfNormExpl.AddDetail(freq);
            tfNormExpl.AddDetail(new Explanation(K1_Renamed, "parameter k1"));
            if (norms == null)
            {
                tfNormExpl.AddDetail(new Explanation(0, "parameter b (norms omitted for field)"));
                tfNormExpl.Value = (freq.Value * (K1_Renamed + 1)) / (freq.Value + K1_Renamed);
            }
            else
            {
                float doclen = DecodeNormValue((sbyte)norms.Get(doc));
                tfNormExpl.AddDetail(new Explanation(b, "parameter b"));
                tfNormExpl.AddDetail(new Explanation(stats.Avgdl, "avgFieldLength"));
                tfNormExpl.AddDetail(new Explanation(doclen, "fieldLength"));
                tfNormExpl.Value = (freq.Value * (K1_Renamed + 1)) / (freq.Value + K1_Renamed * (1 - b + b * doclen / stats.Avgdl));
            }
            result.AddDetail(tfNormExpl);
            result.Value = boostExpl.Value * stats.Idf.Value * tfNormExpl.Value;
            return result;
        }
Пример #14
0
 internal BM25DocScorer(BM25Similarity outerInstance, BM25Stats stats, NumericDocValues norms)
 {
     this.OuterInstance = outerInstance;
     this.Stats = stats;
     this.WeightValue = stats.Weight * (outerInstance.K1_Renamed + 1);
     this.Cache = stats.Cache;
     this.Norms = norms;
 }
Пример #15
0
 public AssertingNumericDocValues(NumericDocValues @in, int maxDoc)
 {
     this.@in = @in;
     this.MaxDoc = maxDoc;
 }
Пример #16
0
        private void MergeDocValues(SegmentWriteState segmentWriteState)
        {
            DocValuesConsumer consumer = Codec.DocValuesFormat().FieldsConsumer(segmentWriteState);
            bool success = false;

            try
            {
                foreach (FieldInfo field in MergeState.FieldInfos)
                {
                    DocValuesType_e?type = field.DocValuesType;
                    if (type != null)
                    {
                        if (type == DocValuesType_e.NUMERIC)
                        {
                            IList <NumericDocValues> toMerge = new List <NumericDocValues>();
                            //IList<Bits> docsWithField = new List<Bits>();
                            foreach (AtomicReader reader in MergeState.Readers)
                            {
                                NumericDocValues values = reader.GetNumericDocValues(field.Name);
                                Bits             bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_NUMERIC;
                                    bits   = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                //docsWithField.Add(bits);
                            }
                            consumer.MergeNumericField(field, MergeState, toMerge /*, docsWithField*/);
                        }
                        else if (type == DocValuesType_e.BINARY)
                        {
                            IList <BinaryDocValues> toMerge = new List <BinaryDocValues>();
                            //IList<Bits> docsWithField = new List<Bits>();
                            foreach (AtomicReader reader in MergeState.Readers)
                            {
                                BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                                Bits            bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_BINARY;
                                    bits   = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                //docsWithField.Add(bits);
                            }
                            consumer.MergeBinaryField(field, MergeState, toMerge /*, docsWithField*/);
                        }
                        else if (type == DocValuesType_e.SORTED)
                        {
                            IList <SortedDocValues> toMerge = new List <SortedDocValues>();
                            foreach (AtomicReader reader in MergeState.Readers)
                            {
                                SortedDocValues values = reader.GetSortedDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedField(field, MergeState, toMerge);
                        }
                        else if (type == DocValuesType_e.SORTED_SET)
                        {
                            IList <SortedSetDocValues> toMerge = new List <SortedSetDocValues>();
                            foreach (AtomicReader reader in MergeState.Readers)
                            {
                                SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED_SET;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedSetField(field, MergeState, toMerge);
                        }
                        else
                        {
                            throw new InvalidOperationException("type=" + type);
                        }
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(consumer);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(consumer);
                }
            }
        }
Пример #17
0
 public SortedDocValuesAnonymousInnerClassHelper(FSTEntry fstEntry,
     NumericDocValues numericDocValues, FST<long?> fst1, FST.BytesReader @in, FST.Arc<long?> arc, FST.Arc<long?> scratchArc1,
     IntsRef intsRef, BytesRefFSTEnum<long?> bytesRefFstEnum)
 {
     entry = fstEntry;
     docToOrd = numericDocValues;
     fst = fst1;
     this.@in = @in;
     firstArc = arc;
     scratchArc = scratchArc1;
     scratchInts = intsRef;
     fstEnum = bytesRefFstEnum;
 }
        public virtual void SearchIndex(Directory dir, string oldName)
        {
            //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
            //Query query = parser.parse("handle:1");

            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            TestUtil.CheckIndex(dir);

            // true if this is a 4.0+ index
            bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
            // true if this is a 4.2+ index
            bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;

            Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk!

            Bits liveDocs = MultiFields.GetLiveDocs(reader);

            for (int i = 0; i < 35; i++)
            {
                if (liveDocs.Get(i))
                {
                    Document d = reader.Document(i);
                    IList <IndexableField> fields = d.Fields;
                    bool isProxDoc = d.GetField("content3") == null;
                    if (isProxDoc)
                    {
                        int numFields = is40Index ? 7 : 5;
                        Assert.AreEqual(numFields, fields.Count);
                        IndexableField f = d.GetField("id");
                        Assert.AreEqual("" + i, f.StringValue);

                        f = d.GetField("utf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = d.GetField("autf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = d.GetField("content2");
                        Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                        f = d.GetField("fie\u2C77ld");
                        Assert.AreEqual("field with non-ascii name", f.StringValue);
                    }

                    Fields tfvFields = reader.GetTermVectors(i);
                    Assert.IsNotNull(tfvFields, "i=" + i);
                    Terms tfv = tfvFields.Terms("utf8");
                    Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
                }
                else
                {
                    // Only ID 7 is deleted
                    Assert.AreEqual(7, i);
                }
            }

            if (is40Index)
            {
                // check docvalues fields
                NumericDocValues   dvByte               = MultiDocValues.GetNumericValues(reader, "dvByte");
                BinaryDocValues    dvBytesDerefFixed    = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
                BinaryDocValues    dvBytesDerefVar      = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
                SortedDocValues    dvBytesSortedFixed   = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
                SortedDocValues    dvBytesSortedVar     = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
                BinaryDocValues    dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
                BinaryDocValues    dvBytesStraightVar   = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
                NumericDocValues   dvDouble             = MultiDocValues.GetNumericValues(reader, "dvDouble");
                NumericDocValues   dvFloat              = MultiDocValues.GetNumericValues(reader, "dvFloat");
                NumericDocValues   dvInt       = MultiDocValues.GetNumericValues(reader, "dvInt");
                NumericDocValues   dvLong      = MultiDocValues.GetNumericValues(reader, "dvLong");
                NumericDocValues   dvPacked    = MultiDocValues.GetNumericValues(reader, "dvPacked");
                NumericDocValues   dvShort     = MultiDocValues.GetNumericValues(reader, "dvShort");
                SortedSetDocValues dvSortedSet = null;
                if (is42Index)
                {
                    dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet");
                }

                for (int i = 0; i < 35; i++)
                {
                    int id = Convert.ToInt32(reader.Document(i).Get("id"));
                    Assert.AreEqual(id, dvByte.Get(i));

                    sbyte[]  bytes       = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
                    BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
                    BytesRef scratch     = new BytesRef();

                    dvBytesDerefFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesDerefVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);

                    Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
                    Assert.AreEqual((float)id, Number.IntBitsToFloat((int)dvFloat.Get(i)), 0F);
                    Assert.AreEqual(id, dvInt.Get(i));
                    Assert.AreEqual(id, dvLong.Get(i));
                    Assert.AreEqual(id, dvPacked.Get(i));
                    Assert.AreEqual(id, dvShort.Get(i));
                    if (is42Index)
                    {
                        dvSortedSet.Document = i;
                        long ord = dvSortedSet.NextOrd();
                        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd());
                        dvSortedSet.LookupOrd(ord, scratch);
                        Assert.AreEqual(expectedRef, scratch);
                    }
                }
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #0
            Document doc = searcher.IndexReader.Document(hits[0].Doc);

            assertEquals("didn't get the right document first", "0", doc.Get("id"));

            DoTestHits(hits, 34, searcher.IndexReader);

            if (is40Index)
            {
                hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);

                hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);
            }

            hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);

            reader.Dispose();
        }
Пример #19
0
        public virtual void TestStressMultiThreading()
        {
            Directory         dir    = NewDirectory();
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random));
            IndexWriter       writer = new IndexWriter(dir, conf);

            // create index
            int numThreads = TestUtil.NextInt32(Random, 3, 6);
            int numDocs    = AtLeast(2000);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("id", "doc" + i, Store.NO));
                double group = Random.NextDouble();
                string g;
                if (group < 0.1)
                {
                    g = "g0";
                }
                else if (group < 0.5)
                {
                    g = "g1";
                }
                else if (group < 0.8)
                {
                    g = "g2";
                }
                else
                {
                    g = "g3";
                }
                doc.Add(new StringField("updKey", g, Store.NO));
                for (int j = 0; j < numThreads; j++)
                {
                    long value = Random.Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value)));
                    doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
                }
                writer.AddDocument(doc);
            }

            CountdownEvent done       = new CountdownEvent(numThreads);
            AtomicInt32    numUpdates = new AtomicInt32(AtLeast(100));

            // same thread updates a field as well as reopens
            ThreadJob[] threads = new ThreadJob[numThreads];
            for (int i = 0; i < threads.Length; i++)
            {
                string f  = "f" + i;
                string cf = "cf" + i;
                threads[i] = new ThreadAnonymousClass(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf);
            }

            foreach (ThreadJob t in threads)
            {
                t.Start();
            }
            done.Wait();
            writer.Dispose();

            DirectoryReader reader  = DirectoryReader.Open(dir);
            BytesRef        scratch = new BytesRef();

            foreach (AtomicReaderContext context in reader.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                for (int i = 0; i < numThreads; i++)
                {
                    BinaryDocValues  bdv             = r.GetBinaryDocValues("f" + i);
                    NumericDocValues control         = r.GetNumericDocValues("cf" + i);
                    IBits            docsWithBdv     = r.GetDocsWithField("f" + i);
                    IBits            docsWithControl = r.GetDocsWithField("cf" + i);
                    IBits            liveDocs        = r.LiveDocs;
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        if (liveDocs is null || liveDocs.Get(j))
                        {
                            Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j));
                            if (docsWithBdv.Get(j))
                            {
                                long ctrlValue = control.Get(j);
                                long bdvValue  = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2;
                                //              if (ctrlValue != bdvValue) {
                                //                System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
                                //              }
                                Assert.AreEqual(ctrlValue, bdvValue);
                            }
                        }
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
 public SortedDocValuesAnonymousInnerClassHelper(DirectDocValuesProducer outerInstance, SortedEntry entry, NumericDocValues docToOrd, BinaryDocValues values)
 {
     this.outerInstance = outerInstance;
     this.entry = entry;
     this.docToOrd = docToOrd;
     this.values = values;
 }
Пример #21
0
        public virtual void TestManyReopensAndFields()
        {
            Directory         dir    = NewDirectory();
            Random            random = Random;
            IndexWriterConfig conf   = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
            LogMergePolicy    lmp    = NewLogMergePolicy();

            lmp.MergeFactor = 3; // merge often
            conf.SetMergePolicy(lmp);
            IndexWriter writer = new IndexWriter(dir, conf);

            bool            isNRT = random.NextBoolean();
            DirectoryReader reader;

            if (isNRT)
            {
                reader = DirectoryReader.Open(writer, true);
            }
            else
            {
                writer.Commit();
                reader = DirectoryReader.Open(dir);
            }

            int numFields    = random.Next(4) + 3;             // 3-7
            int numNDVFields = random.Next(numFields / 2) + 1; // 1-3

            long[] fieldValues   = new long[numFields];
            bool[] fieldHasValue = new bool[numFields];
            Arrays.Fill(fieldHasValue, true);
            for (int i = 0; i < fieldValues.Length; i++)
            {
                fieldValues[i] = 1;
            }

            int numRounds = AtLeast(15);
            int docID     = 0;

            for (int i = 0; i < numRounds; i++)
            {
                int numDocs = AtLeast(5);
                //      System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs);
                for (int j = 0; j < numDocs; j++)
                {
                    Document doc = new Document();
                    doc.Add(new StringField("id", "doc-" + docID, Store.NO));
                    doc.Add(new StringField("key", "all", Store.NO)); // update key
                    // add all fields with their current value
                    for (int f = 0; f < fieldValues.Length; f++)
                    {
                        if (f < numNDVFields)
                        {
                            doc.Add(new NumericDocValuesField("f" + f, fieldValues[f]));
                        }
                        else
                        {
                            doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f])));
                        }
                    }
                    writer.AddDocument(doc);
                    ++docID;
                }

                // if field's value was unset before, unset it from all new added documents too
                for (int field = 0; field < fieldHasValue.Length; field++)
                {
                    if (!fieldHasValue[field])
                    {
                        if (field < numNDVFields)
                        {
                            writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null);
                        }
                        else
                        {
                            writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null);
                        }
                    }
                }

                int    fieldIdx    = random.Next(fieldValues.Length);
                string updateField = "f" + fieldIdx;
                if (random.NextBoolean())
                {
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'");
                    fieldHasValue[fieldIdx] = false;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null);
                    }
                }
                else
                {
                    fieldHasValue[fieldIdx] = true;
                    if (fieldIdx < numNDVFields)
                    {
                        writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]);
                    }
                    else
                    {
                        writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx]));
                    }
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]);
                }

                if (random.NextDouble() < 0.2)
                {
                    int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok!
                    writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
                    //        System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
                }

                // verify reader
                if (!isNRT)
                {
                    writer.Commit();
                }

                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
                DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
                Assert.IsNotNull(newReader);
                reader.Dispose();
                reader = newReader;
                //      System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
                Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
                BytesRef scratch = new BytesRef();
                foreach (AtomicReaderContext context in reader.Leaves)
                {
                    AtomicReader r = context.AtomicReader;
                    //        System.out.println(((SegmentReader) r).getSegmentName());
                    IBits liveDocs = r.LiveDocs;
                    for (int field = 0; field < fieldValues.Length; field++)
                    {
                        string           f             = "f" + field;
                        BinaryDocValues  bdv           = r.GetBinaryDocValues(f);
                        NumericDocValues ndv           = r.GetNumericDocValues(f);
                        IBits            docsWithField = r.GetDocsWithField(f);
                        if (field < numNDVFields)
                        {
                            Assert.IsNotNull(ndv);
                            Assert.IsNull(bdv);
                        }
                        else
                        {
                            Assert.IsNull(ndv);
                            Assert.IsNotNull(bdv);
                        }
                        int maxDoc = r.MaxDoc;
                        for (int doc = 0; doc < maxDoc; doc++)
                        {
                            if (liveDocs is null || liveDocs.Get(doc))
                            {
                                //              System.out.println("doc=" + (doc + context.docBase) + " f='" + f + "' vslue=" + getValue(bdv, doc, scratch));
                                if (fieldHasValue[field])
                                {
                                    Assert.IsTrue(docsWithField.Get(doc));
                                    if (field < numNDVFields)
                                    {
                                        Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                    else
                                    {
                                        Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                }
                                else
                                {
                                    Assert.IsFalse(docsWithField.Get(doc));
                                }
                            }
                        }
                    }
                }
                //      System.out.println();
            }

            IOUtils.Dispose(writer, reader, dir);
        }
Пример #22
0
 /// <summary>
 /// Creates an iterator over term, weight and payload fields from the lucene
 /// index. setting <code>withPayload</code> to false, implies an iterator
 /// over only term and weight.
 /// </summary>
 public DocumentInputIterator(DocumentDictionary outerInstance, bool hasPayloads, bool hasContexts)
 {
     this.outerInstance = outerInstance;
     this.hasPayloads = hasPayloads;
     this.hasContexts = hasContexts;
     docCount = outerInstance.reader.MaxDoc() - 1;
     weightValues = (outerInstance.weightField != null) ? MultiDocValues.GetNumericValues(outerInstance.reader, outerInstance.weightField) : null;
     liveDocs = (outerInstance.reader.Leaves().Count > 0) ? MultiFields.GetLiveDocs(outerInstance.reader) : null;
     relevantFields = GetRelevantFields(new string[] { outerInstance.field, outerInstance.weightField, outerInstance.payloadField, outerInstance.contextsField });
 }
 public RandomAccessOrdsAnonymousInnerClassHelper(DirectDocValuesProducer outerInstance, SortedSetEntry entry, NumericDocValues docToOrdAddress, NumericDocValues ords, BinaryDocValues values)
 {
     this.entry = entry;
     this.docToOrdAddress = docToOrdAddress;
     this.ords = ords;
     this.values = values;
 }
Пример #24
0
        /// <summary>
        /// Returns a NumericDocValues for a reader's docvalues (potentially merging on-the-fly)
        /// <p>
        /// this is a slow way to access numeric values. Instead, access them per-segment
        /// with <seealso cref="AtomicReader#getNumericDocValues(String)"/>
        /// </p>
        ///
        /// </summary>
        public static NumericDocValues GetNumericValues(IndexReader r, string field)
        {
            IList<AtomicReaderContext> leaves = r.Leaves;
            int size = leaves.Count;
            if (size == 0)
            {
                return null;
            }
            else if (size == 1)
            {
                return leaves[0].AtomicReader.GetNumericDocValues(field);
            }

            bool anyReal = false;
            NumericDocValues[] values = new NumericDocValues[size];
            int[] starts = new int[size + 1];
            for (int i = 0; i < size; i++)
            {
                AtomicReaderContext context = leaves[i];
                NumericDocValues v = context.AtomicReader.GetNumericDocValues(field);
                if (v == null)
                {
                    v = DocValues.EMPTY_NUMERIC;
                }
                else
                {
                    anyReal = true;
                }
                values[i] = v;
                starts[i] = context.DocBase;
            }
            starts[size] = r.MaxDoc;

            if (!anyReal)
            {
                return null;
            }
            else
            {
                return new NumericDocValuesAnonymousInnerClassHelper2(values, starts);
            }
        }