예제 #1
0
        public virtual void TestSimple()
        {
            Directory         dir   = NewDirectory();
            RandomIndexWriter w     = new RandomIndexWriter(Random, dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random)).SetMergePolicy(NewLogMergePolicy()));
            Document          doc   = new Document();
            Field             field = NewTextField("field", "", Field.Store.NO);

            doc.Add(field);
            field.SetStringValue("a b c");
            w.AddDocument(doc);

            field.SetStringValue("d e f");
            w.AddDocument(doc);

            field.SetStringValue("a f");
            w.AddDocument(doc);

            IndexReader r = w.GetReader();

            w.Dispose();

            AtomicReader       ar   = SlowCompositeReaderWrapper.Wrap(r);
            DocTermOrds        dto  = new DocTermOrds(ar, ar.LiveDocs, "field");
            SortedSetDocValues iter = dto.GetIterator(ar);

            iter.SetDocument(0);
            Assert.AreEqual(0, iter.NextOrd());
            Assert.AreEqual(1, iter.NextOrd());
            Assert.AreEqual(2, iter.NextOrd());
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, iter.NextOrd());

            iter.SetDocument(1);
            Assert.AreEqual(3, iter.NextOrd());
            Assert.AreEqual(4, iter.NextOrd());
            Assert.AreEqual(5, iter.NextOrd());
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, iter.NextOrd());

            iter.SetDocument(2);
            Assert.AreEqual(0, iter.NextOrd());
            Assert.AreEqual(5, iter.NextOrd());
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, iter.NextOrd());

            r.Dispose();
            dir.Dispose();
        }
예제 #2
0
        public override SortedSetDocValues GetSortedSetDocValues(string field)
        {
            EnsureOpen();
            OrdinalMap map = null;

            lock (cachedOrdMaps)
            {
                if (!cachedOrdMaps.TryGetValue(field, out map))
                {
                    // uncached, or not a multi dv
                    SortedSetDocValues dv = MultiDocValues.GetSortedSetValues(@in, field);
                    if (dv is MultiSortedSetDocValues docValues)
                    {
                        map = docValues.Mapping;
                        if (map.owner == CoreCacheKey)
                        {
                            cachedOrdMaps[field] = map;
                        }
                    }
                    return(dv);
                }
            }
            // cached ordinal map
            if (FieldInfos.FieldInfo(field).DocValuesType != DocValuesType.SORTED_SET)
            {
                return(null);
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(map != null);
            }
            int size   = @in.Leaves.Count;
            var values = new SortedSetDocValues[size];

            int[] starts = new int[size + 1];
            for (int i = 0; i < size; i++)
            {
                AtomicReaderContext context = @in.Leaves[i];
                SortedSetDocValues  v       = context.AtomicReader.GetSortedSetDocValues(field) ?? DocValues.EMPTY_SORTED_SET;
                values[i] = v;
                starts[i] = context.DocBase;
            }
            starts[size] = MaxDoc;
            return(new MultiSortedSetDocValues(values, starts, map));
        }
예제 #3
0
 public AssertingSortedSetDocValues(SortedSetDocValues @in, int maxDoc)
 {
     this.@in = @in;
     this.MaxDoc = maxDoc;
     this.ValueCount_Renamed = @in.ValueCount;
     Debug.Assert(ValueCount_Renamed >= 0);
 }
        private void AssertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual)
        {
            // can be null for the segment if no docs actually had any SortedDocValues
            // in this case FC.getDocTermsOrds returns EMPTY
            if (actual == null)
            {
                Assert.AreEqual(DocValues.EMPTY_SORTED_SET, expected);
                return;
            }
            Assert.AreEqual(expected.ValueCount, actual.ValueCount);
            // compare ord lists
            for (int i = 0; i < maxDoc; i++)
            {
                expected.Document = i;
                actual.Document = i;
                long expectedOrd;
                while ((expectedOrd = expected.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    Assert.AreEqual(expectedOrd, actual.NextOrd());
                }
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, actual.NextOrd());
            }

            // compare ord dictionary
            BytesRef expectedBytes = new BytesRef();
            BytesRef actualBytes = new BytesRef();
            for (long i = 0; i < expected.ValueCount; i++)
            {
                expected.LookupTerm(expectedBytes);
                actual.LookupTerm(actualBytes);
                Assert.AreEqual(expectedBytes, actualBytes);
            }

            // compare termsenum
            AssertEquals(expected.ValueCount, expected.TermsEnum(), actual.TermsEnum());
        }
예제 #5
0
 public FieldCacheDocIdSetAnonymousInnerClassHelper(DocTermOrdsRangeFilterAnonymousInnerClassHelper outerInstance, int maxDoc, Bits acceptDocs, SortedSetDocValues docTermOrds, long inclusiveLowerPoint, long inclusiveUpperPoint)
     : base(maxDoc, acceptDocs)
 {
     this.OuterInstance = outerInstance;
     this.DocTermOrds = docTermOrds;
     this.InclusiveLowerPoint = inclusiveLowerPoint;
     this.InclusiveUpperPoint = inclusiveUpperPoint;
 }
예제 #6
0
 /// <summary>
 /// Creates a new <see cref="TermsEnum"/> over the provided values </summary>
 public SortedSetDocValuesTermsEnum(SortedSetDocValues values)
 {
     this.values = values;
 }
예제 #7
0
 public BitsAnonymousClass2(SortedSetDocValues dv, int maxDoc)
 {
     this.dv = dv;
     this.maxDoc = maxDoc;
 }
예제 #8
0
        public virtual void TestSortedSetWithDups()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);

            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);

            for (int i = 0; i < numDocs; i++)
            {
                Document doc       = new Document();
                int      numValues = Random().Next(5);
                for (int j = 0; j < numValues; j++)
                {
                    doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomSimpleString(Random(), 2))));
                }
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;

            iw.ForceMerge(1);
            DirectoryReader ir2    = iw.Reader;
            AtomicReader    merged = GetOnlySegmentReader(ir2);

            iw.Dispose();

            SortedSetDocValues multi  = MultiDocValues.GetSortedSetValues(ir, "bytes");
            SortedSetDocValues single = merged.GetSortedSetDocValues("bytes");

            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.ValueCount, multi.ValueCount);
                BytesRef actual   = new BytesRef();
                BytesRef expected = new BytesRef();
                // check values
                for (long i = 0; i < single.ValueCount; i++)
                {
                    single.LookupOrd(i, expected);
                    multi.LookupOrd(i, actual);
                    Assert.AreEqual(expected, actual);
                }
                // check ord list
                for (int i = 0; i < numDocs; i++)
                {
                    single.SetDocument(i);
                    List <long?> expectedList = new List <long?>();
                    long         ord;
                    while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        expectedList.Add(ord);
                    }

                    multi.SetDocument(i);
                    int upto = 0;
                    while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        Assert.AreEqual((long)expectedList[upto], ord);
                        upto++;
                    }
                    Assert.AreEqual(expectedList.Count, upto);
                }
            }

            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
 public TermsAnonymousInnerClassHelper(MultiTermQueryDocTermOrdsWrapperFilter outerInstance, SortedSetDocValues docTermOrds)
 {
     this.OuterInstance = outerInstance;
     this.DocTermOrds = docTermOrds;
 }
예제 #10
0
        private void MergeDocValues(SegmentWriteState segmentWriteState)
        {
            DocValuesConsumer consumer = Codec.DocValuesFormat().FieldsConsumer(segmentWriteState);
            bool success = false;

            try
            {
                foreach (FieldInfo field in MergeState.FieldInfos)
                {
                    DocValuesType_e?type = field.DocValuesType;
                    if (type != null)
                    {
                        if (type == DocValuesType_e.NUMERIC)
                        {
                            IList <NumericDocValues> toMerge = new List <NumericDocValues>();
                            //IList<Bits> docsWithField = new List<Bits>();
                            foreach (AtomicReader reader in MergeState.Readers)
                            {
                                NumericDocValues values = reader.GetNumericDocValues(field.Name);
                                Bits             bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_NUMERIC;
                                    bits   = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                //docsWithField.Add(bits);
                            }
                            consumer.MergeNumericField(field, MergeState, toMerge /*, docsWithField*/);
                        }
                        else if (type == DocValuesType_e.BINARY)
                        {
                            IList <BinaryDocValues> toMerge = new List <BinaryDocValues>();
                            //IList<Bits> docsWithField = new List<Bits>();
                            foreach (AtomicReader reader in MergeState.Readers)
                            {
                                BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                                Bits            bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_BINARY;
                                    bits   = new Lucene.Net.Util.Bits_MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                //docsWithField.Add(bits);
                            }
                            consumer.MergeBinaryField(field, MergeState, toMerge /*, docsWithField*/);
                        }
                        else if (type == DocValuesType_e.SORTED)
                        {
                            IList <SortedDocValues> toMerge = new List <SortedDocValues>();
                            foreach (AtomicReader reader in MergeState.Readers)
                            {
                                SortedDocValues values = reader.GetSortedDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedField(field, MergeState, toMerge);
                        }
                        else if (type == DocValuesType_e.SORTED_SET)
                        {
                            IList <SortedSetDocValues> toMerge = new List <SortedSetDocValues>();
                            foreach (AtomicReader reader in MergeState.Readers)
                            {
                                SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED_SET;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedSetField(field, MergeState, toMerge);
                        }
                        else
                        {
                            throw new InvalidOperationException("type=" + type);
                        }
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Close(consumer);
                }
                else
                {
                    IOUtils.CloseWhileHandlingException(consumer);
                }
            }
        }
 /// <summary>
 /// Creates a new TermsEnum over the provided values </summary>
 public SortedSetDocValuesTermsEnum(SortedSetDocValues values)
 {
     this.Values = values;
 }
예제 #12
0
 /// <summary>
 /// Returns a single-valued view of the SortedSetDocValues, if it was previously
 /// wrapped with <seealso cref="#singleton"/>, or null.
 /// </summary>
 public static SortedDocValues UnwrapSingleton(SortedSetDocValues dv)
 {
     if (dv is SingletonSortedSetDocValues)
     {
         return ((SingletonSortedSetDocValues)dv).SortedDocValues;
     }
     else
     {
         return null;
     }
 }
예제 #13
0
 /// <summary>
 /// Returns a Bits representing all documents from <code>dv</code> that have a value.
 /// </summary>
 public static Bits DocsWithValue(SortedSetDocValues dv, int maxDoc)
 {
     return new BitsAnonymousInnerClassHelper2(dv, maxDoc);
 }
예제 #14
0
 /// <summary>
 /// Creates a new MultiSortedSetDocValues over <code>values</code> </summary>
 internal MultiSortedSetDocValues(SortedSetDocValues[] values, int[] docStarts, OrdinalMap mapping)
 {
     Debug.Assert(values.Length == mapping.OrdDeltas.Length);
     Debug.Assert(docStarts.Length == values.Length + 1);
     this.Values = values;
     this.DocStarts = docStarts;
     this.Mapping = mapping;
 }
예제 #15
0
        /// <summary>
        /// Returns a SortedSetDocValues for a reader's docvalues (potentially doing extremely slow things).
        /// <p>
        /// this is an extremely slow way to access sorted values. Instead, access them per-segment
        /// with <seealso cref="AtomicReader#getSortedSetDocValues(String)"/>
        /// </p>
        /// </summary>
        public static SortedSetDocValues GetSortedSetValues(IndexReader r, string field)
        {
            IList<AtomicReaderContext> leaves = r.Leaves;
            int size = leaves.Count;

            if (size == 0)
            {
                return null;
            }
            else if (size == 1)
            {
                return leaves[0].AtomicReader.GetSortedSetDocValues(field);
            }

            bool anyReal = false;
            SortedSetDocValues[] values = new SortedSetDocValues[size];
            int[] starts = new int[size + 1];
            for (int i = 0; i < size; i++)
            {
                AtomicReaderContext context = leaves[i];
                SortedSetDocValues v = context.AtomicReader.GetSortedSetDocValues(field);
                if (v == null)
                {
                    v = DocValues.EMPTY_SORTED_SET;
                }
                else
                {
                    anyReal = true;
                }
                values[i] = v;
                starts[i] = context.DocBase;
            }
            starts[size] = r.MaxDoc;

            if (!anyReal)
            {
                return null;
            }
            else
            {
                TermsEnum[] enums = new TermsEnum[values.Length];
                for (int i = 0; i < values.Length; i++)
                {
                    enums[i] = values[i].TermsEnum();
                }
                OrdinalMap mapping = new OrdinalMap(r.CoreCacheKey, enums);
                return new MultiSortedSetDocValues(values, starts, mapping);
            }
        }
예제 #16
0
        private void Verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef)
        {
            DocTermOrds dto = new DocTermOrds(r, r.LiveDocs, "field", prefixRef, int.MaxValue, TestUtil.NextInt32(Random, 2, 10));

            FieldCache.Int32s docIDToID = FieldCache.DEFAULT.GetInt32s(r, "id", false);

            /*
             * for(int docID=0;docID<subR.MaxDoc;docID++) {
             * System.out.println("  docID=" + docID + " id=" + docIDToID[docID]);
             * }
             */

            if (VERBOSE)
            {
                Console.WriteLine("TEST: verify prefix=" + (prefixRef == null ? "null" : prefixRef.Utf8ToString()));
                Console.WriteLine("TEST: all TERMS:");
                TermsEnum allTE = MultiFields.GetTerms(r, "field").GetIterator(null);
                int       ord   = 0;
                while (allTE.Next() != null)
                {
                    Console.WriteLine("  ord=" + (ord++) + " term=" + allTE.Term.Utf8ToString());
                }
            }

            //final TermsEnum te = subR.Fields.Terms("field").iterator();
            TermsEnum te = dto.GetOrdTermsEnum(r);

            if (dto.NumTerms == 0)
            {
                if (prefixRef == null)
                {
                    Assert.IsNull(MultiFields.GetTerms(r, "field"));
                }
                else
                {
                    Terms terms = MultiFields.GetTerms(r, "field");
                    if (terms != null)
                    {
                        TermsEnum            termsEnum = terms.GetIterator(null);
                        TermsEnum.SeekStatus result    = termsEnum.SeekCeil(prefixRef);
                        if (result != TermsEnum.SeekStatus.END)
                        {
                            Assert.IsFalse(StringHelper.StartsWith(termsEnum.Term, prefixRef), "term=" + termsEnum.Term.Utf8ToString() + " matches prefix=" + prefixRef.Utf8ToString());
                        }
                        else
                        {
                            // ok
                        }
                    }
                    else
                    {
                        // ok
                    }
                }
                return;
            }

            if (VERBOSE)
            {
                Console.WriteLine("TEST: TERMS:");
                te.SeekExact(0);
                while (true)
                {
                    Console.WriteLine("  ord=" + te.Ord + " term=" + te.Term.Utf8ToString());
                    if (te.Next() == null)
                    {
                        break;
                    }
                }
            }

            SortedSetDocValues iter = dto.GetIterator(r);

            for (int docID = 0; docID < r.MaxDoc; docID++)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: docID=" + docID + " of " + r.MaxDoc + " (id=" + docIDToID.Get(docID) + ")");
                }
                iter.SetDocument(docID);
                int[] answers = idToOrds[docIDToID.Get(docID)];
                int   upto    = 0;
                long  ord;
                while ((ord = iter.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    te.SeekExact(ord);
                    BytesRef expected = termsArray[answers[upto++]];
                    if (VERBOSE)
                    {
                        Console.WriteLine("  exp=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString());
                    }
                    Assert.AreEqual(expected, te.Term, "expected=" + expected.Utf8ToString() + " actual=" + te.Term.Utf8ToString() + " ord=" + ord);
                }
                Assert.AreEqual(answers.Length, upto);
            }
        }
예제 #17
0
        public virtual void TestSortedTermsEnum()
        {
            Directory         directory = NewDirectory();
            Analyzer          analyzer  = new MockAnalyzer(Random);
            IndexWriterConfig iwconfig  = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer);

            iwconfig.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iwriter = new RandomIndexWriter(Random, directory, iwconfig);

            Document doc = new Document();

            doc.Add(new StringField("field", "hello", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "world", Field.Store.NO));
            iwriter.AddDocument(doc);

            doc = new Document();
            doc.Add(new StringField("field", "beer", Field.Store.NO));
            iwriter.AddDocument(doc);
            iwriter.ForceMerge(1);

            DirectoryReader ireader = iwriter.GetReader();

            iwriter.Dispose();

            AtomicReader       ar = GetOnlySegmentReader(ireader);
            SortedSetDocValues dv = FieldCache.DEFAULT.GetDocTermOrds(ar, "field");

            Assert.AreEqual(3, dv.ValueCount);

            TermsEnum termsEnum = dv.GetTermsEnum();

            // next()
            Assert.AreEqual("beer", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual("hello", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual("world", termsEnum.Next().Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);

            // seekCeil()
            Assert.AreEqual(SeekStatus.NOT_FOUND, termsEnum.SeekCeil(new BytesRef("ha!")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.AreEqual(SeekStatus.END, termsEnum.SeekCeil(new BytesRef("zzz")));

            // seekExact()
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("beer")));
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("hello")));
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            Assert.IsTrue(termsEnum.SeekExact(new BytesRef("world")));
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            Assert.IsFalse(termsEnum.SeekExact(new BytesRef("bogus")));

            // seek(ord)
            termsEnum.SeekExact(0);
            Assert.AreEqual("beer", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(0, termsEnum.Ord);
            termsEnum.SeekExact(1);
            Assert.AreEqual("hello", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(1, termsEnum.Ord);
            termsEnum.SeekExact(2);
            Assert.AreEqual("world", termsEnum.Term.Utf8ToString());
            Assert.AreEqual(2, termsEnum.Ord);
            ireader.Dispose();
            directory.Dispose();
        }
 public FieldCacheDocIdSetAnonymousInnerClassHelper(MultiTermQueryDocTermOrdsWrapperFilter outerInstance, int maxDoc, Bits acceptDocs, SortedSetDocValues docTermOrds, LongBitSet termSet)
     : base(maxDoc, acceptDocs)
 {
     this.OuterInstance = outerInstance;
     this.DocTermOrds = docTermOrds;
     this.TermSet = termSet;
 }
예제 #19
0
        private static void CheckSortedSetDocValues(string fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField)
        {
            long maxOrd = dv.ValueCount - 1;
            LongBitSet seenOrds = new LongBitSet(dv.ValueCount);
            long maxOrd2 = -1;
            for (int i = 0; i < reader.MaxDoc; i++)
            {
                dv.Document = i;
                long lastOrd = -1;
                long ord;
                if (docsWithField.Get(i))
                {
                    int ordCount = 0;
                    while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        if (ord <= lastOrd)
                        {
                            throw new Exception("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i);
                        }
                        if (ord < 0 || ord > maxOrd)
                        {
                            throw new Exception("ord out of bounds: " + ord);
                        }
                        if (dv is RandomAccessOrds)
                        {
                            long ord2 = ((RandomAccessOrds)dv).OrdAt(ordCount);
                            if (ord != ord2)
                            {
                                throw new Exception("ordAt(" + ordCount + ") inconsistent, expected=" + ord + ",got=" + ord2 + " for doc: " + i);
                            }
                        }
                        lastOrd = ord;
                        maxOrd2 = Math.Max(maxOrd2, ord);
                        seenOrds.Set(ord);
                        ordCount++;
                    }
                    if (ordCount == 0)
                    {
                        throw new Exception("dv for field: " + fieldName + " has no ordinals but is not marked missing for doc: " + i);
                    }
                    if (dv is RandomAccessOrds)
                    {
                        long ordCount2 = ((RandomAccessOrds)dv).Cardinality();
                        if (ordCount != ordCount2)
                        {
                            throw new Exception("cardinality inconsistent, expected=" + ordCount + ",got=" + ordCount2 + " for doc: " + i);
                        }
                    }
                }
                else
                {
                    long o = dv.NextOrd();
                    if (o != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        throw new Exception("dv for field: " + fieldName + " is marked missing but has ord=" + o + " for doc: " + i);
                    }
                    if (dv is RandomAccessOrds)
                    {
                        long ordCount2 = ((RandomAccessOrds)dv).Cardinality();
                        if (ordCount2 != 0)
                        {
                            throw new Exception("dv for field: " + fieldName + " is marked missing but has cardinality " + ordCount2 + " for doc: " + i);
                        }
                    }
                }
            }
            if (maxOrd != maxOrd2)
            {
                throw new Exception("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
            }
            if (seenOrds.Cardinality() != dv.ValueCount)
            {
                throw new Exception("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.ValueCount + " but only used: " + seenOrds.Cardinality());
            }

            BytesRef lastValue = null;
            BytesRef scratch = new BytesRef();
            for (long i = 0; i <= maxOrd; i++)
            {
                dv.LookupOrd(i, scratch);
                Debug.Assert(scratch.Valid);
                if (lastValue != null)
                {
                    if (scratch.CompareTo(lastValue) <= 0)
                    {
                        throw new Exception("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + scratch);
                    }
                }
                lastValue = BytesRef.DeepCopyOf(scratch);
            }
        }
예제 #20
0
 internal MinValue(SortedSetDocValues @in)
 {
     this.@in = @in;
 }
예제 #21
0
 /// <summary>
 /// Returns a <see cref="IBits"/> representing all documents from <paramref name="dv"/> that have a value.
 /// </summary>
 public static IBits DocsWithValue(SortedSetDocValues dv, int maxDoc)
 {
     return new BitsAnonymousClass2(dv, maxDoc);
 }
예제 #22
0
			/// <exception cref="System.IO.IOException"></exception>
			public override void SetNextReader(AtomicReaderContext context)
			{
				if (segmentFacetCounts != null)
				{
					segmentResults.AddItem(((TermGroupFacetCollector.MV.SegmentResult)CreateSegmentResult
						()));
				}
				groupFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(((AtomicReader)context.Reader
					()), groupField);
				facetFieldDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(((AtomicReader)context.
					Reader()), facetField);
				facetFieldNumTerms = (int)facetFieldDocTermOrds.GetValueCount();
				if (facetFieldNumTerms == 0)
				{
					facetOrdTermsEnum = null;
				}
				else
				{
					facetOrdTermsEnum = facetFieldDocTermOrds.TermsEnum();
				}
				// [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet field
				segmentFacetCounts = new int[facetFieldNumTerms + 1];
				segmentTotalCount = 0;
				segmentGroupedFacetHits.Clear();
				foreach (GroupedFacetHit groupedFacetHit in groupedFacetHits)
				{
					int groupOrd = groupedFacetHit.groupValue == null ? -1 : groupFieldTermsIndex.LookupTerm
						(groupedFacetHit.groupValue);
					if (groupedFacetHit.groupValue != null && groupOrd < 0)
					{
						continue;
					}
					int facetOrd;
					if (groupedFacetHit.facetValue != null)
					{
						if (facetOrdTermsEnum == null || !facetOrdTermsEnum.SeekExact(groupedFacetHit.facetValue
							))
						{
							continue;
						}
						facetOrd = (int)facetOrdTermsEnum.Ord();
					}
					else
					{
						facetOrd = facetFieldNumTerms;
					}
					// (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field
					int segmentGroupedFacetsIndex = groupOrd * (facetFieldNumTerms + 1) + facetOrd;
					segmentGroupedFacetHits.Put(segmentGroupedFacetsIndex);
				}
				if (facetPrefix != null)
				{
					TermsEnum.SeekStatus seekStatus;
					if (facetOrdTermsEnum != null)
					{
						seekStatus = facetOrdTermsEnum.SeekCeil(facetPrefix);
					}
					else
					{
						seekStatus = TermsEnum.SeekStatus.END;
					}
					if (seekStatus != TermsEnum.SeekStatus.END)
					{
						startFacetOrd = (int)facetOrdTermsEnum.Ord();
					}
					else
					{
						startFacetOrd = 0;
						endFacetOrd = 0;
						return;
					}
					BytesRef facetEndPrefix = BytesRef.DeepCopyOf(facetPrefix);
					facetEndPrefix.Append(UnicodeUtil.BIG_TERM);
					seekStatus = facetOrdTermsEnum.SeekCeil(facetEndPrefix);
					if (seekStatus != TermsEnum.SeekStatus.END)
					{
						endFacetOrd = (int)facetOrdTermsEnum.Ord();
					}
					else
					{
						endFacetOrd = facetFieldNumTerms;
					}
				}
				else
				{
					// Don't include null...
					startFacetOrd = 0;
					endFacetOrd = facetFieldNumTerms + 1;
				}
			}
예제 #23
0
 /// <summary>
 /// Returns a <see cref="IBits"/> representing all documents from <paramref name="dv"/> that have a value.
 /// </summary>
 public static IBits DocsWithValue(SortedSetDocValues dv, int maxDoc)
 {
     return(new BitsAnonymousInnerClassHelper2(dv, maxDoc));
 }
예제 #24
0
 public BitsAnonymousInnerClassHelper2(SortedSetDocValues dv, int maxDoc)
 {
     this.dv     = dv;
     this.maxDoc = maxDoc;
 }
예제 #25
0
        private void MergeDocValues(SegmentWriteState segmentWriteState)
        {
            DocValuesConsumer consumer = codec.DocValuesFormat.FieldsConsumer(segmentWriteState);
            bool success = false;

            try
            {
                foreach (FieldInfo field in mergeState.FieldInfos)
                {
                    DocValuesType type = field.DocValuesType;
                    if (type != DocValuesType.NONE)
                    {
                        if (type == DocValuesType.NUMERIC)
                        {
                            IList <NumericDocValues> toMerge       = new JCG.List <NumericDocValues>();
                            IList <IBits>            docsWithField = new JCG.List <IBits>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                NumericDocValues values = reader.GetNumericDocValues(field.Name);
                                IBits            bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_NUMERIC;
                                    bits   = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                docsWithField.Add(bits);
                            }
                            consumer.MergeNumericField(field, mergeState, toMerge, docsWithField);
                        }
                        else if (type == DocValuesType.BINARY)
                        {
                            IList <BinaryDocValues> toMerge       = new JCG.List <BinaryDocValues>();
                            IList <IBits>           docsWithField = new JCG.List <IBits>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                BinaryDocValues values = reader.GetBinaryDocValues(field.Name);
                                IBits           bits   = reader.GetDocsWithField(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_BINARY;
                                    bits   = new Lucene.Net.Util.Bits.MatchNoBits(reader.MaxDoc);
                                }
                                toMerge.Add(values);
                                docsWithField.Add(bits);
                            }
                            consumer.MergeBinaryField(field, mergeState, toMerge, docsWithField);
                        }
                        else if (type == DocValuesType.SORTED)
                        {
                            IList <SortedDocValues> toMerge = new JCG.List <SortedDocValues>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                SortedDocValues values = reader.GetSortedDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedField(field, mergeState, toMerge);
                        }
                        else if (type == DocValuesType.SORTED_SET)
                        {
                            IList <SortedSetDocValues> toMerge = new JCG.List <SortedSetDocValues>();
                            foreach (AtomicReader reader in mergeState.Readers)
                            {
                                SortedSetDocValues values = reader.GetSortedSetDocValues(field.Name);
                                if (values == null)
                                {
                                    values = DocValues.EMPTY_SORTED_SET;
                                }
                                toMerge.Add(values);
                            }
                            consumer.MergeSortedSetField(field, mergeState, toMerge);
                        }
                        else
                        {
                            throw AssertionError.Create("type=" + type);
                        }
                    }
                }
                success = true;
            }
            finally
            {
                if (success)
                {
                    IOUtils.Dispose(consumer);
                }
                else
                {
                    IOUtils.DisposeWhileHandlingException(consumer);
                }
            }
        }
 public override SortedSetDocValues GetSortedSetDocValues(string field)
 {
     EnsureOpen();
     OrdinalMap map = null;
     lock (CachedOrdMaps)
     {
         if (!CachedOrdMaps.TryGetValue(field, out map))
         {
             // uncached, or not a multi dv
             SortedSetDocValues dv = MultiDocValues.GetSortedSetValues(@in, field);
             MultiSortedSetDocValues docValues = dv as MultiSortedSetDocValues;
             if (docValues != null)
             {
                 map = docValues.Mapping;
                 if (map.Owner == CoreCacheKey)
                 {
                     CachedOrdMaps[field] = map;
                 }
             }
             return dv;
         }
     }
     // cached ordinal map
     if (FieldInfos.FieldInfo(field).DocValuesType != DocValuesType.SORTED_SET)
     {
         return null;
     }
     Debug.Assert(map != null);
     int size = @in.Leaves().Count;
     SortedSetDocValues[] values = new SortedSetDocValues[size];
     int[] starts = new int[size + 1];
     for (int i = 0; i < size; i++)
     {
         AtomicReaderContext context = @in.Leaves()[i];
         SortedSetDocValues v = context.AtomicReader.GetSortedSetDocValues(field) ?? DocValues.EMPTY_SORTED_SET;
         values[i] = v;
         starts[i] = context.DocBase;
     }
     starts[size] = MaxDoc();
     return new MultiSortedSetDocValues(values, starts, map);
 }
        public virtual void SearchIndex(Directory dir, string oldName)
        {
            //QueryParser parser = new QueryParser("contents", new MockAnalyzer(random));
            //Query query = parser.parse("handle:1");

            IndexReader   reader   = DirectoryReader.Open(dir);
            IndexSearcher searcher = NewSearcher(reader);

            TestUtil.CheckIndex(dir);

            // true if this is a 4.0+ index
            bool is40Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("content5") != null;
            // true if this is a 4.2+ index
            bool is42Index = MultiFields.GetMergedFieldInfos(reader).FieldInfo("dvSortedSet") != null;

            Debug.Assert(is40Index); // NOTE: currently we can only do this on trunk!

            Bits liveDocs = MultiFields.GetLiveDocs(reader);

            for (int i = 0; i < 35; i++)
            {
                if (liveDocs.Get(i))
                {
                    Document d = reader.Document(i);
                    IList <IndexableField> fields = d.Fields;
                    bool isProxDoc = d.GetField("content3") == null;
                    if (isProxDoc)
                    {
                        int numFields = is40Index ? 7 : 5;
                        Assert.AreEqual(numFields, fields.Count);
                        IndexableField f = d.GetField("id");
                        Assert.AreEqual("" + i, f.StringValue);

                        f = d.GetField("utf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = d.GetField("autf8");
                        Assert.AreEqual("Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", f.StringValue);

                        f = d.GetField("content2");
                        Assert.AreEqual("here is more content with aaa aaa aaa", f.StringValue);

                        f = d.GetField("fie\u2C77ld");
                        Assert.AreEqual("field with non-ascii name", f.StringValue);
                    }

                    Fields tfvFields = reader.GetTermVectors(i);
                    Assert.IsNotNull(tfvFields, "i=" + i);
                    Terms tfv = tfvFields.Terms("utf8");
                    Assert.IsNotNull(tfv, "docID=" + i + " index=" + oldName);
                }
                else
                {
                    // Only ID 7 is deleted
                    Assert.AreEqual(7, i);
                }
            }

            if (is40Index)
            {
                // check docvalues fields
                NumericDocValues   dvByte               = MultiDocValues.GetNumericValues(reader, "dvByte");
                BinaryDocValues    dvBytesDerefFixed    = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefFixed");
                BinaryDocValues    dvBytesDerefVar      = MultiDocValues.GetBinaryValues(reader, "dvBytesDerefVar");
                SortedDocValues    dvBytesSortedFixed   = MultiDocValues.GetSortedValues(reader, "dvBytesSortedFixed");
                SortedDocValues    dvBytesSortedVar     = MultiDocValues.GetSortedValues(reader, "dvBytesSortedVar");
                BinaryDocValues    dvBytesStraightFixed = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightFixed");
                BinaryDocValues    dvBytesStraightVar   = MultiDocValues.GetBinaryValues(reader, "dvBytesStraightVar");
                NumericDocValues   dvDouble             = MultiDocValues.GetNumericValues(reader, "dvDouble");
                NumericDocValues   dvFloat              = MultiDocValues.GetNumericValues(reader, "dvFloat");
                NumericDocValues   dvInt       = MultiDocValues.GetNumericValues(reader, "dvInt");
                NumericDocValues   dvLong      = MultiDocValues.GetNumericValues(reader, "dvLong");
                NumericDocValues   dvPacked    = MultiDocValues.GetNumericValues(reader, "dvPacked");
                NumericDocValues   dvShort     = MultiDocValues.GetNumericValues(reader, "dvShort");
                SortedSetDocValues dvSortedSet = null;
                if (is42Index)
                {
                    dvSortedSet = MultiDocValues.GetSortedSetValues(reader, "dvSortedSet");
                }

                for (int i = 0; i < 35; i++)
                {
                    int id = Convert.ToInt32(reader.Document(i).Get("id"));
                    Assert.AreEqual(id, dvByte.Get(i));

                    sbyte[]  bytes       = new sbyte[] { (sbyte)((int)((uint)id >> 24)), (sbyte)((int)((uint)id >> 16)), (sbyte)((int)((uint)id >> 8)), (sbyte)id };
                    BytesRef expectedRef = new BytesRef((byte[])(Array)bytes);
                    BytesRef scratch     = new BytesRef();

                    dvBytesDerefFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesDerefVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesSortedVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightFixed.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);
                    dvBytesStraightVar.Get(i, scratch);
                    Assert.AreEqual(expectedRef, scratch);

                    Assert.AreEqual((double)id, BitConverter.Int64BitsToDouble(dvDouble.Get(i)), 0D);
                    Assert.AreEqual((float)id, Number.IntBitsToFloat((int)dvFloat.Get(i)), 0F);
                    Assert.AreEqual(id, dvInt.Get(i));
                    Assert.AreEqual(id, dvLong.Get(i));
                    Assert.AreEqual(id, dvPacked.Get(i));
                    Assert.AreEqual(id, dvShort.Get(i));
                    if (is42Index)
                    {
                        dvSortedSet.Document = i;
                        long ord = dvSortedSet.NextOrd();
                        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, dvSortedSet.NextOrd());
                        dvSortedSet.LookupOrd(ord, scratch);
                        Assert.AreEqual(expectedRef, scratch);
                    }
                }
            }

            ScoreDoc[] hits = searcher.Search(new TermQuery(new Term("content", "aaa")), null, 1000).ScoreDocs;

            // First document should be #0
            Document doc = searcher.IndexReader.Document(hits[0].Doc);

            assertEquals("didn't get the right document first", "0", doc.Get("id"));

            DoTestHits(hits, 34, searcher.IndexReader);

            if (is40Index)
            {
                hits = searcher.Search(new TermQuery(new Term("content5", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);

                hits = searcher.Search(new TermQuery(new Term("content6", "aaa")), null, 1000).ScoreDocs;

                DoTestHits(hits, 34, searcher.IndexReader);
            }

            hits = searcher.Search(new TermQuery(new Term("utf8", "\u0000")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "lu\uD834\uDD1Ece\uD834\uDD60ne")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);
            hits = searcher.Search(new TermQuery(new Term("utf8", "ab\ud917\udc17cd")), null, 1000).ScoreDocs;
            Assert.AreEqual(34, hits.Length);

            reader.Dispose();
        }
예제 #28
0
 internal SlowMinShouldMatchScorer(BooleanWeight weight, AtomicReader reader, IndexSearcher searcher)
     : base(weight)
 {
     this.Dv = reader.GetSortedSetDocValues("dv");
     this.MaxDoc = reader.MaxDoc;
     BooleanQuery bq = (BooleanQuery)weight.Query;
     this.MinNrShouldMatch = bq.MinimumNumberShouldMatch;
     this.Sims = new SimScorer[(int)Dv.ValueCount];
     foreach (BooleanClause clause in bq.Clauses)
     {
         Debug.Assert(!clause.Prohibited);
         Debug.Assert(!clause.Required);
         Term term = ((TermQuery)clause.Query).Term;
         long ord = Dv.LookupTerm(term.Bytes);
         if (ord >= 0)
         {
             bool success = Ords.Add(ord);
             Debug.Assert(success); // no dups
             TermContext context = TermContext.Build(reader.Context, term);
             SimWeight w = weight.Similarity.ComputeWeight(1f, searcher.CollectionStatistics("field"), searcher.TermStatistics(term, context));
             var dummy = w.ValueForNormalization; // ignored
             w.Normalize(1F, 1F);
             Sims[(int)ord] = weight.Similarity.DoSimScorer(w, (AtomicReaderContext)reader.Context);
         }
     }
 }