Пример #1
0
 public override int NextDoc()
 {
     if (Debugging.AssertsEnabled)
     {
         Debugging.Assert(currentDoc != NO_MORE_DOCS);
     }
     for (currentDoc = currentDoc + 1; currentDoc < maxDoc; currentDoc++)
     {
         currentMatched = 0;
         score          = 0;
         dv.SetDocument(currentDoc);
         long ord;
         while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
         {
             if (ords.Contains(ord))
             {
                 currentMatched++;
                 score += sims[(int)ord].Score(currentDoc, 1);
             }
         }
         if (currentMatched >= minNrShouldMatch)
         {
             return(currentDoc);
         }
     }
     return(currentDoc = NO_MORE_DOCS);
 }
Пример #2
0
        public virtual void TestNonexistantFields()
        {
            Directory         dir = NewDirectory();
            RandomIndexWriter iw  = new RandomIndexWriter(Random(), dir, Similarity, TimeZone);
            Document          doc = new Document();

            iw.AddDocument(doc);
            DirectoryReader ir = iw.Reader;

            iw.Dispose();

            AtomicReader ar = GetOnlySegmentReader(ir);

            IFieldCache cache = FieldCache.DEFAULT;

            cache.PurgeAllCaches();
            Assert.AreEqual(0, cache.GetCacheEntries().Length);

#pragma warning disable 612, 618
            Bytes bytes = cache.GetBytes(ar, "bogusbytes", true);
            Assert.AreEqual(0, bytes.Get(0));

            Int16s shorts = cache.GetInt16s(ar, "bogusshorts", true);
            Assert.AreEqual(0, shorts.Get(0));
#pragma warning restore 612, 618

            Int32s ints = cache.GetInt32s(ar, "bogusints", true);
            Assert.AreEqual(0, ints.Get(0));

            Int64s longs = cache.GetInt64s(ar, "boguslongs", true);
            Assert.AreEqual(0, longs.Get(0));

            Singles floats = cache.GetSingles(ar, "bogusfloats", true);
            Assert.AreEqual(0, floats.Get(0), 0.0f);

            Doubles doubles = cache.GetDoubles(ar, "bogusdoubles", true);
            Assert.AreEqual(0, doubles.Get(0), 0.0D);

            BytesRef        scratch  = new BytesRef();
            BinaryDocValues binaries = cache.GetTerms(ar, "bogusterms", true);
            binaries.Get(0, scratch);
            Assert.AreEqual(0, scratch.Length);

            SortedDocValues sorted = cache.GetTermsIndex(ar, "bogustermsindex");
            Assert.AreEqual(-1, sorted.GetOrd(0));
            sorted.Get(0, scratch);
            Assert.AreEqual(0, scratch.Length);

            SortedSetDocValues sortedSet = cache.GetDocTermOrds(ar, "bogusmultivalued");
            sortedSet.SetDocument(0);
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());

            IBits bits = cache.GetDocsWithField(ar, "bogusbits");
            Assert.IsFalse(bits.Get(0));

            // check that we cached nothing
            Assert.AreEqual(0, cache.GetCacheEntries().Length);
            ir.Dispose();
            dir.Dispose();
        }
Пример #3
0
            public override void Collect(int doc)
            {
                _docTermOrds.SetDocument(doc);
                long ord;

                while ((ord = _docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    _docTermOrds.LookupOrd(ord, _scratch);
                    _collectorTerms.Add(_scratch);
                }
            }
Пример #4
0
 protected internal override sealed bool MatchDoc(int doc)
 {
     docTermOrds.SetDocument(doc);
     long ord;
     // TODO: we could track max bit set and early terminate (since they come in sorted order)
     while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
     {
         if (termSet.Get(ord))
         {
             return true;
         }
     }
     return false;
 }
Пример #5
0
            public virtual void Collect(int doc)
            {
                docTermOrds.SetDocument(doc);
                long ord;

                while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    docTermOrds.LookupOrd(ord, joinValue);
                    if (!joinValueToJoinScores.TryGetValue(joinValue, out JoinScore joinScore) || joinScore == null)
                    {
                        joinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
                    }
                    joinScore.AddScore(scorer.GetScore());
                }
            }
Пример #6
0
            public virtual void Collect(int doc)
            {
                docTermOrds.SetDocument(doc);
                long ord;

                while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    docTermOrds.LookupOrd(ord, joinValue);
                    var joinScore = JoinValueToJoinScores.ContainsKey(joinValue) ? JoinValueToJoinScores[joinValue] : null;
                    if (joinScore == null)
                    {
                        JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
                    }
                    joinScore.AddScore(scorer.GetScore());
                }
            }
Пример #7
0
                protected internal override sealed bool MatchDoc(int doc)
                {
                    docTermOrds.SetDocument(doc);
                    long ord;

                    while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        if (ord > inclusiveUpperPoint)
                        {
                            return(false);
                        }
                        else if (ord >= inclusiveLowerPoint)
                        {
                            return(true);
                        }
                    }
                    return(false);
                }
Пример #8
0
        public virtual void TestSortedSetDocValuesField()
        {
            AssumeTrue("default codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
            SortedSetDocValues dv = reader.GetSortedSetDocValues(SORTED_SET_DV_FIELD);
            int      maxDoc       = reader.MaxDoc;
            BytesRef bytes        = new BytesRef();

            for (int i = 0; i < maxDoc; i++)
            {
                dv.SetDocument(i);
                dv.LookupOrd(dv.NextOrd(), bytes);
                int value = sortedValues[i];
                assertEquals("incorrect sorted-set DocValues for doc " + i, value.toString(), bytes.Utf8ToString());
                dv.LookupOrd(dv.NextOrd(), bytes);
                assertEquals("incorrect sorted-set DocValues for doc " + i, (value + 1).ToString(), bytes.Utf8ToString());
                assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
            }
        }
Пример #9
0
        private IEnumerable <long?> GetMergeSortedSetDocToOrdCountEnumerable(AtomicReader[] readers, SortedSetDocValues[] dvs)
        {
            int          readerUpto      = -1;
            int          docIDUpto       = 0;
            AtomicReader currentReader   = null;
            IBits        currentLiveDocs = null;

            while (true)
            {
                if (readerUpto == readers.Length)
                {
                    yield break;
                }

                if (currentReader == null || docIDUpto == currentReader.MaxDoc)
                {
                    readerUpto++;
                    if (readerUpto < readers.Length)
                    {
                        currentReader   = readers[readerUpto];
                        currentLiveDocs = currentReader.LiveDocs;
                    }
                    docIDUpto = 0;
                    continue;
                }

                if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto))
                {
                    SortedSetDocValues dv = dvs[readerUpto];
                    dv.SetDocument(docIDUpto);
                    long value = 0;
                    while (dv.NextOrd() != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        value++;
                    }
                    docIDUpto++;
                    yield return(value);

                    continue;
                }

                docIDUpto++;
            }
        }
Пример #10
0
            public virtual void Collect(int doc)
            {
                docTermOrds.SetDocument(doc);
                long ord;

                while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    docTermOrds.LookupOrd(ord, scratch);
                    if (!joinValueToJoinScores.TryGetValue(scratch, out JoinScore joinScore) || joinScore == null)
                    {
                        continue;
                    }
                    int basedDoc = docBase + doc;
                    // First encountered join value determines the score.
                    // Something to keep in mind for many-to-many relations.
                    if (!docToJoinScore.ContainsKey(basedDoc))
                    {
                        docToJoinScore[basedDoc] = joinScore;
                    }
                }
            }
Пример #11
0
            /// <summary>
            /// Returns a <see cref="DocIdSet"/> with documents that should be permitted in search
            /// results.
            /// </summary>
            public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
            {
                SortedSetDocValues docTermOrds = FieldCache.DEFAULT.GetDocTermOrds((context.AtomicReader), m_query.m_field);
                // Cannot use FixedBitSet because we require long index (ord):
                Int64BitSet termSet   = new Int64BitSet(docTermOrds.ValueCount);
                TermsEnum   termsEnum = m_query.GetTermsEnum(new TermsAnonymousInnerClassHelper(docTermOrds));

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(termsEnum != null);
                }
                if (termsEnum.MoveNext())
                {
                    // fill into a bitset
                    do
                    {
                        termSet.Set(termsEnum.Ord);
                    } while (termsEnum.MoveNext());
                }
                else
                {
                    return(null);
                }
                return(new FieldCacheDocIdSet(context.Reader.MaxDoc, acceptDocs, (doc) =>
                {
                    docTermOrds.SetDocument(doc);
                    long ord;
                    // TODO: we could track max bit set and early terminate (since they come in sorted order)
                    while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        if (termSet.Get(ord))
                        {
                            return true;
                        }
                    }
                    return false;
                }));
            }
Пример #12
0
 public override int NextDoc()
 {
     Debug.Assert(CurrentDoc != NO_MORE_DOCS);
     for (CurrentDoc = CurrentDoc + 1; CurrentDoc < MaxDoc; CurrentDoc++)
     {
         CurrentMatched = 0;
         Score_Renamed  = 0;
         Dv.SetDocument(CurrentDoc);
         long ord;
         while ((ord = Dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
         {
             if (Ords.Contains(ord))
             {
                 CurrentMatched++;
                 Score_Renamed += Sims[(int)ord].Score(CurrentDoc, 1);
             }
         }
         if (CurrentMatched >= MinNrShouldMatch)
         {
             return(CurrentDoc);
         }
     }
     return(CurrentDoc = NO_MORE_DOCS);
 }
Пример #13
0
        public virtual void Test()
        {
#pragma warning disable 612, 618
            IFieldCache        cache   = FieldCache.DEFAULT;
            FieldCache.Doubles doubles = cache.GetDoubles(Reader, "theDouble", Random.NextBoolean());
            Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
            }

            FieldCache.Int64s longs = cache.GetInt64s(Reader, "theLong", Random.NextBoolean());
            Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", FieldCache.DEFAULT_INT64_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
            }

            FieldCache.Bytes bytes = cache.GetBytes(Reader, "theByte", Random.NextBoolean());
            Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue((sbyte)bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), (sbyte)bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
            }

            FieldCache.Int16s shorts = cache.GetInt16s(Reader, "theShort", Random.NextBoolean());
            Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", FieldCache.DEFAULT_INT16_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
            }

            FieldCache.Int32s ints = cache.GetInt32s(Reader, "theInt", Random.NextBoolean());
            Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", FieldCache.DEFAULT_INT32_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
            }

            FieldCache.Singles floats = cache.GetSingles(Reader, "theFloat", Random.NextBoolean());
            Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", FieldCache.DEFAULT_SINGLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
            }
#pragma warning restore 612, 618

            IBits docsWithField = cache.GetDocsWithField(Reader, "theLong");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "theLong"), "Second request to cache return same array");
            Assert.IsTrue(docsWithField is Bits.MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length; i++)
            {
                Assert.IsTrue(docsWithField.Get(i));
            }

            docsWithField = cache.GetDocsWithField(Reader, "sparse");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "sparse"), "Second request to cache return same array");
            Assert.IsFalse(docsWithField is Bits.MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length; i++)
            {
                Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
            }

            // getTermsIndex
            SortedDocValues termsIndex = cache.GetTermsIndex(Reader, "theRandomUnicodeString");
            Assert.AreSame(termsIndex, cache.GetTermsIndex(Reader, "theRandomUnicodeString"), "Second request to cache return same array");
            BytesRef br = new BytesRef();
            for (int i = 0; i < NUM_DOCS; i++)
            {
                BytesRef term;
                int      ord = termsIndex.GetOrd(i);
                if (ord == -1)
                {
                    term = null;
                }
                else
                {
                    termsIndex.LookupOrd(ord, br);
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
            }

            int nTerms = termsIndex.ValueCount;

            TermsEnum tenum = termsIndex.GetTermsEnum();
            BytesRef  val   = new BytesRef();
            for (int i = 0; i < nTerms; i++)
            {
                BytesRef val1 = tenum.Next();
                termsIndex.LookupOrd(i, val);
                // System.out.println("i="+i);
                Assert.AreEqual(val, val1);
            }

            // seek the enum around (note this isn't a great test here)
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                int k = Random.Next(nTerms);
                termsIndex.LookupOrd(k, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term);
            }

            for (int i = 0; i < nTerms; i++)
            {
                termsIndex.LookupOrd(i, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term);
            }

            // test bad field
            termsIndex = cache.GetTermsIndex(Reader, "bogusfield");

            // getTerms
            BinaryDocValues terms = cache.GetTerms(Reader, "theRandomUnicodeString", true);
            Assert.AreSame(terms, cache.GetTerms(Reader, "theRandomUnicodeString", true), "Second request to cache return same array");
            IBits bits = cache.GetDocsWithField(Reader, "theRandomUnicodeString");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                terms.Get(i, br);
                BytesRef term;
                if (!bits.Get(i))
                {
                    term = null;
                }
                else
                {
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
            }

            // test bad field
            terms = cache.GetTerms(Reader, "bogusfield", false);

            // getDocTermOrds
            SortedSetDocValues termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
            int numEntries = cache.GetCacheEntries().Length;
            // ask for it again, and check that we didnt create any additional entries:
            termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
            Assert.AreEqual(numEntries, cache.GetCacheEntries().Length);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                termOrds.SetDocument(i);
                // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
                IList <BytesRef> values = MultiValued[i].Distinct().ToList();
                foreach (BytesRef v in values)
                {
                    if (v == null)
                    {
                        // why does this test use null values... instead of an empty list: confusing
                        break;
                    }
                    long ord = termOrds.NextOrd();
                    Debug.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
                    BytesRef scratch = new BytesRef();
                    termOrds.LookupOrd(ord, scratch);
                    Assert.AreEqual(v, scratch);
                }
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
            }

            // test bad field
            termOrds = cache.GetDocTermOrds(Reader, "bogusfield");
            Assert.IsTrue(termOrds.ValueCount == 0);

            FieldCache.DEFAULT.PurgeByCacheKey(Reader.CoreCacheKey);
        }
Пример #14
0
 public override void SetDocument(int docID)
 {
     @in.SetDocument(docMap.NewToOld(docID));
 }
Пример #15
0
        private IEnumerable <long?> GetMergeSortedSetOrdsEnumerable(AtomicReader[] readers, SortedSetDocValues[] dvs, OrdinalMap map)
        {
            int          readerUpto      = -1;
            int          docIDUpto       = 0;
            AtomicReader currentReader   = null;
            IBits        currentLiveDocs = null;
            var          ords            = new long[8];
            int          ordUpto         = 0;
            int          ordLength       = 0;

            while (true)
            {
                if (readerUpto == readers.Length)
                {
                    yield break;
                }

                if (ordUpto < ordLength)
                {
                    var value = ords[ordUpto];
                    ordUpto++;
                    yield return(value);

                    continue;
                }

                if (currentReader == null || docIDUpto == currentReader.MaxDoc)
                {
                    readerUpto++;
                    if (readerUpto < readers.Length)
                    {
                        currentReader   = readers[readerUpto];
                        currentLiveDocs = currentReader.LiveDocs;
                    }
                    docIDUpto = 0;
                    continue;
                }

                if (currentLiveDocs == null || currentLiveDocs.Get(docIDUpto))
                {
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(docIDUpto < currentReader.MaxDoc);
                    }
                    SortedSetDocValues dv = dvs[readerUpto];
                    dv.SetDocument(docIDUpto);
                    ordUpto = ordLength = 0;
                    long ord;
                    while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        if (ordLength == ords.Length)
                        {
                            ords = ArrayUtil.Grow(ords, ordLength + 1);
                        }
                        ords[ordLength] = map.GetGlobalOrd(readerUpto, ord);
                        ordLength++;
                    }
                    docIDUpto++;
                    continue;
                }

                docIDUpto++;
            }
        }
Пример #16
0
        /// <summary>
        /// Create the results based on the search hits.
        /// Can be overridden by subclass to add particular behavior (e.g. weight transformation) </summary>
        /// <exception cref="System.IO.IOException"> If there are problems reading fields from the underlying Lucene index. </exception>
        protected internal virtual IList <LookupResult> CreateResults(IndexSearcher searcher, TopFieldDocs hits, int num, string charSequence, bool doHighlight, IEnumerable <string> matchedTokens, string prefixToken)
        {
            BinaryDocValues textDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, TEXT_FIELD_NAME);

            // This will just be null if app didn't pass payloads to build():
            // TODO: maybe just stored fields?  they compress...
            BinaryDocValues             payloadsDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, "payloads");
            IList <AtomicReaderContext> leaves     = searcher.IndexReader.Leaves;
            List <LookupResult>         results    = new List <LookupResult>();
            BytesRef scratch = new BytesRef();

            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                FieldDoc fd = (FieldDoc)hits.ScoreDocs[i];
                textDV.Get(fd.Doc, scratch);
                string text  = scratch.Utf8ToString();
                long   score = (long)fd.Fields[0];

                BytesRef payload;
                if (payloadsDV != null)
                {
                    payload = new BytesRef();
                    payloadsDV.Get(fd.Doc, payload);
                }
                else
                {
                    payload = null;
                }

                // Must look up sorted-set by segment:
                int segment = ReaderUtil.SubIndex(fd.Doc, leaves);
                SortedSetDocValues contextsDV = leaves[segment].AtomicReader.GetSortedSetDocValues(CONTEXTS_FIELD_NAME);
                HashSet <BytesRef> contexts;
                if (contextsDV != null)
                {
                    contexts = new HashSet <BytesRef>();
                    contextsDV.SetDocument(fd.Doc - leaves[segment].DocBase);
                    long ord;
                    while ((ord = contextsDV.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        BytesRef context = new BytesRef();
                        contextsDV.LookupOrd(ord, context);
                        contexts.Add(context);
                    }
                }
                else
                {
                    contexts = null;
                }

                LookupResult result;

                if (doHighlight)
                {
                    object highlightKey = Highlight(text, matchedTokens, prefixToken);
                    result = new LookupResult(highlightKey.ToString(), highlightKey, score, payload, contexts);
                }
                else
                {
                    result = new LookupResult(text, score, payload, contexts);
                }

                results.Add(result);
            }

            return(results);
        }
Пример #17
0
            public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
            {
                SortedSetDocValues docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(context.AtomicReader, field);
                long lowerPoint = lowerVal == null ? -1 : docTermOrds.LookupTerm(lowerVal);
                long upperPoint = upperVal == null ? -1 : docTermOrds.LookupTerm(upperVal);

                long inclusiveLowerPoint, inclusiveUpperPoint;

                // Hints:
                // * binarySearchLookup returns -1, if value was null.
                // * the value is <0 if no exact hit was found, the returned value
                //   is (-(insertion point) - 1)
                if (lowerPoint == -1 && lowerVal == null)
                {
                    inclusiveLowerPoint = 0;
                }
                else if (includeLower && lowerPoint >= 0)
                {
                    inclusiveLowerPoint = lowerPoint;
                }
                else if (lowerPoint >= 0)
                {
                    inclusiveLowerPoint = lowerPoint + 1;
                }
                else
                {
                    inclusiveLowerPoint = Math.Max(0, -lowerPoint - 1);
                }

                if (upperPoint == -1 && upperVal == null)
                {
                    inclusiveUpperPoint = long.MaxValue;
                }
                else if (includeUpper && upperPoint >= 0)
                {
                    inclusiveUpperPoint = upperPoint;
                }
                else if (upperPoint >= 0)
                {
                    inclusiveUpperPoint = upperPoint - 1;
                }
                else
                {
                    inclusiveUpperPoint = -upperPoint - 2;
                }

                if (inclusiveUpperPoint < 0 || inclusiveLowerPoint > inclusiveUpperPoint)
                {
                    return(null);
                }

                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0);
                }

                return(new FieldCacheDocIdSet(context.AtomicReader.MaxDoc, acceptDocs, (doc) =>
                {
                    docTermOrds.SetDocument(doc);
                    long ord;
                    while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        if (ord > inclusiveUpperPoint)
                        {
                            return false;
                        }
                        else if (ord >= inclusiveLowerPoint)
                        {
                            return true;
                        }
                    }
                    return false;
                }));
            }
Пример #18
0
        public virtual void TestDocValuesIntegration()
        {
            AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues);
            Directory         dir = NewDirectory();
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
            RandomIndexWriter iw  = new RandomIndexWriter(Random, dir, iwc);
            Document          doc = new Document();

            doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
            doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
            doc.Add(new NumericDocValuesField("numeric", 42));
            if (DefaultCodecSupportsSortedSet)
            {
                doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
                doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
            }
            iw.AddDocument(doc);
            DirectoryReader ir = iw.GetReader();

            iw.Dispose();
            AtomicReader ar = GetOnlySegmentReader(ir);

            BytesRef scratch = new BytesRef();

            // Binary type: can be retrieved via getTerms()
            try
            {
                FieldCache.DEFAULT.GetInt32s(ar, "binary", false);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            BinaryDocValues binary = FieldCache.DEFAULT.GetTerms(ar, "binary", true);
            binary.Get(0, scratch);
            Assert.AreEqual("binary value", scratch.Utf8ToString());

            try
            {
                FieldCache.DEFAULT.GetTermsIndex(ar, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetDocTermOrds(ar, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "binary");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            IBits bits = FieldCache.DEFAULT.GetDocsWithField(ar, "binary");
            Assert.IsTrue(bits.Get(0));

            // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
            try
            {
                FieldCache.DEFAULT.GetInt32s(ar, "sorted", false);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "sorted");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            binary = FieldCache.DEFAULT.GetTerms(ar, "sorted", true);
            binary.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedDocValues sorted = FieldCache.DEFAULT.GetTermsIndex(ar, "sorted");
            Assert.AreEqual(0, sorted.GetOrd(0));
            Assert.AreEqual(1, sorted.ValueCount);
            sorted.Get(0, scratch);
            Assert.AreEqual("sorted value", scratch.Utf8ToString());

            SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sorted");
            sortedSet.SetDocument(0);
            Assert.AreEqual(0, sortedSet.NextOrd());
            Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
            Assert.AreEqual(1, sortedSet.ValueCount);

            bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sorted");
            Assert.IsTrue(bits.Get(0));

            // Numeric type: can be retrieved via getInts() and so on
            Int32s numeric = FieldCache.DEFAULT.GetInt32s(ar, "numeric", false);
            Assert.AreEqual(42, numeric.Get(0));

            try
            {
                FieldCache.DEFAULT.GetTerms(ar, "numeric", true);
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetTermsIndex(ar, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                FieldCache.DEFAULT.GetDocTermOrds(ar, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            try
            {
                new DocTermOrds(ar, null, "numeric");
                Assert.Fail();
            }
#pragma warning disable 168
            catch (InvalidOperationException expected)
#pragma warning restore 168
            {
            }

            bits = FieldCache.DEFAULT.GetDocsWithField(ar, "numeric");
            Assert.IsTrue(bits.Get(0));

            // SortedSet type: can be retrieved via getDocTermOrds()
            if (DefaultCodecSupportsSortedSet)
            {
                try
                {
                    FieldCache.DEFAULT.GetInt32s(ar, "sortedset", false);
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    FieldCache.DEFAULT.GetTerms(ar, "sortedset", true);
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    FieldCache.DEFAULT.GetTermsIndex(ar, "sortedset");
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                try
                {
                    new DocTermOrds(ar, null, "sortedset");
                    Assert.Fail();
                }
#pragma warning disable 168
                catch (InvalidOperationException expected)
#pragma warning restore 168
                {
                }

                sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sortedset");
                sortedSet.SetDocument(0);
                Assert.AreEqual(0, sortedSet.NextOrd());
                Assert.AreEqual(1, sortedSet.NextOrd());
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
                Assert.AreEqual(2, sortedSet.ValueCount);

                bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sortedset");
                Assert.IsTrue(bits.Get(0));
            }

            ir.Dispose();
            dir.Dispose();
        }
Пример #19
0
        /// <summary>
        /// Does all the "real work" of tallying up the counts. </summary>
        private void Count(IList <FacetsCollector.MatchingDocs> matchingDocs)
        {
            //System.out.println("ssdv count");

            MultiDocValues.OrdinalMap ordinalMap;

            // TODO: is this right?  really, we need a way to
            // verify that this ordinalMap "matches" the leaves in
            // matchingDocs...
            if (dv is MultiDocValues.MultiSortedSetDocValues && matchingDocs.Count > 1)
            {
                ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)dv).Mapping;
            }
            else
            {
                ordinalMap = null;
            }

            IndexReader origReader = state.OrigReader;

            foreach (FacetsCollector.MatchingDocs hits in matchingDocs)
            {
                var reader = hits.Context.AtomicReader;
                //System.out.println("  reader=" + reader);
                // LUCENE-5090: make sure the provided reader context "matches"
                // the top-level reader passed to the
                // SortedSetDocValuesReaderState, else cryptic
                // AIOOBE can happen:
                if (!Equals(ReaderUtil.GetTopLevelContext(hits.Context).Reader, origReader))
                {
                    throw new InvalidOperationException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
                }

                SortedSetDocValues segValues = reader.GetSortedSetDocValues(field);
                if (segValues == null)
                {
                    continue;
                }

                DocIdSetIterator docs = hits.Bits.GetIterator();

                // TODO: yet another option is to count all segs
                // first, only in seg-ord space, and then do a
                // merge-sort-PQ in the end to only "resolve to
                // global" those seg ords that can compete, if we know
                // we just want top K?  ie, this is the same algo
                // that'd be used for merging facets across shards
                // (distributed faceting).  but this has much higher
                // temp ram req'ts (sum of number of ords across all
                // segs)
                if (ordinalMap != null)
                {
                    int segOrd = hits.Context.Ord;

                    int numSegOrds = (int)segValues.ValueCount;

                    if (hits.TotalHits < numSegOrds / 10)
                    {
                        //System.out.println("    remap as-we-go");
                        // Remap every ord to global ord as we iterate:
                        int doc;
                        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            //System.out.println("    doc=" + doc);
                            segValues.SetDocument(doc);
                            int term = (int)segValues.NextOrd();
                            while (term != SortedSetDocValues.NO_MORE_ORDS)
                            {
                                //System.out.println("      segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term));
                                counts[(int)ordinalMap.GetGlobalOrd(segOrd, term)]++;
                                term = (int)segValues.NextOrd();
                            }
                        }
                    }
                    else
                    {
                        //System.out.println("    count in seg ord first");

                        // First count in seg-ord space:
                        int[] segCounts = new int[numSegOrds];
                        int   doc;
                        while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                        {
                            //System.out.println("    doc=" + doc);
                            segValues.SetDocument(doc);
                            int term = (int)segValues.NextOrd();
                            while (term != SortedSetDocValues.NO_MORE_ORDS)
                            {
                                //System.out.println("      ord=" + term);
                                segCounts[term]++;
                                term = (int)segValues.NextOrd();
                            }
                        }

                        // Then, migrate to global ords:
                        for (int ord = 0; ord < numSegOrds; ord++)
                        {
                            int count = segCounts[ord];
                            if (count != 0)
                            {
                                //System.out.println("    migrate segOrd=" + segOrd + " ord=" + ord + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, ord));
                                counts[(int)ordinalMap.GetGlobalOrd(segOrd, ord)] += count;
                            }
                        }
                    }
                }
                else
                {
                    // No ord mapping (e.g., single segment index):
                    // just aggregate directly into counts:
                    int doc;
                    while ((doc = docs.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        segValues.SetDocument(doc);
                        int term = (int)segValues.NextOrd();
                        while (term != SortedSetDocValues.NO_MORE_ORDS)
                        {
                            counts[term]++;
                            term = (int)segValues.NextOrd();
                        }
                    }
                }
            }
        }
Пример #20
0
 public override int GetOrd(int docID)
 {
     @in.SetDocument(docID);
     return((int)@in.NextOrd());
 }