예제 #1
0
        private FacetResult GetDim(string dim, OrdRange ordRange, int topN)
        {
            TopOrdAndInt32Queue q = null;

            int bottomCount = 0;

            int dimCount   = 0;
            int childCount = 0;

            TopOrdAndInt32Queue.OrdAndValue reuse = null;
            //System.out.println("getDim : " + ordRange.start + " - " + ordRange.end);
            for (int ord = ordRange.Start; ord <= ordRange.End; ord++)
            {
                //System.out.println("  ord=" + ord + " count=" + counts[ord]);
                if (counts[ord] > 0)
                {
                    dimCount += counts[ord];
                    childCount++;
                    if (counts[ord] > bottomCount)
                    {
                        if (reuse == null)
                        {
                            reuse = new TopOrdAndInt32Queue.OrdAndValue();
                        }
                        reuse.Ord   = ord;
                        reuse.Value = counts[ord];
                        if (q == null)
                        {
                            // Lazy init, so we don't create this for the
                            // sparse case unnecessarily
                            q = new TopOrdAndInt32Queue(topN);
                        }
                        reuse = q.InsertWithOverflow(reuse);
                        if (q.Count == topN)
                        {
                            bottomCount = q.Top.Value;
                        }
                    }
                }
            }

            if (q == null)
            {
                return(null);
            }

            LabelAndValue[] labelValues = new LabelAndValue[q.Count];
            for (int i = labelValues.Length - 1; i >= 0; i--)
            {
                TopOrdAndInt32Queue.OrdAndValue ordAndValue = q.Pop();
                var term = new BytesRef();
                dv.LookupOrd(ordAndValue.Ord, term);
                string[] parts = FacetsConfig.StringToPath(term.Utf8ToString());
                labelValues[i] = new LabelAndValue(parts[1], ordAndValue.Value);
            }

            return(new FacetResult(dim, Arrays.Empty <string>(), dimCount, labelValues, childCount));
        }
예제 #2
0
        public virtual void TestSortedSetDocValuesField()
        {
            AssumeTrue("default codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
            SortedSetDocValues dv = reader.GetSortedSetDocValues(SORTED_SET_DV_FIELD);
            int      maxDoc       = reader.MaxDoc;
            BytesRef bytes        = new BytesRef();

            for (int i = 0; i < maxDoc; i++)
            {
                dv.SetDocument(i);
                dv.LookupOrd(dv.NextOrd(), bytes);
                int value = sortedValues[i];
                assertEquals("incorrect sorted-set DocValues for doc " + i, value.toString(), bytes.Utf8ToString());
                dv.LookupOrd(dv.NextOrd(), bytes);
                assertEquals("incorrect sorted-set DocValues for doc " + i, (value + 1).ToString(), bytes.Utf8ToString());
                assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
            }
        }
예제 #3
0
        /// <summary>
        /// Creates this, pulling doc values from the specified
        /// field.
        /// </summary>
        public DefaultSortedSetDocValuesReaderState(IndexReader reader, string field = FacetsConfig.DEFAULT_INDEX_FIELD_NAME)
        {
            this.field      = field;
            this.origReader = reader;

            // We need this to create thread-safe MultiSortedSetDV
            // per collector:
            topReader = SlowCompositeReaderWrapper.Wrap(reader);
            SortedSetDocValues dv = topReader.GetSortedSetDocValues(field);

            if (dv is null)
            {
                throw new ArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
            }
            if (dv.ValueCount > int.MaxValue)
            {
                throw new ArgumentException("can only handle valueCount < System.Int32.MaxValue; got " + dv.ValueCount);
            }
            valueCount = (int)dv.ValueCount;

            // TODO: we can make this more efficient if eg we can be
            // "involved" when IOrdinalMap is being created?  Ie see
            // each term/ord it's assigning as it goes...
            string   lastDim  = null;
            int      startOrd = -1;
            BytesRef spare    = new BytesRef();

            // TODO: this approach can work for full hierarchy?;
            // TaxoReader can't do this since ords are not in
            // "sorted order" ... but we should generalize this to
            // support arbitrary hierarchy:
            for (int ord = 0; ord < valueCount; ord++)
            {
                dv.LookupOrd(ord, spare);
                string[] components = FacetsConfig.StringToPath(spare.Utf8ToString());
                if (components.Length != 2)
                {
                    throw new ArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.ToString(components) + " " + spare.Utf8ToString());
                }
                if (!components[0].Equals(lastDim, StringComparison.Ordinal))
                {
                    if (lastDim != null)
                    {
                        prefixToOrdRange[lastDim] = new OrdRange(startOrd, ord - 1);
                    }
                    startOrd = ord;
                    lastDim  = components[0];
                }
            }

            if (lastDim != null)
            {
                prefixToOrdRange[lastDim] = new OrdRange(startOrd, valueCount - 1);
            }
        }
예제 #4
0
            public override void Collect(int doc)
            {
                _docTermOrds.SetDocument(doc);
                long ord;

                while ((ord = _docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    _docTermOrds.LookupOrd(ord, _scratch);
                    _collectorTerms.Add(_scratch);
                }
            }
예제 #5
0
        private FacetResult GetDim(string dim, OrdRange ordRange, int topN)
        {
            TopOrdAndInt32Queue q = null;

            int bottomCount = 0;

            int dimCount   = 0;
            int childCount = 0;

            //System.out.println("getDim : " + ordRange.start + " - " + ordRange.end);
            for (int ord = ordRange.Start; ord <= ordRange.End; ord++)
            {
                //System.out.println("  ord=" + ord + " count=" + counts[ord]);
                if (counts[ord] > 0)
                {
                    dimCount += counts[ord];
                    childCount++;
                    if (counts[ord] > bottomCount)
                    {
                        if (q == null)
                        {
                            // Lazy init, so we don't create this for the
                            // sparse case unnecessarily
                            q = new TopOrdAndInt32Queue(topN);
                        }
                        // LUCENENET specific - use struct instead of reusing class instance for better performance
                        q.Insert(new OrdAndValue <int>(ord, counts[ord]));
                        if (q.Count == topN)
                        {
                            bottomCount = q.Top.Value;
                        }
                    }
                }
            }

            if (q == null)
            {
                return(null);
            }

            var scratch = new BytesRef();

            LabelAndValue[] labelValues = new LabelAndValue[q.Count];
            for (int i = labelValues.Length - 1; i >= 0; i--)
            {
                var ordAndValue = q.Pop();
                dv.LookupOrd(ordAndValue.Ord, scratch);
                string[] parts = FacetsConfig.StringToPath(scratch.Utf8ToString());
                labelValues[i] = new LabelAndValue(parts[1], ordAndValue.Value);
            }

            return(new FacetResult(dim, Arrays.Empty <string>(), dimCount, labelValues, childCount));
        }
예제 #6
0
            public virtual void Collect(int doc)
            {
                docTermOrds.SetDocument(doc);
                long ord;

                while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    docTermOrds.LookupOrd(ord, joinValue);
                    if (!joinValueToJoinScores.TryGetValue(joinValue, out JoinScore joinScore) || joinScore == null)
                    {
                        joinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
                    }
                    joinScore.AddScore(scorer.GetScore());
                }
            }
예제 #7
0
            public virtual void Collect(int doc)
            {
                docTermOrds.SetDocument(doc);
                long ord;

                while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    docTermOrds.LookupOrd(ord, joinValue);
                    var joinScore = JoinValueToJoinScores.ContainsKey(joinValue) ? JoinValueToJoinScores[joinValue] : null;
                    if (joinScore == null)
                    {
                        JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
                    }
                    joinScore.AddScore(scorer.GetScore());
                }
            }
예제 #8
0
            public override void Collect(int doc)
            {
                docTermOrds.Document = doc;
                long ord;

                while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    docTermOrds.LookupOrd(ord, joinValue);
                    var joinScore = JoinValueToJoinScores[joinValue];
                    if (joinScore == null)
                    {
                        JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
                    }
                    joinScore.AddScore(scorer.Score());
                }
            }
예제 #9
0
            public virtual void Collect(int doc)
            {
                docTermOrds.SetDocument(doc);
                long ord;

                while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    docTermOrds.LookupOrd(ord, scratch);
                    if (!joinValueToJoinScores.TryGetValue(scratch, out JoinScore joinScore) || joinScore == null)
                    {
                        continue;
                    }
                    int basedDoc = docBase + doc;
                    // First encountered join value determines the score.
                    // Something to keep in mind for many-to-many relations.
                    if (!docToJoinScore.ContainsKey(basedDoc))
                    {
                        docToJoinScore[basedDoc] = joinScore;
                    }
                }
            }
예제 #10
0
            public override void Collect(int doc)
            {
                docTermOrds.Document = doc;
                long ord;

                while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                {
                    docTermOrds.LookupOrd(ord, scratch);
                    JoinScore joinScore = _joinValueToJoinScores.ContainsKey(scratch) ? _joinValueToJoinScores[scratch] : null;
                    if (joinScore == null)
                    {
                        continue;
                    }
                    int basedDoc = docBase + doc;
                    // First encountered join value determines the score.
                    // Something to keep in mind for many-to-many relations.
                    if (!_docToJoinScore.ContainsKey(basedDoc))
                    {
                        _docToJoinScore[basedDoc] = joinScore;
                    }
                }
            }
예제 #11
0
 public override void LookupOrd(long ord, BytesRef result)
 {
     @in.LookupOrd(ord, result);
 }
예제 #12
0
        /// <summary>
        /// Create the results based on the search hits.
        /// Can be overridden by subclass to add particular behavior (e.g. weight transformation) </summary>
        /// <exception cref="System.IO.IOException"> If there are problems reading fields from the underlying Lucene index. </exception>
        protected internal virtual IList <LookupResult> CreateResults(IndexSearcher searcher, TopFieldDocs hits, int num, string charSequence, bool doHighlight, IEnumerable <string> matchedTokens, string prefixToken)
        {
            BinaryDocValues textDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, TEXT_FIELD_NAME);

            // This will just be null if app didn't pass payloads to build():
            // TODO: maybe just stored fields?  they compress...
            BinaryDocValues             payloadsDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, "payloads");
            IList <AtomicReaderContext> leaves     = searcher.IndexReader.Leaves;
            List <LookupResult>         results    = new List <LookupResult>();
            BytesRef scratch = new BytesRef();

            for (int i = 0; i < hits.ScoreDocs.Length; i++)
            {
                FieldDoc fd = (FieldDoc)hits.ScoreDocs[i];
                textDV.Get(fd.Doc, scratch);
                string text  = scratch.Utf8ToString();
                long   score = (long)fd.Fields[0];

                BytesRef payload;
                if (payloadsDV != null)
                {
                    payload = new BytesRef();
                    payloadsDV.Get(fd.Doc, payload);
                }
                else
                {
                    payload = null;
                }

                // Must look up sorted-set by segment:
                int segment = ReaderUtil.SubIndex(fd.Doc, leaves);
                SortedSetDocValues contextsDV = leaves[segment].AtomicReader.GetSortedSetDocValues(CONTEXTS_FIELD_NAME);
                HashSet <BytesRef> contexts;
                if (contextsDV != null)
                {
                    contexts = new HashSet <BytesRef>();
                    contextsDV.SetDocument(fd.Doc - leaves[segment].DocBase);
                    long ord;
                    while ((ord = contextsDV.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        BytesRef context = new BytesRef();
                        contextsDV.LookupOrd(ord, context);
                        contexts.Add(context);
                    }
                }
                else
                {
                    contexts = null;
                }

                LookupResult result;

                if (doHighlight)
                {
                    object highlightKey = Highlight(text, matchedTokens, prefixToken);
                    result = new LookupResult(highlightKey.ToString(), highlightKey, score, payload, contexts);
                }
                else
                {
                    result = new LookupResult(text, score, payload, contexts);
                }

                results.Add(result);
            }

            return(results);
        }
예제 #13
0
        public virtual void Test()
        {
#pragma warning disable 612, 618
            IFieldCache        cache   = FieldCache.DEFAULT;
            FieldCache.Doubles doubles = cache.GetDoubles(Reader, "theDouble", Random.NextBoolean());
            Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
            }

            FieldCache.Int64s longs = cache.GetInt64s(Reader, "theLong", Random.NextBoolean());
            Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", FieldCache.DEFAULT_INT64_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
            }

            FieldCache.Bytes bytes = cache.GetBytes(Reader, "theByte", Random.NextBoolean());
            Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue((sbyte)bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), (sbyte)bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
            }

            FieldCache.Int16s shorts = cache.GetInt16s(Reader, "theShort", Random.NextBoolean());
            Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", FieldCache.DEFAULT_INT16_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
            }

            FieldCache.Int32s ints = cache.GetInt32s(Reader, "theInt", Random.NextBoolean());
            Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", FieldCache.DEFAULT_INT32_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
            }

            FieldCache.Singles floats = cache.GetSingles(Reader, "theFloat", Random.NextBoolean());
            Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", Random.NextBoolean()), "Second request to cache return same array");
            Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", FieldCache.DEFAULT_SINGLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
            }
#pragma warning restore 612, 618

            IBits docsWithField = cache.GetDocsWithField(Reader, "theLong");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "theLong"), "Second request to cache return same array");
            Assert.IsTrue(docsWithField is Bits.MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length; i++)
            {
                Assert.IsTrue(docsWithField.Get(i));
            }

            docsWithField = cache.GetDocsWithField(Reader, "sparse");
            Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "sparse"), "Second request to cache return same array");
            Assert.IsFalse(docsWithField is Bits.MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
            Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
            for (int i = 0; i < docsWithField.Length; i++)
            {
                Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
            }

            // getTermsIndex
            SortedDocValues termsIndex = cache.GetTermsIndex(Reader, "theRandomUnicodeString");
            Assert.AreSame(termsIndex, cache.GetTermsIndex(Reader, "theRandomUnicodeString"), "Second request to cache return same array");
            BytesRef br = new BytesRef();
            for (int i = 0; i < NUM_DOCS; i++)
            {
                BytesRef term;
                int      ord = termsIndex.GetOrd(i);
                if (ord == -1)
                {
                    term = null;
                }
                else
                {
                    termsIndex.LookupOrd(ord, br);
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
            }

            int nTerms = termsIndex.ValueCount;

            TermsEnum tenum = termsIndex.GetTermsEnum();
            BytesRef  val   = new BytesRef();
            for (int i = 0; i < nTerms; i++)
            {
                BytesRef val1 = tenum.Next();
                termsIndex.LookupOrd(i, val);
                // System.out.println("i="+i);
                Assert.AreEqual(val, val1);
            }

            // seek the enum around (note this isn't a great test here)
            int num = AtLeast(100);
            for (int i = 0; i < num; i++)
            {
                int k = Random.Next(nTerms);
                termsIndex.LookupOrd(k, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term);
            }

            for (int i = 0; i < nTerms; i++)
            {
                termsIndex.LookupOrd(i, val);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
                Assert.AreEqual(val, tenum.Term);
            }

            // test bad field
            termsIndex = cache.GetTermsIndex(Reader, "bogusfield");

            // getTerms
            BinaryDocValues terms = cache.GetTerms(Reader, "theRandomUnicodeString", true);
            Assert.AreSame(terms, cache.GetTerms(Reader, "theRandomUnicodeString", true), "Second request to cache return same array");
            IBits bits = cache.GetDocsWithField(Reader, "theRandomUnicodeString");
            for (int i = 0; i < NUM_DOCS; i++)
            {
                terms.Get(i, br);
                BytesRef term;
                if (!bits.Get(i))
                {
                    term = null;
                }
                else
                {
                    term = br;
                }
                string s = term == null ? null : term.Utf8ToString();
                Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
            }

            // test bad field
            terms = cache.GetTerms(Reader, "bogusfield", false);

            // getDocTermOrds
            SortedSetDocValues termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
            int numEntries = cache.GetCacheEntries().Length;
            // ask for it again, and check that we didnt create any additional entries:
            termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
            Assert.AreEqual(numEntries, cache.GetCacheEntries().Length);

            for (int i = 0; i < NUM_DOCS; i++)
            {
                termOrds.SetDocument(i);
                // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
                IList <BytesRef> values = MultiValued[i].Distinct().ToList();
                foreach (BytesRef v in values)
                {
                    if (v == null)
                    {
                        // why does this test use null values... instead of an empty list: confusing
                        break;
                    }
                    long ord = termOrds.NextOrd();
                    Debug.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
                    BytesRef scratch = new BytesRef();
                    termOrds.LookupOrd(ord, scratch);
                    Assert.AreEqual(v, scratch);
                }
                Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
            }

            // test bad field
            termOrds = cache.GetDocTermOrds(Reader, "bogusfield");
            Assert.IsTrue(termOrds.ValueCount == 0);

            FieldCache.DEFAULT.PurgeByCacheKey(Reader.CoreCacheKey);
        }