Exemple #1
0
 /// <summary>
 /// Checks that appending to a <see cref="BytesRef"/> built over a sub-range of an
 /// array yields that sub-range followed by the appended bytes.
 /// </summary>
 public virtual void TestAppend()
 {
     // Backing array holds "abcd"; the ref below covers offset 1, length 3 ("bcd").
     sbyte[] backing = { (sbyte)'a', (sbyte)'b', (sbyte)'c', (sbyte)'d' };
     BytesRef slice = new BytesRef(backing, 1, 3);
     BytesRef suffix = new BytesRef("e");

     slice.Append(suffix);

     Assert.AreEqual("bcde", slice.Utf8ToString());
 }
        /// <summary>
        /// Writes one position delta to <c>tvf</c> and, when payloads are enabled,
        /// appends the payload bytes to <c>payloadData</c>.
        /// </summary>
        /// <param name="delta">Position delta to encode.</param>
        /// <param name="payload">Payload for this position; <c>null</c> counts as a zero-length payload.</param>
        private void WritePosition(int delta, BytesRef payload)
        {
            if (!payloads)
            {
                // Without payloads the delta is written unmodified.
                tvf.WriteVInt32(delta);
                return;
            }

            int length = payload == null ? 0 : payload.Length;
            int shiftedDelta = delta << 1;

            if (length == lastPayloadLength)
            {
                // Low bit clear: same payload length as last time, so it is not repeated.
                tvf.WriteVInt32(shiftedDelta);
            }
            else
            {
                // Low bit set: the new payload length follows the delta.
                lastPayloadLength = length;
                tvf.WriteVInt32(shiftedDelta | 1);
                tvf.WriteVInt32(length);
            }

            if (length <= 0)
            {
                return;
            }

            if (length + payloadData.Length < 0)
            {
                // The int sum wrapped around, i.e. the payload buffer would overflow; just throw UOE.
                // Having > System.Int32.MaxValue bytes of payload for a single term in a single doc is nuts.
                throw UnsupportedOperationException.Create("A term cannot have more than System.Int32.MaxValue bytes of payload data in a single document");
            }

            payloadData.Append(payload);
        }
        /// <summary>
        /// Writes one position delta to <c>Tvf</c> and, when payloads are enabled,
        /// appends the payload bytes to <c>PayloadData</c>.
        /// </summary>
        /// <param name="delta">Position delta to encode.</param>
        /// <param name="payload">Payload for this position; <c>null</c> counts as a zero-length payload.</param>
        /// <exception cref="System.NotSupportedException">
        /// Adding the payload would push the buffered payload length past <see cref="int.MaxValue"/>.
        /// </exception>
        private void WritePosition(int delta, BytesRef payload)
        {
            if (Payloads)
            {
                int payloadLength = payload == null ? 0 : payload.Length;

                if (payloadLength != LastPayloadLength)
                {
                    // Low bit set: the new payload length follows the delta.
                    LastPayloadLength = payloadLength;
                    Tvf.WriteVInt((delta << 1) | 1);
                    Tvf.WriteVInt(payloadLength);
                }
                else
                {
                    // Low bit clear: payload length is unchanged and therefore not repeated.
                    Tvf.WriteVInt(delta << 1);
                }
                if (payloadLength > 0)
                {
                    if (payloadLength + PayloadData.Length < 0)
                    {
                        // we overflowed the payload buffer (the int sum wrapped around), just throw
                        // having > System.Int32.MaxValue bytes of payload for a single term in a single doc is nuts.
                        throw new System.NotSupportedException("A term cannot have more than System.Int32.MaxValue bytes of payload data in a single document");
                    }
                    PayloadData.Append(payload);
                }
            }
            else
            {
                // Without payloads the delta is written unmodified.
                Tvf.WriteVInt(delta);
            }
        }
            /// <summary>
            /// Switches the collector to a new segment: stores the result of the previous
            /// segment (if any), loads the group and facet term indexes for
            /// <paramref name="context"/>, re-maps the already collected group/facet hits
            /// to this segment's ordinals and computes the facet ordinal range to count.
            /// </summary>
            /// <param name="context">Reader context of the segment about to be collected.</param>
            public override void SetNextReader(AtomicReaderContext context)
            {
                // Counts from the previous segment are turned into a segment result first.
                if (m_segmentFacetCounts != null)
                {
                    m_segmentResults.Add(CreateSegmentResult());
                }

                groupFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(context.AtomicReader, m_groupField);
                facetFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(context.AtomicReader, m_facetField);

                // 1+ to allow for the -1 "not set":
                m_segmentFacetCounts = new int[facetFieldTermsIndex.ValueCount + 1];
                m_segmentTotalCount = 0;

                segmentGroupedFacetHits.Clear();
                foreach (GroupedFacetHit hit in groupedFacetHits)
                {
                    // A null value maps to ord -1; a non-null value that this segment
                    // does not contain means the hit cannot occur here and is skipped.
                    int facetOrd = -1;
                    if (hit.facetValue != null)
                    {
                        facetOrd = facetFieldTermsIndex.LookupTerm(hit.facetValue);
                        if (facetOrd < 0)
                        {
                            continue;
                        }
                    }

                    int groupOrd = -1;
                    if (hit.groupValue != null)
                    {
                        groupOrd = groupFieldTermsIndex.LookupTerm(hit.groupValue);
                        if (groupOrd < 0)
                        {
                            continue;
                        }
                    }

                    segmentGroupedFacetHits.Put(groupOrd * (facetFieldTermsIndex.ValueCount + 1) + facetOrd);
                }

                if (m_facetPrefix == null)
                {
                    m_startFacetOrd = -1;
                    m_endFacetOrd = facetFieldTermsIndex.ValueCount;
                    return;
                }

                m_startFacetOrd = facetFieldTermsIndex.LookupTerm(m_facetPrefix);
                if (m_startFacetOrd < 0)
                {
                    // Points to the ord one higher than facetPrefix
                    m_startFacetOrd = -m_startFacetOrd - 1;
                }

                BytesRef facetEndPrefix = BytesRef.DeepCopyOf(m_facetPrefix);
                facetEndPrefix.Append(UnicodeUtil.BIG_TERM);
                m_endFacetOrd = facetFieldTermsIndex.LookupTerm(facetEndPrefix);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(m_endFacetOrd < 0);
                }
                // Points to the ord one higher than facetEndPrefix
                m_endFacetOrd = -m_endFacetOrd - 1;
            }
            /// <summary>
            /// Accepts a path only if <c>finalLastToken</c> extended by the path's bytes is
            /// not already contained in <c>seen</c>. <c>finalLastToken</c> is restored to its
            /// original length before returning.
            /// </summary>
            /// <param name="input">Path input, converted to bytes into <c>scratchBytes</c>.</param>
            /// <param name="output">Path output; not used by this check.</param>
            /// <returns><c>true</c> when the extended token has not been seen yet.</returns>
            protected override bool AcceptResult(Int32sRef input, long? output)
            {
                Util.Fst.Util.ToBytesRef(input, scratchBytes);
                finalLastToken.Grow(finalLastToken.Length + scratchBytes.Length);

                // Temporarily extend the token, test membership, then roll the length back.
                int originalLength = finalLastToken.Length;
                finalLastToken.Append(scratchBytes);
                bool unseen = !seen.Contains(finalLastToken);
                finalLastToken.Length = originalLength;

                return unseen;
            }
        /// <summary>
        /// Records one doc/value entry: stores the doc id, the offset and length of the
        /// value inside <c>Values</c>, and appends the value bytes to <c>Values</c>.
        /// </summary>
        /// <param name="doc">Document id the value belongs to.</param>
        /// <param name="value">
        /// The value, cast to <see cref="BytesRef"/>; <c>null</c> is replaced by
        /// <c>BinaryDocValuesUpdate.MISSING</c> and the document is not marked in <c>DocsWithField</c>.
        /// </param>
        /// <exception cref="InvalidOperationException"><c>Size</c> already equals <see cref="int.MaxValue"/>.</exception>
        public override void Add(int doc, object value)
        {
            // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation
            if (Size == int.MaxValue)
            {
                throw new InvalidOperationException("cannot support more than System.Int32.MaxValue doc/value entries");
            }

            BytesRef val = (BytesRef)value;

            if (val == null)
            {
                val = BinaryDocValuesUpdate.MISSING;
            }

            // grow the structures to have room for more elements
            if (Docs.Size() == Size)
            {
                Docs          = Docs.Grow(Size + 1);
                Offsets       = Offsets.Grow(Size + 1);
                Lengths       = Lengths.Grow(Size + 1);
                DocsWithField = FixedBitSet.EnsureCapacity(DocsWithField, (int)Docs.Size());
            }

            if (val != BinaryDocValuesUpdate.MISSING)
            {
                // only mark the document as having a value in that field if the value wasn't set to null (MISSING)
                DocsWithField.Set(Size);
            }

            Docs.Set(Size, doc);
            // The value starts at the current end of the shared byte buffer.
            Offsets.Set(Size, Values.Length);
            Lengths.Set(Size, val.Length);
            Values.Append(val);
            ++Size;
        }
Exemple #7
0
        /// <summary>
        /// Appends one doc/value entry to the parallel structures <c>docs</c>,
        /// <c>offsets</c>, <c>lengths</c> and the shared byte buffer <c>values</c>.
        /// </summary>
        /// <param name="doc">Document id the value belongs to.</param>
        /// <param name="value">
        /// The value, cast to <see cref="BytesRef"/>; <c>null</c> is stored as
        /// <c>BinaryDocValuesUpdate.MISSING</c> without marking the document in <c>docsWithField</c>.
        /// </param>
        public override void Add(int doc, object value)
        {
            // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation
            if (size == int.MaxValue)
            {
                throw new InvalidOperationException("cannot support more than System.Int32.MaxValue doc/value entries");
            }

            BytesRef val = (BytesRef)value ?? BinaryDocValuesUpdate.MISSING;

            // All slots used: make room for one more entry in every structure.
            if (docs.Count == size)
            {
                int needed = size + 1;
                docs = docs.Grow(needed);
                offsets = offsets.Grow(needed);
                lengths = lengths.Grow(needed);
                docsWithField = FixedBitSet.EnsureCapacity(docsWithField, (int)docs.Count);
            }

            // Only a real value (not the MISSING placeholder) marks the document as having the field.
            bool hasValue = val != BinaryDocValuesUpdate.MISSING;
            if (hasValue)
            {
                docsWithField.Set(size);
            }

            docs.Set(size, doc);
            offsets.Set(size, values.Length);
            lengths.Set(size, val.Length);
            values.Append(val);
            size++;
        }
Exemple #8
0
        /// <summary>
        /// Appends one doc/value entry: the doc id goes to <c>docs</c>, the value's start
        /// and length go to <c>offsets</c>/<c>lengths</c>, and its bytes are appended to <c>values</c>.
        /// </summary>
        /// <param name="doc">Document id the value belongs to.</param>
        /// <param name="value">
        /// The value; <c>null</c> is stored as <c>BinaryDocValuesUpdate.MISSING</c> and the
        /// document is then not marked in <c>docsWithField</c>.
        /// </param>
        private void Add(int doc, BytesRef value) // LUCENENET specific: Marked private instead of public and changed the value parameter type
        {
            // TODO: if the Sorter interface changes to take long indexes, we can remove that limitation
            if (size == int.MaxValue)
            {
                throw IllegalStateException.Create("cannot support more than System.Int32.MaxValue doc/value entries");
            }

            BytesRef val = value is null ? BinaryDocValuesUpdate.MISSING : value;

            bool isFull = docs.Count == size;
            if (isFull)
            {
                // grow the structures to have room for more elements
                docs = docs.Grow(size + 1);
                offsets = offsets.Grow(size + 1);
                lengths = lengths.Grow(size + 1);
                docsWithField = FixedBitSet.EnsureCapacity(docsWithField, (int)docs.Count);
            }

            // only mark the document as having a value in that field if the value wasn't set to null (MISSING)
            if (val != BinaryDocValuesUpdate.MISSING)
            {
                docsWithField.Set(size);
            }

            docs.Set(size, doc);
            offsets.Set(size, values.Length);
            lengths.Set(size, val.Length);
            values.Append(val);

            size += 1;
        }
        /// <summary>
        /// Appends a single suggestion and its weight to the internal buffers.
        /// </summary>
        /// <param name="utf8">
        ///          The suggestion (utf8 representation) to be added. The content is
        ///          copied and the object can be reused. </param>
        /// <param name="bucket">
        ///          The bucket to place this suggestion in. Must be non-negative and
        ///          smaller than the number of buckets passed in the constructor.
        ///          Higher numbers indicate suggestions that should be presented
        ///          before suggestions placed in smaller buckets. </param>
        /// <exception cref="ArgumentNullException"><paramref name="utf8"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///          <paramref name="bucket"/> is negative or not smaller than the number of buckets.
        ///          (This type derives from <see cref="ArgumentException"/>.) </exception>
        public virtual void Add(BytesRef utf8, int bucket)
        {
            // Fail with a descriptive exception instead of a NullReferenceException at utf8.Length below.
            if (utf8 == null)
            {
                throw new ArgumentNullException(nameof(utf8));
            }

            if (bucket < 0 || bucket >= buckets)
            {
                throw new ArgumentOutOfRangeException(nameof(bucket), "Bucket outside of the allowed range [0, " + buckets + "): " + bucket);
            }

            // One extra byte is needed in front of the suggestion for the bucket number.
            if (scratch.Bytes.Length < utf8.Length + 1)
            {
                scratch.Grow(utf8.Length + 10);
            }

            // Entry layout: [bucket byte][suggestion bytes].
            scratch.Length   = 1;
            scratch.Bytes[0] = (byte)bucket;
            scratch.Append(utf8);
            sorter.Add(scratch);
        }
Exemple #10
0
        /// <summary>
        /// Appends a single suggestion and its weight to the internal buffers.
        /// </summary>
        /// <param name="utf8">
        ///          The suggestion (utf8 representation) to be added. The content is
        ///          copied and the object can be reused. </param>
        /// <param name="bucket">
        ///          The bucket to place this suggestion in. Must be non-negative and
        ///          smaller than the number of buckets passed in the constructor.
        ///          Higher numbers indicate suggestions that should be presented
        ///          before suggestions placed in smaller buckets. </param>
        /// <exception cref="ArgumentNullException"><paramref name="utf8"/> is <c>null</c>.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///          <paramref name="bucket"/> is negative or not smaller than the number of buckets. </exception>
        public virtual void Add(BytesRef utf8, int bucket)
        {
            // LUCENENET: Added guard clause for null
            if (utf8 is null)
            {
                throw new ArgumentNullException(nameof(utf8));
            }

            if (bucket < 0 || bucket >= buckets)
            {
                // The offending argument is the "bucket" parameter (not the "buckets" field), so that is the name reported.
                throw new ArgumentOutOfRangeException(nameof(bucket), "Bucket outside of the allowed range [0, " + buckets + "): " + bucket); // LUCENENET specific - changed from IllegalArgumentException to ArgumentOutOfRangeException (.NET convention)
            }

            // One extra byte is needed in front of the suggestion for the bucket number.
            if (scratch.Bytes.Length < utf8.Length + 1)
            {
                scratch.Grow(utf8.Length + 10);
            }

            // Entry layout: [bucket byte][suggestion bytes].
            scratch.Length   = 1;
            scratch.Bytes[0] = (byte)bucket;
            scratch.Append(utf8);
            sorter.Add(scratch);
        }
Exemple #11
0
        /// <summary>
        /// Looks up at most <paramref name="num"/> completions of <paramref name="key"/> in the FST.
        /// When <c>exactFirst</c> is set and the key itself is a complete entry, it is returned first.
        /// </summary>
        /// <param name="key">Prefix to complete.</param>
        /// <param name="contexts">Must be <c>null</c>; contexts are not supported.</param>
        /// <param name="onlyMorePopular">Must be <c>false</c>.</param>
        /// <param name="num">Maximum number of results; asserted to be positive.</param>
        /// <returns>The completions found, or an empty list when there is no FST or the prefix is unknown.</returns>
        public override IList<LookupResult> DoLookup(string key, IEnumerable<BytesRef> contexts, bool onlyMorePopular, int num)
        {
            if (contexts != null)
            {
                throw new ArgumentException("this suggester doesn't support contexts");
            }
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(num > 0);
            }
            if (onlyMorePopular)
            {
                throw new ArgumentException("this suggester only works with onlyMorePopular=false");
            }
            if (fst == null)
            {
                return Collections.EmptyList<LookupResult>();
            }

            BytesRef utf8 = new BytesRef(key);
            int keyLength = utf8.Length;
            FST.Arc<long?> prefixArc = new FST.Arc<long?>();

            // match the prefix portion exactly
            long? prefixWeight = null;
            try
            {
                prefixWeight = LookupPrefix(utf8, prefixArc);
            }
            catch (IOException ioe)
            {
                throw new Exception(ioe.ToString(), ioe);
            }

            if (prefixWeight == null)
            {
                return Collections.EmptyList<LookupResult>();
            }

            List<LookupResult> found = new List<LookupResult>(num);
            CharsRef utf16 = new CharsRef();

            if (exactFirst && prefixArc.IsFinal)
            {
                // The key itself is an entry: report it ahead of the completions.
                utf16.Grow(utf8.Length);
                UnicodeUtil.UTF8toUTF16(utf8, utf16);
                found.Add(new LookupResult(utf16.ToString(), DecodeWeight(prefixWeight.GetValueOrDefault() + prefixArc.NextFinalOutput.GetValueOrDefault())));
                num--;
                if (num == 0)
                {
                    return found; // that was quick
                }
            }

            // complete top-N
            Util.Fst.Util.TopResults<long?> topPaths = null;
            try
            {
                topPaths = Lucene.Net.Util.Fst.Util.ShortestPaths(fst, prefixArc, prefixWeight, weightComparer, num, !exactFirst);
                if (Debugging.AssertsEnabled)
                {
                    Debugging.Assert(topPaths.IsComplete);
                }
            }
            catch (IOException ioe)
            {
                throw new Exception(ioe.ToString(), ioe);
            }

            BytesRef tail = new BytesRef(8);
            foreach (Util.Fst.Util.Result<long?> path in topPaths)
            {
                // Reset to the bare key, then append this completion's suffix.
                utf8.Length = keyLength;
                Lucene.Net.Util.Fst.Util.ToBytesRef(path.Input, tail);
                utf8.Append(tail);

                utf16.Grow(utf8.Length);
                UnicodeUtil.UTF8toUTF16(utf8, utf16);
                found.Add(new LookupResult(utf16.ToString(), DecodeWeight(path.Output.GetValueOrDefault())));
            }

            return found;
        }
        /// <summary>
        /// Retrieve suggestions.
        /// <para>
        /// Analyzes <paramref name="key"/> with the query analyzer and keeps the last token seen
        /// for each gram size. The FST is then searched from the highest-order model down to the
        /// unigram model; the backoff factor is multiplied by <c>ALPHA</c> after each model that
        /// is consulted. A given last token is reported once only, from the highest-order model
        /// that produced it.
        /// </para>
        /// </summary>
        /// <param name="key">Text to complete.</param>
        /// <param name="contexts">Must be <c>null</c>; contexts are not supported.</param>
        /// <param name="num">Maximum number of results to return.</param>
        /// <returns>At most <paramref name="num"/> results, sorted with <c>ComparerAnonymousInnerClassHelper</c>.</returns>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="contexts"/> is not <c>null</c>, a token's recalculated gram count does not
        /// match its position length, or the analyzer produced no non-empty token.
        /// </exception>
        /// <exception cref="Exception">Wraps an <see cref="IOException"/> raised while reading the FST.</exception>
        public virtual IList <LookupResult> DoLookup(string key, IEnumerable <BytesRef> contexts, int num)
        {
            if (contexts != null)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }

            TokenStream ts = queryAnalyzer.GetTokenStream("", key.ToString());

            try
            {
                ITermToBytesRefAttribute    termBytesAtt = ts.AddAttribute <ITermToBytesRefAttribute>();
                IOffsetAttribute            offsetAtt    = ts.AddAttribute <IOffsetAttribute>();
                IPositionLengthAttribute    posLenAtt    = ts.AddAttribute <IPositionLengthAttribute>();
                IPositionIncrementAttribute posIncAtt    = ts.AddAttribute <IPositionIncrementAttribute>();
                ts.Reset();

                var lastTokens = new BytesRef[grams];
                //System.out.println("lookup: key='" + key + "'");

                // Run full analysis, but save only the
                // last 1gram, last 2gram, etc.:
                BytesRef tokenBytes   = termBytesAtt.BytesRef;
                int      maxEndOffset = -1;
                bool     sawRealToken = false;
                while (ts.IncrementToken())
                {
                    termBytesAtt.FillBytesRef();
                    sawRealToken |= tokenBytes.Length > 0;
                    // TODO: this is somewhat iffy; today, ShingleFilter
                    // sets posLen to the gram count; maybe we should make
                    // a separate dedicated att for this?
                    int gramCount = posLenAtt.PositionLength;

                    Debug.Assert(gramCount <= grams);

                    // Safety: make sure the recalculated count "agrees":
                    if (CountGrams(tokenBytes) != gramCount)
                    {
                        throw new System.ArgumentException("tokens must not contain separator byte; got token=" + tokenBytes + " but gramCount=" + gramCount + " does not match recalculated count=" + CountGrams(tokenBytes));
                    }
                    maxEndOffset = Math.Max(maxEndOffset, offsetAtt.EndOffset);
                    lastTokens[gramCount - 1] = BytesRef.DeepCopyOf(tokenBytes);
                }
                ts.End();

                if (!sawRealToken)
                {
                    throw new System.ArgumentException("no tokens produced by analyzer, or the only tokens were empty strings");
                }

                // Carefully fill last tokens with _ tokens;
                // ShingleFilter apparently won't emit "only hole"
                // tokens:
                int endPosInc = posIncAtt.PositionIncrement;

                // Note this will also be true if input is the empty
                // string (in which case we saw no tokens and
                // maxEndOffset is still -1), which in fact works out OK
                // because we fill the unigram with an empty BytesRef
                // below:
                bool lastTokenEnded = offsetAtt.EndOffset > maxEndOffset || endPosInc > 0;
                //System.out.println("maxEndOffset=" + maxEndOffset + " vs " + offsetAtt.EndOffset);

                if (lastTokenEnded)
                {
                    //System.out.println("  lastTokenEnded");
                    // If user hit space after the last token, then
                    // "upgrade" all tokens.  This way "foo " will suggest
                    // all bigrams starting w/ foo, and not any unigrams
                    // starting with "foo":
                    for (int i = grams - 1; i > 0; i--)
                    {
                        BytesRef token = lastTokens[i - 1];
                        if (token == null)
                        {
                            continue;
                        }
                        token.Grow(token.Length + 1);
                        token.Bytes[token.Length] = separator;
                        token.Length++;
                        lastTokens[i] = token;
                    }
                    lastTokens[0] = new BytesRef();
                }

                var arc = new FST.Arc <long?>();

                var bytesReader = fst.GetBytesReader();

                // Try highest order models first, and if they return
                // results, return that; else, fallback:
                double backoff = 1.0;

                List <LookupResult> results = new List <LookupResult>(num);

                // We only add a given suffix once, from the highest
                // order model that saw it; for subsequent lower order
                // models we skip it:
                var seen = new HashSet <BytesRef>();

                for (int gram = grams - 1; gram >= 0; gram--)
                {
                    BytesRef token = lastTokens[gram];
                    // Don't make unigram predictions from empty string:
                    if (token == null || (token.Length == 0 && key.Length > 0))
                    {
                        // Input didn't have enough tokens:
                        //System.out.println("  gram=" + gram + ": skip: not enough input");
                        continue;
                    }

                    if (endPosInc > 0 && gram <= endPosInc)
                    {
                        // Skip hole-only predictions; in theory we
                        // shouldn't have to do this, but we'd need to fix
                        // ShingleFilter to produce only-hole tokens:
                        //System.out.println("  break: only holes now");
                        break;
                    }

                    //System.out.println("try " + (gram+1) + " gram token=" + token.utf8ToString());

                    // TODO: we could add fuzziness here
                    // match the prefix portion exactly
                    //Pair<Long,BytesRef> prefixOutput = null;
                    long? prefixOutput = null;
                    try
                    {
                        prefixOutput = LookupPrefix(fst, bytesReader, token, arc);
                    }
                    catch (IOException bogus)
                    {
                        throw new Exception(bogus.ToString(), bogus);
                    }
                    //System.out.println("  prefixOutput=" + prefixOutput);

                    if (prefixOutput == null)
                    {
                        // This model never saw this prefix, e.g. the
                        // trigram model never saw context "purple mushroom"
                        backoff *= ALPHA;
                        continue;
                    }

                    // TODO: we could do this division at build time, and
                    // bake it into the FST?

                    // Denominator for computing scores from current
                    // model's predictions:
                    long contextCount = totTokens;

                    BytesRef lastTokenFragment = null;

                    // Scan backwards for the last separator: everything before it is the
                    // context, everything after it is the (partial) last token.
                    for (int i = token.Length - 1; i >= 0; i--)
                    {
                        if (token.Bytes[token.Offset + i] == separator)
                        {
                            BytesRef context = new BytesRef(token.Bytes, token.Offset, i);
                            long?    output  = Lucene.Net.Util.Fst.Util.Get(fst, Lucene.Net.Util.Fst.Util.ToInt32sRef(context, new Int32sRef()));
                            Debug.Assert(output != null);
                            contextCount      = DecodeWeight(output);
                            lastTokenFragment = new BytesRef(token.Bytes, token.Offset + i + 1, token.Length - i - 1);
                            break;
                        }
                    }

                    BytesRef finalLastToken;

                    if (lastTokenFragment == null)
                    {
                        finalLastToken = BytesRef.DeepCopyOf(token);
                    }
                    else
                    {
                        finalLastToken = BytesRef.DeepCopyOf(lastTokenFragment);
                    }
                    Debug.Assert(finalLastToken.Offset == 0);

                    CharsRef spare = new CharsRef();

                    // complete top-N
                    Util.Fst.Util.TopResults <long?> completions = null;
                    try
                    {
                        // Because we store multiple models in one FST
                        // (1gram, 2gram, 3gram), we must restrict the
                        // search so that it only considers the current
                        // model.  For highest order model, this is not
                        // necessary since all completions in the FST
                        // must be from this model, but for lower order
                        // models we have to filter out the higher order
                        // ones:

                        // Must do num+seen.size() for queue depth because we may
                        // reject up to seen.size() paths in acceptResult():
                        Util.Fst.Util.TopNSearcher <long?> searcher = new TopNSearcherAnonymousInnerClassHelper(this, fst, num, num + seen.Count, weightComparer, seen, finalLastToken);

                        // since this search is initialized with a single start node
                        // it is okay to start with an empty input path here
                        searcher.AddStartPaths(arc, prefixOutput, true, new Int32sRef());

                        completions = searcher.Search();
                        Debug.Assert(completions.IsComplete);
                    }
                    catch (IOException bogus)
                    {
                        throw new Exception(bogus.ToString(), bogus);
                    }

                    int prefixLength = token.Length;

                    BytesRef suffix = new BytesRef(8);
                    //System.out.println("    " + completions.length + " completions");

                    foreach (Util.Fst.Util.Result <long?> completion in completions)
                    {
                        token.Length = prefixLength;
                        // append suffix
                        Util.Fst.Util.ToBytesRef(completion.Input, suffix);
                        token.Append(suffix);

                        //System.out.println("    completion " + token.utf8ToString());

                        // Skip this path if a higher-order model already
                        // saw/predicted its last token:
                        BytesRef lastToken = token;
                        for (int i = token.Length - 1; i >= 0; i--)
                        {
                            if (token.Bytes[token.Offset + i] == separator)
                            {
                                Debug.Assert(token.Length - i - 1 > 0);
                                lastToken = new BytesRef(token.Bytes, token.Offset + i + 1, token.Length - i - 1);
                                break;
                            }
                        }
                        if (seen.Contains(lastToken))
                        {
                            //System.out.println("      skip dup " + lastToken.utf8ToString());
                            goto nextCompletionContinue;
                        }
                        seen.Add(BytesRef.DeepCopyOf(lastToken));
                        spare.Grow(token.Length);
                        UnicodeUtil.UTF8toUTF16(token, spare);
                        LookupResult result = new LookupResult(spare.ToString(),
                                                               // LUCENENET NOTE: We need to calculate this as decimal because when using double it can sometimes
                                                               // return numbers that are greater than long.MaxValue, which results in a negative long number.
                                                               (long)(long.MaxValue * (decimal)backoff * ((decimal)DecodeWeight(completion.Output)) / contextCount));
                        results.Add(result);
                        Debug.Assert(results.Count == seen.Count);
                        //System.out.println("  add result=" + result);
                        nextCompletionContinue :;
                    }
                    backoff *= ALPHA;
                }

                results.Sort(new ComparerAnonymousInnerClassHelper(this));

                if (results.Count > num)
                {
                    // Drop everything past the first num entries directly on the list, so the
                    // truncation does not depend on a sub-list helper returning a live view.
                    results.RemoveRange(num, results.Count - num);
                }

                return(results);
            }
            finally
            {
                IOUtils.DisposeWhileHandlingException(ts);
            }
        }
            /// <summary>
            /// Switches the collector to a new segment: stores the result of the previous
            /// segment (if any), loads the group terms index and the facet doc-term-ords for
            /// <paramref name="context"/>, re-maps the already collected group/facet hits to
            /// this segment's ordinals and computes the facet ordinal range to count.
            /// </summary>
            /// <param name="context">Reader context of the segment about to be collected.</param>
            public override void SetNextReader(AtomicReaderContext context)
            {
                // Counts from the previous segment are turned into a segment result first.
                if (m_segmentFacetCounts != null)
                {
                    m_segmentResults.Add(CreateSegmentResult());
                }

                groupFieldTermsIndex = FieldCache.DEFAULT.GetTermsIndex(context.AtomicReader, m_groupField);
                facetFieldDocTermOrds = FieldCache.DEFAULT.GetDocTermOrds(context.AtomicReader, m_facetField);
                facetFieldNumTerms = (int)facetFieldDocTermOrds.ValueCount;

                // No terms enum is requested when the segment has no facet terms.
                facetOrdTermsEnum = facetFieldNumTerms == 0 ? null : facetFieldDocTermOrds.GetTermsEnum();

                // [facetFieldNumTerms() + 1] for all possible facet values and docs not containing facet field
                m_segmentFacetCounts = new int[facetFieldNumTerms + 1];
                m_segmentTotalCount = 0;

                segmentGroupedFacetHits.Clear();
                foreach (GroupedFacetHit hit in groupedFacetHits)
                {
                    // A null group maps to ord -1; a group missing from this segment skips the hit.
                    int groupOrd = -1;
                    if (hit.groupValue != null)
                    {
                        groupOrd = groupFieldTermsIndex.LookupTerm(hit.groupValue);
                        if (groupOrd < 0)
                        {
                            continue;
                        }
                    }

                    // A null facet value uses the extra slot after the last term ordinal.
                    int facetOrd = facetFieldNumTerms;
                    if (hit.facetValue != null)
                    {
                        if (facetOrdTermsEnum == null || !facetOrdTermsEnum.SeekExact(hit.facetValue))
                        {
                            continue;
                        }
                        facetOrd = (int)facetOrdTermsEnum.Ord;
                    }

                    // (facetFieldDocTermOrds.numTerms() + 1) for all possible facet values and docs not containing facet field
                    segmentGroupedFacetHits.Put(groupOrd * (facetFieldNumTerms + 1) + facetOrd);
                }

                if (m_facetPrefix == null)
                {
                    m_startFacetOrd = 0;
                    m_endFacetOrd = facetFieldNumTerms + 1;
                    return;
                }

                TermsEnum.SeekStatus seekStatus = facetOrdTermsEnum != null
                    ? facetOrdTermsEnum.SeekCeil(m_facetPrefix)
                    : TermsEnum.SeekStatus.END;

                if (seekStatus == TermsEnum.SeekStatus.END)
                {
                    // Nothing at or after the prefix: empty ordinal range.
                    m_startFacetOrd = 0;
                    m_endFacetOrd = 0;
                    return;
                }
                m_startFacetOrd = (int)facetOrdTermsEnum.Ord;

                BytesRef facetEndPrefix = BytesRef.DeepCopyOf(m_facetPrefix);
                facetEndPrefix.Append(UnicodeUtil.BIG_TERM);
                seekStatus = facetOrdTermsEnum.SeekCeil(facetEndPrefix);
                if (seekStatus == TermsEnum.SeekStatus.END)
                {
                    m_endFacetOrd = facetFieldNumTerms; // Don't include null...
                }
                else
                {
                    m_endFacetOrd = (int)facetOrdTermsEnum.Ord;
                }
            }
Exemple #14
0
        /// <summary>
        /// Looks up at most <paramref name="num"/> completions of <paramref name="key"/> in the FST.
        /// When <c>exactFirst</c> is set and the key itself is a complete entry, it is returned first.
        /// </summary>
        /// <param name="key">Prefix to complete.</param>
        /// <param name="contexts">Must be <c>null</c>; contexts are not supported.</param>
        /// <param name="onlyMorePopular">Must be <c>false</c>.</param>
        /// <param name="num">Maximum number of results; asserted to be positive.</param>
        /// <returns>The completions found, or an empty list when there is no FST or the prefix is unknown.</returns>
        /// <exception cref="System.ArgumentException">
        /// <paramref name="contexts"/> is not <c>null</c> or <paramref name="onlyMorePopular"/> is <c>true</c>.
        /// </exception>
        /// <exception cref="Exception">Wraps an <see cref="IOException"/> raised while reading the FST.</exception>
        // NOTE(review): several Java-cased members declared outside this method are still referenced
        // (Collections.emptyList, arc.Final, arc.nextFinalOutput, decodeWeight, weightComparator,
        // completions.isComplete, completion.input/output) - confirm their names against the declarations.
        public override IList <LookupResult> Lookup(string key, HashSet <BytesRef> contexts, bool onlyMorePopular, int num)
        {
            if (contexts != null)
            {
                throw new System.ArgumentException("this suggester doesn't support contexts");
            }
            Debug.Assert(num > 0);

            if (onlyMorePopular)
            {
                throw new System.ArgumentException("this suggester only works with onlyMorePopular=false");
            }

            if (fst == null)
            {
                return(Collections.emptyList());
            }

            BytesRef    scratch      = new BytesRef(key);
            int         prefixLength = scratch.Length;
            Arc <long?> arc          = new Arc <long?>();

            // match the prefix portion exactly
            long? prefixOutput = null;

            try
            {
                prefixOutput = LookupPrefix(scratch, arc);
            }
            catch (IOException bogus)
            {
                // System.Exception has no constructor taking only an exception; pass text plus inner exception.
                throw new Exception(bogus.ToString(), bogus);
            }

            if (prefixOutput == null)
            {
                return(Collections.emptyList());
            }

            IList <LookupResult> results = new List <LookupResult>(num);
            CharsRef             spare   = new CharsRef();

            if (exactFirst && arc.Final)
            {
                // The key itself is an entry: report it ahead of the completions.
                spare.Grow(scratch.Length);
                UnicodeUtil.UTF8toUTF16(scratch, spare);
                results.Add(new LookupResult(spare.ToString(), decodeWeight(prefixOutput + arc.nextFinalOutput)));
                if (--num == 0)
                {
                    return(results); // that was quick
                }
            }

            // complete top-N
            TopResults <long?> completions = null;

            try
            {
                completions = Util.ShortestPaths(fst, arc, prefixOutput, weightComparator, num, !exactFirst);
                Debug.Assert(completions.isComplete);
            }
            catch (IOException bogus)
            {
                throw new Exception(bogus.ToString(), bogus);
            }

            BytesRef suffix = new BytesRef(8);

            foreach (Result <long?> completion in completions)
            {
                // Reset to the bare key before appending this completion's suffix.
                scratch.Length = prefixLength;
                // append suffix
                Util.ToBytesRef(completion.input, suffix);
                scratch.Append(suffix);
                spare.Grow(scratch.Length);
                UnicodeUtil.UTF8toUTF16(scratch, spare);
                results.Add(new LookupResult(spare.ToString(), decodeWeight(completion.output)));
            }
            return(results);
        }