Exemple #1
0
            /// <summary>
            /// Moves to the first term that compares greater than or equal to
            /// <paramref name="text"/>.
            /// NOTE: slow! (linear scan)
            /// </summary>
            /// <param name="text">Term to seek to.</param>
            /// <returns>
            /// FOUND when a term equal to <paramref name="text"/> is reached, NOT_FOUND when
            /// the scan stops on a term that <paramref name="text"/> sorts before, and END
            /// when <c>MoveNext()</c> reports no further terms.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef text)
            {
                if (nextTerm != 0)
                {
                    int versusCurrent = text.CompareTo(term);
                    if (versusCurrent == 0)
                    {
                        return SeekStatus.FOUND;
                    }

                    if (versusCurrent < 0)
                    {
                        // Target sorts before the current term: reset the counter and
                        // seek tvf back to tvfFP so the scan below restarts from there.
                        nextTerm = 0;
                        tvf.Seek(tvfFP);
                    }
                }

                while (MoveNext())
                {
                    int order = text.CompareTo(term);
                    if (order > 0)
                    {
                        // Current term is still smaller than the target; keep scanning.
                        continue;
                    }

                    return order == 0 ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
                }

                return SeekStatus.END;
            }
            /// <summary>
            /// Moves to the first term that compares greater than or equal to
            /// <paramref name="text"/>.
            /// NOTE: slow! (linear scan)
            /// </summary>
            /// <param name="text">Term to seek to.</param>
            /// <returns>
            /// FOUND on an exact match, NOT_FOUND when the scan stops on a term that
            /// <paramref name="text"/> sorts before, END when <c>Next()</c> returns null.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef text)
            {
                if (NextTerm != 0)
                {
                    int versusCurrent = text.CompareTo(Term_Renamed);
                    if (versusCurrent == 0)
                    {
                        return SeekStatus.FOUND;
                    }

                    if (versusCurrent < 0)
                    {
                        // Target sorts before the current term: reset the counter and
                        // seek Tvf back to TvfFP so the scan below restarts from there.
                        NextTerm = 0;
                        Tvf.Seek(TvfFP);
                    }
                }

                for (;;)
                {
                    if (Next() == null)
                    {
                        return SeekStatus.END;
                    }

                    int order = text.CompareTo(Term_Renamed);
                    if (order == 0)
                    {
                        return SeekStatus.FOUND;
                    }

                    if (order < 0)
                    {
                        return SeekStatus.NOT_FOUND;
                    }
                }
            }
Exemple #3
0
                /// <summary>
                /// Seeks to the first term greater than or equal to <paramref name="text"/>.
                /// Binary-searches the index values (probing position
                /// <c>mid * outerInstance.interval</c>) to find a block, then scans forward
                /// within that block using <c>MoveNext()</c>.
                /// </summary>
                /// <param name="text">Term to seek to.</param>
                /// <returns>
                /// FOUND on an exact match, NOT_FOUND when positioned on the next greater term,
                /// END when there are no index values or the scan runs out of terms.
                /// </returns>
                public override TermsEnum.SeekStatus SeekCeil(BytesRef text)
                {
                    // binary-search just the index values to find the block,
                    // then scan within the block
                    long low = 0;
                    long high = outerInstance.numIndexValues - 1;

                    while (low <= high)
                    {
                        // Compute the midpoint entirely in 64-bit arithmetic. Narrowing
                        // (low + high) to 32 bits before shifting would produce a wrong
                        // index once the sum no longer fits in 32 bits.
                        long mid = low + ((high - low) >> 1);
                        DoSeek(mid * outerInstance.interval);
                        int cmp = termBuffer.CompareTo(text);

                        if (cmp < 0)
                        {
                            low = mid + 1;
                        }
                        else if (cmp > 0)
                        {
                            high = mid - 1;
                        }
                        else
                        {
                            // we got lucky, found an indexed term
                            SetTerm();
                            return TermsEnum.SeekStatus.FOUND;
                        }
                    }

                    if (outerInstance.numIndexValues == 0)
                    {
                        return TermsEnum.SeekStatus.END;
                    }

                    // block before insertion point
                    long block = low - 1;

                    // A negative block means the target sorts before the first index value;
                    // DoSeek is then called with -1 instead of a block start position.
                    DoSeek(block < 0 ? -1 : block * outerInstance.interval);

                    while (MoveNext())
                    {
                        int cmp = termBuffer.CompareTo(text);
                        if (cmp == 0)
                        {
                            SetTerm();
                            return TermsEnum.SeekStatus.FOUND;
                        }
                        else if (cmp > 0)
                        {
                            SetTerm();
                            return TermsEnum.SeekStatus.NOT_FOUND;
                        }
                    }

                    return TermsEnum.SeekStatus.END;
                }
Exemple #4
0
        /// <summary>
        /// If <paramref name="key"/> exists, returns its ordinal, else
        /// returns <c>-insertionPoint-1</c>, in the manner of a standard
        /// binary search.
        /// </summary>
        /// <param name="key"> Key to look up</param>
        /// <returns>
        /// The ordinal whose value compares equal to <paramref name="key"/>, or
        /// <c>-(insertionPoint + 1)</c> when no such ordinal exists.
        /// </returns>
        public virtual long LookupTerm(BytesRef key)
        {
            BytesRef spare = new BytesRef();
            long low = 0;
            long high = ValueCount - 1;

            while (low <= high)
            {
                // Compute the midpoint entirely in 64-bit arithmetic. Narrowing
                // (low + high) to 32 bits before shifting would produce a wrong
                // ordinal once the sum no longer fits in 32 bits.
                long mid = low + ((high - low) >> 1);
                LookupOrd(mid, spare);
                int cmp = spare.CompareTo(key);

                if (cmp < 0)
                {
                    low = mid + 1;
                }
                else if (cmp > 0)
                {
                    high = mid - 1;
                }
                else
                {
                    return mid; // key found
                }
            }

            return -(low + 1); // key not found.
        }
 /// <summary>
 /// Seeks to the first term greater than or equal to <paramref name="text"/>
 /// by scanning forward one term at a time.
 /// </summary>
 /// <param name="text">Term to seek to.</param>
 /// <returns>
 /// FOUND on an exact match, NOT_FOUND when the scan stops on a greater term,
 /// END when <c>Next()</c> returns null.
 /// </returns>
 public override TermsEnum.SeekStatus SeekCeil(BytesRef text)
 {
     if (ord < numTerms && ord >= 0)
     {
         // Only consult the current term when ord is inside [0, numTerms).
         int currentVsTarget = Term.CompareTo(text);
         if (currentVsTarget == 0)
         {
             return TermsEnum.SeekStatus.FOUND;
         }

         if (currentVsTarget > 0)
         {
             // Current term is already past the target; Reset() before scanning.
             Reset();
         }
     }

     // linear scan
     for (BytesRef candidate = Next(); candidate != null; candidate = Next())
     {
         int order = candidate.CompareTo(text);
         if (order == 0)
         {
             return TermsEnum.SeekStatus.FOUND;
         }

         if (order > 0)
         {
             return TermsEnum.SeekStatus.NOT_FOUND;
         }
     }

     return TermsEnum.SeekStatus.END;
 }
        /// <summary>
        /// Builds the final automaton from the entries supplied by
        /// <paramref name="sorter"/>'s iterator. An entry that compares equal to the
        /// previously added one is skipped.
        /// </summary>
        /// <param name="sorter">Source of the entries to add.</param>
        /// <returns>The finished automaton, or <c>null</c> when the iterator yielded no entries.</returns>
        private FST<object> BuildAutomaton(IBytesRefSorter sorter)
        {
            // Build the automaton.
            Outputs<object> noOutputs = NoOutputs.Singleton;
            object noOutput = noOutputs.NoOutput;
            Builder<object> fstBuilder = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, noOutputs, null, false, PackedInt32s.DEFAULT, true, 15);

            BytesRef previous = new BytesRef();
            Int32sRef intsScratch = new Int32sRef();
            int seen = 0;
            IBytesRefIterator entries = sorter.GetIterator();

            for (BytesRef current = entries.Next(); current != null; current = entries.Next())
            {
                seen++;
                if (previous.CompareTo(current) == 0)
                {
                    // Same bytes as the last entry that was added; do not add it again.
                    continue;
                }

                fstBuilder.Add(Util.Fst.Util.ToInt32sRef(current, intsScratch), noOutput);
                previous.CopyBytes(current);
            }

            if (seen == 0)
            {
                return null;
            }

            return fstBuilder.Finish();
        }
Exemple #7
0
        /// <summary>
        /// Enumerates the terms of field "f" in <paramref name="r"/>, asserting that they
        /// arrive in strictly increasing order and that each one is in
        /// <paramref name="allTerms"/>; then seeks to every term seen and asserts FOUND.
        /// </summary>
        /// <param name="r">Reader whose terms are checked.</param>
        /// <param name="allTerms">Terms expected to have been added to the index.</param>
        /// <param name="isTop">When true, the terms seen must equal <paramref name="allTerms"/> exactly.</param>
        private void CheckTermsOrder(IndexReader r, ISet<string> allTerms, bool isTop)
        {
            TermsEnum termsEnum = MultiFields.GetFields(r).GetTerms("f").GetEnumerator();
            BytesRef previous = new BytesRef();
            ISet<string> visited = new JCG.HashSet<string>();

            while (termsEnum.MoveNext())
            {
                BytesRef current = termsEnum.Term;

                bool strictlyAscending = previous.CompareTo(current) < 0;
                Assert.IsTrue(strictlyAscending);
                previous.CopyBytes(current);

                string decoded = current.Utf8ToString();
                Assert.IsTrue(allTerms.Contains(decoded), "term " + TermDesc(decoded) + " was not added to index (count=" + allTerms.Count + ")");
                visited.Add(decoded);
            }

            if (isTop)
            {
                Assert.IsTrue(allTerms.SetEquals(visited));
            }

            // Test seeking:
            foreach (string seen in visited)
            {
                BytesRef seekTarget = new BytesRef(seen);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(seekTarget), "seek failed for term=" + TermDesc(seekTarget.Utf8ToString()));
            }
        }
Exemple #8
0
        /// <summary>
        /// Binary-searches the ordinals for <paramref name="key"/>. Returns the matching
        /// ordinal when found; otherwise returns <c>-insertionPoint-1</c>, like
        /// <see cref="System.Array.BinarySearch(System.Array, int, int, object)"/>
        /// </summary>
        /// <param name="key"> Key to look up</param>
        public virtual int LookupTerm(BytesRef key)
        {
            BytesRef scratch = new BytesRef();
            int lo = 0;
            int hi = ValueCount - 1;

            while (lo <= hi)
            {
                int probe = (lo + hi).TripleShift(1);
                LookupOrd(probe, scratch);
                int order = scratch.CompareTo(key);

                if (order == 0)
                {
                    return probe; // key found
                }

                if (order < 0)
                {
                    // Value at probe is smaller than the key: search the upper half.
                    lo = probe + 1;
                }
                else
                {
                    // Value at probe is larger than the key: search the lower half.
                    hi = probe - 1;
                }
            }

            return -(lo + 1); // key not found.
        }
Exemple #9
0
        /// <summary>
        /// Counts the terms produced by <paramref name="q"/>'s terms enum over the
        /// query's field, asserting along the way that the terms are strictly increasing.
        /// </summary>
        /// <param name="q">Query whose terms enum is enumerated.</param>
        /// <returns>Number of terms enumerated, or 0 when the field has no terms.</returns>
        private int CountTerms(MultiTermQuery q)
        {
            Terms fieldTerms = MultiFields.GetTerms(Reader, q.Field);
            if (fieldTerms == null)
            {
                return 0;
            }

            TermsEnum termEnum = q.GetTermsEnum(fieldTerms);
            Assert.IsNotNull(termEnum);

            int total = 0;
            BytesRef previous = null;

            for (BytesRef current = termEnum.Next(); current != null; current = termEnum.Next())
            {
                total++;
                if (previous != null)
                {
                    Assert.IsTrue(previous.CompareTo(current) < 0);
                }

                // Keep an independent copy for the comparison on the next iteration.
                previous = BytesRef.DeepCopyOf(current);
            }

            // LUCENE-3314: the results after next() already returned null are undefined,
            // Assert.IsNull(termEnum.Next());
            return total;
        }
Exemple #10
0
 /// <summary>
 /// Compares two terms, returning a negative integer if this
 /// term belongs before the argument, zero if this term is equal to the
 /// argument, and a positive integer if this term belongs after the argument.
 /// <para/>
 /// The ordering of terms is first by field, then by text. Field names are
 /// compared ordinally, so the result does not depend on the current culture.
 /// </summary>
 /// <param name="other">Term to compare against.</param>
 /// <returns>Negative, zero, or positive as described above.</returns>
 public int CompareTo(Term other)
 {
     // Ordinal comparison is zero exactly when the field names are equal,
     // whereas a culture-sensitive comparison can report 0 for different
     // strings and would then disagree with ordinal equality.
     int fieldOrder = string.CompareOrdinal(Field_Renamed, other.Field_Renamed);
     if (fieldOrder != 0)
     {
         return fieldOrder;
     }

     return Bytes_Renamed.CompareTo(other.Bytes_Renamed);
 }
Exemple #11
0
        /// <summary>
        /// Returns the terms enum for this range: <c>TermsEnum.EMPTY</c> when the lower
        /// term sorts after the upper term, the plain iterator when the range imposes no
        /// bounds, and a <c>TermRangeTermsEnum</c> over the iterator otherwise.
        /// </summary>
        public override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
        {
            bool invertedRange = LowerTerm_Renamed != null
                && UpperTerm_Renamed != null
                && LowerTerm_Renamed.CompareTo(UpperTerm_Renamed) > 0;
            if (invertedRange)
            {
                return TermsEnum.EMPTY;
            }

            TermsEnum allTerms = terms.Iterator(null);

            // The lower bound is open when absent, or when it is an inclusive empty term.
            bool lowerOpen = LowerTerm_Renamed == null || (IncludeLower && LowerTerm_Renamed.Length == 0);
            if (lowerOpen && UpperTerm_Renamed == null)
            {
                return allTerms;
            }

            return new TermRangeTermsEnum(allTerms, LowerTerm_Renamed, UpperTerm_Renamed, IncludeLower, IncludeUpper);
        }
Exemple #12
0
        /// <summary>
        /// Compares this value with <paramref name="other"/> (cast to
        /// <c>MutableValueStr</c>): first by <c>Value</c>, then by <c>Exists</c>, where
        /// an existing value orders after a non-existing one.
        /// </summary>
        public override int CompareSameType(object other)
        {
            MutableValueStr that = (MutableValueStr)other;
            int order = Value.CompareTo(that.Value);

            if (order == 0 && Exists != that.Exists)
            {
                // Values tie; break the tie on the Exists flag.
                order = Exists ? 1 : -1;
            }

            return order;
        }
Exemple #13
0
        /// <summary>
        /// Returns the terms enum for this range: <c>TermsEnum.EMPTY</c> when the lower
        /// term sorts after the upper term, the plain enumerator when the range imposes
        /// no bounds, and a <c>TermRangeTermsEnum</c> over the enumerator otherwise.
        /// </summary>
        protected override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
        {
            bool invertedRange = lowerTerm != null
                && upperTerm != null
                && lowerTerm.CompareTo(upperTerm) > 0;
            if (invertedRange)
            {
                return TermsEnum.EMPTY;
            }

            TermsEnum allTerms = terms.GetEnumerator();

            // The lower bound is open when absent, or when it is an inclusive empty term.
            bool lowerOpen = lowerTerm == null || (includeLower && lowerTerm.Length == 0);
            if (lowerOpen && upperTerm == null)
            {
                return allTerms;
            }

            return new TermRangeTermsEnum(allTerms, lowerTerm, upperTerm, includeLower, includeUpper);
        }
Exemple #14
0
 /// <summary>
 /// Single straight enum: walks every field and every term of
 /// <paramref name="reader"/> in order, asserting that each term matches the
 /// corresponding entry of <paramref name="fieldTerms"/>, that terms within a field
 /// are strictly increasing, and that the total equals
 /// <paramref name="uniqueTermCount"/>.
 /// </summary>
 private void DoTestStraightEnum(IList<Term> fieldTerms, IndexReader reader, int uniqueTermCount)
 {
     if (Verbose)
     {
         Console.WriteLine("\nTEST: top now enum reader=" + reader);
     }

     Fields fields = MultiFields.GetFields(reader);

     // Test straight enum:
     int seen = 0;
     foreach (string field in fields)
     {
         Terms terms = fields.GetTerms(field);
         Assert.IsNotNull(terms);

         TermsEnum termsEnum = terms.GetEnumerator();
         BytesRef previous = null;

         while (termsEnum.MoveNext())
         {
             BytesRef actual = termsEnum.Term;
             Term expected = fieldTerms[seen];

             if (Verbose)
             {
                 Console.WriteLine("  got term=" + field + ":" + UnicodeUtil.ToHexString(actual.Utf8ToString()));
                 Console.WriteLine("       exp=" + expected.Field + ":" + UnicodeUtil.ToHexString(expected.Text));
                 Console.WriteLine();
             }

             if (previous != null)
             {
                 Assert.IsTrue(previous.CompareTo(actual) < 0);
                 previous.CopyBytes(actual);
             }
             else
             {
                 // First term of this field: nothing to compare against yet.
                 previous = BytesRef.DeepCopyOf(actual);
             }

             Assert.AreEqual(expected.Field, field);
             Assert.AreEqual(expected.Bytes, actual);
             seen++;
         }

         if (Verbose)
         {
             Console.WriteLine("  no more terms for field=" + field);
         }
     }

     Assert.AreEqual(uniqueTermCount, seen);
 }
Exemple #15
0
        /// <summary>
        /// Generates pairs of random terms and calls <c>AssertSame</c> with the smaller
        /// one as the lower bound, the larger one as the upper bound, and random
        /// inclusive flags.
        /// </summary>
        public virtual void TestRanges()
        {
            int iterations = AtLeast(1000);

            for (int round = 0; round < iterations; round++)
            {
                BytesRef first = new BytesRef(TestUtil.RandomUnicodeString(Random));
                BytesRef second = new BytesRef(TestUtil.RandomUnicodeString(Random));

                // Order the pair so the lower bound never sorts after the upper bound.
                bool swap = second.CompareTo(first) < 0;
                BytesRef lower = swap ? second : first;
                BytesRef upper = swap ? first : second;

                AssertSame(lower, upper, Random.NextBoolean(), Random.NextBoolean());
            }
        }
Exemple #16
0
 /// <summary>
 /// Orders this instance relative to <paramref name="o"/> by comparing their
 /// <c>text</c> members.
 /// </summary>
 public virtual int CompareTo(TermData o)
 {
     int order = text.CompareTo(o.text);
     return order;
 }
            /// <summary>
            /// Moves to the first term that compares greater than or equal to
            /// <paramref name="text"/>.
            /// NOTE: slow! (linear scan)
            /// </summary>
            /// <param name="text">Term to seek to.</param>
            /// <returns>
            /// FOUND on an exact match, NOT_FOUND when the scan stops on a term that
            /// <paramref name="text"/> sorts before, END when <c>Next()</c> returns null.
            /// </returns>
            public override SeekStatus SeekCeil(BytesRef text)
            {
                if (NextTerm != 0)
                {
                    int versusCurrent = text.CompareTo(Term_Renamed);
                    if (versusCurrent == 0)
                    {
                        return SeekStatus.FOUND;
                    }

                    if (versusCurrent < 0)
                    {
                        // Target sorts before the current term: reset the counter and
                        // seek Tvf back to TvfFP so the scan below restarts from there.
                        NextTerm = 0;
                        Tvf.Seek(TvfFP);
                    }
                }

                while (Next() != null)
                {
                    int order = text.CompareTo(Term_Renamed);
                    if (order > 0)
                    {
                        // Current term is still smaller than the target; keep scanning.
                        continue;
                    }

                    return order == 0 ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
                }

                return SeekStatus.END;
            }
 /// <summary>
 /// Generates pairs of random terms and calls <c>AssertSame</c> with the smaller
 /// one as the lower bound, the larger one as the upper bound, and random
 /// inclusive flags.
 /// </summary>
 public virtual void TestRanges()
 {
     int iterations = AtLeast(1000);
     for (int round = 0; round < iterations; round++)
     {
         BytesRef first = new BytesRef(TestUtil.RandomUnicodeString(Random()));
         BytesRef second = new BytesRef(TestUtil.RandomUnicodeString(Random()));

         // Order the pair so the lower bound never sorts after the upper bound.
         bool swap = second.CompareTo(first) < 0;
         BytesRef lower = swap ? second : first;
         BytesRef upper = swap ? first : second;

         AssertSame(lower, upper, Random().NextBoolean(), Random().NextBoolean());
     }
 }
 /// <summary>
 /// Orders by <c>term</c> first and, when the terms compare equal, by <c>freq</c>.
 /// </summary>
 /// <param name="other">Instance to compare against.</param>
 /// <returns>
 /// Negative, zero, or positive when this instance sorts before, equal to, or after
 /// <paramref name="other"/>.
 /// </returns>
 public int CompareTo(TermAndFreq other)
 {
     // Adding the two comparison results is not a valid ordering: opposite signs
     // can cancel to 0 for distinct instances. Use freq only as a tie-breaker.
     int byTerm = term.CompareTo(other.term);
     if (byTerm != 0)
     {
         return byTerm;
     }

     return freq.CompareTo(other.freq);
 }
Exemple #20
0
            /// <summary>
            /// Seeks to the first term greater than or equal to <paramref name="target"/>.
            /// Binary-searches <c>outerInstance.m_indexedTermsArray</c> to choose a block,
            /// positions the wrapped enum at that block when necessary, then steps forward
            /// with <c>Next()</c> until the current term is no longer below the target.
            /// </summary>
            /// <param name="target">Term to seek to.</param>
            /// <returns>FOUND on an exact match, NOT_FOUND on a greater term, END when no term remains.</returns>
            public override SeekStatus SeekCeil(BytesRef target)
            {
                // already here
                if (term != null && term.Equals(target))
                {
                    return SeekStatus.FOUND;
                }

                int searchResult = Array.BinarySearch(outerInstance.m_indexedTermsArray, target);

                if (searchResult >= 0)
                {
                    // we hit the term exactly... lucky us!
                    TermsEnum.SeekStatus exactStatus = termsEnum.SeekCeil(target);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(exactStatus == TermsEnum.SeekStatus.FOUND);
                    }

                    ord = searchResult << outerInstance.indexIntervalBits;
                    SetTerm();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(term != null);
                    }

                    return SeekStatus.FOUND;
                }

                // we didn't hit the term exactly: recover the insertion point
                int insertionPoint = -searchResult - 1;

                if (insertionPoint == 0)
                {
                    // our target occurs *before* the first term
                    TermsEnum.SeekStatus leadingStatus = termsEnum.SeekCeil(target);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(leadingStatus == TermsEnum.SeekStatus.NOT_FOUND);
                    }

                    ord = 0;
                    SetTerm();
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(term != null);
                    }

                    return SeekStatus.NOT_FOUND;
                }

                // back up to the start of the block
                int blockIndex = insertionPoint - 1;

                // When we are already in the right block and the current term is not past
                // the term we want, no seek is needed.
                bool alreadyInBlock = (ord >> outerInstance.indexIntervalBits) == blockIndex
                    && term != null
                    && term.CompareTo(target) <= 0;

                if (!alreadyInBlock)
                {
                    // seek to the right block
                    TermsEnum.SeekStatus blockStatus = termsEnum.SeekCeil(outerInstance.m_indexedTermsArray[blockIndex]);
                    if (Debugging.AssertsEnabled)
                    {
                        Debugging.Assert(blockStatus == TermsEnum.SeekStatus.FOUND);
                    }

                    ord = blockIndex << outerInstance.indexIntervalBits;
                    SetTerm();
                    if (Debugging.AssertsEnabled)
                    {
                        // should be non-null since it's in the index
                        Debugging.Assert(term != null);
                    }
                }

                while (term != null && term.CompareTo(target) < 0)
                {
                    Next();
                }

                if (term == null)
                {
                    return SeekStatus.END;
                }

                return term.CompareTo(target) == 0 ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
            }
Exemple #21
0
            /// <summary>
            /// Seeks the underlying term enumerator to <paramref name="term"/> within this field.
            /// Returns FOUND when the enumerator lands on a term of this field whose bytes equal
            /// <paramref name="term"/>. Otherwise it runs the surrogate handling below and returns
            /// NOT_FOUND (with <c>current</c> set to the term landed on) or END (with
            /// <c>current</c> set to null).
            /// </summary>
            /// <param name="term">Target term bytes to seek to.</param>
            public override SeekStatus SeekCeil(BytesRef term)
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
                }
                skipNext = false;
                TermInfosReader tis = outerInstance.TermsDict;
                Term            t0  = new Term(fieldInfo.Name, term);

                Debug.Assert(termEnum != null);

                // Position termEnum using the (field, term) pair built above.
                tis.SeekEnum(termEnum, t0, false);

                Term t = termEnum.Term();

                // Field names are compared by reference against the interned name.
                if (t != null && t.Field == internedFieldName && term.BytesEquals(t.Bytes))
                {
                    // If we found an exact match, no need to do the
                    // surrogate dance
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek exact match");
                    }
                    current = t.Bytes;
                    return(SeekStatus.FOUND);
                }
                else if (t == null || t.Field != internedFieldName)
                {
                    // TODO: maybe we can handle this like the next()
                    // into null?  set term as prevTerm then dance?

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek hit EOF");
                    }

                    // We hit EOF; try end-case surrogate dance: if we
                    // find an E, try swapping in S, backwards:
                    scratchTerm.CopyBytes(term);

                    Debug.Assert(scratchTerm.Offset == 0);

                    // Walk the copied target from its last byte back to its first.
                    for (int i = scratchTerm.Length - 1; i >= 0; i--)
                    {
                        if (IsHighBMPChar(scratchTerm.Bytes, i))
                        {
                            if (DEBUG_SURROGATES)
                            {
                                Console.WriteLine("    found E pos=" + i + "; try seek");
                            }

                            if (SeekToNonBMP(seekTermEnum, scratchTerm, i))
                            {
                                // Re-position termEnum on the term that seekTermEnum found.
                                scratchTerm.CopyBytes(seekTermEnum.Term().Bytes);
                                outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), false);

                                newSuffixStart = 1 + i;

                                DoPushes();

                                // Found a match
                                // TODO: faster seek?
                                current = termEnum.Term().Bytes;
                                return(SeekStatus.NOT_FOUND);
                            }
                        }
                    }

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek END");
                    }

                    // No position in the target led to a successful seek.
                    current = null;
                    return(SeekStatus.END);
                }
                else
                {
                    // We found a non-exact but non-null term; this one
                    // is fun -- just treat it like next, by pretending
                    // requested term was prev:
                    prevTerm.CopyBytes(term);

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek hit non-exact term=" + UnicodeUtil.ToHexString(t.Text()));
                    }

                    BytesRef br = t.Bytes;
                    Debug.Assert(br.Offset == 0);

                    SetNewSuffixStart(term, br);

                    SurrogateDance();

                    // Re-read the enumerator's term after SurrogateDance().
                    Term t2 = termEnum.Term();
                    if (t2 == null || t2.Field != internedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        Debug.Assert(t2 == null || !t2.Field.Equals(internedFieldName, StringComparison.Ordinal));
                        current = null;
                        return(SeekStatus.END);
                    }
                    else
                    {
                        current = t2.Bytes;
                        Debug.Assert(!unicodeSortOrder || term.CompareTo(current) < 0, "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(current.Utf8ToString()));
                        return(SeekStatus.NOT_FOUND);
                    }
                }
            }
Exemple #22
0
            /// <summary>
            /// Seeks to the first term greater than or equal to <paramref name="target"/>.
            /// Binary-searches <c>OuterInstance.IndexedTermsArray</c> to choose a block,
            /// positions the wrapped enum at that block when necessary, then steps forward
            /// with <c>Next()</c> until the current term is no longer below the target.
            /// </summary>
            /// <param name="target">Term to seek to.</param>
            /// <returns>FOUND on an exact match, NOT_FOUND on a greater term, END when no term remains.</returns>
            public override SeekStatus SeekCeil(BytesRef target)
            {
                // already here
                if (Term_Renamed != null && Term_Renamed.Equals(target))
                {
                    return SeekStatus.FOUND;
                }

                // Search the indexed terms in place; copying them into a new list on
                // every seek would cost O(n) before the O(log n) search even starts.
                int startIdx = System.Array.BinarySearch(OuterInstance.IndexedTermsArray, target);

                if (startIdx >= 0)
                {
                    // we hit the term exactly... lucky us!
                    TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(target);
                    Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                    Ord_Renamed = startIdx << OuterInstance.IndexIntervalBits;
                    SetTerm();
                    Debug.Assert(Term_Renamed != null);
                    return SeekStatus.FOUND;
                }

                // we didn't hit the term exactly: recover the insertion point
                startIdx = -startIdx - 1;

                if (startIdx == 0)
                {
                    // our target occurs *before* the first term
                    TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(target);
                    Debug.Assert(seekStatus == TermsEnum.SeekStatus.NOT_FOUND);
                    Ord_Renamed = 0;
                    SetTerm();
                    Debug.Assert(Term_Renamed != null);
                    return SeekStatus.NOT_FOUND;
                }

                // back up to the start of the block
                startIdx--;

                if ((Ord_Renamed >> OuterInstance.IndexIntervalBits) == startIdx && Term_Renamed != null && Term_Renamed.CompareTo(target) <= 0)
                {
                    // we are already in the right block and the current term is before the term we want,
                    // so we don't need to seek.
                }
                else
                {
                    // seek to the right block
                    TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(OuterInstance.IndexedTermsArray[startIdx]);
                    Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
                    Ord_Renamed = startIdx << OuterInstance.IndexIntervalBits;
                    SetTerm();
                    Debug.Assert(Term_Renamed != null); // should be non-null since it's in the index
                }

                // Advance until the current term is no longer below the target.
                while (Term_Renamed != null && Term_Renamed.CompareTo(target) < 0)
                {
                    Next();
                }

                if (Term_Renamed == null)
                {
                    return SeekStatus.END;
                }
                else if (Term_Renamed.CompareTo(target) == 0)
                {
                    return SeekStatus.FOUND;
                }
                else
                {
                    return SeekStatus.NOT_FOUND;
                }
            }
 /// <summary>
 /// Positions this enum on <paramref name="target"/> using the previously captured
 /// <paramref name="otherState"/>. Does nothing further when the enum is already on
 /// <paramref name="target"/> and <c>TermExists</c> is true.
 /// </summary>
 /// <param name="target">Term to position on.</param>
 /// <param name="otherState">State to copy into the static frame; asserted to be a <c>BlockTermState</c>.</param>
 public override void SeekExact(BytesRef target, TermState otherState)
 {
     Debug.Assert(ClearEOF());

     bool alreadyOnTarget = target.CompareTo(Term_Renamed) == 0 && TermExists;
     if (alreadyOnTarget)
     {
         // skip seek: already on target
         return;
     }

     Debug.Assert(otherState != null && otherState is BlockTermState);

     // Switch to the static frame and load the supplied state into it.
     CurrentFrame = StaticFrame;
     CurrentFrame.State.CopyFrom(otherState);
     Term_Renamed.CopyBytes(target);
     CurrentFrame.MetaDataUpto = CurrentFrame.TermBlockOrd;
     Debug.Assert(CurrentFrame.MetaDataUpto > 0);
     ValidIndexPrefix = 0;
 }
            /// <summary>
            /// Seeks the underlying term enumerator to <paramref name="term"/> within this field.
            /// Returns FOUND when the enumerator lands on a term of this field whose bytes equal
            /// <paramref name="term"/>. Otherwise it runs the surrogate handling below and returns
            /// NOT_FOUND (with <c>Current</c> set to the term landed on) or END (with
            /// <c>Current</c> set to null).
            /// </summary>
            /// <param name="term">Target term bytes to seek to.</param>
            public override SeekStatus SeekCeil(BytesRef term)
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
                }
                SkipNext = false;
                TermInfosReader tis = OuterInstance.TermsDict;
                Term t0 = new Term(fieldInfo.Name, term);

                Debug.Assert(TermEnum != null);

                // Position TermEnum using the (field, term) pair built above.
                tis.SeekEnum(TermEnum, t0, false);

                Term t = TermEnum.Term();

                // Field names are compared by reference against the interned name.
                if (t != null && t.Field() == InternedFieldName && term.BytesEquals(t.Bytes()))
                {
                    // If we found an exact match, no need to do the
                    // surrogate dance
                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek exact match");
                    }
                    Current = t.Bytes();
                    return SeekStatus.FOUND;
                }
                else if (t == null || t.Field() != InternedFieldName)
                {
                    // TODO: maybe we can handle this like the next()
                    // into null?  set term as prevTerm then dance?

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek hit EOF");
                    }

                    // We hit EOF; try end-case surrogate dance: if we
                    // find an E, try swapping in S, backwards:
                    ScratchTerm.CopyBytes(term);

                    Debug.Assert(ScratchTerm.Offset == 0);

                    // Walk the copied target from its last byte back to its first.
                    for (int i = ScratchTerm.Length - 1; i >= 0; i--)
                    {
                        if (IsHighBMPChar(ScratchTerm.Bytes, i))
                        {
                            if (DEBUG_SURROGATES)
                            {
                                Console.WriteLine("    found E pos=" + i + "; try seek");
                            }

                            if (SeekToNonBMP(SeekTermEnum, ScratchTerm, i))
                            {
                                // Re-position TermEnum on the term that SeekTermEnum found.
                                ScratchTerm.CopyBytes(SeekTermEnum.Term().Bytes());
                                OuterInstance.TermsDict.SeekEnum(TermEnum, SeekTermEnum.Term(), false);

                                NewSuffixStart = 1 + i;

                                DoPushes();

                                // Found a match
                                // TODO: faster seek?
                                Current = TermEnum.Term().Bytes();
                                return SeekStatus.NOT_FOUND;
                            }
                        }
                    }

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek END");
                    }

                    // No position in the target led to a successful seek.
                    Current = null;
                    return SeekStatus.END;
                }
                else
                {
                    // We found a non-exact but non-null term; this one
                    // is fun -- just treat it like next, by pretending
                    // requested term was prev:
                    PrevTerm.CopyBytes(term);

                    if (DEBUG_SURROGATES)
                    {
                        Console.WriteLine("  seek hit non-exact term=" + UnicodeUtil.ToHexString(t.Text()));
                    }

                    BytesRef br = t.Bytes();
                    Debug.Assert(br.Offset == 0);

                    SetNewSuffixStart(term, br);

                    SurrogateDance();

                    // Re-read the enumerator's term after SurrogateDance().
                    Term t2 = TermEnum.Term();
                    if (t2 == null || t2.Field() != InternedFieldName)
                    {
                        // PreFlex codec interns field names; verify:
                        Debug.Assert(t2 == null || !t2.Field().Equals(InternedFieldName));
                        Current = null;
                        return SeekStatus.END;
                    }
                    else
                    {
                        Current = t2.Bytes();
                        Debug.Assert(!UnicodeSortOrder || term.CompareTo(Current) < 0, "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(Current.Utf8ToString()));
                        return SeekStatus.NOT_FOUND;
                    }
                }
            }
        /// <summary>
        /// Enumerates every term of field "f" in <paramref name="r"/> and asserts that
        /// each term compares strictly greater than the one before it and is a member
        /// of <paramref name="allTerms"/>. Afterwards every term that was enumerated is
        /// looked up again through <c>SeekCeil</c>, which must report <c>FOUND</c>.
        /// </summary>
        /// <param name="r">Reader whose field "f" is inspected.</param>
        /// <param name="allTerms">Terms that are allowed to appear in the field.</param>
        /// <param name="isTop">
        /// When <c>true</c>, the enumerated terms must additionally be set-equal to
        /// <paramref name="allTerms"/>.
        /// </param>
        private void CheckTermsOrder(IndexReader r, ISet<string> allTerms, bool isTop)
        {
            TermsEnum termsEnum = MultiFields.GetFields(r).Terms("f").Iterator(null);

            // Starts out empty; holds a copy of the most recently visited term.
            BytesRef previous = new BytesRef();
            HashSet<string> visited = new HashSet<string>();

            // Pass 1: walk the enumeration, checking ordering and membership.
            for (BytesRef current = termsEnum.Next(); current != null; current = termsEnum.Next())
            {
                bool strictlyAscending = previous.CompareTo(current) < 0;
                Assert.IsTrue(strictlyAscending);
                previous.CopyBytes(current);

                string text = current.Utf8ToString();
                Assert.IsTrue(allTerms.Contains(text), "term " + TermDesc(text) + " was not added to index (count=" + allTerms.Count + ")");
                visited.Add(text);
            }

            if (isTop)
            {
                bool sameTermSet = allTerms.SetEquals(visited);
                Assert.IsTrue(sameTermSet);
            }

            // Pass 2: every term seen above must be reachable by an exact seek.
            foreach (string text in visited)
            {
                BytesRef target = new BytesRef(text);
                TermsEnum.SeekStatus status = termsEnum.SeekCeil(target);
                Assert.AreEqual(TermsEnum.SeekStatus.FOUND, status, "seek failed for term=" + TermDesc(target.Utf8ToString()));
            }
        }
Exemple #26
0
        /// <summary>
        /// Consistency check for a <see cref="SortedSetDocValues"/> instance.
        /// For every document below <c>reader.MaxDoc</c> it verifies that the ordinals
        /// returned by <c>NextOrd()</c> are strictly increasing and lie in
        /// <c>[0, ValueCount - 1]</c>, that documents flagged in
        /// <paramref name="docsWithField"/> have at least one ordinal and unflagged
        /// documents have none, and - when <paramref name="dv"/> is a
        /// <see cref="RandomAccessOrds"/> - that <c>OrdAt</c> and <c>Cardinality</c>
        /// agree with the sequential iteration. It then verifies that every ordinal
        /// was used by some document and that the values looked up by ordinal are
        /// strictly increasing.
        /// </summary>
        /// <param name="fieldName">Field name, used only in error messages.</param>
        /// <param name="reader">Supplies the number of documents to scan.</param>
        /// <param name="dv">The doc values under test.</param>
        /// <param name="docsWithField">Bit per document telling whether it has a value.</param>
        /// <exception cref="Exception">Thrown on the first inconsistency found.</exception>
        private static void CheckSortedSetDocValues(string fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField)
        {
            long expectedMaxOrd = dv.ValueCount - 1;
            LongBitSet usedOrds = new LongBitSet(dv.ValueCount);
            long observedMaxOrd = -1;

            // Null unless dv also supports positional access to a document's ordinals.
            RandomAccessOrds randomAccess = dv as RandomAccessOrds;

            for (int docId = 0; docId < reader.MaxDoc; docId++)
            {
                dv.Document = docId;

                if (!docsWithField.Get(docId))
                {
                    // Document is marked missing: it must not expose any ordinal.
                    long unexpectedOrd = dv.NextOrd();
                    if (unexpectedOrd != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        throw new Exception("dv for field: " + fieldName + " is marked missing but has ord=" + unexpectedOrd + " for doc: " + docId);
                    }
                    if (randomAccess != null)
                    {
                        long reportedCount = randomAccess.Cardinality();
                        if (reportedCount != 0)
                        {
                            throw new Exception("dv for field: " + fieldName + " is marked missing but has cardinality " + reportedCount + " for doc: " + docId);
                        }
                    }
                    continue;
                }

                // Document has a value: walk its ordinals in iteration order.
                long previousOrd = -1;
                int ordCount = 0;
                for (long ord = dv.NextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.NextOrd())
                {
                    if (ord <= previousOrd)
                    {
                        throw new Exception("ords out of order: " + ord + " <= " + previousOrd + " for doc: " + docId);
                    }
                    if (ord < 0 || ord > expectedMaxOrd)
                    {
                        throw new Exception("ord out of bounds: " + ord);
                    }
                    if (randomAccess != null)
                    {
                        // Positional lookup must return the same ordinal as the iterator.
                        long positionalOrd = randomAccess.OrdAt(ordCount);
                        if (ord != positionalOrd)
                        {
                            throw new Exception("ordAt(" + ordCount + ") inconsistent, expected=" + ord + ",got=" + positionalOrd + " for doc: " + docId);
                        }
                    }

                    previousOrd = ord;
                    observedMaxOrd = Math.Max(observedMaxOrd, ord);
                    usedOrds.Set(ord);
                    ordCount++;
                }

                if (ordCount == 0)
                {
                    throw new Exception("dv for field: " + fieldName + " has no ordinals but is not marked missing for doc: " + docId);
                }
                if (randomAccess != null)
                {
                    long reportedCount = randomAccess.Cardinality();
                    if (ordCount != reportedCount)
                    {
                        throw new Exception("cardinality inconsistent, expected=" + ordCount + ",got=" + reportedCount + " for doc: " + docId);
                    }
                }
            }

            // The highest ordinal seen must be ValueCount - 1, with no unused ordinals below it.
            if (expectedMaxOrd != observedMaxOrd)
            {
                throw new Exception("dv for field: " + fieldName + " reports wrong maxOrd=" + expectedMaxOrd + " but this is not the case: " + observedMaxOrd);
            }
            if (usedOrds.Cardinality() != dv.ValueCount)
            {
                throw new Exception("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.ValueCount + " but only used: " + usedOrds.Cardinality());
            }

            // Values resolved by ascending ordinal must themselves be strictly ascending.
            BytesRef previousValue = null;
            BytesRef lookupScratch = new BytesRef();
            for (long ordIndex = 0; ordIndex <= expectedMaxOrd; ordIndex++)
            {
                dv.LookupOrd(ordIndex, lookupScratch);
                Debug.Assert(lookupScratch.Valid);
                if (previousValue != null && lookupScratch.CompareTo(previousValue) <= 0)
                {
                    throw new Exception("dv for field: " + fieldName + " has ords out of order: " + previousValue + " >=" + lookupScratch);
                }
                // Copy, because the scratch buffer is reused on the next lookup.
                previousValue = BytesRef.DeepCopyOf(lookupScratch);
            }
        }
        /// <summary>
        /// Builds the final automaton from the entries supplied by
        /// <paramref name="sorter"/>. An entry is added only when it does not compare
        /// equal to the most recently added entry (the comparison baseline starts as
        /// an empty <see cref="BytesRef"/>).
        /// </summary>
        /// <param name="sorter">Source of the entries to add.</param>
        /// <returns>
        /// The finished FST, or <c>null</c> when <paramref name="sorter"/> produced no
        /// entries at all.
        /// </returns>
        private FST<object> BuildAutomaton(BytesRefSorter sorter)
        {
            // The automaton carries no per-entry output, so one shared "no output" value is used.
            Outputs<object> noOutputs = NoOutputs.Singleton;
            object emptyOutput = noOutputs.NoOutput;
            Builder<object> fstBuilder = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, noOutputs, null, false, PackedInts.DEFAULT, true, 15);

            BytesRef lastAdded = new BytesRef();
            IntsRef intsScratch = new IntsRef();
            int entriesSeen = 0;
            BytesRefIterator entries = sorter.GetEnumerator();

            for (BytesRef entry = entries.Next(); entry != null; entry = entries.Next())
            {
                entriesSeen++;

                // Skip an entry equal to the one added last.
                if (lastAdded.CompareTo(entry) == 0)
                {
                    continue;
                }

                fstBuilder.Add(Util.Fst.Util.ToIntsRef(entry, intsScratch), emptyOutput);
                lastAdded.CopyBytes(entry);
            }

            if (entriesSeen == 0)
            {
                return null;
            }

            return fstBuilder.Finish();
        }
Exemple #28
0
 /// <summary>
 /// Consistency check for a <see cref="SortedDocValues"/> instance. After running
 /// <c>CheckBinaryDocValues</c> on the same arguments, it verifies for every
 /// document below <c>reader.MaxDoc</c> that an ordinal of -1 coincides with the
 /// document being unflagged in <paramref name="docsWithField"/>, and that any
 /// other ordinal lies in <c>[0, ValueCount - 1]</c> and belongs to a flagged
 /// document. It then verifies that every ordinal was used by some document and
 /// that the values looked up by ordinal are strictly increasing.
 /// </summary>
 /// <param name="fieldName">Field name, used in error messages.</param>
 /// <param name="reader">Supplies the number of documents to scan.</param>
 /// <param name="dv">The doc values under test.</param>
 /// <param name="docsWithField">Bit per document telling whether it has a value.</param>
 /// <exception cref="Exception">Thrown on the first inconsistency found.</exception>
 private static void CheckSortedDocValues(string fieldName, AtomicReader reader, SortedDocValues dv, Bits docsWithField)
 {
     CheckBinaryDocValues(fieldName, reader, dv, docsWithField);

     int expectedMaxOrd = dv.ValueCount - 1;
     FixedBitSet usedOrds = new FixedBitSet(dv.ValueCount);
     int observedMaxOrd = -1;

     for (int docId = 0; docId < reader.MaxDoc; docId++)
     {
         int ord = dv.GetOrd(docId);

         if (ord == -1)
         {
             // -1 is only legal for documents that are marked as missing.
             if (docsWithField.Get(docId))
             {
                 throw new Exception("dv for field: " + fieldName + " has -1 ord but is not marked missing for doc: " + docId);
             }
             continue;
         }

         if (ord < -1 || ord > expectedMaxOrd)
         {
             throw new Exception("ord out of bounds: " + ord);
         }

         // A real ordinal is only legal for documents that are marked as present.
         if (!docsWithField.Get(docId))
         {
             throw new Exception("dv for field: " + fieldName + " is missing but has ord=" + ord + " for doc: " + docId);
         }

         if (ord > observedMaxOrd)
         {
             observedMaxOrd = ord;
         }
         usedOrds.Set(ord);
     }

     // The highest ordinal seen must be ValueCount - 1, with no unused ordinals below it.
     if (expectedMaxOrd != observedMaxOrd)
     {
         throw new Exception("dv for field: " + fieldName + " reports wrong maxOrd=" + expectedMaxOrd + " but this is not the case: " + observedMaxOrd);
     }
     if (usedOrds.Cardinality() != dv.ValueCount)
     {
         throw new Exception("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.ValueCount + " but only used: " + usedOrds.Cardinality());
     }

     // Values resolved by ascending ordinal must themselves be strictly ascending.
     BytesRef previousValue = null;
     BytesRef lookupScratch = new BytesRef();
     for (int ordIndex = 0; ordIndex <= expectedMaxOrd; ordIndex++)
     {
         dv.LookupOrd(ordIndex, lookupScratch);
         Debug.Assert(lookupScratch.Valid);
         if (previousValue != null && lookupScratch.CompareTo(previousValue) <= 0)
         {
             throw new Exception("dv for field: " + fieldName + " has ords out of order: " + previousValue + " >=" + lookupScratch);
         }
         // Copy, because the scratch buffer is reused on the next lookup.
         previousValue = BytesRef.DeepCopyOf(lookupScratch);
     }
 }