// NOTE: slow! (linear scan)
/// <summary>
/// Moves to <paramref name="text"/>, or to the first term that sorts after it,
/// by stepping through the terms one at a time.
/// </summary>
/// <param name="text"> Term to seek to. </param>
/// <returns>
/// <c>FOUND</c> on an exact match, <c>NOT_FOUND</c> when stopped on a later term,
/// <c>END</c> when <c>MoveNext()</c> runs out first.
/// </returns>
public override SeekStatus SeekCeil(BytesRef text)
{
    if (nextTerm != 0)
    {
        int relativeToCurrent = text.CompareTo(term);
        if (relativeToCurrent == 0)
        {
            return SeekStatus.FOUND;
        }
        if (relativeToCurrent < 0)
        {
            // Target sorts before the current term: start over from tvfFP.
            nextTerm = 0;
            tvf.Seek(tvfFP);
        }
    }

    while (MoveNext())
    {
        int order = text.CompareTo(term);
        if (order > 0)
        {
            continue; // current term is still before the target
        }
        return order == 0 ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
    }

    return SeekStatus.END;
}
// NOTE: slow! (linear scan)
/// <summary>
/// Moves to <paramref name="text"/>, or to the first term that sorts after it,
/// by calling <c>Next()</c> until the target is reached or passed.
/// </summary>
/// <param name="text"> Term to seek to. </param>
/// <returns>
/// <c>FOUND</c> on an exact match, <c>NOT_FOUND</c> when stopped on a later term,
/// <c>END</c> when <c>Next()</c> returns null first.
/// </returns>
public override SeekStatus SeekCeil(BytesRef text)
{
    if (NextTerm != 0)
    {
        int relativeToCurrent = text.CompareTo(Term_Renamed);
        if (relativeToCurrent == 0)
        {
            return SeekStatus.FOUND;
        }
        if (relativeToCurrent < 0)
        {
            // Target sorts before the current term: start over from TvfFP.
            NextTerm = 0;
            Tvf.Seek(TvfFP);
        }
    }

    while (Next() != null)
    {
        int order = text.CompareTo(Term_Renamed);
        if (order > 0)
        {
            continue; // current term is still before the target
        }
        return order == 0 ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
    }

    return SeekStatus.END;
}
/// <summary>
/// Positions the enum on <paramref name="text"/> or on the first term after it.
/// Binary-searches only the index entries (entry <c>k</c> is reached with
/// <c>DoSeek(k * outerInstance.interval)</c>), then scans forward from the
/// entry just before the insertion point.
/// </summary>
/// <param name="text"> Term to seek to. </param>
/// <returns>
/// <c>FOUND</c> on an exact match, <c>NOT_FOUND</c> when positioned on a greater term,
/// <c>END</c> when there are no index values or the scan runs out of terms.
/// </returns>
public override TermsEnum.SeekStatus SeekCeil(BytesRef text)
{
    long low = 0;
    long high = outerInstance.numIndexValues - 1;

    while (low <= high)
    {
        // Unsigned 64-bit shift (Java's ">>> 1" on a long). Going through
        // uint/int here would truncate the sum to 32 bits and yield a wrong
        // midpoint once the bounds exceed the 32-bit range.
        long mid = (long)((ulong)(low + high) >> 1);
        DoSeek(mid * outerInstance.interval);
        int cmp = termBuffer.CompareTo(text);

        if (cmp < 0)
        {
            low = mid + 1;
        }
        else if (cmp > 0)
        {
            high = mid - 1;
        }
        else
        {
            // we got lucky, found an indexed term
            SetTerm();
            return TermsEnum.SeekStatus.FOUND;
        }
    }

    if (outerInstance.numIndexValues == 0)
    {
        return TermsEnum.SeekStatus.END;
    }

    // block before insertion point; -1 is passed when the target sorts
    // before the first index entry
    long block = low - 1;
    DoSeek(block < 0 ? -1 : block * outerInstance.interval);

    while (MoveNext())
    {
        int cmp = termBuffer.CompareTo(text);
        if (cmp == 0)
        {
            SetTerm();
            return TermsEnum.SeekStatus.FOUND;
        }
        else if (cmp > 0)
        {
            SetTerm();
            return TermsEnum.SeekStatus.NOT_FOUND;
        }
    }

    return TermsEnum.SeekStatus.END;
}
/// <summary>
/// If <paramref name="key"/> exists, returns its ordinal, else
/// returns <c>-insertionPoint-1</c>, like a standard binary search.
/// </summary>
/// <param name="key"> Key to look up. </param>
/// <returns>
/// The ordinal whose value (as read by <c>LookupOrd</c>) compares equal to
/// <paramref name="key"/>, or <c>-(insertionPoint + 1)</c> when there is none.
/// </returns>
public virtual long LookupTerm(BytesRef key)
{
    BytesRef spare = new BytesRef();
    long low = 0;
    long high = ValueCount - 1;

    while (low <= high)
    {
        // Unsigned 64-bit shift (Java's ">>> 1" on a long). Going through
        // uint/int here would truncate the sum to 32 bits and yield a wrong
        // midpoint once the ordinals exceed the 32-bit range.
        long mid = (long)((ulong)(low + high) >> 1);
        LookupOrd(mid, spare);
        int cmp = spare.CompareTo(key);

        if (cmp < 0)
        {
            low = mid + 1;
        }
        else if (cmp > 0)
        {
            high = mid - 1;
        }
        else
        {
            return mid; // key found
        }
    }

    return -(low + 1); // key not found.
}
/// <summary>
/// Linear-scan seek: positions the enum on <paramref name="text"/> or on the
/// first term that compares greater than it.
/// </summary>
/// <param name="text"> Term to seek to. </param>
/// <returns>
/// <c>FOUND</c> on an exact match, <c>NOT_FOUND</c> when stopped on a greater term,
/// <c>END</c> when <c>Next()</c> returns null first.
/// </returns>
public override TermsEnum.SeekStatus SeekCeil(BytesRef text)
{
    if (ord < numTerms && ord >= 0)
    {
        int relativeToTarget = Term.CompareTo(text);
        if (relativeToTarget == 0)
        {
            return TermsEnum.SeekStatus.FOUND;
        }
        if (relativeToTarget > 0)
        {
            // Already past the target: restart the scan.
            Reset();
        }
    }

    // linear scan
    for (BytesRef candidate = Next(); candidate != null; candidate = Next())
    {
        int order = candidate.CompareTo(text);
        if (order > 0)
        {
            return TermsEnum.SeekStatus.NOT_FOUND;
        }
        if (order == 0)
        {
            return TermsEnum.SeekStatus.FOUND;
        }
    }

    return TermsEnum.SeekStatus.END;
}
/// <summary>
/// Builds the final automaton from the entries produced by <paramref name="sorter"/>.
/// An entry that compares equal to the previously added one is skipped.
/// </summary>
/// <param name="sorter"> Source of the entries to add. </param>
/// <returns> The finished FST, or <c>null</c> when the sorter produced no entries. </returns>
private FST<object> BuildAutomaton(IBytesRefSorter sorter)
{
    Outputs<object> outputs = NoOutputs.Singleton;
    object noOutput = outputs.NoOutput;
    Builder<object> fstBuilder = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, outputs, null, false, PackedInt32s.DEFAULT, true, 15);

    BytesRef previous = new BytesRef();
    Int32sRef intsScratch = new Int32sRef();
    int seen = 0;

    IBytesRefIterator entries = sorter.GetIterator();
    for (BytesRef entry = entries.Next(); entry != null; entry = entries.Next())
    {
        seen++;
        if (previous.CompareTo(entry) == 0)
        {
            continue; // same as the last remembered entry
        }
        fstBuilder.Add(Util.Fst.Util.ToInt32sRef(entry, intsScratch), noOutput);
        previous.CopyBytes(entry);
    }

    if (seen == 0)
    {
        return null;
    }
    return fstBuilder.Finish();
}
/// <summary>
/// Enumerates the terms of field "f" in <paramref name="r"/>, asserting that they
/// are strictly increasing and all contained in <paramref name="allTerms"/>, then
/// asserts that seeking to each seen term reports <c>FOUND</c>.
/// </summary>
/// <param name="r"> Reader whose "f" field is checked. </param>
/// <param name="allTerms"> Terms that were added to the index. </param>
/// <param name="isTop"> When true, the seen terms must equal <paramref name="allTerms"/> exactly. </param>
private void CheckTermsOrder(IndexReader r, ISet<string> allTerms, bool isTop)
{
    TermsEnum terms = MultiFields.GetFields(r).GetTerms("f").GetEnumerator();

    BytesRef last = new BytesRef();
    ISet<string> seenTerms = new JCG.HashSet<string>();

    while (terms.MoveNext())
    {
        BytesRef term = terms.Term;

        Assert.IsTrue(last.CompareTo(term) < 0);
        last.CopyBytes(term);

        string s = term.Utf8ToString();
        Assert.IsTrue(allTerms.Contains(s), "term " + TermDesc(s) + " was not added to index (count=" + allTerms.Count + ")");
        seenTerms.Add(s);
    }

    if (isTop)
    {
        Assert.IsTrue(allTerms.SetEquals(seenTerms));
    }

    // Test seeking. foreach (rather than a hand-driven IEnumerator) makes sure
    // the enumerator is disposed.
    foreach (string seen in seenTerms)
    {
        BytesRef tr = new BytesRef(seen);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(tr), "seek failed for term=" + TermDesc(tr.Utf8ToString()));
    }
}
/// <summary>
/// Binary-searches the ordinals for <paramref name="key"/>. If it exists, returns
/// its ordinal, else returns <c>-insertionPoint-1</c>, like
/// <see cref="System.Array.BinarySearch(System.Array, int, int, object)"/>
/// </summary>
/// <param name="key"> Key to look up</param>
/// <returns> The matching ordinal, or <c>-(insertionPoint + 1)</c> when absent. </returns>
public virtual int LookupTerm(BytesRef key)
{
    BytesRef probe = new BytesRef();
    int lo = 0;
    int hi = ValueCount - 1;

    while (lo <= hi)
    {
        int middle = (lo + hi).TripleShift(1);
        LookupOrd(middle, probe);

        int order = probe.CompareTo(key);
        if (order == 0)
        {
            return middle; // key found
        }

        if (order < 0)
        {
            lo = middle + 1;
        }
        else
        {
            hi = middle - 1;
        }
    }

    // key not found.
    return -(lo + 1);
}
/// <summary>
/// Counts the terms enumerated by <paramref name="q"/> over its field, asserting
/// along the way that each term compares greater than the one before it.
/// </summary>
/// <param name="q"> Query whose terms enum is walked. </param>
/// <returns> Number of terms returned, or 0 when the field has no terms. </returns>
private int CountTerms(MultiTermQuery q)
{
    Terms terms = MultiFields.GetTerms(Reader, q.Field);
    if (terms == null)
    {
        return 0;
    }

    TermsEnum termEnum = q.GetTermsEnum(terms);
    Assert.IsNotNull(termEnum);

    int total = 0;
    BytesRef previous = null;
    for (BytesRef cur = termEnum.Next(); cur != null; cur = termEnum.Next())
    {
        total++;
        if (previous != null)
        {
            Assert.IsTrue(previous.CompareTo(cur) < 0);
        }
        // Copy, because the next loop step compares against this value.
        previous = BytesRef.DeepCopyOf(cur);
    }

    // LUCENE-3314: the results after Next() already returned null are undefined,
    // so no further Next() call is asserted on here.
    return total;
}
/// <summary>
/// Compares two terms, returning a negative integer if this
/// term belongs before the argument, zero if this term is equal to the
/// argument, and a positive integer if this term belongs after the argument.
/// <para/>
/// The ordering of terms is first by field, then by text.
/// </summary>
/// <param name="other"> Term to compare against. </param>
/// <returns> Negative, zero or positive as described above. </returns>
public int CompareTo(Term other)
{
    // Ordinal comparison: string.CompareTo is culture-sensitive, so it can
    // disagree with the ordinal Equals check it was paired with and vary with
    // the current culture.
    // NOTE(review): assumes Field_Renamed is a string - confirm.
    int fieldOrder = string.CompareOrdinal(Field_Renamed, other.Field_Renamed);
    if (fieldOrder != 0)
    {
        return fieldOrder;
    }

    return Bytes_Renamed.CompareTo(other.Bytes_Renamed);
}
/// <summary>
/// Returns the enum for this range: <c>TermsEnum.EMPTY</c> when the lower bound
/// sorts after the upper bound, the plain iterator when the range places no
/// restriction, otherwise a <c>TermRangeTermsEnum</c> over the iterator.
/// </summary>
/// <param name="terms"> Terms to enumerate. </param>
/// <param name="atts"> Not used by this implementation. </param>
public override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
{
    bool hasBothBounds = LowerTerm_Renamed != null && UpperTerm_Renamed != null;
    if (hasBothBounds && LowerTerm_Renamed.CompareTo(UpperTerm_Renamed) > 0)
    {
        return TermsEnum.EMPTY;
    }

    TermsEnum tenum = terms.Iterator(null);

    // A missing lower bound, or an inclusive empty one, excludes nothing.
    bool lowerIsOpen = LowerTerm_Renamed == null || (IncludeLower && LowerTerm_Renamed.Length == 0);
    if (lowerIsOpen && UpperTerm_Renamed == null)
    {
        return tenum;
    }

    return new TermRangeTermsEnum(tenum, LowerTerm_Renamed, UpperTerm_Renamed, IncludeLower, IncludeUpper);
}
/// <summary>
/// Compares this value with another <c>MutableValueStr</c>: first by <c>Value</c>,
/// then by <c>Exists</c>, where an existing value sorts after a non-existing one.
/// </summary>
/// <param name="other"> Object to compare against; cast to <c>MutableValueStr</c>. </param>
/// <returns> The <c>Value</c> comparison when non-zero, otherwise 0, 1 or -1 based on <c>Exists</c>. </returns>
public override int CompareSameType(object other)
{
    MutableValueStr that = (MutableValueStr)other;

    int byValue = Value.CompareTo(that.Value);
    if (byValue != 0)
    {
        return byValue;
    }

    if (Exists != that.Exists)
    {
        return Exists ? 1 : -1;
    }
    return 0;
}
/// <summary>
/// Returns the enum for this range: <c>TermsEnum.EMPTY</c> when the lower bound
/// sorts after the upper bound, the plain enumerator when the range places no
/// restriction, otherwise a <c>TermRangeTermsEnum</c> over the enumerator.
/// </summary>
/// <param name="terms"> Terms to enumerate. </param>
/// <param name="atts"> Not used by this implementation. </param>
protected override TermsEnum GetTermsEnum(Terms terms, AttributeSource atts)
{
    bool hasBothBounds = lowerTerm != null && upperTerm != null;
    if (hasBothBounds && lowerTerm.CompareTo(upperTerm) > 0)
    {
        return TermsEnum.EMPTY;
    }

    TermsEnum tenum = terms.GetEnumerator();

    // A missing lower bound, or an inclusive empty one, excludes nothing.
    bool lowerIsOpen = lowerTerm == null || (includeLower && lowerTerm.Length == 0);
    if (lowerIsOpen && upperTerm == null)
    {
        return tenum;
    }

    return new TermRangeTermsEnum(tenum, lowerTerm, upperTerm, includeLower, includeUpper);
}
// single straight enum
/// <summary>
/// Walks every field and term of <paramref name="reader"/> in order and asserts that
/// the i-th term seen equals <c>fieldTerms[i]</c>, that terms within a field are
/// strictly increasing, and that the total equals <paramref name="uniqueTermCount"/>.
/// </summary>
/// <param name="fieldTerms"> Expected terms, in enumeration order. </param>
/// <param name="reader"> Reader to enumerate. </param>
/// <param name="uniqueTermCount"> Expected number of terms across all fields. </param>
private void DoTestStraightEnum(IList<Term> fieldTerms, IndexReader reader, int uniqueTermCount)
{
    if (Verbose)
    {
        Console.WriteLine("\nTEST: top now enum reader=" + reader);
    }

    Fields fields = MultiFields.GetFields(reader);

    // Test straight enum:
    int termCount = 0;
    foreach (string field in fields)
    {
        Terms terms = fields.GetTerms(field);
        Assert.IsNotNull(terms);

        TermsEnum termsEnum = terms.GetEnumerator();
        BytesRef previousText = null;

        while (termsEnum.MoveNext())
        {
            BytesRef text = termsEnum.Term;
            Term exp = fieldTerms[termCount];

            if (Verbose)
            {
                Console.WriteLine(" got term=" + field + ":" + UnicodeUtil.ToHexString(text.Utf8ToString()));
                Console.WriteLine(" exp=" + exp.Field + ":" + UnicodeUtil.ToHexString(exp.Text));
                Console.WriteLine();
            }

            if (previousText != null)
            {
                Assert.IsTrue(previousText.CompareTo(text) < 0);
                previousText.CopyBytes(text);
            }
            else
            {
                // First term of this field: nothing to compare against yet.
                previousText = BytesRef.DeepCopyOf(text);
            }

            Assert.AreEqual(exp.Field, field);
            Assert.AreEqual(exp.Bytes, text);
            termCount++;
        }

        if (Verbose)
        {
            Console.WriteLine(" no more terms for field=" + field);
        }
    }

    Assert.AreEqual(uniqueTermCount, termCount);
}
/// <summary>
/// Runs <c>AssertSame</c> over at least 1000 random ranges. The two random values
/// are ordered so the smaller one is passed as the lower bound, and both
/// inclusion flags are chosen at random.
/// </summary>
public virtual void TestRanges()
{
    int iterations = AtLeast(1000);
    for (int iter = 0; iter < iterations; iter++)
    {
        BytesRef first = new BytesRef(TestUtil.RandomUnicodeString(Random));
        BytesRef second = new BytesRef(TestUtil.RandomUnicodeString(Random));

        // Swap when the second value sorts before the first.
        bool swapped = second.CompareTo(first) < 0;
        BytesRef lower = swapped ? second : first;
        BytesRef upper = swapped ? first : second;

        AssertSame(lower, upper, Random.NextBoolean(), Random.NextBoolean());
    }
}
/// <summary>
/// Orders <c>TermData</c> instances by their <c>text</c> member.
/// </summary>
/// <param name="o"> Instance to compare against. </param>
/// <returns> The result of comparing this instance's <c>text</c> with <c>o.text</c>. </returns>
public virtual int CompareTo(TermData o)
{
    var otherText = o.text;
    return text.CompareTo(otherText);
}
// NOTE: slow! (linear scan)
/// <summary>
/// Seeks to <paramref name="text"/> or the first term after it, advancing with
/// <c>Next()</c> one term at a time.
/// </summary>
/// <param name="text"> Term to seek to. </param>
/// <returns>
/// <c>FOUND</c> on an exact match, <c>NOT_FOUND</c> when stopped on a later term,
/// <c>END</c> when <c>Next()</c> returns null first.
/// </returns>
public override SeekStatus SeekCeil(BytesRef text)
{
    if (NextTerm != 0)
    {
        int versusCurrent = text.CompareTo(Term_Renamed);
        if (versusCurrent == 0)
        {
            return SeekStatus.FOUND;
        }
        if (versusCurrent < 0)
        {
            // Target precedes the current term: restart from TvfFP.
            NextTerm = 0;
            Tvf.Seek(TvfFP);
        }
    }

    while (Next() != null)
    {
        int order = text.CompareTo(Term_Renamed);
        if (order == 0)
        {
            return SeekStatus.FOUND;
        }
        if (order < 0)
        {
            return SeekStatus.NOT_FOUND;
        }
    }

    return SeekStatus.END;
}
/// <summary>
/// Runs <c>AssertSame</c> over at least 1000 random ranges. The two random values
/// are ordered so the smaller one is passed as the lower bound, and both
/// inclusion flags are chosen at random.
/// </summary>
public virtual void TestRanges()
{
    int iterations = AtLeast(1000);
    for (int iter = 0; iter < iterations; iter++)
    {
        BytesRef first = new BytesRef(TestUtil.RandomUnicodeString(Random()));
        BytesRef second = new BytesRef(TestUtil.RandomUnicodeString(Random()));

        // Swap when the second value sorts before the first.
        bool swapped = second.CompareTo(first) < 0;
        BytesRef lower = swapped ? second : first;
        BytesRef upper = swapped ? first : second;

        AssertSame(lower, upper, Random().NextBoolean(), Random().NextBoolean());
    }
}
/// <summary>
/// Orders by <c>term</c> first and breaks ties by <c>freq</c>.
/// </summary>
/// <remarks>
/// The two comparison results must not be added together: opposite signs would
/// cancel to 0 and report unequal instances as equal.
/// </remarks>
/// <param name="other"> Instance to compare against. </param>
/// <returns> Negative, zero or positive according to the (term, freq) ordering. </returns>
public int CompareTo(TermAndFreq other)
{
    int byTerm = term.CompareTo(other.term);
    if (byTerm != 0)
    {
        return byTerm;
    }

    return freq.CompareTo(other.freq);
}
/// <summary>
/// Seeks to <paramref name="target"/> or the first term after it. Binary-searches
/// <c>outerInstance.m_indexedTermsArray</c> to pick the block to start from,
/// then steps forward with <c>Next()</c>.
/// </summary>
/// <param name="target"> Term to seek to. </param>
/// <returns>
/// <c>FOUND</c> on an exact match, <c>NOT_FOUND</c> when positioned on a later term,
/// <c>END</c> when <c>term</c> becomes null.
/// </returns>
public override SeekStatus SeekCeil(BytesRef target)
{
    // already here
    if (term != null && term.Equals(target))
    {
        return SeekStatus.FOUND;
    }

    int hit = Array.BinarySearch(outerInstance.m_indexedTermsArray, target);
    if (hit >= 0)
    {
        // The target is itself an indexed term.
        TermsEnum.SeekStatus exactStatus = termsEnum.SeekCeil(target);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(exactStatus == TermsEnum.SeekStatus.FOUND);
        }
        ord = hit << outerInstance.indexIntervalBits;
        SetTerm();
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(term != null);
        }
        return SeekStatus.FOUND;
    }

    // No exact hit: recover the insertion point from the negative result.
    int insertionPoint = -hit - 1;
    if (insertionPoint == 0)
    {
        // The target sorts before the first indexed term.
        TermsEnum.SeekStatus firstStatus = termsEnum.SeekCeil(target);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(firstStatus == TermsEnum.SeekStatus.NOT_FOUND);
        }
        ord = 0;
        SetTerm();
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(term != null);
        }
        return SeekStatus.NOT_FOUND;
    }

    // Back up to the start of the block that would hold the target.
    int blockIdx = insertionPoint - 1;

    // No seek is needed when we are already in that block and not past the target.
    bool canScanFromHere = (ord >> outerInstance.indexIntervalBits) == blockIdx
        && term != null
        && term.CompareTo(target) <= 0;

    if (!canScanFromHere)
    {
        // seek to the right block
        TermsEnum.SeekStatus blockStatus = termsEnum.SeekCeil(outerInstance.m_indexedTermsArray[blockIdx]);
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(blockStatus == TermsEnum.SeekStatus.FOUND);
        }
        ord = blockIdx << outerInstance.indexIntervalBits;
        SetTerm();
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(term != null); // should be non-null since it's in the index
        }
    }

    while (term != null && term.CompareTo(target) < 0)
    {
        Next();
    }

    if (term == null)
    {
        return SeekStatus.END;
    }
    return term.CompareTo(target) == 0 ? SeekStatus.FOUND : SeekStatus.NOT_FOUND;
}
/// <summary>
/// Seeks the underlying term enum to <paramref name="term"/> within this field and
/// handles three outcomes: an exact match, landing outside the field (EOF), and
/// landing on a different term of the field.
/// </summary>
/// <param name="term"> Term bytes to seek to. </param>
/// <returns>
/// <c>FOUND</c> on an exact match, <c>NOT_FOUND</c> when <c>current</c> is set to another
/// term, <c>END</c> when <c>current</c> is set to null.
/// </returns>
public override SeekStatus SeekCeil(BytesRef term)
{
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
    }

    skipNext = false;
    TermInfosReader tis = outerInstance.TermsDict;
    Term t0 = new Term(fieldInfo.Name, term);

    Debug.Assert(termEnum != null);

    tis.SeekEnum(termEnum, t0, false);

    Term t = termEnum.Term();
    bool landedInField = t != null && t.Field == internedFieldName;

    if (landedInField && term.BytesEquals(t.Bytes))
    {
        // Exact match: no surrogate handling needed.
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek exact match");
        }
        current = t.Bytes;
        return SeekStatus.FOUND;
    }

    if (!landedInField)
    {
        // TODO: maybe we can handle this like the next()
        // into null? set term as prevTerm then dance?
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek hit EOF");
        }

        // We hit EOF; try end-case surrogate dance: if we
        // find an E, try swapping in S, backwards:
        scratchTerm.CopyBytes(term);
        Debug.Assert(scratchTerm.Offset == 0);

        for (int i = scratchTerm.Length - 1; i >= 0; i--)
        {
            if (!IsHighBMPChar(scratchTerm.Bytes, i))
            {
                continue;
            }

            if (DEBUG_SURROGATES)
            {
                Console.WriteLine(" found E pos=" + i + "; try seek");
            }

            if (!SeekToNonBMP(seekTermEnum, scratchTerm, i))
            {
                continue;
            }

            scratchTerm.CopyBytes(seekTermEnum.Term().Bytes);
            outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), false);

            newSuffixStart = 1 + i;
            DoPushes();

            // Found a match
            // TODO: faster seek?
            current = termEnum.Term().Bytes;
            return SeekStatus.NOT_FOUND;
        }

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek END");
        }
        current = null;
        return SeekStatus.END;
    }

    // Landed on a different, non-null term of this field: treat it like a
    // next() by pretending the requested term was the previous one.
    prevTerm.CopyBytes(term);

    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" seek hit non-exact term=" + UnicodeUtil.ToHexString(t.Text()));
    }

    BytesRef br = t.Bytes;
    Debug.Assert(br.Offset == 0);

    SetNewSuffixStart(term, br);
    SurrogateDance();

    Term t2 = termEnum.Term();
    if (t2 == null || t2.Field != internedFieldName)
    {
        // PreFlex codec interns field names; verify:
        Debug.Assert(t2 == null || !t2.Field.Equals(internedFieldName, StringComparison.Ordinal));
        current = null;
        return SeekStatus.END;
    }

    current = t2.Bytes;
    Debug.Assert(!unicodeSortOrder || term.CompareTo(current) < 0, "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(current.Utf8ToString()));
    return SeekStatus.NOT_FOUND;
}
/// <summary>
/// Seeks to <paramref name="target"/> or the first term after it. Binary-searches
/// <c>OuterInstance.IndexedTermsArray</c> to pick the block to start from, then
/// steps forward with <c>Next()</c>.
/// </summary>
/// <param name="target"> Term to seek to. </param>
/// <returns>
/// <c>FOUND</c> on an exact match, <c>NOT_FOUND</c> when positioned on a later term,
/// <c>END</c> when <c>Term_Renamed</c> becomes null.
/// </returns>
public override SeekStatus SeekCeil(BytesRef target)
{
    // already here
    if (Term_Renamed != null && Term_Renamed.Equals(target))
    {
        return SeekStatus.FOUND;
    }

    // Search the array in place. Copying it with ToList() on every seek costs
    // O(n) and defeats the binary search.
    // NOTE(review): assumes IndexedTermsArray is an array - confirm.
    int startIdx = System.Array.BinarySearch(OuterInstance.IndexedTermsArray, target);

    if (startIdx >= 0)
    {
        // we hit the term exactly... lucky us!
        TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(target);
        Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
        Ord_Renamed = startIdx << OuterInstance.IndexIntervalBits;
        SetTerm();
        Debug.Assert(Term_Renamed != null);
        return SeekStatus.FOUND;
    }

    // we didn't hit the term exactly: recover the insertion point
    startIdx = -startIdx - 1;

    if (startIdx == 0)
    {
        // our target occurs *before* the first term
        TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(target);
        Debug.Assert(seekStatus == TermsEnum.SeekStatus.NOT_FOUND);
        Ord_Renamed = 0;
        SetTerm();
        Debug.Assert(Term_Renamed != null);
        return SeekStatus.NOT_FOUND;
    }

    // back up to the start of the block
    startIdx--;

    // No seek is needed when we are already in the right block and the
    // current term is not past the target.
    bool canScanFromHere = (Ord_Renamed >> OuterInstance.IndexIntervalBits) == startIdx
        && Term_Renamed != null
        && Term_Renamed.CompareTo(target) <= 0;

    if (!canScanFromHere)
    {
        // seek to the right block
        TermsEnum.SeekStatus seekStatus = TermsEnum.SeekCeil(OuterInstance.IndexedTermsArray[startIdx]);
        Debug.Assert(seekStatus == TermsEnum.SeekStatus.FOUND);
        Ord_Renamed = startIdx << OuterInstance.IndexIntervalBits;
        SetTerm();
        Debug.Assert(Term_Renamed != null); // should be non-null since it's in the index
    }

    while (Term_Renamed != null && Term_Renamed.CompareTo(target) < 0)
    {
        Next();
    }

    if (Term_Renamed == null)
    {
        return SeekStatus.END;
    }
    else if (Term_Renamed.CompareTo(target) == 0)
    {
        return SeekStatus.FOUND;
    }
    else
    {
        return SeekStatus.NOT_FOUND;
    }
}
/// <summary>
/// Positions the enum on <paramref name="target"/> using a previously captured
/// term state instead of a lookup. Does nothing beyond the initial assert when
/// the enum is already on <paramref name="target"/> and <c>TermExists</c> is true.
/// </summary>
/// <param name="target"> Term to position on. </param>
/// <param name="otherState"> State copied into the static frame; asserted to be a <c>BlockTermState</c>. </param>
public override void SeekExact(BytesRef target, TermState otherState)
{
    // ClearEOF() sits inside Debug.Assert, so it runs only where asserts are compiled in.
    Debug.Assert(ClearEOF());

    // skip seek: already on target
    if (target.CompareTo(Term_Renamed) == 0 && TermExists)
    {
        return;
    }

    Debug.Assert(otherState != null && otherState is BlockTermState);

    CurrentFrame = StaticFrame;
    CurrentFrame.State.CopyFrom(otherState);
    Term_Renamed.CopyBytes(target);
    CurrentFrame.MetaDataUpto = CurrentFrame.TermBlockOrd;

    Debug.Assert(CurrentFrame.MetaDataUpto > 0);

    ValidIndexPrefix = 0;
}
/// <summary>
/// Seeks the underlying term enum to <paramref name="term"/> within this field and
/// handles three outcomes: an exact match, landing outside the field (EOF), and
/// landing on a different term of the field.
/// </summary>
/// <param name="term"> Term bytes to seek to. </param>
/// <returns>
/// <c>FOUND</c> on an exact match, <c>NOT_FOUND</c> when <c>Current</c> is set to another
/// term, <c>END</c> when <c>Current</c> is set to null.
/// </returns>
public override SeekStatus SeekCeil(BytesRef term)
{
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
    }

    SkipNext = false;
    TermInfosReader tis = OuterInstance.TermsDict;
    Term t0 = new Term(fieldInfo.Name, term);

    Debug.Assert(TermEnum != null);

    tis.SeekEnum(TermEnum, t0, false);

    Term t = TermEnum.Term();
    bool landedInField = t != null && t.Field() == InternedFieldName;

    if (landedInField && term.BytesEquals(t.Bytes()))
    {
        // Exact match: no surrogate handling needed.
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek exact match");
        }
        Current = t.Bytes();
        return SeekStatus.FOUND;
    }

    if (!landedInField)
    {
        // TODO: maybe we can handle this like the next()
        // into null? set term as prevTerm then dance?
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek hit EOF");
        }

        // We hit EOF; try end-case surrogate dance: if we
        // find an E, try swapping in S, backwards:
        ScratchTerm.CopyBytes(term);
        Debug.Assert(ScratchTerm.Offset == 0);

        for (int i = ScratchTerm.Length - 1; i >= 0; i--)
        {
            if (!IsHighBMPChar(ScratchTerm.Bytes, i))
            {
                continue;
            }

            if (DEBUG_SURROGATES)
            {
                Console.WriteLine(" found E pos=" + i + "; try seek");
            }

            if (!SeekToNonBMP(SeekTermEnum, ScratchTerm, i))
            {
                continue;
            }

            ScratchTerm.CopyBytes(SeekTermEnum.Term().Bytes());
            OuterInstance.TermsDict.SeekEnum(TermEnum, SeekTermEnum.Term(), false);

            NewSuffixStart = 1 + i;
            DoPushes();

            // Found a match
            // TODO: faster seek?
            Current = TermEnum.Term().Bytes();
            return SeekStatus.NOT_FOUND;
        }

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek END");
        }
        Current = null;
        return SeekStatus.END;
    }

    // Landed on a different, non-null term of this field: treat it like a
    // next() by pretending the requested term was the previous one.
    PrevTerm.CopyBytes(term);

    if (DEBUG_SURROGATES)
    {
        Console.WriteLine(" seek hit non-exact term=" + UnicodeUtil.ToHexString(t.Text()));
    }

    BytesRef br = t.Bytes();
    Debug.Assert(br.Offset == 0);

    SetNewSuffixStart(term, br);
    SurrogateDance();

    Term t2 = TermEnum.Term();
    if (t2 == null || t2.Field() != InternedFieldName)
    {
        // PreFlex codec interns field names; verify:
        Debug.Assert(t2 == null || !t2.Field().Equals(InternedFieldName));
        Current = null;
        return SeekStatus.END;
    }

    Current = t2.Bytes();
    Debug.Assert(!UnicodeSortOrder || term.CompareTo(Current) < 0, "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(Current.Utf8ToString()));
    return SeekStatus.NOT_FOUND;
}
/// <summary>
/// Enumerates the terms of field "f" in <paramref name="r"/>, asserting that they
/// are strictly increasing and all contained in <paramref name="allTerms"/>, then
/// asserts that seeking to each seen term reports <c>FOUND</c>.
/// </summary>
/// <param name="r"> Reader whose "f" field is checked. </param>
/// <param name="allTerms"> Terms that were added to the index. </param>
/// <param name="isTop"> When true, the seen terms must equal <paramref name="allTerms"/> exactly. </param>
private void CheckTermsOrder(IndexReader r, ISet<string> allTerms, bool isTop)
{
    TermsEnum terms = MultiFields.GetFields(r).Terms("f").Iterator(null);

    BytesRef last = new BytesRef();
    HashSet<string> seenTerms = new HashSet<string>();

    BytesRef term;
    while ((term = terms.Next()) != null)
    {
        Assert.IsTrue(last.CompareTo(term) < 0);
        last.CopyBytes(term);

        string s = term.Utf8ToString();
        Assert.IsTrue(allTerms.Contains(s), "term " + TermDesc(s) + " was not added to index (count=" + allTerms.Count + ")");
        seenTerms.Add(s);
    }

    if (isTop)
    {
        Assert.IsTrue(allTerms.SetEquals(seenTerms));
    }

    // Test seeking. foreach (rather than a hand-driven IEnumerator) makes sure
    // the enumerator is disposed.
    foreach (string seen in seenTerms)
    {
        BytesRef tr = new BytesRef(seen);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, terms.SeekCeil(tr), "seek failed for term=" + TermDesc(tr.Utf8ToString()));
    }
}
/// <summary>
/// Validates a <c>SortedSetDocValues</c> instance against <paramref name="docsWithField"/>:
/// per-document ordinals must be strictly increasing and within bounds, documents
/// marked missing must have no ordinals, every ordinal up to <c>ValueCount - 1</c>
/// must be used, and the values must be strictly increasing by ordinal.
/// When <paramref name="dv"/> is a <c>RandomAccessOrds</c>, its <c>OrdAt</c> and
/// <c>Cardinality</c> results are cross-checked as well.
/// </summary>
/// <param name="fieldName"> Field name used in error messages. </param>
/// <param name="reader"> Reader supplying <c>MaxDoc</c>. </param>
/// <param name="dv"> Doc values under test. </param>
/// <param name="docsWithField"> Bits telling which documents have a value. </param>
/// <exception cref="Exception"> Thrown on the first inconsistency found. </exception>
private static void CheckSortedSetDocValues(string fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField)
{
    long maxOrd = dv.ValueCount - 1;
    LongBitSet seenOrds = new LongBitSet(dv.ValueCount);
    long maxOrd2 = -1;

    // dv is never reassigned, so the type test is done once up front.
    RandomAccessOrds randomAccess = dv as RandomAccessOrds;

    for (int i = 0; i < reader.MaxDoc; i++)
    {
        dv.Document = i;
        long lastOrd = -1;
        long ord;

        if (!docsWithField.Get(i))
        {
            // Marked missing: there must be no ordinals at all.
            long o = dv.NextOrd();
            if (o != SortedSetDocValues.NO_MORE_ORDS)
            {
                throw new Exception("dv for field: " + fieldName + " is marked missing but has ord=" + o + " for doc: " + i);
            }
            if (randomAccess != null)
            {
                long missingCardinality = randomAccess.Cardinality();
                if (missingCardinality != 0)
                {
                    throw new Exception("dv for field: " + fieldName + " is marked missing but has cardinality " + missingCardinality + " for doc: " + i);
                }
            }
            continue;
        }

        int ordCount = 0;
        while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
        {
            if (ord <= lastOrd)
            {
                throw new Exception("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i);
            }
            if (ord < 0 || ord > maxOrd)
            {
                throw new Exception("ord out of bounds: " + ord);
            }
            if (randomAccess != null)
            {
                long ord2 = randomAccess.OrdAt(ordCount);
                if (ord != ord2)
                {
                    throw new Exception("ordAt(" + ordCount + ") inconsistent, expected=" + ord + ",got=" + ord2 + " for doc: " + i);
                }
            }

            lastOrd = ord;
            maxOrd2 = Math.Max(maxOrd2, ord);
            seenOrds.Set(ord);
            ordCount++;
        }

        if (ordCount == 0)
        {
            throw new Exception("dv for field: " + fieldName + " has no ordinals but is not marked missing for doc: " + i);
        }

        if (randomAccess != null)
        {
            long ordCount2 = randomAccess.Cardinality();
            if (ordCount != ordCount2)
            {
                throw new Exception("cardinality inconsistent, expected=" + ordCount + ",got=" + ordCount2 + " for doc: " + i);
            }
        }
    }

    if (maxOrd != maxOrd2)
    {
        throw new Exception("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
    }

    if (seenOrds.Cardinality() != dv.ValueCount)
    {
        throw new Exception("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.ValueCount + " but only used: " + seenOrds.Cardinality());
    }

    // Values must be strictly increasing by ordinal.
    BytesRef lastValue = null;
    BytesRef scratch = new BytesRef();
    for (long i = 0; i <= maxOrd; i++)
    {
        dv.LookupOrd(i, scratch);
        Debug.Assert(scratch.Valid);

        if (lastValue != null && scratch.CompareTo(lastValue) <= 0)
        {
            throw new Exception("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + scratch);
        }

        lastValue = BytesRef.DeepCopyOf(scratch);
    }
}
/// <summary>
/// Builds the final automaton from the entries produced by <paramref name="sorter"/>.
/// An entry that compares equal to the previously added one is skipped.
/// </summary>
/// <param name="sorter"> Source of the entries to add. </param>
/// <returns> The finished FST, or <c>null</c> when the sorter produced no entries. </returns>
private FST<object> BuildAutomaton(BytesRefSorter sorter)
{
    Outputs<object> outputs = NoOutputs.Singleton;
    object noOutput = outputs.NoOutput;
    Builder<object> fstBuilder = new Builder<object>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, shareMaxTailLength, outputs, null, false, PackedInts.DEFAULT, true, 15);

    BytesRef previous = new BytesRef();
    IntsRef intsScratch = new IntsRef();
    int seen = 0;

    BytesRefIterator entries = sorter.GetEnumerator();
    for (BytesRef entry = entries.Next(); entry != null; entry = entries.Next())
    {
        seen++;
        if (previous.CompareTo(entry) == 0)
        {
            continue; // same as the last remembered entry
        }
        fstBuilder.Add(Util.Fst.Util.ToIntsRef(entry, intsScratch), noOutput);
        previous.CopyBytes(entry);
    }

    if (seen == 0)
    {
        return null;
    }
    return fstBuilder.Finish();
}
/// <summary>
/// Validates a <c>SortedDocValues</c> instance: runs <c>CheckBinaryDocValues</c> first,
/// then checks that every document's ordinal is consistent with
/// <paramref name="docsWithField"/> and within bounds, that every ordinal up to
/// <c>ValueCount - 1</c> is used, and that values are strictly increasing by ordinal.
/// </summary>
/// <param name="fieldName"> Field name used in error messages. </param>
/// <param name="reader"> Reader supplying <c>MaxDoc</c>. </param>
/// <param name="dv"> Doc values under test. </param>
/// <param name="docsWithField"> Bits telling which documents have a value. </param>
/// <exception cref="Exception"> Thrown on the first inconsistency found. </exception>
private static void CheckSortedDocValues(string fieldName, AtomicReader reader, SortedDocValues dv, Bits docsWithField)
{
    CheckBinaryDocValues(fieldName, reader, dv, docsWithField);

    int maxOrd = dv.ValueCount - 1;
    FixedBitSet seenOrds = new FixedBitSet(dv.ValueCount);
    int maxOrd2 = -1;

    for (int doc = 0; doc < reader.MaxDoc; doc++)
    {
        int ord = dv.GetOrd(doc);

        if (ord == -1)
        {
            // An ord of -1 is only valid for a document marked missing.
            if (docsWithField.Get(doc))
            {
                throw new Exception("dv for field: " + fieldName + " has -1 ord but is not marked missing for doc: " + doc);
            }
            continue;
        }

        if (ord < -1 || ord > maxOrd)
        {
            throw new Exception("ord out of bounds: " + ord);
        }

        if (!docsWithField.Get(doc))
        {
            throw new Exception("dv for field: " + fieldName + " is missing but has ord=" + ord + " for doc: " + doc);
        }

        maxOrd2 = Math.Max(maxOrd2, ord);
        seenOrds.Set(ord);
    }

    if (maxOrd != maxOrd2)
    {
        throw new Exception("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
    }

    if (seenOrds.Cardinality() != dv.ValueCount)
    {
        throw new Exception("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.ValueCount + " but only used: " + seenOrds.Cardinality());
    }

    // Values must be strictly increasing by ordinal.
    BytesRef lastValue = null;
    BytesRef scratch = new BytesRef();
    for (int ordinal = 0; ordinal <= maxOrd; ordinal++)
    {
        dv.LookupOrd(ordinal, scratch);
        Debug.Assert(scratch.Valid);

        if (lastValue != null && scratch.CompareTo(lastValue) <= 0)
        {
            throw new Exception("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + scratch);
        }

        lastValue = BytesRef.DeepCopyOf(scratch);
    }
}