/// <summary>
/// Orders <see cref="ScoreTerm"/>s primarily by boost; ties are broken by
/// comparing the terms in reversed order. Identical terms compare as equal,
/// keeping this ordering consistent with equality.
/// </summary>
/// <param name="other"> the <see cref="ScoreTerm"/> to compare against </param>
/// <returns> a negative value, zero, or a positive value per the usual CompareTo contract </returns>
public virtual int CompareTo(ScoreTerm other)
{
    // Same underlying bytes => equal, consistent with equals.
    if (term.BytesEquals(other.term))
    {
        return 0;
    }

    // Equal boosts fall back to the reversed term comparison;
    // otherwise the boost decides.
    return this.boost == other.boost
        ? other.term.CompareTo(this.term)
        : this.boost.CompareTo(other.boost);
}
/// <summary>
/// Returns true when <paramref name="output2"/> encodes the same surface form
/// as <paramref name="key"/>. When payloads are stored, <paramref name="output2"/>
/// holds the surface form followed by a <c>PAYLOAD_SEP</c> byte and the payload,
/// so we match the key as a strict prefix and require the separator right after it.
/// Without payloads, a plain byte-for-byte comparison suffices.
/// </summary>
private bool SameSurfaceForm(BytesRef key, BytesRef output2)
{
    if (!hasPayloads)
    {
        // No payload encoding: the whole output is the surface form.
        return key.BytesEquals(output2);
    }

    // output2 has at least the PAYLOAD_SEP byte, so it must be strictly longer.
    if (key.Length >= output2.Length)
    {
        return false;
    }

    // The key must be a byte-wise prefix of output2.
    int i = 0;
    while (i < key.Length)
    {
        if (key.Bytes[key.Offset + i] != output2.Bytes[output2.Offset + i])
        {
            return false;
        }
        i++;
    }

    // The byte immediately following the prefix must be the separator.
    return output2.Bytes[output2.Offset + key.Length] == PAYLOAD_SEP;
}
/// <summary>
/// Provide spelling corrections based on several parameters.
/// </summary>
/// <param name="term"> The term to suggest spelling corrections for </param>
/// <param name="numSug"> The maximum number of spelling corrections </param>
/// <param name="ir"> The index reader to fetch the candidate spelling corrections from </param>
/// <param name="docfreq"> The minimum document frequency a potential suggestion needs to have in order to be included </param>
/// <param name="editDistance"> The maximum edit distance candidates are allowed to have </param>
/// <param name="accuracy"> The minimum accuracy a suggested spelling correction needs to have in order to be included </param>
/// <param name="spare"> a chars scratch </param>
/// <returns> a collection of spelling corrections sorted by <code>ScoreTerm</code>'s natural order. </returns>
/// <exception cref="System.IO.IOException"> If I/O related errors occur </exception>
protected internal virtual IEnumerable<ScoreTerm> SuggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance, float accuracy, CharsRef spare)
{
    var atts = new AttributeSource();
    IMaxNonCompetitiveBoostAttribute maxBoostAtt = atts.AddAttribute<IMaxNonCompetitiveBoostAttribute>();
    Terms terms = MultiFields.GetTerms(ir, term.Field);
    if (terms == null)
    {
        // Field does not exist in the index: nothing to suggest.
        return (new List<ScoreTerm>());
    }
    FuzzyTermsEnum e = new FuzzyTermsEnum(terms, atts, term, editDistance, Math.Max(minPrefix, editDistance - 1), true);

    var stQueue = new Support.PriorityQueue<ScoreTerm>();

    BytesRef queryTerm = new BytesRef(term.Text());
    BytesRef candidateTerm;
    // 'st' is reused across iterations; a fresh instance is only allocated
    // when the current one actually gets retained in the queue.
    ScoreTerm st = new ScoreTerm();
    IBoostAttribute boostAtt = e.Attributes.AddAttribute<IBoostAttribute>();
    while ((candidateTerm = e.Next()) != null)
    {
        float boost = boostAtt.Boost;
        // ignore uncompetitive hits
        if (stQueue.Count >= numSug && boost <= stQueue.Peek().Boost)
        {
            continue;
        }

        // ignore exact match of the same term
        if (queryTerm.BytesEquals(candidateTerm))
        {
            continue;
        }

        int df = e.DocFreq;

        // check docFreq if required
        if (df <= docfreq)
        {
            continue;
        }

        float score;
        string termAsString;
        if (distance == INTERNAL_LEVENSHTEIN)
        {
            // delay creating strings until the end
            termAsString = null;
            // undo FuzzyTermsEnum's scale factor for a real scaled lev score
            score = boost / e.ScaleFactor + e.MinSimilarity;
        }
        else
        {
            // Custom distance measure: materialize the candidate as a string
            // via the scratch CharsRef and score against the query text.
            UnicodeUtil.UTF8toUTF16(candidateTerm, spare);
            termAsString = spare.ToString();
            score = distance.GetDistance(term.Text(), termAsString);
        }

        if (score < accuracy)
        {
            continue;
        }

        // add new entry in PQ
        // (deep copy: the enum may reuse candidateTerm's backing bytes)
        st.Term = BytesRef.DeepCopyOf(candidateTerm);
        st.Boost = boost;
        st.Docfreq = df;
        st.TermAsString = termAsString;
        st.Score = score;
        stQueue.Offer(st);
        // possibly drop entries from queue
        st = (stQueue.Count > numSug) ? stQueue.Poll() : new ScoreTerm();
        // Feed the current worst queued boost back to the fuzzy enum so it
        // can skip candidates that cannot make the cut.
        maxBoostAtt.MaxNonCompetitiveBoost = (stQueue.Count >= numSug) ? stQueue.Peek().Boost : float.NegativeInfinity;
    }

    return (stQueue);
}
/// <summary>
/// Asserts that two <c>DirectoryReader</c>s hold equivalent content: the same
/// documents (matched up via the unique <paramref name="idField"/>), equivalent
/// stored fields and term vectors, and identical postings (docs + freqs) per term.
/// </summary>
/// <param name="r1"> the first reader </param>
/// <param name="r2"> the second reader, expected to be equivalent to <paramref name="r1"/> </param>
/// <param name="idField"> name of the field holding each document's unique id </param>
public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField)
{
    if (Verbose)
    {
        Console.WriteLine("\nr1 docs:");
        PrintDocs(r1);
        Console.WriteLine("\nr2 docs:");
        PrintDocs(r2);
    }
    if (r1.NumDocs != r2.NumDocs)
    {
        if (Debugging.AssertsEnabled)
        {
            Debugging.Assert(false, () => "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs);
        }
    }
    // If max docs and live docs agree on both sides there can be no deletes.
    bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc);

    int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping

    // create mapping from id2 space to id2 based on idField
    Fields f1 = MultiFields.GetFields(r1);
    if (f1 == null)
    {
        // make sure r2 is empty
        Assert.IsNull(MultiFields.GetFields(r2));
        return;
    }
    Terms terms1 = f1.GetTerms(idField);
    if (terms1 == null)
    {
        Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null);
        return;
    }
    TermsEnum termsEnum = terms1.GetIterator(null);

    IBits liveDocs1 = MultiFields.GetLiveDocs(r1);
    IBits liveDocs2 = MultiFields.GetLiveDocs(r2);

    Fields fields = MultiFields.GetFields(r2);
    if (fields == null)
    {
        // make sure r1 is in fact empty (eg has only all
        // deleted docs):
        IBits liveDocs = MultiFields.GetLiveDocs(r1);
        DocsEnum docs = null;
        while (termsEnum.Next() != null)
        {
            docs = TestUtil.Docs(Random, termsEnum, liveDocs, docs, DocsFlags.NONE);
            while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                Assert.Fail("r1 is not empty but r2 is");
            }
        }
        return;
    }
    Terms terms2 = fields.GetTerms(idField);
    TermsEnum termsEnum2 = terms2.GetIterator(null);

    DocsEnum termDocs1 = null;
    DocsEnum termDocs2 = null;

    // Phase 1: walk every id term in r1, find the matching doc in r2,
    // and verify stored fields and term vectors doc-by-doc.
    while (true)
    {
        BytesRef term = termsEnum.Next();
        //System.out.println("TEST: match id term=" + term);
        if (term == null)
        {
            break;
        }

        termDocs1 = TestUtil.Docs(Random, termsEnum, liveDocs1, termDocs1, DocsFlags.NONE);
        if (termsEnum2.SeekExact(term))
        {
            termDocs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, termDocs2, DocsFlags.NONE);
        }
        else
        {
            termDocs2 = null;
        }

        if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS)
        {
            // this doc is deleted and wasn't replaced
            Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS);
            continue;
        }

        // Each id term must match exactly one live doc on each side.
        int id1 = termDocs1.DocID;
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc());

        Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
        int id2 = termDocs2.DocID;
        Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc());

        r2r1[id2] = id1;

        // verify stored fields are equivalent
        try
        {
            VerifyEquals(r1.Document(id1), r2.Document(id2));
        }
        catch (Exception /*t*/)
        {
            Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term);
            Console.WriteLine(" d1=" + r1.Document(id1));
            Console.WriteLine(" d2=" + r2.Document(id2));
            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
        }

        try
        {
            // verify term vectors are equivalent
            VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2));
        }
        catch (Exception /*e*/)
        {
            // On failure, dump both term vectors in full for debugging,
            // then rethrow the original failure.
            Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2);
            Fields tv1 = r1.GetTermVectors(id1);
            Console.WriteLine(" d1=" + tv1);
            if (tv1 != null)
            {
                DocsAndPositionsEnum dpEnum = null;
                DocsEnum dEnum = null;
                foreach (string field in tv1)
                {
                    Console.WriteLine(" " + field + ":");
                    Terms terms3 = tv1.GetTerms(field);
                    Assert.IsNotNull(terms3);
                    TermsEnum termsEnum3 = terms3.GetIterator(null);
                    BytesRef term2;
                    while ((term2 = termsEnum3.Next()) != null)
                    {
                        Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                        dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                        if (dpEnum != null)
                        {
                            // Positions are available: dump doc, freq and each position.
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dpEnum.Freq;
                            Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq);
                            for (int posUpto = 0; posUpto < freq; posUpto++)
                            {
                                Console.WriteLine(" pos=" + dpEnum.NextPosition());
                            }
                        }
                        else
                        {
                            // No positions: fall back to a docs+freqs enum.
                            dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                            Assert.IsNotNull(dEnum);
                            Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dEnum.Freq;
                            Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq);
                        }
                    }
                }
            }

            Fields tv2 = r2.GetTermVectors(id2);
            Console.WriteLine(" d2=" + tv2);
            if (tv2 != null)
            {
                DocsAndPositionsEnum dpEnum = null;
                DocsEnum dEnum = null;
                foreach (string field in tv2)
                {
                    Console.WriteLine(" " + field + ":");
                    Terms terms3 = tv2.GetTerms(field);
                    Assert.IsNotNull(terms3);
                    TermsEnum termsEnum3 = terms3.GetIterator(null);
                    BytesRef term2;
                    while ((term2 = termsEnum3.Next()) != null)
                    {
                        Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq);
                        dpEnum = termsEnum3.DocsAndPositions(null, dpEnum);
                        if (dpEnum != null)
                        {
                            Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dpEnum.Freq;
                            Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq);
                            for (int posUpto = 0; posUpto < freq; posUpto++)
                            {
                                Console.WriteLine(" pos=" + dpEnum.NextPosition());
                            }
                        }
                        else
                        {
                            dEnum = TestUtil.Docs(Random, termsEnum3, null, dEnum, DocsFlags.FREQS);
                            Assert.IsNotNull(dEnum);
                            Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS);
                            int freq = dEnum.Freq;
                            Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq);
                        }
                    }
                }
            }

            throw; // LUCENENET: CA2200: Rethrow to preserve stack details (https://docs.microsoft.com/en-us/visualstudio/code-quality/ca2200-rethrow-to-preserve-stack-details)
        }
    }

    //System.out.println("TEST: done match id");

    // Phase 2: verify postings of every field/term pair agree, remapping
    // r2 doc ids into r1 space via r2r1.
    // Verify postings
    //System.out.println("TEST: create te1");
    Fields fields1 = MultiFields.GetFields(r1);
    IEnumerator<string> fields1Enum = fields1.GetEnumerator();
    Fields fields2 = MultiFields.GetFields(r2);
    IEnumerator<string> fields2Enum = fields2.GetEnumerator();

    string field1 = null, field2 = null;
    TermsEnum termsEnum1 = null;
    termsEnum2 = null;
    DocsEnum docs1 = null, docs2 = null;

    // pack both doc and freq into single element for easy sorting
    long[] info1 = new long[r1.NumDocs];
    long[] info2 = new long[r2.NumDocs];

    for (; ;)
    {
        BytesRef term1 = null, term2 = null;

        // iterate until we get some docs
        int len1;
        for (; ;)
        {
            len1 = 0;
            if (termsEnum1 == null)
            {
                if (!fields1Enum.MoveNext())
                {
                    break;
                }
                field1 = fields1Enum.Current;
                Terms terms = fields1.GetTerms(field1);
                if (terms == null)
                {
                    continue;
                }
                termsEnum1 = terms.GetIterator(null);
            }
            term1 = termsEnum1.Next();
            if (term1 == null)
            {
                // no more terms in this field
                termsEnum1 = null;
                continue;
            }

            //System.out.println("TEST: term1=" + term1);
            docs1 = TestUtil.Docs(Random, termsEnum1, liveDocs1, docs1, DocsFlags.FREQS);
            while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                int d = docs1.DocID;
                int f = docs1.Freq;
                // doc id in the high 32 bits, freq in the low 32 bits
                info1[len1] = (((long)d) << 32) | (uint)f;
                len1++;
            }
            if (len1 > 0)
            {
                break;
            }
        }

        // iterate until we get some docs
        int len2;
        for (; ;)
        {
            len2 = 0;
            if (termsEnum2 == null)
            {
                if (!fields2Enum.MoveNext())
                {
                    break;
                }
                field2 = fields2Enum.Current;
                Terms terms = fields2.GetTerms(field2);
                if (terms == null)
                {
                    continue;
                }
                termsEnum2 = terms.GetIterator(null);
            }
            term2 = termsEnum2.Next();
            if (term2 == null)
            {
                // no more terms in this field
                termsEnum2 = null;
                continue;
            }

            //System.out.println("TEST: term1=" + term1);
            docs2 = TestUtil.Docs(Random, termsEnum2, liveDocs2, docs2, DocsFlags.FREQS);
            while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
            {
                // remap r2's doc id into r1's id space before packing
                int d = r2r1[docs2.DocID];
                int f = docs2.Freq;
                info2[len2] = (((long)d) << 32) | (uint)f;
                len2++;
            }
            if (len2 > 0)
            {
                break;
            }
        }

        Assert.AreEqual(len1, len2);
        if (len1 == 0) // no more terms
        {
            break;
        }

        Assert.AreEqual(field1, field2);
        Assert.IsTrue(term1.BytesEquals(term2));

        if (!hasDeletes)
        {
            Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq);
        }

        Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes);

        // sort info2 to get it into ascending docid
        Array.Sort(info2, 0, len2);

        // now compare
        for (int i = 0; i < len1; i++)
        {
            Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString());
        }
    }
}
/// <summary>
/// Randomized exercise of the terms dictionary: for NUM_TEST_ITER iterations,
/// picks a random field and verifies straight enumeration, seek-by-term,
/// seek-by-ord (where supported), seeks to non-existent terms, backwards
/// seeks, and docs/positions enumeration against the expected FieldData.
/// </summary>
public virtual void _run()
{
    for (int iter = 0; iter < NUM_TEST_ITER; iter++)
    {
        FieldData field = fields[Random.Next(fields.Length)];
        TermsEnum termsEnum = termsDict.GetTerms(field.fieldInfo.Name).GetIterator(null);
#pragma warning disable 612, 618
        if (si.Codec is Lucene3xCodec)
#pragma warning restore 612, 618
        {
            // code below expects unicode sort order
            continue;
        }

        int upto = 0;
        // Test straight enum of the terms:
        while (true)
        {
            BytesRef term = termsEnum.Next();
            if (term == null)
            {
                break;
            }
            BytesRef expected = new BytesRef(field.terms[upto++].text2);
            Assert.IsTrue(expected.BytesEquals(term), "expected=" + expected + " vs actual " + term);
        }
        Assert.AreEqual(upto, field.terms.Length);

        // Test random seek:
        TermData term2 = field.terms[Random.Next(field.terms.Length)];
        TermsEnum.SeekStatus status = termsEnum.SeekCeil(new BytesRef(term2.text2));
        Assert.AreEqual(status, TermsEnum.SeekStatus.FOUND);
        Assert.AreEqual(term2.docs.Length, termsEnum.DocFreq);
        if (field.omitTF)
        {
            this.VerifyDocs(term2.docs, term2.positions, TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.NONE), false);
        }
        else
        {
            this.VerifyDocs(term2.docs, term2.positions, termsEnum.DocsAndPositions(null, null), true);
        }

        // Test random seek by ord:
        int idx = Random.Next(field.terms.Length);
        term2 = field.terms[idx];
        bool success = false;
        try
        {
            termsEnum.SeekExact(idx);
            success = true;
        }
#pragma warning disable 168
        catch (NotSupportedException uoe)
#pragma warning restore 168
        {
            // ok -- skip it (not every codec supports ord-based seeking)
        }
        if (success)
        {
            Assert.AreEqual(status, TermsEnum.SeekStatus.FOUND);
            Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(term2.text2)));
            Assert.AreEqual(term2.docs.Length, termsEnum.DocFreq);
            if (field.omitTF)
            {
                this.VerifyDocs(term2.docs, term2.positions, TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.NONE), false);
            }
            else
            {
                this.VerifyDocs(term2.docs, term2.positions, termsEnum.DocsAndPositions(null, null), true);
            }
        }

        // Test seek to non-existent terms:
        if (Verbose)
        {
            Console.WriteLine("TEST: seek non-exist terms");
        }
        for (int i = 0; i < 100; i++)
        {
            // trailing '.' makes the random string very unlikely to exist
            string text2 = TestUtil.RandomUnicodeString(Random) + ".";
            status = termsEnum.SeekCeil(new BytesRef(text2));
            Assert.IsTrue(status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END);
        }

        // Seek to each term, backwards:
        if (Verbose)
        {
            Console.WriteLine("TEST: seek terms backwards");
        }
        for (int i = field.terms.Length - 1; i >= 0; i--)
        {
            Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(field.terms[i].text2)), Thread.CurrentThread.Name + ": field=" + field.fieldInfo.Name + " term=" + field.terms[i].text2);
            Assert.AreEqual(field.terms[i].docs.Length, termsEnum.DocFreq);
        }

        // Seek to each term by ord, backwards
        for (int i = field.terms.Length - 1; i >= 0; i--)
        {
            try
            {
                termsEnum.SeekExact(i);
                Assert.AreEqual(field.terms[i].docs.Length, termsEnum.DocFreq);
                Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(field.terms[i].text2)));
            }
#pragma warning disable 168
            catch (NotSupportedException uoe)
#pragma warning restore 168
            {
                // ord-based seek not supported by this codec -- skip
            }
        }

        // Seek to non-existent empty-string term
        status = termsEnum.SeekCeil(new BytesRef(""));
        Assert.IsNotNull(status);
        //Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status);

        // Make sure we're now pointing to first term
        Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(field.terms[0].text2)));

        // Test docs enum
        termsEnum.SeekCeil(new BytesRef(""));
        upto = 0;
        do
        {
            term2 = field.terms[upto];
            // Randomly exercise the docs enum for ~1/3 of the terms.
            if (Random.Next(3) == 1)
            {
                DocsEnum docs;
                DocsEnum docsAndFreqs;
                DocsAndPositionsEnum postings;
                if (!field.omitTF)
                {
                    postings = termsEnum.DocsAndPositions(null, null);
                    if (postings != null)
                    {
                        docs = docsAndFreqs = postings;
                    }
                    else
                    {
                        docs = docsAndFreqs = TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.FREQS);
                    }
                }
                else
                {
                    postings = null;
                    docsAndFreqs = null;
                    docs = TestUtil.Docs(Random, termsEnum, null, null, DocsFlags.NONE);
                }
                Assert.IsNotNull(docs);
                int upto2 = -1;
                bool ended = false;
                while (upto2 < term2.docs.Length - 1)
                {
                    // Maybe skip:
                    int left = term2.docs.Length - upto2;
                    int doc;
                    if (Random.Next(3) == 1 && left >= 1)
                    {
                        int inc = 1 + Random.Next(left - 1);
                        upto2 += inc;
                        if (Random.Next(2) == 1)
                        {
                            // advance exactly onto an expected doc
                            doc = docs.Advance(term2.docs[upto2]);
                            Assert.AreEqual(term2.docs[upto2], doc);
                        }
                        else
                        {
                            // advance past an expected doc
                            doc = docs.Advance(1 + term2.docs[upto2]);
                            if (doc == DocIdSetIterator.NO_MORE_DOCS)
                            {
                                // skipped past last doc
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(upto2 == term2.docs.Length - 1);
                                }
                                ended = true;
                                break;
                            }
                            else
                            {
                                // skipped to next doc
                                if (Debugging.AssertsEnabled)
                                {
                                    Debugging.Assert(upto2 < term2.docs.Length - 1);
                                }
                                if (doc >= term2.docs[1 + upto2])
                                {
                                    upto2++;
                                }
                            }
                        }
                    }
                    else
                    {
                        doc = docs.NextDoc();
                        Assert.IsTrue(doc != -1);
                        upto2++;
                    }
                    Assert.AreEqual(term2.docs[upto2], doc);
                    if (!field.omitTF)
                    {
                        Assert.AreEqual(term2.positions[upto2].Length, postings.Freq);
                        if (Random.Next(2) == 1)
                        {
                            this.VerifyPositions(term2.positions[upto2], postings);
                        }
                    }
                }
                if (!ended)
                {
                    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docs.NextDoc());
                }
            }
            upto++;
        } while (termsEnum.Next() != null);

        Assert.AreEqual(upto, field.terms.Length);
    }
}
/// <summary>
/// Builds the filter's internal representation from a (field, term) iterator:
/// term bytes are packed into one contiguous byte array (<c>termsBytes</c>),
/// with <c>offsets</c> marking each term's start and <c>termsAndFields</c>
/// recording the index range belonging to each field. Consecutive duplicate
/// (field, term) pairs are skipped, and a hash over all pairs is precomputed.
/// Assumes the iterator yields terms grouped by field.
/// </summary>
/// <param name="iter"> source of (field, term) pairs </param>
/// <param name="length"> upper bound on the number of terms (sizes <c>offsets</c>) </param>
/// <exception cref="ArgumentException"> if the iterator yields a null field </exception>
private TermsFilter(FieldAndTermEnum iter, int length)
{
    // TODO: maybe use oal.index.PrefixCodedTerms instead?
    // If number of terms is more than a few hundred it
    // should be a win

    // TODO: we also pack terms in FieldCache/DocValues
    // ... maybe we can refactor to share that code

    // TODO: yet another option is to build the union of the terms in
    // an automaton and call intersect on the termsenum if the density is high

    int hash = 9;
    var serializedTerms = Arrays.Empty<byte>();
    this.offsets = new int[length + 1];
    int lastEndOffset = 0;
    int index = 0;
    var termsAndFields = new List<TermsAndField>();
    TermsAndField lastTermsAndField = null;
    BytesRef previousTerm = null;
    string previousField = null;
    BytesRef currentTerm;
    string currentField;
    while (iter.MoveNext())
    {
        currentTerm = iter.Current;
        currentField = iter.Field;
        if (currentField == null)
        {
            throw new ArgumentException("Field must not be null");
        }
        if (previousField != null)
        {
            // deduplicate
            if (previousField.Equals(currentField, StringComparison.Ordinal))
            {
                if (previousTerm.BytesEquals(currentTerm))
                {
                    continue;
                }
            }
            else
            {
                // field changed: close out the index range for the previous field
                int _start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
                lastTermsAndField = new TermsAndField(_start, index, previousField);
                termsAndFields.Add(lastTermsAndField);
            }
        }
        hash = PRIME * hash + currentField.GetHashCode();
        hash = PRIME * hash + currentTerm.GetHashCode();
        if (serializedTerms.Length < lastEndOffset + currentTerm.Length)
        {
            serializedTerms = ArrayUtil.Grow(serializedTerms, lastEndOffset + currentTerm.Length);
        }
        Array.Copy(currentTerm.Bytes, currentTerm.Offset, serializedTerms, lastEndOffset, currentTerm.Length);
        offsets[index] = lastEndOffset;
        lastEndOffset += currentTerm.Length;
        index++;
        previousTerm = currentTerm;
        previousField = currentField;
    }
    // sentinel offset marking the end of the last term
    offsets[index] = lastEndOffset;
    // close out the range for the final field
    int start = lastTermsAndField == null ? 0 : lastTermsAndField.end;
    lastTermsAndField = new TermsAndField(start, index, previousField);
    termsAndFields.Add(lastTermsAndField);
    // trim the byte array to the bytes actually used
    this.termsBytes = ArrayUtil.Shrink(serializedTerms, lastEndOffset);
    this.termsAndFields = termsAndFields.ToArray();
    this.hashCode = hash;
}
/// <summary>
/// Seeks to the smallest term >= <paramref name="term"/> in the pre-4.0
/// ("preflex") terms dictionary, compensating for the legacy UTF-16 sort
/// order via the surrogate dance. Returns <c>FOUND</c> on an exact match,
/// <c>NOT_FOUND</c> when positioned on a later term, or <c>END</c> when no
/// term at or after the target exists in this field.
/// </summary>
public override SeekStatus SeekCeil(BytesRef term)
{
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
    }
    skipNext = false;
    TermInfosReader tis = outerInstance.TermsDict;
    Term t0 = new Term(fieldInfo.Name, term);

    Debug.Assert(termEnum != null);

    tis.SeekEnum(termEnum, t0, false);

    Term t = termEnum.Term();

    // NOTE: field names are interned by the preflex codec, so the reference
    // comparison with internedFieldName is intentional here.
    if (t != null && t.Field == internedFieldName && term.BytesEquals(t.Bytes))
    {
        // If we found an exact match, no need to do the
        // surrogate dance
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek exact match");
        }
        current = t.Bytes;
        return (SeekStatus.FOUND);
    }
    else if (t == null || t.Field != internedFieldName)
    {
        // TODO: maybe we can handle this like the next()
        // into null? set term as prevTerm then dance?

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek hit EOF");
        }

        // We hit EOF; try end-case surrogate dance: if we
        // find an E, try swapping in S, backwards:
        scratchTerm.CopyBytes(term);

        Debug.Assert(scratchTerm.Offset == 0);

        for (int i = scratchTerm.Length - 1; i >= 0; i--)
        {
            if (IsHighBMPChar(scratchTerm.Bytes, i))
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine(" found E pos=" + i + "; try seek");
                }

                if (SeekToNonBMP(seekTermEnum, scratchTerm, i))
                {
                    scratchTerm.CopyBytes(seekTermEnum.Term().Bytes);
                    outerInstance.TermsDict.SeekEnum(termEnum, seekTermEnum.Term(), false);

                    newSuffixStart = 1 + i;

                    DoPushes();

                    // Found a match
                    // TODO: faster seek?
                    current = termEnum.Term().Bytes;
                    return (SeekStatus.NOT_FOUND);
                }
            }
        }

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek END");
        }

        current = null;
        return (SeekStatus.END);
    }
    else
    {
        // We found a non-exact but non-null term; this one
        // is fun -- just treat it like next, by pretending
        // requested term was prev:
        prevTerm.CopyBytes(term);

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek hit non-exact term=" + UnicodeUtil.ToHexString(t.Text()));
        }

        BytesRef br = t.Bytes;
        Debug.Assert(br.Offset == 0);

        SetNewSuffixStart(term, br);

        SurrogateDance();

        Term t2 = termEnum.Term();
        if (t2 == null || t2.Field != internedFieldName)
        {
            // PreFlex codec interns field names; verify:
            Debug.Assert(t2 == null || !t2.Field.Equals(internedFieldName, StringComparison.Ordinal));
            current = null;
            return (SeekStatus.END);
        }
        else
        {
            current = t2.Bytes;
            // In unicode sort order the result must sort strictly after the target.
            Debug.Assert(!unicodeSortOrder || term.CompareTo(current) < 0, "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(current.Utf8ToString()));
            return (SeekStatus.NOT_FOUND);
        }
    }
}
/// <summary>
/// Seeks to the smallest term >= <paramref name="term"/> in the pre-4.0
/// ("preflex") terms dictionary, compensating for the legacy UTF-16 sort
/// order via the surrogate dance. Returns <c>FOUND</c> on an exact match,
/// <c>NOT_FOUND</c> when positioned on a later term, or <c>END</c> when no
/// term at or after the target exists in this field.
/// </summary>
public override SeekStatus SeekCeil(BytesRef term)
{
    if (DEBUG_SURROGATES)
    {
        Console.WriteLine("TE.seek target=" + UnicodeUtil.ToHexString(term.Utf8ToString()));
    }
    SkipNext = false;
    TermInfosReader tis = OuterInstance.TermsDict;
    Term t0 = new Term(fieldInfo.Name, term);

    Debug.Assert(TermEnum != null);

    tis.SeekEnum(TermEnum, t0, false);

    Term t = TermEnum.Term();

    // NOTE: field names are interned by the preflex codec, so the reference
    // comparison with InternedFieldName is intentional here.
    if (t != null && t.Field() == InternedFieldName && term.BytesEquals(t.Bytes()))
    {
        // If we found an exact match, no need to do the
        // surrogate dance
        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek exact match");
        }
        Current = t.Bytes();
        return SeekStatus.FOUND;
    }
    else if (t == null || t.Field() != InternedFieldName)
    {
        // TODO: maybe we can handle this like the next()
        // into null? set term as prevTerm then dance?

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek hit EOF");
        }

        // We hit EOF; try end-case surrogate dance: if we
        // find an E, try swapping in S, backwards:
        ScratchTerm.CopyBytes(term);

        Debug.Assert(ScratchTerm.Offset == 0);

        for (int i = ScratchTerm.Length - 1; i >= 0; i--)
        {
            if (IsHighBMPChar(ScratchTerm.Bytes, i))
            {
                if (DEBUG_SURROGATES)
                {
                    Console.WriteLine(" found E pos=" + i + "; try seek");
                }

                if (SeekToNonBMP(SeekTermEnum, ScratchTerm, i))
                {
                    ScratchTerm.CopyBytes(SeekTermEnum.Term().Bytes());
                    OuterInstance.TermsDict.SeekEnum(TermEnum, SeekTermEnum.Term(), false);

                    NewSuffixStart = 1 + i;

                    DoPushes();

                    // Found a match
                    // TODO: faster seek?
                    Current = TermEnum.Term().Bytes();
                    return SeekStatus.NOT_FOUND;
                }
            }
        }

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek END");
        }

        Current = null;
        return SeekStatus.END;
    }
    else
    {
        // We found a non-exact but non-null term; this one
        // is fun -- just treat it like next, by pretending
        // requested term was prev:
        PrevTerm.CopyBytes(term);

        if (DEBUG_SURROGATES)
        {
            Console.WriteLine(" seek hit non-exact term=" + UnicodeUtil.ToHexString(t.Text()));
        }

        BytesRef br = t.Bytes();
        Debug.Assert(br.Offset == 0);

        SetNewSuffixStart(term, br);

        SurrogateDance();

        Term t2 = TermEnum.Term();
        if (t2 == null || t2.Field() != InternedFieldName)
        {
            // PreFlex codec interns field names; verify:
            Debug.Assert(t2 == null || !t2.Field().Equals(InternedFieldName));
            Current = null;
            return SeekStatus.END;
        }
        else
        {
            Current = t2.Bytes();
            // In unicode sort order the result must sort strictly after the target.
            Debug.Assert(!UnicodeSortOrder || term.CompareTo(Current) < 0, "term=" + UnicodeUtil.ToHexString(term.Utf8ToString()) + " vs current=" + UnicodeUtil.ToHexString(Current.Utf8ToString()));
            return SeekStatus.NOT_FOUND;
        }
    }
}