/// <summary>
/// Advances to the next document after the current one for which the number of
/// ordinals contained in <c>ords</c> is at least <c>minNrShouldMatch</c>.
/// While scanning a document, <c>currentMatched</c> and <c>score</c> are reset
/// and then accumulated from the matching ordinals.
/// </summary>
/// <returns>
/// The id of the next qualifying document, or <c>NO_MORE_DOCS</c> when
/// <c>maxDoc</c> is reached without finding one.
/// </returns>
public override int NextDoc()
{
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(currentDoc != NO_MORE_DOCS);
    }

    currentDoc++;
    while (currentDoc < maxDoc)
    {
        // Per-document state starts from scratch for every candidate.
        currentMatched = 0;
        score = 0;
        dv.SetDocument(currentDoc);

        for (long ord = dv.NextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = dv.NextOrd())
        {
            if (!ords.Contains(ord))
            {
                continue;
            }

            currentMatched++;
            score += sims[(int)ord].Score(currentDoc, 1);
        }

        if (currentMatched >= minNrShouldMatch)
        {
            return currentDoc;
        }

        currentDoc++;
    }

    return currentDoc = NO_MORE_DOCS;
}
/// <summary>
/// Indexes a single empty document, then asks the field cache for a series of
/// fields that were never indexed. Each lookup must return an empty/default
/// value, and the cache must still report zero entries afterwards.
/// </summary>
public virtual void TestNonexistantFields()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);
    writer.AddDocument(new Document());
    DirectoryReader reader = writer.Reader;
    writer.Dispose();

    AtomicReader segmentReader = GetOnlySegmentReader(reader);

    IFieldCache fieldCache = FieldCache.DEFAULT;
    fieldCache.PurgeAllCaches();
    Assert.AreEqual(0, fieldCache.GetCacheEntries().Length);

    // Numeric lookups on missing fields.
#pragma warning disable 612, 618
    Assert.AreEqual(0, fieldCache.GetBytes(segmentReader, "bogusbytes", true).Get(0));
    Assert.AreEqual(0, fieldCache.GetInt16s(segmentReader, "bogusshorts", true).Get(0));
#pragma warning restore 612, 618
    Assert.AreEqual(0, fieldCache.GetInt32s(segmentReader, "bogusints", true).Get(0));
    Assert.AreEqual(0, fieldCache.GetInt64s(segmentReader, "boguslongs", true).Get(0));
    Assert.AreEqual(0, fieldCache.GetSingles(segmentReader, "bogusfloats", true).Get(0), 0.0f);
    Assert.AreEqual(0, fieldCache.GetDoubles(segmentReader, "bogusdoubles", true).Get(0), 0.0D);

    // Term-based lookups on missing fields.
    BytesRef buffer = new BytesRef();

    BinaryDocValues binaryValues = fieldCache.GetTerms(segmentReader, "bogusterms", true);
    binaryValues.Get(0, buffer);
    Assert.AreEqual(0, buffer.Length);

    SortedDocValues sortedValues = fieldCache.GetTermsIndex(segmentReader, "bogustermsindex");
    Assert.AreEqual(-1, sortedValues.GetOrd(0));
    sortedValues.Get(0, buffer);
    Assert.AreEqual(0, buffer.Length);

    SortedSetDocValues multiValues = fieldCache.GetDocTermOrds(segmentReader, "bogusmultivalued");
    multiValues.SetDocument(0);
    Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, multiValues.NextOrd());

    IBits docsWithField = fieldCache.GetDocsWithField(segmentReader, "bogusbits");
    Assert.IsFalse(docsWithField.Get(0));

    // check that we cached nothing
    Assert.AreEqual(0, fieldCache.GetCacheEntries().Length);

    reader.Dispose();
    directory.Dispose();
}
/// <summary>
/// Positions <c>_docTermOrds</c> on <paramref name="doc"/> and, for each of its
/// ordinals, looks the term up into <c>_scratch</c> and adds it to
/// <c>_collectorTerms</c>.
/// </summary>
/// <param name="doc">The document id to collect.</param>
public override void Collect(int doc)
{
    _docTermOrds.SetDocument(doc);

    for (long ord = _docTermOrds.NextOrd();
         ord != SortedSetDocValues.NO_MORE_ORDS;
         ord = _docTermOrds.NextOrd())
    {
        _docTermOrds.LookupOrd(ord, _scratch);
        _collectorTerms.Add(_scratch);
    }
}
/// <summary>
/// Reports whether <paramref name="doc"/> has at least one ordinal whose bit is
/// set in <c>termSet</c>.
/// </summary>
/// <param name="doc">The document id to test.</param>
/// <returns><c>true</c> as soon as a set ordinal is seen; <c>false</c> once the ordinals are exhausted.</returns>
protected internal override sealed bool MatchDoc(int doc)
{
    docTermOrds.SetDocument(doc);

    // TODO: we could track max bit set and early terminate (since they come in sorted order)
    for (;;)
    {
        long ord = docTermOrds.NextOrd();
        if (ord == SortedSetDocValues.NO_MORE_ORDS)
        {
            return false;
        }

        if (termSet.Get(ord))
        {
            return true;
        }
    }
}
/// <summary>
/// For every ordinal of <paramref name="doc"/>, looks the join value up into
/// <c>joinValue</c> and adds the current scorer's score to the
/// <c>JoinScore</c> stored for that value, creating and registering a new
/// <c>JoinScore</c> (keyed by a deep copy of the value) when none is stored.
/// </summary>
/// <param name="doc">The document id to collect.</param>
public virtual void Collect(int doc)
{
    docTermOrds.SetDocument(doc);

    for (long ord = docTermOrds.NextOrd();
         ord != SortedSetDocValues.NO_MORE_ORDS;
         ord = docTermOrds.NextOrd())
    {
        docTermOrds.LookupOrd(ord, joinValue);

        bool known = joinValueToJoinScores.TryGetValue(joinValue, out JoinScore joinScore);
        if (!(known && joinScore != null))
        {
            // joinValue is reused on every iteration, so the key must be a copy.
            var key = BytesRef.DeepCopyOf(joinValue);
            joinScore = new JoinScore();
            joinValueToJoinScores[key] = joinScore;
        }

        joinScore.AddScore(scorer.GetScore());
    }
}
/// <summary>
/// For every ordinal of <paramref name="doc"/>, looks the join value up into
/// <c>joinValue</c> and adds the current scorer's score to the
/// <c>JoinScore</c> stored for that value in <c>JoinValueToJoinScores</c>.
/// When no (non-null) entry exists yet, a new <c>JoinScore</c> is created and
/// registered under a deep copy of the join value.
/// </summary>
/// <param name="doc">The document id to collect.</param>
public virtual void Collect(int doc)
{
    docTermOrds.SetDocument(doc);
    long ord;
    while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
    {
        docTermOrds.LookupOrd(ord, joinValue);

        // Single lookup instead of ContainsKey followed by the indexer.
        if (!JoinValueToJoinScores.TryGetValue(joinValue, out var joinScore) || joinScore == null)
        {
            // joinValue is reused on every iteration, so the key must be a copy.
            JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
        }

        joinScore.AddScore(scorer.GetScore());
    }
}
/// <summary>
/// Reports whether <paramref name="doc"/> has an ordinal inside the closed range
/// [<c>inclusiveLowerPoint</c>, <c>inclusiveUpperPoint</c>].
/// </summary>
/// <param name="doc">The document id to test.</param>
/// <returns>
/// <c>false</c> as soon as an ordinal above the upper point is seen or the
/// ordinals run out; <c>true</c> for the first ordinal at or above the lower
/// point that is not above the upper point.
/// </returns>
protected internal override sealed bool MatchDoc(int doc)
{
    docTermOrds.SetDocument(doc);

    for (long ord = docTermOrds.NextOrd();
         ord != SortedSetDocValues.NO_MORE_ORDS;
         ord = docTermOrds.NextOrd())
    {
        if (ord > inclusiveUpperPoint)
        {
            return false;
        }

        if (ord >= inclusiveLowerPoint)
        {
            return true;
        }
    }

    return false;
}
/// <summary>
/// Checks the sorted-set doc values of every document in <c>reader</c>: each
/// document must expose exactly two values, the string forms of
/// <c>sortedValues[i]</c> and <c>sortedValues[i] + 1</c>, in that order, and
/// then report <c>NO_MORE_ORDS</c>.
/// Skipped when the default codec does not support SORTED_SET.
/// </summary>
public virtual void TestSortedSetDocValuesField()
{
    AssumeTrue("default codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);

    SortedSetDocValues dv = reader.GetSortedSetDocValues(SORTED_SET_DV_FIELD);
    int maxDoc = reader.MaxDoc;
    BytesRef bytes = new BytesRef();
    for (int i = 0; i < maxDoc; i++)
    {
        dv.SetDocument(i);

        // First value of the document.
        dv.LookupOrd(dv.NextOrd(), bytes);
        int value = sortedValues[i];
        // Was value.toString(), which does not exist on int in C#.
        assertEquals("incorrect sorted-set DocValues for doc " + i, value.ToString(), bytes.Utf8ToString());

        // Second value of the document.
        dv.LookupOrd(dv.NextOrd(), bytes);
        assertEquals("incorrect sorted-set DocValues for doc " + i, (value + 1).ToString(), bytes.Utf8ToString());

        // No further values are expected.
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
    }
}
/// <summary>
/// Lazily walks all readers in order and, for every document that is live
/// (or every document when the reader has no live-docs bits), yields the
/// number of ordinals that document has in the reader's sorted-set doc values.
/// </summary>
/// <param name="readers">The readers to walk, in order.</param>
/// <param name="dvs">Sorted-set doc values, indexed in parallel with <paramref name="readers"/>.</param>
/// <returns>One ordinal count per visited document.</returns>
private IEnumerable<long?> GetMergeSortedSetDocToOrdCountEnumerable(AtomicReader[] readers, SortedSetDocValues[] dvs)
{
    for (int readerIndex = 0; readerIndex < readers.Length; readerIndex++)
    {
        AtomicReader reader = readers[readerIndex];
        IBits liveDocs = reader.LiveDocs;

        for (int docID = 0; docID < reader.MaxDoc; docID++)
        {
            // Deleted documents contribute nothing.
            if (liveDocs != null && !liveDocs.Get(docID))
            {
                continue;
            }

            SortedSetDocValues values = dvs[readerIndex];
            values.SetDocument(docID);

            long ordCount = 0;
            while (values.NextOrd() != SortedSetDocValues.NO_MORE_ORDS)
            {
                ordCount++;
            }

            yield return ordCount;
        }
    }
}
/// <summary>
/// For every ordinal of <paramref name="doc"/>, looks the join value up into
/// <c>scratch</c>; when a non-null <c>JoinScore</c> is stored for it, records
/// that score for the document (offset by <c>docBase</c>) unless the document
/// already has one.
/// </summary>
/// <param name="doc">The document id to collect.</param>
public virtual void Collect(int doc)
{
    docTermOrds.SetDocument(doc);

    for (long ord = docTermOrds.NextOrd();
         ord != SortedSetDocValues.NO_MORE_ORDS;
         ord = docTermOrds.NextOrd())
    {
        docTermOrds.LookupOrd(ord, scratch);

        bool found = joinValueToJoinScores.TryGetValue(scratch, out JoinScore joinScore);
        if (found && joinScore != null)
        {
            int basedDoc = docBase + doc;

            // First encountered join value determines the score.
            // Something to keep in mind for many-to-many relations.
            if (docToJoinScore.ContainsKey(basedDoc))
            {
                continue;
            }

            docToJoinScore[basedDoc] = joinScore;
        }
    }
}
/// <summary>
/// Returns a <see cref="DocIdSet"/> with documents that should be permitted in search
/// results.
/// </summary>
/// <param name="context">The segment whose doc-term ordinals are read from the field cache.</param>
/// <param name="acceptDocs">Passed through to the returned <c>FieldCacheDocIdSet</c>.</param>
/// <returns>
/// <c>null</c> when the query's terms enum yields no term; otherwise a set that
/// matches documents having at least one ordinal produced by that enum.
/// </returns>
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
{
    SortedSetDocValues docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(context.AtomicReader, m_query.m_field);

    // Cannot use FixedBitSet because we require long index (ord):
    Int64BitSet termSet = new Int64BitSet(docTermOrds.ValueCount);
    TermsEnum termsEnum = m_query.GetTermsEnum(new TermsAnonymousInnerClassHelper(docTermOrds));
    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(termsEnum != null);
    }

    // Nothing enumerated: no document can match.
    if (!termsEnum.MoveNext())
    {
        return null;
    }

    // fill into a bitset
    do
    {
        termSet.Set(termsEnum.Ord);
    }
    while (termsEnum.MoveNext());

    return new FieldCacheDocIdSet(context.Reader.MaxDoc, acceptDocs, (doc) =>
    {
        docTermOrds.SetDocument(doc);

        // TODO: we could track max bit set and early terminate (since they come in sorted order)
        for (long ord = docTermOrds.NextOrd();
             ord != SortedSetDocValues.NO_MORE_ORDS;
             ord = docTermOrds.NextOrd())
        {
            if (termSet.Get(ord))
            {
                return true;
            }
        }

        return false;
    });
}
/// <summary>
/// Advances to the next document after the current one for which the number of
/// ordinals contained in <c>Ords</c> is at least <c>MinNrShouldMatch</c>.
/// While scanning a document, <c>CurrentMatched</c> and <c>Score_Renamed</c>
/// are reset and then accumulated from the matching ordinals.
/// </summary>
/// <returns>
/// The id of the next qualifying document, or <c>NO_MORE_DOCS</c> when
/// <c>MaxDoc</c> is reached without finding one.
/// </returns>
public override int NextDoc()
{
    Debug.Assert(CurrentDoc != NO_MORE_DOCS);

    CurrentDoc++;
    while (CurrentDoc < MaxDoc)
    {
        // Per-document state starts from scratch for every candidate.
        CurrentMatched = 0;
        Score_Renamed = 0;
        Dv.SetDocument(CurrentDoc);

        for (long ord = Dv.NextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = Dv.NextOrd())
        {
            if (!Ords.Contains(ord))
            {
                continue;
            }

            CurrentMatched++;
            Score_Renamed += Sims[(int)ord].Score(CurrentDoc, 1);
        }

        if (CurrentMatched >= MinNrShouldMatch)
        {
            return CurrentDoc;
        }

        CurrentDoc++;
    }

    return CurrentDoc = NO_MORE_DOCS;
}
/// <summary>
/// Exercises the default field cache against <c>Reader</c>: for each numeric
/// type it checks that repeated requests return the same instance and that
/// every document holds the expected value; it then checks docs-with-field
/// bits, the terms index (including enum iteration and seeking), binary terms
/// and doc-term ordinals, and finally purges the cache for the reader's core key.
/// </summary>
public virtual void Test()
{
#pragma warning disable 612, 618
    IFieldCache cache = FieldCache.DEFAULT;

    // doubles
    FieldCache.Doubles doubles = cache.GetDoubles(Reader, "theDouble", Random.NextBoolean());
    Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
    }

    // longs
    FieldCache.Int64s longs = cache.GetInt64s(Reader, "theLong", Random.NextBoolean());
    Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", FieldCache.DEFAULT_INT64_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
    }

    // bytes
    FieldCache.Bytes bytes = cache.GetBytes(Reader, "theByte", Random.NextBoolean());
    Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue((sbyte)bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), (sbyte)bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
    }

    // shorts
    FieldCache.Int16s shorts = cache.GetInt16s(Reader, "theShort", Random.NextBoolean());
    Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", FieldCache.DEFAULT_INT16_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
    }

    // ints
    FieldCache.Int32s ints = cache.GetInt32s(Reader, "theInt", Random.NextBoolean());
    Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", FieldCache.DEFAULT_INT32_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
    }

    // floats
    FieldCache.Singles floats = cache.GetSingles(Reader, "theFloat", Random.NextBoolean());
    Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", FieldCache.DEFAULT_SINGLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
    }
#pragma warning restore 612, 618

    // docsWithField on a field every document is expected to have
    IBits docsWithField = cache.GetDocsWithField(Reader, "theLong");
    Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "theLong"), "Second request to cache return same array");
    Assert.IsTrue(docsWithField is Bits.MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
    Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
    for (int i = 0; i < docsWithField.Length; i++)
    {
        Assert.IsTrue(docsWithField.Get(i));
    }

    // docsWithField on a field expected on even-numbered documents only
    docsWithField = cache.GetDocsWithField(Reader, "sparse");
    Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "sparse"), "Second request to cache return same array");
    Assert.IsFalse(docsWithField is Bits.MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
    Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
    for (int i = 0; i < docsWithField.Length; i++)
    {
        Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
    }

    // getTermsIndex
    SortedDocValues termsIndex = cache.GetTermsIndex(Reader, "theRandomUnicodeString");
    Assert.AreSame(termsIndex, cache.GetTermsIndex(Reader, "theRandomUnicodeString"), "Second request to cache return same array");
    BytesRef br = new BytesRef();
    for (int i = 0; i < NUM_DOCS; i++)
    {
        BytesRef term;
        int ord = termsIndex.GetOrd(i);
        if (ord == -1)
        {
            term = null;
        }
        else
        {
            termsIndex.LookupOrd(ord, br);
            term = br;
        }
        string s = term == null ? null : term.Utf8ToString();
        Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
    }

    // The terms enum must return the same values, in ord order, as LookupOrd.
    int nTerms = termsIndex.ValueCount;
    TermsEnum tenum = termsIndex.GetTermsEnum();
    BytesRef val = new BytesRef();
    for (int i = 0; i < nTerms; i++)
    {
        BytesRef val1 = tenum.Next();
        termsIndex.LookupOrd(i, val);
        // System.out.println("i="+i);
        Assert.AreEqual(val, val1);
    }

    // seek the enum around (note this isn't a great test here)
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        int k = Random.Next(nTerms);
        termsIndex.LookupOrd(k, val);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
        Assert.AreEqual(val, tenum.Term);
    }

    for (int i = 0; i < nTerms; i++)
    {
        termsIndex.LookupOrd(i, val);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
        Assert.AreEqual(val, tenum.Term);
    }

    // test bad field
    termsIndex = cache.GetTermsIndex(Reader, "bogusfield");

    // getTerms
    BinaryDocValues terms = cache.GetTerms(Reader, "theRandomUnicodeString", true);
    Assert.AreSame(terms, cache.GetTerms(Reader, "theRandomUnicodeString", true), "Second request to cache return same array");
    IBits bits = cache.GetDocsWithField(Reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        terms.Get(i, br);
        BytesRef term;
        if (!bits.Get(i))
        {
            term = null;
        }
        else
        {
            term = br;
        }
        string s = term == null ? null : term.Utf8ToString();
        Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
    }

    // test bad field
    terms = cache.GetTerms(Reader, "bogusfield", false);

    // getDocTermOrds
    SortedSetDocValues termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
    int numEntries = cache.GetCacheEntries().Length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
    Assert.AreEqual(numEntries, cache.GetCacheEntries().Length);

    for (int i = 0; i < NUM_DOCS; i++)
    {
        termOrds.SetDocument(i);
        // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        IList<BytesRef> values = MultiValued[i].Distinct().ToList();
        foreach (BytesRef v in values)
        {
            if (v == null)
            {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            long ord = termOrds.NextOrd();
            // A test-framework assertion rather than Debug.Assert, so the check
            // is not compiled out of non-DEBUG builds.
            Assert.IsTrue(ord != SortedSetDocValues.NO_MORE_ORDS);
            BytesRef scratch = new BytesRef();
            termOrds.LookupOrd(ord, scratch);
            Assert.AreEqual(v, scratch);
        }
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
    }

    // test bad field
    termOrds = cache.GetDocTermOrds(Reader, "bogusfield");
    Assert.IsTrue(termOrds.ValueCount == 0);

    FieldCache.DEFAULT.PurgeByCacheKey(Reader.CoreCacheKey);
}
/// <summary>
/// Translates <paramref name="docID"/> through <c>docMap.NewToOld</c> and
/// positions the wrapped doc values on the resulting document.
/// </summary>
/// <param name="docID">The document id to translate and position on.</param>
public override void SetDocument(int docID)
{
    var mappedDocID = docMap.NewToOld(docID);
    @in.SetDocument(mappedDocID);
}
/// <summary>
/// Lazily walks all readers in order and, for every document that is live
/// (or every document when the reader has no live-docs bits), yields each of
/// the document's ordinals translated through <c>map.GetGlobalOrd</c>.
/// A document's ordinals are buffered in full before any of them is yielded.
/// </summary>
/// <param name="readers">The readers to walk, in order.</param>
/// <param name="dvs">Sorted-set doc values, indexed in parallel with <paramref name="readers"/>.</param>
/// <param name="map">Translates a (reader index, ordinal) pair to the yielded ordinal.</param>
/// <returns>The translated ordinals, document by document.</returns>
private IEnumerable<long?> GetMergeSortedSetOrdsEnumerable(AtomicReader[] readers, SortedSetDocValues[] dvs, OrdinalMap map)
{
    // Reused across documents; grown on demand.
    long[] buffer = new long[8];

    for (int readerIndex = 0; readerIndex < readers.Length; readerIndex++)
    {
        AtomicReader reader = readers[readerIndex];
        IBits liveDocs = reader.LiveDocs;

        for (int docID = 0; docID < reader.MaxDoc; docID++)
        {
            // Deleted documents contribute nothing.
            if (liveDocs != null && !liveDocs.Get(docID))
            {
                continue;
            }

            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(docID < reader.MaxDoc);
            }

            SortedSetDocValues values = dvs[readerIndex];
            values.SetDocument(docID);

            int buffered = 0;
            for (long ord = values.NextOrd();
                 ord != SortedSetDocValues.NO_MORE_ORDS;
                 ord = values.NextOrd())
            {
                if (buffered == buffer.Length)
                {
                    buffer = ArrayUtil.Grow(buffer, buffered + 1);
                }

                buffer[buffered] = map.GetGlobalOrd(readerIndex, ord);
                buffered++;
            }

            for (int i = 0; i < buffered; i++)
            {
                yield return buffer[i];
            }
        }
    }
}
/// <summary>
/// Create the results based on the search hits.
/// Can be overridden by subclass to add particular behavior (e.g. weight transformation)
/// </summary>
/// <param name="searcher">Searcher whose index reader supplies the text, payload and context doc values.</param>
/// <param name="hits">The hits to convert; each entry is read as a <c>FieldDoc</c>.</param>
/// <param name="num">Not used by this implementation.</param>
/// <param name="charSequence">Not used by this implementation.</param>
/// <param name="doHighlight">When <c>true</c>, the result key is produced by <c>Highlight</c>.</param>
/// <param name="matchedTokens">Passed to <c>Highlight</c>.</param>
/// <param name="prefixToken">Passed to <c>Highlight</c>.</param>
/// <returns>One <c>LookupResult</c> per hit, in hit order.</returns>
/// <exception cref="System.IO.IOException"> If there are problems reading fields from the underlying Lucene index. </exception>
protected internal virtual IList<LookupResult> CreateResults(IndexSearcher searcher, TopFieldDocs hits, int num, string charSequence, bool doHighlight, IEnumerable<string> matchedTokens, string prefixToken)
{
    BinaryDocValues textDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, TEXT_FIELD_NAME);

    // This will just be null if app didn't pass payloads to build():
    // TODO: maybe just stored fields?  they compress...
    BinaryDocValues payloadsDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, "payloads");
    IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;
    List<LookupResult> results = new List<LookupResult>();
    BytesRef textScratch = new BytesRef();

    for (int hitIndex = 0; hitIndex < hits.ScoreDocs.Length; hitIndex++)
    {
        FieldDoc hit = (FieldDoc)hits.ScoreDocs[hitIndex];

        textDV.Get(hit.Doc, textScratch);
        string text = textScratch.Utf8ToString();
        long score = (long)hit.Fields[0];

        BytesRef payload = null;
        if (payloadsDV != null)
        {
            payload = new BytesRef();
            payloadsDV.Get(hit.Doc, payload);
        }

        // Must look up sorted-set by segment:
        int segment = ReaderUtil.SubIndex(hit.Doc, leaves);
        AtomicReaderContext leaf = leaves[segment];
        SortedSetDocValues contextsDV = leaf.AtomicReader.GetSortedSetDocValues(CONTEXTS_FIELD_NAME);

        HashSet<BytesRef> contexts = null;
        if (contextsDV != null)
        {
            contexts = new HashSet<BytesRef>();
            contextsDV.SetDocument(hit.Doc - leaf.DocBase);

            for (long ord = contextsDV.NextOrd();
                 ord != SortedSetDocValues.NO_MORE_ORDS;
                 ord = contextsDV.NextOrd())
            {
                // A fresh BytesRef per context: each one is stored in the set.
                BytesRef context = new BytesRef();
                contextsDV.LookupOrd(ord, context);
                contexts.Add(context);
            }
        }

        if (doHighlight)
        {
            object highlightKey = Highlight(text, matchedTokens, prefixToken);
            results.Add(new LookupResult(highlightKey.ToString(), highlightKey, score, payload, contexts));
        }
        else
        {
            results.Add(new LookupResult(text, score, payload, contexts));
        }
    }

    return results;
}
/// <summary>
/// Builds a <c>DocIdSet</c> matching documents that have at least one ordinal
/// inside the ordinal range derived from <c>lowerVal</c>/<c>upperVal</c> and
/// the <c>includeLower</c>/<c>includeUpper</c> flags.
/// </summary>
/// <param name="context">The segment whose doc-term ordinals are read from the field cache.</param>
/// <param name="acceptDocs">Passed through to the returned <c>FieldCacheDocIdSet</c>.</param>
/// <returns>
/// <c>null</c> when the resulting ordinal range is empty; otherwise the
/// matching set.
/// </returns>
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
{
    SortedSetDocValues docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(context.AtomicReader, field);
    long lowerPoint = lowerVal == null ? -1 : docTermOrds.LookupTerm(lowerVal);
    long upperPoint = upperVal == null ? -1 : docTermOrds.LookupTerm(upperVal);

    // Hints:
    // * binarySearchLookup returns -1, if value was null.
    // * the value is <0 if no exact hit was found, the returned value
    //   is (-(insertion point) - 1)
    long inclusiveLowerPoint;
    if (lowerPoint >= 0)
    {
        // Exact hit: step past it when the lower bound is exclusive.
        inclusiveLowerPoint = includeLower ? lowerPoint : lowerPoint + 1;
    }
    else if (lowerPoint == -1 && lowerVal == null)
    {
        inclusiveLowerPoint = 0;
    }
    else
    {
        inclusiveLowerPoint = Math.Max(0, -lowerPoint - 1);
    }

    long inclusiveUpperPoint;
    if (upperPoint >= 0)
    {
        // Exact hit: step before it when the upper bound is exclusive.
        inclusiveUpperPoint = includeUpper ? upperPoint : upperPoint - 1;
    }
    else if (upperPoint == -1 && upperVal == null)
    {
        inclusiveUpperPoint = long.MaxValue;
    }
    else
    {
        inclusiveUpperPoint = -upperPoint - 2;
    }

    if (inclusiveUpperPoint < 0 || inclusiveLowerPoint > inclusiveUpperPoint)
    {
        return null;
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(inclusiveLowerPoint >= 0 && inclusiveUpperPoint >= 0);
    }

    return new FieldCacheDocIdSet(context.AtomicReader.MaxDoc, acceptDocs, (doc) =>
    {
        docTermOrds.SetDocument(doc);

        for (long ord = docTermOrds.NextOrd();
             ord != SortedSetDocValues.NO_MORE_ORDS;
             ord = docTermOrds.NextOrd())
        {
            if (ord > inclusiveUpperPoint)
            {
                return false;
            }

            if (ord >= inclusiveLowerPoint)
            {
                return true;
            }
        }

        return false;
    });
}
/// <summary>
/// Indexes one document carrying a binary, a sorted, a numeric and (when the
/// default codec supports it) a sorted-set doc-values field, then checks for
/// each field which field-cache accessors return its value and which throw
/// <see cref="InvalidOperationException"/>.
/// </summary>
public virtual void TestDocValuesIntegration()
{
    AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues);
    Directory dir = NewDirectory();
    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    RandomIndexWriter iw = new RandomIndexWriter(Random, dir, iwc);

    // Build the single test document.
    Document doc = new Document();
    doc.Add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
    doc.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
    doc.Add(new NumericDocValuesField("numeric", 42));
    if (DefaultCodecSupportsSortedSet)
    {
        doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
        doc.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
    }
    iw.AddDocument(doc);
    DirectoryReader ir = iw.GetReader();
    iw.Dispose();
    AtomicReader ar = GetOnlySegmentReader(ir);

    BytesRef scratch = new BytesRef();

    // Binary type: can be retrieved via getTerms()
    // Each try block below expects the call to throw; reaching Assert.Fail()
    // means it did not. Warning 168 (unused variable) is disabled around the
    // catch clauses because "expected" is never read.
    try
    {
        FieldCache.DEFAULT.GetInt32s(ar, "binary", false);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    BinaryDocValues binary = FieldCache.DEFAULT.GetTerms(ar, "binary", true);
    binary.Get(0, scratch);
    Assert.AreEqual("binary value", scratch.Utf8ToString());

    try
    {
        FieldCache.DEFAULT.GetTermsIndex(ar, "binary");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        FieldCache.DEFAULT.GetDocTermOrds(ar, "binary");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        new DocTermOrds(ar, null, "binary");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    IBits bits = FieldCache.DEFAULT.GetDocsWithField(ar, "binary");
    Assert.IsTrue(bits.Get(0));

    // Sorted type: can be retrieved via getTerms(), getTermsIndex(), getDocTermOrds()
    try
    {
        FieldCache.DEFAULT.GetInt32s(ar, "sorted", false);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        new DocTermOrds(ar, null, "sorted");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    binary = FieldCache.DEFAULT.GetTerms(ar, "sorted", true);
    binary.Get(0, scratch);
    Assert.AreEqual("sorted value", scratch.Utf8ToString());

    SortedDocValues sorted = FieldCache.DEFAULT.GetTermsIndex(ar, "sorted");
    Assert.AreEqual(0, sorted.GetOrd(0));
    Assert.AreEqual(1, sorted.ValueCount);
    sorted.Get(0, scratch);
    Assert.AreEqual("sorted value", scratch.Utf8ToString());

    // The sorted field read through GetDocTermOrds: one ord for the doc, then NO_MORE_ORDS.
    SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sorted");
    sortedSet.SetDocument(0);
    Assert.AreEqual(0, sortedSet.NextOrd());
    Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
    Assert.AreEqual(1, sortedSet.ValueCount);

    bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sorted");
    Assert.IsTrue(bits.Get(0));

    // Numeric type: can be retrieved via getInts() and so on
    Int32s numeric = FieldCache.DEFAULT.GetInt32s(ar, "numeric", false);
    Assert.AreEqual(42, numeric.Get(0));

    try
    {
        FieldCache.DEFAULT.GetTerms(ar, "numeric", true);
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        FieldCache.DEFAULT.GetTermsIndex(ar, "numeric");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        FieldCache.DEFAULT.GetDocTermOrds(ar, "numeric");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    try
    {
        new DocTermOrds(ar, null, "numeric");
        Assert.Fail();
    }
#pragma warning disable 168
    catch (InvalidOperationException expected)
#pragma warning restore 168
    {
    }

    bits = FieldCache.DEFAULT.GetDocsWithField(ar, "numeric");
    Assert.IsTrue(bits.Get(0));

    // SortedSet type: can be retrieved via getDocTermOrds()
    if (DefaultCodecSupportsSortedSet)
    {
        try
        {
            FieldCache.DEFAULT.GetInt32s(ar, "sortedset", false);
            Assert.Fail();
        }
#pragma warning disable 168
        catch (InvalidOperationException expected)
#pragma warning restore 168
        {
        }

        try
        {
            FieldCache.DEFAULT.GetTerms(ar, "sortedset", true);
            Assert.Fail();
        }
#pragma warning disable 168
        catch (InvalidOperationException expected)
#pragma warning restore 168
        {
        }

        try
        {
            FieldCache.DEFAULT.GetTermsIndex(ar, "sortedset");
            Assert.Fail();
        }
#pragma warning disable 168
        catch (InvalidOperationException expected)
#pragma warning restore 168
        {
        }

        try
        {
            new DocTermOrds(ar, null, "sortedset");
            Assert.Fail();
        }
#pragma warning disable 168
        catch (InvalidOperationException expected)
#pragma warning restore 168
        {
        }

        // Two values were added for this field, so two ords are expected for the doc.
        sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sortedset");
        sortedSet.SetDocument(0);
        Assert.AreEqual(0, sortedSet.NextOrd());
        Assert.AreEqual(1, sortedSet.NextOrd());
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
        Assert.AreEqual(2, sortedSet.ValueCount);

        bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sortedset");
        Assert.IsTrue(bits.Get(0));
    }

    ir.Dispose();
    dir.Dispose();
}
/// <summary>
/// Does all the "real work" of tallying up the counts.
/// </summary>
/// <param name="matchingDocs">
/// Per-segment hits; every segment's top-level reader must equal
/// <c>state.OrigReader</c>.
/// </param>
/// <exception cref="InvalidOperationException">
/// Thrown when a segment's top-level reader is not the reader held by <c>state</c>.
/// </exception>
private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
{
    //System.out.println("ssdv count");

    // TODO: is this right?  really, we need a way to
    // verify that this ordinalMap "matches" the leaves in
    // matchingDocs...
    MultiDocValues.OrdinalMap ordinalMap =
        (dv is MultiDocValues.MultiSortedSetDocValues && matchingDocs.Count > 1)
            ? ((MultiDocValues.MultiSortedSetDocValues)dv).Mapping
            : null;

    IndexReader origReader = state.OrigReader;

    foreach (FacetsCollector.MatchingDocs hits in matchingDocs)
    {
        var reader = hits.Context.AtomicReader;
        //System.out.println("  reader=" + reader);

        // LUCENE-5090: make sure the provided reader context "matches"
        // the top-level reader passed to the
        // SortedSetDocValuesReaderState, else cryptic
        // AIOOBE can happen:
        if (!Equals(ReaderUtil.GetTopLevelContext(hits.Context).Reader, origReader))
        {
            throw new InvalidOperationException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
        }

        SortedSetDocValues segValues = reader.GetSortedSetDocValues(field);
        if (segValues == null)
        {
            continue;
        }

        DocIdSetIterator docs = hits.Bits.GetIterator();

        if (ordinalMap == null)
        {
            // No ord mapping (e.g., single segment index):
            // just aggregate directly into counts:
            for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
            {
                segValues.SetDocument(doc);
                for (int term = (int)segValues.NextOrd();
                     term != SortedSetDocValues.NO_MORE_ORDS;
                     term = (int)segValues.NextOrd())
                {
                    counts[term]++;
                }
            }

            continue;
        }

        // TODO: yet another option is to count all segs
        // first, only in seg-ord space, and then do a
        // merge-sort-PQ in the end to only "resolve to
        // global" those seg ords that can compete, if we know
        // we just want top K?  ie, this is the same algo
        // that'd be used for merging facets across shards
        // (distributed faceting).  but this has much higher
        // temp ram req'ts (sum of number of ords across all
        // segs)
        int segOrd = hits.Context.Ord;
        int numSegOrds = (int)segValues.ValueCount;

        if (hits.TotalHits < numSegOrds / 10)
        {
            //System.out.println("    remap as-we-go");
            // Remap every ord to global ord as we iterate:
            for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
            {
                //System.out.println("    doc=" + doc);
                segValues.SetDocument(doc);
                for (int term = (int)segValues.NextOrd();
                     term != SortedSetDocValues.NO_MORE_ORDS;
                     term = (int)segValues.NextOrd())
                {
                    //System.out.println("      segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term));
                    counts[(int)ordinalMap.GetGlobalOrd(segOrd, term)]++;
                }
            }
        }
        else
        {
            //System.out.println("    count in seg ord first");

            // First count in seg-ord space:
            int[] segCounts = new int[numSegOrds];
            for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
            {
                //System.out.println("    doc=" + doc);
                segValues.SetDocument(doc);
                for (int term = (int)segValues.NextOrd();
                     term != SortedSetDocValues.NO_MORE_ORDS;
                     term = (int)segValues.NextOrd())
                {
                    //System.out.println("      ord=" + term);
                    segCounts[term]++;
                }
            }

            // Then, migrate to global ords:
            for (int ord = 0; ord < numSegOrds; ord++)
            {
                if (segCounts[ord] == 0)
                {
                    continue;
                }

                //System.out.println("    migrate segOrd=" + segOrd + " ord=" + ord + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, ord));
                counts[(int)ordinalMap.GetGlobalOrd(segOrd, ord)] += segCounts[ord];
            }
        }
    }
}
/// <summary>
/// Positions the wrapped doc values on <paramref name="docID"/> and returns the
/// first value reported by <c>NextOrd</c>, cast to <c>int</c>.
/// </summary>
/// <param name="docID">The document id to read.</param>
/// <returns>The first <c>NextOrd</c> result for the document, as an <c>int</c>.</returns>
public override int GetOrd(int docID)
{
    @in.SetDocument(docID);
    var firstOrd = @in.NextOrd();
    return (int)firstOrd;
}