/// <summary>
/// Indexes one document carrying a binary, a sorted, a numeric and (when the codec
/// supports it) a sorted-set doc values field, then checks for each field which
/// <c>FieldCache</c> / <c>DocTermOrds</c> accessors return its value and which ones
/// throw <see cref="InvalidOperationException"/>.
/// </summary>
public virtual void TestDocValuesIntegration()
{
    AssumeTrue("3.x does not support docvalues", DefaultCodecSupportsDocValues);

    // Runs the call and fails the test unless it throws InvalidOperationException.
    void ExpectInvalidOperation(Action call)
    {
        try
        {
            call();
            Assert.Fail();
        }
        catch (InvalidOperationException)
        {
            // expected
        }
    }

    Directory directory = NewDirectory();
    IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, null);
    RandomIndexWriter writer = new RandomIndexWriter(Random, directory, config);

    Document document = new Document();
    document.Add(new BinaryDocValuesField("binary", new BytesRef("binary value")));
    document.Add(new SortedDocValuesField("sorted", new BytesRef("sorted value")));
    document.Add(new NumericDocValuesField("numeric", 42));
    if (DefaultCodecSupportsSortedSet)
    {
        document.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value1")));
        document.Add(new SortedSetDocValuesField("sortedset", new BytesRef("sortedset value2")));
    }
    writer.AddDocument(document);

    DirectoryReader ir = writer.GetReader();
    writer.Dispose();
    AtomicReader ar = GetOnlySegmentReader(ir);
    BytesRef scratch = new BytesRef();

    // Binary type: can be retrieved via GetTerms() only
    ExpectInvalidOperation(() => FieldCache.DEFAULT.GetInt32s(ar, "binary", false));

    BinaryDocValues binary = FieldCache.DEFAULT.GetTerms(ar, "binary", true);
    binary.Get(0, scratch);
    Assert.AreEqual("binary value", scratch.Utf8ToString());

    ExpectInvalidOperation(() => FieldCache.DEFAULT.GetTermsIndex(ar, "binary"));
    ExpectInvalidOperation(() => FieldCache.DEFAULT.GetDocTermOrds(ar, "binary"));
    ExpectInvalidOperation(() => new DocTermOrds(ar, null, "binary"));

    IBits bits = FieldCache.DEFAULT.GetDocsWithField(ar, "binary");
    Assert.IsTrue(bits.Get(0));

    // Sorted type: can be retrieved via GetTerms(), GetTermsIndex(), GetDocTermOrds()
    ExpectInvalidOperation(() => FieldCache.DEFAULT.GetInt32s(ar, "sorted", false));
    ExpectInvalidOperation(() => new DocTermOrds(ar, null, "sorted"));

    binary = FieldCache.DEFAULT.GetTerms(ar, "sorted", true);
    binary.Get(0, scratch);
    Assert.AreEqual("sorted value", scratch.Utf8ToString());

    SortedDocValues sorted = FieldCache.DEFAULT.GetTermsIndex(ar, "sorted");
    Assert.AreEqual(0, sorted.GetOrd(0));
    Assert.AreEqual(1, sorted.ValueCount);
    sorted.Get(0, scratch);
    Assert.AreEqual("sorted value", scratch.Utf8ToString());

    SortedSetDocValues sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sorted");
    sortedSet.SetDocument(0);
    Assert.AreEqual(0, sortedSet.NextOrd());
    Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
    Assert.AreEqual(1, sortedSet.ValueCount);

    bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sorted");
    Assert.IsTrue(bits.Get(0));

    // Numeric type: can be retrieved via GetInt32s() and so on
    Int32s numeric = FieldCache.DEFAULT.GetInt32s(ar, "numeric", false);
    Assert.AreEqual(42, numeric.Get(0));

    ExpectInvalidOperation(() => FieldCache.DEFAULT.GetTerms(ar, "numeric", true));
    ExpectInvalidOperation(() => FieldCache.DEFAULT.GetTermsIndex(ar, "numeric"));
    ExpectInvalidOperation(() => FieldCache.DEFAULT.GetDocTermOrds(ar, "numeric"));
    ExpectInvalidOperation(() => new DocTermOrds(ar, null, "numeric"));

    bits = FieldCache.DEFAULT.GetDocsWithField(ar, "numeric");
    Assert.IsTrue(bits.Get(0));

    // SortedSet type: can be retrieved via GetDocTermOrds() only
    if (DefaultCodecSupportsSortedSet)
    {
        ExpectInvalidOperation(() => FieldCache.DEFAULT.GetInt32s(ar, "sortedset", false));
        ExpectInvalidOperation(() => FieldCache.DEFAULT.GetTerms(ar, "sortedset", true));
        ExpectInvalidOperation(() => FieldCache.DEFAULT.GetTermsIndex(ar, "sortedset"));
        ExpectInvalidOperation(() => new DocTermOrds(ar, null, "sortedset"));

        sortedSet = FieldCache.DEFAULT.GetDocTermOrds(ar, "sortedset");
        sortedSet.SetDocument(0);
        Assert.AreEqual(0, sortedSet.NextOrd());
        Assert.AreEqual(1, sortedSet.NextOrd());
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, sortedSet.NextOrd());
        Assert.AreEqual(2, sortedSet.ValueCount);

        bits = FieldCache.DEFAULT.GetDocsWithField(ar, "sortedset");
        Assert.IsTrue(bits.Get(0));
    }

    ir.Dispose();
    directory.Dispose();
}
/// <summary>
/// Builds one <c>LookupResult</c> per search hit, reading the suggestion text,
/// the optional payload and the optional context set from doc values.
/// Subclasses may override this to add particular behavior (e.g. weight transformation).
/// </summary>
/// <exception cref="IOException"> If there are problems reading fields from the underlying Lucene index. </exception>
protected internal virtual IList<LookupResult> CreateResults(IndexSearcher searcher, TopFieldDocs hits, int num, string charSequence, bool doHighlight, ICollection<string> matchedTokens, string prefixToken)
{
    BinaryDocValues textValues = MultiDocValues.GetBinaryValues(searcher.IndexReader, TEXT_FIELD_NAME);

    // This will just be null if app didn't pass payloads to build():
    // TODO: maybe just stored fields?  they compress...
    BinaryDocValues payloadValues = MultiDocValues.GetBinaryValues(searcher.IndexReader, "payloads");

    IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;
    var results = new List<LookupResult>();
    var textBytes = new BytesRef();

    foreach (FieldDoc fieldDoc in hits.ScoreDocs)
    {
        textValues.Get(fieldDoc.Doc, textBytes);
        string text = textBytes.Utf8ToString();
        long score = (long)fieldDoc.Fields[0];

        // Stays null when the index carries no payloads.
        BytesRef payload = null;
        if (payloadValues != null)
        {
            payload = new BytesRef();
            payloadValues.Get(fieldDoc.Doc, payload);
        }

        // Must look up sorted-set by segment:
        int leafIndex = ReaderUtil.SubIndex(fieldDoc.Doc, leaves);
        AtomicReaderContext leaf = leaves[leafIndex];
        SortedSetDocValues contextValues = leaf.AtomicReader.GetSortedSetDocValues(CONTEXTS_FIELD_NAME);

        // Stays null when the segment has no contexts field.
        ISet<BytesRef> contexts = null;
        if (contextValues != null)
        {
            contexts = new JCG.HashSet<BytesRef>();
            // Sorted-set values are addressed by segment-local doc id.
            contextValues.SetDocument(fieldDoc.Doc - leaf.DocBase);
            for (long ord = contextValues.NextOrd();
                 ord != SortedSetDocValues.NO_MORE_ORDS;
                 ord = contextValues.NextOrd())
            {
                var contextBytes = new BytesRef();
                contextValues.LookupOrd(ord, contextBytes);
                contexts.Add(contextBytes);
            }
        }

        if (doHighlight)
        {
            object highlighted = Highlight(text, matchedTokens, prefixToken);
            results.Add(new LookupResult(highlighted.ToString(), highlighted, score, payload, contexts));
        }
        else
        {
            results.Add(new LookupResult(text, score, payload, contexts));
        }
    }

    return results;
}
/// <summary>
/// Exercises the main <c>FieldCache</c> accessors against the fixture's <c>reader</c>:
/// the numeric getters, <c>GetDocsWithField</c>, <c>GetTermsIndex</c>, <c>GetTerms</c> and
/// <c>GetDocTermOrds</c>. For each accessor it asserts that a repeated request returns the
/// same cached instance (or, for doc-term-ords, adds no cache entries) and that the
/// per-document values match what the assertions below expect.
/// </summary>
public virtual void Test()
{
    // CS0612/CS0618 are the compiler's obsolete-member warnings; suppressed for this section.
#pragma warning disable 612, 618
    IFieldCache cache = FieldCache.DEFAULT;

    // Doubles: doc i is expected to hold double.MaxValue - i.
    // The setDocsWithField flag is drawn at random on every request.
    FieldCache.Doubles doubles = cache.GetDoubles(reader, "theDouble", Random.NextBoolean());
    Assert.AreSame(doubles, cache.GetDoubles(reader, "theDouble", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(doubles, cache.GetDoubles(reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
    }

    // Longs: doc i is expected to hold long.MaxValue - i.
    FieldCache.Int64s longs = cache.GetInt64s(reader, "theLong", Random.NextBoolean());
    Assert.AreSame(longs, cache.GetInt64s(reader, "theLong", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(longs, cache.GetInt64s(reader, "theLong", FieldCache.DEFAULT_INT64_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
    }

    // Bytes: both sides are cast to sbyte before comparing.
    FieldCache.Bytes bytes = cache.GetBytes(reader, "theByte", Random.NextBoolean());
    Assert.AreSame(bytes, cache.GetBytes(reader, "theByte", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(bytes, cache.GetBytes(reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue((sbyte)bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), (sbyte)bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
    }

    // Shorts: the expected value is narrowed to short before comparing.
    FieldCache.Int16s shorts = cache.GetInt16s(reader, "theShort", Random.NextBoolean());
    Assert.AreSame(shorts, cache.GetInt16s(reader, "theShort", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(shorts, cache.GetInt16s(reader, "theShort", FieldCache.DEFAULT_INT16_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
    }

    // Ints: doc i is expected to hold int.MaxValue - i.
    FieldCache.Int32s ints = cache.GetInt32s(reader, "theInt", Random.NextBoolean());
    Assert.AreSame(ints, cache.GetInt32s(reader, "theInt", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(ints, cache.GetInt32s(reader, "theInt", FieldCache.DEFAULT_INT32_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
    }

    // Floats: doc i is expected to hold float.MaxValue - i.
    FieldCache.Singles floats = cache.GetSingles(reader, "theFloat", Random.NextBoolean());
    Assert.AreSame(floats, cache.GetSingles(reader, "theFloat", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(floats, cache.GetSingles(reader, "theFloat", FieldCache.DEFAULT_SINGLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
    }
#pragma warning restore 612, 618

    // docsWithField for "theLong": expected to be the match-all implementation, set for every doc.
    IBits docsWithField = cache.GetDocsWithField(reader, "theLong");
    Assert.AreSame(docsWithField, cache.GetDocsWithField(reader, "theLong"), "Second request to cache return same array");
    Assert.IsTrue(docsWithField is Bits.MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
    Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
    for (int i = 0; i < docsWithField.Length; i++)
    {
        Assert.IsTrue(docsWithField.Get(i));
    }

    // docsWithField for "sparse": expected to be set only for even doc ids, so not match-all.
    docsWithField = cache.GetDocsWithField(reader, "sparse");
    Assert.AreSame(docsWithField, cache.GetDocsWithField(reader, "sparse"), "Second request to cache return same array");
    Assert.IsFalse(docsWithField is Bits.MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
    Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
    for (int i = 0; i < docsWithField.Length; i++)
    {
        Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
    }

    // getTermsIndex
    SortedDocValues termsIndex = cache.GetTermsIndex(reader, "theRandomUnicodeString");
    Assert.AreSame(termsIndex, cache.GetTermsIndex(reader, "theRandomUnicodeString"), "Second request to cache return same array");
    BytesRef br = new BytesRef();
    for (int i = 0; i < NUM_DOCS; i++)
    {
        BytesRef term;
        int ord = termsIndex.GetOrd(i);
        // An ord of -1 is treated as "no term for this document".
        if (ord == -1)
        {
            term = null;
        }
        else
        {
            termsIndex.LookupOrd(ord, br);
            term = br;
        }
        string s = term == null ? null : term.Utf8ToString();
        // Documents whose expected string is null are not checked.
        Assert.IsTrue(unicodeStrings[i] == null || unicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i]);
    }

    // Walk the terms enum in order and compare each term with the term looked up by ord.
    int nTerms = termsIndex.ValueCount;
    TermsEnum tenum = termsIndex.GetTermsEnum();
    BytesRef val = new BytesRef();
    for (int i = 0; i < nTerms; i++)
    {
        tenum.MoveNext();
        BytesRef val1 = tenum.Term;
        termsIndex.LookupOrd(i, val);
        // System.out.println("i="+i);
        Assert.AreEqual(val, val1);
    }

    // seek the enum around (note this isn't a great test here)
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        int k = Random.Next(nTerms);
        termsIndex.LookupOrd(k, val);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
        Assert.AreEqual(val, tenum.Term);
    }

    // Seek to every term in ord order; each one must be found exactly.
    for (int i = 0; i < nTerms; i++)
    {
        termsIndex.LookupOrd(i, val);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
        Assert.AreEqual(val, tenum.Term);
    }

    // test bad field (only checks that the request itself does not throw)
    termsIndex = cache.GetTermsIndex(reader, "bogusfield");

    // getTerms
    BinaryDocValues terms = cache.GetTerms(reader, "theRandomUnicodeString", true);
    Assert.AreSame(terms, cache.GetTerms(reader, "theRandomUnicodeString", true), "Second request to cache return same array");
    IBits bits = cache.GetDocsWithField(reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        terms.Get(i, br);
        BytesRef term;
        // Here a missing value is detected through the docsWithField bits.
        if (!bits.Get(i))
        {
            term = null;
        }
        else
        {
            term = br;
        }
        string s = term == null ? null : term.Utf8ToString();
        Assert.IsTrue(unicodeStrings[i] == null || unicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + unicodeStrings[i]);
    }

    // test bad field (only checks that the request itself does not throw)
    terms = cache.GetTerms(reader, "bogusfield", false);

    // getDocTermOrds
    SortedSetDocValues termOrds = cache.GetDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
    int numEntries = cache.GetCacheEntries().Length;
    // ask for it again, and check that we didn't create any additional entries:
    termOrds = cache.GetDocTermOrds(reader, "theRandomUnicodeMultiValuedField");
    Assert.AreEqual(numEntries, cache.GetCacheEntries().Length);

    for (int i = 0; i < NUM_DOCS; i++)
    {
        termOrds.SetDocument(i);
        // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        ISet<BytesRef> values = new JCG.LinkedHashSet<BytesRef>(multiValued[i]);
        foreach (BytesRef v in values)
        {
            if (v == null)
            {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            long ord = termOrds.NextOrd();
            if (Debugging.AssertsEnabled)
            {
                Debugging.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
            }
            BytesRef scratch = new BytesRef();
            termOrds.LookupOrd(ord, scratch);
            Assert.AreEqual(v, scratch);
        }
        // After the expected values are consumed, the document must have no further ords.
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
    }

    // test bad field: an unknown field must yield an empty value set
    termOrds = cache.GetDocTermOrds(reader, "bogusfield");
    Assert.IsTrue(termOrds.ValueCount == 0);

    // Drop the cache entries held for this reader's core cache key.
    FieldCache.DEFAULT.PurgeByCacheKey(reader.CoreCacheKey);
}
/// <summary>
/// Indexes a document whose fields are stored-only, then checks that every
/// <c>FieldCache</c> accessor returns its empty/zero result for those fields and
/// that no cache entries are created along the way.
/// </summary>
public virtual void TestNonIndexedFields()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
        this,
#endif
        Random, directory);

    // Stored-only fields, added in this exact order.
    string[] storedOnlyFields =
    {
        "bogusbytes", "bogusshorts", "bogusints", "boguslongs", "bogusfloats",
        "bogusdoubles", "bogusterms", "bogustermsindex", "bogusmultivalued", "bogusbits"
    };
    Document document = new Document();
    foreach (string fieldName in storedOnlyFields)
    {
        document.Add(new StoredField(fieldName, "bogus"));
    }
    writer.AddDocument(document);

    DirectoryReader ir = writer.GetReader();
    writer.Dispose();
    AtomicReader ar = GetOnlySegmentReader(ir);

    // Start from an empty cache so the final entry count is meaningful.
    IFieldCache cache = FieldCache.DEFAULT;
    cache.PurgeAllCaches();
    Assert.AreEqual(0, cache.GetCacheEntries().Length);

#pragma warning disable 612, 618
    Assert.AreEqual((byte)0, cache.GetBytes(ar, "bogusbytes", true).Get(0));
    Assert.AreEqual(0, cache.GetInt16s(ar, "bogusshorts", true).Get(0));
#pragma warning restore 612, 618

    Assert.AreEqual(0, cache.GetInt32s(ar, "bogusints", true).Get(0));
    Assert.AreEqual(0, cache.GetInt64s(ar, "boguslongs", true).Get(0));
    Assert.AreEqual(0, cache.GetSingles(ar, "bogusfloats", true).Get(0), 0.0f);
    Assert.AreEqual(0, cache.GetDoubles(ar, "bogusdoubles", true).Get(0), 0.0D);

    BytesRef scratch = new BytesRef();
    cache.GetTerms(ar, "bogusterms", true).Get(0, scratch);
    Assert.AreEqual(0, scratch.Length);

    SortedDocValues termsIndex = cache.GetTermsIndex(ar, "bogustermsindex");
    Assert.AreEqual(-1, termsIndex.GetOrd(0));
    termsIndex.Get(0, scratch);
    Assert.AreEqual(0, scratch.Length);

    SortedSetDocValues docTermOrds = cache.GetDocTermOrds(ar, "bogusmultivalued");
    docTermOrds.SetDocument(0);
    Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, docTermOrds.NextOrd());

    Assert.IsFalse(cache.GetDocsWithField(ar, "bogusbits").Get(0));

    // check that we cached nothing
    Assert.AreEqual(0, cache.GetCacheEntries().Length);

    ir.Dispose();
    directory.Dispose();
}
/// <summary>
/// Lazily enumerates, reader by reader and document by document, the global ordinals
/// of every live document's sorted-set values.
/// </summary>
/// <param name="readers">Readers to walk, in order.</param>
/// <param name="dvs">Sorted-set doc values, indexed in parallel with <paramref name="readers"/>.</param>
/// <param name="map">Translates a (reader index, segment ord) pair to a global ord.</param>
/// <returns>The global ords of each live document, in document order.</returns>
private IEnumerable<long?> GetMergeSortedSetOrdsEnumerable(AtomicReader[] readers, SortedSetDocValues[] dvs, OrdinalMap map)
{
    // Reusable buffer for one document's global ords; grown on demand.
    var ordBuffer = new long[8];

    for (int readerIndex = 0; readerIndex < readers.Length; readerIndex++)
    {
        AtomicReader reader = readers[readerIndex];
        IBits liveDocs = reader.LiveDocs;

        for (int docId = 0; docId < reader.MaxDoc; docId++)
        {
            // A null liveDocs is treated as "every document is live".
            if (liveDocs != null && !liveDocs.Get(docId))
            {
                continue;
            }

            Debug.Assert(docId < reader.MaxDoc);
            SortedSetDocValues docValues = dvs[readerIndex];
            docValues.SetDocument(docId);

            // Buffer all ords of the document before handing any of them out.
            int ordCount = 0;
            for (long segmentOrd = docValues.NextOrd();
                 segmentOrd != SortedSetDocValues.NO_MORE_ORDS;
                 segmentOrd = docValues.NextOrd())
            {
                if (ordCount == ordBuffer.Length)
                {
                    ordBuffer = ArrayUtil.Grow(ordBuffer, ordCount + 1);
                }
                ordBuffer[ordCount] = map.GetGlobalOrd(readerIndex, segmentOrd);
                ordCount++;
            }

            for (int i = 0; i < ordCount; i++)
            {
                yield return ordBuffer[i];
            }
        }
    }
}
/// <summary>
/// Indexes a document whose fields are stored-only, then checks that every
/// <c>FieldCache</c> accessor returns its empty/zero result for those fields and
/// that the cache holds no entries afterwards.
/// </summary>
public virtual void TestNonIndexedFields()
{
    Directory directory = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(Random(), directory, Similarity, TimeZone);

    // Stored-only fields, added in this exact order.
    string[] storedOnlyFields =
    {
        "bogusbytes", "bogusshorts", "bogusints", "boguslongs", "bogusfloats",
        "bogusdoubles", "bogusterms", "bogustermsindex", "bogusmultivalued", "bogusbits"
    };
    Document document = new Document();
    foreach (string fieldName in storedOnlyFields)
    {
        document.Add(new StoredField(fieldName, "bogus"));
    }
    writer.AddDocument(document);

    DirectoryReader ir = writer.Reader;
    writer.Dispose();
    AtomicReader ar = GetOnlySegmentReader(ir);

    // Start from an empty cache so the final entry count is meaningful.
    IFieldCache cache = FieldCache.DEFAULT;
    cache.PurgeAllCaches();
    Assert.AreEqual(0, cache.CacheEntries.Length);

    Assert.AreEqual(0, cache.GetBytes(ar, "bogusbytes", true).Get(0));
    Assert.AreEqual(0, cache.GetShorts(ar, "bogusshorts", true).Get(0));
    Assert.AreEqual(0, cache.GetInts(ar, "bogusints", true).Get(0));
    Assert.AreEqual(0, cache.GetLongs(ar, "boguslongs", true).Get(0));
    Assert.AreEqual(0, cache.GetFloats(ar, "bogusfloats", true).Get(0), 0.0f);
    Assert.AreEqual(0, cache.GetDoubles(ar, "bogusdoubles", true).Get(0), 0.0D);

    BytesRef scratch = new BytesRef();
    cache.GetTerms(ar, "bogusterms", true).Get(0, scratch);
    Assert.AreEqual(0, scratch.Length);

    SortedDocValues termsIndex = cache.GetTermsIndex(ar, "bogustermsindex");
    Assert.AreEqual(-1, termsIndex.GetOrd(0));
    termsIndex.Get(0, scratch);
    Assert.AreEqual(0, scratch.Length);

    SortedSetDocValues docTermOrds = cache.GetDocTermOrds(ar, "bogusmultivalued");
    docTermOrds.Document = 0;
    Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, docTermOrds.NextOrd());

    Assert.IsFalse(cache.GetDocsWithField(ar, "bogusbits").Get(0));

    // check that we cached nothing
    Assert.AreEqual(0, cache.CacheEntries.Length);

    ir.Dispose();
    directory.Dispose();
}
/// <summary>
/// Forwards to the wrapped instance and returns its next ordinal unchanged.
/// </summary>
public override long NextOrd() => @in.NextOrd();
/// <summary>
/// Does all the "real work" of tallying up the counts: for every matching document
/// in every segment, increments <c>counts</c> at each of the document's ordinals,
/// translated to global ordinals when an ordinal map is in use.
/// </summary>
/// <param name="matchingDocs">Per-segment hits to aggregate.</param>
/// <exception cref="InvalidOperationException">
/// If a segment's top-level reader is not the reader held by <c>state</c>.
/// </exception>
private void Count(IList<FacetsCollector.MatchingDocs> matchingDocs)
{
    //System.out.println("ssdv count");

    // TODO: is this right?  really, we need a way to
    // verify that this ordinalMap "matches" the leaves in
    // matchingDocs...
    MultiDocValues.OrdinalMap ordinalMap =
        (dv is MultiDocValues.MultiSortedSetDocValues && matchingDocs.Count > 1)
            ? ((MultiDocValues.MultiSortedSetDocValues)dv).Mapping
            : null;

    IndexReader origReader = state.OrigReader;

    foreach (FacetsCollector.MatchingDocs hits in matchingDocs)
    {
        var reader = hits.Context.AtomicReader;
        //System.out.println("  reader=" + reader);

        // LUCENE-5090: make sure the provided reader context "matches"
        // the top-level reader passed to the
        // SortedSetDocValuesReaderState, else cryptic
        // AIOOBE can happen:
        if (!Equals(ReaderUtil.GetTopLevelContext(hits.Context).Reader, origReader))
        {
            throw new InvalidOperationException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader");
        }

        SortedSetDocValues segValues = reader.GetSortedSetDocValues(field);
        if (segValues == null)
        {
            continue;
        }

        DocIdSetIterator docs = hits.Bits.GetIterator();

        if (ordinalMap == null)
        {
            // No ord mapping (e.g., single segment index):
            // just aggregate directly into counts:
            for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
            {
                segValues.Document = doc;
                for (int term = (int)segValues.NextOrd();
                     term != SortedSetDocValues.NO_MORE_ORDS;
                     term = (int)segValues.NextOrd())
                {
                    counts[term]++;
                }
            }
            continue;
        }

        // TODO: yet another option is to count all segs
        // first, only in seg-ord space, and then do a
        // merge-sort-PQ in the end to only "resolve to
        // global" those seg ords that can compete, if we know
        // we just want top K?  ie, this is the same algo
        // that'd be used for merging facets across shards
        // (distributed faceting).  but this has much higher
        // temp ram req'ts (sum of number of ords across all
        // segs)
        int segOrd = hits.Context.Ord;
        int numSegOrds = (int)segValues.ValueCount;

        if (hits.TotalHits < numSegOrds / 10)
        {
            //System.out.println("    remap as-we-go");
            // Remap every ord to global ord as we iterate:
            for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
            {
                //System.out.println("    doc=" + doc);
                segValues.Document = doc;
                for (int term = (int)segValues.NextOrd();
                     term != SortedSetDocValues.NO_MORE_ORDS;
                     term = (int)segValues.NextOrd())
                {
                    //System.out.println("      segOrd=" + segOrd + " ord=" + term + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, term));
                    counts[(int)ordinalMap.GetGlobalOrd(segOrd, term)]++;
                }
            }
            continue;
        }

        //System.out.println("    count in seg ord first");
        // First count in seg-ord space:
        int[] segCounts = new int[numSegOrds];
        for (int doc = docs.NextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.NextDoc())
        {
            //System.out.println("    doc=" + doc);
            segValues.Document = doc;
            for (int term = (int)segValues.NextOrd();
                 term != SortedSetDocValues.NO_MORE_ORDS;
                 term = (int)segValues.NextOrd())
            {
                //System.out.println("      ord=" + term);
                segCounts[term]++;
            }
        }

        // Then, migrate to global ords:
        for (int ord = 0; ord < numSegOrds; ord++)
        {
            int segCount = segCounts[ord];
            if (segCount == 0)
            {
                continue;
            }
            //System.out.println("    migrate segOrd=" + segOrd + " ord=" + ord + " globalOrd=" + ordinalMap.getGlobalOrd(segOrd, ord));
            counts[(int)ordinalMap.GetGlobalOrd(segOrd, ord)] += segCount;
        }
    }
}
/// <summary>
/// Positions the wrapped instance on <paramref name="docID"/> and returns the first
/// ordinal it reports for that document, narrowed to <see cref="int"/>.
/// </summary>
/// <param name="docID">Document to look up.</param>
/// <returns>The first value returned by the wrapped instance's <c>NextOrd()</c>, cast to int.</returns>
public override int GetOrd(int docID)
{
    @in.SetDocument(docID);
    long firstOrd = @in.NextOrd();
    return (int)firstOrd;
}
/// <summary>
/// Builds the set of documents in <paramref name="context"/> that have at least one
/// term ordinal inside the configured range. The term bounds are first translated to
/// an inclusive ordinal range; <c>null</c> is returned when that range is empty.
/// </summary>
/// <param name="context">Segment to evaluate.</param>
/// <param name="acceptDocs">Passed through unchanged to the resulting doc id set.</param>
/// <returns>A doc id set over the segment, or <c>null</c> if no ordinal can match.</returns>
public override DocIdSet GetDocIdSet(AtomicReaderContext context, IBits acceptDocs)
{
    SortedSetDocValues docTermOrds = FieldCache.DEFAULT.GetDocTermOrds(context.AtomicReader, field);
    long lowerPoint = lowerVal is null ? -1 : docTermOrds.LookupTerm(lowerVal);
    long upperPoint = upperVal is null ? -1 : docTermOrds.LookupTerm(upperVal);

    // Hints:
    // * a null bound is represented by a point of -1 (no lookup is performed).
    // * a negative point means no exact hit was found; its value
    //   is (-(insertion point) - 1)
    long firstOrd;
    if (lowerVal is null)
    {
        // Open lower bound.
        firstOrd = 0;
    }
    else if (lowerPoint >= 0)
    {
        // Exact hit: step past it when the bound is exclusive.
        firstOrd = includeLower ? lowerPoint : lowerPoint + 1;
    }
    else
    {
        firstOrd = Math.Max(0, -lowerPoint - 1);
    }

    long lastOrd;
    if (upperVal is null)
    {
        // Open upper bound.
        lastOrd = long.MaxValue;
    }
    else if (upperPoint >= 0)
    {
        // Exact hit: step before it when the bound is exclusive.
        lastOrd = includeUpper ? upperPoint : upperPoint - 1;
    }
    else
    {
        lastOrd = -upperPoint - 2;
    }

    if (lastOrd < 0 || firstOrd > lastOrd)
    {
        return null;
    }

    if (Debugging.AssertsEnabled)
    {
        Debugging.Assert(firstOrd >= 0 && lastOrd >= 0);
    }

    return new FieldCacheDocIdSet(context.AtomicReader.MaxDoc, acceptDocs, (doc) =>
    {
        docTermOrds.SetDocument(doc);
        for (long ord = docTermOrds.NextOrd();
             ord != SortedSetDocValues.NO_MORE_ORDS;
             ord = docTermOrds.NextOrd())
        {
            // Stops at the first ord beyond the upper bound.
            if (ord > lastOrd)
            {
                return false;
            }
            if (ord >= firstOrd)
            {
                return true;
            }
        }
        return false;
    });
}