private FacetResult GetDim(string dim, OrdRange ordRange, int topN)
{
    TopOrdAndInt32Queue q = null;
    int bottomCount = 0;
    int dimCount = 0;
    int childCount = 0;
    TopOrdAndInt32Queue.OrdAndValue reuse = null;
    //System.out.println("getDim : " + ordRange.start + " - " + ordRange.end);
    for (int ord = ordRange.Start; ord <= ordRange.End; ord++)
    {
        //System.out.println("  ord=" + ord + " count=" + counts[ord]);
        if (counts[ord] > 0)
        {
            dimCount += counts[ord];
            childCount++;
            if (counts[ord] > bottomCount)
            {
                if (reuse == null)
                {
                    reuse = new TopOrdAndInt32Queue.OrdAndValue();
                }
                reuse.Ord = ord;
                reuse.Value = counts[ord];
                if (q == null)
                {
                    // Lazy init, so we don't create this for the
                    // sparse case unnecessarily
                    q = new TopOrdAndInt32Queue(topN);
                }
                reuse = q.InsertWithOverflow(reuse);
                if (q.Count == topN)
                {
                    bottomCount = q.Top.Value;
                }
            }
        }
    }

    if (q == null)
    {
        return null;
    }

    LabelAndValue[] labelValues = new LabelAndValue[q.Count];
    for (int i = labelValues.Length - 1; i >= 0; i--)
    {
        TopOrdAndInt32Queue.OrdAndValue ordAndValue = q.Pop();
        var term = new BytesRef();
        dv.LookupOrd(ordAndValue.Ord, term);
        string[] parts = FacetsConfig.StringToPath(term.Utf8ToString());
        labelValues[i] = new LabelAndValue(parts[1], ordAndValue.Value);
    }

    return new FacetResult(dim, Arrays.Empty<string>(), dimCount, labelValues, childCount);
}
public virtual void TestSortedSetDocValuesField()
{
    AssumeTrue("default codec does not support SORTED_SET", DefaultCodecSupportsSortedSet);
    SortedSetDocValues dv = reader.GetSortedSetDocValues(SORTED_SET_DV_FIELD);
    int maxDoc = reader.MaxDoc;
    BytesRef bytes = new BytesRef();
    for (int i = 0; i < maxDoc; i++)
    {
        dv.SetDocument(i);
        dv.LookupOrd(dv.NextOrd(), bytes);
        int value = sortedValues[i];
        assertEquals("incorrect sorted-set DocValues for doc " + i, value.ToString(), bytes.Utf8ToString());
        dv.LookupOrd(dv.NextOrd(), bytes);
        assertEquals("incorrect sorted-set DocValues for doc " + i, (value + 1).ToString(), bytes.Utf8ToString());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.NextOrd());
    }
}
/// <summary>
/// Creates this, pulling doc values from the specified
/// field.
/// </summary>
public DefaultSortedSetDocValuesReaderState(IndexReader reader, string field = FacetsConfig.DEFAULT_INDEX_FIELD_NAME)
{
    this.field = field;
    this.origReader = reader;

    // We need this to create thread-safe MultiSortedSetDV
    // per collector:
    topReader = SlowCompositeReaderWrapper.Wrap(reader);
    SortedSetDocValues dv = topReader.GetSortedSetDocValues(field);
    if (dv is null)
    {
        throw new ArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
    }
    if (dv.ValueCount > int.MaxValue)
    {
        throw new ArgumentException("can only handle valueCount < System.Int32.MaxValue; got " + dv.ValueCount);
    }
    valueCount = (int)dv.ValueCount;

    // TODO: we can make this more efficient if eg we can be
    // "involved" when IOrdinalMap is being created?  Ie see
    // each term/ord it's assigning as it goes...
    string lastDim = null;
    int startOrd = -1;
    BytesRef spare = new BytesRef();

    // TODO: this approach can work for full hierarchy?;
    // TaxoReader can't do this since ords are not in
    // "sorted order" ... but we should generalize this to
    // support arbitrary hierarchy:
    for (int ord = 0; ord < valueCount; ord++)
    {
        dv.LookupOrd(ord, spare);
        string[] components = FacetsConfig.StringToPath(spare.Utf8ToString());
        if (components.Length != 2)
        {
            throw new ArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.ToString(components) + " " + spare.Utf8ToString());
        }
        if (!components[0].Equals(lastDim, StringComparison.Ordinal))
        {
            if (lastDim != null)
            {
                prefixToOrdRange[lastDim] = new OrdRange(startOrd, ord - 1);
            }
            startOrd = ord;
            lastDim = components[0];
        }
    }

    if (lastDim != null)
    {
        prefixToOrdRange[lastDim] = new OrdRange(startOrd, valueCount - 1);
    }
}
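(Aside, not one of the collected examples: a minimal sketch of the 2-level "dim/value" encoding the constructor above parses, built with FacetsConfig's path helpers. The dimension and label values here are hypothetical.)

using Lucene.Net.Facet;

// Round-trip of the dim/value path encoding. StringToPath is exactly what
// DefaultSortedSetDocValuesReaderState calls on each ord's term above.
string encoded = FacetsConfig.PathToString(new[] { "Author", "Lisa" });
string[] components = FacetsConfig.StringToPath(encoded);
// components[0] == "Author" (the dim); components[1] == "Lisa" (the value)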
public override void Collect(int doc)
{
    _docTermOrds.SetDocument(doc);
    long ord;
    while ((ord = _docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
    {
        _docTermOrds.LookupOrd(ord, _scratch);
        _collectorTerms.Add(_scratch);
    }
}
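(Aside, not one of the collected examples: the collectors above only read ords. A minimal write-side sketch, assuming the Lucene.NET 4.8 API and hypothetical field/value names, shows how a multi-valued SortedSetDocValuesField gets indexed so there are ords to iterate in the first place.)

using Lucene.Net.Analysis.Core;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;

// Build a tiny in-memory index with a multi-valued sorted-set DV field.
using var dir = new RAMDirectory();
var config = new IndexWriterConfig(LuceneVersion.LUCENE_48, new KeywordAnalyzer());
using (var writer = new IndexWriter(dir, config))
{
    var doc = new Document();
    // Each SortedSetDocValuesField adds one value to the doc's set;
    // duplicates within a document collapse to a single ord at search time.
    doc.Add(new SortedSetDocValuesField("category", new BytesRef("books")));
    doc.Add(new SortedSetDocValuesField("category", new BytesRef("fiction")));
    writer.AddDocument(doc);
}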
private FacetResult GetDim(string dim, OrdRange ordRange, int topN)
{
    TopOrdAndInt32Queue q = null;
    int bottomCount = 0;
    int dimCount = 0;
    int childCount = 0;
    //System.out.println("getDim : " + ordRange.start + " - " + ordRange.end);
    for (int ord = ordRange.Start; ord <= ordRange.End; ord++)
    {
        //System.out.println("  ord=" + ord + " count=" + counts[ord]);
        if (counts[ord] > 0)
        {
            dimCount += counts[ord];
            childCount++;
            if (counts[ord] > bottomCount)
            {
                if (q == null)
                {
                    // Lazy init, so we don't create this for the
                    // sparse case unnecessarily
                    q = new TopOrdAndInt32Queue(topN);
                }
                // LUCENENET specific - use struct instead of reusing class instance for better performance
                q.Insert(new OrdAndValue<int>(ord, counts[ord]));
                if (q.Count == topN)
                {
                    bottomCount = q.Top.Value;
                }
            }
        }
    }

    if (q == null)
    {
        return null;
    }

    var scratch = new BytesRef();
    LabelAndValue[] labelValues = new LabelAndValue[q.Count];
    for (int i = labelValues.Length - 1; i >= 0; i--)
    {
        var ordAndValue = q.Pop();
        dv.LookupOrd(ordAndValue.Ord, scratch);
        string[] parts = FacetsConfig.StringToPath(scratch.Utf8ToString());
        labelValues[i] = new LabelAndValue(parts[1], ordAndValue.Value);
    }

    return new FacetResult(dim, Arrays.Empty<string>(), dimCount, labelValues, childCount);
}
public virtual void Collect(int doc)
{
    docTermOrds.SetDocument(doc);
    long ord;
    while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
    {
        docTermOrds.LookupOrd(ord, joinValue);
        if (!joinValueToJoinScores.TryGetValue(joinValue, out JoinScore joinScore) || joinScore == null)
        {
            joinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
        }
        joinScore.AddScore(scorer.GetScore());
    }
}
public virtual void Collect(int doc)
{
    docTermOrds.SetDocument(doc);
    long ord;
    while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
    {
        docTermOrds.LookupOrd(ord, joinValue);
        var joinScore = JoinValueToJoinScores.ContainsKey(joinValue)
            ? JoinValueToJoinScores[joinValue]
            : null;
        if (joinScore == null)
        {
            JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
        }
        joinScore.AddScore(scorer.GetScore());
    }
}
public override void Collect(int doc)
{
    docTermOrds.Document = doc;
    long ord;
    while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
    {
        docTermOrds.LookupOrd(ord, joinValue);
        var joinScore = JoinValueToJoinScores[joinValue];
        if (joinScore == null)
        {
            JoinValueToJoinScores[BytesRef.DeepCopyOf(joinValue)] = joinScore = new JoinScore();
        }
        joinScore.AddScore(scorer.Score());
    }
}
public virtual void Collect(int doc)
{
    docTermOrds.SetDocument(doc);
    long ord;
    while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
    {
        docTermOrds.LookupOrd(ord, scratch);
        if (!joinValueToJoinScores.TryGetValue(scratch, out JoinScore joinScore) || joinScore == null)
        {
            continue;
        }
        int basedDoc = docBase + doc;
        // First encountered join value determines the score.
        // Something to keep in mind for many-to-many relations.
        if (!docToJoinScore.ContainsKey(basedDoc))
        {
            docToJoinScore[basedDoc] = joinScore;
        }
    }
}
public override void Collect(int doc)
{
    docTermOrds.Document = doc;
    long ord;
    while ((ord = docTermOrds.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
    {
        docTermOrds.LookupOrd(ord, scratch);
        JoinScore joinScore = _joinValueToJoinScores.ContainsKey(scratch)
            ? _joinValueToJoinScores[scratch]
            : null;
        if (joinScore == null)
        {
            continue;
        }
        int basedDoc = docBase + doc;
        // First encountered join value determines the score.
        // Something to keep in mind for many-to-many relations.
        if (!_docToJoinScore.ContainsKey(basedDoc))
        {
            _docToJoinScore[basedDoc] = joinScore;
        }
    }
}
public override void LookupOrd(long ord, BytesRef result)
{
    @in.LookupOrd(ord, result);
}
/// <summary>
/// Create the results based on the search hits.
/// Can be overridden by subclass to add particular behavior (e.g. weight transformation).
/// </summary>
/// <exception cref="System.IO.IOException"> If there are problems reading fields from the underlying Lucene index. </exception>
protected internal virtual IList<LookupResult> CreateResults(IndexSearcher searcher, TopFieldDocs hits, int num, string charSequence, bool doHighlight, IEnumerable<string> matchedTokens, string prefixToken)
{
    BinaryDocValues textDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, TEXT_FIELD_NAME);

    // This will just be null if app didn't pass payloads to build():
    // TODO: maybe just stored fields?  they compress...
    BinaryDocValues payloadsDV = MultiDocValues.GetBinaryValues(searcher.IndexReader, "payloads");
    IList<AtomicReaderContext> leaves = searcher.IndexReader.Leaves;
    List<LookupResult> results = new List<LookupResult>();
    BytesRef scratch = new BytesRef();
    for (int i = 0; i < hits.ScoreDocs.Length; i++)
    {
        FieldDoc fd = (FieldDoc)hits.ScoreDocs[i];
        textDV.Get(fd.Doc, scratch);
        string text = scratch.Utf8ToString();
        long score = (long)fd.Fields[0];

        BytesRef payload;
        if (payloadsDV != null)
        {
            payload = new BytesRef();
            payloadsDV.Get(fd.Doc, payload);
        }
        else
        {
            payload = null;
        }

        // Must look up sorted-set by segment:
        int segment = ReaderUtil.SubIndex(fd.Doc, leaves);
        SortedSetDocValues contextsDV = leaves[segment].AtomicReader.GetSortedSetDocValues(CONTEXTS_FIELD_NAME);
        HashSet<BytesRef> contexts;
        if (contextsDV != null)
        {
            contexts = new HashSet<BytesRef>();
            contextsDV.SetDocument(fd.Doc - leaves[segment].DocBase);
            long ord;
            while ((ord = contextsDV.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
            {
                BytesRef context = new BytesRef();
                contextsDV.LookupOrd(ord, context);
                contexts.Add(context);
            }
        }
        else
        {
            contexts = null;
        }

        LookupResult result;
        if (doHighlight)
        {
            object highlightKey = Highlight(text, matchedTokens, prefixToken);
            result = new LookupResult(highlightKey.ToString(), highlightKey, score, payload, contexts);
        }
        else
        {
            result = new LookupResult(text, score, payload, contexts);
        }

        results.Add(result);
    }

    return results;
}
public virtual void Test()
{
#pragma warning disable 612, 618
    IFieldCache cache = FieldCache.DEFAULT;
    FieldCache.Doubles doubles = cache.GetDoubles(Reader, "theDouble", Random.NextBoolean());
    Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(doubles, cache.GetDoubles(Reader, "theDouble", FieldCache.DEFAULT_DOUBLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(doubles.Get(i) == (double.MaxValue - i), doubles.Get(i) + " does not equal: " + (double.MaxValue - i));
    }

    FieldCache.Int64s longs = cache.GetInt64s(Reader, "theLong", Random.NextBoolean());
    Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(longs, cache.GetInt64s(Reader, "theLong", FieldCache.DEFAULT_INT64_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(longs.Get(i) == (long.MaxValue - i), longs.Get(i) + " does not equal: " + (long.MaxValue - i) + " i=" + i);
    }

    FieldCache.Bytes bytes = cache.GetBytes(Reader, "theByte", Random.NextBoolean());
    Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(bytes, cache.GetBytes(Reader, "theByte", FieldCache.DEFAULT_BYTE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue((sbyte)bytes.Get(i) == (sbyte)(sbyte.MaxValue - i), (sbyte)bytes.Get(i) + " does not equal: " + (sbyte.MaxValue - i));
    }

    FieldCache.Int16s shorts = cache.GetInt16s(Reader, "theShort", Random.NextBoolean());
    Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(shorts, cache.GetInt16s(Reader, "theShort", FieldCache.DEFAULT_INT16_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(shorts.Get(i) == (short)(short.MaxValue - i), shorts.Get(i) + " does not equal: " + (short.MaxValue - i));
    }

    FieldCache.Int32s ints = cache.GetInt32s(Reader, "theInt", Random.NextBoolean());
    Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(ints, cache.GetInt32s(Reader, "theInt", FieldCache.DEFAULT_INT32_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(ints.Get(i) == (int.MaxValue - i), ints.Get(i) + " does not equal: " + (int.MaxValue - i));
    }

    FieldCache.Singles floats = cache.GetSingles(Reader, "theFloat", Random.NextBoolean());
    Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", Random.NextBoolean()), "Second request to cache return same array");
    Assert.AreSame(floats, cache.GetSingles(Reader, "theFloat", FieldCache.DEFAULT_SINGLE_PARSER, Random.NextBoolean()), "Second request with explicit parser return same array");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        Assert.IsTrue(floats.Get(i) == (float.MaxValue - i), floats.Get(i) + " does not equal: " + (float.MaxValue - i));
    }
#pragma warning restore 612, 618

    IBits docsWithField = cache.GetDocsWithField(Reader, "theLong");
    Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "theLong"), "Second request to cache return same array");
    Assert.IsTrue(docsWithField is Bits.MatchAllBits, "docsWithField(theLong) must be class Bits.MatchAllBits");
    Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(theLong) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
    for (int i = 0; i < docsWithField.Length; i++)
    {
        Assert.IsTrue(docsWithField.Get(i));
    }

    docsWithField = cache.GetDocsWithField(Reader, "sparse");
    Assert.AreSame(docsWithField, cache.GetDocsWithField(Reader, "sparse"), "Second request to cache return same array");
    Assert.IsFalse(docsWithField is Bits.MatchAllBits, "docsWithField(sparse) must not be class Bits.MatchAllBits");
    Assert.IsTrue(docsWithField.Length == NUM_DOCS, "docsWithField(sparse) Size: " + docsWithField.Length + " is not: " + NUM_DOCS);
    for (int i = 0; i < docsWithField.Length; i++)
    {
        Assert.AreEqual(i % 2 == 0, docsWithField.Get(i));
    }

    // getTermsIndex
    SortedDocValues termsIndex = cache.GetTermsIndex(Reader, "theRandomUnicodeString");
    Assert.AreSame(termsIndex, cache.GetTermsIndex(Reader, "theRandomUnicodeString"), "Second request to cache return same array");
    BytesRef br = new BytesRef();
    for (int i = 0; i < NUM_DOCS; i++)
    {
        BytesRef term;
        int ord = termsIndex.GetOrd(i);
        if (ord == -1)
        {
            term = null;
        }
        else
        {
            termsIndex.LookupOrd(ord, br);
            term = br;
        }
        string s = term == null ? null : term.Utf8ToString();
        Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
    }

    int nTerms = termsIndex.ValueCount;

    TermsEnum tenum = termsIndex.GetTermsEnum();
    BytesRef val = new BytesRef();
    for (int i = 0; i < nTerms; i++)
    {
        BytesRef val1 = tenum.Next();
        termsIndex.LookupOrd(i, val);
        // System.out.println("i="+i);
        Assert.AreEqual(val, val1);
    }

    // seek the enum around (note this isn't a great test here)
    int num = AtLeast(100);
    for (int i = 0; i < num; i++)
    {
        int k = Random.Next(nTerms);
        termsIndex.LookupOrd(k, val);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
        Assert.AreEqual(val, tenum.Term);
    }

    for (int i = 0; i < nTerms; i++)
    {
        termsIndex.LookupOrd(i, val);
        Assert.AreEqual(TermsEnum.SeekStatus.FOUND, tenum.SeekCeil(val));
        Assert.AreEqual(val, tenum.Term);
    }

    // test bad field
    termsIndex = cache.GetTermsIndex(Reader, "bogusfield");

    // getTerms
    BinaryDocValues terms = cache.GetTerms(Reader, "theRandomUnicodeString", true);
    Assert.AreSame(terms, cache.GetTerms(Reader, "theRandomUnicodeString", true), "Second request to cache return same array");
    IBits bits = cache.GetDocsWithField(Reader, "theRandomUnicodeString");
    for (int i = 0; i < NUM_DOCS; i++)
    {
        terms.Get(i, br);
        BytesRef term;
        if (!bits.Get(i))
        {
            term = null;
        }
        else
        {
            term = br;
        }
        string s = term == null ? null : term.Utf8ToString();
        Assert.IsTrue(UnicodeStrings[i] == null || UnicodeStrings[i].Equals(s, StringComparison.Ordinal), "for doc " + i + ": " + s + " does not equal: " + UnicodeStrings[i]);
    }

    // test bad field
    terms = cache.GetTerms(Reader, "bogusfield", false);

    // getDocTermOrds
    SortedSetDocValues termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
    int numEntries = cache.GetCacheEntries().Length;
    // ask for it again, and check that we didnt create any additional entries:
    termOrds = cache.GetDocTermOrds(Reader, "theRandomUnicodeMultiValuedField");
    Assert.AreEqual(numEntries, cache.GetCacheEntries().Length);

    for (int i = 0; i < NUM_DOCS; i++)
    {
        termOrds.SetDocument(i);
        // this will remove identical terms. A DocTermOrds doesn't return duplicate ords for a docId
        IList<BytesRef> values = MultiValued[i].Distinct().ToList();
        foreach (BytesRef v in values)
        {
            if (v == null)
            {
                // why does this test use null values... instead of an empty list: confusing
                break;
            }
            long ord = termOrds.NextOrd();
            Debug.Assert(ord != SortedSetDocValues.NO_MORE_ORDS);
            BytesRef scratch = new BytesRef();
            termOrds.LookupOrd(ord, scratch);
            Assert.AreEqual(v, scratch);
        }
        Assert.AreEqual(SortedSetDocValues.NO_MORE_ORDS, termOrds.NextOrd());
    }

    // test bad field
    termOrds = cache.GetDocTermOrds(Reader, "bogusfield");
    Assert.IsTrue(termOrds.ValueCount == 0);

    FieldCache.DEFAULT.PurgeByCacheKey(Reader.CoreCacheKey);
}
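(To close, a minimal self-contained sketch of the pattern the examples above share: SetDocument, then NextOrd until NO_MORE_ORDS, then LookupOrd. This is a usage sketch against the Lucene.NET 4.8 API with hypothetical reader/field names, not code from any snippet above.)

using System;
using Lucene.Net.Index;
using Lucene.Net.Util;

public static class SortedSetDocValuesDemo
{
    // Prints every distinct value of a sorted-set DV field for each document
    // in a single (atomic) segment. "category" is a placeholder field name.
    public static void DumpValues(AtomicReader reader, string field = "category")
    {
        SortedSetDocValues dv = reader.GetSortedSetDocValues(field);
        if (dv == null)
        {
            return; // field was not indexed with SortedSetDocValuesField
        }

        BytesRef scratch = new BytesRef(); // reused across LookupOrd calls
        for (int doc = 0; doc < reader.MaxDoc; doc++)
        {
            dv.SetDocument(doc); // position the iterator on this doc
            long ord;
            // Ords come back in increasing order; NO_MORE_ORDS marks the end.
            while ((ord = dv.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
            {
                dv.LookupOrd(ord, scratch); // resolve ord -> term bytes
                Console.WriteLine("doc " + doc + ": " + scratch.Utf8ToString());
            }
        }
    }
}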