/// <summary>
/// Builds the random-access doc id set for the configured range string, using the
/// facet data that the facet handler holds for <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Reader whose facet data is queried.</param>
/// <returns>
/// <c>null</c> when <c>Parse</c> yields no range; <c>EmptyDocIdSet.Instance</c> when the
/// parsed bounds are inverted or both equal the same negative index; otherwise a new
/// <c>RangeRandomAccessDocIdSet</c> over the parsed bounds.
/// </returns>
public override RandomAccessDocIdSet GetRandomAccessDocIdSet(BoboIndexReader reader)
{
    FacetDataCache dataCache = _facetHandler.GetFacetData<FacetDataCache>(reader);

    // A multi-value cache additionally supplies its nested array; otherwise it stays null.
    MultiValueFacetDataCache multiCache = dataCache as MultiValueFacetDataCache;
    bool isMulti = multiCache != null;
    BigNestedIntArray nested = isMulti ? multiCache.NestedArray : null;

    int[] bounds = Parse(dataCache, _rangeString);
    if (bounds == null)
    {
        return null;
    }

    int lower = bounds[0];
    int upper = bounds[1];

    bool inverted = lower > upper;
    bool sameNegativeIndex = lower == upper && lower < 0;
    if (inverted || sameNegativeIndex)
    {
        return EmptyDocIdSet.Instance;
    }

    return new RangeRandomAccessDocIdSet(lower, upper, dataCache, nested, isMulti);
}
/// <summary>
/// Creates a path facet count collector over <paramref name="dataCache"/>.
/// </summary>
/// <param name="name">Facet name; also written to the log together with the count array size.</param>
/// <param name="sep">Path separator. It is matched as literal text when splitting.</param>
/// <param name="sel">Browse selection stored for later use by the collector.</param>
/// <param name="ospec">Facet spec supplying min hit count, max count and sort order.</param>
/// <param name="dataCache">Facet data whose frequency array length sizes the count array.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="ospec"/> carries a sort option that is not one of the handled values.
/// </exception>
internal PathFacetCountCollector(string name, string sep, BrowseSelection sel, FacetSpec ospec, FacetDataCache dataCache)
{
    _sel = sel;
    _ospec = ospec;
    _name = name;
    _dataCache = dataCache;
    _sep = sep;
    _sepArray = sep.ToCharArray();
    _count = new LazyBigIntArray(_dataCache.Freqs.Length);
    log.Info(name + ": " + _count.Size());
    _orderArray = _dataCache.OrderArray;
    _minHitCount = ospec.MinHitCount;
    _maxCount = ospec.MaxCount;
    if (_maxCount < 1)
    {
        // A max count below 1 means "no limit": fall back to the size of the count array.
        _maxCount = _count.Size();
    }

    FacetSpec.FacetSortSpec sortOption = ospec.OrderBy;
    switch (sortOption)
    {
        case FacetSpec.FacetSortSpec.OrderHitsDesc:
            _comparatorFactory = new FacetHitcountComparatorFactory();
            break;
        case FacetSpec.FacetSortSpec.OrderValueAsc:
            _comparatorFactory = null;
            break;
        case FacetSpec.FacetSortSpec.OrderByCustom:
            _comparatorFactory = ospec.CustomComparatorFactory;
            break;
        default:
            // Two-argument form: the single-string constructor would treat the message as a parameter name.
            throw new ArgumentOutOfRangeException("ospec", "invalid sort option: " + sortOption);
    }

    // Escape the separator so characters such as '.', '|' or '\' are matched literally
    // instead of being interpreted as regular-expression syntax.
    _splitPat = new Regex(Regex.Escape(_sep), RegexOptions.Compiled);
    _stringData = new string[10];
    _patStart = 0;
    _patEnd = 0;
}
/// <summary>
/// Estimates the fraction of documents in <paramref name="reader"/> matched by the
/// configured values: the summed frequencies of the converted value indexes divided
/// by <c>reader.MaxDoc</c>.
/// </summary>
/// <param name="reader">Reader whose facet data and document count are used.</param>
/// <returns>
/// <c>0.0</c> when the values convert to <c>null</c>; otherwise the ratio, with any
/// result above 0.999 reported as <c>1.0</c>.
/// </returns>
public override double GetFacetSelectivity(BoboIndexReader reader)
{
    FacetDataCache dataCache = _facetHandler.GetFacetData<FacetDataCache>(reader);

    int[] matchedIndexes = FacetDataCache.Convert(dataCache, _vals);
    if (matchedIndexes == null)
    {
        return 0.0;
    }

    int hitTotal = 0;
    for (int k = 0; k < matchedIndexes.Length; k++)
    {
        hitTotal += dataCache.Freqs[matchedIndexes[k]];
    }

    int docTotal = reader.MaxDoc;
    double ratio = (double)hitTotal / (double)docTotal;
    return ratio > 0.999 ? 1.0 : ratio;
}
/// <summary>
/// Returns the number of items for document <paramref name="id"/>, as reported by the
/// facet data of the facet handler this one depends on.
/// </summary>
/// <param name="reader">Segment reader whose facet data is consulted.</param>
/// <param name="id">Document id passed through to the facet data.</param>
public override int GetNumItems(BoboSegmentReader reader, int id)
{
    return GetDependedFacetHandler(m_dependsOnFacetName)
        .GetFacetData<FacetDataCache>(reader)
        .GetNumItems(id);
}
/// <summary>
/// Creates a <c>HistogramCollector</c> that wraps the base source's collector for
/// <paramref name="reader"/> together with the facet data registered under the data handler name.
/// </summary>
/// <param name="reader">Segment reader supplying the facet data.</param>
/// <param name="docBase">Document base forwarded to the base collector source.</param>
public override IFacetCountCollector GetFacetCountCollector(BoboSegmentReader reader, int docBase)
{
    // Fetch (and cast) the facet data first, then ask the base source for its collector.
    FacetDataCache facetData = (FacetDataCache)reader.GetFacetData(m_dataHandlerName);

    return new HistogramCollector(
        m_name,
        m_baseCollectorSrc.GetFacetCountCollector(reader, docBase),
        facetData,
        m_ospec,
        m_start,
        m_end,
        m_unit);
}
/// <summary>
/// Creates a <c>HistogramCollector</c> that wraps the base source's collector for
/// <paramref name="reader"/> together with the facet data registered under the data handler name.
/// </summary>
/// <param name="reader">Reader supplying the facet data.</param>
/// <param name="docBase">Document base forwarded to the base collector source.</param>
public override IFacetCountCollector GetFacetCountCollector(BoboIndexReader reader, int docBase)
{
    // Fetch (and cast) the facet data first, then ask the base source for its collector.
    FacetDataCache facetData = (FacetDataCache)reader.GetFacetData(_dataHandlerName);

    return new HistogramCollector(
        _name,
        _baseCollectorSrc.GetFacetCountCollector(reader, docBase),
        facetData,
        _ospec,
        _start,
        _end,
        _unit);
}
/// <summary>
/// Creates a range facet count collector over <paramref name="dataCache"/>.
/// </summary>
/// <param name="name">Facet name.</param>
/// <param name="dataCache">Facet data; its frequency array length sizes the count array.</param>
/// <param name="docBase">Document base stored on the collector.</param>
/// <param name="ospec">Facet spec stored on the collector.</param>
/// <param name="predefinedRanges">
/// Optional range strings. When supplied, a sorted copy is stored and each range is
/// parsed with <c>FacetRangeFilter.Parse</c>; when <c>null</c>, no predefined ranges are kept.
/// </param>
public RangeFacetCountCollector(string name, FacetDataCache dataCache, int docBase, FacetSpec ospec, IEnumerable<string> predefinedRanges)
{
    _name = name;
    _dataCache = dataCache;
    _countLength = _dataCache.Freqs.Length;
    _count = new LazyBigIntArray(_countLength);
    _array = _dataCache.OrderArray;
    _docBase = docBase;
    _ospec = ospec;

    if (predefinedRanges == null)
    {
        _predefinedRanges = null;
        return;
    }

    // Sort a private copy so the caller's sequence is left untouched.
    _predefinedRanges = new TermStringList();
    List<string> sortedRanges = new List<string>(predefinedRanges);
    sortedRanges.Sort();
    _predefinedRanges.AddAll(sortedRanges);

    _predefinedRangeIndexes = new int[_predefinedRanges.Count()][];
    int slot = 0;
    foreach (string rangeText in _predefinedRanges)
    {
        _predefinedRangeIndexes[slot] = FacetRangeFilter.Parse(_dataCache, rangeText);
        slot++;
    }
}
/// <summary>
/// Appends to <paramref name="intSet"/> the indexes of all terms in the cache's value
/// list that start with <paramref name="val"/>, scanning forward from the position of
/// <paramref name="val"/> until the first term that no longer has it as a prefix.
/// </summary>
/// <param name="dataCache">Facet data whose value list is scanned.</param>
/// <param name="intSet">Receives the matching term indexes.</param>
/// <param name="val">Path prefix to match.</param>
/// <param name="depth">Required depth difference relative to <paramref name="val"/> when <paramref name="strict"/> is set.</param>
/// <param name="strict">When true, only terms exactly <paramref name="depth"/> levels below <paramref name="val"/> are added.</param>
private void GetFilters(FacetDataCache dataCache, IList<int> intSet, string val, int depth, bool strict)
{
    IList<string> termList = dataCache.ValArray;
    int index = termList.IndexOf(val);
    int startDepth = GetPathDepth(val, _sep);

    if (index < 0)
    {
        // NOTE(review): a negative result is treated as -(insertionPoint + 1), i.e. a
        // binary-search style "not found" value -- confirm against the value list's IndexOf.
        int nextIndex = -(index + 1);
        if (nextIndex == termList.Count)
        {
            return;
        }
        index = nextIndex;
    }

    for (int i = index; i < termList.Count; ++i)
    {
        string path = termList[i];

        // Ordinal comparison: the default string.StartsWith(string) overload is
        // culture-sensitive and may match or reject prefixes depending on the current culture.
        if (!path.StartsWith(val, System.StringComparison.Ordinal))
        {
            break;
        }

        if (!strict || GetPathDepth(path, _sep) - startDepth == depth)
        {
            intSet.Add(i);
        }
    }
}
/// <summary>
/// Converts the given path values into the term indexes selected by <c>GetFilters</c>,
/// using this instance's depth and strictness settings.
/// </summary>
/// <param name="dataCache">Facet data whose value list is searched.</param>
/// <param name="vals">Path values to convert.</param>
/// <returns>The collected term indexes, in the order they were added.</returns>
public virtual int[] Convert(FacetDataCache dataCache, string[] vals)
{
    List<int> collected = new List<int>();
    GetFilters(dataCache, collected, vals, _depth, _strict);
    return collected.ToArray();
}
/// <summary>
/// Builds the doc id set for the configured values: the values are converted to term
/// indexes, folded into a single bit mask, and handed to a
/// <c>CompactMultiValueFacetFilterDocIdSet</c>.
/// </summary>
/// <param name="reader">Reader whose facet data is queried.</param>
/// <returns>
/// <c>EmptyDocIdSet.Instance</c> when no value converts to an index; otherwise the compact doc id set.
/// </returns>
public override RandomAccessDocIdSet GetRandomAccessDocIdSet(BoboIndexReader reader)
{
    FacetDataCache dataCache = _facetHandler.GetFacetData<FacetDataCache>(reader);
    int[] indexes = FacetDataCache.Convert(dataCache, _vals);

    // Term index i is represented by bit (i - 1) of the mask.
    int mask = 0x0;
    for (int k = 0; k < indexes.Length; k++)
    {
        mask |= 0x00000001 << (indexes[k] - 1);
    }

    BigSegmentedArray orderArray = dataCache.OrderArray;
    if (indexes.Length != 0)
    {
        return new CompactMultiValueFacetFilterDocIdSet(dataCache, indexes, mask, orderArray);
    }

    return EmptyDocIdSet.Instance;
}
/// <summary>
/// Runs the single-value <c>GetFilters</c> for every entry of <paramref name="vals"/>,
/// accumulating all matches in <paramref name="intSet"/>.
/// </summary>
/// <param name="dataCache">Facet data whose value list is searched.</param>
/// <param name="intSet">Receives the matching term indexes.</param>
/// <param name="vals">Path values to look up, processed in array order.</param>
/// <param name="depth">Depth forwarded to the single-value overload.</param>
/// <param name="strict">Strictness flag forwarded to the single-value overload.</param>
private void GetFilters(FacetDataCache dataCache, IList<int> intSet, string[] vals, int depth, bool strict)
{
    for (int k = 0; k < vals.Length; k++)
    {
        GetFilters(dataCache, intSet, vals[k], depth, strict);
    }
}
/// <summary>
/// Creates a range facet count collector over <paramref name="dataCache"/>.
/// </summary>
/// <param name="name">Facet name.</param>
/// <param name="dataCache">Facet data; its frequency array length sizes the count array.</param>
/// <param name="docBase">Accepted but not stored by this constructor.</param>
/// <param name="ospec">Facet spec stored on the collector.</param>
/// <param name="predefinedRanges">
/// Optional range strings. When supplied, the list itself is sorted (the caller's list is
/// modified), copied into the collector, and each range is parsed with
/// <c>FacetRangeFilter.Parse</c>. When <c>null</c>, no predefined ranges are kept.
/// </param>
public RangeFacetCountCollector(string name, FacetDataCache dataCache, int docBase, FacetSpec ospec, IList<string> predefinedRanges)
{
    m_name = name;
    m_dataCache = dataCache;
    m_countLength = m_dataCache.Freqs.Length;
    m_count = new LazyBigInt32Array(m_countLength);
    m_array = m_dataCache.OrderArray;
    m_ospec = ospec;

    if (predefinedRanges == null)
    {
        m_predefinedRanges = null;
        return;
    }

    m_predefinedRanges = new TermStringList();
    predefinedRanges.Sort();
    m_predefinedRanges.AddAll(predefinedRanges);

    m_predefinedRangeIndexes = new int[m_predefinedRanges.Count][];
    int slot = 0;
    foreach (string rangeText in m_predefinedRanges)
    {
        m_predefinedRangeIndexes[slot] = FacetRangeFilter.Parse(m_dataCache, rangeText);
        slot++;
    }
}
/// <summary>
/// Stores the supplied caches, bit set and multi-value flag on the new instance.
/// </summary>
/// <param name="multi">Multi-value flag kept as-is.</param>
/// <param name="multiCache">Multi-value facet data cache kept as-is.</param>
/// <param name="openBitSet">Bit set kept as-is.</param>
/// <param name="dataCache">Facet data cache kept as-is.</param>
public BitSetRandomAccessDocIdSet(bool multi, MultiValueFacetDataCache multiCache, OpenBitSet openBitSet, FacetDataCache dataCache)
{
    this.m_dataCache = dataCache;
    this.m_multiCache = multiCache;
    this.m_openBitSet = openBitSet;
    this.m_multi = multi;
}
/// <summary>
/// Stores the facet data, selected indexes, combined bit mask and order array on the new instance.
/// </summary>
/// <param name="dataCache">Facet data cache kept as-is.</param>
/// <param name="indexes">Selected term indexes kept as-is.</param>
/// <param name="finalBits">Bit mask kept as-is.</param>
/// <param name="orderArray">Order array kept as-is.</param>
public CompactMultiValueFacetFilterDocIdSet(FacetDataCache dataCache, int[] indexes, int finalBits, BigSegmentedArray orderArray)
{
    this.orderArray = orderArray;
    this.finalBits = finalBits;
    this.indexes = indexes;
    this.dataCache = dataCache;
}
/// <summary>
/// Produces an explanation that combines <paramref name="innerExplanation"/>'s score with
/// a time factor derived from the document's value in the time facet.
/// </summary>
/// <param name="reader">Must be a <c>BoboIndexReader</c>.</param>
/// <param name="doc">Document id used to look up the time value through the order array.</param>
/// <param name="innerExplanation">Explanation of the raw score; added as a detail of the result.</param>
/// <returns>An explanation whose value is the combined score.</returns>
/// <exception cref="ArgumentException"><paramref name="reader"/> is not a <c>BoboIndexReader</c>.</exception>
/// <exception cref="InvalidOperationException">The time facet's data is not a <c>FacetDataCache</c>.</exception>
public virtual Explanation Explain(IndexReader reader, int doc, Explanation innerExplanation)
{
    BoboIndexReader boboReader = reader as BoboIndexReader;
    if (boboReader == null)
    {
        throw new ArgumentException("reader not instance of " + typeof(BoboIndexReader));
    }

    // Look the facet data up once and reuse it for both the type check and the cast.
    FacetDataCache facetDataCache = boboReader.GetFacetData(_timeFacetName) as FacetDataCache;
    if (facetDataCache == null)
    {
        throw new InvalidOperationException("underlying facet data must be of type FacetDataCache<long>");
    }

    BigSegmentedArray orderArray = facetDataCache.OrderArray;
    TermLongList termList = (TermLongList)facetDataCache.ValArray;

    Explanation finalExpl = new Explanation();
    finalExpl.AddDetail(innerExplanation);

    float rawScore = innerExplanation.Value;
    long timeVal = termList.GetPrimitiveValue(orderArray.Get(doc));
    float timeScore = ComputeTimeFactor(timeVal);
    float finalScore = CombineScores(timeScore, rawScore);

    finalExpl.Value = finalScore;
    finalExpl.Description = "final score = (time score: " + timeScore + ") * (raw score: " + rawScore + "), timeVal: " + timeVal;
    return finalExpl;
}
/// <summary>
/// Creates a <c>GeoSimpleFacetCountCollector</c> from the latitude and longitude facet
/// data that the two underlying facet handlers hold for <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Reader whose facet data is queried.</param>
/// <param name="docBase">Document base forwarded to the collector.</param>
public override IFacetCountCollector GetFacetCountCollector(BoboIndexReader reader, int docBase)
{
    // Latitude data is fetched before longitude data, then both are passed on.
    FacetDataCache latitudes = _latFacetHandler.GetFacetData<FacetDataCache>(reader);
    FacetDataCache longitudes = _longFacetHandler.GetFacetData<FacetDataCache>(reader);

    GeoSimpleFacetCountCollector collector =
        new GeoSimpleFacetCountCollector(_name, latitudes, longitudes, docBase, _fspec, _list);
    return collector;
}
/// <summary>
/// Creates an iterator for the term at <paramref name="index"/>, positioned one before
/// that term's minimum doc id (never below -1) and bounded by its maximum doc id.
/// </summary>
/// <param name="dataCache">Facet data supplying the min/max id arrays and the order array.</param>
/// <param name="index">Term index into the min/max id arrays.</param>
public FacetDocIdSetIterator(FacetDataCache dataCache, int index)
{
    _index = index;

    int firstDoc = dataCache.MinIDs[index];
    int lastDoc = dataCache.MaxIDs[index];

    // Start one position before the first candidate document, clamped to -1.
    _doc = Math.Max(-1, firstDoc - 1);
    _maxID = lastDoc;
    _orderArray = dataCache.OrderArray;
}
/// <summary>
/// Builds a synthetic <c>FacetDataCache</c> with <c>numVals</c> values: random
/// frequencies, zero-initialised min/max id arrays, a sealed value list holding the
/// ten-digit zero-padded numbers 1..numVals, and an order array sized by <c>numDocsPerSeg</c>.
/// </summary>
static FacetDataCache MakeFacetDataCache()
{
    FacetDataCache cache = new FacetDataCache();
    cache.Freqs = new int[numVals];

    Random r = new Random();
    for (int slot = 0; slot < cache.Freqs.Length; ++slot)
    {
        // Roughly percent_zero percent of the slots get a zero frequency;
        // the rest get a value between 1 and numDocs - 1.
        bool makeZero = r.Next(100) % 100 < percent_zero;
        cache.Freqs[slot] = makeZero ? 0 : Math.Abs(rand.Next(numDocs - 1)) + 1;
    }

    cache.MaxIDs = new int[numVals];
    cache.MinIDs = new int[numVals];

    cache.ValArray = new TermInt32List(numVals, "0000000000");
    for (int value = 1; value <= numVals; ++value)
    {
        cache.ValArray.Add(value.ToString("0000000000"));
    }
    cache.ValArray.Seal();

    cache.OrderArray = new BigInt32Array(numDocsPerSeg);
    return cache;
}
/// <summary>
/// Creates an iterator over the union of the terms selected in <paramref name="bitset"/>.
/// The start position is one before the smallest minimum doc id of the selected terms
/// (never below -1) and the upper bound is their largest maximum doc id.
/// </summary>
/// <param name="dataCache">Facet data supplying the value list, min/max id arrays and order array.</param>
/// <param name="bitset">Bit set whose set bits mark the selected term indexes.</param>
public FacetOrDocIdSetIterator(FacetDataCache dataCache, OpenBitSet bitset)
{
    _dataCache = dataCache;
    _orderArray = dataCache.OrderArray;
    _bitset = bitset;

    int lowest = int.MaxValue;
    int highest = -1;

    int termCount = _dataCache.ValArray.Count;
    for (int term = 0; term < termCount; ++term)
    {
        if (bitset.FastGet(term))
        {
            int termMin = _dataCache.MinIDs[term];
            if (lowest > termMin)
            {
                lowest = termMin;
            }

            int termMax = _dataCache.MaxIDs[term];
            if (highest < termMax)
            {
                highest = termMax;
            }
        }
    }

    _maxID = highest;

    // Step back one position so the first advance lands on the lowest document.
    _doc = lowest - 1;
    if (_doc < 0)
    {
        _doc = -1;
    }
}
/// <summary>
/// Estimates the fraction of documents in <paramref name="reader"/> matched by the
/// configured values: the summed frequencies of the values found in the value list
/// divided by <c>reader.MaxDoc</c>, optionally complemented.
/// </summary>
/// <param name="reader">Reader whose facet data and document count are used.</param>
/// <returns>
/// The ratio, with any result above 0.999 treated as <c>1.0</c>; when the complement
/// flag is set, one minus that value.
/// </returns>
public override double GetFacetSelectivity(BoboIndexReader reader)
{
    FacetDataCache dataCache = _facetHandler.GetFacetData<FacetDataCache>(reader);

    int hitTotal = 0;
    foreach (string candidate in _vals)
    {
        int termIndex = dataCache.ValArray.IndexOf(candidate);
        if (termIndex >= 0)
        {
            // Values missing from the value list contribute nothing.
            hitTotal += dataCache.Freqs[termIndex];
        }
    }

    int docTotal = reader.MaxDoc;
    double ratio = (double)hitTotal / (double)docTotal;
    if (ratio > 0.999)
    {
        ratio = 1.0;
    }

    return _takeCompliment ? 1.0 - ratio : ratio;
}
//private int _totalGroups; // NOT USED

/// <summary>
/// Creates a group-by facet count collector; every argument is forwarded unchanged to
/// the base class constructor and no further initialisation is performed here.
/// </summary>
/// <param name="name">Forwarded to the base constructor.</param>
/// <param name="dataCache">Forwarded to the base constructor.</param>
/// <param name="docBase">Forwarded to the base constructor.</param>
/// <param name="sel">Forwarded to the base constructor.</param>
/// <param name="ospec">Forwarded to the base constructor.</param>
public GroupByFacetCountCollector(
    string name,
    FacetDataCache dataCache,
    int docBase,
    BrowseSelection sel,
    FacetSpec ospec)
    : base(name, dataCache, docBase, sel, ospec)
{
}
/// <summary>
/// Stores the range bounds, facet data, nested array and multi-value flag on the new instance.
/// </summary>
/// <param name="start">Range start kept as-is.</param>
/// <param name="end">Range end kept as-is.</param>
/// <param name="dataCache">Facet data cache kept as-is.</param>
/// <param name="nestedArray">Nested array kept as-is.</param>
/// <param name="multi">Multi-value flag kept as-is.</param>
public RangeRandomAccessDocIdSet(int start, int end, FacetDataCache dataCache, BigNestedInt32Array nestedArray, bool multi)
{
    this.m_dataCache = dataCache;
    this.m_nestedArray = nestedArray;
    this.m_multi = multi;
    this.m_start = start;
    this.m_end = end;
}
/// <summary>
/// Stores the latitude and longitude bounds (elements 0 and 1 of each range array)
/// together with the two facet data caches.
/// </summary>
/// <param name="latRange">Array whose first two elements are the latitude start and end.</param>
/// <param name="longRange">Array whose first two elements are the longitude start and end.</param>
/// <param name="latDataCache">Latitude facet data kept as-is.</param>
/// <param name="longDataCache">Longitude facet data kept as-is.</param>
public GeoSimpleRandomAccessDocIdSet(int[] latRange, int[] longRange, FacetDataCache latDataCache, FacetDataCache longDataCache)
{
    int latitudeFrom = latRange[0];
    int latitudeTo = latRange[1];
    int longitudeFrom = longRange[0];
    int longitudeTo = longRange[1];

    _latStart = latitudeFrom;
    _latEnd = latitudeTo;
    _longStart = longitudeFrom;
    _longEnd = longitudeTo;
    _latDataCache = latDataCache;
    _longDataCache = longDataCache;
}
/// <summary>
/// Returns the bit set for <paramref name="dataCache"/>. The set is rebuilt through the
/// bit set builder only when <paramref name="dataCache"/> differs (by <c>==</c>) from
/// the cache used on the previous call; otherwise the remembered set is returned.
/// </summary>
/// <param name="dataCache">Facet data the bit set is built for.</param>
public virtual OpenBitSet GetBitSet(FacetDataCache dataCache)
{
    bool cacheChanged = !(m_lastCache == dataCache);
    if (cacheChanged)
    {
        m_bitSet = m_bitSetBuilder.BitSet(dataCache);
        m_lastCache = dataCache;
    }

    return m_bitSet;
}
/// <summary>
/// Returns the bit set for <paramref name="dataCache"/>. The set is rebuilt through the
/// bit set builder only when <paramref name="dataCache"/> differs (by <c>==</c>) from
/// the cache used on the previous call; otherwise the remembered set is returned.
/// </summary>
/// <param name="dataCache">Facet data the bit set is built for.</param>
public virtual OpenBitSet GetBitSet(FacetDataCache dataCache)
{
    bool cacheChanged = !(lastCache == dataCache);
    if (cacheChanged)
    {
        bitSet = bitSetBuilder.BitSet(dataCache);
        lastCache = dataCache;
    }

    return bitSet;
}
/// <summary>
/// Creates a <c>CompactMultiValueDocComparator</c> over the facet data that the facet
/// handler holds for <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Must be a <c>BoboIndexReader</c>.</param>
/// <param name="docbase">Not used by this implementation.</param>
/// <exception cref="InvalidOperationException"><paramref name="reader"/> is not a <c>BoboIndexReader</c>.</exception>
public override DocComparator GetComparator(IndexReader reader, int docbase)
{
    BoboIndexReader boboReader = reader as BoboIndexReader;
    if (boboReader == null)
    {
        throw new InvalidOperationException("reader must be instance of BoboIndexReader");
    }

    FacetDataCache facetData = _facetHandler.GetFacetData<FacetDataCache>(boboReader);
    return new CompactMultiValueDocComparator(facetData, _facetHandler, boboReader);
}
/// <summary>
/// Creates an iterator over the term index range [<paramref name="start"/>,
/// <paramref name="end"/>]. The min/max id fields are narrowed/widened with every term
/// in the range, starting from whatever values the fields already hold, and the
/// start position is set one before the resulting minimum id (never below -1).
/// </summary>
/// <param name="start">First term index, inclusive.</param>
/// <param name="end">Last term index, inclusive.</param>
/// <param name="dataCache">Facet data supplying the min/max id arrays and the order array.</param>
internal FacetRangeDocIdSetIterator(int start, int end, FacetDataCache dataCache)
{
    m_start = start;
    m_end = end;

    for (int term = start; term <= end; ++term)
    {
        int termMin = dataCache.MinIDs[term];
        if (termMin < m_minID)
        {
            m_minID = termMin;
        }

        int termMax = dataCache.MaxIDs[term];
        if (termMax > m_maxID)
        {
            m_maxID = termMax;
        }
    }

    m_doc = Math.Max(-1, m_minID - 1);
    m_orderArray = dataCache.OrderArray;
}
/// <summary>
/// Creates the path facet count collector for <paramref name="reader"/>: the
/// multi-valued variant when the multi-value flag is set, the single-valued one otherwise.
/// </summary>
/// <param name="reader">Segment reader whose facet data is obtained from the parent handler.</param>
/// <param name="docBase">Not used by this implementation.</param>
public override IFacetCountCollector GetFacetCountCollector(BoboSegmentReader reader, int docBase)
{
    FacetDataCache facetData = m_parent.GetFacetData<FacetDataCache>(reader);

    if (!m_multiValue)
    {
        return new PathFacetCountCollector(m_name, m_separator, m_sel, m_ospec, facetData);
    }

    return new MultiValuedPathFacetCountCollector(m_name, m_separator, m_sel, m_ospec, facetData);
}
/// <summary>
/// Creates the path facet count collector for <paramref name="reader"/>: the
/// multi-valued variant when the multi-value flag is set, the single-valued one otherwise.
/// </summary>
/// <param name="reader">Reader whose facet data is obtained from the parent handler.</param>
/// <param name="docBase">Not used by this implementation.</param>
public override IFacetCountCollector GetFacetCountCollector(BoboIndexReader reader, int docBase)
{
    FacetDataCache facetData = _parent.GetFacetData<FacetDataCache>(reader);

    if (!_multiValue)
    {
        return new PathFacetCountCollector(_name, _separator, _sel, _ospec, facetData);
    }

    return new MultiValuedPathFacetCountCollector(_name, _separator, _sel, _ospec, facetData);
}
/// <summary>
/// Builds the doc id set for the configured latitude and longitude range strings,
/// each parsed with <c>FacetRangeFilter.Parse</c> against its own facet data.
/// </summary>
/// <param name="reader">Reader whose latitude and longitude facet data is queried.</param>
/// <returns>
/// <c>null</c> when either range fails to parse; otherwise a new <c>GeoSimpleRandomAccessDocIdSet</c>.
/// </returns>
public override RandomAccessDocIdSet GetRandomAccessDocIdSet(BoboIndexReader reader)
{
    FacetDataCache latitudes = _latFacetHandler.GetFacetData<FacetDataCache>(reader);
    FacetDataCache longitudes = _longFacetHandler.GetFacetData<FacetDataCache>(reader);

    // Both ranges are parsed before either result is inspected.
    int[] latitudeBounds = FacetRangeFilter.Parse(latitudes, _latRangeString);
    int[] longitudeBounds = FacetRangeFilter.Parse(longitudes, _longRangeString);

    bool bothParsed = latitudeBounds != null && longitudeBounds != null;
    if (!bothParsed)
    {
        return null;
    }

    return new GeoSimpleRandomAccessDocIdSet(latitudeBounds, longitudeBounds, latitudes, longitudes);
}