/// <summary>
/// Creates an iterator positioned just before the first document recorded for one cache entry.
/// </summary>
/// <param name="dataCache">Cache supplying <c>MinIDs</c>, <c>MaxIDs</c> and <c>OrderArray</c>.</param>
/// <param name="index">Position used to look up the minimum and maximum doc ids in the cache.</param>
public FacetDocIdSetIterator(FacetDataCache dataCache, int index)
{
    _index = index;

    // Start one slot before the smallest doc id for this entry, clamped so it never drops below -1.
    int smallestDocId = dataCache.MinIDs[_index];
    _doc = Math.Max(-1, smallestDocId - 1);

    int largestDocId = dataCache.MaxIDs[_index];
    _maxID = largestDocId;

    _orderArray = dataCache.OrderArray;
}
/// <summary>
/// Creates a range facet count collector backed by the given data cache.
/// </summary>
/// <param name="name">Facet name stored on the collector.</param>
/// <param name="dataCache">Cache whose <c>Freqs</c> length sizes the count array and whose <c>OrderArray</c> is kept.</param>
/// <param name="docBase">Document base stored on the collector.</param>
/// <param name="ospec">Facet spec stored on the collector.</param>
/// <param name="predefinedRanges">
/// Optional range strings. When non-null they are copied, sorted and parsed into index pairs;
/// when null no ranges are stored.
/// </param>
public RangeFacetCountCollector(string name, FacetDataCache dataCache, int docBase, FacetSpec ospec, IEnumerable<string> predefinedRanges)
{
    _name = name;
    _dataCache = dataCache;
    _countLength = _dataCache.Freqs.Length;
    _count = new LazyBigIntArray(_countLength);
    _array = _dataCache.OrderArray;
    _docBase = docBase;
    _ospec = ospec;

    if (predefinedRanges == null)
    {
        // No predefined ranges: nothing to sort or parse.
        _predefinedRanges = null;
        return;
    }

    // Copy into a list so the caller's sequence is not mutated by the sort.
    _predefinedRanges = new TermStringList();
    var sortedRanges = new List<string>(predefinedRanges);
    sortedRanges.Sort();
    _predefinedRanges.AddAll(sortedRanges);

    // One parsed index array per stored range, in the stored (sorted) order.
    _predefinedRangeIndexes = new int[_predefinedRanges.Count()][];
    int slot = 0;
    foreach (string rangeText in _predefinedRanges)
    {
        _predefinedRangeIndexes[slot] = FacetRangeFilter.Parse(_dataCache, rangeText);
        slot++;
    }
}
/// <summary>
/// Creates an iterator positioned just before the first document recorded for one cache entry.
/// </summary>
/// <param name="dataCache">Cache supplying <c>minIDs</c>, <c>maxIDs</c> and <c>orderArray</c>.</param>
/// <param name="index">Position used to look up the minimum and maximum doc ids in the cache.</param>
public FacetDocIdSetIterator(FacetDataCache dataCache, int index)
{
    this.index = index;

    // Begin one slot before the smallest doc id for this entry; -1 is the lowest allowed start.
    int smallestDocId = dataCache.minIDs[this.index];
    doc = Math.Max(-1, smallestDocId - 1);

    int largestDocId = dataCache.maxIDs[this.index];
    maxID = largestDocId;

    orderArray = dataCache.orderArray;
}
/// <summary>
/// Creates an empty cache: every data array is null and the term count size is
/// <c>FacetHandler.TermCountSize.Large</c>.
/// </summary>
public FacetDataCache()
{
    // No data is held yet; all arrays start out unset.
    orderArray = null;
    valArray = null;
    maxIDs = null;
    minIDs = null;
    freqs = null;

    termCountSize = FacetHandler.TermCountSize.Large;
}
/// <summary>
/// Creates a cache that wraps the supplied, already-built data arrays.
/// </summary>
/// <param name="orderArray">Order array stored on the cache.</param>
/// <param name="valArray">Term value list stored on the cache.</param>
/// <param name="freqs">Frequency array stored on the cache.</param>
/// <param name="minIDs">Minimum doc id array stored on the cache.</param>
/// <param name="maxIDs">Maximum doc id array stored on the cache.</param>
/// <param name="termCountSize">Term count size stored on the cache.</param>
public FacetDataCache(
    BigSegmentedArray orderArray,
    ITermValueList valArray,
    int[] freqs,
    int[] minIDs,
    int[] maxIDs,
    FacetHandler.TermCountSize termCountSize)
{
    // Arguments are stored as-is; no copies are made.
    this.orderArray = orderArray;
    this.valArray = valArray;
    this.freqs = freqs;
    this.minIDs = minIDs;
    this.maxIDs = maxIDs;
    this.termCountSize = termCountSize;
}
/// <summary>
/// Creates a count collector whose count array has one slot per entry of the cache's <c>freqs</c> array.
/// </summary>
/// <param name="sel">Browse selection stored on the collector.</param>
/// <param name="dataCache">Cache supplying <c>freqs</c> (for sizing) and <c>orderArray</c>.</param>
/// <param name="name">Facet name stored on the collector.</param>
/// <param name="ospec">Facet spec stored on the collector.</param>
public DefaultFacetCountCollector(BrowseSelection sel, FacetDataCache dataCache, string name, FacetSpec ospec)
{
    _sel = sel;
    _ospec = ospec;
    _name = name;
    _dataCache = dataCache;

    int slotCount = _dataCache.freqs.Length;
    _count = new int[slotCount];

    _array = _dataCache.orderArray;
}
/// <summary>
/// Fills <paramref name="array"/> through <c>Initialize</c>, then asserts that its size is
/// <c>short.MaxValue * 2</c> and that every slot <c>i</c> holds <c>i % array.MaxValue</c>.
/// </summary>
/// <param name="array">Array under test.</param>
private static void CountUpTestHelper(BigSegmentedArray array)
{
    Initialize(array);

    Assert.AreEqual(short.MaxValue * 2, array.Size());

    int position = 0;
    while (position < array.Size())
    {
        Assert.AreEqual(position % array.MaxValue, array.Get(position));
        position++;
    }
}
/// <summary>
/// Creates a filter over the given value indexes and precomputes their combined bit mask.
/// </summary>
/// <param name="dataCache">Cache stored on the filter; its <c>orderArray</c> is kept as well.</param>
/// <param name="index">Value indexes; each index <c>i</c> contributes the bit <c>1 &lt;&lt; (i - 1)</c>.</param>
public CompactMultiValueFacetFilter(FacetDataCache dataCache, int[] index)
{
    this.dataCache = dataCache;
    orderArray = this.dataCache.orderArray;
    this.index = index;

    // OR together one bit per requested index.
    bits = 0x0;
    for (int n = 0; n < index.Length; n++)
    {
        int valueIndex = index[n];
        bits |= 0x00000001 << (valueIndex - 1);
    }
}
/// <summary>
/// Creates an iterator over facet values and their counts.
/// </summary>
/// <param name="valList">Term values stored on the iterator.</param>
/// <param name="counts">Counts stored on the iterator.</param>
/// <param name="countlength">Number of count slots; the last index is <c>countlength - 1</c>.</param>
/// <param name="zeroBased">
/// When true the starting index is -1; when false it is 0.
/// </param>
public DefaultFacetIterator(ITermValueList valList, BigSegmentedArray counts, int countlength, bool zeroBased)
{
    _valList = valList;
    _count = counts;
    _countlength = countlength;
    _lastIndex = _countlength - 1;

    // Starting index depends on whether the caller wants slot 0 included.
    _index = zeroBased ? -1 : 0;

    facet = null;
    count = 0;
}
/// <summary>
/// Creates an iterator over double-valued facets and their counts.
/// </summary>
/// <param name="valList">Double term values stored on the iterator.</param>
/// <param name="countarray">Counts stored on the iterator.</param>
/// <param name="countlength">Number of count slots; <c>countlength - 1</c> is cached as well.</param>
/// <param name="zeroBased">
/// When true the starting index is -1; when false it is 0.
/// </param>
public DefaultDoubleFacetIterator(TermDoubleList valList, BigSegmentedArray countarray, int countlength, bool zeroBased)
{
    _valList = valList;
    _countlength = countlength;
    _count = countarray;
    _countLengthMinusOne = _countlength - 1;

    // Starting index depends on whether the caller wants slot 0 included.
    _index = zeroBased ? -1 : 0;

    // Current facet starts out as the "missing" marker with a zero count.
    _facet = TermDoubleList.VALUE_MISSING;
    count = 0;
}
/// <summary>
/// Creates an iterator over the documents of an inclusive range of cache entries.
/// </summary>
/// <param name="start">First cache entry of the range (inclusive).</param>
/// <param name="end">Last cache entry of the range (inclusive).</param>
/// <param name="dataCache">Cache supplying <c>freqs</c>, <c>minIDs</c>, <c>maxIDs</c> and <c>orderArray</c>.</param>
internal FacetRangeDocIdSetIterator(int start, int end, FacetDataCache dataCache)
{
    _totalFreq = 0;
    _start = start;
    _end = end;

    // Fold every entry in [start, end] into the total frequency and the doc id bounds.
    // NOTE(review): _minID and _maxID are not assigned here before use; their starting
    // values come from their field declarations, which are not visible in this block.
    int entry = start;
    while (entry <= end)
    {
        _totalFreq += dataCache.freqs[entry];
        _minID = Math.Min(_minID, dataCache.minIDs[entry]);
        _maxID = Math.Max(_maxID, dataCache.maxIDs[entry]);
        ++entry;
    }

    // Position one slot before the smallest doc id, never below -1.
    _doc = Math.Max(-1, _minID - 1);
    _orderArray = dataCache.orderArray;
}
/// <summary>
/// Creates an OR filter over the given value indexes, optionally inverted.
/// </summary>
/// <param name="dataCache">Cache stored on the filter; its <c>orderArray</c> is kept and its <c>valArray.Count</c> sizes the bit set.</param>
/// <param name="index">Value indexes whose bits are set.</param>
/// <param name="takeCompliment">When true, every bit in <c>[0, valArray.Count)</c> is flipped after the indexes are set.</param>
public FacetOrFilter(FacetDataCache dataCache, int[] index, bool takeCompliment)
{
    this.dataCache = dataCache;
    orderArray = dataCache.orderArray;
    this.index = index;

    int valueCount = this.dataCache.valArray.Count;
    bitset = new OpenBitSet(valueCount);

    for (int n = 0; n < this.index.Length; n++)
    {
        bitset.FastSet(this.index[n]);
    }

    if (takeCompliment)
    {
        // Invert the selection across the whole value range.
        bitset.Flip(0, valueCount);
    }
}
/// <summary>
/// Exercises <c>FindValue</c> on <paramref name="array"/>: hits inside and outside the searched
/// window, then again after the whole array has been filled with a single value.
/// </summary>
/// <param name="array">Array under test.</param>
private static void FindValueHelper(BigSegmentedArray array)
{
    // Three distinct values derived from the array's maximum.
    int firstValue = array.MaxValue / 16;
    int secondValue = firstValue * 2;
    int fillValue = firstValue * 3;

    array.Add(1000, firstValue);
    array.Add(2000, secondValue);

    // firstValue lives at 1000: found when the window covers it, not found when it starts past it.
    Assert.AreEqual(1000, array.FindValue(firstValue, 0, 2000));
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, array.FindValue(firstValue, 1001, 2000));

    // secondValue lives at 2000, which is the first slot of the searched window.
    Assert.AreEqual(2000, array.FindValue(secondValue, 2000, 3000));

    // After filling, secondValue is gone and fillValue is found at the requested position.
    array.Fill(fillValue);
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, array.FindValue(secondValue, 2000, 3000));
    Assert.AreEqual(4000, array.FindValue(fillValue, 4000, 4000));
}
/// <summary>
/// Creates a range facet count collector backed by the given data cache.
/// </summary>
/// <param name="name">Facet name stored on the collector.</param>
/// <param name="dataCache">Cache whose <c>freqs</c> length sizes the count array and whose <c>orderArray</c> is kept.</param>
/// <param name="ospec">Facet spec stored on the collector.</param>
/// <param name="predefinedRanges">
/// Optional range strings. When non-null, each one is parsed into an index array, in enumeration order.
/// </param>
/// <param name="autoRange">Auto-range flag stored on the collector.</param>
protected internal RangeFacetCountCollector(string name, FacetDataCache dataCache, FacetSpec ospec, IEnumerable<string> predefinedRanges, bool autoRange)
{
    this.name = name;
    this.dataCache = dataCache;
    this.ospec = ospec;
    count = new int[this.dataCache.freqs.Length];
    orderArray = this.dataCache.orderArray;
    this.predefinedRanges = predefinedRanges;
    this.autoRange = autoRange;

    if (this.predefinedRanges != null)
    {
        // Parse in a single pass. Counting the sequence first and enumerating it again
        // would evaluate a lazy sequence twice and could mis-size the array if the two
        // passes disagree.
        var parsedRanges = new List<int[]>();
        foreach (string range in this.predefinedRanges)
        {
            parsedRanges.Add(RangeFacetHandler.Parse(this.dataCache, range));
        }
        predefinedRangeIndexes = parsedRanges.ToArray();
    }
}
/// <summary>
/// Creates a geo facet count collector over separate latitude and longitude data caches.
/// </summary>
/// <param name="name">Facet name stored on the collector.</param>
/// <param name="latDataCache">Latitude cache; its <c>Freqs</c> length sizes the latitude counts.</param>
/// <param name="longDataCache">Longitude cache; its <c>Freqs</c> length sizes the longitude counts.</param>
/// <param name="docBase">Document base stored on the collector.</param>
/// <param name="spec">Facet spec stored on the collector.</param>
/// <param name="predefinedRanges">
/// Optional range strings. When non-null they are copied, sorted and parsed into latitude and
/// longitude start/end index pairs. When null the stored range list stays empty and no index
/// arrays are built.
/// </param>
public GeoSimpleFacetCountCollector(string name, FacetDataCache latDataCache, FacetDataCache longDataCache, int docBase, FacetSpec spec, IEnumerable<string> predefinedRanges)
{
    _name = name;
    _latDataCache = latDataCache;
    _longDataCache = longDataCache;
    _latCount = new int[_latDataCache.Freqs.Length];
    _longCount = new int[_longDataCache.Freqs.Length];
    log.Info("latCount: " + _latDataCache.Freqs.Length + " longCount: " + _longDataCache.Freqs.Length);
    _latOrderArray = _latDataCache.OrderArray;
    _longOrderArray = _longDataCache.OrderArray;
    _docBase = docBase;
    _spec = spec;
    _predefinedRanges = new TermStringList();

    // The null check must come before the copy: List<T>'s copy constructor throws on a
    // null source, which previously made the null check below it unreachable.
    if (predefinedRanges == null)
    {
        return;
    }

    // Copy into a list so the caller's sequence is not mutated by the sort.
    var predefinedRangesTemp = new List<string>(predefinedRanges);
    predefinedRangesTemp.Sort();
    _predefinedRanges.AddAll(predefinedRangesTemp);

    int rangeCount = _predefinedRanges.Count;

    _latPredefinedRangeIndexes = new int[rangeCount][];
    for (int j = 0; j < _latPredefinedRangeIndexes.Length; j++)
    {
        _latPredefinedRangeIndexes[j] = new int[2];
    }

    _longPredefinedRangeIndexes = new int[rangeCount][];
    for (int j = 0; j < _longPredefinedRangeIndexes.Length; j++)
    {
        _longPredefinedRangeIndexes[j] = new int[2];
    }

    int i = 0;
    foreach (string range in _predefinedRanges)
    {
        int[] ranges = GeoSimpleFacetFilter.Parse(_latDataCache, _longDataCache, range);
        _latPredefinedRangeIndexes[i][0] = ranges[0];   // latStart
        _latPredefinedRangeIndexes[i][1] = ranges[1];   // latEnd
        _longPredefinedRangeIndexes[i][0] = ranges[2];  // longStart
        _longPredefinedRangeIndexes[i][1] = ranges[3];  // longEnd
        i++;
    }
}
/// <summary>
/// Creates a bucket count collector layered on top of an existing sub-collector.
/// </summary>
/// <param name="name">Facet name stored on the collector.</param>
/// <param name="subCollector">Underlying collector stored on this instance.</param>
/// <param name="ospec">Facet spec stored on the collector.</param>
/// <param name="predefinedBuckets">Bucket definitions; the keys become the bucket values, in sorted order.</param>
/// <param name="numdocs">Document count stored on the collector.</param>
public BucketFacetCountCollector(string name, DefaultFacetCountCollector subCollector, FacetSpec ospec, IDictionary<string, string[]> predefinedBuckets, int numdocs)
{
    _name = name;
    _subCollector = subCollector;
    _ospec = ospec;
    _numdocs = numdocs;
    _predefinedBuckets = predefinedBuckets;

    // Collapsed counts are computed lazily, so start without them.
    _collapsedCounts = null;

    // The first bucket value is always the empty string; named buckets follow in sorted order.
    _bucketValues = new TermStringList();
    _bucketValues.Add("");

    var sortedBucketNames = new List<string>(_predefinedBuckets.Keys);
    sortedBucketNames.Sort();
    for (int n = 0; n < sortedBucketNames.Count; n++)
    {
        _bucketValues.Add(sortedBucketNames[n]);
    }

    _bucketValues.Seal();
}
/// <summary>
/// Creates an iterator over the documents covered by an inclusive latitude range and an
/// inclusive longitude range of cache entries.
/// </summary>
/// <param name="latStart">First latitude cache entry (inclusive).</param>
/// <param name="latEnd">Last latitude cache entry (inclusive).</param>
/// <param name="longStart">First longitude cache entry (inclusive).</param>
/// <param name="longEnd">Last longitude cache entry (inclusive).</param>
/// <param name="latDataCache">Latitude cache supplying <c>MinIDs</c>, <c>MaxIDs</c> and <c>OrderArray</c>.</param>
/// <param name="longDataCache">Longitude cache supplying <c>MinIDs</c>, <c>MaxIDs</c> and <c>OrderArray</c>.</param>
internal GeoSimpleDocIdSetIterator(int latStart, int latEnd, int longStart, int longEnd, FacetDataCache latDataCache, FacetDataCache longDataCache)
{
    //_totalFreq = 0; // NOT USED
    _latStart = latStart;
    _longStart = longStart;
    _latEnd = latEnd;
    _longEnd = longEnd;

    // Widen the doc id bounds over every latitude entry, then over every longitude entry.
    // NOTE(review): _minID and _maxID are not assigned here before use; their starting
    // values come from their field declarations, which are not visible in this block.
    int latEntry = latStart;
    while (latEntry <= latEnd)
    {
        _minID = Math.Min(_minID, latDataCache.MinIDs[latEntry]);
        _maxID = Math.Max(_maxID, latDataCache.MaxIDs[latEntry]);
        ++latEntry;
    }

    int longEntry = longStart;
    while (longEntry <= longEnd)
    {
        _minID = Math.Min(_minID, longDataCache.MinIDs[longEntry]);
        _maxID = Math.Max(_maxID, longDataCache.MaxIDs[longEntry]);
        ++longEntry;
    }

    // Position one slot before the smallest doc id, never below -1.
    _doc = Math.Max(-1, _minID - 1);
    _latOrderArray = latDataCache.OrderArray;
    _longOrderArray = longDataCache.OrderArray;
}
/// <summary>
/// Returns the per-bucket counts, computing and caching them on first use.
/// </summary>
/// <remarks>
/// For every non-empty bucket value, the sub-collector counts of its member values are summed
/// into that bucket's slot. Slot 0 receives <c>_numdocs</c> minus the summed <c>Freqs</c> of
/// every distinct member value that was found at a positive index.
/// </remarks>
/// <returns>The cached collapsed count array.</returns>
private BigSegmentedArray GetCollapsedCounts()
{
    if (_collapsedCounts != null)
    {
        return _collapsedCounts;
    }

    _collapsedCounts = new LazyBigIntArray(_bucketValues.Count);
    FacetDataCache cache = _subCollector.DataCache;
    ITermValueList subValues = cache.ValArray;
    BigSegmentedArray subCounts = _subCollector.Count;

    // Tracks which sub-value indexes were already added to coveredFreq, so a value
    // that appears in several buckets is only counted once there.
    BitVector alreadyCovered = new BitVector(subCounts.Size());
    int coveredFreq = 0;
    int bucketSlot = 0;

    foreach (string bucketName in _bucketValues)
    {
        if (bucketName.Length > 0)
        {
            string[] members = _predefinedBuckets.Get(bucketName);
            int bucketTotal = 0;

            foreach (string member in members)
            {
                int memberIndex = subValues.IndexOf(member);
                if (memberIndex <= 0)
                {
                    // Only positive indexes contribute.
                    continue;
                }

                bucketTotal += subCounts.Get(memberIndex);

                if (!alreadyCovered.Get(memberIndex))
                {
                    alreadyCovered.Set(memberIndex);
                    coveredFreq += cache.Freqs[memberIndex];
                }
            }

            _collapsedCounts.Add(bucketSlot, bucketTotal);
        }

        bucketSlot++;
    }

    _collapsedCounts.Add(0, (_numdocs - coveredFreq));
    return _collapsedCounts;
}
/// <summary>
/// Creates an iterator bounded by the smallest and largest doc ids of the given cache entries.
/// </summary>
/// <param name="dataCache">Cache supplying <c>minIDs</c>, <c>maxIDs</c> and <c>orderArray</c>.</param>
/// <param name="index">Cache entries whose doc id bounds are combined.</param>
/// <param name="bits">Bit mask stored on the iterator.</param>
public CompactMultiValueFacetDocIdSetIterator(FacetDataCache dataCache, int[] index, int bits)
{
    this.bits = bits;
    doc = int.MaxValue;
    maxID = -1;
    orderArray = dataCache.orderArray;

    // Take the minimum of all minIDs and the maximum of all maxIDs.
    for (int n = 0; n < index.Length; n++)
    {
        int entry = index[n];
        doc = Math.Min(doc, dataCache.minIDs[entry]);
        maxID = Math.Max(maxID, dataCache.maxIDs[entry]);
    }

    // Step back one slot so the first advance lands on the minimum; never go below -1.
    doc = Math.Max(-1, doc - 1);
}
/// <summary>
/// Creates an iterator bounded by the smallest and largest doc ids of the given cache entries.
/// </summary>
/// <param name="dataCache">Cache supplying <c>MinIDs</c>, <c>MaxIDs</c> and <c>OrderArray</c>.</param>
/// <param name="index">Cache entries whose doc id bounds are combined.</param>
/// <param name="bits">Bit mask stored on the iterator.</param>
public CompactMultiValueFacetDocIdSetIterator(FacetDataCache dataCache, int[] index, int bits)
{
    _bits = bits;
    _doc = int.MaxValue;
    _maxID = -1;
    _orderArray = dataCache.OrderArray;

    // Take the minimum of all MinIDs and the maximum of all MaxIDs.
    for (int n = 0; n < index.Length; n++)
    {
        int entry = index[n];
        _doc = Math.Min(_doc, dataCache.MinIDs[entry]);
        _maxID = Math.Max(_maxID, dataCache.MaxIDs[entry]);
    }

    // Step back one slot so the first advance lands on the minimum; never go below -1.
    _doc = Math.Max(-1, _doc - 1);
}
/// <summary>
/// Increments the counts of the values stored for document <paramref name="id"/>, restricted
/// to values whose bit is set in <paramref name="filter"/>.
/// </summary>
/// <remarks>
/// When the document's page is absent, or its slot holds <c>MISSING</c>, slot 0 of
/// <paramref name="count"/> is incremented instead (regardless of the filter).
/// </remarks>
/// <param name="id">Document id; its high bits select the page and its low bits the slot.</param>
/// <param name="count">Count array updated in place.</param>
/// <param name="filter">Bit set deciding which values are counted.</param>
public void CountNoReturnWithFilter(int id, BigSegmentedArray count, OpenBitSet filter)
{
    int[] page = m_list[id >> PAGEID_SHIFT];

    if (page != null)
    {
        int slot = page[id & SLOTID_MASK];

        if (slot >= 0)
        {
            // Non-negative slot: the slot itself is the single value.
            if (filter.FastGet(slot))
            {
                count.Add(slot, count.Get(slot) + 1);
            }
            return;
        }

        if (slot != MISSING)
        {
            // Negative slot: the start offset and the number of values are packed into it.
            int cursor = -(slot >> VALIDX_SHIFT); // signed shift, remember slot is a negative number
            int stop = cursor + (slot & COUNT_MASK);

            for (; cursor < stop; cursor++)
            {
                int value = page[cursor];
                if (filter.FastGet(value))
                {
                    count.Add(value, count.Get(value) + 1);
                }
            }
            return;
        }
    }

    // No page, or the slot is MISSING: count the document under slot 0.
    count.Add(0, count.Get(0) + 1);
}
/// <summary>
/// Creates a count collector whose count array has one slot per entry of the cache's <c>Freqs</c> array.
/// </summary>
/// <param name="name">Facet name stored on the collector.</param>
/// <param name="dataCache">Cache supplying <c>Freqs</c> (for sizing) and <c>OrderArray</c>.</param>
/// <param name="docBase">Document base stored on the collector.</param>
/// <param name="sel">Browse selection stored on the collector.</param>
/// <param name="ospec">Facet spec stored on the collector.</param>
public DefaultFacetCountCollector(string name, FacetDataCache dataCache, int docBase, BrowseSelection sel, FacetSpec ospec)
{
    _sel = sel;
    _ospec = ospec;
    _name = name;
    _dataCache = dataCache;
    _countlength = _dataCache.Freqs.Length;

    // A former size threshold (3096) selected between a direct allocation and a pooled
    // memory manager. The memory manager was removed, leaving two identical branches,
    // so the array is now always allocated directly.
    _count = new LazyBigIntArray(_countlength);

    _array = _dataCache.OrderArray;
    _docBase = docBase;
}
/// <summary>
/// Exercises <c>FindValueRange</c> on <paramref name="array"/> with a single value stored at
/// position 10000, checking value ranges that miss it, contain it and touch it at either end,
/// combined with position windows that start or end on the stored position.
/// </summary>
/// <param name="array">Array under test.</param>
private static void FindValueRangeHelper(BigSegmentedArray array)
{
    // Evenly spaced values derived from the array's maximum; only b is actually stored.
    int a = array.MaxValue / 16;
    int b = a * 2;
    int d = a * 4;
    int e = a * 5;

    array.Add(10000, b);

    // Value range [d, e] does not contain b, so nothing is found.
    Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, array.FindValueRange(d, e, 0, array.Size()));

    // Value range [a, e] contains b; vary the position window around 10000.
    Assert.AreEqual(10000, array.FindValueRange(a, e, 0, array.Size()));
    Assert.AreEqual(10000, array.FindValueRange(a, e, 10000, array.Size()));
    Assert.AreEqual(10000, array.FindValueRange(a, e, 0, 10000));

    // b sits on the upper bound, the lower bound, and both bounds of the value range.
    Assert.AreEqual(10000, array.FindValueRange(a, b, 9000, 10100));
    Assert.AreEqual(10000, array.FindValueRange(b, e, 9000, 10000));
    Assert.AreEqual(10000, array.FindValueRange(b, b, 9000, 10000));
}
/// <summary>
/// Creates an iterator bounded by the smallest and largest doc ids of the given cache entries.
/// </summary>
/// <param name="dataCache">Cache supplying <c>minIDs</c>, <c>maxIDs</c> and <c>orderArray</c>.</param>
/// <param name="index">Cache entries whose doc id bounds are combined.</param>
/// <param name="bitset">Bit set stored on the iterator.</param>
public FacetOrDocIdSetIterator(FacetDataCache dataCache, int[] index, OpenBitSet bitset)
{
    _dataCache = dataCache;
    _index = index;
    _orderArray = dataCache.orderArray;
    _bitset = bitset;
    _doc = int.MaxValue;
    _maxID = -1;

    // Take the minimum of all minIDs and the maximum of all maxIDs.
    for (int n = 0; n < _index.Length; n++)
    {
        int entry = _index[n];
        _doc = Math.Min(_doc, _dataCache.minIDs[entry]);
        _maxID = Math.Max(_maxID, _dataCache.maxIDs[entry]);
    }

    // Step back one slot so the first advance lands on the minimum; never go below -1.
    _doc = Math.Max(-1, _doc - 1);
}
/// <summary>
/// Asserts that <paramref name="array"/> reads 0 at position 0 and reports a size of 0.
/// </summary>
/// <param name="array">Array under test.</param>
private static void EmptyArrayTestHelper(BigSegmentedArray array)
{
    var firstValue = array.Get(0);
    Assert.AreEqual(0, firstValue);

    var reportedSize = array.Size();
    Assert.AreEqual(0, reportedSize);
}
/// <summary>
/// Writes <c>i % array.MaxValue</c> into every position <c>i</c> below <c>array.Size()</c>.
/// </summary>
/// <param name="array">Array to populate in place.</param>
/// <returns>The same <paramref name="array"/> instance.</returns>
public static BigSegmentedArray Initialize(BigSegmentedArray array)
{
    int position = 0;
    while (position < array.Size())
    {
        array.Add(position, position % array.MaxValue);
        position++;
    }

    return array;
}
/// <summary>
/// Exercises <c>Fill</c> on <paramref name="array"/>: a fill changes every read, a later
/// <c>Add</c> overrides a single position, and a second fill overwrites everything again.
/// </summary>
/// <param name="array">Array under test.</param>
private static void FillTestHelper(BigSegmentedArray array)
{
    // Three distinct values derived from the array's maximum.
    int firstFill = array.MaxValue / 4;
    int singleValue = array.MaxValue / 2;
    int secondFill = array.MaxValue - 1;

    // Before any fill the position reads 0.
    Assert.AreEqual(0, array.Get(20000));

    array.Fill(firstFill);
    Assert.AreEqual(firstFill, array.Get(20000));

    // Overriding one position leaves its neighbour at the fill value.
    array.Add(20000, singleValue);
    Assert.AreEqual(singleValue, array.Get(20000));
    Assert.AreEqual(firstFill, array.Get(20001));
    Assert.AreEqual(20000, array.FindValue(singleValue, 0, 21000));

    // A second fill replaces both the earlier fill and the single override.
    array.Fill(secondFill);
    Assert.AreEqual(secondFill, array.Get(20000));
    Assert.AreEqual(secondFill, array.Get(40000));
    Assert.AreEqual(secondFill, array.Get(0));
}
/// <summary>
/// Rebuilds this cache from the terms of one field of <paramref name="reader"/>.
/// </summary>
/// <remarks>
/// Entry 0 is a placeholder (null value, frequency 0, min/max doc id -1). Every term of the
/// field then receives the next term number; that number is written into the order array for
/// each document in the term's postings, and the term's document count and first/last doc ids
/// are recorded.
/// </remarks>
/// <param name="fieldName">Name of the field to load; it is interned before use.</param>
/// <param name="reader">Index reader supplying <c>MaxDoc</c>, the term enumeration and the postings.</param>
/// <param name="listFactory">Factory for the term value list; when null a <c>TermStringList</c> is used.</param>
/// <exception cref="System.IO.IOException">
/// Thrown when the term number reaches <c>orderArray.MaxValue()</c>.
/// </exception>
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
{
    string field = string.Intern(fieldName);
    int maxDoc = reader.MaxDoc;

    if (orderArray == null)
    {
        orderArray = NewInstance(termCountSize, maxDoc);
    }
    else
    {
        // Reuse the existing memory. The original author notes that zero-filling is
        // unnecessary because the data is reset below.
        // NOTE(review): only documents that appear in some term's postings are rewritten
        // below - confirm that stale values for other documents cannot matter.
        orderArray.EnsureCapacity(maxDoc);
    }

    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();

    ITermValueList list = listFactory == null ? new TermStringList() : listFactory.CreateTermList();

    TermDocs termDocs = null;
    TermEnum termEnum = null;
    try
    {
        // Acquire inside the try block so that termDocs is still released if
        // opening the term enumeration throws.
        termDocs = reader.TermDocs();
        termEnum = reader.Terms(new Term(field));

        // Entry 0 is the placeholder: null value, no documents.
        int t = 0; // current term number
        list.Add(null);
        minIDList.Add(-1);
        maxIDList.Add(-1);
        freqList.Add(0);
        t++;

        do
        {
            Term term = termEnum.Term;
            if (term == null || string.CompareOrdinal(term.Field, field) != 0)
            {
                // Ran past the last term of the requested field.
                break;
            }

            if (t >= orderArray.MaxValue())
            {
                throw new System.IO.IOException("maximum number of value cannot exceed: " + orderArray.MaxValue());
            }

            // There is deliberately no "at most one term per document" check: a document
            // may carry several terms, so facets can be built against tokenized fields.

            // store term text
            list.Add(term.Text);
            termDocs.Seek(termEnum);

            // The frequency is counted from the postings rather than taken from the
            // enumeration's doc frequency, which does not take deleted docs into account.
            int minID = -1;
            int maxID = -1;
            int df = 0;
            if (termDocs.Next())
            {
                df++;
                int docid = termDocs.Doc;
                orderArray.Add(docid, t);
                minID = docid;
                while (termDocs.Next())
                {
                    df++;
                    docid = termDocs.Doc;
                    orderArray.Add(docid, t);
                }
                maxID = docid;
            }

            freqList.Add(df);
            minIDList.Add(minID);
            maxIDList.Add(maxID);
            t++;
        }
        while (termEnum.Next());
    }
    finally
    {
        // Nested so that termEnum is still released if disposing termDocs throws.
        try
        {
            if (termDocs != null)
            {
                termDocs.Dispose();
            }
        }
        finally
        {
            if (termEnum != null)
            {
                termEnum.Dispose();
            }
        }
    }

    list.Seal();

    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();
}
/// <summary>
/// Asserts that <paramref name="array"/> reads 0 at position 0 and reports a length of 0.
/// </summary>
/// <param name="array">Array under test.</param>
private static void EmptyArrayTestHelper(BigSegmentedArray array)
{
    var firstValue = array.Get(0);
    Assert.AreEqual(0, firstValue);

    var reportedLength = array.Length;
    Assert.AreEqual(0, reportedLength);
}
/// <summary>
/// Builds the list of facets to return for a facet spec from a count array.
/// </summary>
/// <remarks>
/// Slot 0 is always excluded. Only values with at least <c>ospec.MinHitCount</c> hits are
/// considered, and at most <c>ospec.MaxCount</c> facets are produced (or <paramref name="countlength"/>
/// when that maximum is not positive). With <c>OrderValueAsc</c> facets are emitted in index
/// order; otherwise a bounded priority queue keeps the top entries according to the hit-count
/// comparator or the spec's custom comparator.
/// </remarks>
/// <param name="ospec">Facet spec; when null the shared empty facet list is returned.</param>
/// <param name="count">Hit count per value index.</param>
/// <param name="countlength">Number of value indexes to scan.</param>
/// <param name="valList">Values, looked up by index.</param>
/// <returns>The selected facets, or the shared empty list when <paramref name="ospec"/> is null.</returns>
/// <exception cref="ArgumentException">
/// Thrown when a custom ordering is requested but the spec has no comparator factory.
/// </exception>
public static IEnumerable<BrowseFacet> GetFacets(FacetSpec ospec, BigSegmentedArray count, int countlength, ITermValueList valList)
{
    if (ospec == null)
    {
        return FacetCountCollector_Fields.EMPTY_FACET_LIST;
    }

    int minCount = ospec.MinHitCount;
    int max = ospec.MaxCount;
    if (max <= 0)
    {
        max = countlength;
    }

    FacetSpec.FacetSortSpec sortspec = ospec.OrderBy;

    if (sortspec == FacetSpec.FacetSortSpec.OrderValueAsc)
    {
        // Values are already in ascending order, so emit them as encountered.
        LinkedList<BrowseFacet> ordered = new LinkedList<BrowseFacet>();
        for (int i = 1; i < countlength; ++i) // exclude zero
        {
            int hits = count.Get(i);
            if (hits >= minCount)
            {
                ordered.AddLast(new BrowseFacet(valList.Get(i), hits));
            }

            if (ordered.Count >= max)
            {
                break;
            }
        }

        return ordered;
    }

    // Any other ordering goes through a comparator.
    IComparatorFactory comparatorFactory;
    if (sortspec == FacetSpec.FacetSortSpec.OrderHitsDesc)
    {
        comparatorFactory = new FacetHitcountComparatorFactory();
    }
    else
    {
        comparatorFactory = ospec.CustomComparatorFactory;
    }

    if (comparatorFactory == null)
    {
        throw new ArgumentException("facet comparator factory not specified");
    }

    IComparer<int> comparator = comparatorFactory.NewComparator(new DefaultFacetCountCollectorFieldAccessor(valList), count);
    LinkedList<BrowseFacet> ranked = new LinkedList<BrowseFacet>();

    // -1 is the queue's "nothing left" marker; it can never be a real value index.
    int forbidden = -1;
    IntBoundedPriorityQueue pq = new IntBoundedPriorityQueue(comparator, max, forbidden);

    for (int i = 1; i < countlength; ++i) // exclude zero
    {
        if (count.Get(i) >= minCount)
        {
            pq.Offer(i);
        }
    }

    // Each polled entry is inserted at the front of the list, so the result is in
    // reverse polling order.
    for (int val = pq.Poll(); val != forbidden; val = pq.Poll())
    {
        ranked.AddFirst(new BrowseFacet(valList[val], count.Get(val)));
    }

    return ranked;
}
/// <summary>
/// Creates a doc id set that keeps the state needed to build its iterators.
/// </summary>
/// <param name="dataCache">Data cache stored on the set.</param>
/// <param name="indexes">Value indexes stored on the set.</param>
/// <param name="finalBits">Bit mask stored on the set.</param>
/// <param name="orderArray">Order array stored on the set.</param>
public CompactMultiValueFacetFilterDocIdSet(
    FacetDataCache dataCache,
    int[] indexes,
    int finalBits,
    BigSegmentedArray orderArray)
{
    // Arguments are stored as-is; no copies are made.
    this.dataCache = dataCache;
    this.indexes = indexes;
    this.finalBits = finalBits;
    this.orderArray = orderArray;
}
/// <summary>
/// Creates a comparator with room for <paramref name="numHits"/> doc slots.
/// </summary>
/// <param name="numHits">Length of the internal doc array.</param>
/// <param name="type">Field type stored on the comparator.</param>
/// <param name="dataCache">Cache stored on the comparator; its <c>orderArray</c> is kept as well.</param>
public FacetFieldComparator(int numHits, int type, FacetDataCache dataCache)
{
    int[] docSlots = new int[numHits];
    _docs = docSlots;

    _fieldType = type;
    _dataCache = dataCache;
    orderArray = _dataCache.orderArray;
}