Esempio n. 1
0
 /// <summary>
 /// Sets up the iterator from the cache entries stored at one value index.
 /// </summary>
 /// <param name="dataCache">Cache whose MinIDs, MaxIDs and OrderArray members are read here.</param>
 /// <param name="index">Position used to look up the minimum and maximum id.</param>
 public FacetDocIdSetIterator(FacetDataCache dataCache, int index)
 {
     _index = index;

     // Begin one position before the smallest id stored for this index, never below -1.
     var smallestId = dataCache.MinIDs[_index];
     _doc = Math.Max(-1, smallestId - 1);

     _maxID = dataCache.MaxIDs[_index];
     _orderArray = dataCache.OrderArray;
 }
        /// <summary>
        /// Initializes the collector's counters from the data cache and, when ranges are supplied,
        /// stores them sorted together with the parsed index pair of each range.
        /// </summary>
        /// <param name="name">Value stored in the collector's name field.</param>
        /// <param name="dataCache">Cache providing Freqs (sizes the count array) and OrderArray.</param>
        /// <param name="docBase">Value stored in the collector's doc base field.</param>
        /// <param name="ospec">Facet spec stored for later use.</param>
        /// <param name="predefinedRanges">Range strings to pre-parse, or null for none.</param>
        public RangeFacetCountCollector(string name, FacetDataCache dataCache, int docBase, FacetSpec ospec, IEnumerable<string> predefinedRanges)
        {
            _name = name;
            _dataCache = dataCache;
            _countLength = _dataCache.Freqs.Length;
            _count = new LazyBigIntArray(_countLength);
            _array = _dataCache.OrderArray;
            _docBase = docBase;
            _ospec = ospec;

            if (predefinedRanges == null)
            {
                // No ranges supplied: the range list stays null and no index table is built.
                _predefinedRanges = null;
                return;
            }

            // Keep a sorted copy of the caller's ranges.
            var sortedRanges = new List<string>(predefinedRanges);
            sortedRanges.Sort();
            _predefinedRanges = new TermStringList();
            _predefinedRanges.AddAll(sortedRanges);

            // One parsed entry per stored range, in stored order.
            _predefinedRangeIndexes = new int[_predefinedRanges.Count()][];
            int slot = 0;
            foreach (string rangeText in _predefinedRanges)
            {
                _predefinedRangeIndexes[slot] = FacetRangeFilter.Parse(_dataCache, rangeText);
                slot++;
            }
        }
Esempio n. 3
0
 /// <summary>
 /// Sets up the iterator from the cache entries stored at one value index.
 /// </summary>
 /// <param name="dataCache">Cache whose minIDs, maxIDs and orderArray members are read here.</param>
 /// <param name="index">Position used to look up the minimum and maximum id.</param>
 public FacetDocIdSetIterator(FacetDataCache dataCache, int index)
 {
     this.index = index;

     // Position one step before the first id for this index, clamped at -1.
     var firstId = dataCache.minIDs[this.index];
     doc = Math.Max(-1, firstId - 1);

     maxID = dataCache.maxIDs[this.index];
     orderArray = dataCache.orderArray;
 }
 /// <summary>
 /// Creates a cache with every backing array set to null and the term count size set to Large.
 /// </summary>
 public FacetDataCache()
 {
     termCountSize = FacetHandler.TermCountSize.Large;

     // All backing stores start out as null.
     orderArray = null;
     valArray = null;
     freqs = null;
     minIDs = null;
     maxIDs = null;
 }
 /// <summary>
 /// Creates a cache that stores the supplied arrays and settings as given (no copies are made).
 /// </summary>
 /// <param name="orderArray">Stored in the orderArray field.</param>
 /// <param name="valArray">Stored in the valArray field.</param>
 /// <param name="freqs">Stored in the freqs field.</param>
 /// <param name="minIDs">Stored in the minIDs field.</param>
 /// <param name="maxIDs">Stored in the maxIDs field.</param>
 /// <param name="termCountSize">Stored in the termCountSize field.</param>
 public FacetDataCache(BigSegmentedArray orderArray, ITermValueList valArray, int[] freqs, int[] minIDs, int[] maxIDs, FacetHandler.TermCountSize termCountSize)
 {
     this.termCountSize = termCountSize;

     this.valArray = valArray;
     this.orderArray = orderArray;

     this.freqs = freqs;
     this.minIDs = minIDs;
     this.maxIDs = maxIDs;
 }
 /// <summary>
 /// Stores the selection, spec, name and cache, and allocates one counter per entry of the cache's freqs array.
 /// </summary>
 /// <param name="sel">Browse selection stored for later use.</param>
 /// <param name="dataCache">Cache providing freqs (sizes the count array) and orderArray.</param>
 /// <param name="name">Value stored in the collector's name field.</param>
 /// <param name="ospec">Facet spec stored for later use.</param>
 public DefaultFacetCountCollector(BrowseSelection sel, FacetDataCache dataCache, string name, FacetSpec ospec)
 {
     _name = name;
     _sel = sel;
     _ospec = ospec;
     _dataCache = dataCache;

     // The counter array has the same length as the cache's frequency array.
     int counterLength = _dataCache.freqs.Length;
     _count = new int[counterLength];
     _array = _dataCache.orderArray;
 }
 /// <summary>
 /// Fills the array through Initialize, then asserts its size and that every slot
 /// holds its own position modulo the array's MaxValue.
 /// </summary>
 /// <param name="array">Array under test.</param>
 private static void CountUpTestHelper(BigSegmentedArray array)
 {
     Initialize(array);

     Assert.AreEqual(short.MaxValue * 2, array.Size());

     int position = 0;
     while (position < array.Size())
     {
         var expected = position % array.MaxValue;
         Assert.AreEqual(expected, array.Get(position));
         position++;
     }
 }
 /// <summary>
 /// Stores the cache and selected indexes and folds the indexes into a bit mask.
 /// </summary>
 /// <param name="dataCache">Cache whose orderArray is kept by the filter.</param>
 /// <param name="index">Selected value indexes; entry i sets bit (i - 1) of the mask.</param>
 public CompactMultiValueFacetFilter(FacetDataCache dataCache, int[] index)
 {
     this.dataCache = dataCache;
     orderArray = this.dataCache.orderArray;
     this.index = index;

     // Start from an empty mask and OR in one bit per selected index.
     bits = 0;
     foreach (int selected in index)
     {
         int shift = selected - 1;
         bits |= 1 << shift;
     }
 }
 /// <summary>
 /// Initializes the iterator over a value list and its counts.
 /// </summary>
 /// <param name="valList">Value list stored by the iterator.</param>
 /// <param name="counts">Count array stored by the iterator.</param>
 /// <param name="countlength">Number of counts; the last index is countlength - 1.</param>
 /// <param name="zeroBased">When true the cursor starts at -1, otherwise at 0.</param>
 public DefaultFacetIterator(ITermValueList valList, BigSegmentedArray counts, int countlength, bool zeroBased)
 {
     _valList = valList;
     _count = counts;
     _countlength = countlength;
     _lastIndex = _countlength - 1;

     // Cursor start: -1 for zero-based iteration, 0 otherwise.
     _index = zeroBased ? -1 : 0;

     facet = null;
     count = 0;
 }
 /// <summary>
 /// Initializes the iterator over a double value list and its counts.
 /// </summary>
 /// <param name="valList">Double value list stored by the iterator.</param>
 /// <param name="countarray">Count array stored by the iterator.</param>
 /// <param name="countlength">Number of counts; countlength - 1 is cached as well.</param>
 /// <param name="zeroBased">When true the cursor starts at -1, otherwise at 0.</param>
 public DefaultDoubleFacetIterator(TermDoubleList valList, BigSegmentedArray countarray, int countlength, bool zeroBased)
 {
     _valList = valList;
     _count = countarray;
     _countlength = countlength;
     _countLengthMinusOne = _countlength - 1;

     // Cursor start: -1 for zero-based iteration, 0 otherwise.
     _index = zeroBased ? -1 : 0;

     // Current facet starts at the list's missing-value marker with a zero count.
     _facet = TermDoubleList.VALUE_MISSING;
     count = 0;
 }
 /// <summary>
 /// Initializes the iterator for the inclusive value-index range [start, end]:
 /// sums the frequencies and widens the min/max id bounds over that range.
 /// </summary>
 /// <param name="start">First value index (inclusive).</param>
 /// <param name="end">Last value index (inclusive).</param>
 /// <param name="dataCache">Cache providing freqs, minIDs, maxIDs and orderArray.</param>
 internal FacetRangeDocIdSetIterator(int start, int end, FacetDataCache dataCache)
 {
     _start = start;
     _end = end;
     _totalFreq = 0;

     // _minID / _maxID keep whatever initial values the fields were declared with
     // and are only tightened/widened here.
     for (int valueIndex = start; valueIndex <= end; valueIndex++)
     {
         _totalFreq += dataCache.freqs[valueIndex];
         _minID = Math.Min(_minID, dataCache.minIDs[valueIndex]);
         _maxID = Math.Max(_maxID, dataCache.maxIDs[valueIndex]);
     }

     // Begin one position before the smallest id, never below -1.
     var startDoc = _minID - 1;
     _doc = Math.Max(-1, startDoc);
     _orderArray = dataCache.orderArray;
 }
Esempio n. 12
0
 /// <summary>
 /// Builds a bit set with one bit per entry of the cache's value array, sets the bits of the
 /// given indexes and optionally inverts the whole set.
 /// </summary>
 /// <param name="dataCache">Cache providing orderArray and valArray.</param>
 /// <param name="index">Value indexes whose bits are set.</param>
 /// <param name="takeCompliment">When true, every bit in [0, valArray.Count) is flipped afterwards.</param>
 public FacetOrFilter(FacetDataCache dataCache, int[] index, bool takeCompliment)
 {
     this.dataCache = dataCache;
     orderArray = dataCache.orderArray;
     this.index = index;

     var valueCount = this.dataCache.valArray.Count;
     bitset = new OpenBitSet(valueCount);

     foreach (int selected in this.index)
     {
         bitset.FastSet(selected);
     }

     if (!takeCompliment)
     {
         return;
     }

     // Invert the selection across the full value range.
     bitset.Flip(0, valueCount);
 }
        /// <summary>
        /// Asserts FindValue results for two individually written slots and again after
        /// Fill overwrites the array with a third value.
        /// </summary>
        /// <param name="array">Array under test.</param>
        private static void FindValueHelper(BigSegmentedArray array)
        {
            // Three distinct values derived from the array's MaxValue.
            int first = array.MaxValue / 16;
            int second = first * 2;
            int third = first * 3;

            array.Add(1000, first);
            array.Add(2000, second);

            Assert.AreEqual(1000, array.FindValue(first, 0, 2000));
            // Searching past position 1000 must not find the first value again.
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, array.FindValue(first, 1001, 2000));
            Assert.AreEqual(2000, array.FindValue(second, 2000, 3000));

            array.Fill(third);

            // After Fill the second value is gone and the third is found where the search starts.
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, array.FindValue(second, 2000, 3000));
            Assert.AreEqual(4000, array.FindValue(third, 4000, 4000));
        }
        /// <summary>
        /// Initializes the collector and, when ranges are supplied, parses each of them into an index pair.
        /// </summary>
        /// <param name="name">Value stored in the collector's name field.</param>
        /// <param name="dataCache">Cache providing freqs (sizes the count array) and orderArray.</param>
        /// <param name="ospec">Facet spec stored for later use.</param>
        /// <param name="predefinedRanges">Range strings to pre-parse, or null for none. The reference is stored as given.</param>
        /// <param name="autoRange">Flag stored in the autoRange field.</param>
        protected internal RangeFacetCountCollector(string name, FacetDataCache dataCache, FacetSpec ospec, IEnumerable<string> predefinedRanges, bool autoRange)
        {
            this.name = name;
            this.dataCache = dataCache;
            this.ospec = ospec;
            count = new int[this.dataCache.freqs.Length];
            orderArray = this.dataCache.orderArray;
            this.predefinedRanges = predefinedRanges;
            this.autoRange = autoRange;

            if (this.predefinedRanges != null)
            {
                // Enumerate the sequence exactly once. Counting first and iterating afterwards
                // would walk a lazy sequence twice and could overrun the array if the two
                // passes produced a different number of items.
                var parsedRanges = new List<int[]>();
                foreach (string range in this.predefinedRanges)
                {
                    parsedRanges.Add(RangeFacetHandler.Parse(this.dataCache, range));
                }
                predefinedRangeIndexes = parsedRanges.ToArray();
            }
        }
        /// <summary>
        /// Initializes the latitude/longitude counters and, when ranges are supplied, stores them
        /// sorted together with the parsed start/end indexes for each range.
        /// </summary>
        /// <param name="name">Value stored in the collector's name field.</param>
        /// <param name="latDataCache">Cache providing Freqs and OrderArray for latitude.</param>
        /// <param name="longDataCache">Cache providing Freqs and OrderArray for longitude.</param>
        /// <param name="docBase">Value stored in the collector's doc base field.</param>
        /// <param name="spec">Facet spec stored for later use.</param>
        /// <param name="predefinedRanges">
        /// Range strings to pre-parse. May be null, in which case the stored range list stays empty
        /// and the range index tables are not assigned.
        /// </param>
        public GeoSimpleFacetCountCollector(string name, FacetDataCache latDataCache, FacetDataCache longDataCache, int docBase, FacetSpec spec, IEnumerable<string> predefinedRanges)
        {
            _name = name;
            _latDataCache = latDataCache;
            _longDataCache = longDataCache;
            _latCount = new int[_latDataCache.Freqs.Length];
            _longCount = new int[_longDataCache.Freqs.Length];
            log.Info("latCount: " + _latDataCache.Freqs.Length + " longCount: " + _longDataCache.Freqs.Length);
            _latOrderArray = _latDataCache.OrderArray;
            _longOrderArray = _longDataCache.OrderArray;
            _docBase = docBase;
            _spec = spec;
            _predefinedRanges = new TermStringList();

            // The null check has to happen before the argument is copied: List<T>'s copy
            // constructor throws ArgumentNullException for a null source, which previously made
            // the later "!= null" guard unreachable.
            if (predefinedRanges == null)
            {
                return;
            }

            var predefinedRangesTemp = new List<string>(predefinedRanges);
            predefinedRangesTemp.Sort();
            _predefinedRanges.AddAll(predefinedRangesTemp);

            // One [start, end] pair per stored range, for latitude and longitude respectively.
            int rangeCount = _predefinedRanges.Count;
            _latPredefinedRangeIndexes = new int[rangeCount][];
            _longPredefinedRangeIndexes = new int[rangeCount][];
            for (int j = 0; j < rangeCount; j++)
            {
                _latPredefinedRangeIndexes[j] = new int[2];
                _longPredefinedRangeIndexes[j] = new int[2];
            }

            int i = 0;
            foreach (string range in _predefinedRanges)
            {
                int[] ranges = GeoSimpleFacetFilter.Parse(_latDataCache, _longDataCache, range);
                _latPredefinedRangeIndexes[i][0] = ranges[0];   // latStart
                _latPredefinedRangeIndexes[i][1] = ranges[1];   // latEnd
                _longPredefinedRangeIndexes[i][0] = ranges[2];  // longStart
                _longPredefinedRangeIndexes[i][1] = ranges[3];  // longEnd
                i++;
            }
        }
        /// <summary>
        /// Stores the collector's inputs and builds the sealed list of bucket names:
        /// an empty string first, followed by the predefined bucket keys in sorted order.
        /// </summary>
        /// <param name="name">Value stored in the collector's name field.</param>
        /// <param name="subCollector">Underlying collector stored for later use.</param>
        /// <param name="ospec">Facet spec stored for later use.</param>
        /// <param name="predefinedBuckets">Bucket name to member values mapping; its keys become the bucket list.</param>
        /// <param name="numdocs">Value stored in the collector's document count field.</param>
        public BucketFacetCountCollector(string name, DefaultFacetCountCollector subCollector, FacetSpec ospec, IDictionary<string, string[]> predefinedBuckets, int numdocs)
        {
            _name = name;
            _subCollector = subCollector;
            _ospec = ospec;
            _numdocs = numdocs;

            _predefinedBuckets = predefinedBuckets;
            _collapsedCounts = null;

            // Position 0 of the bucket list is an empty-string entry.
            _bucketValues = new TermStringList();
            _bucketValues.Add("");

            List<string> sortedNames = _predefinedBuckets.Keys.ToList();
            sortedNames.Sort();
            for (int n = 0; n < sortedNames.Count; n++)
            {
                _bucketValues.Add(sortedNames[n]);
            }

            _bucketValues.Seal();
        }
 /// <summary>
 /// Initializes the iterator for inclusive latitude and longitude value-index ranges and
 /// widens the min/max id bounds over both ranges.
 /// </summary>
 /// <param name="latStart">First latitude value index (inclusive).</param>
 /// <param name="latEnd">Last latitude value index (inclusive).</param>
 /// <param name="longStart">First longitude value index (inclusive).</param>
 /// <param name="longEnd">Last longitude value index (inclusive).</param>
 /// <param name="latDataCache">Cache providing MinIDs, MaxIDs and OrderArray for latitude.</param>
 /// <param name="longDataCache">Cache providing MinIDs, MaxIDs and OrderArray for longitude.</param>
 internal GeoSimpleDocIdSetIterator(int latStart, int latEnd, int longStart, int longEnd, FacetDataCache latDataCache, FacetDataCache longDataCache)
 {
     _latStart = latStart;
     _longStart = longStart;
     _latEnd = latEnd;
     _longEnd = longEnd;

     // _minID / _maxID keep their declared initial values and are only tightened/widened here.
     int valueIndex = latStart;
     while (valueIndex <= latEnd)
     {
         _minID = Math.Min(_minID, latDataCache.MinIDs[valueIndex]);
         _maxID = Math.Max(_maxID, latDataCache.MaxIDs[valueIndex]);
         ++valueIndex;
     }

     valueIndex = longStart;
     while (valueIndex <= longEnd)
     {
         _minID = Math.Min(_minID, longDataCache.MinIDs[valueIndex]);
         _maxID = Math.Max(_maxID, longDataCache.MaxIDs[valueIndex]);
         ++valueIndex;
     }

     // Begin one position before the smallest id, never below -1.
     _doc = Math.Max(-1, _minID - 1);
     _latOrderArray = latDataCache.OrderArray;
     _longOrderArray = longDataCache.OrderArray;
 }
 /// <summary>
 /// Returns the per-bucket counts, computing and caching them on the first call.
 /// </summary>
 /// <remarks>
 /// Each non-empty bucket receives the sum of the sub-collector counts of its member values
 /// that are found at a position greater than 0 in the sub-collector's value list.
 /// Slot 0 receives the document count minus the frequencies of all distinct member values found.
 /// </remarks>
 /// <returns>The cached collapsed count array.</returns>
 private BigSegmentedArray GetCollapsedCounts()
 {
     if (_collapsedCounts != null)
     {
         return _collapsedCounts;
     }

     _collapsedCounts = new LazyBigIntArray(_bucketValues.Count);
     FacetDataCache cache = _subCollector.DataCache;
     ITermValueList memberValues = cache.ValArray;
     BigSegmentedArray memberCounts = _subCollector.Count;

     // Tracks which member positions already contributed their frequency.
     BitVector alreadyCounted = new BitVector(memberCounts.Size());
     int coveredDocs = 0;

     int bucketSlot = 0;
     foreach (string bucketName in _bucketValues)
     {
         if (bucketName.Length > 0)
         {
             int bucketTotal = 0;
             string[] members = _predefinedBuckets.Get(bucketName);
             foreach (string member in members)
             {
                 int position = memberValues.IndexOf(member);
                 if (position <= 0)
                 {
                     continue;
                 }

                 bucketTotal += memberCounts.Get(position);
                 if (!alreadyCounted.Get(position))
                 {
                     alreadyCounted.Set(position);
                     coveredDocs += cache.Freqs[position];
                 }
             }
             _collapsedCounts.Add(bucketSlot, bucketTotal);
         }
         bucketSlot++;
     }

     _collapsedCounts.Add(0, (_numdocs - coveredDocs));
     return _collapsedCounts;
 }
 /// <summary>
 /// Initializes the iterator from the smallest minimum id and largest maximum id among the given value indexes.
 /// </summary>
 /// <param name="dataCache">Cache providing minIDs, maxIDs and orderArray.</param>
 /// <param name="index">Value indexes whose id bounds are combined.</param>
 /// <param name="bits">Bit mask stored by the iterator.</param>
 public CompactMultiValueFacetDocIdSetIterator(FacetDataCache dataCache, int[] index, int bits)
 {
     this.bits = bits;
     orderArray = dataCache.orderArray;

     int lowest = int.MaxValue;
     int highest = -1;
     foreach (int valueIndex in index)
     {
         int candidateMin = dataCache.minIDs[valueIndex];
         if (candidateMin < lowest)
         {
             lowest = candidateMin;
         }

         int candidateMax = dataCache.maxIDs[valueIndex];
         if (candidateMax > highest)
         {
             highest = candidateMax;
         }
     }
     maxID = highest;

     // Begin one position before the smallest id; a negative result collapses to -1.
     int before = lowest - 1;
     doc = before < 0 ? -1 : before;
 }
 /// <summary>
 /// Initializes the iterator from the smallest minimum id and largest maximum id among the given value indexes.
 /// </summary>
 /// <param name="dataCache">Cache providing MinIDs, MaxIDs and OrderArray.</param>
 /// <param name="index">Value indexes whose id bounds are combined.</param>
 /// <param name="bits">Bit mask stored by the iterator.</param>
 public CompactMultiValueFacetDocIdSetIterator(FacetDataCache dataCache, int[] index, int bits)
 {
     _bits = bits;
     _orderArray = dataCache.OrderArray;
     _doc = int.MaxValue;
     _maxID = -1;

     for (int n = 0; n < index.Length; n++)
     {
         int valueIndex = index[n];

         int candidateMin = dataCache.MinIDs[valueIndex];
         _doc = candidateMin < _doc ? candidateMin : _doc;

         int candidateMax = dataCache.MaxIDs[valueIndex];
         _maxID = candidateMax > _maxID ? candidateMax : _maxID;
     }

     // Step back one position; anything below zero collapses to -1.
     _doc--;
     _doc = _doc < 0 ? -1 : _doc;
 }
Esempio n. 21
0
        /// <summary>
        /// Increments the counters of the values stored for <paramref name="id"/>, restricted to
        /// values whose bit is set in <paramref name="filter"/>. When the id has no page or its
        /// slot equals MISSING, counter 0 is incremented instead.
        /// </summary>
        /// <param name="id">Identifier used to locate the page (id >> PAGEID_SHIFT) and slot (id &amp; SLOTID_MASK).</param>
        /// <param name="count">Counter array updated in place.</param>
        /// <param name="filter">Bit set deciding which values are counted.</param>
        public void CountNoReturnWithFilter(int id, BigSegmentedArray count, OpenBitSet filter)
        {
            int[] page = m_list[id >> PAGEID_SHIFT];
            if (page != null)
            {
                int slot = page[id & SLOTID_MASK];

                if (slot >= 0)
                {
                    // A non-negative slot is used directly as the value.
                    if (filter.FastGet(slot))
                    {
                        count.Add(slot, count.Get(slot) + 1);
                    }
                    return;
                }

                if (slot != MISSING)
                {
                    // A negative slot packs a start offset into the page and an element count.
                    int first = -(slot >> VALIDX_SHIFT); // signed shift, slot is negative here
                    int limit = first + (slot & COUNT_MASK);
                    for (int cursor = first; cursor < limit; cursor++)
                    {
                        int value = page[cursor];
                        if (filter.FastGet(value))
                        {
                            count.Add(value, count.Get(value) + 1);
                        }
                    }
                    return;
                }
            }

            // No page, or the slot is MISSING: count it under position 0.
            count.Add(0, count.Get(0) + 1);
        }
        /// <summary>
        /// Stores the collector's inputs and allocates a lazily backed counter array with one
        /// slot per entry of the cache's Freqs array.
        /// </summary>
        /// <param name="name">Value stored in the collector's name field.</param>
        /// <param name="dataCache">Cache providing Freqs (sizes the count array) and OrderArray.</param>
        /// <param name="docBase">Value stored in the collector's doc base field.</param>
        /// <param name="sel">Browse selection stored for later use.</param>
        /// <param name="ospec">Facet spec stored for later use.</param>
        public DefaultFacetCountCollector(string name, FacetDataCache dataCache, int docBase, BrowseSelection sel, FacetSpec ospec)
        {
            _name = name;
            _sel = sel;
            _ospec = ospec;
            _dataCache = dataCache;
            _countlength = _dataCache.Freqs.Length;

            // NOTE: the memory manager implementation was removed, so arrays above and below the
            // former 3096-entry threshold are allocated the same way.
            _count = new LazyBigIntArray(_countlength);

            _array = _dataCache.OrderArray;
            _docBase = docBase;
        }
        /// <summary>
        /// Writes a single value at position 10000 and asserts FindValueRange results for value
        /// ranges that exclude, contain, or exactly match it, over several position windows.
        /// </summary>
        /// <param name="array">Array under test.</param>
        private static void FindValueRangeHelper(BigSegmentedArray array)
        {
            // Values derived from the array's MaxValue; only the ones the assertions use are kept.
            int low = array.MaxValue / 16;
            int stored = low * 2;
            int aboveStored = low * 4;
            int upper = low * 5;

            array.Add(10000, stored);

            // Value range entirely above the stored value: nothing is found.
            Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, array.FindValueRange(aboveStored, upper, 0, array.Size()));

            // Value range containing the stored value, with different position windows.
            Assert.AreEqual(10000, array.FindValueRange(low, upper, 0, array.Size()));
            Assert.AreEqual(10000, array.FindValueRange(low, upper, 10000, array.Size()));
            Assert.AreEqual(10000, array.FindValueRange(low, upper, 0, 10000));

            // Stored value sitting on the upper bound, the lower bound, and both bounds.
            Assert.AreEqual(10000, array.FindValueRange(low, stored, 9000, 10100));
            Assert.AreEqual(10000, array.FindValueRange(stored, upper, 9000, 10000));
            Assert.AreEqual(10000, array.FindValueRange(stored, stored, 9000, 10000));
        }
Esempio n. 24
0
            /// <summary>
            /// Initializes the iterator from the smallest minimum id and largest maximum id among the given value indexes.
            /// </summary>
            /// <param name="dataCache">Cache providing minIDs, maxIDs and orderArray.</param>
            /// <param name="index">Value indexes whose id bounds are combined.</param>
            /// <param name="bitset">Bit set stored by the iterator.</param>
            public FacetOrDocIdSetIterator(FacetDataCache dataCache, int[] index, OpenBitSet bitset)
            {
                _dataCache = dataCache;
                _index = index;
                _orderArray = dataCache.orderArray;
                _bitset = bitset;

                int lowest = int.MaxValue;
                int highest = -1;
                foreach (int valueIndex in _index)
                {
                    int candidateMin = _dataCache.minIDs[valueIndex];
                    if (lowest > candidateMin)
                    {
                        lowest = candidateMin;
                    }

                    int candidateMax = _dataCache.maxIDs[valueIndex];
                    if (highest < candidateMax)
                    {
                        highest = candidateMax;
                    }
                }
                _maxID = highest;

                // Begin one position before the smallest id; a negative result collapses to -1.
                int before = lowest - 1;
                _doc = before < 0 ? -1 : before;
            }
 /// <summary>
 /// Asserts that the array reads 0 at position 0 and reports a size of 0.
 /// </summary>
 /// <param name="array">Array under test.</param>
 private static void EmptyArrayTestHelper(BigSegmentedArray array)
 {
     var firstSlot = array.Get(0);
     Assert.AreEqual(0, firstSlot);

     var reportedSize = array.Size();
     Assert.AreEqual(0, reportedSize);
 }
 /// <summary>
 /// Writes position modulo MaxValue into every position below the array's Size().
 /// </summary>
 /// <param name="array">Array to fill.</param>
 /// <returns>The same array instance that was passed in.</returns>
 public static BigSegmentedArray Initialize(BigSegmentedArray array)
 {
     int position = 0;
     while (position < array.Size())
     {
         array.Add(position, position % array.MaxValue);
         position++;
     }

     return array;
 }
        /// <summary>
        /// Asserts that Fill overwrites every probed position, that a later Add changes only its
        /// own position, and that a second Fill overwrites everything again.
        /// </summary>
        /// <param name="array">Array under test.</param>
        private static void FillTestHelper(BigSegmentedArray array)
        {
            // Three distinct fill values derived from the array's MaxValue.
            int quarter = array.MaxValue / 4;
            int half = array.MaxValue / 2;
            int nearMax = array.MaxValue - 1;

            // Before any write the probed slot reads 0.
            Assert.AreEqual(0, array.Get(20000));

            array.Fill(quarter);
            Assert.AreEqual(quarter, array.Get(20000));

            // A single Add changes only its own slot; the neighbour keeps the fill value.
            array.Add(20000, half);
            Assert.AreEqual(half, array.Get(20000));
            Assert.AreEqual(quarter, array.Get(20001));

            Assert.AreEqual(20000, array.FindValue(half, 0, 21000));

            // A second Fill overwrites every probed slot, including the one written individually.
            array.Fill(nearMax);
            Assert.AreEqual(nearMax, array.Get(20000));
            Assert.AreEqual(nearMax, array.Get(40000));
            Assert.AreEqual(nearMax, array.Get(0));
        }
        /// <summary>
        /// Rebuilds this cache from the terms of one field: for every term it records the term
        /// text, the number of documents visited, the first and last document id, and writes the
        /// term number into the order array at each visited document.
        /// </summary>
        /// <param name="fieldName">Field whose terms are read; interned before use.</param>
        /// <param name="reader">Reader supplying MaxDoc, the term enumeration and the term documents.</param>
        /// <param name="listFactory">Factory for the term value list; when null a TermStringList is used.</param>
        /// <exception cref="System.IO.IOException">
        /// The running term number reaches the order array's MaxValue().
        /// </exception>
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
        {
            string field = string.Intern(fieldName);
            int maxDoc = reader.MaxDoc;

            if (orderArray == null) // we want to reuse the memory
            {
                orderArray = NewInstance(termCountSize, maxDoc);
            }
            else
            {
                orderArray.EnsureCapacity(maxDoc); // no need to fill to 0, we are reseting the data anyway
            }

            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList = new List<int>();

            ITermValueList list = listFactory == null ? new TermStringList() : listFactory.CreateTermList();

            // Term number 0 is a placeholder entry: null value, no documents.
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            int t = 1; // current term number

            // Both handles are scoped with using so that each is released even when acquiring
            // the other one, or anything in the loop, throws.
            using (TermDocs termDocs = reader.TermDocs())
            using (TermEnum termEnum = reader.Terms(new Term(field)))
            {
                do
                {
                    Term term = termEnum.Term;
                    if (term == null || string.CompareOrdinal(term.Field, field) != 0)
                        break;

                    if (t >= orderArray.MaxValue())
                    {
                        throw new System.IO.IOException("maximum number of value cannot exceed: " + orderArray.MaxValue());
                    }

                    // Alexey: a document may carry more than one term (facets can be built against
                    // tokenized fields), so the term count is deliberately not checked against the
                    // document count.

                    // store term text
                    list.Add(term.Text);
                    termDocs.Seek(termEnum);

                    // Count documents by walking them rather than using the term's docFreq,
                    // which doesn't take into account deleted docs.
                    int minID = -1;
                    int maxID = -1;
                    int df = 0;
                    if (termDocs.Next())
                    {
                        df++;
                        int docid = termDocs.Doc;
                        orderArray.Add(docid, t);
                        minID = docid;
                        while (termDocs.Next())
                        {
                            df++;
                            docid = termDocs.Doc;
                            orderArray.Add(docid, t);
                        }
                        maxID = docid;
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);

                    t++;
                } while (termEnum.Next());
            }
            list.Seal();

            this.valArray = list;
            this.freqs = freqList.ToArray();
            this.minIDs = minIDList.ToArray();
            this.maxIDs = maxIDList.ToArray();
        }
Esempio n. 29
0
 /// <summary>
 /// Asserts that the array reads 0 at position 0 and reports a Length of 0.
 /// </summary>
 /// <param name="array">Array under test.</param>
 private static void EmptyArrayTestHelper(BigSegmentedArray array)
 {
     var firstSlot = array.Get(0);
     Assert.AreEqual(0, firstSlot);

     var reportedLength = array.Length;
     Assert.AreEqual(0, reportedLength);
 }
        /// <summary>
        /// Builds the facet list for positions 1..countlength-1 whose count is at least the spec's
        /// minimum hit count, ordered as requested by the spec.
        /// </summary>
        /// <param name="ospec">Facet spec supplying MinHitCount, MaxCount, OrderBy and the custom comparator factory; null yields the empty facet list.</param>
        /// <param name="count">Hit counts indexed by value position.</param>
        /// <param name="countlength">Number of positions to consider; position 0 is always skipped.</param>
        /// <param name="valList">Value list used to obtain each facet's value.</param>
        /// <returns>The selected facets, or FacetCountCollector_Fields.EMPTY_FACET_LIST when ospec is null.</returns>
        /// <exception cref="ArgumentException">
        /// The sort order is neither OrderValueAsc nor OrderHitsDesc and the spec has no custom comparator factory.
        /// </exception>
        public static IEnumerable<BrowseFacet> GetFacets(FacetSpec ospec, BigSegmentedArray count, int countlength, ITermValueList valList)
        {
            if (ospec == null)
            {
                return FacetCountCollector_Fields.EMPTY_FACET_LIST;
            }

            int minCount = ospec.MinHitCount;

            // A non-positive MaxCount means "no limit": fall back to the number of positions.
            int limit = ospec.MaxCount;
            if (limit <= 0)
            {
                limit = countlength;
            }

            FacetSpec.FacetSortSpec order = ospec.OrderBy;

            if (order == FacetSpec.FacetSortSpec.OrderValueAsc)
            {
                // Positions are visited in ascending order, so appending keeps value order.
                var ascending = new LinkedList<BrowseFacet>();
                for (int position = 1; position < countlength; ++position) // exclude zero
                {
                    int hits = count.Get(position);
                    if (hits >= minCount)
                    {
                        ascending.AddLast(new BrowseFacet(valList.Get(position), hits));
                    }

                    if (ascending.Count >= limit)
                    {
                        break;
                    }
                }
                return ascending;
            }

            IComparatorFactory comparatorFactory;
            if (order == FacetSpec.FacetSortSpec.OrderHitsDesc)
            {
                comparatorFactory = new FacetHitcountComparatorFactory();
            }
            else
            {
                comparatorFactory = ospec.CustomComparatorFactory;
            }

            if (comparatorFactory == null)
            {
                throw new ArgumentException("facet comparator factory not specified");
            }

            IComparer<int> comparator = comparatorFactory.NewComparator(new DefaultFacetCountCollectorFieldAccessor(valList), count);
            var ranked = new LinkedList<BrowseFacet>();

            // -1 is passed to the queue as its "forbidden" value and is what Poll() returns to end the drain loop below.
            int forbidden = -1;
            IntBoundedPriorityQueue queue = new IntBoundedPriorityQueue(comparator, limit, forbidden);

            for (int position = 1; position < countlength; ++position) // exclude zero
            {
                if (count.Get(position) >= minCount)
                {
                    queue.Offer(position);
                }
            }

            // Each polled position is prepended, so the list ends up in reverse poll order.
            for (int polled = queue.Poll(); polled != forbidden; polled = queue.Poll())
            {
                ranked.AddFirst(new BrowseFacet(valList[polled], count.Get(polled)));
            }
            return ranked;
        }
 /// <summary>
 /// Stores the supplied cache, indexes, bit mask and order array as given.
 /// </summary>
 /// <param name="dataCache">Stored in the dataCache field.</param>
 /// <param name="indexes">Stored in the indexes field.</param>
 /// <param name="finalBits">Stored in the finalBits field.</param>
 /// <param name="orderArray">Stored in the orderArray field.</param>
 public CompactMultiValueFacetFilterDocIdSet(FacetDataCache dataCache, int[] indexes, int finalBits, BigSegmentedArray orderArray)
 {
     this.orderArray = orderArray;
     this.dataCache = dataCache;

     this.indexes = indexes;
     this.finalBits = finalBits;
 }
 /// <summary>
 /// Allocates one document slot per requested hit and stores the field type, the cache and its order array.
 /// </summary>
 /// <param name="numHits">Length of the internal document array.</param>
 /// <param name="type">Value stored in the field type member.</param>
 /// <param name="dataCache">Cache stored by the comparator; its orderArray is kept as well.</param>
 public FacetFieldComparator(int numHits, int type, FacetDataCache dataCache)
 {
     _fieldType = type;
     _docs = new int[numHits];

     _dataCache = dataCache;
     orderArray = _dataCache.orderArray;
 }