Example 1
        // NOTE: The Weight.Scorer method lost the scoreDocsInOrder and topScorer parameters between
        // Lucene 4.3.0 and 4.8.0. They are not used by BoboBrowse anyway, so the code here diverges
        // from the original Java source to remove these two parameters.

        // public virtual Scorer CreateScorer(Scorer innerScorer, AtomicReader reader, bool scoreDocsInOrder, bool topScorer)
        public virtual Scorer CreateScorer(Scorer innerScorer, AtomicReader reader)
        {
            if (reader is BoboSegmentReader)
            {
                BoboSegmentReader boboReader = (BoboSegmentReader)reader;
                object            dataObj    = boboReader.GetFacetData(m_timeFacetName);
                if (dataObj is FacetDataCache)
                {
                    FacetDataCache    facetDataCache = (FacetDataCache)(boboReader.GetFacetData(m_timeFacetName));
                    BigSegmentedArray orderArray     = facetDataCache.OrderArray;
                    TermInt64List     termList       = (TermInt64List)facetDataCache.ValArray;
                    return(new RecencyBoostScorer(this, innerScorer, orderArray, termList));
                }
                else
                {
                    throw new InvalidOperationException("underlying facet data must be of type FacetDataCache<long>");
                }
            }
            else
            {
                throw new ArgumentException("reader not instance of " + typeof(BoboSegmentReader));
            }
        }
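
The builder above only locates the facet data; the per-document boost itself lives in RecencyBoostScorer, which is not shown in these examples. As a rough, self-contained sketch of the kind of multiplier such a scorer could compute (the half-life decay formula and parameter names are assumptions, not BoboBrowse's actual implementation):

        // Illustrative sketch only: a recency multiplier of the sort a
        // RecencyBoostScorer might apply on top of the inner scorer's score.
        public static float RecencyMultiplier(long docTimeMillis, long nowMillis,
                                              float maxFactor, long halfLifeMillis)
        {
            long age = Math.Max(0L, nowMillis - docTimeMillis);
            // Exponential half-life decay: brand-new documents get the full
            // boost, documents one half-life old get half of it, and so on.
            double decay = Math.Pow(0.5, (double)age / halfLifeMillis);
            return 1.0f + maxFactor * (float)decay;
        }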
Example 2
 public CompactMultiValueFacetDocIdSetIterator(FacetDataCache dataCache, int[] index, int bits)
 {
     m_bits       = bits;
     m_doc        = int.MaxValue;
     m_maxID      = -1;
     m_orderArray = dataCache.OrderArray;
     foreach (int i in index)
     {
         if (m_doc > dataCache.MinIDs[i])
         {
             m_doc = dataCache.MinIDs[i];
         }
         if (m_maxID < dataCache.MaxIDs[i])
         {
             m_maxID = dataCache.MaxIDs[i];
         }
     }
     m_doc--;
     if (m_doc < 0)
     {
         m_doc = -1;
     }
 }
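
Note the final decrement: the constructor parks the cursor one position before the smallest candidate doc id, matching Lucene's DocIdSetIterator convention that an iterator starts positioned before its first document. The same seeding logic in isolation, as a self-contained sketch:

 // Standalone sketch of the seeding above: take the smallest MinID and the
 // largest MaxID over the selected value indexes, then start one position
 // before the first candidate so the first advance lands on it.
 static (int startDoc, int maxDoc) SeedCursor(int[] minIds, int[] maxIds, int[] selected)
 {
     int doc = int.MaxValue;
     int max = -1;
     foreach (int i in selected)
     {
         if (doc > minIds[i]) doc = minIds[i];
         if (max < maxIds[i]) max = maxIds[i];
     }
     int start = doc - 1;       // "before first" position
     if (start < 0) start = -1; // mirror the clamp in the constructor
     return (start, max);
 }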
Example 3
        public DefaultFacetCountCollector(string name, FacetDataCache dataCache, int docBase, BrowseSelection sel, FacetSpec ospec)
        {
            m_sel         = sel;
            this.m_ospec  = ospec;
            m_name        = name;
            m_dataCache   = dataCache;
            m_countlength = m_dataCache.Freqs.Length;

            if (m_dataCache.Freqs.Length <= 3096)
            {
                m_count = new LazyBigInt32Array(m_countlength);
            }
            else
            {
                m_count = new LazyBigInt32Array(m_countlength);

                // NOTE: Removed memory manager implementation
                //_count = intarraymgr.Get(_countlength);
                //intarraylist.Add(_count);
            }

            m_array = m_dataCache.OrderArray;
        }
Example 4
        /// <summary>
        /// (non-Javadoc)
        /// see com.browseengine.bobo.facets.FacetCountCollector#getCountDistribution()
        /// </summary>
        /// <returns></returns>
        public virtual BigSegmentedArray GetCountDistribution()
        {
            BigSegmentedArray dist = null;

            if (_latPredefinedRangeIndexes != null)
            {
                dist = new LazyBigIntArray(_latPredefinedRangeIndexes.Length);
                int n = 0;
                int start;
                int end;
                foreach (int[] range in _latPredefinedRangeIndexes)
                {
                    start = range[0];
                    end   = range[1];
                    int sum = 0;
                    for (int i = start; i < end; i++)
                    {
                        sum += _latCount[i];
                    }
                    dist.Add(n++, sum);
                }
            }
            return(dist);
        }
Example 5
 /// <summary>
 /// Constructor
 /// </summary>
 /// <param name="name">name of the Geo Facet</param>
 /// <param name="dataCache">The data cache for the Geo Facet</param>
 /// <param name="docBase">the base doc id</param>
 /// <param name="fspec">the facet spec for this facet</param>
 /// <param name="predefinedRanges">List of ranges, where each range looks like &lt;lat, lon: rad&gt;</param>
 /// <param name="miles">variable to specify if the geo distance calculations are in miles. False indicates distance calculation is in kilometers</param>
 public GeoFacetCountCollector(string name, GeoFacetHandler.GeoFacetData dataCache,
     int docBase, FacetSpec fspec, IEnumerable<string> predefinedRanges, bool miles)
 {
     _name = name;
     _dataCache = dataCache;
     _xvals = dataCache.xValArray;
     _yvals = dataCache.yValArray;
     _zvals = dataCache.zValArray;
     _spec = fspec;
     _predefinedRanges = new TermStringList();
     var predefinedTemp = new List<string>(predefinedRanges);
     predefinedTemp.Sort();
     _predefinedRanges.AddAll(predefinedTemp);
     _docBase = docBase;
     _countlength = predefinedTemp.Count;
     _count = new LazyBigIntArray(_countlength);
     _ranges = new GeoRange[predefinedTemp.Count];
     int index = 0;
     foreach (string range in predefinedTemp)
     {
         _ranges[index++] = Parse(range);
     }
     _miles = miles;
 }
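
The Parse method that turns each predefined range string into a GeoRange is not shown in these examples. A hypothetical sketch of parsing the documented "<lat, lon: rad>" shape (the method name, tuple shape, and exact format handling are assumptions):

 // Hypothetical: parse "lat, lon: rad", e.g. "37.42, -122.08: 25".
 static (float lat, float lon, float rad) ParseGeoRange(string range)
 {
     string[] coordsAndRadius = range.Split(':');
     string[] latLon = coordsAndRadius[0].Split(',');
     var inv = System.Globalization.CultureInfo.InvariantCulture;
     return (float.Parse(latLon[0], inv),
             float.Parse(latLon[1], inv),
             float.Parse(coordsAndRadius[1], inv));
 }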
Example 6
        public virtual IEnumerable <BrowseFacet> GetFacets()
        {
            BigSegmentedArray counts = GetCollapsedCounts();

            return(DefaultFacetCountCollector.GetFacets(_ospec, counts, counts.Size(), _bucketValues));
        }
Example 7
 internal FacetDataRandomAccessDocIdSet(FacetDataCache dataCache, int index)
 {
     _dataCache  = dataCache;
     _orderArray = dataCache.OrderArray;
     _index      = index;
 }
Example 8
 public virtual IComparer <int> NewComparator(IFieldValueAccessor valueList, BigSegmentedArray counts)
 {
     return(new FacetHitComparer {
         counts = counts
     });
 }
Example 9
 public FacetDocComparator(FacetDataCache dataCache, BigSegmentedArray orderArray)
 {
     _dataCache  = dataCache;
     _orderArray = orderArray;
 }
Example 10
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
        {
            string field  = string.Intern(fieldName);
            int    maxDoc = reader.MaxDoc;

            BigSegmentedArray order = this.orderArray;

            if (order == null) // we want to reuse the memory
            {
                int dictValueCount = GetDictValueCount(reader, fieldName);
                order = NewInstance(dictValueCount, maxDoc);
            }
            else
            {
                order.EnsureCapacity(maxDoc); // no need to fill with 0; we are resetting the
                                              // data anyway
            }
            this.orderArray = order;

            List <int> minIDList = new List <int>();
            List <int> maxIDList = new List <int>();
            List <int> freqList  = new List <int>();

            int            length             = maxDoc + 1;
            ITermValueList list               = listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList();
            int            negativeValueCount = GetNegativeValueCount(reader, field);

            TermDocs termDocs = reader.TermDocs();
            TermEnum termEnum = reader.Terms(new Term(field, ""));
            int      t        = 0; // current term number

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            int totalFreq = 0;

            //int df = 0;
            t++;
            try
            {
                do
                {
                    Term term = termEnum.Term;
                    if (term == null || string.CompareOrdinal(term.Field, field) != 0)
                    {
                        break;
                    }

                    // store term text
                    // we expect that there is at most one term per document

                    // Alexey: well, we can now get more than one term per document.
                    // Effectively, we could build a facet against a tokenized field.
                    //if (t >= length)
                    //{
                    //    throw new RuntimeException("there are more terms than " + "documents in field \"" + field
                    //        + "\", but it's impossible to sort on " + "tokenized fields");
                    //}
                    list.Add(term.Text);
                    termDocs.Seek(termEnum);
                    // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs
                    int minID = -1;
                    int maxID = -1;
                    int df    = 0;
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    if (termDocs.Next())
                    {
                        df++;
                        int docid = termDocs.Doc;
                        order.Add(docid, valId);
                        minID = docid;
                        while (termDocs.Next())
                        {
                            df++;
                            docid = termDocs.Doc;
                            order.Add(docid, valId);
                        }
                        maxID = docid;
                    }
                    freqList.Add(df);
                    totalFreq += df;
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);

                    t++;
                } while (termEnum.Next());
            }
            finally
            {
                termDocs.Dispose();
                termEnum.Dispose();
            }
            list.Seal();
            this.valArray = list;
            this.freqs    = freqList.ToArray();
            this.minIDs   = minIDList.ToArray();
            this.maxIDs   = maxIDList.ToArray();

            int doc = 0;

            while (doc <= maxDoc && order.Get(doc) != 0)
            {
                ++doc;
            }
            if (doc <= maxDoc)
            {
                this.minIDs[0] = doc;
                // Try to get the max
                doc = maxDoc;
                while (doc > 0 && order.Get(doc) != 0)
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxDoc + 1 - totalFreq;
        }
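
Load inverts Lucene's term-to-documents postings into a forward lookup: order[docId] holds the term id (id 0 is reserved for documents with no value), while minIDs, maxIDs, and freqs record per-term statistics. A self-contained sketch of that layout using plain collections instead of Lucene readers (assuming each postings array is sorted ascending by doc id):

        // Sketch of the data layout Load() produces, without Lucene types.
        // postings: term -> ascending doc ids; maxDoc: number of documents.
        static void BuildForwardIndex(IDictionary<string, int[]> postings, int maxDoc)
        {
            var terms = new List<string>(postings.Keys);
            terms.Sort(StringComparer.Ordinal); // term ids follow dictionary order

            var order  = new int[maxDoc];      // term id per document, 0 = no value
            var minIds = new List<int> { -1 }; // slot 0 reserved for "no value"
            var maxIds = new List<int> { -1 };
            var freqs  = new List<int> { 0 };

            int termId    = 1;
            int totalFreq = 0;
            foreach (string term in terms)
            {
                int[] docs = postings[term];
                foreach (int doc in docs)
                {
                    order[doc] = termId;
                }
                minIds.Add(docs[0]);
                maxIds.Add(docs[docs.Length - 1]);
                freqs.Add(docs.Length);
                totalFreq += docs.Length;
                termId++;
            }
            freqs[0] = maxDoc - totalFreq; // documents that matched no term
        }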
Example 11
        public virtual FacetIterator GetIterator()
        {
            BigSegmentedArray counts = GetCollapsedCounts();

            return(new DefaultFacetIterator(m_bucketValues, counts, counts.Length, true));
        }
Example 12
 public void CollectAll()
 {
     m_count       = BigInt32Array.FromArray(m_dataCache.Freqs);
     m_countLength = m_dataCache.Freqs.Length;
 }
Example 13
            public virtual void Load(string latFieldName, string lonFieldName, BoboIndexReader reader)
            {
                if (reader == null)
                {
                    throw new ArgumentNullException("reader", "reader object is null");
                }

                FacetDataCache latCache = (FacetDataCache)reader.GetFacetData(latFieldName);
                FacetDataCache lonCache = (FacetDataCache)reader.GetFacetData(lonFieldName);

                int maxDoc = reader.MaxDoc;

                BigFloatArray xVals = this._xValArray;
                BigFloatArray yVals = this._yValArray;
                BigFloatArray zVals = this._zValArray;

                if (xVals == null)
                {
                    xVals = NewInstance(maxDoc);
                }
                else
                {
                    xVals.EnsureCapacity(maxDoc);
                }
                if (yVals == null)
                {
                    yVals = NewInstance(maxDoc);
                }
                else
                {
                    yVals.EnsureCapacity(maxDoc);
                }
                if (zVals == null)
                {
                    zVals = NewInstance(maxDoc);
                }
                else
                {
                    zVals.EnsureCapacity(maxDoc);
                }

                this._xValArray = xVals;
                this._yValArray = yVals;
                this._zValArray = zVals;

                BigSegmentedArray latOrderArray = latCache.OrderArray;
                ITermValueList    latValList    = latCache.ValArray;

                BigSegmentedArray lonOrderArray = lonCache.OrderArray;
                ITermValueList    lonValList    = lonCache.ValArray;

                for (int i = 0; i < maxDoc; ++i)
                {
                    string docLatString = latValList.Get(latOrderArray.Get(i)).Trim();
                    string docLonString = lonValList.Get(lonOrderArray.Get(i)).Trim();

                    float docLat = 0;
                    if (docLatString.Length > 0)
                    {
                        // Parse with the invariant culture so "." is always the
                        // decimal separator, regardless of the thread culture.
                        docLat = float.Parse(docLatString, System.Globalization.CultureInfo.InvariantCulture);
                    }

                    float docLon = 0;
                    if (docLonString.Length > 0)
                    {
                        docLon = float.Parse(docLonString, System.Globalization.CultureInfo.InvariantCulture);
                    }

                    float[] coords = GeoMatchUtil.GeoMatchCoordsFromDegrees(docLat, docLon);
                    _xValArray.Add(i, coords[0]);
                    _yValArray.Add(i, coords[1]);
                    _zValArray.Add(i, coords[2]);
                }
            }
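
GeoMatchUtil.GeoMatchCoordsFromDegrees is not shown in these examples; presumably it performs the standard conversion of latitude/longitude in degrees to Cartesian coordinates on a sphere, so later distance tests reduce to cheap vector arithmetic. A minimal sketch of that conversion on a unit sphere (unit radius is an assumption; the real utility may scale by the Earth's radius):

            // Sketch of the standard degrees-to-Cartesian conversion that
            // GeoMatchUtil.GeoMatchCoordsFromDegrees presumably performs.
            static float[] ToCartesian(float latDeg, float lonDeg)
            {
                double lat = latDeg * Math.PI / 180.0;
                double lon = lonDeg * Math.PI / 180.0;
                return new float[]
                {
                    (float)(Math.Cos(lat) * Math.Cos(lon)), // x
                    (float)(Math.Cos(lat) * Math.Sin(lon)), // y
                    (float)Math.Sin(lat)                    // z
                };
            }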
Example 14
            private void Aggregate()
            {
                if (m_isAggregated)
                {
                    return;
                }

                m_isAggregated = true;

                int startIdx = m_valArray.IndexOf(m_start);

                if (startIdx < 0)
                {
                    startIdx = -(startIdx + 1);
                }

                int endIdx = m_valArray.IndexOf(m_end);

                if (endIdx < 0)
                {
                    endIdx = -(endIdx + 1);
                }

                BigSegmentedArray baseCounts = m_baseCollector.GetCountDistribution();

                if (m_start is long)
                {
                    long          start    = Convert.ToInt64(m_start);
                    long          unit     = Convert.ToInt64(m_unit);
                    TermInt64List valArray = (TermInt64List)m_valArray;
                    for (int i = startIdx; i < endIdx; i++)
                    {
                        long val = valArray.GetPrimitiveValue(i);
                        int  idx = (int)((val - start) / unit);
                        if (idx >= 0 && idx < m_count.Length)
                        {
                            m_count.Add(idx, m_count.Get(idx) + baseCounts.Get(i));
                        }
                    }
                }
                else if (m_start is int)
                {
                    int           start    = Convert.ToInt32(m_start);
                    int           unit     = Convert.ToInt32(m_unit);
                    TermInt32List valArray = (TermInt32List)m_valArray;
                    for (int i = startIdx; i < endIdx; i++)
                    {
                        int val = valArray.GetPrimitiveValue(i);
                        int idx = ((val - start) / unit);
                        if (idx >= 0 && idx < m_count.Length)
                        {
                            m_count.Add(idx, m_count.Get(idx) + baseCounts.Get(i));
                        }
                    }
                }
                else
                {
                    double start = Convert.ToDouble(m_start);
                    double unit  = Convert.ToDouble(m_unit);
                    for (int i = startIdx; i < endIdx; i++)
                    {
                        double val = (double)m_valArray.GetRawValue(i);
                        int    idx = (int)((val - start) / unit);
                        if (idx >= 0 && idx < m_count.Length)
                        {
                            m_count.Add(idx, m_count.Get(idx) + baseCounts.Get(i));
                        }
                    }
                }
            }
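
All three branches of Aggregate share the same bucketing arithmetic: a value v falls into bucket (v - start) / unit, so bucket k covers the half-open interval [start + k*unit, start + (k+1)*unit). In isolation:

            // The bucketing arithmetic used above, in isolation. With start = 0
            // and unit = 10: values 0..9 -> bucket 0, values 10..19 -> bucket 1.
            static int BucketIndex(long val, long start, long unit)
            {
                // Callers must still range-check the result against the count
                // array, as the loops above do.
                return (int)((val - start) / unit);
            }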
Example 15
 public FacetDocComparer(FacetDataCache dataCache, BigSegmentedArray orderArray)
 {
     m_dataCache  = dataCache;
     m_orderArray = orderArray;
 }
Example 16
        public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory)
        {
#if FEATURE_STRING_INTERN
            string field = string.Intern(fieldName);
#else
            string field = fieldName;
#endif
            int maxDoc = reader.MaxDoc;

            int dictValueCount      = GetDictValueCount(reader, fieldName);
            BigSegmentedArray order = NewInstance(dictValueCount, maxDoc);

            this.m_orderArray = order;

            List <int> minIDList = new List <int>();
            List <int> maxIDList = new List <int>();
            List <int> freqList  = new List <int>();

            int            length             = maxDoc + 1;
            ITermValueList list               = listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList();
            int            negativeValueCount = GetNegativeValueCount(reader, field);

            int t = 1; // valid term id starts from 1

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            int   totalFreq = 0;
            Terms terms     = reader.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef  text;
                while ((text = termsEnum.Next()) != null)
                {
                    // store term text
                    // we expect that there is at most one term per document
                    if (t >= length)
                    {
                        throw new RuntimeException("there are more terms than "
                                                   + "documents in field \"" + field + "\", but it's impossible to sort on "
                                                   + "tokenized fields");
                    }
                    string strText = text.Utf8ToString();
                    list.Add(strText);
                    Term     term     = new Term(field, strText);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    // freqList.add(termEnum.docFreq()); // doesn't take into account
                    // deldocs
                    int minID = -1;
                    int maxID = -1;
                    int docID = -1;
                    int df    = 0;
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        df++;
                        order.Add(docID, valId);
                        minID = docID;
                        while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                        {
                            docID = docsEnum.DocID;
                            df++;
                            order.Add(docID, valId);
                        }
                        maxID = docID;
                    }
                    freqList.Add(df);
                    totalFreq += df;
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                    t++;
                }
            }

            list.Seal();
            this.m_valArray = list;
            this.m_freqs    = freqList.ToArray();
            this.m_minIDs   = minIDList.ToArray();
            this.m_maxIDs   = maxIDList.ToArray();

            int doc = 0;
            while (doc < maxDoc && order.Get(doc) != 0)
            {
                ++doc;
            }
            if (doc < maxDoc)
            {
                this.m_minIDs[0] = doc;
                // Try to get the max
                doc = maxDoc - 1;
                while (doc >= 0 && order.Get(doc) != 0)
                {
                    --doc;
                }
                this.m_maxIDs[0] = doc;
            }
            this.m_freqs[0] = reader.NumDocs - totalFreq;
        }
Example 17
 public void CollectAll()
 {
     _count       = BigIntArray.FromArray(_dataCache.Freqs);
     _countLength = _dataCache.Freqs.Length;
 }
Example 18
 internal CompactMultiValueFacetCountCollector(string name, BrowseSelection sel, FacetDataCache dataCache, int docBase, FacetSpec ospec)
     : base(name, dataCache, docBase, sel, ospec)
 {
     _array = _dataCache.OrderArray;
 }
Example 19
 public RecencyBoostScorer(RecencyBoostScorerBuilder parent, Scorer innerScorer, BigSegmentedArray orderArray, TermLongList termList)
     : base(innerScorer.Similarity)
 {
     _parent      = parent;
     _innerScorer = innerScorer;
     _orderArray  = orderArray;
     _termList    = termList;
 }
Example 20
        public static IEnumerable <BrowseFacet> GetFacets(FacetSpec ospec, BigSegmentedArray count, int countlength, ITermValueList valList)
        {
            if (ospec != null)
            {
                int minCount = ospec.MinHitCount;
                int max      = ospec.MaxCount;
                if (max <= 0)
                {
                    max = countlength;
                }

                LinkedList <BrowseFacet> facetColl;
                FacetSpec.FacetSortSpec  sortspec = ospec.OrderBy;
                if (sortspec == FacetSpec.FacetSortSpec.OrderValueAsc)
                {
                    facetColl = new LinkedList <BrowseFacet>();
                    for (int i = 1; i < countlength; ++i) // exclude zero
                    {
                        int hits = count.Get(i);
                        if (hits >= minCount)
                        {
                            BrowseFacet facet = new BrowseFacet(valList.Get(i), hits);
                            facetColl.AddLast(facet);
                        }

                        if (facetColl.Count >= max)
                        {
                            break;
                        }
                    }
                }
                else //if (sortspec == FacetSortSpec.OrderHitsDesc)
                {
                    IComparatorFactory comparatorFactory;
                    if (sortspec == FacetSpec.FacetSortSpec.OrderHitsDesc)
                    {
                        comparatorFactory = new FacetHitcountComparatorFactory();
                    }
                    else
                    {
                        comparatorFactory = ospec.CustomComparatorFactory;
                    }

                    if (comparatorFactory == null)
                    {
                        throw new ArgumentException("facet comparator factory not specified");
                    }

                    IComparer <int> comparator = comparatorFactory.NewComparator(new DefaultFacetCountCollectorFieldAccessor(valList), count);
                    facetColl = new LinkedList <BrowseFacet>();
                    int forbidden = -1;
                    IntBoundedPriorityQueue pq = new IntBoundedPriorityQueue(comparator, max, forbidden);

                    for (int i = 1; i < countlength; ++i) // exclude zero
                    {
                        int hits = count.Get(i);
                        if (hits >= minCount)
                        {
                            pq.Offer(i);
                        }
                    }

                    int val;
                    while ((val = pq.Poll()) != forbidden)
                    {
                        BrowseFacet facet = new BrowseFacet(valList[val], count.Get(val));
                        facetColl.AddFirst(facet);
                    }
                }
                return(facetColl);
            }
            else
            {
                return(FacetCountCollector_Fields.EMPTY_FACET_LIST);
            }
        }
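
For descending hit counts, the method keeps only the top max value indexes in a bounded priority queue and then drains it with AddFirst, which reverses the ascending drain order into descending facets. The same bounded top-N selection as a standalone sketch, using .NET 6's PriorityQueue (a min-heap) in place of IntBoundedPriorityQueue:

        // Standalone sketch of the bounded top-N selection above, built on
        // System.Collections.Generic.PriorityQueue (.NET 6+).
        static List<int> TopNByCount(int[] counts, int minCount, int max)
        {
            var pq = new PriorityQueue<int, int>(); // element = value index, priority = hit count
            for (int i = 1; i < counts.Length; ++i) // exclude slot 0 ("no value")
            {
                if (counts[i] < minCount)
                {
                    continue;
                }
                if (pq.Count < max)
                {
                    pq.Enqueue(i, counts[i]);
                }
                else
                {
                    pq.EnqueueDequeue(i, counts[i]); // evicts whichever count is smallest
                }
            }
            var result = new List<int>();
            while (pq.Count > 0)
            {
                result.Add(pq.Dequeue()); // ascending by count
            }
            result.Reverse(); // descending, mirroring AddFirst in the loop above
            return result;
        }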
Example 21
 public RecencyBoostScorer(RecencyBoostScorerBuilder parent, Scorer innerScorer, BigSegmentedArray orderArray, TermInt64List termList)
     : base(innerScorer.Weight)
 {
     m_parent      = parent;
     m_innerScorer = innerScorer;
     m_orderArray  = orderArray;
     m_termList    = termList;
 }
Example 22
 public CompactMultiValueFacetFilterDocIdSet(FacetDataCache dataCache, int[] indexes, int finalBits, BigSegmentedArray orderArray)
 {
     this.dataCache  = dataCache;
     this.indexes    = indexes;
     this.finalBits  = finalBits;
     this.orderArray = orderArray;
 }
Example 23
 public virtual void CollectAll()
 {
     m_count = BigInt32Array.FromArray(m_dataCache.Freqs);
 }
Example 24
 public virtual IComparer <int> NewComparer(IFieldValueAccessor fieldValueAccessor, BigSegmentedArray counts)
 {
     return(new FacetValueComparerFactoryComparer());
 }
Example 25
        public virtual ICollection <BrowseFacet> GetFacets()
        {
            BigSegmentedArray counts = GetCollapsedCounts();

            return(DefaultFacetCountCollector.GetFacets(m_ospec, counts, counts.Length, m_bucketValues));
        }