Example #1
 public FacetDataCache()
 {
     this.orderArray = null;
     this.valArray   = null;
     this.maxIDs     = null;
     this.minIDs     = null;
     this.freqs      = null;
 }
Example #2
 public FacetDataCache()
 {
     this.m_orderArray = null;
     this.m_valArray   = null;
     this.m_maxIDs     = null;
     this.m_minIDs     = null;
     this.m_freqs      = null;
 }
 public FacetDataCache(BigSegmentedArray orderArray, ITermValueList valArray, int[] freqs, int[] minIDs, int[] maxIDs, FacetHandler.TermCountSize termCountSize)
 {
     this.orderArray = orderArray;
     this.valArray = valArray;
     this.freqs = freqs;
     this.minIDs = minIDs;
     this.maxIDs = maxIDs;
     this.termCountSize = termCountSize;
 }
 public FacetDataCache()
 {
     this.orderArray = null;
     this.valArray = null;
     this.maxIDs = null;
     this.minIDs = null;
     this.freqs = null;
     termCountSize = FacetHandler.TermCountSize.Large;
 }
Example #5
 public FacetDataCache(BigSegmentedArray orderArray, ITermValueList valArray, int[] freqs, int[] minIDs,
                       int[] maxIDs, TermCountSize termCountSize)
 {
     this.orderArray = orderArray;
     this.valArray   = valArray;
     this.freqs      = freqs;
     this.minIDs     = minIDs;
     this.maxIDs     = maxIDs;
 }
 public HistogramCollector(string facetName, IFacetCountCollector baseCollector, FacetDataCache dataCache, FacetSpec ospec, T start, T end, T unit)
 {
     m_facetName     = facetName;
     m_baseCollector = baseCollector;
     m_valArray      = dataCache.ValArray;
     m_ospec         = ospec;
     m_isAggregated  = false;
     m_start         = start;
     m_end           = end;
     m_unit          = unit;
     m_count         = new LazyBigInt32Array(CountArraySize());
 }
 public DefaultFacetIterator(ITermValueList valList, BigSegmentedArray counts, int countlength, bool zeroBased)
 {
     _valList = valList;
     _count = counts;
     _countlength = countlength;
     _index = -1;
     _lastIndex = _countlength - 1;
     if (!zeroBased)
         _index++;
     facet = null;
     count = 0;
 }
 public DefaultFacetIterator(ITermValueList valList, BigSegmentedArray countarray, int countlength, bool zeroBased)
 {
     m_valList             = valList;
     m_count               = countarray;
     m_countlength         = countlength;
     m_index               = -1;
     m_countLengthMinusOne = m_countlength - 1;
     if (!zeroBased)
     {
         m_index++;
     }
     m_facet      = null;
     base.m_count = 0;
 }
        public override RandomAccessDocIdSet GetRandomAccessDocIdSet(BoboIndexReader reader)
        {
            RandomAccessDocIdSet innerDocSet = _facetFilter.GetRandomAccessDocIdSet(reader);

            if (innerDocSet == EmptyDocIdSet.Instance)
            {
                return(innerDocSet);
            }

            FacetDataCache dataCache  = _facetDataCacheBuilder.Build(reader);
            int            totalCount = reader.MaxDoc;
            ITermValueList valArray   = dataCache.ValArray;
            int            freqCount  = 0;

            var validVals = new List<string>(_valSet.Count());

            foreach (string val in _valSet)
            {
                int idx = valArray.IndexOf(val);
                if (idx >= 0)
                {
                    validVals.Add(valArray.Get(idx));  // get and format the value
                    freqCount += dataCache.Freqs[idx];
                }
            }

            if (validVals.Count == 0)
            {
                return(EmptyDocIdSet.Instance);
            }

            // takeComplement is only used to choose between TermListRandomAccessDocIdSet and innerDocSet
            int validFreqCount = _takeComplement ? (totalCount - freqCount) : freqCount;

            if (_facetDataCacheBuilder.IndexFieldName != null && ((validFreqCount << 1) < totalCount))
            {
                return(new TermListRandomAccessDocIdSet(_facetDataCacheBuilder.IndexFieldName, innerDocSet, validVals, reader));
            }
            else
            {
                return(innerDocSet);
            }
        }
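The `(validFreqCount << 1) < totalCount` test above is a shift-based way of asking whether the candidate documents make up less than half the segment; only then is the extra term-list doc set worth building. A small worked illustration (numbers assumed):

    // totalCount = 1,000,000 docs in the segment, freqCount = 150,000 matches:
    //   _takeComplement == false -> validFreqCount = 150,000
    //   validFreqCount << 1 == 300,000   <  1,000,000 -> wrap in TermListRandomAccessDocIdSet
    //
    //   _takeComplement == true  -> validFreqCount = 850,000
    //   validFreqCount << 1 == 1,700,000 >= 1,000,000 -> fall back to innerDocSet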
Example #11
 private BigSegmentedArray GetCollapsedCounts()
 {
     if (m_collapsedCounts == null)
     {
         m_collapsedCounts = new LazyBigInt32Array(m_bucketValues.Count);
         FacetDataCache    dataCache = m_subCollector.DataCache;
         ITermValueList    subList   = dataCache.ValArray;
         BigSegmentedArray subcounts = m_subCollector.Count;
         FixedBitSet       indexSet  = new FixedBitSet(subcounts.Length);
         int c = 0;
         int i = 0;
         foreach (string val in m_bucketValues)
         {
             if (val.Length > 0)
             {
                 string[] subVals = m_predefinedBuckets.Get(val);
                 int      count   = 0;
                 foreach (string subVal in subVals)
                 {
                     int index = subList.IndexOf(subVal);
                     if (index > 0)
                     {
                         int subcount = subcounts.Get(index);
                         count += subcount;
                         if (!indexSet.Get(index))
                         {
                             indexSet.Set(index);
                             c += dataCache.Freqs[index];
                         }
                     }
                 }
                 m_collapsedCounts.Add(i, count);
             }
             i++;
         }
         m_collapsedCounts.Add(0, (m_numdocs - c));
     }
     return(m_collapsedCounts);
 }
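A small worked illustration of the collapse (bucket layout and counts assumed):

    // Buckets: "red" -> { "r1", "r2" }, "blue" -> { "b1" }; m_numdocs = 10.
    // If subcounts are r1 = 4, r2 = 1, b1 = 2, then "red" collapses to 4 + 1 = 5
    // and "blue" to 2. The FixedBitSet marks r1, r2 and b1 exactly once, so c
    // accumulates dataCache.Freqs for each sub-term only the first time it is
    // seen, even if two buckets share it. Slot 0 then receives 10 - c: the
    // documents not covered by any bucket.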
Example #12
        /// <summary>
        /// translates the int value using the val list
        /// </summary>
        /// <param name="id"></param>
        /// <param name="valarray"></param>
        /// <returns></returns>
        public object[] GetRawData(int id, ITermValueList valarray)
        {
            // NOTE: Added Get() extension method call because
            // the default .NET behavior throws an exception if the
            // index is out of bounds, rather than returning null.
            int[] page = m_list.Get(id >> PAGEID_SHIFT);

            if (page == null)
            {
                return(EMPTY);
            }
            else
            {
                int val = page[id & SLOTID_MASK];

                if (val >= 0)
                {
                    return(new object[] { valarray.GetRawValue(val) });
                }
                else if (val == MISSING)
                {
                    return(EMPTY);
                }
                else
                {
                    int num = (val & COUNT_MASK);
                    val >>= VALIDX_SHIFT; // signed shift, remember this is a negative number

                    object[] ret = new object[num];
                    for (int i = 0; i < num; i++)
                    {
                        ret[i] = valarray.GetRawValue(page[i - val]);
                    }
                    return(ret);
                }
            }
        }
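The `Get()` extension mentioned in the NOTE above is not shown in this snippet. A minimal sketch of what such a helper might look like (hypothetical; the actual extension in the codebase may differ):

    internal static class ArrayExtensions
    {
        // Returns default(T) (null for reference types, so a null page here)
        // instead of throwing IndexOutOfRangeException when the index falls
        // outside the array.
        public static T Get<T>(this T[] array, int index)
        {
            if (array == null || index < 0 || index >= array.Length)
            {
                return default(T);
            }
            return array[index];
        }
    }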
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
        {
            string field = string.Intern(fieldName);
            int maxDoc = reader.MaxDoc;

            if (orderArray == null) // we want to reuse the memory
            {
                orderArray = NewInstance(termCountSize, maxDoc);
            }
            else
            {
                orderArray.EnsureCapacity(maxDoc); // no need to fill to 0, we are resetting the data anyway
            }

            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList = new List<int>();

            int length = maxDoc + 1;
            ITermValueList list = listFactory == null ? new TermStringList() : listFactory.CreateTermList();
            TermDocs termDocs = reader.TermDocs();
            TermEnum termEnum = reader.Terms(new Term(field));
            int t = 0; // current term number

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            //int df = 0;
            t++;
            try
            {
                do
                {
                    Term term = termEnum.Term;
                    if (term == null || string.CompareOrdinal(term.Field, field) != 0)
                        break;

                    if (t >= orderArray.MaxValue())
                    {
                        throw new System.IO.IOException("maximum number of values cannot exceed: " + orderArray.MaxValue());
                    }
                    // Alexey: well, we could now get more than one term per document. Effectively, we could build facets against a tokenized field
                    /*// we expect that there is at most one term per document
                    if (t >= length)
                    {
                        throw new RuntimeException("there are more terms than " + "documents in field \"" + field + "\", but it's impossible to sort on " + "tokenized fields");
                    }*/
                    // store term text
                    list.Add(term.Text);
                    termDocs.Seek(termEnum);
                    // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs
                    int minID = -1;
                    int maxID = -1;
                    int df = 0;
                    if (termDocs.Next())
                    {
                        df++;
                        int docid = termDocs.Doc;
                        orderArray.Add(docid, t);
                        minID = docid;
                        while (termDocs.Next())
                        {
                            df++;
                            docid = termDocs.Doc;
                            orderArray.Add(docid, t);
                        }
                        maxID = docid;
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);

                    t++;
                } while (termEnum.Next());
            }
            finally
            {
                termDocs.Dispose();
                termEnum.Dispose();
            }
            list.Seal();

            this.valArray = list;
            this.freqs = freqList.ToArray();
            this.minIDs = minIDList.ToArray();
            this.maxIDs = maxIDList.ToArray();
        }
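To make the loaded structures concrete, here is a tiny hedged illustration of what this Load produces for a three-document segment (field values assumed):

    // Docs 0..2 hold the single-value field "a", "b", "a". After Load:
    //   valArray  : [null, "a", "b"]   (term id 0 is reserved for the null value)
    //   orderArray: doc 0 -> 1, doc 1 -> 2, doc 2 -> 1
    //   freqs     : [0, 2, 1]          (this variant never updates freqs[0])
    //   minIDs    : [-1, 0, 1]         (first doc id seen per term)
    //   maxIDs    : [-1, 2, 1]         (last doc id seen per term)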
        public override FacetDataCache Load(BoboSegmentReader reader)
        {
            TreeDictionary<object, List<int>> dataMap = null;
            List<int> docList = null;

            int nullMinId = -1;
            int nullMaxId = -1;
            int nullFreq  = 0;
            int doc       = -1;

            IBits liveDocs = reader.LiveDocs;

            for (int i = 0; i < reader.MaxDoc; ++i)
            {
                if (liveDocs != null && !liveDocs.Get(i))
                {
                    continue;
                }
                doc = i;
                object val = m_facetDataFetcher.Fetch(reader, doc);
                if (val == null)
                {
                    if (nullMinId < 0)
                    {
                        nullMinId = doc;
                    }
                    nullMaxId = doc;
                    ++nullFreq;
                    continue;
                }
                if (dataMap == null)
                {
                    // Initialize.
                    if (val is long[])
                    {
                        if (m_termListFactory == null)
                        {
                            m_termListFactory = new TermFixedLengthInt64ArrayListFactory(
                                ((long[])val).Length);
                        }

                        dataMap = new TreeDictionary<object, List<int>>(new VirtualSimpleFacetHandlerInt16ArrayComparer());
                    }
                    else if (val is IComparable)
                    {
                        dataMap = new TreeDictionary<object, List<int>>();
                    }
                    else
                    {
                        dataMap = new TreeDictionary<object, List<int>>(new VirtualSimpleFacetHandlerObjectComparer());
                    }
                }

                if (dataMap.Contains(val))
                {
                    docList = dataMap[val];
                }
                else
                {
                    docList = null;
                }

                if (docList == null)
                {
                    docList      = new List<int>();
                    dataMap[val] = docList;
                }
                docList.Add(doc);
            }

            m_facetDataFetcher.Cleanup(reader);

            int maxDoc = reader.MaxDoc;
            int size   = dataMap == null ? 1 : (dataMap.Count + 1);

            BigSegmentedArray order = new BigInt32Array(maxDoc);
            ITermValueList    list  = m_termListFactory == null ?
                                      new TermStringList(size) :
                                      m_termListFactory.CreateTermList(size);

            int[] freqs  = new int[size];
            int[] minIDs = new int[size];
            int[] maxIDs = new int[size];

            list.Add(null);
            freqs[0]  = nullFreq;
            minIDs[0] = nullMinId;
            maxIDs[0] = nullMaxId;

            if (dataMap != null)
            {
                int i = 1;
                int? docId;
                foreach (var entry in dataMap)
                {
                    list.Add(list.Format(entry.Key));
                    docList   = entry.Value;
                    freqs[i]  = docList.Count;
                    minIDs[i] = docList.Get(0, int.MinValue);
                    while ((docId = docList.Poll(int.MinValue)) != int.MinValue)
                    {
                        doc = (int)docId;
                        order.Add(doc, i);
                    }
                    maxIDs[i] = doc;
                    ++i;
                }
            }
            list.Seal();

            FacetDataCache dataCache = new FacetDataCache(order, list, freqs, minIDs, maxIDs,
                                                          TermCountSize.Large);

            return(dataCache);
        }
Example #15
        public override FacetDataCache Load(BoboIndexReader reader)
        {
            int maxDoc = reader.MaxDoc;

            BigIntArray order = new BigIntArray(maxDoc);

            ITermValueList mterms = _termListFactory == null ? new TermStringList() : _termListFactory.CreateTermList();

            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList  = new List<int>();

            TermDocs termDocs = null;
            TermEnum termEnum = null;
            int      t        = 0; // current term number

            mterms.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;
            try
            {
                termDocs = reader.TermDocs();
                termEnum = reader.Terms(new Term(_indexFieldName, ""));
                do
                {
                    if (termEnum == null)
                    {
                        break;
                    }
                    Term term = termEnum.Term;
                    if (term == null || !_indexFieldName.Equals(term.Field))
                    {
                        break;
                    }

                    // store term text
                    // we expect that there is at most one term per document
                    if (t > MAX_VAL_COUNT)
                    {
                        throw new IOException("maximum number of values cannot exceed: " + MAX_VAL_COUNT);
                    }
                    string val = term.Text;
                    mterms.Add(val);
                    int bit = (0x00000001 << (t - 1));
                    termDocs.Seek(termEnum);
                    //freqList.add(termEnum.docFreq());  // removed because the df doesn't take into account the num of deletedDocs
                    int df    = 0;
                    int minID = -1;
                    int maxID = -1;
                    if (termDocs.Next())
                    {
                        df++;
                        int docid = termDocs.Doc;
                        order.Add(docid, order.Get(docid) | bit);
                        minID = docid;
                        while (termDocs.Next())
                        {
                            df++;
                            docid = termDocs.Doc;
                            order.Add(docid, order.Get(docid) | bit);
                        }
                        maxID = docid;
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                    t++;
                } while (termEnum.Next());
            }
            finally
            {
                try
                {
                    if (termDocs != null)
                    {
                        termDocs.Dispose();
                    }
                }
                finally
                {
                    if (termEnum != null)
                    {
                        termEnum.Dispose();
                    }
                }
            }

            mterms.Seal();

            return(new FacetDataCache(order, mterms, freqList.ToArray(), minIDList.ToArray(), maxIDList.ToArray(), TermCountSize.Large));
        }
        /// <summary>
        /// loads multi-value facet data. This method uses a workarea to prepare loading.
        /// </summary>
        /// <param name="fieldName"></param>
        /// <param name="reader"></param>
        /// <param name="listFactory"></param>
        /// <param name="workArea"></param>
        public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory, BoboSegmentReader.WorkArea workArea)
        {
#if FEATURE_STRING_INTERN
            string field = string.Intern(fieldName);
#else
            string field = fieldName;
#endif
            int maxdoc = reader.MaxDoc;
            BigNestedInt32Array.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);

            ITermValueList list               = (listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList());
            List<int>      minIDList          = new List<int>();
            List<int>      maxIDList          = new List<int>();
            List<int>      freqList           = new List<int>();
            OpenBitSet     bitset             = new OpenBitSet(maxdoc + 1);
            int            negativeValueCount = GetNegativeValueCount(reader, field);
            int            t = 1; // valid term id starts from 1
            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);

            m_overflow = false;
            Terms terms = reader.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef  text;
                while ((text = termsEnum.Next()) != null)
                {
                    string strText = text.Utf8ToString();
                    list.Add(strText);

                    Term     term     = new Term(field, strText);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    int      df       = 0;
                    int      minID    = -1;
                    int      maxID    = -1;
                    int      docID    = -1;
                    int      valId    = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        df++;
                        if (!loader.Add(docID, valId))
                        {
                            LogOverflow(fieldName);
                        }
                        minID = docID;
                        bitset.FastSet(docID);
                        while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                        {
                            docID = docsEnum.DocID;
                            df++;
                            if (!loader.Add(docID, valId))
                            {
                                LogOverflow(fieldName);
                            }
                            bitset.FastSet(docID);
                        }
                        maxID = docID;
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                    t++;
                }
            }

            list.Seal();

            try
            {
                m_nestedArray.Load(maxdoc + 1, loader);
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.m_valArray = list;
            this.m_freqs    = freqList.ToArray();
            this.m_minIDs   = minIDList.ToArray();
            this.m_maxIDs   = maxIDList.ToArray();

            int doc = 0;
            while (doc < maxdoc && !m_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc < maxdoc)
            {
                this.m_minIDs[0] = doc;
                doc = maxdoc - 1;
                while (doc >= 0 && !m_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                this.m_maxIDs[0] = doc;
            }
            this.m_freqs[0] = maxdoc - (int)bitset.Cardinality();
        }
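For context, a call site for this multi-value load might look roughly like the sketch below. `MultiValueFacetDataCache` as the concrete type is an assumption, not confirmed by this snippet; passing `null` for the list factory relies on the `listFactory == null` fallback at the top of Load:

    // Hedged usage sketch; names assumed.
    static void LoadTagsFacet(AtomicReader atomicReader)
    {
        var cache    = new MultiValueFacetDataCache();       // assumed concrete type exposing this Load
        var workArea = new BoboSegmentReader.WorkArea();     // scratch buffers reused across segment loads
        cache.Load("tags", atomicReader, null, workArea);    // null listFactory -> TermStringList
    }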
 public DefaultFacetCountCollectorFieldAccessor(ITermValueList valList)
 {
     this.valList = valList;
 }
Example #19
        public override FacetDataCache Load(BoboSegmentReader reader)
        {
            int maxDoc = reader.MaxDoc;

            BigInt32Array order = new BigInt32Array(maxDoc);

            ITermValueList mterms = m_termListFactory == null ? new TermStringList() : m_termListFactory.CreateTermList();

            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList  = new List<int>();

            int t = 0; // current term number

            mterms.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;
            Terms terms = reader.GetTerms(m_indexFieldName);

            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef  text;
                while ((text = termsEnum.Next()) != null)
                {
                    // store term text
                    // we expect that there is at most one term per document
                    if (t > MAX_VAL_COUNT)
                    {
                        throw new IOException("maximum number of values cannot exceed: " + MAX_VAL_COUNT);
                    }
                    string val = text.Utf8ToString();
                    mterms.Add(val);
                    int      bit      = (0x00000001 << (t - 1));
                    Term     term     = new Term(m_indexFieldName, val);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    //freqList.add(termEnum.docFreq());  // removed because the df doesn't take into account the
                    // num of deletedDocs
                    int df    = 0;
                    int minID = -1;
                    int maxID = -1;
                    int docID = -1;
                    while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        df++;
                        order.Add(docID, order.Get(docID) | bit);
                        minID = docID;
                        while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                        {
                            docID = docsEnum.DocID;
                            df++;
                            order.Add(docID, order.Get(docID) | bit);
                        }
                        maxID = docID;
                    }
                    freqList.Add(df);
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                    t++;
                }
            }

            mterms.Seal();

            return(new FacetDataCache(order, mterms, freqList.ToArray(), minIDList.ToArray(), maxIDList.ToArray(), TermCountSize.Large));
        }
Example #20
            public virtual void Load(string latFieldName, string lonFieldName, BoboIndexReader reader)
            {
                if (reader == null)
                {
                    throw new ArgumentNullException("reader", "reader object is null");
                }

                FacetDataCache latCache = (FacetDataCache)reader.GetFacetData(latFieldName);
                FacetDataCache lonCache = (FacetDataCache)reader.GetFacetData(lonFieldName);

                int maxDoc = reader.MaxDoc;

                BigFloatArray xVals = this._xValArray;
                BigFloatArray yVals = this._yValArray;
                BigFloatArray zVals = this._zValArray;

                if (xVals == null)
                {
                    xVals = NewInstance(maxDoc);
                }
                else
                {
                    xVals.EnsureCapacity(maxDoc);
                }
                if (yVals == null)
                {
                    yVals = NewInstance(maxDoc);
                }
                else
                {
                    yVals.EnsureCapacity(maxDoc);
                }
                if (zVals == null)
                {
                    zVals = NewInstance(maxDoc);
                }
                else
                {
                    zVals.EnsureCapacity(maxDoc);
                }

                this._xValArray = xVals;
                this._yValArray = yVals;
                this._zValArray = zVals;

                BigSegmentedArray latOrderArray = latCache.OrderArray;
                ITermValueList    latValList    = latCache.ValArray;

                BigSegmentedArray lonOrderArray = lonCache.OrderArray;
                ITermValueList    lonValList    = lonCache.ValArray;

                for (int i = 0; i < maxDoc; ++i)
                {
                    string docLatString = latValList.Get(latOrderArray.Get(i)).Trim();
                    string docLonString = lonValList.Get(lonOrderArray.Get(i)).Trim();

                    float docLat = 0;
                    if (docLatString.Length > 0)
                    {
                        docLat = float.Parse(docLatString);
                    }

                    float docLon = 0;
                    if (docLonString.Length > 0)
                    {
                        docLon = float.Parse(docLonString);
                    }

                    float[] coords = GeoMatchUtil.GeoMatchCoordsFromDegrees(docLat, docLon);
                    _xValArray.Add(i, coords[0]);
                    _yValArray.Add(i, coords[1]);
                    _zValArray.Add(i, coords[2]);
                }
            }
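`GeoMatchUtil.GeoMatchCoordsFromDegrees` is not shown here; it presumably performs the usual degrees-to-unit-sphere mapping sketched below (the real utility may scale by an earth-radius constant or differ in other details):

    // Hedged sketch of a lat/lon (degrees) -> (x, y, z) unit-sphere conversion.
    static float[] CoordsFromDegrees(float latDeg, float lonDeg)
    {
        double lat = latDeg * Math.PI / 180.0; // degrees to radians
        double lon = lonDeg * Math.PI / 180.0;
        return new float[]
        {
            (float)(Math.Cos(lat) * Math.Cos(lon)), // x
            (float)(Math.Cos(lat) * Math.Sin(lon)), // y
            (float)Math.Sin(lat)                    // z
        };
    }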
        /// <summary>
        /// translates the int value using the val list
        /// </summary>
        /// <param name="id"></param>
        /// <param name="valarray"></param>
        /// <returns></returns>
        public object[] GetRawData(int id, ITermValueList valarray)
        {
            // NOTE: Added Get() extension method call because 
            // the default .NET behavior throws an exception if the
            // index is out of bounds, rather than returning null.
            int[] page = _list.Get(id >> PAGEID_SHIFT);

            if (page == null)
            {
                return EMPTY;
            }
            else
            {
                int val = page[id & SLOTID_MASK];

                if (val >= 0)
                {
                    return new object[] { valarray.GetRawValue(val) };
                }
                else if (val == MISSING)
                {
                    return EMPTY;
                }
                else
                {
                    int num = (val & COUNT_MASK);
                    val >>= VALIDX_SHIFT; // signed shift, remember this is a negative number

                    object[] ret = new object[num];
                    for (int i = 0; i < num; i++)
                    {
                        ret[i] = valarray.GetRawValue(page[i - val]);
                    }
                    return ret;
                }
            }
        }
        /// <summary>
        /// translates the int value using the val list
        /// </summary>
        /// <param name="id"></param>
        /// <param name="valarray"></param>
        /// <returns></returns>
        public object[] getRawData(int id, ITermValueList valarray)
        {
            int[] page = _list[id >> PAGEID_SHIFT];

            if (page == null)
            {
                return EMPTY;
            }
            else
            {
                int val = page[id & SLOTID_MASK];

                if (val >= 0)
                {
                    return new object[] { valarray.GetRawValue(val) };
                }
                else if (val == MISSING)
                {
                    return EMPTY;
                }
                else
                {
                    int num = (val & COUNT_MASK);
                    val >>= VALIDX_SHIFT; // signed shift, remember this is a negative number

                    object[] ret = new object[num];
                    for (int i = 0; i < num; i++)
                    {
                        ret[i] = valarray.GetRawValue(page[i - val]);
                    }
                    return ret;
                }
            }
        }
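The negative-value branch in these GetRawData variants packs a value index and a count into one int. A worked illustration with assumed constants (the real PAGEID_SHIFT/COUNT_MASK/VALIDX_SHIFT values are defined elsewhere in the class and are not shown here):

    // Assume VALIDX_SHIFT = 11 and COUNT_MASK = 0x7FF for illustration.
    // For a doc whose 3 values are stored at page offsets 40..42, the slot holds
    //   val = (-40 << VALIDX_SHIFT) | 3  ==  -81917
    // and decoding recovers both fields:
    //   num  = val & COUNT_MASK   ->  3
    //   val >>= VALIDX_SHIFT      -> -40  (arithmetic shift keeps the sign)
    //   ret[i] = page[i - (-40)]  ->  page[40], page[41], page[42]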
        public static IEnumerable<BrowseFacet> GetFacets(FacetSpec ospec, BigSegmentedArray count, int countlength, ITermValueList valList)
        {
            if (ospec != null)
            {
                int minCount = ospec.MinHitCount;
                int max = ospec.MaxCount;
                if (max <= 0) max = countlength;

                LinkedList<BrowseFacet> facetColl;
                FacetSpec.FacetSortSpec sortspec = ospec.OrderBy;
                if (sortspec == FacetSpec.FacetSortSpec.OrderValueAsc)
                {
                    facetColl = new LinkedList<BrowseFacet>();
                    for (int i = 1; i < countlength; ++i) // exclude zero
                    {
                        int hits = count.Get(i);
                        if (hits >= minCount)
                        {
                            BrowseFacet facet = new BrowseFacet(valList.Get(i), hits);
                            facetColl.AddLast(facet);
                        }

                        if (facetColl.Count >= max)
                            break;
                    }
                }
                else //if (sortspec == FacetSortSpec.OrderHitsDesc)
                {
                    IComparatorFactory comparatorFactory;
                    if (sortspec == FacetSpec.FacetSortSpec.OrderHitsDesc)
                    {
                        comparatorFactory = new FacetHitcountComparatorFactory();
                    }
                    else
                    {
                        comparatorFactory = ospec.CustomComparatorFactory;
                    }

                    if (comparatorFactory == null)
                    {
                        throw new ArgumentException("facet comparator factory not specified");
                    }

                    IComparer<int> comparator = comparatorFactory.NewComparator(new DefaultFacetCountCollectorFieldAccessor(valList), count);
                    facetColl = new LinkedList<BrowseFacet>();
                    int forbidden = -1;
                    IntBoundedPriorityQueue pq = new IntBoundedPriorityQueue(comparator, max, forbidden);

                    for (int i = 1; i < countlength; ++i) // exclude zero
                    {
                        int hits = count.Get(i);
                        if (hits >= minCount)
                        {
                            pq.Offer(i);
                        }
                    }

                    int val;
                    while ((val = pq.Poll()) != forbidden)
                    {
                        BrowseFacet facet = new BrowseFacet(valList[val], count.Get(val));
                        facetColl.AddFirst(facet);
                    }
                }
                return facetColl;
            }
            else
            {
                return FacetCountCollector_Fields.EMPTY_FACET_LIST;
            }
        }
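In the hits-descending branch, the bounded queue keeps only the top `max` entries and `Poll()` drains them smallest-first, so `AddFirst` yields a descending list with no extra sort. A minimal sketch of the same drain-and-prepend idea, with a SortedSet standing in for IntBoundedPriorityQueue (assumed semantics only; ties collapse in a SortedSet, while the real queue compares term indexes):

    var top = new SortedSet<int>();                 // stands in for the bounded queue
    foreach (int hits in new[] { 5, 1, 9, 3 })
    {
        top.Add(hits);
        if (top.Count > 2)
        {
            top.Remove(top.Min);                    // evict the smallest; keep the 2 largest
        }
    }
    var result = new LinkedList<int>();
    foreach (int hits in top)                       // drains ascending: 5, 9
    {
        result.AddFirst(hits);                      // prepend -> descending: 9, 5
    }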
Example #25
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
        {
            string field  = string.Intern(fieldName);
            int    maxDoc = reader.MaxDoc;

            BigSegmentedArray order = this.orderArray;

            if (order == null) // we want to reuse the memory
            {
                int dictValueCount = GetDictValueCount(reader, fieldName);
                order = NewInstance(dictValueCount, maxDoc);
            }
            else
            {
                order.EnsureCapacity(maxDoc); // no need to fill to 0, we are resetting the
                                              // data anyway
            }
            this.orderArray = order;

            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList  = new List<int>();

            int            length             = maxDoc + 1;
            ITermValueList list               = listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList();
            int            negativeValueCount = GetNegativeValueCount(reader, field);

            TermDocs termDocs = reader.TermDocs();
            TermEnum termEnum = reader.Terms(new Term(field, ""));
            int      t        = 0; // current term number

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            int totalFreq = 0;

            //int df = 0;
            t++;
            try
            {
                do
                {
                    Term term = termEnum.Term;
                    if (term == null || string.CompareOrdinal(term.Field, field) != 0)
                    {
                        break;
                    }

                    // store term text
                    // we expect that there is at most one term per document

                    // Alexey: well, we could now get more than one term per document. Effectively, we could build facets against a tokenized field
                    //if (t >= length)
                    //{
                    //    throw new RuntimeException("there are more terms than " + "documents in field \"" + field
                    //        + "\", but it's impossible to sort on " + "tokenized fields");
                    //}
                    list.Add(term.Text);
                    termDocs.Seek(termEnum);
                    // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs
                    int minID = -1;
                    int maxID = -1;
                    int df    = 0;
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    if (termDocs.Next())
                    {
                        df++;
                        int docid = termDocs.Doc;
                        order.Add(docid, valId);
                        minID = docid;
                        while (termDocs.Next())
                        {
                            df++;
                            docid = termDocs.Doc;
                            order.Add(docid, valId);
                        }
                        maxID = docid;
                    }
                    freqList.Add(df);
                    totalFreq += df;
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);

                    t++;
                } while (termEnum.Next());
            }
            finally
            {
                termDocs.Dispose();
                termEnum.Dispose();
            }
            list.Seal();
            this.valArray = list;
            this.freqs    = freqList.ToArray();
            this.minIDs   = minIDList.ToArray();
            this.maxIDs   = maxIDList.ToArray();

            int doc = 0;

            while (doc <= maxDoc && order.Get(doc) != 0)
            {
                ++doc;
            }
            if (doc <= maxDoc)
            {
                this.minIDs[0] = doc;
                // Try to get the max
                doc = maxDoc;
                while (doc > 0 && order.Get(doc) != 0)
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxDoc + 1 - totalFreq;
        }
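The `valId` remapping above deserves a worked example. With `negativeValueCount = 3`, term numbers map as follows, reversing the ids of the leading terms so that, presumably, increasing id still follows increasing numeric value even though negative values sort first in the term dictionary:

    //   t    : 1  2  3  4  5  6
    //   valId: 3  2  1  4  5  6
    // t = 1..3 hit the (t - 1 < negativeValueCount) branch: valId = negativeValueCount - t + 1.
    // From t = 4 on, valId == t.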
Example #26
        public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory)
        {
#if FEATURE_STRING_INTERN
            string field = string.Intern(fieldName);
#else
            string field = fieldName;
#endif
            int maxDoc = reader.MaxDoc;

            int dictValueCount      = GetDictValueCount(reader, fieldName);
            BigSegmentedArray order = NewInstance(dictValueCount, maxDoc);

            this.m_orderArray = order;

            List<int> minIDList = new List<int>();
            List<int> maxIDList = new List<int>();
            List<int> freqList  = new List<int>();

            int            length             = maxDoc + 1;
            ITermValueList list               = listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList();
            int            negativeValueCount = GetNegativeValueCount(reader, field);

            int t = 1; // valid term id starts from 1

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            int   totalFreq = 0;
            Terms terms     = reader.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef  text;
                while ((text = termsEnum.Next()) != null)
                {
                    // store term text
                    // we expect that there is at most one term per document
                    if (t >= length)
                    {
                        throw new RuntimeException("there are more terms than "
                                                   + "documents in field \"" + field + "\", but it's impossible to sort on "
                                                   + "tokenized fields");
                    }
                    string strText = text.Utf8ToString();
                    list.Add(strText);
                    Term     term     = new Term(field, strText);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    // freqList.add(termEnum.docFreq()); // doesn't take into account
                    // deldocs
                    int minID = -1;
                    int maxID = -1;
                    int docID = -1;
                    int df    = 0;
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        df++;
                        order.Add(docID, valId);
                        minID = docID;
                        while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                        {
                            docID = docsEnum.DocID;
                            df++;
                            order.Add(docID, valId);
                        }
                        maxID = docID;
                    }
                    freqList.Add(df);
                    totalFreq += df;
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                    t++;
                }
            }

            list.Seal();
            this.m_valArray = list;
            this.m_freqs    = freqList.ToArray();
            this.m_minIDs   = minIDList.ToArray();
            this.m_maxIDs   = maxIDList.ToArray();

            int doc = 0;
            while (doc < maxDoc && order.Get(doc) != 0)
            {
                ++doc;
            }
            if (doc < maxDoc)
            {
                this.m_minIDs[0] = doc;
                // Try to get the max
                doc = maxDoc - 1;
                while (doc >= 0 && order.Get(doc) != 0)
                {
                    --doc;
                }
                this.m_maxIDs[0] = doc;
            }
            this.m_freqs[0] = reader.NumDocs - totalFreq;
        }
        /// <summary>
        /// loads multi-value facet data. This method uses a workarea to prepare loading.
        /// </summary>
        /// <param name="fieldName"></param>
        /// <param name="reader"></param>
        /// <param name="listFactory"></param>
        /// <param name="workArea"></param>
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory, BoboIndexReader.WorkArea workArea)
        {
            long t0     = Environment.TickCount;
            int  maxdoc = reader.MaxDoc;

            BigNestedIntArray.BufferedLoader loader = GetBufferedLoader(maxdoc, workArea);

            TermEnum       tenum              = null;
            TermDocs       tdoc               = null;
            ITermValueList list               = (listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList());
            List<int>      minIDList          = new List<int>();
            List<int>      maxIDList          = new List<int>();
            List<int>      freqList           = new List<int>();
            OpenBitSet     bitset             = new OpenBitSet();
            int            negativeValueCount = GetNegativeValueCount(reader, string.Intern(fieldName));
            int            t = 0; // current term number

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            t++;

            _overflow = false;
            try
            {
                tdoc  = reader.TermDocs();
                tenum = reader.Terms(new Term(fieldName, ""));
                if (tenum != null)
                {
                    do
                    {
                        Term term = tenum.Term;
                        if (term == null || !fieldName.Equals(term.Field))
                        {
                            break;
                        }

                        string val = term.Text;

                        if (val != null)
                        {
                            list.Add(val);

                            tdoc.Seek(tenum);
                            //freqList.add(tenum.docFreq()); // removed because the df doesn't take into account the num of deletedDocs
                            int df    = 0;
                            int minID = -1;
                            int maxID = -1;
                            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                            if (tdoc.Next())
                            {
                                df++;
                                int docid = tdoc.Doc;

                                if (!loader.Add(docid, valId))
                                {
                                    LogOverflow(fieldName);
                                }
                                minID = docid;
                                bitset.Set(docid);
                                while (tdoc.Next())
                                {
                                    df++;
                                    docid = tdoc.Doc;

                                    if (!loader.Add(docid, valId))
                                    {
                                        LogOverflow(fieldName);
                                    }
                                    bitset.Set(docid);
                                }
                                maxID = docid;
                            }
                            freqList.Add(df);
                            minIDList.Add(minID);
                            maxIDList.Add(maxID);
                        }

                        t++;
                    } while (tenum.Next());
                }
            }
            finally
            {
                try
                {
                    if (tdoc != null)
                    {
                        tdoc.Dispose();
                    }
                }
                finally
                {
                    if (tenum != null)
                    {
                        tenum.Dispose();
                    }
                }
            }

            list.Seal();

            try
            {
                _nestedArray.Load(maxdoc + 1, loader);
            }
            catch (System.IO.IOException)
            {
                throw; // rethrow without resetting the stack trace
            }
            catch (Exception e)
            {
                throw new RuntimeException("failed to load due to " + e.ToString(), e);
            }

            this.valArray = list;
            this.freqs    = freqList.ToArray();
            this.minIDs   = minIDList.ToArray();
            this.maxIDs   = maxIDList.ToArray();

            int doc = 0;

            while (doc <= maxdoc && !_nestedArray.Contains(doc, 0, true))
            {
                ++doc;
            }
            if (doc <= maxdoc)
            {
                this.minIDs[0] = doc;
                doc            = maxdoc;
                while (doc > 0 && !_nestedArray.Contains(doc, 0, true))
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxdoc + 1 - (int)bitset.Cardinality();
        }
        public override FacetDataCache Load(BoboIndexReader reader)
        {
            int doc = -1;

            C5.TreeDictionary<object, List<int>> dataMap = null;
            List<int> docList = null;

            int nullMinId = -1;
            int nullMaxId = -1;
            int nullFreq  = 0;

            TermDocs termDocs = reader.TermDocs(null);

            try
            {
                while (termDocs.Next())
                {
                    doc = termDocs.Doc;
                    object val = _facetDataFetcher.Fetch(reader, doc);
                    if (val == null)
                    {
                        if (nullMinId < 0)
                        {
                            nullMinId = doc;
                        }
                        nullMaxId = doc;
                        ++nullFreq;
                        continue;
                    }
                    if (dataMap == null)
                    {
                        // Initialize.
                        if (val is long[])
                        {
                            if (_termListFactory == null)
                            {
                                _termListFactory = new TermFixedLengthLongArrayListFactory(
                                    ((long[])val).Length);
                            }

                            dataMap = new C5.TreeDictionary<object, List<int>>(new VirtualSimpleFacetHandlerLongArrayComparator());
                        }
                        else if (val is IComparable)
                        {
                            // NOTE: In .NET 3.5, the default constructor doesn't work in this case. We therefore have a custom type
                            // that converts the objects to IComparable before comparing them, falling back to a string comparison
                            // if they don't convert. This differs from the Java implementation that uses the default constructor.
                            dataMap = new C5.TreeDictionary<object, List<int>>(new VirtualSimpleFacetHandlerComparableComparator());
                        }
                        else
                        {
                            dataMap = new C5.TreeDictionary<object, List<int>>(new VirtualSimpleFacetHandlerObjectComparator());
                        }
                    }

                    if (dataMap.Contains(val))
                    {
                        docList = dataMap[val];
                    }
                    else
                    {
                        docList = null;
                    }

                    if (docList == null)
                    {
                        docList      = new List<int>();
                        dataMap[val] = docList;
                    }
                    docList.Add(doc);
                }
            }
            finally
            {
                termDocs.Dispose();
            }
            _facetDataFetcher.Cleanup(reader);

            int maxDoc = reader.MaxDoc;
            int size   = dataMap == null ? 1 : (dataMap.Count + 1);

            BigSegmentedArray order = new BigIntArray(maxDoc);
            ITermValueList    list  = _termListFactory == null ?
                                      new TermStringList(size) :
                                      _termListFactory.CreateTermList(size);

            int[] freqs  = new int[size];
            int[] minIDs = new int[size];
            int[] maxIDs = new int[size];

            list.Add(null);
            freqs[0]  = nullFreq;
            minIDs[0] = nullMinId;
            maxIDs[0] = nullMaxId;

            if (dataMap != null)
            {
                int i = 1;
                int? docId;
                foreach (var entry in dataMap)
                {
                    list.Add(list.Format(entry.Key));
                    docList   = entry.Value;
                    freqs[i]  = docList.Count;
                    minIDs[i] = docList.Get(0, int.MinValue);
                    while ((docId = docList.Poll(int.MinValue)) != int.MinValue)
                    {
                        doc = (int)docId;
                        order.Add(doc, i);
                    }
                    maxIDs[i] = doc;
                    ++i;
                }
            }
            list.Seal();

            FacetDataCache dataCache = new FacetDataCache(order, list, freqs, minIDs,
                                                          maxIDs, TermCountSize.Large);

            return(dataCache);
        }
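The NOTE in the `IComparable` branch of the Load above refers to a comparer that coerces keys before comparing them, falling back to a string comparison. A hedged sketch of what VirtualSimpleFacetHandlerComparableComparator might look like (hypothetical name and body; the shipped implementation may differ):

    internal sealed class ComparableComparer : IComparer<object>
    {
        public int Compare(object x, object y)
        {
            var cx = x as IComparable;
            if (cx != null)
            {
                try
                {
                    return cx.CompareTo(y);          // compare as IComparable when possible
                }
                catch (ArgumentException)
                {
                    // incompatible types; fall through to the string comparison
                }
            }
            return string.CompareOrdinal(Convert.ToString(x), Convert.ToString(y));
        }
    }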