Esempio n. 1
0
 private BigSegmentedArray GetCollapsedCounts()
 {
     if (m_collapsedCounts == null)
     {
         m_collapsedCounts = new LazyBigInt32Array(m_bucketValues.Count);
         FacetDataCache    dataCache = m_subCollector.DataCache;
         ITermValueList    subList   = dataCache.ValArray;
         BigSegmentedArray subcounts = m_subCollector.Count;
         FixedBitSet       indexSet  = new FixedBitSet(subcounts.Length);
         int c = 0;
         int i = 0;
         foreach (string val in m_bucketValues)
         {
             if (val.Length > 0)
             {
                 string[] subVals = m_predefinedBuckets.Get(val);
                 int      count   = 0;
                 foreach (string subVal in subVals)
                 {
                     int index = subList.IndexOf(subVal);
                     if (index > 0)
                     {
                         int subcount = subcounts.Get(index);
                         count += subcount;
                         if (!indexSet.Get(index))
                         {
                             indexSet.Set(index);
                             c += dataCache.Freqs[index];
                         }
                     }
                 }
                 m_collapsedCounts.Add(i, count);
             }
             i++;
         }
         m_collapsedCounts.Add(0, (m_numdocs - c));
     }
     return(m_collapsedCounts);
 }
            private void Aggregate()
            {
                if (m_isAggregated)
                {
                    return;
                }

                m_isAggregated = true;

                int startIdx = m_valArray.IndexOf(m_start);

                if (startIdx < 0)
                {
                    startIdx = -(startIdx + 1);
                }

                int endIdx = m_valArray.IndexOf(m_end);

                if (endIdx < 0)
                {
                    endIdx = -(endIdx + 1);
                }

                BigSegmentedArray baseCounts = m_baseCollector.GetCountDistribution();

                if (m_start is long)
                {
                    long          start    = Convert.ToInt64(m_start);
                    long          unit     = Convert.ToInt64(m_unit);
                    TermInt64List valArray = (TermInt64List)m_valArray;
                    for (int i = startIdx; i < endIdx; i++)
                    {
                        long val = valArray.GetPrimitiveValue(i);
                        int  idx = (int)((val - start) / unit);
                        if (idx >= 0 && idx < m_count.Length)
                        {
                            m_count.Add(idx, m_count.Get(idx) + baseCounts.Get(i));
                        }
                    }
                }
                else if (m_start is int)
                {
                    int           start    = Convert.ToInt32(m_start);
                    int           unit     = Convert.ToInt32(m_unit);
                    TermInt32List valArray = (TermInt32List)m_valArray;
                    for (int i = startIdx; i < endIdx; i++)
                    {
                        int val = valArray.GetPrimitiveValue(i);
                        int idx = ((val - start) / unit);
                        if (idx >= 0 && idx < m_count.Length)
                        {
                            m_count.Add(idx, m_count.Get(idx) + baseCounts.Get(i));
                        }
                    }
                }
                else
                {
                    double start = Convert.ToDouble(m_start);
                    double unit  = Convert.ToDouble(m_unit);
                    for (int i = startIdx; i < endIdx; i++)
                    {
                        double val = (double)m_valArray.GetRawValue(i);
                        int    idx = (int)((val - start) / unit);
                        if (idx >= 0 && idx < m_count.Length)
                        {
                            m_count.Add(idx, m_count.Get(idx) + baseCounts.Get(i));
                        }
                    }
                }
            }
Esempio n. 3
0
        public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
        {
            string field  = string.Intern(fieldName);
            int    maxDoc = reader.MaxDoc;

            BigSegmentedArray order = this.orderArray;

            if (order == null) // we want to reuse the memory
            {
                int dictValueCount = GetDictValueCount(reader, fieldName);
                order = NewInstance(dictValueCount, maxDoc);
            }
            else
            {
                order.EnsureCapacity(maxDoc); // no need to fill to 0, we are reseting the
                                              // data anyway
            }
            this.orderArray = order;

            List <int> minIDList = new List <int>();
            List <int> maxIDList = new List <int>();
            List <int> freqList  = new List <int>();

            int            length             = maxDoc + 1;
            ITermValueList list               = listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList();
            int            negativeValueCount = GetNegativeValueCount(reader, field);

            TermDocs termDocs = reader.TermDocs();
            TermEnum termEnum = reader.Terms(new Term(field, ""));
            int      t        = 0; // current term number

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            int totalFreq = 0;

            //int df = 0;
            t++;
            try
            {
                do
                {
                    Term term = termEnum.Term;
                    if (term == null || string.CompareOrdinal(term.Field, field) != 0)
                    {
                        break;
                    }

                    // store term text
                    // we expect that there is at most one term per document

                    // Alexey: well, we could get now more than one term per document. Effectively, we could build facet against tokenized field
                    //if (t >= length)
                    //{
                    //    throw new RuntimeException("there are more terms than " + "documents in field \"" + field
                    //        + "\", but it's impossible to sort on " + "tokenized fields");
                    //}
                    list.Add(term.Text);
                    termDocs.Seek(termEnum);
                    // freqList.add(termEnum.docFreq()); // doesn't take into account deldocs
                    int minID = -1;
                    int maxID = -1;
                    int df    = 0;
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    if (termDocs.Next())
                    {
                        df++;
                        int docid = termDocs.Doc;
                        order.Add(docid, valId);
                        minID = docid;
                        while (termDocs.Next())
                        {
                            df++;
                            docid = termDocs.Doc;
                            order.Add(docid, valId);
                        }
                        maxID = docid;
                    }
                    freqList.Add(df);
                    totalFreq += df;
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);

                    t++;
                } while (termEnum.Next());
            }
            finally
            {
                termDocs.Dispose();
                termEnum.Dispose();
            }
            list.Seal();
            this.valArray = list;
            this.freqs    = freqList.ToArray();
            this.minIDs   = minIDList.ToArray();
            this.maxIDs   = maxIDList.ToArray();

            int doc = 0;

            while (doc <= maxDoc && order.Get(doc) != 0)
            {
                ++doc;
            }
            if (doc <= maxDoc)
            {
                this.minIDs[0] = doc;
                // Try to get the max
                doc = maxDoc;
                while (doc > 0 && order.Get(doc) != 0)
                {
                    --doc;
                }
                if (doc > 0)
                {
                    this.maxIDs[0] = doc;
                }
            }
            this.freqs[0] = maxDoc + 1 - totalFreq;
        }
        public virtual void Collect(int docid)
        {
            int i = m_orderArray.Get(docid);

            m_count.Add(i, m_count.Get(i) + 1);
        }
Esempio n. 5
0
        public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory)
        {
#if FEATURE_STRING_INTERN
            string field = string.Intern(fieldName);
#else
            string field = fieldName;
#endif
            int maxDoc = reader.MaxDoc;

            int dictValueCount      = GetDictValueCount(reader, fieldName);
            BigSegmentedArray order = NewInstance(dictValueCount, maxDoc);

            this.m_orderArray = order;

            List <int> minIDList = new List <int>();
            List <int> maxIDList = new List <int>();
            List <int> freqList  = new List <int>();

            int            length             = maxDoc + 1;
            ITermValueList list               = listFactory == null ? (ITermValueList) new TermStringList() : listFactory.CreateTermList();
            int            negativeValueCount = GetNegativeValueCount(reader, field);

            int t = 1; // valid term id starts from 1

            list.Add(null);
            minIDList.Add(-1);
            maxIDList.Add(-1);
            freqList.Add(0);
            int   totalFreq = 0;
            Terms terms     = reader.GetTerms(field);
            if (terms != null)
            {
                TermsEnum termsEnum = terms.GetIterator(null);
                BytesRef  text;
                while ((text = termsEnum.Next()) != null)
                {
                    // store term text
                    // we expect that there is at most one term per document
                    if (t >= length)
                    {
                        throw new RuntimeException("there are more terms than "
                                                   + "documents in field \"" + field + "\", but it's impossible to sort on "
                                                   + "tokenized fields");
                    }
                    string strText = text.Utf8ToString();
                    list.Add(strText);
                    Term     term     = new Term(field, strText);
                    DocsEnum docsEnum = reader.GetTermDocsEnum(term);
                    // freqList.add(termEnum.docFreq()); // doesn't take into account
                    // deldocs
                    int minID = -1;
                    int maxID = -1;
                    int docID = -1;
                    int df    = 0;
                    int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
                    while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
                    {
                        df++;
                        order.Add(docID, valId);
                        minID = docID;
                        while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                        {
                            docID = docsEnum.DocID;
                            df++;
                            order.Add(docID, valId);
                        }
                        maxID = docID;
                    }
                    freqList.Add(df);
                    totalFreq += df;
                    minIDList.Add(minID);
                    maxIDList.Add(maxID);
                    t++;
                }
            }

            list.Seal();
            this.m_valArray = list;
            this.m_freqs    = freqList.ToArray();
            this.m_minIDs   = minIDList.ToArray();
            this.m_maxIDs   = maxIDList.ToArray();

            int doc = 0;
            while (doc < maxDoc && order.Get(doc) != 0)
            {
                ++doc;
            }
            if (doc < maxDoc)
            {
                this.m_minIDs[0] = doc;
                // Try to get the max
                doc = maxDoc - 1;
                while (doc >= 0 && order.Get(doc) != 0)
                {
                    --doc;
                }
                this.m_maxIDs[0] = doc;
            }
            this.m_freqs[0] = reader.NumDocs - totalFreq;
        }
Esempio n. 6
0
        /// <summary>
        ///
        /// </summary>
        /// <param name="docid">The docid for which the facet counts are to be calculated</param>
        public virtual void Collect(int docid)
        {
            float docX = m_xvals.Get(docid);
            float docY = m_yvals.Get(docid);
            float docZ = m_zvals.Get(docid);

            float radius, targetX, targetY, targetZ, delta;
            float xu, xl, yu, yl, zu, zl;
            int   countIndex = -1;

            foreach (GeoRange range in m_ranges)
            {
                // the countIndex for the count array should increment with the range index of the _ranges array
                countIndex++;
                if (m_miles)
                {
                    radius = GeoMatchUtil.GetMilesRadiusCosine(range.Rad);
                }
                else
                {
                    radius = GeoMatchUtil.GetKMRadiusCosine(range.Rad);
                }

                float[] coords = GeoMatchUtil.GeoMatchCoordsFromDegrees(range.Lat, range.Lon);
                targetX = coords[0];
                targetY = coords[1];
                targetZ = coords[2];

                if (m_miles)
                {
                    delta = (float)(range.Rad / GeoMatchUtil.EARTH_RADIUS_MILES);
                }
                else
                {
                    delta = (float)(range.Rad / GeoMatchUtil.EARTH_RADIUS_KM);
                }

                xu = targetX + delta;
                xl = targetX - delta;

                // try to see if the range checks can short circuit the actual inCircle check
                if (docX > xu || docX < xl)
                {
                    continue;
                }

                yu = targetY + delta;
                yl = targetY - delta;

                if (docY > yu || docY < yl)
                {
                    continue;
                }

                zu = targetZ + delta;
                zl = targetZ - delta;

                if (docZ > zu || docZ < zl)
                {
                    continue;
                }

                if (GeoFacetFilter.InCircle(docX, docY, docZ, targetX, targetY, targetZ, radius))
                {
                    // if the lat, lon values of this docid match the current user-specified range, then increment the
                    // appropriate count[] value
                    m_count.Add(countIndex, m_count.Get(countIndex) + 1);
                    // do not break here, since one document could lie in multiple user-specified ranges
                }
            }
        }
        public virtual void Collect(int docid)
        {
            int i = _array.Get(docid);

            _count.Add(i, _count.Get(i) + 1);
        }