Пример #1
0
        /// <summary>
        /// Switches this collector to a new reader: creates a comparer and a priority queue
        /// for it and, when grouping is enabled, refreshes the facet count collectors and
        /// clears the per-reader group value map.
        /// </summary>
        /// <param name="context">Reader context; its reader must be a <c>BoboSegmentReader</c>.</param>
        /// <exception cref="ArgumentException">
        /// The reader exposed by <paramref name="context"/> is not a <c>BoboSegmentReader</c>.
        /// </exception>
        public override void SetNextReader(AtomicReaderContext context)
        {
            // One 'as' conversion replaces the former type test followed by a cast.
            BoboSegmentReader segmentReader = context.AtomicReader as BoboSegmentReader;
            if (segmentReader == null)
            {
                // The message names the type that is actually required by the check above.
                throw new ArgumentException("reader must be a BoboSegmentReader");
            }
            m_currentReader = segmentReader;
            int docBase = context.DocBase;

            m_currentComparer = m_compSource.GetComparer(m_currentReader, docBase);
            m_currentQueue    = new DocIDPriorityQueue(m_currentComparer, m_numHits, docBase);
            if (m_groupBy != null)
            {
                if (m_facetCountCollectorMulti != null)  // _facetCountCollectorMulti.Length >= 1
                {
                    // Rebuild every group-by facet count collector against the new reader.
                    for (int i = 0; i < m_facetCountCollectorMulti.Length; ++i)
                    {
                        m_facetCountCollectorMulti[i] = m_groupByMulti[i].GetFacetCountCollectorSource(null, null, true).GetFacetCountCollector(m_currentReader, docBase);
                    }
                    //if (_facetCountCollector != null)
                    //    collectTotalGroups();
                    m_facetCountCollector = m_facetCountCollectorMulti[0];
                    if (m_facetAccessibleLists != null)
                    {
                        // Record the new collectors so they stay reachable after the next reader switch.
                        for (int i = 0; i < m_facetCountCollectorMulti.Length; ++i)
                        {
                            m_facetAccessibleLists[i].Add(m_facetCountCollectorMulti[i]);
                        }
                    }
                }
                if (m_currentValueDocMaps != null)
                {
                    m_currentValueDocMaps.Clear();
                }

                // NightOwl888: The _collectDocIdCache setting seems to put arrays into
                // memory, but then do nothing with the arrays. Seems wasteful and unnecessary.
                //if (contextList != null)
                //{
                //    _currentContext = new CollectorContext(_currentReader, docBase, _currentComparer);
                //    contextList.Add(_currentContext);
                //}
            }

            // Point the reusable scratch doc at the new queue and reader.
            MyScoreDoc myScoreDoc = (MyScoreDoc)m_tmpScoreDoc;

            myScoreDoc.m_queue     = m_currentQueue;
            myScoreDoc.m_reader    = m_currentReader;
            myScoreDoc.m_sortValue = null;
            m_pqList.Add(m_currentQueue);
            m_queueFull = false;
        }
Пример #2
0
        /// <summary>
        /// Builds one <c>BrowseHit</c> per entry of <paramref name="scoreDocs"/>, stored at the same index.
        /// Every hit receives its facet field values, doc id, score and comparable and, on request,
        /// stored fields, term frequency vectors and group information.
        /// </summary>
        /// <param name="scoreDocs">Collected docs; each one carries its own reader and queue.</param>
        /// <param name="sortFields">Not read by this method.</param>
        /// <param name="facetHandlerMap">Facet handlers whose values are copied into every hit, keyed by handler name.</param>
        /// <param name="fetchStoredFields">When true, the stored document is loaded into each hit.</param>
        /// <param name="termVectorsToFetch">Fields whose term vectors are attached; null or empty disables this step.</param>
        /// <param name="groupBy">Facet handler used for grouping, or null when grouping is off.</param>
        /// <param name="groupAccessibles">Group facet counts; only the first element is consulted.</param>
        /// <returns>An array with the same length as <paramref name="scoreDocs"/>.</returns>
        protected static BrowseHit[] BuildHits(MyScoreDoc[] scoreDocs, SortField[] sortFields,
                                               IDictionary <string, IFacetHandler> facetHandlerMap, bool fetchStoredFields,
                                               IEnumerable <string> termVectorsToFetch, IFacetHandler groupBy, CombinedFacetAccessible[] groupAccessibles)
        {
            BrowseHit[] hits = new BrowseHit[scoreDocs.Length];
            IEnumerable <IFacetHandler> facetHandlers = facetHandlerMap.Values;

            // Loop-invariant checks, evaluated once. Any() stops at the first element,
            // whereas Count() walked the whole sequence again for every hit.
            bool fetchTermVectors = termVectorsToFetch != null && termVectorsToFetch.Any();
            CombinedFacetAccessible groupAccessible =
                (groupAccessibles != null && groupAccessibles.Length > 0) ? groupAccessibles[0] : null;

            for (int i = scoreDocs.Length - 1; i >= 0; i--)
            {
                MyScoreDoc      fdoc   = scoreDocs[i];
                BoboIndexReader reader = fdoc.reader;
                BrowseHit       hit    = new BrowseHit();
                if (fetchStoredFields)
                {
                    hit.StoredFields = reader.Document(fdoc.Doc);
                }
                if (fetchTermVectors)
                {
                    var tvMap = new Dictionary <string, BrowseHit.TermFrequencyVector>();
                    hit.TermFreqMap = tvMap;
                    foreach (string field in termVectorsToFetch)
                    {
                        // Fields without a term vector for this doc are simply left out of the map.
                        ITermFreqVector tv = reader.GetTermFreqVector(fdoc.Doc, field);
                        if (tv != null)
                        {
                            int[]    freqs = tv.GetTermFrequencies();
                            string[] terms = tv.GetTerms();
                            tvMap[field] = new BrowseHit.TermFrequencyVector(terms, freqs);
                        }
                    }
                }
                var map    = new Dictionary <string, string[]>();
                var rawMap = new Dictionary <string, object[]>();
                foreach (var facetHandler in facetHandlers)
                {
                    map[facetHandler.Name]    = facetHandler.GetFieldValues(reader, fdoc.Doc);
                    rawMap[facetHandler.Name] = facetHandler.GetRawFieldValues(reader, fdoc.Doc);
                }
                hit.FieldValues    = map;
                hit.RawFieldValues = rawMap;
                // The doc id stored on the score doc is offset by the base of the queue it came from.
                hit.DocId          = fdoc.Doc + fdoc.queue.@base;
                hit.Score          = fdoc.Score;
                hit.Comparable     = fdoc.Value;
                if (groupBy != null)
                {
                    hit.GroupField    = groupBy.Name;
                    hit.GroupValue    = hit.GetField(groupBy.Name);
                    hit.RawGroupValue = hit.GetRawField(groupBy.Name);
                    if (groupAccessible != null && hit.GroupValue != null)
                    {
                        BrowseFacet facet = groupAccessible.GetFacet(hit.GroupValue);
                        // Defensive: if no facet comes back for this value, leave the count at its
                        // default instead of failing with a NullReferenceException.
                        if (facet != null)
                        {
                            hit.GroupHitsCount = facet.FacetValueHitCount;
                        }
                    }
                }
                hits[i] = hit;
            }
            return(hits);
        }
Пример #3
0
        /// <summary>
        /// Collects a single matching document. Every call increments the total hit count.
        /// Depending on the grouping configuration the document is then either forwarded to the
        /// group-by facet count collectors, offered to the current queue with at most one entry
        /// per group value, or offered to the current queue without grouping.
        /// </summary>
        /// <param name="doc">Id of the matching document, as passed in by the caller.</param>
        public override void Collect(int doc)
        {
            ++_totalHits;

            if (groupBy != null)
            {
                if (_facetCountCollectorMulti != null)
                {
                    // Multi group-by: only the facet count collectors see the document.
                    for (int i = 0; i < _facetCountCollectorMulti.Length; ++i)
                    {
                        if (_facetCountCollectorMulti[i] != null)
                        {
                            _facetCountCollectorMulti[i].Collect(doc);
                        }
                    }

                    if (_count > 0)
                    {
                        // NOTE(review): 'score' is no longer read on this path because its only
                        // consumer is the commented-out cache below; the Score() call still runs.
                        float score = (_doScoring ? _scorer.Score() : 0.0f);

                        // NightOwl888: The _collectDocIdCache setting seems to put arrays into
                        // memory, but then do nothing with the arrays. Seems wasteful and unnecessary.
                        //if (_collectDocIdCache)
                        //{
                        //    if (_totalHits > _docIdCacheCapacity)
                        //    {
                        //        _currentDocIdArray = intarraymgr.Get(BLOCK_SIZE);
                        //        docidarraylist.Add(_currentDocIdArray);
                        //        if (_doScoring)
                        //        {
                        //            _currentScoreArray = floatarraymgr.Get(BLOCK_SIZE);
                        //            scorearraylist.Add(_currentScoreArray);
                        //        }
                        //        _docIdCacheCapacity += BLOCK_SIZE;
                        //        _docIdArrayCursor = 0;
                        //    }
                        //    _currentDocIdArray[_docIdArrayCursor] = doc;
                        //    if (_doScoring)
                        //        _currentScoreArray[_docIdArrayCursor] = score;
                        //    ++_docIdArrayCursor;
                        //    ++_currentContext.length;
                        //}
                    }
                    // Early exit: the delegate collector at the end of this method is NOT
                    // invoked on the multi group-by path.
                    return;
                }
                else
                {
                    //if (_facetCountCollector != null)
                    //_facetCountCollector.collect(doc);

                    if (_count > 0)
                    {
                        float score = (_doScoring ? _scorer.Score() : 0.0f);

                        // NightOwl888: The _collectDocIdCache setting seems to put arrays into
                        // memory, but then do nothing with the arrays. Seems wasteful and unnecessary.
                        //if (_collectDocIdCache)
                        //{
                        //    if (_totalHits > _docIdCacheCapacity)
                        //    {
                        //        _currentDocIdArray = intarraymgr.Get(BLOCK_SIZE);
                        //        docidarraylist.Add(_currentDocIdArray);
                        //        if (_doScoring)
                        //        {
                        //            _currentScoreArray = floatarraymgr.Get(BLOCK_SIZE);
                        //            scorearraylist.Add(_currentScoreArray);
                        //        }
                        //        _docIdCacheCapacity += BLOCK_SIZE;
                        //        _docIdArrayCursor = 0;
                        //    }
                        //    _currentDocIdArray[_docIdArrayCursor] = doc;
                        //    if (_doScoring)
                        //        _currentScoreArray[_docIdArrayCursor] = score;
                        //    ++_docIdArrayCursor;
                        //    ++_currentContext.length;
                        //}

                        // Load the candidate into the reusable scratch doc before comparing.
                        _tmpScoreDoc.Doc   = doc;
                        _tmpScoreDoc.Score = score;
                        if (!_queueFull || _currentComparator.Compare(_bottom, _tmpScoreDoc) > 0)
                        {
                            // 'order' identifies the group value of this doc; the map holds the
                            // queue entry currently kept for each group value.
                            int      order = groupBy.GetFacetData <FacetDataCache>(_currentReader).OrderArray.Get(doc);
                            ScoreDoc pre   = _currentValueDocMaps.Get(order);
                            if (pre != null)
                            {
                                // The group already has an entry: swap it only when the comparison
                                // of the existing entry against the candidate is positive.
                                if (_currentComparator.Compare(pre, _tmpScoreDoc) > 0)
                                {
                                    ScoreDoc tmp = pre;
                                    _bottom = _currentQueue.Replace(_tmpScoreDoc, pre);
                                    _currentValueDocMaps.Put(order, _tmpScoreDoc);
                                    // The displaced entry becomes the new scratch object.
                                    _tmpScoreDoc = tmp;
                                }
                            }
                            else
                            {
                                if (_queueFull)
                                {
                                    // New group and a full queue: drop the current bottom together
                                    // with its group mapping, then recycle it as the scratch object.
                                    MyScoreDoc tmp = (MyScoreDoc)_bottom;
                                    _currentValueDocMaps.Remove(groupBy.GetFacetData <FacetDataCache>(tmp.reader).OrderArray.Get(tmp.Doc));
                                    _bottom = _currentQueue.Replace(_tmpScoreDoc);
                                    _currentValueDocMaps.Put(order, _tmpScoreDoc);
                                    _tmpScoreDoc = tmp;
                                }
                                else
                                {
                                    // New group and room left: add a freshly allocated entry.
                                    ScoreDoc tmp = new MyScoreDoc(doc, score, _currentQueue, _currentReader);
                                    _bottom = _currentQueue.Add(tmp);
                                    _currentValueDocMaps.Put(order, tmp);
                                    _queueFull = (_currentQueue.size >= _numHits);
                                }
                            }
                        }
                    }
                }
            }
            else
            {
                // No grouping: plain bounded collection into the current queue.
                if (_count > 0)
                {
                    float score = (_doScoring ? _scorer.Score() : 0.0f);

                    if (_queueFull)
                    {
                        _tmpScoreDoc.Doc   = doc;
                        _tmpScoreDoc.Score = score;

                        if (_currentComparator.Compare(_bottom, _tmpScoreDoc) > 0)
                        {
                            // Replace the bottom entry and reuse it as the next scratch object.
                            ScoreDoc tmp = _bottom;
                            _bottom      = _currentQueue.Replace(_tmpScoreDoc);
                            _tmpScoreDoc = tmp;
                        }
                    }
                    else
                    {
                        _bottom    = _currentQueue.Add(new MyScoreDoc(doc, score, _currentQueue, _currentReader));
                        _queueFull = (_currentQueue.size >= _numHits);
                    }
                }
            }

            // Forward the document to the delegate collector, when one is configured.
            if (this.Collector != null)
            {
                this.Collector.Collect(doc);
            }
        }
Пример #4
0
        /// <summary>
        /// Builds one <c>BrowseHit</c> per entry of <paramref name="scoreDocs"/>, stored at the same index.
        /// Every hit receives its facet field values, doc id, score and comparable and, on request,
        /// stored fields, term vector details and group information.
        /// </summary>
        /// <param name="scoreDocs">Collected docs; each one carries its own reader and queue.</param>
        /// <param name="sortFields">Not read by this method.</param>
        /// <param name="facetHandlerMap">Facet handlers whose values are copied into every hit, keyed by handler name.</param>
        /// <param name="fetchStoredFields">When true, the stored document is loaded into each hit.</param>
        /// <param name="termVectorsToFetch">Fields whose term vectors are attached; null or empty disables this step.</param>
        /// <param name="groupBy">Facet handler used for grouping, or null when grouping is off.</param>
        /// <param name="groupAccessibles">Group facet counts; only the first element is consulted.</param>
        /// <returns>An array with the same length as <paramref name="scoreDocs"/>.</returns>
        protected static BrowseHit[] BuildHits(MyScoreDoc[] scoreDocs, SortField[] sortFields,
                                               IDictionary <string, IFacetHandler> facetHandlerMap, bool fetchStoredFields,
                                               ICollection <string> termVectorsToFetch, IFacetHandler groupBy, CombinedFacetAccessible[] groupAccessibles)
        {
            BrowseHit[] hits = new BrowseHit[scoreDocs.Length];
            IEnumerable <IFacetHandler> facetHandlers = facetHandlerMap.Values;

            // Loop-invariant checks, evaluated once instead of once per hit.
            bool fetchTermVectors = termVectorsToFetch != null && termVectorsToFetch.Count > 0;
            CombinedFacetAccessible groupAccessible =
                (groupAccessibles != null && groupAccessibles.Length > 0) ? groupAccessibles[0] : null;

            for (int i = scoreDocs.Length - 1; i >= 0; i--)
            {
                MyScoreDoc        fdoc   = scoreDocs[i];
                BoboSegmentReader reader = fdoc.m_reader;
                BrowseHit         hit    = new BrowseHit();
                if (fetchStoredFields)
                {
                    hit.SetStoredFields(reader.Document(fdoc.Doc));
                }
                if (fetchTermVectors)
                {
                    var tvMap = new Dictionary <string, IList <BrowseHit.BoboTerm> >();
                    hit.TermVectorMap = tvMap;
                    Fields fds = reader.GetTermVectors(fdoc.Doc);
                    // GetTermVectors yields null for a document without term vectors;
                    // in that case the hit keeps an empty map instead of throwing.
                    if (fds != null)
                    {
                        foreach (string field in termVectorsToFetch)
                        {
                            Terms terms = fds.GetTerms(field);
                            if (terms == null)
                            {
                                continue;
                            }

                            TermsEnum                 termsEnum = terms.GetIterator(null);
                            BytesRef                  text;
                            DocsAndPositionsEnum      docsAndPositions = null;
                            List <BrowseHit.BoboTerm> boboTermList     = new List <BrowseHit.BoboTerm>();

                            while ((text = termsEnum.Next()) != null)
                            {
                                BrowseHit.BoboTerm boboTerm = new BrowseHit.BoboTerm();
                                boboTerm.Term    = text.Utf8ToString();
                                boboTerm.Freq    = (int)termsEnum.TotalTermFreq;
                                // The previous enum instance is passed back in for reuse.
                                docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions);
                                if (docsAndPositions != null)
                                {
                                    // Advance onto the document before reading positions.
                                    docsAndPositions.NextDoc();
                                    boboTerm.Positions    = new List <int>();
                                    boboTerm.StartOffsets = new List <int>();
                                    boboTerm.EndOffsets   = new List <int>();
                                    for (int t = 0; t < boboTerm.Freq; ++t)
                                    {
                                        boboTerm.Positions.Add(docsAndPositions.NextPosition());
                                        boboTerm.StartOffsets.Add(docsAndPositions.StartOffset);
                                        boboTerm.EndOffsets.Add(docsAndPositions.EndOffset);
                                    }
                                }
                                boboTermList.Add(boboTerm);
                            }
                            tvMap.Put(field, boboTermList);
                        }
                    }
                }
                var map    = new Dictionary <string, string[]>();
                var rawMap = new Dictionary <string, object[]>();
                foreach (var facetHandler in facetHandlers)
                {
                    map[facetHandler.Name]    = facetHandler.GetFieldValues(reader, fdoc.Doc);
                    rawMap[facetHandler.Name] = facetHandler.GetRawFieldValues(reader, fdoc.Doc);
                }
                hit.FieldValues    = map;
                hit.RawFieldValues = rawMap;
                // The doc id stored on the score doc is offset by the base of the queue it came from.
                hit.DocId          = fdoc.Doc + fdoc.m_queue.m_base;
                hit.Score          = fdoc.Score;
                hit.Comparable     = fdoc.Value;
                if (groupBy != null)
                {
                    hit.GroupField    = groupBy.Name;
                    hit.GroupValue    = hit.GetField(groupBy.Name);
                    hit.RawGroupValue = hit.GetRawField(groupBy.Name);
                    if (groupAccessible != null && hit.GroupValue != null)
                    {
                        BrowseFacet facet = groupAccessible.GetFacet(hit.GroupValue);
                        // Defensive: if no facet comes back for this value, leave the count at its
                        // default instead of failing with a NullReferenceException.
                        if (facet != null)
                        {
                            hit.GroupHitsCount = facet.FacetValueHitCount;
                        }
                    }
                }
                hits[i] = hit;
            }
            return(hits);
        }