/// <summary>
/// Advances collection to the next atomic segment. Builds the per-segment
/// comparer and doc-id priority queue, (re)creates the group-by facet count
/// collectors for the segment, clears the per-segment group value map, and
/// re-points the reusable <c>m_tmpScoreDoc</c> at the new queue/reader.
/// </summary>
/// <param name="context">Context of the next segment; its reader must be a
/// <see cref="BoboSegmentReader"/>.</param>
/// <exception cref="ArgumentException">Thrown when the context's reader is not
/// a <see cref="BoboSegmentReader"/>.</exception>
public override void SetNextReader(AtomicReaderContext context)
{
    AtomicReader reader = context.AtomicReader;
    if (!(reader is BoboSegmentReader))
    {
        // BUGFIX: the message previously named "BoboIndexReader", which did not
        // match the type this check actually requires.
        throw new ArgumentException("reader must be a BoboSegmentReader");
    }
    m_currentReader = (BoboSegmentReader)reader;
    int docBase = context.DocBase;
    m_currentComparer = m_compSource.GetComparer(m_currentReader, docBase);
    m_currentQueue = new DocIDPriorityQueue(m_currentComparer, m_numHits, docBase);
    if (m_groupBy != null)
    {
        if (m_facetCountCollectorMulti != null) // when non-null, Length >= 1
        {
            for (int i = 0; i < m_facetCountCollectorMulti.Length; ++i)
            {
                m_facetCountCollectorMulti[i] = m_groupByMulti[i]
                    .GetFacetCountCollectorSource(null, null, true)
                    .GetFacetCountCollector(m_currentReader, docBase);
            }
            // The first group-by collector doubles as the primary facet collector.
            m_facetCountCollector = m_facetCountCollectorMulti[0];
            if (m_facetAccessibleLists != null)
            {
                for (int i = 0; i < m_facetCountCollectorMulti.Length; ++i)
                {
                    m_facetAccessibleLists[i].Add(m_facetCountCollectorMulti[i]);
                }
            }
        }
        if (m_currentValueDocMaps != null)
        {
            // Group ordinals are resolved per segment, so the value->doc map
            // from the previous segment must not leak into this one.
            m_currentValueDocMaps.Clear();
        }
        // NOTE(review): a _collectDocIdCache/CollectorContext mechanism was
        // removed here previously — it allocated arrays that were never read.
    }
    // Re-target the reusable scratch doc at this segment's queue and reader.
    MyScoreDoc myScoreDoc = (MyScoreDoc)m_tmpScoreDoc;
    myScoreDoc.m_queue = m_currentQueue;
    myScoreDoc.m_reader = m_currentReader;
    myScoreDoc.m_sortValue = null;
    m_pqList.Add(m_currentQueue);
    m_queueFull = false;
}
/// <summary>
/// Converts ranked <see cref="MyScoreDoc"/>s into <see cref="BrowseHit"/>s,
/// optionally loading stored fields and term-frequency vectors, resolving
/// facet field values for every handler, and attaching group-by information.
/// </summary>
/// <param name="scoreDocs">Ranked docs; hits are emitted at the same indices.</param>
/// <param name="sortFields">Sort fields (currently unused here; kept for signature compatibility).</param>
/// <param name="facetHandlerMap">All facet handlers, used to populate field/raw-field values.</param>
/// <param name="fetchStoredFields">Whether to load stored fields for each hit.</param>
/// <param name="termVectorsToFetch">Fields whose term vectors should be fetched, or null.</param>
/// <param name="groupBy">Group-by facet handler, or null when not grouping.</param>
/// <param name="groupAccessibles">Combined facet accessibles used to look up group hit counts.</param>
/// <returns>One <see cref="BrowseHit"/> per input score doc.</returns>
protected static BrowseHit[] BuildHits(MyScoreDoc[] scoreDocs, SortField[] sortFields, IDictionary<string, IFacetHandler> facetHandlerMap, bool fetchStoredFields, IEnumerable<string> termVectorsToFetch, IFacetHandler groupBy, CombinedFacetAccessible[] groupAccessibles)
{
    BrowseHit[] hits = new BrowseHit[scoreDocs.Length];
    IEnumerable<IFacetHandler> facetHandlers = facetHandlerMap.Values;
    for (int i = scoreDocs.Length - 1; i >= 0; i--)
    {
        MyScoreDoc fdoc = scoreDocs[i];
        BoboIndexReader reader = fdoc.reader;
        BrowseHit hit = new BrowseHit();
        if (fetchStoredFields)
        {
            hit.StoredFields = reader.Document(fdoc.Doc);
        }
        // Any() tests for emptiness without enumerating the whole sequence
        // (the original used Count() > 0, which walks every element).
        if (termVectorsToFetch != null && termVectorsToFetch.Any())
        {
            var tvMap = new Dictionary<string, BrowseHit.TermFrequencyVector>();
            hit.TermFreqMap = tvMap;
            foreach (string field in termVectorsToFetch)
            {
                ITermFreqVector tv = reader.GetTermFreqVector(fdoc.Doc, field);
                if (tv != null)
                {
                    int[] freqs = tv.GetTermFrequencies();
                    string[] terms = tv.GetTerms();
                    tvMap[field] = new BrowseHit.TermFrequencyVector(terms, freqs);
                }
            }
        }
        var map = new Dictionary<string, string[]>();
        var rawMap = new Dictionary<string, object[]>();
        foreach (var facetHandler in facetHandlers)
        {
            map[facetHandler.Name] = facetHandler.GetFieldValues(reader, fdoc.Doc);
            rawMap[facetHandler.Name] = facetHandler.GetRawFieldValues(reader, fdoc.Doc);
        }
        hit.FieldValues = map;
        hit.RawFieldValues = rawMap;
        // Queue base converts the segment-local doc id back to a global doc id.
        hit.DocId = fdoc.Doc + fdoc.queue.@base;
        hit.Score = fdoc.Score;
        hit.Comparable = fdoc.Value;
        if (groupBy != null)
        {
            hit.GroupField = groupBy.Name;
            hit.GroupValue = hit.GetField(groupBy.Name);
            hit.RawGroupValue = hit.GetRawField(groupBy.Name);
            // BUGFIX: the original condition tested groupAccessibles != null twice;
            // it also dereferenced GetFacet's result without a null check, which
            // would throw NullReferenceException when the facet is absent.
            if (groupAccessibles != null && groupAccessibles.Length > 0 && hit.GroupValue != null)
            {
                BrowseFacet facet = groupAccessibles[0].GetFacet(hit.GroupValue);
                if (facet != null)
                {
                    hit.GroupHitsCount = facet.FacetValueHitCount;
                }
            }
        }
        hits[i] = hit;
    }
    return hits;
}
/// <summary>
/// Collects one matching document. When grouping with multiple group-by
/// handlers (<c>_facetCountCollectorMulti</c> != null) it only updates facet
/// counts and returns early; when grouping with a single handler it keeps at
/// most one (best) doc per group value in the priority queue; otherwise it
/// performs plain top-N collection. Finally forwards the doc to the wrapped
/// <c>Collector</c>, if any.
/// NOTE(review): statement order here is delicate — <c>_tmpScoreDoc</c> is a
/// reusable scratch object that is swapped with queue entries rather than
/// reallocated; do not reorder the swap sequences.
/// </summary>
public override void Collect(int doc)
{
    ++_totalHits;
    if (groupBy != null)
    {
        if (_facetCountCollectorMulti != null)
        {
            // Multi-group-by mode: only accumulate facet counts; no ranking.
            for (int i = 0; i < _facetCountCollectorMulti.Length; ++i)
            {
                if (_facetCountCollectorMulti[i] != null)
                {
                    _facetCountCollectorMulti[i].Collect(doc);
                }
            }
            if (_count > 0)
            {
                // NOTE(review): score is computed but unused here — only the
                // commented-out doc-id cache below ever consumed it.
                float score = (_doScoring ? _scorer.Score() : 0.0f);
                // NightOwl888: The _collectDocIdCache setting seems to put arrays into
                // memory, but then do nothing with the arrays. Seems wasteful and unnecessary.
                //if (_collectDocIdCache)
                //{
                //    if (_totalHits > _docIdCacheCapacity)
                //    {
                //        _currentDocIdArray = intarraymgr.Get(BLOCK_SIZE);
                //        docidarraylist.Add(_currentDocIdArray);
                //        if (_doScoring)
                //        {
                //            _currentScoreArray = floatarraymgr.Get(BLOCK_SIZE);
                //            scorearraylist.Add(_currentScoreArray);
                //        }
                //        _docIdCacheCapacity += BLOCK_SIZE;
                //        _docIdArrayCursor = 0;
                //    }
                //    _currentDocIdArray[_docIdArrayCursor] = doc;
                //    if (_doScoring)
                //        _currentScoreArray[_docIdArrayCursor] = score;
                //    ++_docIdArrayCursor;
                //    ++_currentContext.length;
                //}
            }
            // Early return: skips queue insertion AND the wrapped Collector.
            return;
        }
        else
        {
            //if (_facetCountCollector != null)
            //_facetCountCollector.collect(doc);
            // Single group-by mode: keep only the best doc per group ordinal.
            if (_count > 0)
            {
                float score = (_doScoring ? _scorer.Score() : 0.0f);
                // NightOwl888: The _collectDocIdCache setting seems to put arrays into
                // memory, but then do nothing with the arrays. Seems wasteful and unnecessary.
                //if (_collectDocIdCache)
                //{
                //    if (_totalHits > _docIdCacheCapacity)
                //    {
                //        _currentDocIdArray = intarraymgr.Get(BLOCK_SIZE);
                //        docidarraylist.Add(_currentDocIdArray);
                //        if (_doScoring)
                //        {
                //            _currentScoreArray = floatarraymgr.Get(BLOCK_SIZE);
                //            scorearraylist.Add(_currentScoreArray);
                //        }
                //        _docIdCacheCapacity += BLOCK_SIZE;
                //        _docIdArrayCursor = 0;
                //    }
                //    _currentDocIdArray[_docIdArrayCursor] = doc;
                //    if (_doScoring)
                //        _currentScoreArray[_docIdArrayCursor] = score;
                //    ++_docIdArrayCursor;
                //    ++_currentContext.length;
                //}
                _tmpScoreDoc.Doc = doc;
                _tmpScoreDoc.Score = score;
                // Only consider the doc if the queue has room or it beats the bottom.
                if (!_queueFull || _currentComparator.Compare(_bottom, _tmpScoreDoc) > 0)
                {
                    // Group ordinal for this doc within the current segment.
                    int order = groupBy.GetFacetData<FacetDataCache>(_currentReader).OrderArray.Get(doc);
                    ScoreDoc pre = _currentValueDocMaps.Get(order);
                    if (pre != null)
                    {
                        // This group already has a representative; replace it
                        // only if the new doc compares better.
                        if (_currentComparator.Compare(pre, _tmpScoreDoc) > 0)
                        {
                            ScoreDoc tmp = pre;
                            // Swap the scratch doc into the queue in place of 'pre';
                            // the evicted 'pre' becomes the new scratch object.
                            _bottom = _currentQueue.Replace(_tmpScoreDoc, pre);
                            _currentValueDocMaps.Put(order, _tmpScoreDoc);
                            _tmpScoreDoc = tmp;
                        }
                    }
                    else
                    {
                        if (_queueFull)
                        {
                            // Queue is full: evict the bottom entry, and drop its
                            // group's mapping (note: the bottom's ordinal is read
                            // from ITS reader, which may be a previous segment).
                            MyScoreDoc tmp = (MyScoreDoc)_bottom;
                            _currentValueDocMaps.Remove(groupBy.GetFacetData<FacetDataCache>(tmp.reader).OrderArray.Get(tmp.Doc));
                            _bottom = _currentQueue.Replace(_tmpScoreDoc);
                            _currentValueDocMaps.Put(order, _tmpScoreDoc);
                            _tmpScoreDoc = tmp;
                        }
                        else
                        {
                            // Room available: allocate a real entry for the queue.
                            ScoreDoc tmp = new MyScoreDoc(doc, score, _currentQueue, _currentReader);
                            _bottom = _currentQueue.Add(tmp);
                            _currentValueDocMaps.Put(order, tmp);
                            _queueFull = (_currentQueue.size >= _numHits);
                        }
                    }
                }
            }
        }
    }
    else
    {
        // Plain (non-grouped) top-N collection.
        if (_count > 0)
        {
            float score = (_doScoring ? _scorer.Score() : 0.0f);
            if (_queueFull)
            {
                _tmpScoreDoc.Doc = doc;
                _tmpScoreDoc.Score = score;
                if (_currentComparator.Compare(_bottom, _tmpScoreDoc) > 0)
                {
                    // Swap scratch doc into the queue; evicted bottom becomes scratch.
                    ScoreDoc tmp = _bottom;
                    _bottom = _currentQueue.Replace(_tmpScoreDoc);
                    _tmpScoreDoc = tmp;
                }
            }
            else
            {
                _bottom = _currentQueue.Add(new MyScoreDoc(doc, score, _currentQueue, _currentReader));
                _queueFull = (_currentQueue.size >= _numHits);
            }
        }
    }
    // Chain to the wrapped collector (skipped in multi-group-by mode above).
    if (this.Collector != null)
    {
        this.Collector.Collect(doc);
    }
}
/// <summary>
/// Converts ranked <see cref="MyScoreDoc"/>s into <see cref="BrowseHit"/>s
/// (Lucene 4.x term-vector API variant): optionally loads stored fields and
/// per-field term vectors (terms, frequencies, positions, offsets), resolves
/// facet field values for every handler, and attaches group-by information.
/// </summary>
/// <param name="scoreDocs">Ranked docs; hits are emitted at the same indices.</param>
/// <param name="sortFields">Sort fields (currently unused here; kept for signature compatibility).</param>
/// <param name="facetHandlerMap">All facet handlers, used to populate field/raw-field values.</param>
/// <param name="fetchStoredFields">Whether to load stored fields for each hit.</param>
/// <param name="termVectorsToFetch">Fields whose term vectors should be fetched, or null.</param>
/// <param name="groupBy">Group-by facet handler, or null when not grouping.</param>
/// <param name="groupAccessibles">Combined facet accessibles used to look up group hit counts.</param>
/// <returns>One <see cref="BrowseHit"/> per input score doc.</returns>
protected static BrowseHit[] BuildHits(MyScoreDoc[] scoreDocs, SortField[] sortFields, IDictionary<string, IFacetHandler> facetHandlerMap, bool fetchStoredFields, ICollection<string> termVectorsToFetch, IFacetHandler groupBy, CombinedFacetAccessible[] groupAccessibles)
{
    BrowseHit[] hits = new BrowseHit[scoreDocs.Length];
    IEnumerable<IFacetHandler> facetHandlers = facetHandlerMap.Values;
    for (int i = scoreDocs.Length - 1; i >= 0; i--)
    {
        MyScoreDoc fdoc = scoreDocs[i];
        BoboSegmentReader reader = fdoc.m_reader;
        BrowseHit hit = new BrowseHit();
        if (fetchStoredFields)
        {
            hit.SetStoredFields(reader.Document(fdoc.Doc));
        }
        if (termVectorsToFetch != null && termVectorsToFetch.Count > 0)
        {
            var tvMap = new Dictionary<string, IList<BrowseHit.BoboTerm>>();
            hit.TermVectorMap = tvMap;
            Fields fds = reader.GetTermVectors(fdoc.Doc);
            // BUGFIX: GetTermVectors returns null when the document has no term
            // vectors; the original dereferenced fds unconditionally (NRE).
            if (fds != null)
            {
                foreach (string field in termVectorsToFetch)
                {
                    Terms terms = fds.GetTerms(field);
                    if (terms == null)
                    {
                        continue;
                    }
                    TermsEnum termsEnum = terms.GetIterator(null);
                    BytesRef text;
                    DocsAndPositionsEnum docsAndPositions = null;
                    List<BrowseHit.BoboTerm> boboTermList = new List<BrowseHit.BoboTerm>();
                    while ((text = termsEnum.Next()) != null)
                    {
                        BrowseHit.BoboTerm boboTerm = new BrowseHit.BoboTerm();
                        boboTerm.Term = text.Utf8ToString();
                        // Single-doc term vector: the total term freq is the
                        // in-document frequency.
                        boboTerm.Freq = (int)termsEnum.TotalTermFreq;
                        docsAndPositions = termsEnum.DocsAndPositions(null, docsAndPositions);
                        if (docsAndPositions != null)
                        {
                            docsAndPositions.NextDoc();
                            boboTerm.Positions = new List<int>();
                            boboTerm.StartOffsets = new List<int>();
                            boboTerm.EndOffsets = new List<int>();
                            for (int t = 0; t < boboTerm.Freq; ++t)
                            {
                                boboTerm.Positions.Add(docsAndPositions.NextPosition());
                                boboTerm.StartOffsets.Add(docsAndPositions.StartOffset);
                                boboTerm.EndOffsets.Add(docsAndPositions.EndOffset);
                            }
                        }
                        boboTermList.Add(boboTerm);
                    }
                    tvMap.Put(field, boboTermList);
                }
            }
        }
        var map = new Dictionary<string, string[]>();
        var rawMap = new Dictionary<string, object[]>();
        foreach (var facetHandler in facetHandlers)
        {
            map[facetHandler.Name] = facetHandler.GetFieldValues(reader, fdoc.Doc);
            rawMap[facetHandler.Name] = facetHandler.GetRawFieldValues(reader, fdoc.Doc);
        }
        hit.FieldValues = map;
        hit.RawFieldValues = rawMap;
        // Queue base converts the segment-local doc id back to a global doc id.
        hit.DocId = fdoc.Doc + fdoc.m_queue.m_base;
        hit.Score = fdoc.Score;
        hit.Comparable = fdoc.Value;
        if (groupBy != null)
        {
            hit.GroupField = groupBy.Name;
            hit.GroupValue = hit.GetField(groupBy.Name);
            hit.RawGroupValue = hit.GetRawField(groupBy.Name);
            // BUGFIX: the original condition tested groupAccessibles != null twice;
            // it also dereferenced GetFacet's result without a null check, which
            // would throw NullReferenceException when the facet is absent.
            if (groupAccessibles != null && groupAccessibles.Length > 0 && hit.GroupValue != null)
            {
                BrowseFacet facet = groupAccessibles[0].GetFacet(hit.GroupValue);
                if (facet != null)
                {
                    hit.GroupHitsCount = facet.FacetValueHitCount;
                }
            }
        }
        hits[i] = hit;
    }
    return hits;
}