// NOTE: The Weight.Scorer method lost the scoreDocsInOrder and topScorer parameters between
// Lucene 4.3.0 and 4.8.0. They are not used by BoboBrowse anyway, so the code here diverges
// from the original Java source to remove these two parameters.
// public virtual Scorer CreateScorer(Scorer innerScorer, AtomicReader reader, bool scoreDocsInOrder, bool topScorer)
public virtual Scorer CreateScorer(Scorer innerScorer, AtomicReader reader)
{
    if (reader is BoboSegmentReader)
    {
        BoboSegmentReader boboReader = (BoboSegmentReader)reader;
        object dataObj = boboReader.GetFacetData(m_timeFacetName);
        if (dataObj is FacetDataCache)
        {
            // Cast the value we already fetched instead of looking it up a second time.
            FacetDataCache facetDataCache = (FacetDataCache)dataObj;
            BigSegmentedArray orderArray = facetDataCache.OrderArray;
            TermInt64List termList = (TermInt64List)facetDataCache.ValArray;
            return new RecencyBoostScorer(this, innerScorer, orderArray, termList);
        }
        else
        {
            throw new InvalidOperationException("underlying facet data must be of type FacetDataCache<long>");
        }
    }
    else
    {
        throw new ArgumentException("reader not instance of " + typeof(BoboSegmentReader));
    }
}
public CompactMultiValueFacetDocIdSetIterator(FacetDataCache dataCache, int[] index, int bits)
{
    m_bits = bits;
    m_doc = int.MaxValue;
    m_maxID = -1;
    m_orderArray = dataCache.OrderArray;

    // Find the smallest min doc id and the largest max doc id across the selected values,
    // so iteration can be confined to that window.
    foreach (int i in index)
    {
        if (m_doc > dataCache.MinIDs[i])
        {
            m_doc = dataCache.MinIDs[i];
        }
        if (m_maxID < dataCache.MaxIDs[i])
        {
            m_maxID = dataCache.MaxIDs[i];
        }
    }

    // Position the cursor one before the first candidate doc; clamp to -1 when there are none.
    m_doc--;
    if (m_doc < 0)
    {
        m_doc = -1;
    }
}
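The constructor above only seeds the doc-id window to iterate; the match test itself lives in the NextDoc/Advance implementation, which is not part of this snippet. Below is a minimal, self-contained sketch of the compact multi-value encoding such an iterator walks, assuming (as the bits parameter suggests) that each document's order-array entry packs one bit per value index. The class name and array literals are illustrative only, not library code.

using System;

static class CompactEncodingDemo
{
    static void Main()
    {
        // Per-doc packed value bits: bit k set means the doc carries value k.
        int[] orderArray = { 0b0001, 0b0110, 0b0100 };
        // Selection mask: values 1 and 2 are selected.
        int selectionBits = 0b0010 | 0b0100;

        for (int doc = 0; doc < orderArray.Length; doc++)
        {
            // A doc matches when any selected value bit is present.
            bool matches = (orderArray[doc] & selectionBits) != 0;
            Console.WriteLine($"doc {doc}: {(matches ? "match" : "no match")}");
        }
    }
}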
public DefaultFacetCountCollector(string name, FacetDataCache dataCache, int docBase,
    BrowseSelection sel, FacetSpec ospec)
{
    m_sel = sel;
    this.m_ospec = ospec;
    m_name = name;
    m_dataCache = dataCache;
    m_countlength = m_dataCache.Freqs.Length;

    // Both branches now allocate a LazyBigInt32Array; they only differed when the
    // memory manager implementation (removed, see note below) was still in place.
    if (m_dataCache.Freqs.Length <= 3096)
    {
        m_count = new LazyBigInt32Array(m_countlength);
    }
    else
    {
        m_count = new LazyBigInt32Array(m_countlength);
        // NOTE: Removed memory manager implementation
        //_count = intarraymgr.Get(_countlength);
        //intarraylist.Add(_count);
    }
    m_array = m_dataCache.OrderArray;
}
/// <summary>
/// (non-Javadoc)
/// see com.browseengine.bobo.facets.FacetCountCollector#getCountDistribution()
/// </summary>
/// <returns></returns>
public virtual BigSegmentedArray GetCountDistribution()
{
    BigSegmentedArray dist = null;
    if (_latPredefinedRangeIndexes != null)
    {
        dist = new LazyBigIntArray(_latPredefinedRangeIndexes.Length);
        int n = 0;
        foreach (int[] range in _latPredefinedRangeIndexes)
        {
            int start = range[0];
            int end = range[1];

            // Sum the per-value counts that fall inside this predefined range.
            int sum = 0;
            for (int i = start; i < end; i++)
            {
                sum += _latCount[i];
            }
            dist.Add(n++, sum);
        }
    }
    return dist;
}
/// <summary>
/// Constructor
/// </summary>
/// <param name="name">name of the Geo Facet</param>
/// <param name="dataCache">The data cache for the Geo Facet</param>
/// <param name="docBase">the base doc id</param>
/// <param name="fspec">the facet spec for this facet</param>
/// <param name="predefinedRanges">List of ranges, where each range looks like &lt;lat, lon: rad&gt;</param>
/// <param name="miles">true if the geo distance calculations are in miles; false indicates distance calculation is in kilometers</param>
public GeoFacetCountCollector(string name, GeoFacetHandler.GeoFacetData dataCache,
    int docBase, FacetSpec fspec, IEnumerable<string> predefinedRanges, bool miles)
{
    _name = name;
    _dataCache = dataCache;
    _xvals = dataCache.xValArray;
    _yvals = dataCache.yValArray;
    _zvals = dataCache.zValArray;
    _spec = fspec;

    // Ranges are sorted so facet values come back in a predictable order.
    _predefinedRanges = new TermStringList();
    var predefinedTemp = new List<string>(predefinedRanges);
    predefinedTemp.Sort();
    _predefinedRanges.AddAll(predefinedTemp);

    _docBase = docBase;
    _countlength = predefinedTemp.Count;
    _count = new LazyBigIntArray(_countlength);

    _ranges = new GeoRange[predefinedTemp.Count];
    int index = 0;
    foreach (string range in predefinedTemp)
    {
        _ranges[index++] = Parse(range);
    }
    _miles = miles;
}
public virtual IEnumerable<BrowseFacet> GetFacets()
{
    BigSegmentedArray counts = GetCollapsedCounts();
    return DefaultFacetCountCollector.GetFacets(_ospec, counts, counts.Size(), _bucketValues);
}
internal FacetDataRandomAccessDocIdSet(FacetDataCache dataCache, int index)
{
    _dataCache = dataCache;
    _orderArray = dataCache.OrderArray;
    _index = index;
}
public virtual IComparer<int> NewComparator(IFieldValueAccessor valueList, BigSegmentedArray counts)
{
    return new FacetHitComparer { counts = counts };
}
public FacetDocComparator(FacetDataCache dataCache, BigSegmentedArray orderArray)
{
    _dataCache = dataCache;
    _orderArray = orderArray;
}
public virtual void Load(string fieldName, IndexReader reader, TermListFactory listFactory)
{
    string field = string.Intern(fieldName);
    int maxDoc = reader.MaxDoc;

    BigSegmentedArray order = this.orderArray;
    if (order == null) // we want to reuse the memory
    {
        int dictValueCount = GetDictValueCount(reader, fieldName);
        order = NewInstance(dictValueCount, maxDoc);
    }
    else
    {
        order.EnsureCapacity(maxDoc); // no need to fill to 0, we are resetting the data anyway
    }
    this.orderArray = order;

    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();

    int length = maxDoc + 1;
    ITermValueList list = listFactory == null
        ? (ITermValueList)new TermStringList()
        : listFactory.CreateTermList();
    int negativeValueCount = GetNegativeValueCount(reader, field);

    TermDocs termDocs = reader.TermDocs();
    TermEnum termEnum = reader.Terms(new Term(field, ""));
    int t = 0; // current term number

    // Ordinal 0 is reserved for documents with no value in this field.
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    int totalFreq = 0;
    t++;

    try
    {
        do
        {
            Term term = termEnum.Term;
            if (term == null || string.CompareOrdinal(term.Field, field) != 0)
            {
                break;
            }

            // Store the term text. We expect at most one term per document; with a
            // tokenized field there can now be more than one, which effectively
            // builds the facet against the tokenized values.
            //if (t >= length)
            //{
            //    throw new RuntimeException("there are more terms than documents in field \"" + field
            //        + "\", but it's impossible to sort on tokenized fields");
            //}
            list.Add(term.Text);

            termDocs.Seek(termEnum);
            // freqList.Add(termEnum.DocFreq()); // doesn't take deleted docs into account
            int minID = -1;
            int maxID = -1;
            int df = 0;
            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
            if (termDocs.Next())
            {
                df++;
                int docid = termDocs.Doc;
                order.Add(docid, valId);
                minID = docid;
                while (termDocs.Next())
                {
                    df++;
                    docid = termDocs.Doc;
                    order.Add(docid, valId);
                }
                maxID = docid;
            }
            freqList.Add(df);
            totalFreq += df;
            minIDList.Add(minID);
            maxIDList.Add(maxID);
            t++;
        } while (termEnum.Next());
    }
    finally
    {
        termDocs.Dispose();
        termEnum.Dispose();
    }

    list.Seal();
    this.valArray = list;
    this.freqs = freqList.ToArray();
    this.minIDs = minIDList.ToArray();
    this.maxIDs = maxIDList.ToArray();

    // Compute the min/max doc id range for the reserved "no value" ordinal 0.
    // Note: this older (Lucene 3.x) port treats maxDoc as an inclusive doc id
    // (length = maxDoc + 1); the Lucene 4.8 version of Load scans doc < maxDoc.
    int doc = 0;
    while (doc <= maxDoc && order.Get(doc) != 0)
    {
        ++doc;
    }
    if (doc <= maxDoc)
    {
        this.minIDs[0] = doc;
        // Try to get the max
        doc = maxDoc;
        while (doc > 0 && order.Get(doc) != 0)
        {
            --doc;
        }
        if (doc > 0)
        {
            this.maxIDs[0] = doc;
        }
    }
    this.freqs[0] = maxDoc + 1 - totalFreq;
}
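For readers new to Bobo's facet data layout: Load inverts Lucene's term-to-documents postings into a per-document array of term ordinals (the order array), with ordinal 0 reserved for missing values. Below is a toy, self-contained sketch of that inversion, independent of the Lucene APIs; the dictionary contents and names are made up for illustration.

using System;
using System.Collections.Generic;

static class ForwardIndexDemo
{
    static void Main()
    {
        // Inverted index: term -> postings (doc ids), terms kept in sorted order.
        var postings = new SortedDictionary<string, int[]>
        {
            ["apple"] = new[] { 0, 3 },
            ["pear"] = new[] { 1 },
        };

        int maxDoc = 4;
        int[] order = new int[maxDoc];          // doc -> term ordinal; 0 = no value
        var valList = new List<string> { null }; // ordinal 0 is the reserved "missing" slot

        int t = 1; // valid term ordinals start at 1
        foreach (var entry in postings)
        {
            valList.Add(entry.Key);
            foreach (int doc in entry.Value)
            {
                order[doc] = t; // assign this doc the current term's ordinal
            }
            t++;
        }

        for (int doc = 0; doc < maxDoc; doc++)
        {
            Console.WriteLine($"doc {doc} -> {valList[order[doc]] ?? "<missing>"}");
        }
    }
}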
public virtual FacetIterator GetIterator()
{
    BigSegmentedArray counts = GetCollapsedCounts();
    return new DefaultFacetIterator(m_bucketValues, counts, counts.Length, true);
}
public void CollectAll()
{
    // Short-circuit: take the counts straight from the precomputed frequencies.
    m_count = BigInt32Array.FromArray(m_dataCache.Freqs);
    m_countLength = m_dataCache.Freqs.Length;
}
public virtual void Load(string latFieldName, string lonFieldName, BoboIndexReader reader)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader object is null");
    }

    FacetDataCache latCache = (FacetDataCache)reader.GetFacetData(latFieldName);
    FacetDataCache lonCache = (FacetDataCache)reader.GetFacetData(lonFieldName);
    int maxDoc = reader.MaxDoc;

    BigFloatArray xVals = this._xValArray;
    BigFloatArray yVals = this._yValArray;
    BigFloatArray zVals = this._zValArray;

    // Allocate the coordinate arrays, or grow them if we are reloading.
    if (xVals == null)
    {
        xVals = NewInstance(maxDoc);
    }
    else
    {
        xVals.EnsureCapacity(maxDoc);
    }
    if (yVals == null)
    {
        yVals = NewInstance(maxDoc);
    }
    else
    {
        yVals.EnsureCapacity(maxDoc);
    }
    if (zVals == null)
    {
        zVals = NewInstance(maxDoc);
    }
    else
    {
        zVals.EnsureCapacity(maxDoc);
    }

    this._xValArray = xVals;
    this._yValArray = yVals;
    this._zValArray = zVals;

    BigSegmentedArray latOrderArray = latCache.OrderArray;
    ITermValueList latValList = latCache.ValArray;
    BigSegmentedArray lonOrderArray = lonCache.OrderArray;
    ITermValueList lonValList = lonCache.ValArray;

    for (int i = 0; i < maxDoc; ++i)
    {
        // Look up the document's lat/lon term text through the order arrays.
        string docLatString = latValList.Get(latOrderArray.Get(i)).Trim();
        string docLonString = lonValList.Get(lonOrderArray.Get(i)).Trim();

        float docLat = 0;
        if (docLatString.Length > 0)
        {
            docLat = float.Parse(docLatString);
        }
        float docLon = 0;
        if (docLonString.Length > 0)
        {
            docLon = float.Parse(docLonString);
        }

        // Convert lat/lon in degrees to cartesian coordinates on the unit sphere.
        float[] coords = GeoMatchUtil.GeoMatchCoordsFromDegrees(docLat, docLon);
        _xValArray.Add(i, coords[0]);
        _yValArray.Add(i, coords[1]);
        _zValArray.Add(i, coords[2]);
    }
}
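GeoMatchUtil.GeoMatchCoordsFromDegrees is not shown in this snippet; the conversion it performs is presumably the standard mapping of latitude/longitude in degrees to x, y, z coordinates on the unit sphere. A self-contained sketch of that mapping follows; the helper name and sample coordinates here are illustrative, not the library's actual implementation.

using System;

static class GeoCoordsDemo
{
    // Map latitude/longitude in degrees to a point on the unit sphere.
    static float[] CoordsFromDegrees(float latDeg, float lonDeg)
    {
        double lat = latDeg * Math.PI / 180.0;
        double lon = lonDeg * Math.PI / 180.0;
        return new[]
        {
            (float)(Math.Cos(lat) * Math.Cos(lon)), // x
            (float)(Math.Cos(lat) * Math.Sin(lon)), // y
            (float)Math.Sin(lat),                   // z
        };
    }

    static void Main()
    {
        float[] c = CoordsFromDegrees(37.77f, -122.42f);
        Console.WriteLine($"x={c[0]:F4} y={c[1]:F4} z={c[2]:F4}");
    }
}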
private void Aggregate()
{
    if (m_isAggregated)
    {
        return;
    }
    m_isAggregated = true;

    // IndexOf returns -(insertionPoint + 1) on a miss; decode it so startIdx/endIdx
    // bound the distinct values that fall inside [m_start, m_end).
    int startIdx = m_valArray.IndexOf(m_start);
    if (startIdx < 0)
    {
        startIdx = -(startIdx + 1);
    }
    int endIdx = m_valArray.IndexOf(m_end);
    if (endIdx < 0)
    {
        endIdx = -(endIdx + 1);
    }

    BigSegmentedArray baseCounts = m_baseCollector.GetCountDistribution();
    if (m_start is long)
    {
        long start = Convert.ToInt64(m_start);
        long unit = Convert.ToInt64(m_unit);
        TermInt64List valArray = (TermInt64List)m_valArray;
        for (int i = startIdx; i < endIdx; i++)
        {
            long val = valArray.GetPrimitiveValue(i);
            // Map the value into its fixed-width bucket and fold in the base count.
            int idx = (int)((val - start) / unit);
            if (idx >= 0 && idx < m_count.Length)
            {
                m_count.Add(idx, m_count.Get(idx) + baseCounts.Get(i));
            }
        }
    }
    else if (m_start is int)
    {
        int start = Convert.ToInt32(m_start);
        int unit = Convert.ToInt32(m_unit);
        TermInt32List valArray = (TermInt32List)m_valArray;
        for (int i = startIdx; i < endIdx; i++)
        {
            int val = valArray.GetPrimitiveValue(i);
            int idx = (val - start) / unit;
            if (idx >= 0 && idx < m_count.Length)
            {
                m_count.Add(idx, m_count.Get(idx) + baseCounts.Get(i));
            }
        }
    }
    else
    {
        double start = Convert.ToDouble(m_start);
        double unit = Convert.ToDouble(m_unit);
        for (int i = startIdx; i < endIdx; i++)
        {
            double val = (double)m_valArray.GetRawValue(i);
            int idx = (int)((val - start) / unit);
            if (idx >= 0 && idx < m_count.Length)
            {
                m_count.Add(idx, m_count.Get(idx) + baseCounts.Get(i));
            }
        }
    }
}
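Two pieces of index arithmetic above are worth spelling out: IndexOf follows the binary-search convention of returning -(insertionPoint + 1) on a miss, which the -(idx + 1) decode turns back into an insertion point, and (val - start) / unit maps each distinct value into a fixed-width histogram bucket. The following self-contained sketch demonstrates both using Array.BinarySearch, which uses the same miss encoding; the values are made up.

using System;

static class AggregateDemo
{
    static void Main()
    {
        long[] sortedVals = { 10, 20, 30, 40 };
        long start = 15, unit = 10;

        // BinarySearch returns the bitwise complement of the insertion point on
        // a miss, i.e. -(insertionPoint + 1), so the decode below recovers it.
        int startIdx = Array.BinarySearch(sortedVals, start);
        if (startIdx < 0)
        {
            startIdx = -(startIdx + 1); // index of the first value >= start
        }

        long val = sortedVals[2];                 // 30
        int bucket = (int)((val - start) / unit); // (30 - 15) / 10 = 1
        Console.WriteLine($"startIdx={startIdx}, bucket for {val} = {bucket}");
    }
}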
public FacetDocComparer(FacetDataCache dataCache, BigSegmentedArray orderArray)
{
    m_dataCache = dataCache;
    m_orderArray = orderArray;
}
public virtual void Load(string fieldName, AtomicReader reader, TermListFactory listFactory)
{
#if FEATURE_STRING_INTERN
    string field = string.Intern(fieldName);
#else
    string field = fieldName;
#endif
    int maxDoc = reader.MaxDoc;

    int dictValueCount = GetDictValueCount(reader, fieldName);
    BigSegmentedArray order = NewInstance(dictValueCount, maxDoc);
    this.m_orderArray = order;

    List<int> minIDList = new List<int>();
    List<int> maxIDList = new List<int>();
    List<int> freqList = new List<int>();

    int length = maxDoc + 1;
    ITermValueList list = listFactory == null
        ? (ITermValueList)new TermStringList()
        : listFactory.CreateTermList();
    int negativeValueCount = GetNegativeValueCount(reader, field);

    int t = 1; // valid term id starts from 1

    // Ordinal 0 is reserved for documents with no value in this field.
    list.Add(null);
    minIDList.Add(-1);
    maxIDList.Add(-1);
    freqList.Add(0);
    int totalFreq = 0;

    Terms terms = reader.GetTerms(field);
    if (terms != null)
    {
        TermsEnum termsEnum = terms.GetIterator(null);
        BytesRef text;
        while ((text = termsEnum.Next()) != null)
        {
            // Store the term text; we expect at most one term per document.
            if (t >= length)
            {
                throw new RuntimeException("there are more terms than documents in field \"" + field
                    + "\", but it's impossible to sort on tokenized fields");
            }
            string strText = text.Utf8ToString();
            list.Add(strText);

            Term term = new Term(field, strText);
            DocsEnum docsEnum = reader.GetTermDocsEnum(term);
            // freqList.Add(termEnum.DocFreq()); // doesn't take deleted docs into account
            int minID = -1;
            int maxID = -1;
            int docID = -1;
            int df = 0;
            int valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
            while ((docID = docsEnum.NextDoc()) != DocsEnum.NO_MORE_DOCS)
            {
                df++;
                order.Add(docID, valId);
                minID = docID;
                while (docsEnum.NextDoc() != DocsEnum.NO_MORE_DOCS)
                {
                    docID = docsEnum.DocID;
                    df++;
                    order.Add(docID, valId);
                }
                maxID = docID;
            }
            freqList.Add(df);
            totalFreq += df;
            minIDList.Add(minID);
            maxIDList.Add(maxID);
            t++;
        }
    }

    list.Seal();
    this.m_valArray = list;
    this.m_freqs = freqList.ToArray();
    this.m_minIDs = minIDList.ToArray();
    this.m_maxIDs = maxIDList.ToArray();

    // Compute the min/max doc id range for the reserved "no value" ordinal 0.
    int doc = 0;
    while (doc < maxDoc && order.Get(doc) != 0)
    {
        ++doc;
    }
    if (doc < maxDoc)
    {
        this.m_minIDs[0] = doc;
        // Try to get the max
        doc = maxDoc - 1;
        while (doc >= 0 && order.Get(doc) != 0)
        {
            --doc;
        }
        this.m_maxIDs[0] = doc;
    }
    this.m_freqs[0] = reader.NumDocs - totalFreq;
}
public void CollectAll()
{
    _count = BigIntArray.FromArray(_dataCache.Freqs);
    _countLength = _dataCache.Freqs.Length;
}
internal CompactMultiValueFacetCountCollector(string name, BrowseSelection sel,
    FacetDataCache dataCache, int docBase, FacetSpec ospec)
    : base(name, dataCache, docBase, sel, ospec)
{
    _array = _dataCache.OrderArray;
}
public RecencyBoostScorer(RecencyBoostScorerBuilder parent, Scorer innerScorer,
    BigSegmentedArray orderArray, TermLongList termList)
    : base(innerScorer.Similarity)
{
    _parent = parent;
    _innerScorer = innerScorer;
    _orderArray = orderArray;
    _termList = termList;
}
public static IEnumerable<BrowseFacet> GetFacets(FacetSpec ospec, BigSegmentedArray count,
    int countlength, ITermValueList valList)
{
    if (ospec != null)
    {
        int minCount = ospec.MinHitCount;
        int max = ospec.MaxCount;
        if (max <= 0)
        {
            max = countlength;
        }

        LinkedList<BrowseFacet> facetColl;
        FacetSpec.FacetSortSpec sortspec = ospec.OrderBy;
        if (sortspec == FacetSpec.FacetSortSpec.OrderValueAsc)
        {
            // Values are already in ascending order; take the first `max` that qualify.
            facetColl = new LinkedList<BrowseFacet>();
            for (int i = 1; i < countlength; ++i) // exclude zero
            {
                int hits = count.Get(i);
                if (hits >= minCount)
                {
                    BrowseFacet facet = new BrowseFacet(valList.Get(i), hits);
                    facetColl.AddLast(facet);
                }
                if (facetColl.Count >= max)
                {
                    break;
                }
            }
        }
        else // hit-count descending, or a custom order
        {
            IComparatorFactory comparatorFactory;
            if (sortspec == FacetSpec.FacetSortSpec.OrderHitsDesc)
            {
                comparatorFactory = new FacetHitcountComparatorFactory();
            }
            else
            {
                comparatorFactory = ospec.CustomComparatorFactory;
            }
            if (comparatorFactory == null)
            {
                throw new ArgumentException("facet comparator factory not specified");
            }

            IComparer<int> comparator = comparatorFactory.NewComparator(
                new DefaultFacetCountCollectorFieldAccessor(valList), count);
            facetColl = new LinkedList<BrowseFacet>();
            int forbidden = -1;
            IntBoundedPriorityQueue pq = new IntBoundedPriorityQueue(comparator, max, forbidden);
            for (int i = 1; i < countlength; ++i) // exclude zero
            {
                int hits = count.Get(i);
                if (hits >= minCount)
                {
                    pq.Offer(i);
                }
            }

            // Drain the queue (ascending) and prepend, so the list ends up descending.
            int val;
            while ((val = pq.Poll()) != forbidden)
            {
                BrowseFacet facet = new BrowseFacet(valList[val], count.Get(val));
                facetColl.AddFirst(facet);
            }
        }
        return facetColl;
    }
    else
    {
        return FacetCountCollector_Fields.EMPTY_FACET_LIST;
    }
}
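The descending branch relies on a bounded priority queue: offer every qualifying ordinal, let the queue keep only the best `max`, then poll in ascending order and prepend so the final list is descending. Below is a self-contained sketch of the same pattern using a plain SortedSet in place of IntBoundedPriorityQueue; the counts and `max` are made up for illustration.

using System;
using System.Collections.Generic;

static class TopNFacetsDemo
{
    static void Main()
    {
        int[] counts = { 0, 5, 2, 9, 7 }; // index 0 (the missing value) is excluded
        int max = 3;

        // Order candidate ordinals by hit count, breaking ties by ordinal.
        var pq = new SortedSet<int>(Comparer<int>.Create(
            (a, b) => counts[a] != counts[b] ? counts[a] - counts[b] : a - b));

        for (int i = 1; i < counts.Length; i++)
        {
            pq.Add(i);
            if (pq.Count > max)
            {
                pq.Remove(pq.Min); // evict the weakest candidate, keeping at most `max`
            }
        }

        var facets = new LinkedList<int>();
        foreach (int ordinal in pq)   // drain ascending by count...
        {
            facets.AddFirst(ordinal); // ...and prepend, reversing into descending order
        }
        Console.WriteLine(string.Join(", ", facets)); // 3, 4, 1 (counts 9, 7, 5)
    }
}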
public RecencyBoostScorer(RecencyBoostScorerBuilder parent, Scorer innerScorer,
    BigSegmentedArray orderArray, TermInt64List termList)
    : base(innerScorer.Weight)
{
    m_parent = parent;
    m_innerScorer = innerScorer;
    m_orderArray = orderArray;
    m_termList = termList;
}
public CompactMultiValueFacetFilterDocIdSet(FacetDataCache dataCache, int[] indexes,
    int finalBits, BigSegmentedArray orderArray)
{
    this.dataCache = dataCache;
    this.indexes = indexes;
    this.finalBits = finalBits;
    this.orderArray = orderArray;
}
public virtual void CollectAll()
{
    m_count = BigInt32Array.FromArray(m_dataCache.Freqs);
}
public virtual IComparer<int> NewComparer(IFieldValueAccessor fieldValueAccessor, BigSegmentedArray counts)
{
    return new FacetValueComparerFactoryComparer();
}
public virtual ICollection<BrowseFacet> GetFacets()
{
    BigSegmentedArray counts = GetCollapsedCounts();
    return DefaultFacetCountCollector.GetFacets(m_ospec, counts, counts.Length, m_bucketValues);
}