/// <summary>
/// Creates a count collector for a facet whose values are separator-delimited paths.
/// </summary>
/// <param name="name">Facet name; stored and used as the prefix of the log line.</param>
/// <param name="sep">Path separator. It is treated as literal text everywhere in this class.</param>
/// <param name="sel">Browse selection; stored as-is.</param>
/// <param name="ospec">Facet spec supplying MinHitCount, MaxCount, OrderBy and the custom comparator factory.</param>
/// <param name="dataCache">Facet data cache; the length of its Freqs sizes the count array.</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="ospec"/>.OrderBy is not one of the handled sort options.
/// </exception>
internal PathFacetCountCollector(string name, string sep, BrowseSelection sel, FacetSpec ospec, FacetDataCache dataCache)
{
    _sel = sel;
    _ospec = ospec;
    _name = name;
    _dataCache = dataCache;
    _sep = sep;
    _sepArray = sep.ToCharArray();

    _count = new LazyBigIntArray(_dataCache.Freqs.Length);
    log.Info(name + ": " + _count.Size());
    _orderArray = _dataCache.OrderArray;

    _minHitCount = ospec.MinHitCount;
    _maxCount = ospec.MaxCount;
    if (_maxCount < 1)
    {
        // A MaxCount below 1 falls back to the size of the count array.
        _maxCount = _count.Size();
    }

    FacetSpec.FacetSortSpec sortOption = ospec.OrderBy;
    switch (sortOption)
    {
        case FacetSpec.FacetSortSpec.OrderHitsDesc:
            _comparatorFactory = new FacetHitcountComparatorFactory();
            break;
        case FacetSpec.FacetSortSpec.OrderValueAsc:
            // No comparator factory: code that checks _comparatorFactory for null
            // then skips the priority queue.
            _comparatorFactory = null;
            break;
        case FacetSpec.FacetSortSpec.OrderByCustom:
            _comparatorFactory = ospec.CustomComparatorFactory;
            break;
        default:
            // The single-string constructor would bind the text to paramName,
            // so pass the parameter name and the message separately.
            throw new ArgumentOutOfRangeException("ospec", "invalid sort option: " + sortOption);
    }

    // The separator is literal text, so escape it: separators such as "." or "|"
    // must not be interpreted as regex metacharacters.
    _splitPat = new Regex(Regex.Escape(_sep), RegexOptions.Compiled);
    _stringData = new string[10];
    _patStart = 0;
    _patEnd = 0;
}
/// <summary>
/// Creates an iterator over the given histogram counts.
/// </summary>
/// <param name="count">Count array to iterate; its size minus one is cached in <c>_maxMinusOne</c>.</param>
/// <param name="format">Format string; stored as-is.</param>
public HistogramFacetIterator(BigSegmentedArray count, string format)
{
    // The index starts at -1.
    _idx = -1;
    _format = format;
    _count = count;

    int size = count.Size();
    _maxMinusOne = size - 1;
}
/// <summary>
/// Returns the facet for the count slot whose index is given as a string.
/// </summary>
/// <param name="value">Text parsed with <c>int.Parse</c> to obtain the slot index.</param>
/// <returns>
/// A facet carrying <paramref name="value"/> and the count at that index,
/// or <c>null</c> when the index is negative or not below the size of the count array.
/// </returns>
public virtual BrowseFacet GetFacet(string value)
{
    // Counts are aggregated on first use.
    if (!_isAggregated)
    {
        Aggregate();
    }

    int slot = int.Parse(value);
    if (slot < 0 || slot >= _count.Size())
    {
        return null;
    }

    int hits = _count.Get(slot);
    return new BrowseFacet(value, hits);
}
/// <summary>
/// Adds the current facet count collector's group count to <c>_totalGroups</c>.
/// </summary>
/// <remarks>
/// A <c>GroupByFacetCountCollector</c> reports its own total through
/// <c>GetTotalGroups()</c>. For any other collector, every entry of the count
/// distribution that is greater than zero counts as one group.
/// </remarks>
private void CollectTotalGroups()
{
    GroupByFacetCountCollector groupBy = _facetCountCollector as GroupByFacetCountCollector;
    if (groupBy != null)
    {
        _totalGroups += groupBy.GetTotalGroups();
        return;
    }

    BigSegmentedArray distribution = _facetCountCollector.GetCountDistribution();
    int slot = 0;
    while (slot < distribution.Size())
    {
        if (distribution.Get(slot) > 0)
        {
            _totalGroups++;
        }
        slot++;
    }
}
/// <summary>
/// Returns the per-bucket counts, building and caching them on first call.
/// </summary>
/// <remarks>
/// For each non-empty bucket value, the sub-collector counts of its predefined
/// member values are summed and stored at the bucket's position. Member values
/// whose index in the sub-collector's value list is not greater than zero are
/// ignored. Finally position 0 receives <c>_numdocs</c> minus the sum of
/// <c>Freqs</c> over the distinct member indexes that were seen.
/// </remarks>
/// <returns>The cached collapsed count array.</returns>
private BigSegmentedArray GetCollapsedCounts()
{
    if (_collapsedCounts != null)
    {
        return _collapsedCounts;
    }

    _collapsedCounts = new LazyBigIntArray(_bucketValues.Count);

    FacetDataCache cache = _subCollector.DataCache;
    ITermValueList termValues = cache.ValArray;
    BigSegmentedArray termCounts = _subCollector.Count;

    // Tracks which term indexes have already contributed their frequency,
    // so a term shared by several buckets is only added once to the total.
    BitVector seen = new BitVector(termCounts.Size());
    int seenFreqTotal = 0;

    int position = -1;
    foreach (string bucket in _bucketValues)
    {
        position++;
        if (bucket.Length == 0)
        {
            continue;
        }

        int bucketTotal = 0;
        foreach (string member in _predefinedBuckets.Get(bucket))
        {
            int termIndex = termValues.IndexOf(member);
            if (termIndex <= 0)
            {
                continue;
            }

            bucketTotal += termCounts.Get(termIndex);
            if (seen.Get(termIndex))
            {
                continue;
            }

            seen.Set(termIndex);
            seenFreqTotal += cache.Freqs[termIndex];
        }

        _collapsedCounts.Add(position, bucketTotal);
    }

    _collapsedCounts.Add(0, (_numdocs - seenFreqTotal));
    return _collapsedCounts;
}
/// <summary>
/// Builds the facets found under <paramref name="selectedPath"/>, rolled up to
/// a depth of (number of parts in the selected path) + <paramref name="depth"/>.
/// </summary>
/// <param name="selectedPath">
/// Path to start from; <c>null</c> or empty starts from the first value.
/// A trailing separator is appended when missing.
/// </param>
/// <param name="depth">Number of levels below the selected path to roll values up to.</param>
/// <param name="strict">
/// When <c>true</c>, a value that produces the same rolled-up path as the
/// current one does not add its count to it.
/// </param>
/// <param name="minCount">Values whose count is below this are skipped.</param>
/// <param name="maxCount">Upper bound on the number of facets kept (list size or queue bound).</param>
/// <returns>
/// The collected facets. When a comparator factory is set, facets are drained
/// from the bounded priority queue and returned in reverse poll order;
/// otherwise they are returned in the order they were produced.
/// </returns>
private IEnumerable<BrowseFacet> GetFacetsForPath(string selectedPath, int depth, bool strict, int minCount, int maxCount)
{
    List<BrowseFacet> list = new List<BrowseFacet>();
    BoundedPriorityQueue<BrowseFacet> pq = null;
    if (_comparatorFactory != null)
    {
        IComparer<BrowseFacet> comparator = _comparatorFactory.NewComparator();
        pq = new BoundedPriorityQueue<BrowseFacet>(new PathFacetCountCollectorComparator(comparator), maxCount);
    }

    int startDepth = 0;
    int index = 0;
    if (!string.IsNullOrEmpty(selectedPath))
    {
        startDepth = selectedPath.Split(new string[] { _sep }, StringSplitOptions.RemoveEmptyEntries).Length;

        // Path values are keys, not linguistic text: compare ordinally.
        if (!selectedPath.EndsWith(_sep, StringComparison.Ordinal))
        {
            selectedPath += _sep;
        }

        index = _dataCache.ValArray.IndexOf(selectedPath);
        if (index < 0)
        {
            // NOTE(review): a negative result is mapped to -(index + 1), which looks like
            // a binary-search insertion point - confirm against ValArray.IndexOf.
            index = -(index + 1);
        }
    }

    int wantedDepth = startDepth + depth;
    string currentPath = null;
    int currentCount = 0;
    StringBuilder buf = new StringBuilder();

    for (int i = index; i < _count.Size(); ++i)
    {
        int subCount = _count.Get(i);
        if (subCount < minCount)
        {
            continue;
        }

        string path = _dataCache.ValArray.Get(i);

        // SplitString fills _stringData / _patStart / _patEnd for this path; it must run
        // before the prefix test, exactly as before. (A regex/string split per iteration
        // is deliberately avoided here.)
        int pathDepth = SplitString(path) ? PatListSize() : 0;

        if (startDepth > 0 && !path.StartsWith(selectedPath, StringComparison.Ordinal))
        {
            // Scanning started at the selected path's position, so the first value that
            // no longer has the prefix ends the scan.
            break;
        }

        // Rebuild the path truncated to the wanted depth.
        buf.Length = 0;
        int minDepth = Math.Min(wantedDepth, pathDepth);
        int taken = 0;
        for (int k = _patStart; k < _patEnd && taken < minDepth; ++k, ++taken)
        {
            string part = _stringData[k];
            buf.Append(part);

            // NOTE(review): k is an index into _stringData while wantedDepth is a depth;
            // the comparison is kept as it was - confirm _patStart is 0 at this point.
            if (!part.EndsWith(_sep, StringComparison.Ordinal)
                && (pathDepth != wantedDepth || k < (wantedDepth - 1)))
            {
                buf.Append(_sep);
            }
        }
        string wantedPath = buf.ToString();

        if (currentPath == null)
        {
            currentPath = wantedPath;
            currentCount = subCount;
        }
        else if (wantedPath.Equals(currentPath))
        {
            if (!strict)
            {
                currentCount += subCount;
            }
        }
        else
        {
            // "Direct node": the new path is the current path plus a trailing separator.
            // Trim the full separator length so multi-character separators work too.
            bool directNode = wantedPath.EndsWith(_sep, StringComparison.Ordinal)
                && currentPath.Equals(wantedPath.Substring(0, wantedPath.Length - _sep.Length));

            // The strict and non-strict cases behave identically here.
            if (directNode)
            {
                currentCount += subCount;
            }
            else
            {
                CollectPathFacet(new BrowseFacet(currentPath, currentCount), pq, list, maxCount);
                currentPath = wantedPath;
                currentCount = subCount;
            }
        }
    }

    if (currentPath != null && currentCount > 0)
    {
        CollectPathFacet(new BrowseFacet(currentPath, currentCount), pq, list, maxCount);
    }

    if (pq != null)
    {
        // When a queue is used the list is still empty here, so appending and reversing
        // once gives the same order as inserting every polled facet at the front.
        BrowseFacet polled;
        while ((polled = pq.Poll()) != null)
        {
            list.Add(polled);
        }
        list.Reverse();
    }

    return list;
}

/// <summary>
/// Hands a finished facet to the priority queue when one is in use, otherwise
/// appends it to the list while the list holds fewer than <paramref name="maxCount"/> items.
/// </summary>
private static void CollectPathFacet(BrowseFacet facet, BoundedPriorityQueue<BrowseFacet> pq, List<BrowseFacet> list, int maxCount)
{
    if (pq != null)
    {
        pq.Add(facet);
    }
    else if (list.Count < maxCount)
    {
        list.Add(facet);
    }
}
/// <summary>
/// Creates a facet iterator over the bucket values and their collapsed counts.
/// </summary>
/// <returns>A <c>DefaultFacetIterator</c> built from the bucket values, the collapsed counts and their size.</returns>
public virtual FacetIterator Iterator()
{
    BigSegmentedArray collapsed = GetCollapsedCounts();
    int size = collapsed.Size();
    return new DefaultFacetIterator(_bucketValues, collapsed, size, true);
}
/// <summary>
/// Returns the facets for the bucket values, using the collapsed counts.
/// </summary>
/// <returns>
/// The result of <c>DefaultFacetCountCollector.GetFacets</c> for the stored spec,
/// the collapsed counts, their size and the bucket values.
/// </returns>
public virtual IEnumerable<BrowseFacet> GetFacets()
{
    BigSegmentedArray collapsed = GetCollapsedCounts();
    int size = collapsed.Size();
    return DefaultFacetCountCollector.GetFacets(_ospec, collapsed, size, _bucketValues);
}