Пример #1
0
        /// <summary>
        /// Builds the bit set the chain starts accumulating into.
        /// </summary>
        /// <remarks>
        /// A first AND applied to an all-false set would always yield zero results, so for
        /// AND and ANDNOT the set is seeded from the filter at <c>index[0]</c> (inverted for
        /// ANDNOT) and <c>index[0]</c> is advanced past that filter. Every other logic starts
        /// from an empty set sized to the reader's max doc and leaves <c>index</c> untouched.
        /// </remarks>
        /// <param name="reader">Reader the filters are evaluated against.</param>
        /// <param name="logic">Logic of the first operation in the chain.</param>
        /// <param name="index">Single-element cursor into <c>chain</c>; may be advanced in place.</param>
        /// <returns>The starting bit set.</returns>
        private OpenBitSetDISI InitialResult(IndexReader reader, Logic logic, int[] index)
        {
            // Anything other than AND / ANDNOT simply starts from an empty set.
            if (logic != Logic.AND && logic != Logic.ANDNOT)
            {
                return new OpenBitSetDISI(reader.MaxDoc());
            }

            // Seed from the first filter in the chain.
            OpenBitSetDISI seeded = new OpenBitSetDISI(GetDISI(chain[index[0]], reader), reader.MaxDoc());

            if (logic == Logic.ANDNOT)
            {
                // NOTE: may set bits for deleted docs.
                seeded.Flip(0, reader.MaxDoc());
            }

            // The first filter has been consumed; the caller continues with the next one.
            ++index[0];
            return seeded;
        }
Пример #2
0
 /// <summary>
 /// Stores the inputs for one facet; no work is done here beyond the assignments.
 /// </summary>
 /// <param name="facetName">Name of the facet this instance represents.</param>
 /// <param name="reader">Index reader associated with the facet.</param>
 /// <param name="queryDocidSet">Doc id set of the query.</param>
 /// <param name="groupBitSet">Bit set of the facet's group.</param>
 /// <param name="maxDocPerFacet">Per-facet document limit.</param>
 internal HitsPerFacet(FacetName facetName, IndexReader reader, DocIdSet queryDocidSet, OpenBitSetDISI groupBitSet, int maxDocPerFacet)
 {
     _FacetName = facetName;
     _Reader = reader;
     _MaxDocPerFacet = maxDocPerFacet;
     _QueryDocidSet = queryDocidSet;
     _GroupBitSet = groupBitSet;
 }
Пример #3
0
        /// <summary>
        /// Bit set capacity used by the two-argument <c>CalculateFacetCount</c> overload.
        /// </summary>
        private const int DefaultFacetBitSetSize = 25000;

        /// <summary>
        /// Counts the documents present in both sets, using the default bit set capacity.
        /// </summary>
        /// <param name="baseBitSet">Set the count starts from.</param>
        /// <param name="filterDocSet">Set the base set is intersected with.</param>
        /// <returns>Number of documents contained in both sets.</returns>
        private static long CalculateFacetCount(DocIdSet baseBitSet, DocIdSet filterDocSet)
        {
            return CalculateFacetCount(baseBitSet, filterDocSet, DefaultFacetBitSetSize);
        }

        /// <summary>
        /// Counts the documents present in both sets, using a caller-supplied bit set capacity.
        /// </summary>
        /// <param name="baseBitSet">Set the count starts from.</param>
        /// <param name="filterDocSet">Set the base set is intersected with.</param>
        /// <param name="maxSize">
        /// Capacity handed to <c>OpenBitSetDISI</c>.
        /// NOTE(review): Lucene documents this as one larger than the largest doc id that may be
        /// set, so doc ids beyond it are presumably not counted - pass the reader's max doc for
        /// indexes larger than the default.
        /// </param>
        /// <returns>Number of documents contained in both sets.</returns>
        private static long CalculateFacetCount(DocIdSet baseBitSet, DocIdSet filterDocSet, int maxSize)
        {
            // Materialize the base set, then intersect it in place with the filter.
            var intersection = new OpenBitSetDISI(baseBitSet.Iterator(), maxSize);
            intersection.InPlaceAnd(filterDocSet.Iterator());

            return intersection.Cardinality();
        }
Пример #4
0
        /// <summary>
        /// Delegates to each remaining filter in the chain, combining them all with one logic.
        /// </summary>
        /// <param name="reader">Reader handed to each filter.</param>
        /// <param name="logic">Operation used to fold each filter's result into the accumulator.</param>
        /// <param name="index">Single-element cursor; <c>index[0]</c> is the next chain position and is advanced in place.</param>
        /// <returns>The accumulated set after it has been passed through <c>FinalResult</c>.</returns>
        private DocIdSet GetDocIdSet(IndexReader reader, Logic logic, int[] index)
        {
            OpenBitSetDISI accumulator = InitialResult(reader, logic, index);

            // InitialResult may already have consumed the first filter, so start at index[0].
            while (index[0] < chain.Length)
            {
                DoChain(accumulator, logic, chain[index[0]].GetDocIdSet(reader));
                index[0]++;
            }

            return FinalResult(accumulator, reader.MaxDoc());
        }
        /// <summary>
        /// Computes, for one facet field, how many documents of the base query match each facet value.
        /// </summary>
        /// <param name="baseQueryWithoutFacetDrilldown">Query without the facet drill-down applied.</param>
        /// <param name="allFacetFieldInfos">All facet fields taking part in the search.</param>
        /// <param name="facetFieldInfoToCalculateFor">Facet field whose values are counted.</param>
        /// <returns>The matches gathered by the result collection; values with a zero count are omitted.</returns>
        private IEnumerable <FacetMatch> FindMatchesInQuery(Query baseQueryWithoutFacetDrilldown, IList <FacetFieldInfo> allFacetFieldInfos, FacetFieldInfo facetFieldInfoToCalculateFor)
        {
            var fieldName = facetFieldInfoToCalculateFor.FieldName;

            var queryFilter = new CachingWrapperFilter(new QueryWrapperFilter(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, allFacetFieldInfos, fieldName)));
            var queryBits   = new OpenBitSetDISI(queryFilter.GetDocIdSet(IndexReader).Iterator(), IndexReader.MaxDoc);

            // Scratch set reused for every facet value so the query bits are never modified.
            var scratch = new OpenBitSetDISI(queryBits.Bits.Length)
            {
                Bits = new long[queryBits.Bits.Length]
            };

            var calculatedFacetCounts = new ResultCollection(facetFieldInfoToCalculateFor);

            foreach (var facetValueBitSet in GetOrCreateFacetBitSet(fieldName).FacetValueBitSetList)
            {
                var isSelected = calculatedFacetCounts.IsSelected(facetValueBitSet.Value);

                // Impossible to get a better result: stop once enough results have been collected.
                if (!isSelected &&
                    facetValueBitSet.Count < calculatedFacetCounts.MinCountForNonSelected &&
                    calculatedFacetCounts.HaveEnoughResults)
                {
                    break;
                }

                // Reset the scratch set to the query bits; NumWords is restored alongside the words.
                queryBits.Bits.CopyTo(scratch.Bits, 0);
                scratch.NumWords = queryBits.NumWords;

                var bitset = facetValueBitSet.Bitset ?? CalculateOpenBitSetDisi(fieldName, facetValueBitSet.Value);
                scratch.And(bitset);

                var count = scratch.Cardinality();
                if (count == 0)
                {
                    continue;
                }

                var match = new FacetMatch
                {
                    Count          = count,
                    Value          = facetValueBitSet.Value,
                    FacetFieldName = fieldName
                };

                if (isSelected)
                {
                    calculatedFacetCounts.AddToSelected(match);
                }
                else
                {
                    calculatedFacetCounts.AddToNonSelected(match);
                }
            }

            return(calculatedFacetCounts.GetList());
        }
Пример #6
0
        /// <inheritdoc />
        public ISearchBit GetBits()
        {
            var luceneQuery = CreateQuery();
            var queryFilter = new QueryWrapperFilter(luceneQuery);
            var searcher    = _indexSearcherFactory();

            // NOTE(review): the cast requires the searcher's reader context to be an
            // AtomicReaderContext - confirm the factory never returns a composite reader.
            var readerContext = (AtomicReaderContext)searcher.IndexReader.Context;
            var matchingDocs  = queryFilter.GetDocIdSet(readerContext, readerContext.AtomicReader.LiveDocs);

            // Copy the matching doc ids into a bit set sized to the reader's max doc.
            return new SearchBit(new OpenBitSetDISI(matchingDocs.GetIterator(), searcher.IndexReader.MaxDoc));
        }
Пример #7
0
        /// <summary>
        /// Builds one bit set per combination of values of the group-by fields and stores the
        /// non-empty combinations in <c>_Groups</c>.
        /// </summary>
        /// <param name="reader">Reader the field value bit sets are built from.</param>
        /// <param name="groupByFields">Fields whose values are combined into facets.</param>
        /// <exception cref="Exception">
        /// Thrown when the running product of per-field value counts exceeds <c>MAX_FACETS</c>.
        /// </exception>
        public SimpleFacetedSearch(IndexReader reader, string[] groupByFields)
        {
            this._Reader = reader;

            List<FieldValuesBitSets> fieldValuesBitSets = new List<FieldValuesBitSets>();

            //STEP 1
            //collect the values of every group-by field, for example:
            //f1 = A, B
            //f2 = I, J
            //f3 = 1, 2, 3
            int maxFacets = 1;
            IList<IList<string>> inputToCP = new List<IList<string>>();
            foreach (string field in groupByFields)
            {
                FieldValuesBitSets f = new FieldValuesBitSets(reader, field);
                // NOTE(review): plain int multiplication - confirm the product cannot overflow
                // before the MAX_FACETS check below rejects it.
                maxFacets *= f.FieldValueBitSetPair.Count;
                if (maxFacets > MAX_FACETS) throw new Exception("Facet count exceeded " + MAX_FACETS);
                fieldValuesBitSets.Add(f);
                inputToCP.Add(f.FieldValueBitSetPair.Keys.ToList());
            }

            //STEP 2
            //enumerate every combination of one value per field
            // comb1: A I 1
            // comb2: A I 2 etc.
            var cp = inputToCP.CartesianProduct();

            //STEP 3
            //create a single BitSet for each combination
            //BitSet1: A AND I AND 1
            //BitSet2: A AND I AND 2 etc.
            //and remove impossible comb's (for ex, B J 3) from list.
#if !NET35
            Parallel.ForEach(cp, combinations =>
#else
            foreach(var combinations in cp)
#endif
            {
                // Start from an all-ones set so the successive ANDs narrow it down.
                OpenBitSetDISI bitSet = new OpenBitSetDISI(_Reader.MaxDoc);
                bitSet.Set(0, bitSet.Size());
                List<string> comb = combinations.ToList();

                // comb[j] is the value chosen for the j-th group-by field.
                for (int j = 0; j < comb.Count; j++)
                {
                    bitSet.And(fieldValuesBitSets[j].FieldValueBitSetPair[comb[j]]);
                }

                //keep only the combinations that match at least one document
                if (bitSet.Cardinality() > 0)
                {
                    // _Groups is locked because this body may run on several threads at once.
                    lock(_Groups)
                        _Groups.Add(new KeyValuePair<List<string>, OpenBitSetDISI>(comb, bitSet));
                }
            }
Пример #8
0
        /// <summary>
        /// Delegates to each remaining filter in the chain, combining each with its own logic.
        /// </summary>
        /// <param name="reader">Reader handed to each filter.</param>
        /// <param name="logic">One logical operation per chain entry.</param>
        /// <param name="index">Single-element cursor; <c>index[0]</c> is the next chain position and is advanced in place.</param>
        /// <returns>The accumulated set after it has been passed through <c>FinalResult</c>.</returns>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="logic"/> and the chain differ in length.
        /// </exception>
        private DocIdSet GetDocIdSet(IndexReader reader, Logic[] logic, int[] index)
        {
            if (logic.Length != chain.Length)
            {
                throw new ArgumentException("Invalid number of elements in logic array");
            }

            OpenBitSetDISI accumulator = InitialResult(reader, logic[0], index);

            // InitialResult may already have consumed the first filter, so start at index[0].
            while (index[0] < chain.Length)
            {
                DoChain(accumulator, logic[index[0]], chain[index[0]].GetDocIdSet(reader));
                index[0]++;
            }

            return FinalResult(accumulator, reader.MaxDoc());
        }
Пример #9
0
            /// <summary>
            /// Intersects the query's documents with the group's bit set, then records an
            /// iterator over the result and its hit count.
            /// </summary>
            /// <remarks>
            /// The input sets and the intermediate bit set are set to null afterwards; only the
            /// iterator and the hit count are kept.
            /// </remarks>
            internal void Calculate()
            {
                if (_QueryDocidSet != DocIdBitSet.EMPTY_DOCIDSET)
                {
                    // Work on a clone so the query's own bit set is left untouched.
                    _ResultBitSet = (OpenBitSet)((OpenBitSet)_QueryDocidSet).Clone();
                    _ResultBitSet.And(_GroupBitSet);
                }
                else
                {
                    // An empty query set yields an empty result.
                    _ResultBitSet = new OpenBitSet(0);
                }

                _ResultIterator = _ResultBitSet.Iterator();
                _HitCount       = _ResultBitSet.Cardinality();

                // Drop the references that are no longer needed.
                _ResultBitSet  = null;
                _QueryDocidSet = null;
                _GroupBitSet   = null;
            }
        /// <summary>
        /// Runs the built query and returns the matching documents as a bit set wrapper.
        /// </summary>
        /// <returns>
        /// The matching bits, or <c>null</c> when the index searcher could not be opened.
        /// </returns>
        public ISearchBits GetBits()
        {
            var luceneQuery = CreateQuery();

            IndexSearcher indexSearcher;
            try {
                indexSearcher = new IndexSearcher(_directory, true);
            }
            catch {
                // The index might not exist if it has been rebuilt.
                Logger.Information("Attempt to read a none existing index");
                return null;
            }

            using (indexSearcher) {
                var matchingDocs = new QueryWrapperFilter(luceneQuery).GetDocIdSet(indexSearcher.IndexReader);

                // The doc ids are copied into the bit set here, before the searcher is disposed.
                var matchingBits = new OpenBitSetDISI(matchingDocs.Iterator(), indexSearcher.MaxDoc);
                return new SearchBits(matchingBits);
            }
        }
 /// <summary>
 ///     Bit set capacity used by the two-argument <c>CalculateFacetCount</c> overload.
 /// </summary>
 private const int DefaultFacetBitSetSize = 25000;

 /// <summary>
 ///     Calculates the number of documents found in both the base set and the filter set,
 ///     using the default bit set capacity.
 /// </summary>
 /// <param name="baseBitSet">The base bit set.</param>
 /// <param name="filterDocSet">The filter bit set.</param>
 /// <returns>The number of documents contained in both sets.</returns>
 private long CalculateFacetCount(DocIdSet baseBitSet, DocIdSet filterDocSet)
 {
     return CalculateFacetCount(baseBitSet, filterDocSet, DefaultFacetBitSetSize);
 }

 /// <summary>
 ///     Calculates the number of documents found in both the base set and the filter set,
 ///     using a caller-supplied bit set capacity.
 /// </summary>
 /// <param name="baseBitSet">The base bit set.</param>
 /// <param name="filterDocSet">The filter bit set.</param>
 /// <param name="maxSize">
 ///     Capacity handed to <c>OpenBitSetDISI</c>.
 ///     NOTE(review): Lucene documents this as one larger than the largest doc id that may be
 ///     set, so doc ids beyond it are presumably not counted - pass the reader's max doc for
 ///     indexes larger than the default.
 /// </param>
 /// <returns>The number of documents contained in both sets.</returns>
 private long CalculateFacetCount(DocIdSet baseBitSet, DocIdSet filterDocSet, int maxSize)
 {
     // Materialize the base set, then intersect it in place with the filter.
     var intersection = new OpenBitSetDISI(baseBitSet.Iterator(), maxSize);
     intersection.InPlaceAnd(filterDocSet.Iterator());

     return intersection.Cardinality();
 }
Пример #12
0
        /// <summary>
        /// Combines the should, not and must filters, in that order, into one doc id set.
        /// </summary>
        /// <remarks>
        /// The first filter encountered seeds the accumulator (inverted when it is a "not"
        /// filter). Each later filter is OR-ed, AND-NOT-ed or AND-ed into it according to the
        /// list it belongs to.
        /// </remarks>
        /// <param name="reader">The reader.</param>
        /// <returns>
        /// The combined set passed through <c>FinalResult</c>, or the empty set when no filter
        /// contributed.
        /// </returns>
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            OpenBitSetDISI accumulated = null;

            if (shouldFilters != null)
            {
                for (int i = 0; i < shouldFilters.Count; i++)
                {
                    if (accumulated == null)
                    {
                        accumulated = new OpenBitSetDISI(GetDISI(shouldFilters, i, reader), reader.MaxDoc);
                        continue;
                    }

                    OpenBitSet bits = shouldFilters[i].GetDocIdSet(reader) as OpenBitSet;
                    if (bits != null)
                    {
                        // optimized case for OpenBitSets
                        accumulated.Or(bits);
                    }
                    else
                    {
                        accumulated.InPlaceOr(GetDISI(shouldFilters, i, reader));
                    }
                }
            }

            if (notFilters != null)
            {
                for (int i = 0; i < notFilters.Count; i++)
                {
                    if (accumulated == null)
                    {
                        accumulated = new OpenBitSetDISI(GetDISI(notFilters, i, reader), reader.MaxDoc);
                        accumulated.Flip(0, reader.MaxDoc); // NOTE: may set bits on deleted docs
                        continue;
                    }

                    OpenBitSet bits = notFilters[i].GetDocIdSet(reader) as OpenBitSet;
                    if (bits != null)
                    {
                        // optimized case for OpenBitSets
                        accumulated.AndNot(bits);
                    }
                    else
                    {
                        accumulated.InPlaceNot(GetDISI(notFilters, i, reader));
                    }
                }
            }

            if (mustFilters != null)
            {
                for (int i = 0; i < mustFilters.Count; i++)
                {
                    if (accumulated == null)
                    {
                        accumulated = new OpenBitSetDISI(GetDISI(mustFilters, i, reader), reader.MaxDoc);
                        continue;
                    }

                    OpenBitSet bits = mustFilters[i].GetDocIdSet(reader) as OpenBitSet;
                    if (bits != null)
                    {
                        // optimized case for OpenBitSets
                        accumulated.And(bits);
                    }
                    else
                    {
                        accumulated.InPlaceAnd(GetDISI(mustFilters, i, reader));
                    }
                }
            }

            // No filter contributed anything: report the empty set.
            if (accumulated == null)
            {
                return DocIdSet.EMPTY_DOCIDSET;
            }

            return FinalResult(accumulated, reader.MaxDoc);
        }
Пример #13
0
 /// <summary>
 /// Final hook applied to the accumulated bit set before it is returned to the caller.
 /// </summary>
 /// <remarks>
 /// This implementation hands the bit set back unchanged and does not read
 /// <paramref name="maxDocs"/>; no SortedVIntList is produced here.
 /// Deprecated: either use CachingWrapperFilter, or switch to a different DocIdSet
 /// implementation yourself. This method will be removed in Lucene 4.0.
 /// </remarks>
 /// <param name="result">The accumulated bit set.</param>
 /// <param name="maxDocs">Unused.</param>
 /// <returns><paramref name="result"/> itself.</returns>
 protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs)
 {
     DocIdSet unchanged = result;
     return unchanged;
 }
Пример #14
0
        /// <summary>
        /// Folds one filter's doc id set into the running result using the given logic.
        /// </summary>
        /// <param name="result">Accumulator; modified in place.</param>
        /// <param name="logic">OR, AND, ANDNOT or XOR; any other value is re-dispatched with <c>DEFAULT</c>.</param>
        /// <param name="dis">Set to combine; a null set or a null iterator is treated as empty.</param>
        private void DoChain(OpenBitSetDISI result, Logic logic, DocIdSet dis)
        {
            OpenBitSet bitSet = dis as OpenBitSet;
            if (bitSet != null)
            {
                // optimized case for OpenBitSets
                if (logic == Logic.OR)
                {
                    result.Or(bitSet);
                }
                else if (logic == Logic.AND)
                {
                    result.And(bitSet);
                }
                else if (logic == Logic.ANDNOT)
                {
                    result.AndNot(bitSet);
                }
                else if (logic == Logic.XOR)
                {
                    result.Xor(bitSet);
                }
                else
                {
                    DoChain(result, DEFAULT, dis);
                }
                return;
            }

            // General case: walk the set through its iterator, substituting the empty
            // iterator when either the set or its iterator is null.
            DocIdSetIterator iterator = dis == null ? null : dis.Iterator();
            if (iterator == null)
            {
                iterator = DocIdSet.EMPTY_DOCIDSET.Iterator();
            }

            if (logic == Logic.OR)
            {
                result.InPlaceOr(iterator);
            }
            else if (logic == Logic.AND)
            {
                result.InPlaceAnd(iterator);
            }
            else if (logic == Logic.ANDNOT)
            {
                result.InPlaceNot(iterator);
            }
            else if (logic == Logic.XOR)
            {
                result.InPlaceXor(iterator);
            }
            else
            {
                DoChain(result, DEFAULT, dis);
            }
        }
Пример #15
0
        /// <summary>
        /// Runs the configured query over the selected index paths and returns one page of hits,
        /// together with per-field facet counts when facet fields are configured.
        /// </summary>
        /// <param name="pageIndex">Zero-based page number; paging applies only when this is non-negative and <paramref name="pageSize"/> is positive.</param>
        /// <param name="pageSize">Number of hits per page.</param>
        /// <param name="totalCount">Receives the total hit count reported by the search.</param>
        /// <param name="facetedResults">Receives the facet groups ordered by field name, or <c>null</c> when no facet fields are configured.</param>
        /// <returns>The hits of the requested page, or every collected hit when paging is off.</returns>
        public IEnumerable <IHit> Query(int pageIndex, int pageSize, out int totalCount, out IEnumerable <FacetGroup> facetedResults)
        {
            totalCount     = 0;
            facetedResults = null;

            List <LuceneHit>     results    = new List <LuceneHit>();
            List <IndexSearcher> subSearchs = new List <IndexSearcher>();

            try
            {
                if (searchPaths == null)
                {
                    // No path list at all: search every known index. The original dereferenced
                    // the null list here.
                    foreach (var path in indexPaths.Values.Select(o => o.Path))
                    {
                        subSearchs.Add(new IndexSearcher(FSDirectory.Open(path)));
                    }
                }
                else
                {
                    if (searchPaths.Count <= 0)
                    {
                        searchPaths.AddRange(indexPaths.Values.Select(o => o.Path));
                    }

                    searchPaths.ForEach(o => subSearchs.Add(new IndexSearcher(FSDirectory.Open(o))));
                }

                if (facetFields != null && facetFields.Count > 0)
                {
                    var         facetGroups     = new List <FacetGroup>();
                    var         mainQueryFilter = new CachingWrapperFilter(new QueryWrapperFilter(query));
                    MultiReader readers         = new MultiReader(subSearchs.Select(o => o.IndexReader).ToArray());

                    foreach (var facetField in facetFields)
                    {
                        FacetGroup fg = new FacetGroup();
                        fg.FieldName = facetFieldNameProvider.GetMapName(TypeName, facetField);
                        var items = new List <FacetItem>();

                        var allDistinctField = FieldCache_Fields.DEFAULT.GetStrings(readers, facetField).Distinct().ToArray();
                        int totalHits        = 0;

                        // List<T> and "+=" are not thread-safe, so the shared state written by
                        // the parallel body is guarded by this lock.
                        var facetLock = new object();

                        Parallel.ForEach(allDistinctField, fieldValue =>
                        {
                            var facetQuery       = new TermQuery(new Term(facetField, fieldValue));
                            var facetQueryFilter = new CachingWrapperFilter(new QueryWrapperFilter(facetQuery));

                            // Documents carrying this facet value AND matching the main query.
                            var bs = new OpenBitSetDISI(facetQueryFilter.GetDocIdSet(readers).Iterator(), readers.MaxDoc);
                            bs.InPlaceAnd(mainQueryFilter.GetDocIdSet(readers).Iterator());
                            int count = (Int32)bs.Cardinality();

                            FacetItem item  = new FacetItem();
                            item.GroupValue = fieldValue;
                            item.Count      = count;

                            lock (facetLock)
                            {
                                items.Add(item);
                                totalHits += count;
                            }
                        });

                        fg.FacetItems = items.OrderByDescending(o => o.Count);
                        fg.TotalHits  = totalHits;

                        facetGroups.Add(fg);
                    }

                    facetedResults = facetGroups.OrderBy(o => o.FieldName);
                }

                ParallelMultiSearcher searcher = new ParallelMultiSearcher(subSearchs.ToArray());
                Sort sort = null;

                if (sortFields != null && sortFields.Count > 0)
                {
                    sort = new Sort(sortFields.ToArray());
                }

                int maxDoc     = searcher.MaxDoc;
                int startIndex = 0;

                if (pageIndex >= 0 && pageSize > 0)
                {
                    startIndex = pageIndex * pageSize;
                    maxDoc     = pageSize * (pageIndex + 1);
                }

                // Only reachable with paging off and an empty index: nothing can match, so
                // avoid asking the searcher for zero documents.
                if (maxDoc <= 0)
                {
                    return(results);
                }

                var docs = sort == null?searcher.Search(query, null, maxDoc) : searcher.Search(query, null, maxDoc, sort);

                totalCount = docs.TotalHits;

                // ScoreDocs holds only the collected hits (at most maxDoc of them), so it -
                // not TotalHits - bounds the loop.
                int endIndex = docs.ScoreDocs.Length;

                for (int i = startIndex; i < endIndex; i++)
                {
                    LuceneHit h = new LuceneHit(TypeName, DocumentBuilder, searcher.Doc(docs.ScoreDocs[i].Doc));
                    results.Add(h);
                }
                return(results);
            }
            finally
            {
                // The hits are fully materialized above, so the searchers opened for this call
                // can be released.
                foreach (var subSearch in subSearchs)
                {
                    subSearch.Dispose();
                }
            }
        }
Пример #16
0
        // TODO: in 3.0, instead of removing this deprecated
        // method, make it a no-op and mark it final

        /// <summary>
        /// Provides a SortedVIntList when it is definitely smaller than an OpenBitSet.
        /// </summary>
        /// <remarks>
        /// The compact list is chosen when the number of set bits is below one ninth of
        /// <paramref name="maxDocs"/> (integer division); otherwise the bit set is returned as is.
        /// Deprecated: either use CachingWrapperFilter, or switch to a different DocIdSet
        /// implementation yourself.
        /// </remarks>
        /// <param name="result">The accumulated bit set.</param>
        /// <param name="maxDocs">Max doc of the reader the set was built for.</param>
        /// <returns>A SortedVIntList copy of the set, or <paramref name="result"/> itself.</returns>
        protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs)
        {
            bool isSparse = result.Cardinality() < (maxDocs / 9);

            if (isSparse)
            {
                return new SortedVIntList(result);
            }

            return result;
        }