예제 #1
0
        public OpenBitSetDISI TermToBitSet(string term, IndexReader indexReader)
        {
            var facetQuery = new TermQuery(new Term(this.Field, term));
            var facetQueryFilter = new CachingWrapperFilter(new QueryWrapperFilter(facetQuery));
            var bitSet = new OpenBitSetDISI(facetQueryFilter.GetDocIdSet(indexReader).Iterator(), indexReader.MaxDoc());

            return bitSet;
        }
예제 #2
0
        private List<FacetMatch> FindMatchesInQuery(Facet facet, Filter query, Filter filter, IndexReader indexReader)
        {
            var matches = facet.Values.Select(value =>
            {
                var bitsQuery = new OpenBitSetDISI(query.GetDocIdSet(indexReader).Iterator(), indexReader.MaxDoc());
                bitsQuery.And(value.Item2);

                if (filter != null)
                {
                    //TODO: Remove this hard coded value (1000)
                    var bitsFilter = new OpenBitSetDISI(filter.GetDocIdSet(indexReader).Iterator(), 1000);
                    bitsQuery.And(bitsFilter);
                }

                var count = bitsQuery.Cardinality();

                return new FacetMatch() { Count = count, Value = value.Item1, Id = facet.Id };
            }).ToList();

            return matches;
        }
예제 #3
0
        private void doChain(OpenBitSetDISI result, int logic, DocIdSet dis)
        {

            if (dis is OpenBitSet)
            {
                // optimized case for OpenBitSets
                switch (logic)
                {
                    case OR:
                        result.Or((OpenBitSet)dis);
                        break;
                    case AND:
                        result.And((OpenBitSet)dis);
                        break;
                    case ANDNOT:
                        result.AndNot((OpenBitSet)dis);
                        break;
                    case XOR:
                        result.Xor((OpenBitSet)dis);
                        break;
                    default:
                        doChain(result, DEFAULT, dis);
                        break;
                }
            }
            else
            {
                DocIdSetIterator disi;
                if (dis == null)
                {
                    disi = DocIdSet.EMPTY_DOCIDSET.Iterator();
                }
                else
                {
                    disi = dis.Iterator();
                    if (disi == null)
                    {
                        disi = DocIdSet.EMPTY_DOCIDSET.Iterator();
                    }
                }

                switch (logic)
                {
                    case OR:
                        result.InPlaceOr(disi);
                        break;
                    case AND:
                        result.InPlaceAnd(disi);
                        break;
                    case ANDNOT:
                        result.InPlaceNot(disi);
                        break;
                    case XOR:
                        result.InPlaceXor(disi);
                        break;
                    default:
                        doChain(result, DEFAULT, dis);
                        break;
                }
            }
        }
예제 #4
0
 /** Provide a SortedVIntList when it is definitely
  *  smaller than an OpenBitSet
  *  @deprecated Either use CachingWrapperFilter, or
  *  switch to a different DocIdSet implementation yourself.
  *  This method will be removed in Lucene 4.0 
  **/
 protected DocIdSet finalResult(OpenBitSetDISI result, int maxDocs)
 {
     return result;
 }
예제 #5
0
 private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index)
 {
     OpenBitSetDISI result;
     /**
      * First AND operation takes place against a completely false
      * bitset and will always return zero results.
      */
     if (logic == AND)
     {
         result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.MaxDoc);
         ++index[0];
     }
     else if (logic == ANDNOT)
     {
         result = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.MaxDoc);
         result.Flip(0, reader.MaxDoc); // NOTE: may set bits for deleted docs.
         ++index[0];
     }
     else
     {
         result = new OpenBitSetDISI(reader.MaxDoc);
     }
     return result;
 }
예제 #6
0
        public ISearchBits GetBits()
        {
            var query = CreateQuery();
            IndexSearcher searcher;

            try {
                searcher = new IndexSearcher(_directory, true);
            }
            catch {
                // index might not exist if it has been rebuilt
                Logger.Information("Attempt to read a none existing index");
                return null;
            }

            try {
                var filter = new QueryWrapperFilter(query);
                var bits = filter.GetDocIdSet(searcher.GetIndexReader());
                var disi = new OpenBitSetDISI(bits.Iterator(), searcher.MaxDoc());
                return new SearchBits(disi);
            }
            finally {
                searcher.Close();
            }
        }
        private IEnumerable<FacetMatch> FindMatchesInQuery(Query baseQueryWithoutFacetDrilldown, IList<FacetFieldInfo> allFacetFieldInfos, FacetFieldInfo facetFieldInfoToCalculateFor)
        {
            var calculations = 0;

            var queryFilter = new CachingWrapperFilter(new QueryWrapperFilter(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, allFacetFieldInfos, facetFieldInfoToCalculateFor.FieldName)));
            var bitsQueryWithoutFacetDrilldown = new OpenBitSetDISI(queryFilter.GetDocIdSet(IndexReader).Iterator(), IndexReader.MaxDoc);
            var baseQueryWithoutFacetDrilldownCopy = new OpenBitSetDISI(bitsQueryWithoutFacetDrilldown.Bits.Length)
            {
                Bits = new long[bitsQueryWithoutFacetDrilldown.Bits.Length]
            };

            var calculatedFacetCounts = new ResultCollection(facetFieldInfoToCalculateFor);
            foreach (var facetValueBitSet in GetOrCreateFacetBitSet(facetFieldInfoToCalculateFor.FieldName).FacetValueBitSetList)
            {
                var isSelected = calculatedFacetCounts.IsSelected(facetValueBitSet.Value);

                if (!isSelected && facetValueBitSet.Count < calculatedFacetCounts.MinCountForNonSelected) //Impossible to get a better result
                {
                    if (calculatedFacetCounts.HaveEnoughResults)
                        break;
                }

                bitsQueryWithoutFacetDrilldown.Bits.CopyTo(baseQueryWithoutFacetDrilldownCopy.Bits, 0);
                baseQueryWithoutFacetDrilldownCopy.NumWords = bitsQueryWithoutFacetDrilldown.NumWords;

                var bitset = facetValueBitSet.Bitset ?? CalculateOpenBitSetDisi(facetFieldInfoToCalculateFor.FieldName, facetValueBitSet.Value);
                baseQueryWithoutFacetDrilldownCopy.And(bitset);
                var count = baseQueryWithoutFacetDrilldownCopy.Cardinality();
                if (count == 0)
                    continue;
                var match = new FacetMatch
                {
                    Count = count,
                    Value = facetValueBitSet.Value,
                    FacetFieldName = facetFieldInfoToCalculateFor.FieldName
                };

                calculations++;
                if (isSelected)
                    calculatedFacetCounts.AddToSelected(match);
                else
                    calculatedFacetCounts.AddToNonSelected(match);
            }

            return calculatedFacetCounts.GetList();
        }
예제 #8
0
        /// <summary>
        /// Get the id set for the filter.
        /// </summary>
        /// <param name="reader">The reader.</param>
        /// <returns>The filter set to use.</returns>
        public override DocIdSet GetDocIdSet(IndexReader reader)
        {
            OpenBitSetDISI res = null;

            if (shouldFilters != null)
            {
                for (int i = 0; i < shouldFilters.Count; i++)
                {
                    if (res == null)
                    {
                        res = new OpenBitSetDISI(GetDISI(shouldFilters, i, reader), reader.MaxDoc());
                    }
                    else
                    {
                        DocIdSet dis = ((Filter)shouldFilters[i]).GetDocIdSet(reader);
                        if (dis is OpenBitSet)
                        {
                            // optimized case for OpenBitSets
                            res.Or((OpenBitSet)dis);
                        }
                        else
                        {
                            res.InPlaceOr(GetDISI(shouldFilters, i, reader));
                        }
                    }
                }
            }

            if (notFilters != null)
            {
                for (int i = 0; i < notFilters.Count; i++)
                {
                    if (res == null)
                    {
                        res = new OpenBitSetDISI(GetDISI(notFilters, i, reader), reader.MaxDoc());
                        res.Flip(0, reader.MaxDoc()); // NOTE: may set bits on deleted docs
                    }
                    else
                    {
                        DocIdSet dis = ((Filter)notFilters[i]).GetDocIdSet(reader);
                        if (dis is OpenBitSet)
                        {
                            // optimized case for OpenBitSets
                            res.AndNot((OpenBitSet)dis);
                        }
                        else
                        {
                            res.InPlaceNot(GetDISI(notFilters, i, reader));
                        }
                    }
                }
            }

            if (mustFilters != null)
            {
                for (int i = 0; i < mustFilters.Count; i++)
                {
                    if (res == null)
                    {
                        res = new OpenBitSetDISI(GetDISI(mustFilters, i, reader), reader.MaxDoc());
                    }
                    else
                    {
                        DocIdSet dis = ((Filter)mustFilters[i]).GetDocIdSet(reader);
                        if (dis is OpenBitSet)
                        {
                            // optimized case for OpenBitSets
                            res.And((OpenBitSet)dis);
                        }
                        else
                        {
                            res.InPlaceAnd(GetDISI(mustFilters, i, reader));
                        }
                    }
                }
            }

            if (res != null)
                return FinalResult(res, reader.MaxDoc());

            else
            {
                //TODO: 2.- change return DocIdSet.EMPTY_DOCIDSET;
                return null;
            }
        }
예제 #9
0
 // TODO: in 3.0, instead of removing this deprecated
 // method, make it a no-op and mark it final
 /** Provide a SortedVIntList when it is definitely smaller
  * than an OpenBitSet.
  * @deprecated Either use CachingWrapperFilter, or
  * switch to a different DocIdSet implementation yourself. */
 protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs)
 {
     return (result.Cardinality() < (maxDocs / 9))
       ? (DocIdSet)new SortedVIntList(result)
       : (DocIdSet)result;
 }
        public SimpleFacetedSearch(IndexReader reader, string[] groupByFields)
        {
            this._Reader = reader;

            List<FieldValuesBitSets> fieldValuesBitSets = new List<FieldValuesBitSets>();

            //STEP 1
            //f1 = A, B
            //f2 = I, J
            //f3 = 1, 2, 3
            int maxFacets = 1;
            List<List<string>> inputToCP = new List<List<string>>();
            foreach (string field in groupByFields)
            {
                FieldValuesBitSets f = new FieldValuesBitSets(reader, field);
                maxFacets *= f.FieldValueBitSetPair.Count;
                if (maxFacets > MAX_FACETS) throw new Exception("Facet count exceeded " + MAX_FACETS);
                fieldValuesBitSets.Add(f);
                inputToCP.Add(f.FieldValueBitSetPair.Keys.ToList());
            }

            //STEP 2
            // comb1: A I 1
            // comb2: A I 2 etc.
            var cp = inputToCP.CartesianProduct();

            //SETP 3
            //create a single BitSet for each combination
            //BitSet1: A AND I AND 1
            //BitSet2: A AND I AND 2 etc.
            //and remove impossible comb's (for ex, B J 3) from list.
            Parallel.ForEach(cp, combinations =>
            {
                OpenBitSetDISI bitSet = new OpenBitSetDISI(_Reader.MaxDoc());
                bitSet.Set(0, bitSet.Size());

                List<string> comb = combinations.ToList();

                for (int j = 0; j < comb.Count; j++)
                {
                    bitSet.And(fieldValuesBitSets[j].FieldValueBitSetPair[comb[j]]);
                }

                //STEP 3
                if (bitSet.Cardinality() > 0)
                {
                    lock(_Groups)
                        _Groups.Add(new KeyValuePair<List<string>, OpenBitSetDISI>(comb, bitSet));
                }
            });

            //Now _Groups has 7 rows (as <List<string>, BitSet> pairs) 
        }
예제 #11
0
        public IEnumerable<IHit> Query(int pageIndex, int pageSize, out int totalCount, out IEnumerable<FacetGroup> facetedResults)
        {
            totalCount = 0;
            facetedResults = null;

            if (searchPaths == null || searchPaths.Count <= 0)
                searchPaths.AddRange(indexPaths.Values.Select(o => o.Path));

            List<LuceneHit> results = new List<LuceneHit>();

            List<IndexSearcher> subSearchs = new List<IndexSearcher>();

            searchPaths.ForEach(o => subSearchs.Add(new IndexSearcher(FSDirectory.Open(o))));

            if (facetFields != null && facetFields.Count > 0)
            {
                var facetGroups = new List<FacetGroup>();
                var mainQueryFilter = new CachingWrapperFilter(new QueryWrapperFilter(query));
                MultiReader readers = new MultiReader(subSearchs.Select(o => o.IndexReader).ToArray());

                foreach (var facetField in facetFields)
                {
                    FacetGroup fg = new FacetGroup();
                    fg.FieldName = facetFieldNameProvider.GetMapName(TypeName, facetField);
                    var items = new List<FacetItem>();

                    var allDistinctField = FieldCache_Fields.DEFAULT.GetStrings(readers, facetField).Distinct().ToArray();
                    int totalHits = 0;

                    Parallel.ForEach(allDistinctField, fieldValue =>
                        {
                        //foreach (var fieldValue in allDistinctField)
                        //{
                            var facetQuery = new TermQuery(new Term(facetField, fieldValue));
                            var facetQueryFilter = new CachingWrapperFilter(new QueryWrapperFilter(facetQuery));

                            var bs = new OpenBitSetDISI(facetQueryFilter.GetDocIdSet(readers).Iterator(), readers.MaxDoc);
                            bs.InPlaceAnd(mainQueryFilter.GetDocIdSet(readers).Iterator());
                            int count = (Int32)bs.Cardinality();

                            FacetItem item = new FacetItem();
                            item.GroupValue = fieldValue;
                            item.Count = count;

                            items.Add(item);
                            totalHits += count;
                        }
                    );

                    fg.FacetItems = items.OrderByDescending(o => o.Count);
                    fg.TotalHits = totalHits;

                    facetGroups.Add(fg);
                }

                facetedResults = facetGroups.OrderBy(o => o.FieldName);
            }
            ParallelMultiSearcher searcher = new ParallelMultiSearcher(subSearchs.ToArray());
            Sort sort = null;
            if (sortFields != null && sortFields.Count > 0)
            {
                sort = new Sort(sortFields.ToArray());
            }

            int maxDoc = searcher.MaxDoc;
            int startIndex = 0;
            if (pageIndex >= 0 && pageSize > 0)
            {
                startIndex = pageIndex * pageSize;
                maxDoc = pageSize * (pageIndex + 1);
            }
            var docs = sort == null ?  searcher.Search(query, null, maxDoc) : searcher.Search(query, null, maxDoc, sort);
            totalCount = docs.TotalHits;
            int endIndex = docs.TotalHits - startIndex;
            for (int i = startIndex; i < endIndex; i++)
            {
                LuceneHit h = new LuceneHit(TypeName, DocumentBuilder, searcher.Doc(docs.ScoreDocs[i].Doc));
                results.Add(h);
            }
            return results;
        }
 private static long CalculateFacetCount(DocIdSet baseBitSet, DocIdSet filterDocSet)
 {
     var baseDisi = new OpenBitSetDISI(baseBitSet.Iterator(), 25000);
     var filterIterator = filterDocSet.Iterator();
     baseDisi.InPlaceAnd(filterIterator);
     var total = baseDisi.Cardinality();
     return total;
 }
예제 #13
0
        public OpenBitSetDISI GetBitSetFromFilter(Filter filter, IndexReader indexReader)
        {
            var bitSet = new OpenBitSetDISI(filter.GetDocIdSet(indexReader).Iterator(), indexReader.MaxDoc());

            return bitSet;
        }
예제 #14
0
 public void AddValue(string value, OpenBitSetDISI matchingDocuments, Filter facetFilter)
 {
     this.Values.Add(new Tuple<string, OpenBitSetDISI, Filter>(value, matchingDocuments, facetFilter));
 }