/// <summary>
/// Builds a bit set with one bit per document of <paramref name="indexReader"/>,
/// set for every document matched by a term query for <paramref name="term"/> on <c>this.Field</c>.
/// </summary>
/// <param name="term">Term text to look up in <c>this.Field</c>.</param>
/// <param name="indexReader">Reader whose documents are tested.</param>
/// <returns>A bit set sized to the reader's <c>MaxDoc()</c>.</returns>
public OpenBitSetDISI TermToBitSet(string term, IndexReader indexReader)
{
    var termFilter = new CachingWrapperFilter(
        new QueryWrapperFilter(
            new TermQuery(new Term(this.Field, term))));

    var matchingDocs = termFilter.GetDocIdSet(indexReader).Iterator();
    return new OpenBitSetDISI(matchingDocs, indexReader.MaxDoc());
}
/// <summary>
/// Counts, for every value of <paramref name="facet"/>, how many documents match
/// <paramref name="query"/>, the optional <paramref name="filter"/>, and that facet value.
/// </summary>
/// <param name="facet">Facet whose values (text in Item1, matching-document bit set in Item2) are counted.</param>
/// <param name="query">Filter describing the base query.</param>
/// <param name="filter">Optional extra restriction; ignored when null.</param>
/// <param name="indexReader">Reader the bit sets are computed against.</param>
/// <returns>One <c>FacetMatch</c> per facet value, including values with a zero count.</returns>
private List<FacetMatch> FindMatchesInQuery(Facet facet, Filter query, Filter filter, IndexReader indexReader)
{
    var maxDoc = indexReader.MaxDoc();

    // The query/filter intersection does not depend on the facet value, so build it once.
    var candidates = new OpenBitSetDISI(query.GetDocIdSet(indexReader).Iterator(), maxDoc);
    if (filter != null)
    {
        // Size the filter bit set by the reader's MaxDoc: a fixed capacity of 1000 would
        // ignore filter matches with higher document ids and under-count on larger indexes.
        candidates.And(new OpenBitSetDISI(filter.GetDocIdSet(indexReader).Iterator(), maxDoc));
    }

    return facet.Values
        .Select(value => new FacetMatch()
        {
            // Population count of the intersection; neither bit set is modified.
            Count = OpenBitSet.IntersectionCount(candidates, value.Item2),
            Value = value.Item1,
            Id = facet.Id
        })
        .ToList();
}
/// <summary>
/// Combines <paramref name="dis"/> into <paramref name="result"/> in place using the
/// boolean operation selected by <paramref name="logic"/> (OR, AND, ANDNOT or XOR).
/// Any other value is retried with DEFAULT.
/// </summary>
/// <param name="result">Accumulator that is modified in place.</param>
/// <param name="logic">Operation code.</param>
/// <param name="dis">Document id set to combine; null (or a null iterator) is treated as empty.</param>
private void doChain(OpenBitSetDISI result, int logic, DocIdSet dis)
{
    var bitSet = dis as OpenBitSet;
    if (bitSet != null)
    {
        // Optimized case: word-wise operations directly on the other bit set.
        switch (logic)
        {
            case OR:
                result.Or(bitSet);
                return;
            case AND:
                result.And(bitSet);
                return;
            case ANDNOT:
                result.AndNot(bitSet);
                return;
            case XOR:
                result.Xor(bitSet);
                return;
            default:
                doChain(result, DEFAULT, dis);
                return;
        }
    }

    // General case: walk an iterator, substituting the empty set when none is available.
    var iterator = dis == null ? null : dis.Iterator();
    if (iterator == null)
    {
        iterator = DocIdSet.EMPTY_DOCIDSET.Iterator();
    }

    switch (logic)
    {
        case OR:
            result.InPlaceOr(iterator);
            return;
        case AND:
            result.InPlaceAnd(iterator);
            return;
        case ANDNOT:
            result.InPlaceNot(iterator);
            return;
        case XOR:
            result.InPlaceXor(iterator);
            return;
        default:
            doChain(result, DEFAULT, dis);
            return;
    }
}
/// <summary>
/// Hook that turns the accumulated bit set into the DocIdSet handed back to the caller.
/// This implementation returns <paramref name="result"/> unchanged.
/// </summary>
/// <remarks>
/// Deprecated: either use CachingWrapperFilter, or switch to a different DocIdSet
/// implementation yourself. This method will be removed in Lucene 4.0.
/// </remarks>
/// <param name="result">The accumulated bit set.</param>
/// <param name="maxDocs">Not used by this implementation.</param>
/// <returns>The same instance that was passed in as <paramref name="result"/>.</returns>
protected DocIdSet finalResult(OpenBitSetDISI result, int maxDocs)
{
    DocIdSet unchanged = result;
    return unchanged;
}
/// <summary>
/// Creates the starting bit set for a chain evaluation.
/// </summary>
/// <param name="reader">Reader whose MaxDoc sizes the bit set.</param>
/// <param name="logic">Operation code of the chain.</param>
/// <param name="index">
/// Single-element array used as a mutable cursor into <c>chain</c>; element 0 is advanced
/// when the first chain entry is consumed here.
/// </param>
/// <returns>
/// For AND: the documents of the first chain entry. For ANDNOT: the complement of the first
/// chain entry. Otherwise: an empty bit set.
/// </returns>
private OpenBitSetDISI initialResult(IndexReader reader, int logic, int[] index)
{
    if (logic != AND && logic != ANDNOT)
    {
        return new OpenBitSetDISI(reader.MaxDoc);
    }

    // An AND against an all-false bit set would always yield zero results, so the first
    // chain entry seeds the result instead of being combined with an empty set.
    var seeded = new OpenBitSetDISI(getDISI(chain[index[0]], reader), reader.MaxDoc);
    if (logic == ANDNOT)
    {
        seeded.Flip(0, reader.MaxDoc); // NOTE: may set bits for deleted docs.
    }

    ++index[0];
    return seeded;
}
/// <summary>
/// Runs the query built by <c>CreateQuery()</c> and returns the matching documents as a bit set.
/// </summary>
/// <returns>
/// The matching documents wrapped in a <c>SearchBits</c>, or null when the index searcher
/// cannot be opened.
/// </returns>
public ISearchBits GetBits()
{
    var query = CreateQuery();

    IndexSearcher searcher;
    try
    {
        searcher = new IndexSearcher(_directory, true);
    }
    catch
    {
        // The index might not exist if it has been rebuilt.
        Logger.Information("Attempt to read a none existing index");
        return null;
    }

    try
    {
        var queryFilter = new QueryWrapperFilter(query);
        var matchingDocs = queryFilter.GetDocIdSet(searcher.GetIndexReader());
        return new SearchBits(new OpenBitSetDISI(matchingDocs.Iterator(), searcher.MaxDoc()));
    }
    finally
    {
        // Always release the searcher, whether or not the bit set was built.
        searcher.Close();
    }
}
/// <summary>
/// Calculates facet counts for one facet field: for each known value of the field, the number
/// of documents that match both the faceted query and that value.
/// </summary>
/// <param name="baseQueryWithoutFacetDrilldown">Base query, passed on to <c>CreateFacetedQuery</c>.</param>
/// <param name="allFacetFieldInfos">All facet fields of the request, passed on to <c>CreateFacetedQuery</c>.</param>
/// <param name="facetFieldInfoToCalculateFor">The facet field whose values are counted.</param>
/// <returns>The matches collected by the <c>ResultCollection</c>; values with a zero count are omitted.</returns>
private IEnumerable<FacetMatch> FindMatchesInQuery(Query baseQueryWithoutFacetDrilldown, IList<FacetFieldInfo> allFacetFieldInfos, FacetFieldInfo facetFieldInfoToCalculateFor)
{
    var fieldName = facetFieldInfoToCalculateFor.FieldName;
    var queryFilter = new CachingWrapperFilter(new QueryWrapperFilter(CreateFacetedQuery(baseQueryWithoutFacetDrilldown, allFacetFieldInfos, fieldName)));
    var bitsQueryWithoutFacetDrilldown = new OpenBitSetDISI(queryFilter.GetDocIdSet(IndexReader).Iterator(), IndexReader.MaxDoc);
    var calculatedFacetCounts = new ResultCollection(facetFieldInfoToCalculateFor);

    foreach (var facetValueBitSet in GetOrCreateFacetBitSet(fieldName).FacetValueBitSetList)
    {
        var isSelected = calculatedFacetCounts.IsSelected(facetValueBitSet.Value);

        // A non-selected value whose total count is already below the current minimum cannot
        // improve the result, so stop once enough results have been collected.
        // NOTE(review): stopping (rather than skipping) presumably relies on the list being
        // ordered by descending count - confirm against GetOrCreateFacetBitSet.
        if (!isSelected
            && facetValueBitSet.Count < calculatedFacetCounts.MinCountForNonSelected
            && calculatedFacetCounts.HaveEnoughResults)
        {
            break;
        }

        var bitset = facetValueBitSet.Bitset ?? CalculateOpenBitSetDisi(fieldName, facetValueBitSet.Value);

        // Count the intersection directly; this leaves the query bit set untouched, so no
        // scratch copy has to be refreshed on every iteration.
        var count = OpenBitSet.IntersectionCount(bitsQueryWithoutFacetDrilldown, bitset);
        if (count == 0)
        {
            continue;
        }

        var match = new FacetMatch
        {
            Count = count,
            Value = facetValueBitSet.Value,
            FacetFieldName = fieldName
        };

        if (isSelected)
        {
            calculatedFacetCounts.AddToSelected(match);
        }
        else
        {
            calculatedFacetCounts.AddToNonSelected(match);
        }
    }

    return calculatedFacetCounts.GetList();
}
/// <summary>
/// Computes the documents accepted by this filter by combining the configured clauses:
/// "should" filters are OR-ed together, "not" filters are subtracted, and "must" filters are
/// AND-ed, in that order.
/// </summary>
/// <param name="reader">The reader.</param>
/// <returns>
/// The combined set passed through <c>FinalResult</c>, or null when no clause contributed a filter.
/// </returns>
public override DocIdSet GetDocIdSet(IndexReader reader)
{
    OpenBitSetDISI res = null;

    if (shouldFilters != null)
    {
        for (int s = 0; s < shouldFilters.Count; s++)
        {
            if (res == null)
            {
                res = new OpenBitSetDISI(GetDISI(shouldFilters, s, reader), reader.MaxDoc());
                continue;
            }

            var shouldBits = ((Filter)shouldFilters[s]).GetDocIdSet(reader) as OpenBitSet;
            if (shouldBits != null)
            {
                // optimized case for OpenBitSets
                res.Or(shouldBits);
            }
            else
            {
                res.InPlaceOr(GetDISI(shouldFilters, s, reader));
            }
        }
    }

    if (notFilters != null)
    {
        for (int n = 0; n < notFilters.Count; n++)
        {
            if (res == null)
            {
                // No positive clause so far: start from the complement of the first "not" filter.
                res = new OpenBitSetDISI(GetDISI(notFilters, n, reader), reader.MaxDoc());
                res.Flip(0, reader.MaxDoc()); // NOTE: may set bits on deleted docs
                continue;
            }

            var notBits = ((Filter)notFilters[n]).GetDocIdSet(reader) as OpenBitSet;
            if (notBits != null)
            {
                // optimized case for OpenBitSets
                res.AndNot(notBits);
            }
            else
            {
                res.InPlaceNot(GetDISI(notFilters, n, reader));
            }
        }
    }

    if (mustFilters != null)
    {
        for (int m = 0; m < mustFilters.Count; m++)
        {
            if (res == null)
            {
                res = new OpenBitSetDISI(GetDISI(mustFilters, m, reader), reader.MaxDoc());
                continue;
            }

            var mustBits = ((Filter)mustFilters[m]).GetDocIdSet(reader) as OpenBitSet;
            if (mustBits != null)
            {
                // optimized case for OpenBitSets
                res.And(mustBits);
            }
            else
            {
                res.InPlaceAnd(GetDISI(mustFilters, m, reader));
            }
        }
    }

    if (res == null)
    {
        //TODO: 2.- change return DocIdSet.EMPTY_DOCIDSET;
        return null;
    }

    return FinalResult(res, reader.MaxDoc());
}
// TODO: in 3.0, instead of removing this deprecated
// method, make it a no-op and mark it final
/// <summary>
/// Chooses the representation of the final result: a <c>SortedVIntList</c> when fewer than
/// one ninth of <paramref name="maxDocs"/> bits are set, otherwise the bit set itself.
/// </summary>
/// <remarks>
/// Deprecated: either use CachingWrapperFilter, or switch to a different DocIdSet
/// implementation yourself.
/// </remarks>
/// <param name="result">The accumulated bit set.</param>
/// <param name="maxDocs">Document count the cardinality threshold is derived from.</param>
/// <returns>Either a new <c>SortedVIntList</c> built from <paramref name="result"/>, or <paramref name="result"/> itself.</returns>
protected DocIdSet FinalResult(OpenBitSetDISI result, int maxDocs)
{
    var threshold = maxDocs / 9;
    if (result.Cardinality() < threshold)
    {
        return new SortedVIntList(result);
    }

    return result;
}
/// <summary>
/// Precomputes one bit set per combination of values of the <paramref name="groupByFields"/>,
/// keeping only combinations that match at least one document.
/// </summary>
/// <param name="reader">Reader the field values and bit sets are taken from.</param>
/// <param name="groupByFields">Fields to group by; each combination takes one value per field.</param>
/// <exception cref="Exception">The number of value combinations exceeds MAX_FACETS.</exception>
public SimpleFacetedSearch(IndexReader reader, string[] groupByFields)
{
    this._Reader = reader;

    List<FieldValuesBitSets> fieldValuesBitSets = new List<FieldValuesBitSets>();

    //STEP 1
    //f1 = A, B
    //f2 = I, J
    //f3 = 1, 2, 3
    // Accumulate the combination count in a long: an int product could overflow and wrap
    // around to a small value, slipping past the MAX_FACETS check below.
    long maxFacets = 1;
    List<List<string>> inputToCP = new List<List<string>>();
    foreach (string field in groupByFields)
    {
        FieldValuesBitSets f = new FieldValuesBitSets(reader, field);
        maxFacets *= f.FieldValueBitSetPair.Count;
        if (maxFacets > MAX_FACETS) throw new Exception("Facet count exceeded " + MAX_FACETS);
        fieldValuesBitSets.Add(f);
        inputToCP.Add(f.FieldValueBitSetPair.Keys.ToList());
    }

    //STEP 2
    // comb1: A I 1
    // comb2: A I 2 etc.
    var cp = inputToCP.CartesianProduct();

    //STEP 3
    //create a single BitSet for each combination
    //BitSet1: A AND I AND 1
    //BitSet2: A AND I AND 2 etc.
    //and remove impossible comb's (for ex, B J 3) from list.
    Parallel.ForEach(cp, combinations =>
    {
        // Start with every bit set, then narrow down by each field's value.
        OpenBitSetDISI bitSet = new OpenBitSetDISI(_Reader.MaxDoc());
        bitSet.Set(0, bitSet.Size());
        List<string> comb = combinations.ToList();

        for (int j = 0; j < comb.Count; j++)
        {
            bitSet.And(fieldValuesBitSets[j].FieldValueBitSetPair[comb[j]]);
        }

        // Keep only combinations that match at least one document.
        if (bitSet.Cardinality() > 0)
        {
            // _Groups is shared between the parallel workers, so additions are serialized.
            lock (_Groups)
                _Groups.Add(new KeyValuePair<List<string>, OpenBitSetDISI>(comb, bitSet));
        }
    });

    //In the example above, _Groups now has 7 rows (as <List<string>, BitSet> pairs)
}
/// <summary>
/// Executes the current query against the configured index paths and returns one page of hits,
/// optionally with facet counts for every configured facet field.
/// </summary>
/// <param name="pageIndex">Zero-based page number; paging is applied only when this is not negative and <paramref name="pageSize"/> is positive.</param>
/// <param name="pageSize">Number of hits per page.</param>
/// <param name="totalCount">Receives the total number of hits reported by the search.</param>
/// <param name="facetedResults">Receives the facet groups ordered by field name, or null when no facet fields are configured.</param>
/// <returns>The hits of the requested page (all retrieved hits when paging is not applied).</returns>
public IEnumerable<IHit> Query(int pageIndex, int pageSize, out int totalCount, out IEnumerable<FacetGroup> facetedResults)
{
    totalCount = 0;
    facetedResults = null;

    // Fall back to every known index path when no explicit search paths are configured.
    // A null list is handled as well: calling AddRange on it would throw.
    var paths = searchPaths;
    if (paths == null || paths.Count <= 0)
    {
        var allIndexPaths = indexPaths.Values.Select(o => o.Path);
        if (paths == null)
        {
            paths = allIndexPaths.ToList();
        }
        else
        {
            paths.AddRange(allIndexPaths);
        }
    }

    List<LuceneHit> results = new List<LuceneHit>();
    List<IndexSearcher> subSearchs = new List<IndexSearcher>();
    try
    {
        paths.ForEach(o => subSearchs.Add(new IndexSearcher(FSDirectory.Open(o))));

        if (facetFields != null && facetFields.Count > 0)
        {
            var facetGroups = new List<FacetGroup>();
            var mainQueryFilter = new CachingWrapperFilter(new QueryWrapperFilter(query));
            MultiReader readers = new MultiReader(subSearchs.Select(o => o.IndexReader).ToArray());

            foreach (var facetField in facetFields)
            {
                FacetGroup fg = new FacetGroup();
                fg.FieldName = facetFieldNameProvider.GetMapName(TypeName, facetField);

                var items = new List<FacetItem>();
                var allDistinctField = FieldCache_Fields.DEFAULT.GetStrings(readers, facetField).Distinct().ToArray();
                int totalHits = 0;
                var sync = new object();

                Parallel.ForEach(allDistinctField, fieldValue =>
                {
                    var facetQuery = new TermQuery(new Term(facetField, fieldValue));
                    var facetQueryFilter = new CachingWrapperFilter(new QueryWrapperFilter(facetQuery));
                    var bs = new OpenBitSetDISI(facetQueryFilter.GetDocIdSet(readers).Iterator(), readers.MaxDoc);
                    bs.InPlaceAnd(mainQueryFilter.GetDocIdSet(readers).Iterator());
                    int count = (Int32)bs.Cardinality();

                    FacetItem item = new FacetItem();
                    item.GroupValue = fieldValue;
                    item.Count = count;

                    // List<T>.Add and += are not thread-safe; serialize the shared updates.
                    lock (sync)
                    {
                        items.Add(item);
                        totalHits += count;
                    }
                });

                fg.FacetItems = items.OrderByDescending(o => o.Count);
                fg.TotalHits = totalHits;
                facetGroups.Add(fg);
            }

            facetedResults = facetGroups.OrderBy(o => o.FieldName);
        }

        ParallelMultiSearcher searcher = new ParallelMultiSearcher(subSearchs.ToArray());

        Sort sort = null;
        if (sortFields != null && sortFields.Count > 0)
        {
            sort = new Sort(sortFields.ToArray());
        }

        int maxDoc = searcher.MaxDoc;
        int startIndex = 0;
        if (pageIndex >= 0 && pageSize > 0)
        {
            startIndex = pageIndex * pageSize;
            maxDoc = pageSize * (pageIndex + 1);
        }

        // Guard against a zero hit limit (empty index without paging); the search API is
        // expected to reject a non-positive number of requested hits.
        maxDoc = Math.Max(1, maxDoc);

        var docs = sort == null ? searcher.Search(query, null, maxDoc) : searcher.Search(query, null, maxDoc, sort);
        totalCount = docs.TotalHits;

        // ScoreDocs holds at most maxDoc entries, so the page ends where the array ends.
        // Bounding the loop by TotalHits - startIndex either ran past the array or cut pages short.
        for (int i = startIndex; i < docs.ScoreDocs.Length; i++)
        {
            LuceneHit h = new LuceneHit(TypeName, DocumentBuilder, searcher.Doc(docs.ScoreDocs[i].Doc));
            results.Add(h);
        }

        return results;
    }
    finally
    {
        // Release the searchers (and the readers they opened) once the hits are materialized.
        foreach (var subSearch in subSearchs)
        {
            subSearch.Dispose();
        }
    }
}
/// <summary>
/// Counts the documents that are present in both <paramref name="baseBitSet"/> and
/// <paramref name="filterDocSet"/>.
/// </summary>
/// <param name="baseBitSet">First document id set; null is treated as empty.</param>
/// <param name="filterDocSet">Second document id set; null is treated as empty.</param>
/// <returns>The size of the intersection.</returns>
private static long CalculateFacetCount(DocIdSet baseBitSet, DocIdSet filterDocSet)
{
    if (baseBitSet == null || filterDocSet == null)
    {
        return 0;
    }

    var baseIterator = baseBitSet.Iterator();
    var filterIterator = filterDocSet.Iterator();
    if (baseIterator == null || filterIterator == null)
    {
        return 0;
    }

    // Collect the base documents into a bit set that grows on demand, so no document id
    // is dropped the way a fixed capacity (previously 25000) would drop higher ids.
    var baseDocs = new OpenBitSet();
    int doc;
    while ((doc = baseIterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        baseDocs.Set(doc);
    }

    // Count the filter documents that are also in the base set; Get() returns false for
    // ids beyond the bit set's current size.
    long total = 0;
    while ((doc = filterIterator.NextDoc()) != DocIdSetIterator.NO_MORE_DOCS)
    {
        if (baseDocs.Get(doc))
        {
            total++;
        }
    }

    return total;
}
/// <summary>
/// Materializes the documents accepted by <paramref name="filter"/> into a bit set sized to
/// the reader's <c>MaxDoc()</c>.
/// </summary>
/// <param name="filter">Filter to evaluate.</param>
/// <param name="indexReader">Reader the filter is evaluated against.</param>
/// <returns>A new bit set with one bit set per accepted document.</returns>
public OpenBitSetDISI GetBitSetFromFilter(Filter filter, IndexReader indexReader)
{
    var acceptedDocs = filter.GetDocIdSet(indexReader).Iterator();
    var capacity = indexReader.MaxDoc();
    return new OpenBitSetDISI(acceptedDocs, capacity);
}
/// <summary>
/// Appends a facet value to <c>Values</c> as a (value, matching documents, filter) tuple.
/// </summary>
/// <param name="value">The facet value text.</param>
/// <param name="matchingDocuments">Bit set stored alongside the value.</param>
/// <param name="facetFilter">Filter stored alongside the value.</param>
public void AddValue(string value, OpenBitSetDISI matchingDocuments, Filter facetFilter)
{
    var entry = Tuple.Create(value, matchingDocuments, facetFilter);
    this.Values.Add(entry);
}