/// <summary>
/// Groups the documents of an index by a single field and reports the hit count of each group.
/// </summary>
/// <param name="indexName">Name used to obtain the searcher, mapping and analyzer from <c>_resources</c>.</param>
/// <param name="groupField">Field whose values define the groups.</param>
/// <param name="term">
/// Query text parsed against the mapping's primary fields. When null or empty, all documents are matched.
/// </param>
/// <returns>
/// A lazily evaluated sequence with one dictionary per group, holding the keys <c>"value"</c>
/// (the group value) and <c>"_hits"</c> (the group's total hits).
/// </returns>
public IEnumerable<IDictionary<string, object>> GroupBy(string indexName, string groupField, string term)
{
    // Group sort and fill-sort-fields are not configured here, so GroupingSearch defaults apply.
    var groupingSearch = new GroupingSearch(groupField);
    var searcher = _resources.GetIndexSearcher(indexName);
    var mapping = _resources.GetMapping(indexName);

    Query query;
    if (String.IsNullOrEmpty(term))
    {
        query = new MatchAllDocsQuery();
    }
    else
    {
        var parser = new MultiFieldQueryParser(
            AppLuceneVersion,
            mapping.PrimaryFields.ToArray(),
            _resources.GetAnalyzer(indexName));
        query = parser.Parse(term);
    }

    var topGroups = groupingSearch.Search(searcher, query, 0, 100000);

    return topGroups.Groups.Select(g =>
    {
        // A BytesRef is a slice (offset + length) of a byte array that may be shared or oversized.
        // Utf8ToString decodes only that slice; decoding the whole Bytes array could append
        // unrelated bytes to the group value.
        var bytesRef = g.GroupValue as BytesRef;
        object value = bytesRef != null ? (object)bytesRef.Utf8ToString() : g.GroupValue;

        return new Dictionary<string, object>()
        {
            { "value", value },
            { "_hits", g.TotalHits },
        };
    });
}
/// <summary>
/// Two-pass grouping search with caching (kinda working). Groups by "Category", then prints the
/// group totals and the documents of each group to the console.
/// </summary>
/// <param name="directory">Index directory to open a reader on.</param>
private void TwoPassGroupingSearch(Directory directory)
{
    // "using" releases the reader even when the search or the printing below throws.
    using (var indexReader = DirectoryReader.Open(directory))
    {
        var indexSearcher = new IndexSearcher(indexReader);

        //GroupingSearch groupingSearch = new GroupingSearch("Repetition");
        GroupingSearch groupingSearch = new GroupingSearch("Category");
        groupingSearch.SetAllGroups(true);
        groupingSearch.SetGroupDocsLimit(10);
        groupingSearch.SetCachingInMB(40.0, true);

        // Alternative queries: pass any of these to Search below to try different result sets.
        MatchAllDocsQuery all_query = new MatchAllDocsQuery();
        NumericRangeQuery<int> numeric_query = NumericRangeQuery.NewInt32Range("Repetition", 1, 2, true, false);
        TermQuery term_query = new TermQuery(new Term("Category", "Cat 1"));

        var topGroups = groupingSearch.Search(indexSearcher, term_query, 0, 10);

        Console.WriteLine("Total group count: " + topGroups.TotalGroupCount);
        Console.WriteLine("Total group hit count: " + topGroups.TotalGroupedHitCount);

        foreach (var groupDocs in topGroups.Groups)
        {
            Console.WriteLine("Group: " + ((BytesRef)groupDocs.GroupValue).Utf8ToString());

            foreach (var scoreDoc in groupDocs.ScoreDocs)
            {
                // Load the stored document so its fields can be printed.
                var doc = indexSearcher.Doc(scoreDoc.Doc);
                Console.WriteLine("Category: " + doc.GetField("Category").GetStringValue()
                    + ", BookId: " + doc.GetField("BookId").GetStringValue()
                    + ", Rep: " + doc.GetField("Repetition").GetInt32Value());
            }
        }
    }
}
/// <summary>
/// Lookup by group int value (alternative syntax): groups via a block-end filter
/// ("groupEnd" = "x") and searches with a numeric range query on "Repetition".
/// </summary>
/// <param name="directory">Index directory to open a reader on.</param>
private void LookupGroupsByIntAlt(Directory directory)
{
    Filter groupEndDocs = new CachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Term("groupEnd", "x"))));

    // "using" releases the reader even when the search throws.
    using (IndexReader indexReader = DirectoryReader.Open(directory))
    {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        GroupingSearch groupingSearch = new GroupingSearch(groupEndDocs);
        groupingSearch.SetGroupSort(new Sort());
        groupingSearch.SetIncludeScores(true);

        Query query = NumericRangeQuery.NewInt32Range("Repetition", 1, 2, true, false);

        // Arguments: searcher, query, groupOffset, groupLimit. The result is not used by this sample.
        groupingSearch.Search(indexSearcher, query, 0, 10);
    }
}
/// <summary>
/// Lookup by group string value (alternative syntax): groups via a block-end filter
/// ("groupEnd" = "x") and searches with a term query on "Category".
/// </summary>
/// <param name="directory">Index directory to open a reader on.</param>
private void LookupGroupsByStringAlt(Directory directory)
{
    Filter groupEndDocs = new CachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Term("groupEnd", "x"))));

    // "using" releases the reader even when the search throws.
    using (IndexReader indexReader = DirectoryReader.Open(directory))
    {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);

        GroupingSearch groupingSearch = new GroupingSearch(groupEndDocs);
        groupingSearch.SetGroupSort(new Sort());
        groupingSearch.SetIncludeScores(true);

        TermQuery query = new TermQuery(new Term("Category", "Cat 1"));

        // Arguments: searcher, query, groupOffset, groupLimit. The result is not used by this sample.
        groupingSearch.Search(indexSearcher, query, 0, 10);
    }
}
/// <summary>
/// Groups the documents matching the current query provider's boolean query by
/// <paramref name="fieldName"/> and returns one page of (group value, hit count) pairs,
/// ordered by hit count descending.
/// </summary>
/// <param name="skip">Number of groups to skip before the returned page.</param>
/// <param name="pageSize">Maximum number of groups to return.</param>
/// <param name="fieldName">Field whose values define the groups.</param>
/// <returns>
/// The requested page as a materialized list; empty when there are no groups or the page is
/// past the end. Groups without a value are excluded.
/// </returns>
/// <remarks>
/// When at least one group is found, <c>_groupCount</c> is set to the number of groups that
/// have a value (before paging).
/// </remarks>
private IEnumerable<KeyValuePair<string, int>> _GroupBy(int skip, int pageSize, string fieldName)
{
    const int groupOffset = 0;
    const int groupLimit = 10000000;

    GroupingSearch groupingSearch = new GroupingSearch(fieldName);
    groupingSearch.SetGroupSort(Sort.RELEVANCE);
    groupingSearch.SetFillSortFields(false);
    groupingSearch.SetCachingInMB(40.0, true);
    groupingSearch.SetAllGroups(true);

    string rawQuery = _queryProvider.GetBooleanQuery().ToString();
    if (!rawQuery.Contains("isdeleted"))
    {
        // The leading space matters: without it the clause is glued onto the preceding term
        // (e.g. "name:foo+isdeleted:0") because '+' is a legal character inside a term.
        rawQuery += " +isdeleted:0";
    }

    var queryParser = new QueryParser(LuceneVersion.LUCENE_48, "isdeleted", analyzer);
    queryParser.AllowLeadingWildcard = _queryProvider.GetContainsWildCard();
    var query = queryParser.Parse(rawQuery);

    // The reader is opened only once the query is ready and is always released by "using".
    // Every result is materialized with ToList() before the reader is disposed.
    using (var reader = DirectoryReader.Open(_directory))
    {
        var searcher = new Lucene.Net.Search.IndexSearcher(reader);
        ITopGroups<object> result = groupingSearch.Search(searcher, query, groupOffset, groupLimit);

        if (result.Groups == null || !result.Groups.Any())
        {
            return new List<KeyValuePair<string, int>>();
        }

        // Drop the group of documents that have no value for the field, wherever it ranks,
        // then order the remaining groups by hit count.
        var groups = result.Groups
            .Where(g => g.GroupValue != null)
            .OrderByDescending(g => g.TotalHits)
            .ToList();

        _groupCount = groups.Count;

        return groups
            .Skip(skip)
            .Take(pageSize)
            .Select(g => new KeyValuePair<string, int>(((BytesRef)g.GroupValue).Utf8ToString(), g.TotalHits))
            .ToList();
    }
}
/// <summary>
/// Indexes seven documents (three author groups plus one document without an author), then
/// checks grouping by the "author" field and grouping by the "groupend" block-end filter
/// for the term query content:random.
/// </summary>
public virtual void TestBasic()
{
    const string groupField = "author";
    FieldType storedOnly = new FieldType { Stored = true };

    Directory dir = NewDirectory();
    RandomIndexWriter w = new RandomIndexWriter(
        Random(),
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
    bool canUseIDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);

    List<Document> block = new List<Document>();

    // 0
    Document doc0 = new Document();
    AddGroupField(doc0, groupField, "author1", canUseIDV);
    doc0.Add(new TextField("content", "random text", Field.Store.YES));
    doc0.Add(new Field("id", "1", storedOnly));
    block.Add(doc0);

    // 1
    Document doc1 = new Document();
    AddGroupField(doc1, groupField, "author1", canUseIDV);
    doc1.Add(new TextField("content", "some more random text", Field.Store.YES));
    doc1.Add(new Field("id", "2", storedOnly));
    block.Add(doc1);

    // 2 -- last document of the author1 block
    Document doc2 = new Document();
    AddGroupField(doc2, groupField, "author1", canUseIDV);
    doc2.Add(new TextField("content", "some more random textual data", Field.Store.YES));
    doc2.Add(new Field("id", "3", storedOnly));
    doc2.Add(new StringField("groupend", "x", Field.Store.NO));
    block.Add(doc2);
    w.AddDocuments(block);
    block.Clear();

    // 3 -- a block of its own, added directly
    Document doc3 = new Document();
    AddGroupField(doc3, groupField, "author2", canUseIDV);
    doc3.Add(new TextField("content", "some random text", Field.Store.YES));
    doc3.Add(new Field("id", "4", storedOnly));
    doc3.Add(new StringField("groupend", "x", Field.Store.NO));
    w.AddDocument(doc3);

    // 4
    Document doc4 = new Document();
    AddGroupField(doc4, groupField, "author3", canUseIDV);
    doc4.Add(new TextField("content", "some more random text", Field.Store.YES));
    doc4.Add(new Field("id", "5", storedOnly));
    block.Add(doc4);

    // 5 -- last document of the author3 block
    Document doc5 = new Document();
    AddGroupField(doc5, groupField, "author3", canUseIDV);
    doc5.Add(new TextField("content", "random", Field.Store.YES));
    doc5.Add(new Field("id", "6", storedOnly));
    doc5.Add(new StringField("groupend", "x", Field.Store.NO));
    block.Add(doc5);
    w.AddDocuments(block);
    block.Clear();

    // 6 -- no author field
    Document doc6 = new Document();
    doc6.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
    doc6.Add(new Field("id", "6", storedOnly));
    doc6.Add(new StringField("groupend", "x", Field.Store.NO));
    w.AddDocument(doc6);

    IndexSearcher indexSearcher = NewSearcher(w.Reader);
    w.Dispose();

    // Part 1: group by the author field, groups sorted by relevance.
    Sort groupSort = Sort.RELEVANCE;
    GroupingSearch groupingSearch = CreateRandomGroupingSearch(groupField, groupSort, 5, canUseIDV);

    ITopGroups<object> groups = groupingSearch.Search(
        indexSearcher,
        (Filter)null,
        new TermQuery(new Index.Term("content", "random")),
        0,
        10);

    assertEquals(7, groups.TotalHitCount);
    assertEquals(7, groups.TotalGroupedHitCount);
    assertEquals(4, groups.Groups.Length);

    // relevance order: 5, 0, 3, 4, 1, 2, 6
    // the later a document is added the higher this docId value

    IGroupDocs<object> author3Group = groups.Groups[0];
    CompareGroupValue("author3", author3Group);
    assertEquals(2, author3Group.ScoreDocs.Length);
    assertEquals(5, author3Group.ScoreDocs[0].Doc);
    assertEquals(4, author3Group.ScoreDocs[1].Doc);
    assertTrue(author3Group.ScoreDocs[0].Score > author3Group.ScoreDocs[1].Score);

    IGroupDocs<object> author1Group = groups.Groups[1];
    CompareGroupValue("author1", author1Group);
    assertEquals(3, author1Group.ScoreDocs.Length);
    assertEquals(0, author1Group.ScoreDocs[0].Doc);
    assertEquals(1, author1Group.ScoreDocs[1].Doc);
    assertEquals(2, author1Group.ScoreDocs[2].Doc);
    assertTrue(author1Group.ScoreDocs[0].Score > author1Group.ScoreDocs[1].Score);
    assertTrue(author1Group.ScoreDocs[1].Score > author1Group.ScoreDocs[2].Score);

    IGroupDocs<object> author2Group = groups.Groups[2];
    CompareGroupValue("author2", author2Group);
    assertEquals(1, author2Group.ScoreDocs.Length);
    assertEquals(3, author2Group.ScoreDocs[0].Doc);

    IGroupDocs<object> noAuthorGroup = groups.Groups[3];
    CompareGroupValue(null, noAuthorGroup);
    assertEquals(1, noAuthorGroup.ScoreDocs.Length);
    assertEquals(6, noAuthorGroup.ScoreDocs[0].Doc);

    // Part 2: group by document blocks, identified by the last document of each block.
    Filter lastDocInBlock = new CachingWrapperFilter(
        new QueryWrapperFilter(new TermQuery(new Index.Term("groupend", "x"))));
    groupingSearch = new GroupingSearch(lastDocInBlock);

    groups = groupingSearch.Search(
        indexSearcher,
        null,
        new TermQuery(new Index.Term("content", "random")),
        0,
        10);

    assertEquals(7, groups.TotalHitCount);
    assertEquals(7, groups.TotalGroupedHitCount);
    assertEquals(4, groups.TotalGroupCount.GetValueOrDefault());
    assertEquals(4, groups.Groups.Length);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Indexes a single document with group "foo" and checks that, with all-groups collection
/// enabled, the search reports one hit and one matching group.
/// </summary>
public virtual void TestSetAllGroups()
{
    Directory dir = NewDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(
        Random(),
        dir,
        NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));

    Document single = new Document();
    single.Add(NewField("group", "foo", StringField.TYPE_NOT_STORED));
    writer.AddDocument(single);

    IndexSearcher indexSearcher = NewSearcher(writer.Reader);
    writer.Dispose();

    GroupingSearch allGroupsSearch = new GroupingSearch("group");
    allGroupsSearch.SetAllGroups(true);

    ITopGroups<object> found = allGroupsSearch.Search(
        indexSearcher,
        null,
        new TermQuery(new Index.Term("group", "foo")),
        0,
        10);

    assertEquals(1, found.TotalHitCount);
    // The total group count is deliberately not asserted here (that check is disabled).
    assertEquals(1, found.TotalGroupedHitCount);
    assertEquals(1, allGroupsSearch.GetAllMatchingGroups().Count);

    indexSearcher.IndexReader.Dispose();
    dir.Dispose();
}
/// <summary>
/// Builds a <c>GroupingSearch</c> for <paramref name="groupField"/>, randomly choosing between
/// the value-source based and the field-name based constructor, and randomly enabling a 4 MB cache.
/// </summary>
/// <param name="groupField">Field to group by.</param>
/// <param name="groupSort">Sort applied to the groups.</param>
/// <param name="docsInGroup">Maximum number of documents kept per group.</param>
/// <param name="canUseIDV">Not read by this method.</param>
/// <returns>The configured grouping search.</returns>
private GroupingSearch CreateRandomGroupingSearch(string groupField, Sort groupSort, int docsInGroup, bool canUseIDV)
{
    // First random draw: which constructor flavour to use.
    GroupingSearch search = Random().nextBoolean()
        ? new GroupingSearch(new BytesRefFieldSource(groupField), new Hashtable())
        : new GroupingSearch(groupField);

    search.SetGroupSort(groupSort);
    search.SetGroupDocsLimit(docsInGroup);

    // Second random draw: whether to enable caching.
    bool enableCaching = Random().nextBoolean();
    if (enableCaching)
    {
        search.SetCachingInMB(4.0, true);
    }

    return search;
}