Ejemplo n.º 1
0
        /// <summary>
        /// Groups the documents of an index by <paramref name="groupField"/> and reports
        /// the number of hits in each group.
        /// </summary>
        /// <param name="indexName">Name passed to <c>_resources</c> to obtain the searcher, mapping and analyzer.</param>
        /// <param name="groupField">Name of the field whose values define the groups.</param>
        /// <param name="term">
        /// Query text parsed against the mapping's primary fields. When null or empty,
        /// every document in the index is matched.
        /// </param>
        /// <returns>
        /// One dictionary per group (at most 100000 groups) with the keys <c>"value"</c>
        /// (the group value) and <c>"_hits"</c> (the group's total hit count).
        /// </returns>
        public IEnumerable <IDictionary <string, object> > GroupBy(string indexName, string groupField, string term)
        {
            var groupingSearch = new GroupingSearch(groupField);

            var searcher = _resources.GetIndexSearcher(indexName);

            var mapping = _resources.GetMapping(indexName);

            Query query;

            if (String.IsNullOrEmpty(term))
            {
                query = new MatchAllDocsQuery();
            }
            else
            {
                var parser = new MultiFieldQueryParser(
                    AppLuceneVersion,
                    mapping.PrimaryFields.ToArray(),
                    _resources.GetAnalyzer(indexName));
                query = parser.Parse(term);
            }

            var topGroups = groupingSearch.Search(searcher, query, 0, 100000);

            return(topGroups.Groups
                   .Select(g =>
            {
                object value = g.GroupValue;
                var bytes = value as BytesRef;
                if (bytes != null)
                {
                    // A BytesRef is a window (Offset, Length) into a backing array that may be
                    // larger or shared, so decoding the raw Bytes array would return wrong text.
                    // Utf8ToString() decodes only the referenced slice.
                    value = bytes.Utf8ToString();
                }

                return new Dictionary <string, object>()
                {
                    { "value", value },
                    { "_hits", g.TotalHits }
                };
            }));
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Two-pass grouping search with caching: groups the index by the "Category" field
        /// and writes the group totals, followed by up to 10 documents per group, to the console.
        /// </summary>
        /// <param name="directory">Directory that holds the index to search.</param>
        private void TwoPassGroupingSearch(Directory directory)
        {
            // The using block releases the reader even when searching or printing throws.
            using (var indexReader = DirectoryReader.Open(directory))
            {
                var indexSearcher = new IndexSearcher(indexReader);

                GroupingSearch groupingSearch = new GroupingSearch("Category");

                groupingSearch.SetAllGroups(true);

                groupingSearch.SetGroupDocsLimit(10);

                groupingSearch.SetCachingInMB(40.0, true);

                // Alternative queries, kept on purpose: pass one of them to Search below
                // instead of term_query to try the grouping against a different query.
                MatchAllDocsQuery all_query = new MatchAllDocsQuery();

                NumericRangeQuery <int> numeric_query = NumericRangeQuery.NewInt32Range("Repetition", 1, 2, true, false);

                TermQuery term_query = new TermQuery(new Term("Category", "Cat 1"));

                var topGroups = groupingSearch.Search(indexSearcher, term_query, 0, 10);

                Console.WriteLine("Total group count: " + topGroups.TotalGroupCount);

                Console.WriteLine("Total group hit count: " + topGroups.TotalGroupedHitCount);

                foreach (var groupDocs in topGroups.Groups)
                {
                    // Documents without a "Category" value end up in a group whose value is null;
                    // that happens as soon as one of the alternative queries is used.
                    var groupValue = groupDocs.GroupValue as BytesRef;

                    Console.WriteLine("Group: " + (groupValue != null ? groupValue.Utf8ToString() : "(none)"));

                    foreach (var scoreDoc in groupDocs.ScoreDocs)
                    {
                        var doc = indexSearcher.Doc(scoreDoc.Doc);

                        // GetField returns null for a field the document does not carry, so use ?.
                        Console.WriteLine("Category: " + doc.GetField("Category")?.GetStringValue() + ", BookId: " + doc.GetField("BookId")?.GetStringValue() + ", Rep: " + doc.GetField("Repetition")?.GetInt32Value());
                    }
                }
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Lookup by group int value (alternative syntax): runs a doc-block grouping search in which
        /// the documents matching <c>groupEnd:x</c> delimit the groups, restricted to documents
        /// whose "Repetition" value lies in the range [1, 2).
        /// </summary>
        /// <param name="directory">Directory that holds the index to search.</param>
        private void LookupGroupsByIntAlt(Directory directory)
        {
            Filter groupEndDocs = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupEnd", "x"))));

            // The using block releases the reader even when the search throws.
            using (IndexReader indexReader = DirectoryReader.Open(directory))
            {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);

                GroupingSearch groupingSearch = new GroupingSearch(groupEndDocs);

                groupingSearch.SetGroupSort(new Sort());

                groupingSearch.SetIncludeScores(true);

                Query query = NumericRangeQuery.NewInt32Range("Repetition", 1, 2, true, false);

                // Arguments: searcher, query, groupOffset, groupLimit. The result is not used here.
                groupingSearch.Search(indexSearcher, query, 0, 10);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Lookup by group string value (alternative syntax): runs a doc-block grouping search in which
        /// the documents matching <c>groupEnd:x</c> delimit the groups, restricted to documents
        /// whose "Category" term equals "Cat 1".
        /// </summary>
        /// <param name="directory">Directory that holds the index to search.</param>
        private void LookupGroupsByStringAlt(Directory directory)
        {
            Filter groupEndDocs = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupEnd", "x"))));

            // The using block releases the reader even when the search throws.
            using (IndexReader indexReader = DirectoryReader.Open(directory))
            {
                IndexSearcher indexSearcher = new IndexSearcher(indexReader);

                GroupingSearch groupingSearch = new GroupingSearch(groupEndDocs);

                groupingSearch.SetGroupSort(new Sort());

                groupingSearch.SetIncludeScores(true);

                TermQuery query = new TermQuery(new Term("Category", "Cat 1"));

                // Arguments: searcher, query, groupOffset, groupLimit. The result is not used here.
                groupingSearch.Search(indexSearcher, query, 0, 10);
            }
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Groups the documents matching the provider's current query by <paramref name="fieldName"/>
        /// and returns one page of (group value, hit count) pairs, ordered by hit count descending.
        /// </summary>
        /// <param name="skip">Number of groups to skip from the start of the ordered list.</param>
        /// <param name="pageSize">Maximum number of groups to return.</param>
        /// <param name="fieldName">Name of the field whose values define the groups.</param>
        /// <returns>
        /// The requested page of groups; an empty list when nothing matches. Groups whose value is
        /// null (documents without the field) are never included.
        /// </returns>
        /// <remarks>
        /// Sets <c>_groupCount</c> to the total number of non-null groups found, before paging.
        /// </remarks>
        private IEnumerable <KeyValuePair <string, int> > _GroupBy(int skip, int pageSize, string fieldName)
        {
            GroupingSearch groupingSearch = new GroupingSearch(fieldName);

            groupingSearch.SetGroupSort(Sort.RELEVANCE);
            groupingSearch.SetFillSortFields(false);
            groupingSearch.SetCachingInMB(40.0, true);
            groupingSearch.SetAllGroups(true);

            const int groupOffset = 0;
            const int groupLimit  = 10000000;

            // The reader was previously never disposed; the using block releases it on every path.
            // All results are materialized with ToList() before the block ends.
            using (var reader = DirectoryReader.Open(_directory))
            {
                var searcher = new Lucene.Net.Search.IndexSearcher(reader);

                string rawQuery = _queryProvider.GetBooleanQuery().ToString();

                if (!rawQuery.Contains("isdeleted"))
                {
                    // The leading space keeps the new clause separate from the last clause of the
                    // serialized query; without it the "+" is glued onto the preceding term.
                    rawQuery += " +isdeleted:0";
                }
                var queryParser = new QueryParser(LuceneVersion.LUCENE_48, "isdeleted", analyzer);

                queryParser.AllowLeadingWildcard = _queryProvider.GetContainsWildCard();

                var query = queryParser.Parse(rawQuery);

                ITopGroups <object> result = groupingSearch.Search(searcher, query, groupOffset, groupLimit);

                if (result.Groups == null)
                {
                    // Reset the count so callers do not see the value of an earlier search.
                    _groupCount = 0;

                    return(new List <KeyValuePair <string, int> >());
                }

                // Drop the null group wherever it ranks, not only when it is the top entry.
                var ranked = result.Groups
                             .Where(g => g.GroupValue != null)
                             .OrderByDescending(g => g.TotalHits)
                             .ToList();

                _groupCount = ranked.Count;

                // Take() already stops at the end of the list, so pageSize needs no clamping.
                return(ranked
                       .Skip(skip)
                       .Take(pageSize)
                       .Select(g => new KeyValuePair <string, int>(((BytesRef)g.GroupValue).Utf8ToString(), g.TotalHits))
                       .ToList());
            }
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Indexes seven documents (docIds 0-6) across three authors plus one document without an
        /// author, then checks grouping of the query <c>content:random</c> twice: first by the
        /// "author" field through a randomly configured <c>GroupingSearch</c>, then by document
        /// block using the "groupend" marker documents as block boundaries.
        /// </summary>
        public virtual void TestBasic()
        {

            string groupField = "author";

            // Field type used for the "id" field: stored, nothing else configured here.
            FieldType customType = new FieldType();
            customType.Stored = (true);

            Directory dir = NewDirectory();
            RandomIndexWriter w = new RandomIndexWriter(
                Random(),
                dir,
                NewIndexWriterConfig(TEST_VERSION_CURRENT,
                    new MockAnalyzer(Random())).SetMergePolicy(NewLogMergePolicy()));
            // False only when the writer's codec is named "Lucene3x"; forwarded to AddGroupField and
            // CreateRandomGroupingSearch (presumably controls doc-values use - confirm in those helpers).
            bool canUseIDV = !"Lucene3x".Equals(w.w.Config.Codec.Name, StringComparison.Ordinal);
            // Documents collected here are added together through AddDocuments.
            List<Document> documents = new List<Document>();
            // doc 0 - author1
            Document doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV);
            doc.Add(new TextField("content", "random text", Field.Store.YES));
            doc.Add(new Field("id", "1", customType));
            documents.Add(doc);

            // doc 1 - author1
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV);
            doc.Add(new TextField("content", "some more random text", Field.Store.YES));
            doc.Add(new Field("id", "2", customType));
            documents.Add(doc);

            // doc 2 - author1, last document of the first batch, so it carries the "groupend" marker
            doc = new Document();
            AddGroupField(doc, groupField, "author1", canUseIDV);
            doc.Add(new TextField("content", "some more random textual data", Field.Store.YES));
            doc.Add(new Field("id", "3", customType));
            doc.Add(new StringField("groupend", "x", Field.Store.NO));
            documents.Add(doc);
            w.AddDocuments(documents);
            documents.Clear();

            // doc 3 - author2, added on its own and therefore also marked with "groupend"
            doc = new Document();
            AddGroupField(doc, groupField, "author2", canUseIDV);
            doc.Add(new TextField("content", "some random text", Field.Store.YES));
            doc.Add(new Field("id", "4", customType));
            doc.Add(new StringField("groupend", "x", Field.Store.NO));
            w.AddDocument(doc);

            // doc 4 - author3
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV);
            doc.Add(new TextField("content", "some more random text", Field.Store.YES));
            doc.Add(new Field("id", "5", customType));
            documents.Add(doc);

            // doc 5 - author3, last document of this batch ("groupend" marker)
            doc = new Document();
            AddGroupField(doc, groupField, "author3", canUseIDV);
            doc.Add(new TextField("content", "random", Field.Store.YES));
            doc.Add(new Field("id", "6", customType));
            doc.Add(new StringField("groupend", "x", Field.Store.NO));
            documents.Add(doc);
            w.AddDocuments(documents);
            documents.Clear();

            // doc 6 -- no author field
            // NOTE(review): the stored "id" value "6" repeats the one used for doc 5; the assertions
            // below compare docIds (ScoreDoc.Doc), not the stored "id" field.
            doc = new Document();
            doc.Add(new TextField("content", "random word stuck in alot of other text", Field.Store.YES));
            doc.Add(new Field("id", "6", customType));
            doc.Add(new StringField("groupend", "x", Field.Store.NO));

            w.AddDocument(doc);

            // The searcher is created from the writer's reader before the writer is disposed.
            IndexSearcher indexSearcher = NewSearcher(w.Reader);
            w.Dispose();

            // First search: group by the "author" field, up to 5 documents per group.
            Sort groupSort = Sort.RELEVANCE;
            GroupingSearch groupingSearch = CreateRandomGroupingSearch(groupField, groupSort, 5, canUseIDV);

            ITopGroups<object> groups = groupingSearch.Search(indexSearcher, (Filter)null, new TermQuery(new Index.Term("content", "random")), 0, 10);

            // All seven documents contain the term "random"; expected groups are author3, author1,
            // author2 and the null group for the document without an author.
            assertEquals(7, groups.TotalHitCount);
            assertEquals(7, groups.TotalGroupedHitCount);
            assertEquals(4, groups.Groups.Length);

            // relevance order: 5, 0, 3, 4, 1, 2, 6

            // the later a document is added the higher this docId
            // value
            IGroupDocs<object> group = groups.Groups[0];
            CompareGroupValue("author3", group);
            assertEquals(2, group.ScoreDocs.Length);
            assertEquals(5, group.ScoreDocs[0].Doc);
            assertEquals(4, group.ScoreDocs[1].Doc);
            assertTrue(group.ScoreDocs[0].Score > group.ScoreDocs[1].Score);

            group = groups.Groups[1];
            CompareGroupValue("author1", group);
            assertEquals(3, group.ScoreDocs.Length);
            assertEquals(0, group.ScoreDocs[0].Doc);
            assertEquals(1, group.ScoreDocs[1].Doc);
            assertEquals(2, group.ScoreDocs[2].Doc);
            assertTrue(group.ScoreDocs[0].Score > group.ScoreDocs[1].Score);
            assertTrue(group.ScoreDocs[1].Score > group.ScoreDocs[2].Score);

            group = groups.Groups[2];
            CompareGroupValue("author2", group);
            assertEquals(1, group.ScoreDocs.Length);
            assertEquals(3, group.ScoreDocs[0].Doc);

            // Group for the document that has no author field.
            group = groups.Groups[3];
            CompareGroupValue(null, group);
            assertEquals(1, group.ScoreDocs.Length);
            assertEquals(6, group.ScoreDocs[0].Doc);

            // Second search: group by document block, using the documents that match groupend:x
            // as the last document of each block.
            Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Index.Term("groupend", "x"))));
            groupingSearch = new GroupingSearch(lastDocInBlock);
            groups = groupingSearch.Search(indexSearcher, null, new TermQuery(new Index.Term("content", "random")), 0, 10);

            assertEquals(7, groups.TotalHitCount);
            assertEquals(7, groups.TotalGroupedHitCount);
            assertEquals(4, groups.TotalGroupCount.GetValueOrDefault());
            assertEquals(4, groups.Groups.Length);

            // Release the reader behind the searcher, then the directory.
            indexSearcher.IndexReader.Dispose();
            dir.Dispose();
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Indexes one document with <c>group = "foo"</c>, searches for it with all-groups
        /// collection enabled, and checks the hit counts and the number of matching groups.
        /// </summary>
        public virtual void TestSetAllGroups()
        {
            // Build the writer step by step, in the same evaluation order as a single nested expression.
            Directory directory = NewDirectory();
            var writerRandom = Random();
            var analyzer = new MockAnalyzer(Random());
            var writerConfig = NewIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).SetMergePolicy(NewLogMergePolicy());
            var writer = new RandomIndexWriter(writerRandom, directory, writerConfig);

            var singleDoc = new Document();
            singleDoc.Add(NewField("group", "foo", StringField.TYPE_NOT_STORED));
            writer.AddDocument(singleDoc);

            // Obtain the searcher from the writer's reader, then close the writer.
            IndexSearcher searcher = NewSearcher(writer.Reader);
            writer.Dispose();

            var grouping = new GroupingSearch("group");
            grouping.SetAllGroups(true);

            var matchFoo = new TermQuery(new Index.Term("group", "foo"));
            ITopGroups<object> topGroups = grouping.Search(searcher, null, matchFoo, 0, 10);

            assertEquals(1, topGroups.TotalHitCount);
            // The total-group-count assertion is intentionally left disabled in this test.
            assertEquals(1, topGroups.TotalGroupedHitCount);
            assertEquals(1, grouping.GetAllMatchingGroups().Count);

            searcher.IndexReader.Dispose();
            directory.Dispose();
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Builds a <c>GroupingSearch</c> over <paramref name="groupField"/>, choosing at random
        /// between the value-source based and the field-name based constructor, and enabling
        /// a 4 MB cache at random.
        /// </summary>
        /// <param name="groupField">Name of the field to group by.</param>
        /// <param name="groupSort">Sort applied to the groups.</param>
        /// <param name="docsInGroup">Maximum number of documents kept per group.</param>
        /// <param name="canUseIDV">Not read by this method.</param>
        /// <returns>The configured grouping search.</returns>
        private GroupingSearch CreateRandomGroupingSearch(string groupField, Sort groupSort, int docsInGroup, bool canUseIDV)
        {
            // First random draw: which constructor to use.
            GroupingSearch search = Random().nextBoolean()
                ? new GroupingSearch(new BytesRefFieldSource(groupField), new Hashtable())
                : new GroupingSearch(groupField);

            search.SetGroupSort(groupSort);
            search.SetGroupDocsLimit(docsInGroup);

            // Second random draw: whether to turn on caching.
            bool enableCaching = Random().nextBoolean();
            if (enableCaching)
            {
                search.SetCachingInMB(4.0, true);
            }

            return search;
        }