/// <summary>
        /// Annotates the given sequence of <see cref="Document"/> objects by adding a <b>_highlight</b> field;
        /// the <b>_highlight</b> field will contain the best matching text fragment from the <see cref="Document"/> 
        /// object's full-text field.
        /// </summary>
        /// <param name="hits">The sequence of <see cref="Document"/> objects.</param>
        /// <param name="criteria">The search criteria that produced the hits.</param>
        /// <returns>
        /// The original sequence of Document objects, with a <b>_highlight</b> field added to each Document.
        /// </returns>
        public static IEnumerable<Document> GenerateHighlights(this IEnumerable<Document> hits, SearchCriteria criteria)
        {
            if (hits == null)
                throw new ArgumentNullException(nameof(hits));
            if (criteria == null)
                throw new ArgumentNullException(nameof(criteria));
            if (String.IsNullOrWhiteSpace(criteria.Query))
                throw new ArgumentException("SearchCriteria.Query cannot be empty");

            // Materialize the sequence once; the same list is indexed, annotated and returned.
            var documents = hits.ToList();

            // Nothing to annotate: skip building a throw-away index. The highlight search
            // requests documents.Count hits, and Lucene rejects a request for zero hits,
            // which previously surfaced as a logged error for an empty (valid) input.
            if (documents.Count == 0)
                return documents;

            try
            {
                // A temporary in-memory index is built just for this batch of documents.
                var indexDirectory = new RAMDirectory();
                try
                {
                    var analyzer = new FullTextAnalyzer();
                    var config = new IndexWriterConfig(analyzer).SetRAMBufferSizeMB(_ramBufferSizeMB);
                    var writer = new IndexWriter(indexDirectory, config);
                    try
                    {
                        BuidIndex(documents, writer);
                        GenerateHighlights(documents, writer, criteria);

                        writer.DeleteAll();
                        writer.Commit();
                    }
                    finally
                    {
                        // Close the writer even when indexing or highlighting failed.
                        writer.Close();
                    }
                }
                finally
                {
                    // Always release the in-memory directory.
                    indexDirectory.Close();
                }
            }
            catch (Exception ex)
            {
                // Highlighting is best-effort: a failure is logged and the documents
                // are still returned (possibly without the highlight field).
                _log.Error(ex);
            }

            return documents;
        }
        /// <summary>
        /// Inserts four books, one of which has a null Author, and checks that querying
        /// the Author field for the null-value token returns that book first.
        /// </summary>
        public void Can_search_for_null_value()
        {
            // The fourth book is the only one created with a null Author.
            var books = new[]
            {
                TestUtils.CreateBook("The Hitchhiker's Guide to the Galaxy", "Douglas Adams", new DateTime(1979, 10, 12, 12, 0, 0, DateTimeKind.Utc), 10, "The Hitchhiker's Guide to the Galaxy is a comedy science fiction series created by Douglas Adams. Originally a radio comedy broadcast on BBC Radio 4 in 1978, it was later adapted to other formats, and over several years it gradually became an international multi-media phenomenon."),
                TestUtils.CreateBook("The Restaurant at the End of the Universe", "Douglas Adams", new DateTime(1980, 10, 12, 12, 0, 0, DateTimeKind.Utc), 9, "The Restaurant at the End of the Universe (1980, ISBN 0-345-39181-0) is the second book in the Hitchhiker's Guide to the Galaxy comedy science fiction 'trilogy' by Douglas Adams, and is a sequel."),
                TestUtils.CreateBook("Life, the Universe and Everything", "Douglas Adams", new DateTime(1982, 10, 12, 12, 0, 0, DateTimeKind.Utc), 9, "Life, the Universe and Everything (1982, ISBN 0-345-39182-9) is the third book in the five-volume Hitchhiker's Guide to the Galaxy science fiction trilogy by British writer Douglas Adams. The title refers to the Answer to Life, the Universe, and Everything."),
                TestUtils.CreateBook("So Long, and Thanks for All the Fish", null, new DateTime(1984, 10, 12, 12, 0, 0, DateTimeKind.Utc), 9, "So Long, and Thanks for All the Fish is the fourth book of the Hitchhiker's Guide to the Galaxy trilogy written by Douglas Adams. Its title is the message left by the dolphins when they departed Planet Earth just before it was demolished to make way for a hyperspace bypass, as described in The Hitchhiker's Guide to the Galaxy.")
            };

            foreach (var book in books)
            {
                _collection.InsertAsync(book).Wait();
            }

            // NOTE(review): presumably gives the index time to pick up the inserts - confirm.
            Thread.Sleep(1000);

            var nullAuthorCriteria = new SearchCriteria();
            nullAuthorCriteria.Query = @"Author:_null_";     // LuceneField.DEFAULT_NULL_VALUE_TOKEN
            nullAuthorCriteria.SortByField = "Title";

            var searchResult = _collection.SearchAsync(nullAuthorCriteria).Result;
            dynamic topHit = searchResult.Items.First();
            var actualTitle = topHit.Title as string;

            Assert.AreEqual<string>("So Long, and Thanks for All the Fish", actualTitle);
        }
        /// <summary>
        /// Checks that counting the "books" collection with a match-all query yields 5.
        /// </summary>
        public void Can_count_document()
        {
            var matchAll = new SearchCriteria();
            matchAll.Query = "*:*";

            var actualCount = _db["books"].Count(matchAll);

            Assert.AreEqual<int>(5, actualCount);
        }
        /// <summary>
        /// Inserts ten documents into the Lucene index with descending BookId values, then
        /// checks that a search sorted by BookId returns all ten IDs in reverse insertion order.
        /// </summary>
        public void Can_add_documents_and_search()
        {
            var documents = new List<Document>();
            for(var i = 0; i < 10; i++)
            {
                dynamic newDocument = CreateDocument();
                // BookId runs 9..0, so sorting by BookId reverses the insertion order.
                newDocument.BookId = 9-i;

                documents.Add(newDocument);
                _luceneIndex.Insert(newDocument);
                // NOTE(review): the reason for this per-insert delay is not visible here - confirm.
                Thread.Sleep(1200);
            }

            // Expected result order: the inserted IDs, reversed.
            var documentIds = documents.Select(c => c._id.Value).ToList();
            documentIds.Reverse();

            _luceneIndex.Refresh();

            var criteria = new SearchCriteria
            {
                Query = "hitchhiker AND galaxy",
                SortByField = "BookId",
                TopN = 10
            };

            var result = _luceneIndex.Search(criteria);

            // Expected value goes first, so a failure message reports the right sides.
            Assert.AreEqual<int?>(10, result.ItemCount);
            Assert.AreEqual<int?>(result.ItemCount, result.TotalHits);
            Assert.IsTrue(result.Items.SequenceEqual(documentIds));
        }
        /// <summary>
        /// Searches the "books" collection for Author:Douglas, sorted by "-Title" and limited
        /// to one item, and checks the item count, total hits and the title of the first item.
        /// </summary>
        public void Can_search_for_documents()
        {
            var byAuthor = new SearchCriteria();
            byAuthor.Query = "Author:Douglas";
            byAuthor.SortByField = "-Title";
            byAuthor.TopN = 1;

            var searchResult = _db["books"].SearchAsync(byAuthor).Result;

            // One item is returned, out of five total matches.
            Assert.AreEqual<int?>(1, searchResult.ItemCount);
            Assert.AreEqual<int?>(5, searchResult.TotalHits);

            dynamic topHit = searchResult.Items.First();
            Assert.AreEqual<string>("The Restaurant at the End of the Universe", topHit.Title);
        }
        /// <summary>
        /// Converts the given SearchRequestDto to a SearchCriteria object by copying its
        /// where, sortBy, topN, itemsPerPage and pageNumber values.
        /// </summary>
        /// <param name="dto">The SearchRequestDto object.</param>
        /// <returns>A new SearchCriteria populated from the DTO.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="dto"/> is null.</exception>
        public static SearchCriteria ToSearchCriteria(this SearchRequestDto dto)
        {
            // Fail with a descriptive exception instead of a NullReferenceException.
            if (dto == null)
                throw new ArgumentNullException(nameof(dto));

            var searchCriteria = new SearchCriteria
            {
                Query = dto.where,
                SortByField = dto.sortBy,
                TopN = dto.topN,
                ItemsPerPage = dto.itemsPerPage,
                PageNumber = dto.pageNumber
            };

            return searchCriteria;
        }
        /// <summary>
        /// Disposes the database, re-opens it from the same data path, and checks that
        /// the "books" collection can still be searched with the expected results.
        /// </summary>
        public void Can_dispose_and_reload()
        {
            // Shut down, then reload from disk; the pauses are kept exactly as in the original test.
            _db.Dispose();
            Thread.Sleep(2000);

            _db = new Database(_dataPath);
            Thread.Sleep(1000);

            var byAuthor = new SearchCriteria();
            byAuthor.Query = "Author:Douglas";
            byAuthor.SortByField = "-Title";
            byAuthor.TopN = 1;

            var searchResult = _db["books"].SearchAsync(byAuthor).Result;

            Assert.AreEqual<int?>(1, searchResult.ItemCount);
            Assert.AreEqual<int?>(5, searchResult.TotalHits);

            dynamic topHit = searchResult.Items.First();
            Assert.AreEqual<string>("The Restaurant at the End of the Universe", topHit.Title);
        }
Example #8
0
        /// <summary>
        /// Searches the Lucene index for Documents that match the specified search criteria.
        /// </summary>
        /// <param name="criteria">The search criteria.</param>
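        /// <returns>
        /// A SearchResult of Guid populated from the matching top documents and their facet categories;
        /// the result is returned unpopulated if no searcher/taxonomy instance could be acquired.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="criteria"/> is null.</exception>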
        public SearchResult<Guid> Search(SearchCriteria criteria)
        {
            if (criteria == null)
            {
                throw new ArgumentNullException(nameof(criteria));
            }

            // Fill in defaults for missing/non-positive values. Note that this
            // writes to the criteria object supplied by the caller.
            criteria.Query = String.IsNullOrWhiteSpace(criteria.Query) ? ALL_DOCS_QUERY : criteria.Query;
            criteria.TopN = criteria.TopN > 0 ? criteria.TopN : SearchCriteria.DEFAULT_TOP_N;
            criteria.ItemsPerPage = criteria.ItemsPerPage > 0 ? criteria.ItemsPerPage : SearchCriteria.DEFAULT_ITEMS_PER_PAGE;
            criteria.PageNumber = criteria.PageNumber > 0 ? criteria.PageNumber : 1;
            criteria.Validate();

            var searchResult = new SearchResult<Guid>(criteria);
            var parsedQuery = new LuceneQueryParser(Schema.StandardField.FULL_TEXT, _compositeAnalyzer, Schema)
                .Parse(criteria.Query);

            var acquired = _searcherTaxonomyManager.Acquire() as SearcherTaxonomyManagerSearcherAndTaxonomy;
            if (acquired == null)
            {
                // No searcher available: hand back the result without populating it.
                return searchResult;
            }

            var searcher = acquired.Searcher;
            var taxonomyReader = acquired.TaxonomyReader;

            try
            {
                var sortOrder = GetSortCriteria(criteria.SortByField);
                var requestedFacets = criteria.SelectCategories.ToFacetFields();
                TopDocs matchingDocs;
                IEnumerable<Category> facetCategories;

                if (requestedFacets.Count() > 0)
                {
                    // Specific facets were selected: run a drill-sideways query over them.
                    var drillDown = new DrillDownQuery(_facetBuilder.FacetsConfig, parsedQuery);
                    foreach (var facet in requestedFacets)
                    {
                        drillDown.Add(facet.Dim, facet.Path);
                    }

                    var sideways = new DrillSideways(searcher, _facetBuilder.FacetsConfig, taxonomyReader);
                    var sidewaysResult = sideways.Search(drillDown, null, null, criteria.TopN, sortOrder, false, false);

                    // Matching documents, then the facet counts for them.
                    matchingDocs = sidewaysResult.Hits;
                    facetCategories = sidewaysResult.Facets.GetCategories(criteria.TopNCategories, requestedFacets);
                }
                else
                {
                    // No facet selection: search normally and take the top N facets
                    // from whatever documents matched.
                    var collector = new FacetsCollector();
                    matchingDocs = FacetsCollector.Search(searcher, parsedQuery, criteria.TopN, sortOrder, collector);

                    var counts = new FastTaxonomyFacetCounts(taxonomyReader, _facetBuilder.FacetsConfig, collector);
                    facetCategories = counts.GetCategories(criteria.TopNCategories);
                }

                // TODO: Don't pass TopDocs; pass an IEnumerable<Guid>
                searchResult.PopulateWith(matchingDocs, facetCategories, id => searcher.Doc(id));
            }
            finally
            {
                _searcherTaxonomyManager.Release(acquired);

                // Kept deliberately: 'searcher' is captured by the lambda above, so
                // clearing it here is visible to any later invocation of that lambda.
                searcher = null;
                taxonomyReader = null;
            }

            return searchResult;
        }
        /// <summary>
        /// Searches the temporary index behind <paramref name="writer"/> with the criteria's query and,
        /// for each hit that maps back to one of <paramref name="documents"/>, stores the best matching
        /// full-text fragments in that document's highlight field.
        /// </summary>
        /// <param name="documents">The documents to annotate; they are looked up by the string form of their _id.</param>
        /// <param name="writer">The writer of the index that contains the documents.</param>
        /// <param name="criteria">The search criteria whose Query is used for highlighting.</param>
        private static void GenerateHighlights(IList<Document> documents, IndexWriter writer, SearchCriteria criteria)
        {
            var documentsById = documents.ToDictionary(c => c._id.ToString());

            var reader = DirectoryReader.Open(writer, true, true);
            try
            {
                var queryParser = new HighlighterQueryParser(writer.GetAnalyzer());
                queryParser.SetMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);

                var query = queryParser.Parse(criteria.Query)
                                       .Rewrite(reader);

                var highlighter = CreateHighlighter();
                var fieldQuery = highlighter.GetFieldQuery(query);

                var searcher = new IndexSearcher(reader);
                // Ask for as many hits as there are documents so every one can be matched.
                var topFieldDocs = searcher.Search(query, documents.Count, Sort.RELEVANCE);
                var scoreDocs = topFieldDocs.ScoreDocs;

                foreach (var sd in scoreDocs)
                {
                    var bestFragments = highlighter.GetBestFragments(fieldQuery, reader, sd.Doc, Schema.StandardField.FULL_TEXT, FRAGMENT_SIZE, FRAGMENT_COUNT);
                    var document = searcher.Doc(sd.Doc);
                    var docId = document.Get(Schema.StandardField.ID);

                    // Single lookup instead of ContainsKey followed by the indexer.
                    Document matchedDocument;
                    if (documentsById.TryGetValue(docId, out matchedDocument) && bestFragments.Length > 0)
                    {
                        var dictionary = matchedDocument.AsDictionary();
                        var highlight = String.Join($"{Environment.NewLine} ... {Environment.NewLine}", bestFragments);
                        dictionary[HIGHLIGHT_FIELD_NAME] = highlight;
                    }
                }
            }
            finally
            {
                // The reader was previously never released; close it even if highlighting fails.
                reader.Close();
            }
        }
Example #10
0
        /// <summary>
        /// Counts the Documents that match the specified search criteria.
        /// </summary>
        /// <param name="criteria">The search criteria.</param>
        /// <returns>The total number of hits reported by the Lucene index for the criteria.</returns>
        public int Count(SearchCriteria criteria)
        {
            EnsureCollectionIsNotDropped();

            if (criteria == null)
            {
                throw new ArgumentNullException(nameof(criteria));
            }

            // Only the hit total matters here, so a single document is requested.
            // This overwrites TopN on the criteria object passed in by the caller.
            criteria.TopN = 1;

            return _luceneIndex.Search(criteria).TotalHits;
        }
Example #11
0
        /// <summary>
        /// Searches the Document Collection for Document objects that match the specified search criteria.
        /// </summary>
        /// <param name="criteria">The search criteria.</param>
        /// <returns></returns>
        /// <exception cref="System.ArgumentNullException"></exception>
        public async Task<SearchResult<Document>> SearchAsync(SearchCriteria criteria)
        {
            EnsureCollectionIsNotDropped();

            if (criteria == null)
            {
                throw new ArgumentNullException(nameof(criteria));
            }

            // Step 1: the Lucene index yields the matching document IDs and the counts.
            var indexHits = _luceneIndex.Search(criteria);
            var response = new SearchResult<Document>(criteria, indexHits.ItemCount, indexHits.TotalHits, indexHits.PageCount);

            // Step 2: fetch the Documents from storage, but only when something matched.
            if (response.ItemCount > 0)
            {
                var matchingIds = indexHits.Items.ToList();
                response.Items = await _documentStorage.GetAsync(Name, matchingIds).ConfigureAwait(false);
                response.Categories = indexHits.Categories;
            }

            // NOTE: At this point the Items collection only contains the compressed binary form of the Document objects.
            // The Items collection will be deserialized to Document objects only when enumerated.

            // Step 3: optionally annotate the items with highlight fragments.
            if (!criteria.IncludeHighlight)
            {
                return response;
            }

            response.Items = response.Items.GenerateHighlights(criteria);
            return response;
        }
        /// <summary>
        /// Converts the given CountRequestDto to a SearchCriteria object that carries the
        /// DTO's where clause as its Query.
        /// </summary>
        /// <param name="dto">The CountRequestDto object.</param>
        /// <returns>A new SearchCriteria populated from the DTO.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="dto"/> is null.</exception>
        public static SearchCriteria ToSearchCriteria(this CountRequestDto dto)
        {
            // Fail with a descriptive exception instead of a NullReferenceException.
            if (dto == null)
                throw new ArgumentNullException(nameof(dto));

            var searchCriteria = new SearchCriteria
            {
                Query = dto.where,
                // NOTE(review): a count presumably only needs the hit total, so a
                // much smaller TopN may suffice - confirm before changing.
                TopN = int.MaxValue
            };

            return searchCriteria;
        }
Example #13
0
        /// <summary>
        /// Converts the given <see cref="CountRequestDto"/> to a <see cref="SearchCriteria"/> object.
        /// </summary>
        /// <param name="dto">The CountRequestDto object.</param>
        /// <returns></returns>
        public static SearchCriteria ToSearchCriteria(this CountRequestDto dto)
        {
            // Fail with a descriptive exception instead of a NullReferenceException
            // when the extension method is invoked on a null DTO.
            if (dto == null)
                throw new ArgumentNullException(nameof(dto));

            var searchCriteria = new SearchCriteria
            {
                Query = dto.where,
                TopN = 1  // We're not interested in the docs, just the total hits.
            };

            return searchCriteria;
        }
Example #14
0
        /// <summary>
        /// Converts the given <see cref="SearchRequestDto"/> to a <see cref="SearchCriteria"/> object.
        /// </summary>
        /// <param name="dto">The SearchRequestDto object.</param>
        /// <returns></returns>
        public static SearchCriteria ToSearchCriteria(this SearchRequestDto dto)
        {
            // Fail with a descriptive exception instead of a NullReferenceException
            // when the extension method is invoked on a null DTO.
            if (dto == null)
                throw new ArgumentNullException(nameof(dto));

            // Optional DTO values fall back to the SearchCriteria defaults
            // (page number 1, no highlighting) when they are not supplied.
            var searchCriteria = new SearchCriteria
            {
                Query = dto.where,
                SortByField = dto.orderBy,
                TopN = dto.topN ?? SearchCriteria.DEFAULT_TOP_N,
                ItemsPerPage = dto.documentsPerPage ?? SearchCriteria.DEFAULT_ITEMS_PER_PAGE,
                PageNumber = dto.pageNumber ?? 1,
                IncludeHighlight = dto.highlight ?? false,
                SelectCategories = dto.selectCategories,
                TopNCategories = dto.topNCategories ?? SearchCriteria.DEFAULT_TOP_N_CATEGORIES
            };

            return searchCriteria;
        }