/// <summary>
/// Fetches the best highlighted fragment for one matched document and returns it
/// HTML-encoded, with the highlighter marker tags replaced by their HTML counterparts.
/// </summary>
/// <param name="highlighter">Highlighter used to extract the fragment.</param>
/// <param name="fieldQuery">Field query handed to the highlighter.</param>
/// <param name="searcher">Searcher whose index reader is handed to the highlighter.</param>
/// <param name="match">The hit whose document id is highlighted.</param>
/// <param name="tag">Passed to <c>GetBestFragment</c> as its field argument.</param>
/// <param name="length">Passed to <c>GetBestFragment</c> as its fragment size argument.</param>
/// <returns>
/// The encoded fragment, or the highlighter's own null/empty result when it produced no text.
/// </returns>
private string GetHighlightedText(FastVectorHighlighter highlighter, FieldQuery fieldQuery, IndexSearcher searcher, ScoreDoc match, string tag, int length)
{
    var fragment = highlighter.GetBestFragment(fieldQuery, searcher.IndexReader, match.Doc, tag, length);
    if (string.IsNullOrEmpty(fragment))
    {
        // Nothing to encode; hand back exactly what the highlighter returned.
        return fragment;
    }

    // Encode the whole fragment first, then swap the marker tags for the HTML tags.
    var encoded = HttpUtility.HtmlEncode(fragment);
    var withOpeningTags = encoded.Replace(HighlightPreTag, HtmlPreTag);
    return withOpeningTags.Replace(HighlightPostTag, HtmlPostTag);
}
/// <summary>
/// Initializes <c>highlighter</c> and <c>fieldQuery</c> when the index query asks for
/// highlighted fields; does nothing otherwise.
/// </summary>
/// <param name="luceneQuery">The query from which the highlighter derives its field query.</param>
private void SetupHighlighter(Query luceneQuery)
{
    var requestedFields = indexQuery.HighlightedFields;
    if (requestedFields == null || requestedFields.Length <= 0)
    {
        return;
    }

    // Caller-supplied tags win; the builder's colored defaults are the fallback.
    var customPreTags = indexQuery.HighlighterPreTags;
    var preTags = customPreTags != null && customPreTags.Any()
        ? customPreTags
        : BaseFragmentsBuilder.COLORED_PRE_TAGS;

    var customPostTags = indexQuery.HighlighterPostTags;
    var postTags = customPostTags != null && customPostTags.Any()
        ? customPostTags
        : BaseFragmentsBuilder.COLORED_POST_TAGS;

    highlighter = new FastVectorHighlighter(
        FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
        FastVectorHighlighter.DEFAULT_FIELD_MATCH,
        new SimpleFragListBuilder(),
        new SimpleFragmentsBuilder(preTags, postTags));

    fieldQuery = highlighter.GetFieldQuery(luceneQuery);
}
/// <summary>
/// Runs a phrase search for <paramref name="queryText"/> against the "text" field and
/// returns cleaned-up highlight fragments for every hit.
/// </summary>
/// <param name="queryText">Space-separated words that make up the phrase.</param>
/// <param name="phraseLimit">Assigned to the highlighter's <c>PhraseLimit</c>.</param>
/// <param name="numHits">
/// Maximum number of documents collected; also passed to the highlighter as the
/// maximum number of fragments per document.
/// </param>
/// <returns>
/// One entry per fragment with the partial first/last words cut off, <c>&lt;b&gt;</c> tags
/// removed and line breaks turned into spaces. Empty when <paramref name="queryText"/>
/// is null, empty, or contains only spaces.
/// </returns>
public IEnumerable<string> FindText(string queryText, int phraseLimit, int numHits = 100)
{
    var results = new List<string>();
    if (string.IsNullOrEmpty(queryText))
    {
        return results;
    }

    var phraseQuery = new PhraseQuery();
    var termCount = 0;
    foreach (var word in queryText.Split(' '))
    {
        // Repeated, leading or trailing spaces yield empty strings; an empty term
        // in the phrase would keep it from matching anything.
        if (word.Length == 0)
        {
            continue;
        }

        phraseQuery.Add(new Term("text", word));
        termCount++;
    }

    if (termCount == 0)
    {
        return results;
    }

    var booleanQuery = new BooleanQuery { { phraseQuery, Occur.MUST } };

    var searcher = new IndexSearcher(_reader);
    var collector = TopScoreDocCollector.Create(numHits: numHits, docsScoredInOrder: true);
    searcher.Search(booleanQuery, collector);
    var scoreDocs = collector.TopDocs(0, numHits).ScoreDocs;

    // Neither object depends on the individual hit, so build them once for all hits.
    var fieldQuery = new FieldQuery(booleanQuery, true, false);
    var highlighter = new FastVectorHighlighter(true, false) { PhraseLimit = phraseLimit };

    foreach (var scoreDoc in scoreDocs)
    {
        var fragments = highlighter.GetBestFragments(fieldQuery, _reader, scoreDoc.Doc, "text", 75, numHits);
        if (fragments == null || fragments.Length <= 0)
        {
            continue;
        }

        foreach (var fragment in fragments)
        {
            // Drop everything up to the first space and from the last space on, so the
            // fragment does not start or end in the middle of a word. Fragments without
            // two usable spaces are kept whole.
            int firstChar = fragment.IndexOf(' ') + 1;
            int lastChar = fragment.LastIndexOf(' ');
            string truncated = firstChar > 0 && lastChar > firstChar
                ? fragment.Substring(firstChar, lastChar - firstChar)
                : fragment;

            truncated = Regex.Replace(truncated, @"</?b>", "");
            truncated = Regex.Replace(truncated, @"\r|\n", " ");
            results.Add(truncated.Trim());
        }
    }

    return results;
}
/// <summary>
/// Executes the index query and lazily yields up to <c>indexQuery.PageSize</c> results,
/// re-querying with a larger window whenever results were filtered out, and attaching
/// highlight fragments when highlighted fields were requested.
/// </summary>
/// <returns>
/// A lazily evaluated sequence of results. <c>indexQuery.TotalSize</c> and
/// <c>indexQuery.SkippedResults</c> are updated while the sequence is enumerated.
/// </returns>
public IEnumerable<IndexQueryResult> Query()
{
    parent.MarkQueried();
    using (IndexStorage.EnsureInvariantCulture())
    {
        AssertQueryDoesNotContainFieldsThatAreNotIndexed();
        IndexSearcher indexSearcher;
        using (parent.GetSearcher(out indexSearcher))
        {
            var luceneQuery = ApplyIndexTriggers(GetLuceneQuery());

            int start = indexQuery.Start;
            int pageSize = indexQuery.PageSize;
            int returnedResults = 0;
            int skippedResultsInCurrentLoop = 0;
            bool readAll;
            bool adjustStart = true;

            var recorder = new DuplicateDocumentRecorder(indexSearcher,
                parent,
                documentsAlreadySeenInPreviousPage,
                alreadyReturned,
                fieldsToFetch,
                parent.IsMapReduce || fieldsToFetch.IsProjection);

            // The highlighter and its field query depend only on indexQuery and luceneQuery,
            // neither of which changes between retries, so they are built once up front
            // instead of on every pass of the retry loop.
            FastVectorHighlighter highlighter = null;
            FieldQuery fieldQuery = null;
            if (indexQuery.HighlightedFields != null && indexQuery.HighlightedFields.Length > 0)
            {
                highlighter = new FastVectorHighlighter(
                    FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                    FastVectorHighlighter.DEFAULT_FIELD_MATCH,
                    new SimpleFragListBuilder(),
                    new SimpleFragmentsBuilder(
                        indexQuery.HighlighterPreTags != null && indexQuery.HighlighterPreTags.Any()
                            ? indexQuery.HighlighterPreTags
                            : BaseFragmentsBuilder.COLORED_PRE_TAGS,
                        indexQuery.HighlighterPostTags != null && indexQuery.HighlighterPostTags.Any()
                            ? indexQuery.HighlighterPostTags
                            : BaseFragmentsBuilder.COLORED_POST_TAGS));

                fieldQuery = highlighter.GetFieldQuery(luceneQuery);
            }

            do
            {
                if (skippedResultsInCurrentLoop > 0)
                {
                    start = start + pageSize - (start - indexQuery.Start); // need to "undo" the index adjustment
                    // trying to guesstimate how many results we will need to read from the index
                    // to get enough unique documents to match the page size
                    pageSize = Math.Max(2, skippedResultsInCurrentLoop) * pageSize;
                    skippedResultsInCurrentLoop = 0;
                }

                TopDocs search;
                int moreRequired;
                do
                {
                    search = ExecuteQuery(indexSearcher, luceneQuery, start, pageSize, indexQuery);
                    moreRequired = recorder.RecordResultsAlreadySeenForDistinctQuery(search, adjustStart, ref start);
                    // Widen the window by twice the number of additional results the recorder asked for.
                    pageSize += moreRequired * 2;
                } while (moreRequired > 0);

                indexQuery.TotalSize.Value = search.TotalHits;
                adjustStart = false;

                for (var i = start; (i - start) < pageSize && i < search.ScoreDocs.Length; i++)
                {
                    var scoreDoc = search.ScoreDocs[i];
                    var document = indexSearcher.Doc(scoreDoc.Doc);
                    var indexQueryResult = parent.RetrieveDocument(document, fieldsToFetch, scoreDoc.Score);
                    if (ShouldIncludeInResults(indexQueryResult) == false)
                    {
                        indexQuery.SkippedResults.Value++;
                        skippedResultsInCurrentLoop++;
                        continue;
                    }

                    if (highlighter != null)
                    {
                        // Lazy query: fragments are computed when one of the branches below enumerates it.
                        var highlightings =
                            from highlightedField in this.indexQuery.HighlightedFields
                            select new
                            {
                                highlightedField.Field,
                                highlightedField.FragmentsField,
                                Fragments = highlighter.GetBestFragments(
                                    fieldQuery,
                                    indexSearcher.IndexReader,
                                    scoreDoc.Doc,
                                    highlightedField.Field,
                                    highlightedField.FragmentLength,
                                    highlightedField.FragmentCount)
                            }
                            into fieldHighlightings
                            where fieldHighlightings.Fragments != null &&
                                  fieldHighlightings.Fragments.Length > 0
                            select fieldHighlightings;

                        if (fieldsToFetch.IsProjection || parent.IsMapReduce)
                        {
                            // Projections / map-reduce results carry the fragments inside the projection,
                            // and only for fields that name a fragments field.
                            foreach (var highlighting in highlightings)
                            {
                                if (!string.IsNullOrEmpty(highlighting.FragmentsField))
                                {
                                    indexQueryResult.Projection[highlighting.FragmentsField] =
                                        new RavenJArray(highlighting.Fragments);
                                }
                            }
                        }
                        else
                        {
                            indexQueryResult.Highligtings = highlightings
                                .ToDictionary(x => x.Field, x => x.Fragments);
                        }
                    }

                    returnedResults++;
                    yield return indexQueryResult;
                    if (returnedResults == indexQuery.PageSize)
                    {
                        yield break;
                    }
                }

                // Stop retrying once the last search already returned every hit there is.
                readAll = search.TotalHits == search.ScoreDocs.Length;
            } while (returnedResults < indexQuery.PageSize && readAll == false);
        }
    }
}
/// <summary>
/// Runs the search described by <paramref name="searchQuery"/>, collects spelling
/// suggestions for the query words, and returns the requested page of results with a
/// highlighted fragment of the "Content" field for each hit.
/// </summary>
/// <param name="searchQuery">Query text, paging and suggestion settings.</param>
/// <param name="suggestOnlyWhenNoResults">
/// When true, suggestions are produced only if the search returned no hits;
/// when false, they are always produced.
/// </param>
/// <returns>The view model holding total hit count, suggestions and the page of results.</returns>
public static SearchResultsViewModel SearchWithSuggestions(SearchQuery searchQuery, bool suggestOnlyWhenNoResults = false)
{
    var ret = new SearchResultsViewModel
    {
        SearchResults = new List<SearchResultsViewModel.SearchResult>(PageSize),
        Query = searchQuery
    };

    // Parse query, possibly throwing a ParseException
    Query query;
    if (searchQuery.TitlesOnly)
    {
        var qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "Title",
                                 new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
        query = qp.Parse(searchQuery.Query);
    }
    else
    {
        query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, searchQuery.Query,
                                            SearchFields, SearchFlags,
                                            new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
    }

    // Perform the actual search; collect enough hits to reach the end of the requested page
    var tsdc = TopScoreDocCollector.create(PageSize * searchQuery.CurrentPage, true);
    Searcher.Search(query, tsdc);
    ret.TotalResults = tsdc.GetTotalHits();
    var hits = tsdc.TopDocs().ScoreDocs;

    // Do the suggestion magic
    if (!suggestOnlyWhenNoResults || ret.TotalResults == 0)
    {
        ret.Suggestions = new List<string>();

        var spellChecker = new SpellChecker.Net.Search.Spell.SpellChecker(Searcher.GetIndexReader().Directory());

        // This is kind of a hack to get things working quickly
        // for real-world usage we probably want to get the analyzed terms from the Query object
        var individualTerms = searchQuery.Query.Split(new[] { ' ', '\t', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);
        foreach (var term in individualTerms)
        {
            // we only specify field name if we actually got results,
            // to improve suggestions relevancy
            ret.Suggestions.AddRange(spellChecker.SuggestSimilar(term,
                                                                 searchQuery.MaxSuggestions,
                                                                 null,
                                                                 ret.TotalResults == 0 ? null : "Title",
                                                                 true));
        }
    }

    // Init the highlighter instance
    var fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                                        FastVectorHighlighter.DEFAULT_FIELD_MATCH,
                                        new SimpleFragListBuilder(),
                                        new ScoreOrderFragmentsBuilder(new[] { "[b]" }, new[] { "[/b]" }));

    // The field query depends only on the parsed query, so build it once for all hits.
    var fq = fvh.GetFieldQuery(query);

    // A CurrentPage below 1 would make the start index negative and index past the
    // beginning of the hits array; start from the first hit instead.
    var firstHit = (searchQuery.CurrentPage - 1) * PageSize;
    if (firstHit < 0)
    {
        firstHit = 0;
    }

    for (var i = firstHit; i < hits.Length; ++i)
    {
        var d = Searcher.Doc(hits[i].doc);
        var fragment = fvh.GetBestFragment(fq, Searcher.GetIndexReader(), hits[i].doc, "Content", 400);

        ret.SearchResults.Add(new SearchResultsViewModel.SearchResult
        {
            Id = d.Get("Id"),
            Title = d.Get("Title"),
            Score = hits[i].score,
            LuceneDocId = hits[i].doc,
            Fragment = MvcHtmlString.Create(fragment.HtmlStripFragment()),
        });
    }

    return ret;
}
/// <summary>
/// Runs the search described by <paramref name="searchQuery"/> and returns the requested
/// page of results, each with a highlighted fragment of the "Content" field.
/// </summary>
/// <param name="searchQuery">Query text, the titles-only switch and the page to return.</param>
/// <returns>The view model holding the total hit count and the page of results.</returns>
public static SearchResultsViewModel Search(SearchQuery searchQuery)
{
    var ret = new SearchResultsViewModel
    {
        SearchResults = new List<SearchResultsViewModel.SearchResult>(PageSize),
        Query = searchQuery
    };

    // Parse query, possibly throwing a ParseException
    Query query;
    if (searchQuery.TitlesOnly) // we only need to query on one field
    {
        var qp = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, "Title",
                                 new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
        query = qp.Parse(searchQuery.Query);
    }
    else // querying on both fields, Content and Title
    {
        query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, searchQuery.Query,
                                            SearchFields, SearchFlags,
                                            new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_29));
    }

    // Init the highlighter instance
    var fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                                        FastVectorHighlighter.DEFAULT_FIELD_MATCH,
                                        new SimpleFragListBuilder(),
                                        new ScoreOrderFragmentsBuilder(new[] { "[b]" }, new[] { "[/b]" }));

    // Perform the actual search; collect enough hits to reach the end of the requested page
    var tsdc = TopScoreDocCollector.create(PageSize * searchQuery.CurrentPage, true);
    Searcher.Search(query, tsdc);
    ret.TotalResults = tsdc.GetTotalHits();
    var hits = tsdc.TopDocs().ScoreDocs;

    // The field query depends only on the parsed query, so build it once for all hits.
    var fq = fvh.GetFieldQuery(query);

    // A CurrentPage below 1 would make the start index negative and index past the
    // beginning of the hits array; start from the first hit instead.
    var firstHit = (searchQuery.CurrentPage - 1) * PageSize;
    if (firstHit < 0)
    {
        firstHit = 0;
    }

    for (var i = firstHit; i < hits.Length; ++i)
    {
        var d = Searcher.Doc(hits[i].doc);
        var fragment = fvh.GetBestFragment(fq, Searcher.GetIndexReader(), hits[i].doc, "Content", 400);

        ret.SearchResults.Add(new SearchResultsViewModel.SearchResult
        {
            Id = d.Get("Id"),
            Title = d.Get("Title"),
            Score = hits[i].score,
            LuceneDocId = hits[i].doc,
            Fragment = MvcHtmlString.Create(fragment.HtmlStripFragment()),
        });
    }

    return ret;
}
/// <summary>
/// Searches the index named by <paramref name="searchQuery"/>, stores the query in the
/// document store when one is configured, and returns the requested page of results with
/// a highlighted content fragment for each hit.
/// </summary>
/// <param name="searchQuery">Index name, query text, search type and the page to return.</param>
/// <param name="totalHitCount">Receives the total number of hits reported by the collector.</param>
/// <returns>The results from the start of the requested page to the end of the collected hits.</returns>
/// <exception cref="ArgumentException">No searcher exists for the requested index name.</exception>
public IEnumerable<SearchResult> Search(SearchQuery searchQuery, out int totalHitCount)
{
    var searcher = GetSearcher(searchQuery.IndexName);
    if (searcher == null)
    {
        throw new ArgumentException("Index not found: " + searchQuery.IndexName);
    }

    // Parse query, possibly throwing a ParseException
    Query query;
    if (searchQuery.SearchType == SearchType.LuceneDefault)
    {
        query = MultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, searchQuery.Query,
                                            searchFieldsLucenesDefault, searchFlags,
                                            GetAnalyzer(searchQuery.SearchType));
    }
    else
    {
        query = HebrewMultiFieldQueryParser.Parse(Lucene.Net.Util.Version.LUCENE_29, searchQuery.Query,
                                                  searchFields, searchFlags,
                                                  GetAnalyzer(searchQuery.SearchType));
    }

    // Log search, if doc-store exists
    if (MvcApplication.RavenDocStore != null)
    {
        using (var session = MvcApplication.RavenDocStore.OpenSession())
        {
            session.Store(searchQuery);
            session.SaveChanges();
        }
    }

    // Init
    var fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                                        FastVectorHighlighter.DEFAULT_FIELD_MATCH,
                                        new SimpleFragListBuilder(),
                                        new CustomFragmentsBuilder("Content", new String[] { "[b]" }, new String[] { "[/b]" }));

    var contentFieldName = searchQuery.SearchType == SearchType.LuceneDefault ? "ContentDefault" : "Content";

    // Perform actual search; collect enough hits to reach the end of the requested page
    var tsdc = TopScoreDocCollector.create(PageSize * searchQuery.CurrentPage, true);
    searcher.Search(query, tsdc);
    totalHitCount = tsdc.GetTotalHits();
    var hits = tsdc.TopDocs().scoreDocs;

    var ret = new List<SearchResult>(PageSize);

    // The field query depends only on the parsed query, so build it once for all hits.
    var fq = fvh.GetFieldQuery(query);

    // A CurrentPage below 1 would make the start index negative and index past the
    // beginning of the hits array; start from the first hit instead.
    var firstHit = (searchQuery.CurrentPage - 1) * PageSize;
    if (firstHit < 0)
    {
        firstHit = 0;
    }

    for (var i = firstHit; i < hits.Length; ++i)
    {
        var d = searcher.Doc(hits[i].doc);
        var fragment = fvh.GetBestFragment(fq, searcher.GetIndexReader(), hits[i].doc, contentFieldName, 400);

        ret.Add(new SearchResult
        {
            Id = d.Get("Id"),
            Title = d.Get("Title"),
            Score = hits[i].score,
            LuceneDocId = hits[i].doc,
            Fragment = MvcHtmlString.Create(fragment.HtmlStripFragment()),
        });
    }

    return ret;
}